fix: recover table-style nutrition evidence
Browse files
Codex-authored nutrition-table parsing, evidence diagnostics, and conservative OCR near-matching.
- frontend/app.js +1 -0
- frontend/index.html +1 -1
- frontend/styles.css +1 -0
- nemotron_space/app.py +2 -0
- src/packetcourt/audit.py +40 -0
- src/packetcourt/investigator.py +11 -0
- src/packetcourt/parser.py +28 -4
- src/packetcourt/vlm.py +3 -0
- tests/test_audit.py +42 -0
- vision_space/app.py +3 -1
frontend/app.js
CHANGED
|
@@ -115,6 +115,7 @@ function render(data) {
|
|
| 115 |
["Whole-packet sodium", packet.sodium_mg == null ? "Not calculable" : `${packet.sodium_mg}mg`],
|
| 116 |
];
|
| 117 |
$("#nutrition-grid").innerHTML = facts.map(([key, value]) => `<div><span>${key}</span><b>${value}</b></div>`).join("");
|
|
|
|
| 118 |
$("#expiry-status").textContent = data.expiry.status;
|
| 119 |
$("#opening-status").textContent = data.expiry.after_opening_instruction
|
| 120 |
? `After opening: ${data.expiry.after_opening_instruction}`
|
|
|
|
| 115 |
["Whole-packet sodium", packet.sodium_mg == null ? "Not calculable" : `${packet.sodium_mg}mg`],
|
| 116 |
];
|
| 117 |
$("#nutrition-grid").innerHTML = facts.map(([key, value]) => `<div><span>${key}</span><b>${value}</b></div>`).join("");
|
| 118 |
+
$("#nutrition-explanation").textContent = packet.explanation;
|
| 119 |
$("#expiry-status").textContent = data.expiry.status;
|
| 120 |
$("#opening-status").textContent = data.expiry.after_opening_instruction
|
| 121 |
? `After opening: ${data.expiry.after_opening_instruction}`
|
frontend/index.html
CHANGED
|
@@ -113,7 +113,7 @@
|
|
| 113 |
</section>
|
| 114 |
<div class="claim-grid" id="claim-grid"></div>
|
| 115 |
<div class="evidence-summary">
|
| 116 |
-
<article><p class="kicker">NUTRITION EVIDENCE</p><div id="nutrition-grid"></div></article>
|
| 117 |
<article class="date-card"><p class="kicker">DATE EVIDENCE</p><h3 id="expiry-status"></h3><p id="opening-status"></p><p>Expiry interpretation is evidence, not a food-safety guarantee.</p></article>
|
| 118 |
</div>
|
| 119 |
<details><summary>View machine-readable evidence case</summary><pre id="raw-json"></pre></details>
|
|
|
|
| 113 |
</section>
|
| 114 |
<div class="claim-grid" id="claim-grid"></div>
|
| 115 |
<div class="evidence-summary">
|
| 116 |
+
<article><p class="kicker">NUTRITION EVIDENCE</p><div id="nutrition-grid"></div><p class="nutrition-explanation" id="nutrition-explanation"></p></article>
|
| 117 |
<article class="date-card"><p class="kicker">DATE EVIDENCE</p><h3 id="expiry-status"></h3><p id="opening-status"></p><p>Expiry interpretation is evidence, not a food-safety guarantee.</p></article>
|
| 118 |
</div>
|
| 119 |
<details><summary>View machine-readable evidence case</summary><pre id="raw-json"></pre></details>
|
frontend/styles.css
CHANGED
|
@@ -19,6 +19,7 @@ main{max-width:1320px;margin:auto;padding:0 4vw}.hero{min-height:670px;display:g
|
|
| 19 |
.gap-section{margin-bottom:28px;padding:26px;border:1px solid var(--ink);background:#1b1b17;color:var(--cream);border-radius:18px}.gap-heading{display:flex;justify-content:space-between;gap:20px;align-items:end;margin-bottom:18px}.gap-heading .kicker{color:#bdb4a6}.gap-heading h3{font:700 clamp(27px,4vw,46px)/1 Georgia,serif;max-width:700px;margin:0}.gap-grid{display:grid;grid-template-columns:repeat(2,1fr);gap:12px}.gap-card,.gap-empty{padding:19px;border:1px solid #4a483f;border-radius:13px;background:#26251f}.gap-card.high{border-color:var(--red)}.gap-card.medium{border-color:var(--amber)}.gap-severity{font:500 8px ui-monospace,SFMono-Regular,Menlo,monospace;letter-spacing:.13em;text-transform:uppercase;color:#d9a65e}.gap-card h4{font-size:20px;margin:10px 0 16px}.gap-compare{display:grid;grid-template-columns:1fr 1fr;gap:10px}.gap-compare p{margin:0;padding:11px;background:#313029;border-radius:8px;font-size:12px;line-height:1.45}.gap-compare b{display:block;font:500 8px ui-monospace,SFMono-Regular,Menlo,monospace;color:#bdb4a6;margin-bottom:5px}.gap-card .evidence{border-top-color:#4a483f}.gap-empty{display:grid;gap:5px;color:#bdb4a6}
|
| 20 |
.claim-grid{grid-template-columns:repeat(2,1fr)}.claim-card{background:var(--cream);border:1px solid var(--line);border-top:6px solid var(--muted);border-radius:16px;padding:23px}.claim-card.supported{border-top-color:var(--green)}.claim-card.contradicted{border-top-color:var(--red)}.claim-card.context{border-top-color:var(--amber)}.claim-top{display:flex;justify-content:space-between;gap:10px;align-items:start}.claim-name{font-size:21px;font-weight:800}.verdict{font:500 8px/1.3 ui-monospace,SFMono-Regular,Menlo,monospace;letter-spacing:.08em;border:1px solid var(--line);border-radius:99px;padding:7px 9px;text-align:right}.confidence{display:block;margin-top:8px;font:500 8px ui-monospace,SFMono-Regular,Menlo,monospace;letter-spacing:.1em;text-transform:uppercase;color:var(--muted)}.summary{min-height:45px;color:#534d43;line-height:1.5}.evidence{padding:11px 0;border-top:1px solid var(--line)}.evidence b,.evidence span{display:block}.evidence b{font:500 8px ui-monospace,SFMono-Regular,Menlo,monospace;letter-spacing:.12em;color:var(--muted);text-transform:uppercase}.evidence span{font-size:13px;margin-top:4px}.caveat{font-size:11px;color:var(--muted);margin-top:15px}
|
| 21 |
.evidence-summary{margin-top:16px}.evidence-summary article{padding:25px;border:1px solid var(--line);border-radius:16px;background:var(--cream)}#nutrition-grid div{display:flex;justify-content:space-between;padding:10px 0;border-bottom:1px solid var(--line);font-size:13px}.date-card{background:var(--ink)!important;color:var(--cream)}.date-card .kicker{color:#cfc5b6}.date-card h3{font:700 28px/1.15 "Playfair Display"}details{margin-top:16px;border:1px solid var(--line);border-radius:14px;padding:17px;background:var(--cream)}summary{cursor:pointer;font-weight:700}pre{white-space:pre-wrap;font:11px/1.5 "DM Mono";overflow:auto}
|
|
|
|
| 22 |
.method{border-top:1px solid var(--line)}.method-grid{grid-template-columns:repeat(4,1fr);margin-top:40px}.method-grid div{padding:20px;border-top:2px solid var(--ink)}.method-grid span{font:500 10px "DM Mono";color:var(--red)}.method-grid p{font-size:13px;line-height:1.5;color:var(--muted)}
|
| 23 |
.feedback-agent{display:grid;grid-template-columns:.8fr 1.2fr;gap:28px;margin-top:18px;padding:26px;border:1px solid var(--line);border-radius:18px;background:var(--cream)}.feedback-agent h3{font:700 clamp(25px,4vw,42px)/1 Georgia,serif;margin:0}.feedback-agent p{font-size:12px;line-height:1.55;color:var(--muted)}.feedback-controls{display:grid;gap:10px}.feedback-choice{display:flex;gap:8px;flex-wrap:wrap}.feedback-choice button.active{background:var(--red);border-color:var(--red);color:white}.feedback-controls textarea{min-height:95px;padding:13px;border:1px solid var(--line);border-radius:11px;background:#f8f3e9;resize:vertical}.feedback-controls>span{font:500 9px/1.5 ui-monospace,SFMono-Regular,Menlo,monospace;color:var(--green)}
|
| 24 |
footer{display:flex;justify-content:space-between;gap:20px;padding:25px 5vw;border-top:1px solid var(--line);font:500 10px "DM Mono";color:var(--muted)}
|
|
|
|
| 19 |
.gap-section{margin-bottom:28px;padding:26px;border:1px solid var(--ink);background:#1b1b17;color:var(--cream);border-radius:18px}.gap-heading{display:flex;justify-content:space-between;gap:20px;align-items:end;margin-bottom:18px}.gap-heading .kicker{color:#bdb4a6}.gap-heading h3{font:700 clamp(27px,4vw,46px)/1 Georgia,serif;max-width:700px;margin:0}.gap-grid{display:grid;grid-template-columns:repeat(2,1fr);gap:12px}.gap-card,.gap-empty{padding:19px;border:1px solid #4a483f;border-radius:13px;background:#26251f}.gap-card.high{border-color:var(--red)}.gap-card.medium{border-color:var(--amber)}.gap-severity{font:500 8px ui-monospace,SFMono-Regular,Menlo,monospace;letter-spacing:.13em;text-transform:uppercase;color:#d9a65e}.gap-card h4{font-size:20px;margin:10px 0 16px}.gap-compare{display:grid;grid-template-columns:1fr 1fr;gap:10px}.gap-compare p{margin:0;padding:11px;background:#313029;border-radius:8px;font-size:12px;line-height:1.45}.gap-compare b{display:block;font:500 8px ui-monospace,SFMono-Regular,Menlo,monospace;color:#bdb4a6;margin-bottom:5px}.gap-card .evidence{border-top-color:#4a483f}.gap-empty{display:grid;gap:5px;color:#bdb4a6}
|
| 20 |
.claim-grid{grid-template-columns:repeat(2,1fr)}.claim-card{background:var(--cream);border:1px solid var(--line);border-top:6px solid var(--muted);border-radius:16px;padding:23px}.claim-card.supported{border-top-color:var(--green)}.claim-card.contradicted{border-top-color:var(--red)}.claim-card.context{border-top-color:var(--amber)}.claim-top{display:flex;justify-content:space-between;gap:10px;align-items:start}.claim-name{font-size:21px;font-weight:800}.verdict{font:500 8px/1.3 ui-monospace,SFMono-Regular,Menlo,monospace;letter-spacing:.08em;border:1px solid var(--line);border-radius:99px;padding:7px 9px;text-align:right}.confidence{display:block;margin-top:8px;font:500 8px ui-monospace,SFMono-Regular,Menlo,monospace;letter-spacing:.1em;text-transform:uppercase;color:var(--muted)}.summary{min-height:45px;color:#534d43;line-height:1.5}.evidence{padding:11px 0;border-top:1px solid var(--line)}.evidence b,.evidence span{display:block}.evidence b{font:500 8px ui-monospace,SFMono-Regular,Menlo,monospace;letter-spacing:.12em;color:var(--muted);text-transform:uppercase}.evidence span{font-size:13px;margin-top:4px}.caveat{font-size:11px;color:var(--muted);margin-top:15px}
|
| 21 |
.evidence-summary{margin-top:16px}.evidence-summary article{padding:25px;border:1px solid var(--line);border-radius:16px;background:var(--cream)}#nutrition-grid div{display:flex;justify-content:space-between;padding:10px 0;border-bottom:1px solid var(--line);font-size:13px}.date-card{background:var(--ink)!important;color:var(--cream)}.date-card .kicker{color:#cfc5b6}.date-card h3{font:700 28px/1.15 "Playfair Display"}details{margin-top:16px;border:1px solid var(--line);border-radius:14px;padding:17px;background:var(--cream)}summary{cursor:pointer;font-weight:700}pre{white-space:pre-wrap;font:11px/1.5 "DM Mono";overflow:auto}
|
| 22 |
+
.nutrition-explanation{font-size:11px;line-height:1.5;color:var(--muted);margin:14px 0 0}
|
| 23 |
.method{border-top:1px solid var(--line)}.method-grid{grid-template-columns:repeat(4,1fr);margin-top:40px}.method-grid div{padding:20px;border-top:2px solid var(--ink)}.method-grid span{font:500 10px "DM Mono";color:var(--red)}.method-grid p{font-size:13px;line-height:1.5;color:var(--muted)}
|
| 24 |
.feedback-agent{display:grid;grid-template-columns:.8fr 1.2fr;gap:28px;margin-top:18px;padding:26px;border:1px solid var(--line);border-radius:18px;background:var(--cream)}.feedback-agent h3{font:700 clamp(25px,4vw,42px)/1 Georgia,serif;margin:0}.feedback-agent p{font-size:12px;line-height:1.55;color:var(--muted)}.feedback-controls{display:grid;gap:10px}.feedback-choice{display:flex;gap:8px;flex-wrap:wrap}.feedback-choice button.active{background:var(--red);border-color:var(--red);color:white}.feedback-controls textarea{min-height:95px;padding:13px;border:1px solid var(--line);border-radius:11px;background:#f8f3e9;resize:vertical}.feedback-controls>span{font:500 9px/1.5 ui-monospace,SFMono-Regular,Menlo,monospace;color:var(--green)}
|
| 25 |
footer{display:flex;justify-content:space-between;gap:20px;padding:25px 5vw;border-top:1px solid var(--line);font:500 10px "DM Mono";color:var(--muted)}
|
nemotron_space/app.py
CHANGED
|
@@ -19,6 +19,8 @@ compact JSON with these keys:
|
|
| 19 |
- priority: one short sentence naming the most important next action
|
| 20 |
- evidence_request: one short sentence, or an empty string
|
| 21 |
- rationale: one short sentence grounded only in the supplied investigation
|
|
|
|
|
|
|
| 22 |
"""
|
| 23 |
|
| 24 |
|
|
|
|
| 19 |
- priority: one short sentence naming the most important next action
|
| 20 |
- evidence_request: one short sentence, or an empty string
|
| 21 |
- rationale: one short sentence grounded only in the supplied investigation
|
| 22 |
+
Prioritize missing evidence required to resolve front-of-pack claims. Treat
|
| 23 |
+
expiry evidence as secondary unless expiry or shelf life is itself a front claim.
|
| 24 |
"""
|
| 25 |
|
| 26 |
|
src/packetcourt/audit.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import re
|
|
|
|
| 4 |
|
| 5 |
from .models import ClaimAudit, Evidence, PacketAudit, PersuasionFinding, Verdict
|
| 6 |
from .investigator import build_investigation
|
|
@@ -221,6 +222,26 @@ def _audit_claim(claim: str, back_text: str, ingredients: list[str], nutrition)
|
|
| 221 |
confidence="high",
|
| 222 |
)
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
meaningful_terms = [
|
| 225 |
token.lower()
|
| 226 |
for token in re.findall(r"[A-Za-z]{3,}", claim)
|
|
@@ -229,6 +250,16 @@ def _audit_claim(claim: str, back_text: str, ingredients: list[str], nutrition)
|
|
| 229 |
evidence_matches = [
|
| 230 |
item for item in ingredients if any(term in item.lower() for term in meaningful_terms)
|
| 231 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
if evidence_matches:
|
| 233 |
return ClaimAudit(
|
| 234 |
claim=claim,
|
|
@@ -238,6 +269,15 @@ def _audit_claim(claim: str, back_text: str, ingredients: list[str], nutrition)
|
|
| 238 |
caveat="PacketCourt will not infer quantity, quality, or nutritional significance from an ingredient name alone.",
|
| 239 |
confidence="medium",
|
| 240 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
return ClaimAudit(
|
| 243 |
claim=claim,
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import re
|
| 4 |
+
from difflib import SequenceMatcher
|
| 5 |
|
| 6 |
from .models import ClaimAudit, Evidence, PacketAudit, PersuasionFinding, Verdict
|
| 7 |
from .investigator import build_investigation
|
|
|
|
| 222 |
confidence="high",
|
| 223 |
)
|
| 224 |
|
| 225 |
+
enrichment_terms = [
|
| 226 |
+
term
|
| 227 |
+
for term in ("calcium", "dha", "iron", "vitamin", "mineral", "zinc")
|
| 228 |
+
if term in claim.lower()
|
| 229 |
+
]
|
| 230 |
+
enrichment_matches = [
|
| 231 |
+
match.group(0)
|
| 232 |
+
for term in enrichment_terms
|
| 233 |
+
for match in re.finditer(rf"\b{re.escape(term)}\b[^,.;\n]{{0,45}}", back_text, re.IGNORECASE)
|
| 234 |
+
]
|
| 235 |
+
if enrichment_matches:
|
| 236 |
+
return ClaimAudit(
|
| 237 |
+
claim=claim,
|
| 238 |
+
verdict=Verdict.CONTEXT_MISSING,
|
| 239 |
+
summary="Related enrichment evidence is visible, but the supplied label does not establish the full front-of-pack impression.",
|
| 240 |
+
evidence=[Evidence(source="back label", text=text.strip()) for text in dict.fromkeys(enrichment_matches)],
|
| 241 |
+
caveat="A quantified nutrient declaration is required to assess the strength of an enrichment claim.",
|
| 242 |
+
confidence="medium",
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
meaningful_terms = [
|
| 246 |
token.lower()
|
| 247 |
for token in re.findall(r"[A-Za-z]{3,}", claim)
|
|
|
|
| 250 |
evidence_matches = [
|
| 251 |
item for item in ingredients if any(term in item.lower() for term in meaningful_terms)
|
| 252 |
]
|
| 253 |
+
fuzzy_matches = [
|
| 254 |
+
item
|
| 255 |
+
for item in ingredients
|
| 256 |
+
if item not in evidence_matches
|
| 257 |
+
and any(
|
| 258 |
+
SequenceMatcher(None, term, token).ratio() >= 0.8
|
| 259 |
+
for term in meaningful_terms
|
| 260 |
+
for token in re.findall(r"[a-z]{4,}", item.lower())
|
| 261 |
+
)
|
| 262 |
+
]
|
| 263 |
if evidence_matches:
|
| 264 |
return ClaimAudit(
|
| 265 |
claim=claim,
|
|
|
|
| 269 |
caveat="PacketCourt will not infer quantity, quality, or nutritional significance from an ingredient name alone.",
|
| 270 |
confidence="medium",
|
| 271 |
)
|
| 272 |
+
if fuzzy_matches:
|
| 273 |
+
return ClaimAudit(
|
| 274 |
+
claim=claim,
|
| 275 |
+
verdict=Verdict.CANNOT_VERIFY,
|
| 276 |
+
summary="A possible one-character OCR match appears in the ingredient list, but the physical packet should be checked.",
|
| 277 |
+
evidence=_ingredient_evidence(ingredients, fuzzy_matches),
|
| 278 |
+
caveat="Near-matches are surfaced as possible OCR corrections, never treated as exact evidence.",
|
| 279 |
+
confidence="low",
|
| 280 |
+
)
|
| 281 |
|
| 282 |
return ClaimAudit(
|
| 283 |
claim=claim,
|
src/packetcourt/investigator.py
CHANGED
|
@@ -65,6 +65,17 @@ def build_investigation(
|
|
| 65 |
missing.append("A readable ingredient list")
|
| 66 |
if claim_names and nutrition.basis == "unknown" and any(policy_tool_for(name) == "inspect_nutrition" for name in claim_names):
|
| 67 |
missing.append("A readable nutrition panel with its measurement basis")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
if expiry.instruction and not expiry.packed_on:
|
| 69 |
missing.append("The packing or manufacturing date needed to resolve relative shelf life")
|
| 70 |
|
|
|
|
| 65 |
missing.append("A readable ingredient list")
|
| 66 |
if claim_names and nutrition.basis == "unknown" and any(policy_tool_for(name) == "inspect_nutrition" for name in claim_names):
|
| 67 |
missing.append("A readable nutrition panel with its measurement basis")
|
| 68 |
+
elif claim_names and any(policy_tool_for(name) == "inspect_nutrition" for name in claim_names) and not any(
|
| 69 |
+
value is not None
|
| 70 |
+
for value in (
|
| 71 |
+
nutrition.protein_g,
|
| 72 |
+
nutrition.total_sugar_g,
|
| 73 |
+
nutrition.added_sugar_g,
|
| 74 |
+
nutrition.sodium_mg,
|
| 75 |
+
nutrition.saturated_fat_g,
|
| 76 |
+
)
|
| 77 |
+
):
|
| 78 |
+
missing.append("Readable nutrient quantities from the nutrition table")
|
| 79 |
if expiry.instruction and not expiry.packed_on:
|
| 80 |
missing.append("The packing or manufacturing date needed to resolve relative shelf life")
|
| 81 |
|
src/packetcourt/parser.py
CHANGED
|
@@ -49,9 +49,18 @@ def extract_claims(front_text: str) -> list[str]:
|
|
| 49 |
|
| 50 |
|
| 51 |
def _number_after(label: str, text: str, unit: str) -> float | None:
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
|
| 57 |
def parse_nutrition(back_text: str) -> NutritionFacts:
|
|
@@ -88,6 +97,21 @@ def calculate_whole_packet(nutrition: NutritionFacts) -> WholePacketNutrition:
|
|
| 88 |
)
|
| 89 |
if multiplier is None:
|
| 90 |
return WholePacketNutrition(explanation=explanation)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
def scale(value: float | None) -> float | None:
|
| 93 |
return round(value * multiplier, 1) if value is not None else None
|
|
@@ -116,7 +140,7 @@ def extract_ingredients(back_text: str) -> list[str]:
|
|
| 116 |
)
|
| 117 |
if not match:
|
| 118 |
return []
|
| 119 |
-
return [item.strip(" .") for item in re.split(r"[,;]", match.group(1)) if item.strip()]
|
| 120 |
|
| 121 |
|
| 122 |
def _parse_date(value: str) -> date | None:
|
|
|
|
| 49 |
|
| 50 |
|
| 51 |
def _number_after(label: str, text: str, unit: str) -> float | None:
|
| 52 |
+
# Labels transcribed from nutrition tables commonly place the unit before
|
| 53 |
+
# the value: "Protein (g) 12" or "Sodium | mg | 410".
|
| 54 |
+
patterns = [
|
| 55 |
+
rf"\b{label}\b[^0-9]{{0,28}}(\d+(?:\.\d+)?)\s*{unit}\b",
|
| 56 |
+
rf"\b{label}\b[\s|:()\[\]\-]*{unit}[\s|:()\[\]\-]*(\d+(?:\.\d+)?)\b",
|
| 57 |
+
rf"\b{label}\b[\s|:()\[\]\-]+(\d+(?:\.\d+)?)\b",
|
| 58 |
+
]
|
| 59 |
+
for pattern in patterns:
|
| 60 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
| 61 |
+
if match:
|
| 62 |
+
return float(match.group(1))
|
| 63 |
+
return None
|
| 64 |
|
| 65 |
|
| 66 |
def parse_nutrition(back_text: str) -> NutritionFacts:
|
|
|
|
| 97 |
)
|
| 98 |
if multiplier is None:
|
| 99 |
return WholePacketNutrition(explanation=explanation)
|
| 100 |
+
values = (
|
| 101 |
+
nutrition.protein_g,
|
| 102 |
+
nutrition.total_sugar_g,
|
| 103 |
+
nutrition.added_sugar_g,
|
| 104 |
+
nutrition.sodium_mg,
|
| 105 |
+
nutrition.saturated_fat_g,
|
| 106 |
+
)
|
| 107 |
+
if not any(value is not None for value in values):
|
| 108 |
+
return WholePacketNutrition(
|
| 109 |
+
multiplier=round(multiplier, 2),
|
| 110 |
+
explanation=(
|
| 111 |
+
f"Found {nutrition.basis} and a {nutrition.package_size_g:g}g packet, "
|
| 112 |
+
"but no readable nutrient quantities were extracted."
|
| 113 |
+
),
|
| 114 |
+
)
|
| 115 |
|
| 116 |
def scale(value: float | None) -> float | None:
|
| 117 |
return round(value * multiplier, 1) if value is not None else None
|
|
|
|
| 140 |
)
|
| 141 |
if not match:
|
| 142 |
return []
|
| 143 |
+
return [item.strip(" .*_") for item in re.split(r"[,;]", match.group(1)) if item.strip(" .*_")]
|
| 144 |
|
| 145 |
|
| 146 |
def _parse_date(value: str) -> date | None:
|
src/packetcourt/vlm.py
CHANGED
|
@@ -17,6 +17,9 @@ PROMPTS = {
|
|
| 17 |
"Transcribe the visible food-label evidence from this package image. Focus on the "
|
| 18 |
"ingredient list, nutrition values with their basis, net weight, FSSAI license, "
|
| 19 |
"manufacturing or packing date, best-before or use-by date, and after-opening instructions. "
|
|
|
|
|
|
|
|
|
|
| 20 |
"Return only visibly printed evidence. Do not explain or infer anything."
|
| 21 |
),
|
| 22 |
}
|
|
|
|
| 17 |
"Transcribe the visible food-label evidence from this package image. Focus on the "
|
| 18 |
"ingredient list, nutrition values with their basis, net weight, FSSAI license, "
|
| 19 |
"manufacturing or packing date, best-before or use-by date, and after-opening instructions. "
|
| 20 |
+
"For nutrition tables, preserve every visible row as 'nutrient name | unit | value' and "
|
| 21 |
+
"include the declared basis such as per 100g or per serving. Do not summarize or omit rows "
|
| 22 |
+
"whose value is zero. "
|
| 23 |
"Return only visibly printed evidence. Do not explain or infer anything."
|
| 24 |
),
|
| 25 |
}
|
tests/test_audit.py
CHANGED
|
@@ -130,3 +130,45 @@ def test_multiple_photo_extractions_are_labeled_and_exact_duplicates_are_skipped
|
|
| 130 |
assert text.count("HIGH PROTEIN") == 1
|
| 131 |
assert "2 unique front photos" in status
|
| 132 |
assert "Exact duplicate skipped" in images[1]["status"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
assert text.count("HIGH PROTEIN") == 1
|
| 131 |
assert "2 unique front photos" in status
|
| 132 |
assert "Exact duplicate skipped" in images[1]["status"]
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def test_table_style_nutrition_values_are_calculated():
|
| 136 |
+
result = audit_packet(
|
| 137 |
+
"SUGAR FREE",
|
| 138 |
+
(
|
| 139 |
+
"Nutrition Information Per 100g | Protein (g) 12 | Total Sugars | g | 0 | "
|
| 140 |
+
"Added Sugar (g) 0 | Sodium (mg) 410 | Saturated Fat g 2.5 | Net Weight: 200g."
|
| 141 |
+
),
|
| 142 |
+
)
|
| 143 |
+
assert result.nutrition.protein_g == 12
|
| 144 |
+
assert result.nutrition.total_sugar_g == 0
|
| 145 |
+
assert result.nutrition.sodium_mg == 410
|
| 146 |
+
assert result.whole_packet.protein_g == 24
|
| 147 |
+
assert result.whole_packet.total_sugar_g == 0
|
| 148 |
+
assert result.whole_packet.sodium_mg == 820
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def test_basis_and_packet_without_nutrient_rows_explain_the_real_missing_evidence():
|
| 152 |
+
result = audit_packet("SUGAR FREE", "Nutrition Information Per 100g. Net Weight: 200g.")
|
| 153 |
+
assert result.whole_packet.calculable is False
|
| 154 |
+
assert "no readable nutrient quantities" in result.whole_packet.explanation.lower()
|
| 155 |
+
assert any("nutrient quantities" in item.lower() for item in result.investigation.missing_evidence)
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def test_enrichment_claim_cites_visible_back_label_evidence():
|
| 159 |
+
result = audit_packet(
|
| 160 |
+
"Extra Calcium with DHA",
|
| 161 |
+
"Nutrition per 100g: Calcium 400mg, DHA 25mg. Net Weight: 200g.",
|
| 162 |
+
)
|
| 163 |
+
claim = by_claim(result, "Extra Calcium with DHA")
|
| 164 |
+
assert claim.verdict == Verdict.CONTEXT_MISSING
|
| 165 |
+
assert any("Calcium" in evidence.text for evidence in claim.evidence)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def test_one_character_ocr_claim_mismatch_is_surfaced_conservatively():
|
| 169 |
+
result = audit_packet("Real Badar", "Ingredients: ** Maltodextrin (65%), Badam, Sucralose.")
|
| 170 |
+
claim = by_claim(result, "Real Badar")
|
| 171 |
+
assert claim.verdict == Verdict.CANNOT_VERIFY
|
| 172 |
+
assert claim.confidence == "low"
|
| 173 |
+
assert any(evidence.text == "Badam" for evidence in claim.evidence)
|
| 174 |
+
assert result.ingredients[0] == "Maltodextrin (65%)"
|
vision_space/app.py
CHANGED
|
@@ -17,7 +17,9 @@ PROMPTS = {
|
|
| 17 |
),
|
| 18 |
"back": (
|
| 19 |
"Transcribe only visibly printed food-label evidence. Focus on ingredients, nutrition values "
|
| 20 |
-
"and basis, net weight, FSSAI license, dates, and after-opening instructions.
|
|
|
|
|
|
|
| 21 |
),
|
| 22 |
}
|
| 23 |
|
|
|
|
| 17 |
),
|
| 18 |
"back": (
|
| 19 |
"Transcribe only visibly printed food-label evidence. Focus on ingredients, nutrition values "
|
| 20 |
+
"and basis, net weight, FSSAI license, dates, and after-opening instructions. For nutrition "
|
| 21 |
+
"tables, preserve every visible row as 'nutrient name | unit | value', include the declared "
|
| 22 |
+
"basis, and do not omit zero values. Do not summarize or infer."
|
| 23 |
),
|
| 24 |
}
|
| 25 |
|