Spaces:

build-small-hackathon
/

packetcourt

Running

App Files Community

DIV-45 committed 2 days ago

Commit

516ee53

verified ·

1 Parent(s): a896663

feat: audit multi-angle packet photos

Browse files

Codex-authored change: adds additive phone capture and merges multi-panel OCR evidence.

Files changed (7) hide show

README.md +1 -0
app.py +24 -11
frontend/app.js +27 -9
frontend/index.html +9 -8
src/packetcourt/ocr.py +28 -0
src/packetcourt/parser.py +2 -1
tests/test_audit.py +17 -0

README.md CHANGED Viewed

@@ -130,6 +130,7 @@ PacketCourt uses four deliberately conservative verdicts:
 ## Product Surface
 - Phone-friendly front and back photo capture
 - OpenBMB small-model label transcription with Tesseract fallback
 - Paste-text workflow for difficult or damaged labels
 - Prepared cases for an immediate product walkthrough

 ## Product Surface
 - Phone-friendly front and back photo capture
+- Additive multi-angle capture for up to six front/side and six back/side photos
 - OpenBMB small-model label transcription with Tesseract fallback
 - Paste-text workflow for difficult or damaged labels
 - Prepared cases for an immediate product walkthrough

app.py CHANGED Viewed

@@ -18,7 +18,7 @@ ROOT = Path(__file__).parent
 sys.path.insert(0, str(ROOT / "src"))
 from packetcourt import audit_packet
-from packetcourt.ocr import extract_text
 from packetcourt.remote_vision import extract_remote, is_configured
 from packetcourt.remote_nemotron import is_configured as nemotron_is_configured
 from packetcourt.remote_nemotron import review as nemotron_review
@@ -182,20 +182,33 @@ def feedback(request: FeedbackRequest) -> dict:
     }
-@app.post("/api/ocr")
-async def ocr(front: UploadFile | None = File(default=None), back: UploadFile | None = File(default=None)) -> dict:
-    result: dict[str, dict[str, str]] = {}
-    for name, upload in (("front", front), ("back", back)):
-        if not upload:
-            result[name] = {"text": "", "status": "No image supplied."}
-            continue
         suffix = Path(upload.filename or "image.jpg").suffix or ".jpg"
         with NamedTemporaryFile(suffix=suffix) as temp:
             temp.write(await upload.read())
             temp.flush()
-            text, status = extract_text(temp.name, name, extract_remote if is_configured() else None)
-        result[name] = {"text": text, "status": status}
-    return result
 app = gr.mount_gradio_app(app, build_gradio_engine(), path="/engine")

 sys.path.insert(0, str(ROOT / "src"))
 from packetcourt import audit_packet
+from packetcourt.ocr import extract_text, merge_extractions
 from packetcourt.remote_vision import extract_remote, is_configured
 from packetcourt.remote_nemotron import is_configured as nemotron_is_configured
 from packetcourt.remote_nemotron import review as nemotron_review
     }
+async def _read_uploads(uploads: list[UploadFile], side: str) -> dict:
+    extracted: list[tuple[str, str]] = []
+    for upload in uploads[:6]:
         suffix = Path(upload.filename or "image.jpg").suffix or ".jpg"
         with NamedTemporaryFile(suffix=suffix) as temp:
             temp.write(await upload.read())
             temp.flush()
+            extracted.append(extract_text(temp.name, side, extract_remote if is_configured() else None))
+    text, status, images = merge_extractions(extracted, side)
+    if len(uploads) > 6:
+        status += f" Only the first 6 of {len(uploads)} photos were processed."
+    return {"text": text, "status": status, "images": images}
+@app.post("/api/ocr")
+async def ocr(
+    fronts: list[UploadFile] | None = File(default=None),
+    backs: list[UploadFile] | None = File(default=None),
+    front: UploadFile | None = File(default=None),
+    back: UploadFile | None = File(default=None),
+) -> dict:
+    front_uploads = list(fronts or []) + ([front] if front else [])
+    back_uploads = list(backs or []) + ([back] if back else [])
+    return {
+        "front": await _read_uploads(front_uploads, "front"),
+        "back": await _read_uploads(back_uploads, "back"),
+    }
 app = gr.mount_gradio_app(app, build_gradio_engine(), path="/engine")

frontend/app.js CHANGED Viewed

@@ -18,16 +18,34 @@ function setMode(mode) {
 $$(".mode-switch button").forEach((button) => button.addEventListener("click", () => setMode(button.dataset.mode)));
 $$("[data-scroll]").forEach((button) => button.addEventListener("click", () => $(`#${button.dataset.scroll}`).scrollIntoView()));
-function preview(input, target) {
   input.addEventListener("change", () => {
-    const file = input.files[0];
-    if (!file) return;
     target.style.backgroundImage = `url(${URL.createObjectURL(file)})`;
     target.innerHTML = "";
   });
 }
-preview($("#front-file"), $("#front-preview"));
-preview($("#back-file"), $("#back-preview"));
 async function runAudit(frontText, backText) {
   const response = await fetch("api/audit", {
@@ -139,15 +157,15 @@ $("#audit-text").addEventListener("click", async () => {
 $("#read-photos").addEventListener("click", async () => {
   const form = new FormData();
-  if ($("#front-file").files[0]) form.append("front", $("#front-file").files[0]);
-  if ($("#back-file").files[0]) form.append("back", $("#back-file").files[0]);
-  $("#ocr-status").textContent = "Reading label evidence...";
   try {
     const response = await fetch("api/ocr", { method: "POST", body: form });
     const result = await response.json();
     $("#front-text").value = result.front.text;
     $("#back-text").value = result.back.text;
-    $("#ocr-status").textContent = `${result.front.status} ${result.back.status}`;
     setMode("text");
   } catch (error) { $("#ocr-status").textContent = "OCR failed. Paste the label text to continue."; }
 });

 $$(".mode-switch button").forEach((button) => button.addEventListener("click", () => setMode(button.dataset.mode)));
 $$("[data-scroll]").forEach((button) => button.addEventListener("click", () => $(`#${button.dataset.scroll}`).scrollIntoView()));
+const photoSets = { front: [], back: [] };
+function preview(input, target, side) {
   input.addEventListener("change", () => {
+    const incoming = [...input.files];
+    if (!incoming.length) return;
+    const available = Math.max(0, 6 - photoSets[side].length);
+    photoSets[side].push(...incoming.slice(0, available));
+    const file = photoSets[side][photoSets[side].length - 1];
     target.style.backgroundImage = `url(${URL.createObjectURL(file)})`;
     target.innerHTML = "";
+    $(`#${side}-count`).textContent = `${photoSets[side].length} photo${photoSets[side].length === 1 ? "" : "s"} · add more`;
+    input.value = "";
   });
 }
+preview($("#front-file"), $("#front-preview"), "front");
+preview($("#back-file"), $("#back-preview"), "back");
+$("#clear-photos").addEventListener("click", () => {
+  photoSets.front.length = 0;
+  photoSets.back.length = 0;
+  [["front", "F"], ["back", "B"]].forEach(([side, label]) => {
+    $(`#${side}-preview`).style.backgroundImage = "";
+    $(`#${side}-preview`).innerHTML = `<span class="upload-icon">${label}</span>`;
+    $(`#${side}-count`).textContent = "Add photos";
+  });
+  $("#ocr-status").textContent = "Selected photos cleared.";
+});
 async function runAudit(frontText, backText) {
   const response = await fetch("api/audit", {
 $("#read-photos").addEventListener("click", async () => {
   const form = new FormData();
+  photoSets.front.forEach((file) => form.append("fronts", file));
+  photoSets.back.forEach((file) => form.append("backs", file));
+  $("#ocr-status").textContent = `Reading ${photoSets.front.length + photoSets.back.length} packet photos...`;
   try {
     const response = await fetch("api/ocr", { method: "POST", body: form });
     const result = await response.json();
     $("#front-text").value = result.front.text;
     $("#back-text").value = result.back.text;
+    $("#ocr-status").textContent = `${result.front.status} ${result.back.status} Review merged evidence before trial.`;
     setMode("text");
   } catch (error) { $("#ocr-status").textContent = "OCR failed. Paste the label text to continue."; }
 });

frontend/index.html CHANGED Viewed

@@ -60,20 +60,21 @@
       <div class="mode-panel active" id="mode-photos">
         <div class="upload-grid">
           <label class="upload-card">
-            <input type="file" id="front-file" accept="image/*" capture="environment">
             <div class="upload-preview" id="front-preview"><span class="upload-icon">F</span></div>
-            <div><b>Front of packet</b><span>Claims, badges and promises</span></div>
-            <strong>Choose photo</strong>
           </label>
           <label class="upload-card">
-            <input type="file" id="back-file" accept="image/*" capture="environment">
             <div class="upload-preview" id="back-preview"><span class="upload-icon">B</span></div>
-            <div><b>Back of packet</b><span>Ingredients, nutrition and dates</span></div>
-            <strong>Choose photo</strong>
           </label>
         </div>
-        <button class="button dark wide" id="read-photos">Read both label photos <span>→</span></button>
-        <p class="status-line" id="ocr-status">Images stay inside this private Space while the case is processed.</p>
       </div>
       <div class="mode-panel" id="mode-text">

       <div class="mode-panel active" id="mode-photos">
         <div class="upload-grid">
           <label class="upload-card">
+            <input type="file" id="front-file" accept="image/*" capture="environment" multiple>
             <div class="upload-preview" id="front-preview"><span class="upload-icon">F</span></div>
+            <div><b>Front and side claims</b><span>Claims, badges and promises · up to 6 photos</span></div>
+            <strong id="front-count">Add photos</strong>
           </label>
           <label class="upload-card">
+            <input type="file" id="back-file" accept="image/*" capture="environment" multiple>
             <div class="upload-preview" id="back-preview"><span class="upload-icon">B</span></div>
+            <div><b>Back and side evidence</b><span>Ingredients, nutrition, dates and directions · up to 6 photos</span></div>
+            <strong id="back-count">Add photos</strong>
           </label>
         </div>
+        <button class="button dark wide" id="read-photos">Read all packet photos <span>→</span></button>
+        <button class="button quiet wide" id="clear-photos">Clear selected photos</button>
+        <p class="status-line" id="ocr-status">Add close photos of every panel. Reopen the camera picker to add another angle.</p>
       </div>
       <div class="mode-panel" id="mode-text">

src/packetcourt/ocr.py CHANGED Viewed

@@ -7,6 +7,34 @@ from PIL import Image
 from .vlm import extract_with_vlm, is_enabled
 def extract_text(image_path: str | None, side: str = "back", vlm_extractor=None) -> tuple[str, str]:
     if not image_path:
         return "", "No image supplied."

 from .vlm import extract_with_vlm, is_enabled
+def merge_extractions(results: list[tuple[str, str]], side: str) -> tuple[str, str, list[dict[str, str]]]:
+    unique_texts: set[str] = set()
+    merged: list[str] = []
+    images: list[dict[str, str]] = []
+    readable = 0
+    for index, (text, status) in enumerate(results, start=1):
+        clean = text.strip()
+        duplicate = bool(clean and clean in unique_texts)
+        if clean and not duplicate:
+            unique_texts.add(clean)
+            merged.append(f"[{side.title()} photo {index}]\n{clean}")
+            readable += 1
+        images.append(
+            {
+                "photo": str(index),
+                "status": f"{status} Exact duplicate skipped." if duplicate else status,
+                "text": clean,
+            }
+        )
+    status = (
+        f"Read {readable} unique {side} photo{'s' if readable != 1 else ''} "
+        f"from {len(results)} supplied."
+        if results
+        else f"No {side} photos supplied."
+    )
+    return "\n\n".join(merged), status, images
 def extract_text(image_path: str | None, side: str = "back", vlm_extractor=None) -> tuple[str, str]:
     if not image_path:
         return "", "No image supplied."

src/packetcourt/parser.py CHANGED Viewed

@@ -109,7 +109,8 @@ def calculate_whole_packet(nutrition: NutritionFacts) -> WholePacketNutrition:
 def extract_ingredients(back_text: str) -> list[str]:
     match = re.search(
         r"\bingredients?\s*:\s*(.+?)(?=\b(?:nutrition|allergen|contains|net\s*(?:weight|wt)|storage|directions?"
-        r"|after[\s-]*opening|best before|mfd|pkd|manufactured|packed|fssai|dates?|unit sale price)\b|$)",
         normalize_space(back_text),
         re.IGNORECASE,
     )

 def extract_ingredients(back_text: str) -> list[str]:
     match = re.search(
         r"\bingredients?\s*:\s*(.+?)(?=\b(?:nutrition|allergen|contains|net\s*(?:weight|wt)|storage|directions?"
+        r"|after[\s-]*opening|best before|mfd|pkd|manufactured|packed|fssai|dates?|unit sale price)\b"
+        r"|\[(?:front|back)\s+photo\s+\d+\]|$)",
         normalize_space(back_text),
         re.IGNORECASE,
     )

tests/test_audit.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from packetcourt import audit_packet
 from packetcourt.models import Verdict
 def by_claim(result, name):
@@ -113,3 +114,19 @@ def test_dynamic_front_claim_is_audited_instead_of_dropped():
     claim = by_claim(result, "Real Badam")
     assert claim.verdict == Verdict.CONTEXT_MISSING
     assert any("Badam" in evidence.text for evidence in claim.evidence)

 from packetcourt import audit_packet
 from packetcourt.models import Verdict
+from packetcourt.ocr import merge_extractions
 def by_claim(result, name):
     claim = by_claim(result, "Real Badam")
     assert claim.verdict == Verdict.CONTEXT_MISSING
     assert any("Badam" in evidence.text for evidence in claim.evidence)
+def test_multiple_photo_extractions_are_labeled_and_exact_duplicates_are_skipped():
+    text, status, images = merge_extractions(
+        [
+            ("HIGH PROTEIN", "read one"),
+            ("HIGH PROTEIN", "read duplicate"),
+            ("REAL BADAM", "read another panel"),
+        ],
+        "front",
+    )
+    assert "[Front photo 1]" in text
+    assert "[Front photo 3]" in text
+    assert text.count("HIGH PROTEIN") == 1
+    assert "2 unique front photos" in status
+    assert "Exact duplicate skipped" in images[1]["status"]