DIV-45 committed on
Commit
516ee53
·
verified ·
1 Parent(s): a896663

feat: audit multi-angle packet photos

Browse files

Codex-authored additive phone capture and merged multi-panel OCR evidence.

README.md CHANGED
@@ -130,6 +130,7 @@ PacketCourt uses four deliberately conservative verdicts:
130
  ## Product Surface
131
 
132
  - Phone-friendly front and back photo capture
 
133
  - OpenBMB small-model label transcription with Tesseract fallback
134
  - Paste-text workflow for difficult or damaged labels
135
  - Prepared cases for an immediate product walkthrough
 
130
  ## Product Surface
131
 
132
  - Phone-friendly front and back photo capture
133
+ - Additive multi-angle capture for up to six front/side and six back/side photos
134
  - OpenBMB small-model label transcription with Tesseract fallback
135
  - Paste-text workflow for difficult or damaged labels
136
  - Prepared cases for an immediate product walkthrough
app.py CHANGED
@@ -18,7 +18,7 @@ ROOT = Path(__file__).parent
18
  sys.path.insert(0, str(ROOT / "src"))
19
 
20
  from packetcourt import audit_packet
21
- from packetcourt.ocr import extract_text
22
  from packetcourt.remote_vision import extract_remote, is_configured
23
  from packetcourt.remote_nemotron import is_configured as nemotron_is_configured
24
  from packetcourt.remote_nemotron import review as nemotron_review
@@ -182,20 +182,33 @@ def feedback(request: FeedbackRequest) -> dict:
182
  }
183
 
184
 
185
- @app.post("/api/ocr")
186
- async def ocr(front: UploadFile | None = File(default=None), back: UploadFile | None = File(default=None)) -> dict:
187
- result: dict[str, dict[str, str]] = {}
188
- for name, upload in (("front", front), ("back", back)):
189
- if not upload:
190
- result[name] = {"text": "", "status": "No image supplied."}
191
- continue
192
  suffix = Path(upload.filename or "image.jpg").suffix or ".jpg"
193
  with NamedTemporaryFile(suffix=suffix) as temp:
194
  temp.write(await upload.read())
195
  temp.flush()
196
- text, status = extract_text(temp.name, name, extract_remote if is_configured() else None)
197
- result[name] = {"text": text, "status": status}
198
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
 
201
  app = gr.mount_gradio_app(app, build_gradio_engine(), path="/engine")
 
18
  sys.path.insert(0, str(ROOT / "src"))
19
 
20
  from packetcourt import audit_packet
21
+ from packetcourt.ocr import extract_text, merge_extractions
22
  from packetcourt.remote_vision import extract_remote, is_configured
23
  from packetcourt.remote_nemotron import is_configured as nemotron_is_configured
24
  from packetcourt.remote_nemotron import review as nemotron_review
 
182
  }
183
 
184
 
185
async def _read_uploads(uploads: list[UploadFile], side: str, *, max_photos: int = 6) -> dict:
    """Run text extraction on the uploaded photos for one packet side and merge the results.

    Args:
        uploads: Uploaded image files for this side, in the order received.
        side: Side label passed through to ``extract_text`` and ``merge_extractions``.
        max_photos: Maximum number of uploads that are processed (non-negative).
            Any uploads beyond this count are ignored and mentioned in the
            returned status. Defaults to 6.

    Returns:
        A dict with the keys ``"text"``, ``"status"`` and ``"images"``, holding
        the three values returned by ``merge_extractions``; the status gains a
        trailing note when uploads were dropped because of ``max_photos``.
    """
    # The extractor choice does not depend on the individual photo, so resolve it once.
    # NOTE(review): assumes is_configured() is a cheap, side-effect-free check — confirm.
    vlm_extractor = extract_remote if is_configured() else None

    extracted: list[tuple[str, str]] = []
    for upload in uploads[:max_photos]:
        suffix = Path(upload.filename or "image.jpg").suffix or ".jpg"
        # extract_text takes a file path, so the upload is spooled to a named
        # temporary file that is removed when the ``with`` block exits.
        with NamedTemporaryFile(suffix=suffix) as temp:
            temp.write(await upload.read())
            temp.flush()  # push buffered bytes to disk before the file is read by name
            # NOTE(review): extract_text is a synchronous call made from a coroutine,
            # so the event loop waits while each photo is read. Left unchanged here;
            # moving it to a worker thread would change concurrency behaviour.
            extracted.append(extract_text(temp.name, side, vlm_extractor))

    text, status, images = merge_extractions(extracted, side)
    if len(uploads) > max_photos:
        status += f" Only the first {max_photos} of {len(uploads)} photos were processed."
    return {"text": text, "status": status, "images": images}
197
+
198
+
199
# POST /api/ocr: read every supplied packet photo and return merged text per side.
# ``fronts``/``backs`` carry the multi-photo uploads; the singular ``front``/``back``
# fields are still accepted and are appended after the list entries for their side.
# (Documented with comments rather than a docstring so the generated API schema
# description stays as it was.)
@app.post("/api/ocr")
async def ocr(
    fronts: list[UploadFile] | None = File(default=None),
    backs: list[UploadFile] | None = File(default=None),
    front: UploadFile | None = File(default=None),
    back: UploadFile | None = File(default=None),
) -> dict:
    front_uploads = [*(fronts or [])]
    if front:
        front_uploads.append(front)

    back_uploads = [*(backs or [])]
    if back:
        back_uploads.append(back)

    # Sides are processed one after the other, front first.
    front_result = await _read_uploads(front_uploads, "front")
    back_result = await _read_uploads(back_uploads, "back")
    return {"front": front_result, "back": back_result}
212
 
213
 
214
  app = gr.mount_gradio_app(app, build_gradio_engine(), path="/engine")
frontend/app.js CHANGED
@@ -18,16 +18,34 @@ function setMode(mode) {
18
  $$(".mode-switch button").forEach((button) => button.addEventListener("click", () => setMode(button.dataset.mode)));
19
  $$("[data-scroll]").forEach((button) => button.addEventListener("click", () => $(`#${button.dataset.scroll}`).scrollIntoView()));
20
 
21
- function preview(input, target) {
 
 
22
  input.addEventListener("change", () => {
23
- const file = input.files[0];
24
- if (!file) return;
 
 
 
25
  target.style.backgroundImage = `url(${URL.createObjectURL(file)})`;
26
  target.innerHTML = "";
 
 
27
  });
28
  }
29
- preview($("#front-file"), $("#front-preview"));
30
- preview($("#back-file"), $("#back-preview"));
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  async function runAudit(frontText, backText) {
33
  const response = await fetch("api/audit", {
@@ -139,15 +157,15 @@ $("#audit-text").addEventListener("click", async () => {
139
 
140
  $("#read-photos").addEventListener("click", async () => {
141
  const form = new FormData();
142
- if ($("#front-file").files[0]) form.append("front", $("#front-file").files[0]);
143
- if ($("#back-file").files[0]) form.append("back", $("#back-file").files[0]);
144
- $("#ocr-status").textContent = "Reading label evidence...";
145
  try {
146
  const response = await fetch("api/ocr", { method: "POST", body: form });
147
  const result = await response.json();
148
  $("#front-text").value = result.front.text;
149
  $("#back-text").value = result.back.text;
150
- $("#ocr-status").textContent = `${result.front.status} ${result.back.status}`;
151
  setMode("text");
152
  } catch (error) { $("#ocr-status").textContent = "OCR failed. Paste the label text to continue."; }
153
  });
 
18
  $$(".mode-switch button").forEach((button) => button.addEventListener("click", () => setMode(button.dataset.mode)));
19
  $$("[data-scroll]").forEach((button) => button.addEventListener("click", () => $(`#${button.dataset.scroll}`).scrollIntoView()));
20
 
21
+ const photoSets = { front: [], back: [] };
22
+
23
// Wire a file input so that every selection is appended to photoSets[side]
// (capped at six photos per side), the newest kept photo is shown as the
// background of `target`, and the `#<side>-count` label reports the total.
//   input  - the <input type="file"> element to listen on
//   target - the preview element whose background shows the latest photo
//   side   - "front" or "back"; selects the photo set and the count label
function preview(input, target, side) {
  const maxPhotos = 6;
  // Object URL currently used by the preview, kept so it can be released.
  let activeUrl = null;

  input.addEventListener("change", () => {
    const incoming = [...input.files];
    if (!incoming.length) return;

    const photos = photoSets[side];
    const available = Math.max(0, maxPhotos - photos.length);
    photos.push(...incoming.slice(0, available));

    const file = photos[photos.length - 1];
    const nextUrl = URL.createObjectURL(file);
    target.style.backgroundImage = `url(${nextUrl})`;
    target.innerHTML = "";
    // Release the URL the preview no longer shows; without this every
    // selection leaves another blob reference alive until the page unloads.
    if (activeUrl) URL.revokeObjectURL(activeUrl);
    activeUrl = nextUrl;

    // Once the cap is reached further picks are discarded, so stop inviting more.
    const hint = photos.length >= maxPhotos ? "limit reached" : "add more";
    $(`#${side}-count`).textContent = `${photos.length} photo${photos.length === 1 ? "" : "s"} · ${hint}`;

    // Reset the input so choosing the same file again still fires "change".
    input.value = "";
  });
}
36
+ preview($("#front-file"), $("#front-preview"), "front");
37
+ preview($("#back-file"), $("#back-preview"), "back");
38
+
39
// "Clear selected photos": empty both photo sets in place and restore each
// upload card to its initial placeholder letter and "Add photos" label.
$("#clear-photos").addEventListener("click", () => {
  photoSets.front.length = 0;
  photoSets.back.length = 0;

  const placeholders = [
    { side: "front", label: "F" },
    { side: "back", label: "B" },
  ];
  for (const { side, label } of placeholders) {
    $(`#${side}-preview`).style.backgroundImage = "";
    $(`#${side}-preview`).innerHTML = `<span class="upload-icon">${label}</span>`;
    $(`#${side}-count`).textContent = "Add photos";
  }

  $("#ocr-status").textContent = "Selected photos cleared.";
});
49
 
50
  async function runAudit(frontText, backText) {
51
  const response = await fetch("api/audit", {
 
157
 
158
// "Read all packet photos": upload every collected photo to api/ocr, copy the
// merged text for each side into the text areas, then switch to text mode.
// Any failure along the way falls back to asking the user to paste the text.
$("#read-photos").addEventListener("click", async () => {
  const form = new FormData();
  for (const file of photoSets.front) form.append("fronts", file);
  for (const file of photoSets.back) form.append("backs", file);

  const photoTotal = photoSets.front.length + photoSets.back.length;
  $("#ocr-status").textContent = `Reading ${photoTotal} packet photos...`;

  try {
    const response = await fetch("api/ocr", { method: "POST", body: form });
    const payload = await response.json();
    const frontResult = payload.front;
    const backResult = payload.back;
    $("#front-text").value = frontResult.text;
    $("#back-text").value = backResult.text;
    $("#ocr-status").textContent = `${frontResult.status} ${backResult.status} Review merged evidence before trial.`;
    setMode("text");
  } catch (error) {
    $("#ocr-status").textContent = "OCR failed. Paste the label text to continue.";
  }
});
frontend/index.html CHANGED
@@ -60,20 +60,21 @@
60
  <div class="mode-panel active" id="mode-photos">
61
  <div class="upload-grid">
62
  <label class="upload-card">
63
- <input type="file" id="front-file" accept="image/*" capture="environment">
64
  <div class="upload-preview" id="front-preview"><span class="upload-icon">F</span></div>
65
- <div><b>Front of packet</b><span>Claims, badges and promises</span></div>
66
- <strong>Choose photo</strong>
67
  </label>
68
  <label class="upload-card">
69
- <input type="file" id="back-file" accept="image/*" capture="environment">
70
  <div class="upload-preview" id="back-preview"><span class="upload-icon">B</span></div>
71
- <div><b>Back of packet</b><span>Ingredients, nutrition and dates</span></div>
72
- <strong>Choose photo</strong>
73
  </label>
74
  </div>
75
- <button class="button dark wide" id="read-photos">Read both label photos <span>→</span></button>
76
- <p class="status-line" id="ocr-status">Images stay inside this private Space while the case is processed.</p>
 
77
  </div>
78
 
79
  <div class="mode-panel" id="mode-text">
 
60
  <div class="mode-panel active" id="mode-photos">
61
  <div class="upload-grid">
62
  <label class="upload-card">
63
+ <input type="file" id="front-file" accept="image/*" capture="environment" multiple>
64
  <div class="upload-preview" id="front-preview"><span class="upload-icon">F</span></div>
65
+ <div><b>Front and side claims</b><span>Claims, badges and promises · up to 6 photos</span></div>
66
+ <strong id="front-count">Add photos</strong>
67
  </label>
68
  <label class="upload-card">
69
+ <input type="file" id="back-file" accept="image/*" capture="environment" multiple>
70
  <div class="upload-preview" id="back-preview"><span class="upload-icon">B</span></div>
71
+ <div><b>Back and side evidence</b><span>Ingredients, nutrition, dates and directions · up to 6 photos</span></div>
72
+ <strong id="back-count">Add photos</strong>
73
  </label>
74
  </div>
75
+ <button class="button dark wide" id="read-photos">Read all packet photos <span>→</span></button>
76
+ <button class="button quiet wide" id="clear-photos">Clear selected photos</button>
77
+ <p class="status-line" id="ocr-status">Add close photos of every panel. Reopen the camera picker to add another angle.</p>
78
  </div>
79
 
80
  <div class="mode-panel" id="mode-text">
src/packetcourt/ocr.py CHANGED
@@ -7,6 +7,34 @@ from PIL import Image
7
  from .vlm import extract_with_vlm, is_enabled
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def extract_text(image_path: str | None, side: str = "back", vlm_extractor=None) -> tuple[str, str]:
11
  if not image_path:
12
  return "", "No image supplied."
 
7
  from .vlm import extract_with_vlm, is_enabled
8
 
9
 
10
def merge_extractions(results: list[tuple[str, str]], side: str) -> tuple[str, str, list[dict[str, str]]]:
    """Combine per-photo extraction results for one packet side into one labeled transcript.

    Args:
        results: ``(text, status)`` pairs, one per photo, in the order supplied.
        side: Side name used in the section headers and in the summary status.

    Returns:
        A ``(merged_text, summary_status, images)`` tuple:

        * ``merged_text`` holds each unique, non-empty text under a
          ``[<Side> photo <n>]`` header, sections separated by a blank line.
          ``n`` is the 1-based position in ``results``, so skipped photos
          still consume a number.
        * ``summary_status`` reports how many unique photos were read out of
          how many were supplied, or that none were supplied.
        * ``images`` has one dict per photo with ``"photo"``, ``"status"``
          and ``"text"`` keys; exact repeats get a note appended to their status.
    """
    if not results:
        return "", f"No {side} photos supplied.", []

    seen: set[str] = set()
    sections: list[str] = []
    images: list[dict[str, str]] = []

    for number, (raw_text, photo_status) in enumerate(results, start=1):
        body = raw_text.strip()
        if body in seen:
            # ``seen`` never contains "", so only non-empty repeats land here.
            photo_status = f"{photo_status} Exact duplicate skipped."
        elif body:
            seen.add(body)
            sections.append(f"[{side.title()} photo {number}]\n{body}")
        images.append({"photo": str(number), "status": photo_status, "text": body})

    unique_count = len(sections)
    plural = "" if unique_count == 1 else "s"
    summary = f"Read {unique_count} unique {side} photo{plural} from {len(results)} supplied."
    return "\n\n".join(sections), summary, images
36
+
37
+
38
  def extract_text(image_path: str | None, side: str = "back", vlm_extractor=None) -> tuple[str, str]:
39
  if not image_path:
40
  return "", "No image supplied."
src/packetcourt/parser.py CHANGED
@@ -109,7 +109,8 @@ def calculate_whole_packet(nutrition: NutritionFacts) -> WholePacketNutrition:
109
  def extract_ingredients(back_text: str) -> list[str]:
110
  match = re.search(
111
  r"\bingredients?\s*:\s*(.+?)(?=\b(?:nutrition|allergen|contains|net\s*(?:weight|wt)|storage|directions?"
112
- r"|after[\s-]*opening|best before|mfd|pkd|manufactured|packed|fssai|dates?|unit sale price)\b|$)",
 
113
  normalize_space(back_text),
114
  re.IGNORECASE,
115
  )
 
109
  def extract_ingredients(back_text: str) -> list[str]:
110
  match = re.search(
111
  r"\bingredients?\s*:\s*(.+?)(?=\b(?:nutrition|allergen|contains|net\s*(?:weight|wt)|storage|directions?"
112
+ r"|after[\s-]*opening|best before|mfd|pkd|manufactured|packed|fssai|dates?|unit sale price)\b"
113
+ r"|\[(?:front|back)\s+photo\s+\d+\]|$)",
114
  normalize_space(back_text),
115
  re.IGNORECASE,
116
  )
tests/test_audit.py CHANGED
@@ -1,5 +1,6 @@
1
  from packetcourt import audit_packet
2
  from packetcourt.models import Verdict
 
3
 
4
 
5
  def by_claim(result, name):
@@ -113,3 +114,19 @@ def test_dynamic_front_claim_is_audited_instead_of_dropped():
113
  claim = by_claim(result, "Real Badam")
114
  assert claim.verdict == Verdict.CONTEXT_MISSING
115
  assert any("Badam" in evidence.text for evidence in claim.evidence)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from packetcourt import audit_packet
2
  from packetcourt.models import Verdict
3
+ from packetcourt.ocr import merge_extractions
4
 
5
 
6
  def by_claim(result, name):
 
114
  claim = by_claim(result, "Real Badam")
115
  assert claim.verdict == Verdict.CONTEXT_MISSING
116
  assert any("Badam" in evidence.text for evidence in claim.evidence)
117
+
118
+
119
def test_multiple_photo_extractions_are_labeled_and_exact_duplicates_are_skipped():
    """Merged front-photo reads are labeled per photo and an exact repeat is dropped."""
    panel_reads = [
        ("HIGH PROTEIN", "read one"),
        ("HIGH PROTEIN", "read duplicate"),
        ("REAL BADAM", "read another panel"),
    ]

    merged_text, summary, per_photo = merge_extractions(panel_reads, "front")

    # Photo 2 repeats photo 1, so only photos 1 and 3 get a section header.
    for header in ("[Front photo 1]", "[Front photo 3]"):
        assert header in merged_text
    assert merged_text.count("HIGH PROTEIN") == 1
    assert "2 unique front photos" in summary
    assert "Exact duplicate skipped" in per_photo[1]["status"]