feat: audit multi-angle packet photos
Browse files — Codex-authored additive phone capture and merged multi-panel OCR evidence.
- README.md +1 -0
- app.py +24 -11
- frontend/app.js +27 -9
- frontend/index.html +9 -8
- src/packetcourt/ocr.py +28 -0
- src/packetcourt/parser.py +2 -1
- tests/test_audit.py +17 -0
README.md
CHANGED
|
@@ -130,6 +130,7 @@ PacketCourt uses four deliberately conservative verdicts:
|
|
| 130 |
## Product Surface
|
| 131 |
|
| 132 |
- Phone-friendly front and back photo capture
|
|
|
|
| 133 |
- OpenBMB small-model label transcription with Tesseract fallback
|
| 134 |
- Paste-text workflow for difficult or damaged labels
|
| 135 |
- Prepared cases for an immediate product walkthrough
|
|
|
|
| 130 |
## Product Surface
|
| 131 |
|
| 132 |
- Phone-friendly front and back photo capture
|
| 133 |
+
- Additive multi-angle capture for up to six front/side and six back/side photos
|
| 134 |
- OpenBMB small-model label transcription with Tesseract fallback
|
| 135 |
- Paste-text workflow for difficult or damaged labels
|
| 136 |
- Prepared cases for an immediate product walkthrough
|
app.py
CHANGED
|
@@ -18,7 +18,7 @@ ROOT = Path(__file__).parent
|
|
| 18 |
sys.path.insert(0, str(ROOT / "src"))
|
| 19 |
|
| 20 |
from packetcourt import audit_packet
|
| 21 |
-
from packetcourt.ocr import extract_text
|
| 22 |
from packetcourt.remote_vision import extract_remote, is_configured
|
| 23 |
from packetcourt.remote_nemotron import is_configured as nemotron_is_configured
|
| 24 |
from packetcourt.remote_nemotron import review as nemotron_review
|
|
@@ -182,20 +182,33 @@ def feedback(request: FeedbackRequest) -> dict:
|
|
| 182 |
}
|
| 183 |
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
for name, upload in (("front", front), ("back", back)):
|
| 189 |
-
if not upload:
|
| 190 |
-
result[name] = {"text": "", "status": "No image supplied."}
|
| 191 |
-
continue
|
| 192 |
suffix = Path(upload.filename or "image.jpg").suffix or ".jpg"
|
| 193 |
with NamedTemporaryFile(suffix=suffix) as temp:
|
| 194 |
temp.write(await upload.read())
|
| 195 |
temp.flush()
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
|
| 201 |
app = gr.mount_gradio_app(app, build_gradio_engine(), path="/engine")
|
|
|
|
| 18 |
sys.path.insert(0, str(ROOT / "src"))
|
| 19 |
|
| 20 |
from packetcourt import audit_packet
|
| 21 |
+
from packetcourt.ocr import extract_text, merge_extractions
|
| 22 |
from packetcourt.remote_vision import extract_remote, is_configured
|
| 23 |
from packetcourt.remote_nemotron import is_configured as nemotron_is_configured
|
| 24 |
from packetcourt.remote_nemotron import review as nemotron_review
|
|
|
|
| 182 |
}
|
| 183 |
|
| 184 |
|
| 185 |
+
async def _read_uploads(uploads: list[UploadFile], side: str, max_photos: int = 6) -> dict:
    """Run OCR over the uploaded photos for one packet side and merge the results.

    Args:
        uploads: Uploaded image files for this side, in the order received.
        side: Side label forwarded to ``extract_text`` and ``merge_extractions``.
        max_photos: Maximum number of uploads to process. Extra uploads are
            ignored and mentioned in the returned status.

    Returns:
        A dict with the merged ``text``, a summary ``status`` and the per-photo
        ``images`` entries returned by ``merge_extractions``.
    """
    limit = max(max_photos, 0)
    # Resolve the optional remote extractor once instead of once per photo.
    vlm_extractor = extract_remote if is_configured() else None

    extracted: list[tuple[str, str]] = []
    for upload in uploads[:limit]:
        suffix = Path(upload.filename or "image.jpg").suffix or ".jpg"
        with NamedTemporaryFile(suffix=suffix) as temp:
            temp.write(await upload.read())
            temp.flush()
            # Extraction must happen inside the ``with`` block: the temporary
            # file is removed as soon as it is closed.
            # NOTE(review): extract_text looks synchronous, so it presumably
            # blocks the event loop while OCR runs — confirm and consider
            # offloading it to a worker thread.
            extracted.append(extract_text(temp.name, side, vlm_extractor))

    text, status, images = merge_extractions(extracted, side)
    if len(uploads) > limit:
        status += f" Only the first {limit} of {len(uploads)} photos were processed."
    return {"text": text, "status": status, "images": images}
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
@app.post("/api/ocr")
async def ocr(
    fronts: list[UploadFile] | None = File(default=None),
    backs: list[UploadFile] | None = File(default=None),
    front: UploadFile | None = File(default=None),
    back: UploadFile | None = File(default=None),
) -> dict:
    """Read every uploaded packet photo and return merged OCR output per side.

    The multi-file fields (``fronts``/``backs``) and the single-file fields
    (``front``/``back``) are both accepted; a single file is appended after
    the multi-file uploads for the same side.
    """
    front_uploads = [*(fronts or [])]
    if front:
        front_uploads.append(front)

    back_uploads = [*(backs or [])]
    if back:
        back_uploads.append(back)

    # Sides are processed one after the other, front first.
    front_result = await _read_uploads(front_uploads, "front")
    back_result = await _read_uploads(back_uploads, "back")
    return {"front": front_result, "back": back_result}
|
| 212 |
|
| 213 |
|
| 214 |
app = gr.mount_gradio_app(app, build_gradio_engine(), path="/engine")
|
frontend/app.js
CHANGED
|
@@ -18,16 +18,34 @@ function setMode(mode) {
|
|
| 18 |
$$(".mode-switch button").forEach((button) => button.addEventListener("click", () => setMode(button.dataset.mode)));
|
| 19 |
$$("[data-scroll]").forEach((button) => button.addEventListener("click", () => $(`#${button.dataset.scroll}`).scrollIntoView()));
|
| 20 |
|
| 21 |
-
|
|
|
|
|
|
|
| 22 |
input.addEventListener("change", () => {
|
| 23 |
-
const
|
| 24 |
-
if (!
|
|
|
|
|
|
|
|
|
|
| 25 |
target.style.backgroundImage = `url(${URL.createObjectURL(file)})`;
|
| 26 |
target.innerHTML = "";
|
|
|
|
|
|
|
| 27 |
});
|
| 28 |
}
|
| 29 |
-
preview($("#front-file"), $("#front-preview"));
|
| 30 |
-
preview($("#back-file"), $("#back-preview"));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
async function runAudit(frontText, backText) {
|
| 33 |
const response = await fetch("api/audit", {
|
|
@@ -139,15 +157,15 @@ $("#audit-text").addEventListener("click", async () => {
|
|
| 139 |
|
| 140 |
$("#read-photos").addEventListener("click", async () => {
|
| 141 |
const form = new FormData();
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
$("#ocr-status").textContent =
|
| 145 |
try {
|
| 146 |
const response = await fetch("api/ocr", { method: "POST", body: form });
|
| 147 |
const result = await response.json();
|
| 148 |
$("#front-text").value = result.front.text;
|
| 149 |
$("#back-text").value = result.back.text;
|
| 150 |
-
$("#ocr-status").textContent = `${result.front.status} ${result.back.status}`;
|
| 151 |
setMode("text");
|
| 152 |
} catch (error) { $("#ocr-status").textContent = "OCR failed. Paste the label text to continue."; }
|
| 153 |
});
|
|
|
|
| 18 |
$$(".mode-switch button").forEach((button) => button.addEventListener("click", () => setMode(button.dataset.mode)));
|
| 19 |
$$("[data-scroll]").forEach((button) => button.addEventListener("click", () => $(`#${button.dataset.scroll}`).scrollIntoView()));
|
| 20 |
|
| 21 |
+
const photoSets = { front: [], back: [] };
|
| 22 |
+
|
| 23 |
+
/**
 * Wire a file input so every selection is appended to the photo set of
 * `side` (capped at six photos), the most recently kept photo is shown in
 * `target`, and the side's counter label is refreshed.
 *
 * @param {HTMLInputElement} input  File input that receives the photos.
 * @param {HTMLElement} target      Element whose background shows the preview.
 * @param {string} side             Key into `photoSets` ("front" or "back").
 */
function preview(input, target, side) {
  const MAX_PHOTOS = 6;
  input.addEventListener("change", () => {
    const incoming = [...input.files];
    if (!incoming.length) return;

    const photos = photoSets[side];
    const available = Math.max(0, MAX_PHOTOS - photos.length);
    photos.push(...incoming.slice(0, available));

    const file = photos[photos.length - 1];
    const previousUrl = target.dataset.previewUrl;
    const previewUrl = URL.createObjectURL(file);
    target.dataset.previewUrl = previewUrl;
    target.style.backgroundImage = `url(${previewUrl})`;
    target.innerHTML = "";
    // Release the blob URL of the preview being replaced; without this every
    // additional capture leaks one object URL for the lifetime of the page.
    if (previousUrl) URL.revokeObjectURL(previousUrl);

    $(`#${side}-count`).textContent = `${photos.length} photo${photos.length === 1 ? "" : "s"} · add more`;
    // Clear the input so picking the same file again still fires "change".
    input.value = "";
  });
}
|
| 36 |
+
preview($("#front-file"), $("#front-preview"), "front");
|
| 37 |
+
preview($("#back-file"), $("#back-preview"), "back");
|
| 38 |
+
|
| 39 |
+
// Forget every selected photo and restore both upload cards to their empty state.
$("#clear-photos").addEventListener("click", () => {
  const placeholders = { front: "F", back: "B" };
  const sides = ["front", "back"];

  // Empty the arrays in place so existing references to them stay valid.
  for (const side of sides) {
    photoSets[side].length = 0;
  }

  for (const side of sides) {
    const card = $(`#${side}-preview`);
    card.style.backgroundImage = "";
    card.innerHTML = `<span class="upload-icon">${placeholders[side]}</span>`;
    $(`#${side}-count`).textContent = "Add photos";
  }

  $("#ocr-status").textContent = "Selected photos cleared.";
});
|
| 49 |
|
| 50 |
async function runAudit(frontText, backText) {
|
| 51 |
const response = await fetch("api/audit", {
|
|
|
|
| 157 |
|
| 158 |
// Upload every selected photo for OCR, then load the merged text into the
// paste-text fields and switch to text mode for review.
$("#read-photos").addEventListener("click", async () => {
  const payload = new FormData();
  for (const [field, side] of [["fronts", "front"], ["backs", "back"]]) {
    for (const photo of photoSets[side]) {
      payload.append(field, photo);
    }
  }

  const total = photoSets.front.length + photoSets.back.length;
  $("#ocr-status").textContent = `Reading ${total} packet photos...`;

  try {
    const response = await fetch("api/ocr", { method: "POST", body: payload });
    const { front, back } = await response.json();
    $("#front-text").value = front.text;
    $("#back-text").value = back.text;
    $("#ocr-status").textContent = `${front.status} ${back.status} Review merged evidence before trial.`;
    setMode("text");
  } catch (error) {
    // Any network, parsing or response-shape failure falls back to manual entry.
    $("#ocr-status").textContent = "OCR failed. Paste the label text to continue.";
  }
});
|
frontend/index.html
CHANGED
|
@@ -60,20 +60,21 @@
|
|
| 60 |
<div class="mode-panel active" id="mode-photos">
|
| 61 |
<div class="upload-grid">
|
| 62 |
<label class="upload-card">
|
| 63 |
-
<input type="file" id="front-file" accept="image/*" capture="environment">
|
| 64 |
<div class="upload-preview" id="front-preview"><span class="upload-icon">F</span></div>
|
| 65 |
-
<div><b>Front
|
| 66 |
-
<strong>
|
| 67 |
</label>
|
| 68 |
<label class="upload-card">
|
| 69 |
-
<input type="file" id="back-file" accept="image/*" capture="environment">
|
| 70 |
<div class="upload-preview" id="back-preview"><span class="upload-icon">B</span></div>
|
| 71 |
-
<div><b>Back
|
| 72 |
-
<strong>
|
| 73 |
</label>
|
| 74 |
</div>
|
| 75 |
-
<button class="button dark wide" id="read-photos">Read
|
| 76 |
-
<
|
|
|
|
| 77 |
</div>
|
| 78 |
|
| 79 |
<div class="mode-panel" id="mode-text">
|
|
|
|
| 60 |
<div class="mode-panel active" id="mode-photos">
|
| 61 |
<div class="upload-grid">
|
| 62 |
<label class="upload-card">
|
| 63 |
+
<input type="file" id="front-file" accept="image/*" capture="environment" multiple>
|
| 64 |
<div class="upload-preview" id="front-preview"><span class="upload-icon">F</span></div>
|
| 65 |
+
<div><b>Front and side claims</b><span>Claims, badges and promises · up to 6 photos</span></div>
|
| 66 |
+
<strong id="front-count">Add photos</strong>
|
| 67 |
</label>
|
| 68 |
<label class="upload-card">
|
| 69 |
+
<input type="file" id="back-file" accept="image/*" capture="environment" multiple>
|
| 70 |
<div class="upload-preview" id="back-preview"><span class="upload-icon">B</span></div>
|
| 71 |
+
<div><b>Back and side evidence</b><span>Ingredients, nutrition, dates and directions · up to 6 photos</span></div>
|
| 72 |
+
<strong id="back-count">Add photos</strong>
|
| 73 |
</label>
|
| 74 |
</div>
|
| 75 |
+
<button class="button dark wide" id="read-photos">Read all packet photos <span>→</span></button>
|
| 76 |
+
<button class="button quiet wide" id="clear-photos">Clear selected photos</button>
|
| 77 |
+
<p class="status-line" id="ocr-status">Add close photos of every panel. Reopen the camera picker to add another angle.</p>
|
| 78 |
</div>
|
| 79 |
|
| 80 |
<div class="mode-panel" id="mode-text">
|
src/packetcourt/ocr.py
CHANGED
|
@@ -7,6 +7,34 @@ from PIL import Image
|
|
| 7 |
from .vlm import extract_with_vlm, is_enabled
|
| 8 |
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def extract_text(image_path: str | None, side: str = "back", vlm_extractor=None) -> tuple[str, str]:
|
| 11 |
if not image_path:
|
| 12 |
return "", "No image supplied."
|
|
|
|
| 7 |
from .vlm import extract_with_vlm, is_enabled
|
| 8 |
|
| 9 |
|
| 10 |
+
def merge_extractions(results: list[tuple[str, str]], side: str) -> tuple[str, str, list[dict[str, str]]]:
    """Combine per-photo OCR results for one packet side into a single transcript.

    Args:
        results: ``(text, status)`` pairs, one per photo, in capture order.
        side: Side name used in the section headers and the summary status.

    Returns:
        A ``(merged_text, summary_status, images)`` tuple. ``merged_text``
        joins every non-empty, not-yet-seen text under a
        ``[<Side> photo <n>]`` header. ``images`` holds one entry per input
        photo with its 1-based number, its status (annotated when the text
        exactly repeats an earlier photo) and its stripped text.
    """
    if not results:
        return "", f"No {side} photos supplied.", []

    header_label = side.title()
    seen: set[str] = set()
    sections: list[str] = []
    images: list[dict[str, str]] = []

    for number, (raw_text, photo_status) in enumerate(results, start=1):
        body = raw_text.strip()
        if body and body in seen:
            # Keep the photo in the per-image report, but not in the transcript.
            photo_status = f"{photo_status} Exact duplicate skipped."
        elif body:
            seen.add(body)
            sections.append(f"[{header_label} photo {number}]\n{body}")
        images.append({"photo": str(number), "status": photo_status, "text": body})

    unique_count = len(sections)
    plural = "" if unique_count == 1 else "s"
    summary = f"Read {unique_count} unique {side} photo{plural} from {len(results)} supplied."
    return "\n\n".join(sections), summary, images
|
| 36 |
+
|
| 37 |
+
|
| 38 |
def extract_text(image_path: str | None, side: str = "back", vlm_extractor=None) -> tuple[str, str]:
|
| 39 |
if not image_path:
|
| 40 |
return "", "No image supplied."
|
src/packetcourt/parser.py
CHANGED
|
@@ -109,7 +109,8 @@ def calculate_whole_packet(nutrition: NutritionFacts) -> WholePacketNutrition:
|
|
| 109 |
def extract_ingredients(back_text: str) -> list[str]:
|
| 110 |
match = re.search(
|
| 111 |
r"\bingredients?\s*:\s*(.+?)(?=\b(?:nutrition|allergen|contains|net\s*(?:weight|wt)|storage|directions?"
|
| 112 |
-
r"|after[\s-]*opening|best before|mfd|pkd|manufactured|packed|fssai|dates?|unit sale price)\b
|
|
|
|
| 113 |
normalize_space(back_text),
|
| 114 |
re.IGNORECASE,
|
| 115 |
)
|
|
|
|
| 109 |
def extract_ingredients(back_text: str) -> list[str]:
|
| 110 |
match = re.search(
|
| 111 |
r"\bingredients?\s*:\s*(.+?)(?=\b(?:nutrition|allergen|contains|net\s*(?:weight|wt)|storage|directions?"
|
| 112 |
+
r"|after[\s-]*opening|best before|mfd|pkd|manufactured|packed|fssai|dates?|unit sale price)\b"
|
| 113 |
+
r"|\[(?:front|back)\s+photo\s+\d+\]|$)",
|
| 114 |
normalize_space(back_text),
|
| 115 |
re.IGNORECASE,
|
| 116 |
)
|
tests/test_audit.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from packetcourt import audit_packet
|
| 2 |
from packetcourt.models import Verdict
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def by_claim(result, name):
|
|
@@ -113,3 +114,19 @@ def test_dynamic_front_claim_is_audited_instead_of_dropped():
|
|
| 113 |
claim = by_claim(result, "Real Badam")
|
| 114 |
assert claim.verdict == Verdict.CONTEXT_MISSING
|
| 115 |
assert any("Badam" in evidence.text for evidence in claim.evidence)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from packetcourt import audit_packet
|
| 2 |
from packetcourt.models import Verdict
|
| 3 |
+
from packetcourt.ocr import merge_extractions
|
| 4 |
|
| 5 |
|
| 6 |
def by_claim(result, name):
|
|
|
|
| 114 |
claim = by_claim(result, "Real Badam")
|
| 115 |
assert claim.verdict == Verdict.CONTEXT_MISSING
|
| 116 |
assert any("Badam" in evidence.text for evidence in claim.evidence)
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def test_multiple_photo_extractions_are_labeled_and_exact_duplicates_are_skipped():
    """Merged OCR output labels each unique photo and flags exact repeats."""
    extractions = [
        ("HIGH PROTEIN", "read one"),
        ("HIGH PROTEIN", "read duplicate"),
        ("REAL BADAM", "read another panel"),
    ]

    merged_text, summary, per_photo = merge_extractions(extractions, "front")

    # Photo 2 repeats photo 1, so only photos 1 and 3 get a section header.
    for marker in ("[Front photo 1]", "[Front photo 3]"):
        assert marker in merged_text
    assert merged_text.count("HIGH PROTEIN") == 1
    assert "2 unique front photos" in summary
    assert "Exact duplicate skipped" in per_photo[1]["status"]
|