Picarones / tests /engines /test_sprint50_google_vision_confidences.py
Claude
test: réorganiser les 110 fichiers tests/test_*.py par cercle architectural
d109222 unverified
Raw
History Blame
15.1 kB
"""Sprint 50 tests — Google Vision adaptation exposing token_confidences.

Covers:

1. The extraction pipeline (``_extract_raw_confidences`` followed by
   ``_normalize_token_confidences``) rebuilds each word by concatenating
   ``word.symbols[i].text`` and pairs it with ``word.confidence``.
2. The pages → blocks → paragraphs → words hierarchy is traversed
   correctly (multi-page, multi-block).
3. Words without a confidence, negative confidences and empty symbols
   are ignored.
4. ``expose_confidences=False`` disables extraction.
5. ``full_text_annotation = None`` (TEXT_DETECTION case) → returns
   ``None``.
6. ``run()`` orchestrates both paths:
   - SDK: the ``response.full_text_annotation`` proto is converted to a dict
   - REST: ``r["fullTextAnnotation"]`` is used directly
   The text remains that of ``full_text_annotation.text``
   (backward compatibility).
7. API failure → ``error`` is set, ``token_confidences = None``.
8. SDK → normalised dict conversion: a mock proto is serialised
   correctly.
9. Runner integration: ``calibration_metrics`` is computed end to end.
"""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import MagicMock
import pytest
import picarones.engines.google_vision as gv_module
from picarones.engines.google_vision import GoogleVisionEngine
# ──────────────────────────────────────────────────────────────────────────
# Helpers : construire un fullTextAnnotation au format dict normalisΓ©
# ──────────────────────────────────────────────────────────────────────────
def _word(text: str, conf: float) -> dict:
return {
"confidence": conf,
"symbols": [{"text": c} for c in text],
}
def _full_text(words: list[dict]) -> dict:
return {
"pages": [{
"blocks": [{
"paragraphs": [{"words": words}],
}],
}],
}
# ──────────────────────────────────────────────────────────────────────────
# 1-3. Extraction depuis full_text_annotation
# ──────────────────────────────────────────────────────────────────────────
class TestExtractFromFullText:
    """Extraction of ``{"token", "confidence"}`` entries from an annotation dict."""

    @staticmethod
    def _extract(annotation: dict) -> list[dict] | None:
        """Run raw extraction then normalisation on a freshly built engine."""
        engine = GoogleVisionEngine()
        raw = engine._extract_raw_confidences(annotation)
        return engine._normalize_token_confidences(raw)

    def test_reconstructs_word_from_symbols(self) -> None:
        """The per-character symbols of a word are joined into one token."""
        annotation = _full_text([_word("Bonjour", 0.95)])
        expected = [{"token": "Bonjour", "confidence": 0.95}]
        assert self._extract(annotation) == expected

    def test_multiple_words(self) -> None:
        """Several words come back in order, each with its own confidence."""
        annotation = _full_text([
            _word("Bonjour", 0.95),
            _word("monde", 0.88),
        ])
        expected = [
            {"token": "Bonjour", "confidence": 0.95},
            {"token": "monde", "confidence": 0.88},
        ]
        assert self._extract(annotation) == expected

    def test_skips_word_without_confidence(self) -> None:
        """Words lacking a usable confidence are expected to be dropped."""
        words = [
            {"confidence": 0.95, "symbols": [{"text": "ok"}]},
            {"symbols": [{"text": "nope"}]},  # no confidence key at all
            {"confidence": None, "symbols": [{"text": "nope"}]},  # explicit None
        ]
        extracted = self._extract(_full_text(words))
        assert extracted == [{"token": "ok", "confidence": 0.95}]

    def test_skips_negative_confidence(self) -> None:
        """A word with a negative confidence is expected to be dropped."""
        words = [_word("ok", 0.9), _word("dropped", -0.1)]
        extracted = self._extract(_full_text(words))
        assert extracted == [{"token": "ok", "confidence": 0.9}]

    def test_skips_empty_text(self) -> None:
        """A word whose symbols produce an empty string is expected to be dropped."""
        words = [_word("", 0.95), _word("ok", 0.9)]
        extracted = self._extract(_full_text(words))
        assert extracted == [{"token": "ok", "confidence": 0.9}]

    def test_traverses_multiple_pages_and_blocks(self) -> None:
        """Words spread over two pages and three blocks are all visited in order."""

        def single_word_block(word: dict) -> dict:
            # One block holding one paragraph holding exactly one word.
            return {"paragraphs": [{"words": [word]}]}

        first_page = {"blocks": [
            single_word_block(_word("alpha", 0.9)),
            single_word_block(_word("beta", 0.85)),
        ]}
        second_page = {"blocks": [
            single_word_block(_word("gamma", 0.8)),
        ]}
        extracted = self._extract({"pages": [first_page, second_page]})
        assert extracted is not None
        seen_tokens = [entry["token"] for entry in extracted]
        assert seen_tokens == ["alpha", "beta", "gamma"]
# ──────────────────────────────────────────────────────────────────────────
# 4. expose_confidences=False
# ──────────────────────────────────────────────────────────────────────────
class TestExposeFlag:
    """Behaviour of the ``expose_confidences`` configuration switch."""

    def test_disabled_returns_none(self) -> None:
        """With the flag off, a valid annotation is expected to yield ``None``."""
        engine = GoogleVisionEngine(config={"expose_confidences": False})
        annotation = _full_text([_word("ok", 0.95)])
        raw = engine._extract_raw_confidences(annotation)
        assert engine._normalize_token_confidences(raw) is None
# ──────────────────────────────────────────────────────────────────────────
# 5. Cas dΓ©gΓ©nΓ©rΓ©s
# ──────────────────────────────────────────────────────────────────────────
class TestDegenerateInputs:
    """Inputs holding no extractable word are expected to produce ``None``."""

    @staticmethod
    def _extract(annotation: dict | None) -> list[dict] | None:
        """Run raw extraction then normalisation on a freshly built engine."""
        engine = GoogleVisionEngine()
        raw = engine._extract_raw_confidences(annotation)
        return engine._normalize_token_confidences(raw)

    def test_none(self) -> None:
        """``None`` in place of the annotation."""
        assert self._extract(None) is None

    def test_empty_dict(self) -> None:
        """An annotation dict with no keys."""
        assert self._extract({}) is None

    def test_no_pages(self) -> None:
        """An annotation whose ``pages`` list is empty."""
        annotation = {"pages": []}
        assert self._extract(annotation) is None

    def test_pages_without_blocks(self) -> None:
        """A page that only carries raw text and no ``blocks`` entry."""
        annotation = {"pages": [{"text": "raw text only"}]}
        assert self._extract(annotation) is None
# ──────────────────────────────────────────────────────────────────────────
# 6. Conversion SDK β†’ dict
# ──────────────────────────────────────────────────────────────────────────
class TestSdkConversion:
    """Conversion of an attribute-based SDK object into the normalised dict."""

    def test_sdk_proto_to_dict(self) -> None:
        """A mocked proto hierarchy is converted with confidence and symbols kept."""
        # Stand-ins for SDK proto objects: plain mocks exposing the attributes
        # of each level of the hierarchy, built from the leaves upwards.
        symbols = [MagicMock(text=char) for char in ("B", "o", "n")]
        word_mock = MagicMock(confidence=0.92, symbols=symbols)
        para_mock = MagicMock(words=[word_mock])
        block_mock = MagicMock(paragraphs=[para_mock])
        page_mock = MagicMock(blocks=[block_mock])
        full_mock = MagicMock(pages=[page_mock])

        converted = GoogleVisionEngine._sdk_full_text_to_dict(full_mock)

        assert "pages" in converted
        assert len(converted["pages"]) == 1
        first_word = converted["pages"][0]["blocks"][0]["paragraphs"][0]["words"][0]
        assert first_word["confidence"] == pytest.approx(0.92)
        rebuilt = "".join(symbol["text"] for symbol in first_word["symbols"])
        assert rebuilt == "Bon"
# ──────────────────────────────────────────────────────────────────────────
# 7. run() bout-en-bout via mock du chemin rΓ©seau
# ──────────────────────────────────────────────────────────────────────────
def _patch_run_with_full(
    monkeypatch: pytest.MonkeyPatch,
    text: str,
    full: dict | None,
    *,
    raise_exc: Exception | None = None,
) -> GoogleVisionEngine:
    """Return an engine whose ``_run_with_native`` is replaced by a stub.

    The stub is installed on the ``GoogleVisionEngine`` class through
    *monkeypatch*. When called it returns the ``(text, full)`` pair, or
    raises *raise_exc* instead when one is supplied.
    """
    engine = GoogleVisionEngine()
    engine._api_key = "test"  # bypass auth check

    def _stub_native(self, image_path):
        # Success path first; the exception is only raised when requested.
        if raise_exc is None:
            return text, full
        raise raise_exc

    monkeypatch.setattr(GoogleVisionEngine, "_run_with_native", _stub_native)
    return engine
class TestRunOverride:
    """``run()`` behaviour with ``_run_with_native`` replaced by a stub."""

    @staticmethod
    def _dummy_image(directory: Path) -> Path:
        """Create a one-byte placeholder file named ``p.png`` in *directory*."""
        image = directory / "p.png"
        image.write_bytes(b"x")
        return image

    def test_run_exposes_confidences(
        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
    ) -> None:
        """A full annotation yields text, no error and one entry per word."""
        annotation = _full_text([_word("Bonjour", 0.95), _word("monde", 0.88)])
        engine = _patch_run_with_full(
            monkeypatch,
            text="Bonjour monde",
            full=annotation,
        )
        result = engine.run(self._dummy_image(tmp_path))
        assert result.text == "Bonjour monde"
        assert result.error is None
        assert result.token_confidences is not None
        assert len(result.token_confidences) == 2

    def test_run_text_detection_no_confidences(
        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
    ) -> None:
        """TEXT_DETECTION: full = None → token_confidences = None."""
        engine = _patch_run_with_full(monkeypatch, text="Texte court", full=None)
        result = engine.run(self._dummy_image(tmp_path))
        assert result.text == "Texte court"
        assert result.token_confidences is None

    def test_run_api_failure_keeps_error(
        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
    ) -> None:
        """An exception from the native call ends up in ``result.error``."""
        failure = RuntimeError("Quota exceeded")
        engine = _patch_run_with_full(
            monkeypatch, text="", full=None, raise_exc=failure,
        )
        result = engine.run(self._dummy_image(tmp_path))
        assert result.error == "Quota exceeded"
        assert result.text == ""
        assert result.token_confidences is None
# ──────────────────────────────────────────────────────────────────────────
# 8. REST direct : parsing du JSON complet
# ──────────────────────────────────────────────────────────────────────────
class TestRESTPath:
    """REST code path of the engine, with ``urlopen`` stubbed out."""

    def test_rest_passes_full_text_through(
        self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path,
    ) -> None:
        """The REST path hands back the JSON ``fullTextAnnotation`` as-is.

        The returned dict must be directly consumable by the
        confidence-extraction pipeline.
        """
        engine = GoogleVisionEngine()
        engine._api_key = "test-key"
        # NOTE(review): presumably keeps the engine off the SDK/credentials
        # path — confirm against the engine implementation.
        engine._credentials_path = None

        # Canned REST reply carrying a complete fullTextAnnotation.
        annotation = {"text": "Bonjour", **_full_text([_word("Bonjour", 0.97)])}
        payload = json.dumps(
            {"responses": [{"fullTextAnnotation": annotation}]},
        ).encode("utf-8")

        class FakeResp:
            """Minimal context-manager stand-in for the HTTP response."""

            def __enter__(self):
                return self

            def __exit__(self, *args):
                return None

            def read(self):
                return payload

        def _fake_urlopen(req, timeout=60):
            return FakeResp()

        monkeypatch.setattr(gv_module.urllib.request, "urlopen", _fake_urlopen)

        image = tmp_path / "p.png"
        image.write_bytes(b"\x89PNG\r\n\x1a\n")

        text, full = engine._run_via_rest(image)
        assert text == "Bonjour"
        assert full is not None
        assert "pages" in full

        # Extraction then proceeds normally on the returned dict.
        raw = engine._extract_raw_confidences(full)
        assert engine._normalize_token_confidences(raw) == [
            {"token": "Bonjour", "confidence": 0.97},
        ]
# ──────────────────────────────────────────────────────────────────────────
# 9. IntΓ©gration runner
# ──────────────────────────────────────────────────────────────────────────
class TestEndToEndWithRunner:
    """Token confidences on an engine result feed the runner's calibration metrics."""

    def test_runner_picks_up_google_vision_confidences(self) -> None:
        """A perfect transcription gives accuracy 1.0 and the mean confidence."""
        # Imported inside the test, as in the rest of this check, so the
        # runner modules are only loaded when it actually runs.
        from picarones.measurements.runner import _compute_document_result
        from picarones.engines.base import EngineResult

        image_path = "/tmp/x.png"
        transcription = "alpha beta gamma"
        confidences = [
            {"token": "alpha", "confidence": 0.95},
            {"token": "beta", "confidence": 0.92},
            {"token": "gamma", "confidence": 0.97},
        ]

        ocr = EngineResult(
            engine_name="google_vision",
            image_path=image_path,
            text=transcription,
            duration_seconds=0.1,
            token_confidences=confidences,
        )
        dr = _compute_document_result(
            doc_id="d1",
            image_path=image_path,
            ground_truth=transcription,
            ocr_result=ocr,
            char_exclude=None,
        )

        metrics = dr.calibration_metrics
        assert metrics is not None
        assert metrics["overall_accuracy"] == 1.0
        expected_mean = (0.95 + 0.92 + 0.97) / 3
        assert metrics["overall_confidence"] == pytest.approx(expected_mean)