Spaces:
Sleeping
test(legacy): tests indépendants pour _legacy_request_to_run_request
Browse files
Avant : ``_legacy_request_to_run_request`` (introduit en Phase 4.2 du
chantier post-rewrite, qui convertit ``BenchmarkRequest`` legacy
→ ``BenchmarkRunRequest`` pour unifier ``/api/benchmark/start`` vers
le worker v2) n'était testé qu'indirectement, via l'intégration
full-stack sprint6. Une régression sur un cas limite (zero engine,
alias ``tess``, ``lang`` non Tesseract) pouvait passer inaperçue
jusqu'à un test end-to-end.
``TestLegacyRequestConversion`` (6 tests) couvre désormais le helper
indépendamment :
- Tesseract simple : ``lang`` véhiculé via ``ocr_model``.
- Engines multiples : seul Tesseract reçoit ``lang``, les autres
laissent ``ocr_model=""``.
- ``normalization_profile`` + ``char_exclude`` + ``output_dir``
préservés.
- ``report_lang`` (fr/en) préservé.
- Alias ``tess`` traité comme Tesseract (cohérent avec la factory).
- Requête minimale (juste ``corpus_path``) → defaults Pydantic
propagés sans exception.
Garantit qu'un patch sécurité/méthodologique appliqué au chemin
canonique (v2) est répercuté correctement sur le chemin legacy
``/start`` avant son éviction définitive.
https://claude.ai/code/session_01ArfZ8kcgv7Cyda7VbJVmpn
|
@@ -1279,3 +1279,131 @@ class TestSynthesisPreviewUIBinding:
|
|
| 1279 |
)
|
| 1280 |
# i18n key déclarée FR + EN.
|
| 1281 |
assert "bench_synthesis_title:" in src
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1279 |
)
|
| 1280 |
# i18n key déclarée FR + EN.
|
| 1281 |
assert "bench_synthesis_title:" in src
|
| 1282 |
+
|
| 1283 |
+
|
| 1284 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 1285 |
+
# 13. Phase 6 — _legacy_request_to_run_request conversion test
|
| 1286 |
+
# ──────────────────────────────────────────────────────────────────────
|
| 1287 |
+
|
| 1288 |
+
|
| 1289 |
+
class TestLegacyRequestConversion:
    """Dedicated unit tests for ``_legacy_request_to_run_request``.

    Phase 4.2 of the post-rewrite work unified ``/api/benchmark/start``
    through ``_legacy_request_to_run_request``, which converts a legacy
    ``BenchmarkRequest`` into a ``BenchmarkRunRequest``.

    Phase 6 adds these independent tests: previously the helper was only
    exercised indirectly through the full-stack integration suite, which
    hid regressions on edge cases (zero engine, non-Tesseract ``lang``,
    etc.).
    """

    # NOTE(review): the "zero engine" edge case named above has no test
    # in this class yet (no test passes ``engines=[]``) — the expected
    # behaviour should be confirmed against the helper before adding one.

    @staticmethod
    def _convert(**fields: object):
        """Build a legacy ``BenchmarkRequest`` from *fields* and convert it.

        Args:
            **fields: Keyword arguments forwarded unchanged to the
                ``BenchmarkRequest`` constructor.

        Returns:
            Whatever ``_legacy_request_to_run_request`` returns for that
            request (a ``BenchmarkRunRequest`` according to the class
            docstring).
        """
        # Imports stay function-local, as in every original test body,
        # so the project modules are only loaded when a test runs.
        from picarones.interfaces.web.benchmark_utils import (
            _legacy_request_to_run_request,
        )
        from picarones.interfaces.web.models import BenchmarkRequest

        return _legacy_request_to_run_request(BenchmarkRequest(**fields))

    def test_simple_tesseract_request_converted(self) -> None:
        """A single-Tesseract request yields exactly one competitor."""
        run = self._convert(
            corpus_path="/tmp/c",
            engines=["tesseract"],
            normalization_profile="nfc",
            char_exclude="",
            output_dir="/tmp/out",
            report_name="r",
            lang="fra",
            report_lang="fr",
        )
        assert run.corpus_path == "/tmp/c"
        assert len(run.competitors) == 1
        comp = run.competitors[0]
        assert comp.engine_name == "tesseract"
        # The Tesseract ``lang`` is carried through ``ocr_model``.
        assert comp.ocr_model == "fra"
        # OCR only → no LLM provider and no pipeline mode.
        assert comp.llm_provider == ""
        assert comp.pipeline_mode == ""

    def test_multiple_engines_each_becomes_pipeline_config(self) -> None:
        """Each requested engine becomes one competitor, order preserved."""
        run = self._convert(
            corpus_path="/tmp/c",
            engines=["tesseract", "pero_ocr", "kraken"],
            normalization_profile="nfc",
            output_dir="/tmp/out",
            lang="lat",
        )
        names = [c.engine_name for c in run.competitors]
        assert names == ["tesseract", "pero_ocr", "kraken"]
        # The legacy ``lang`` applies to Tesseract only — the other
        # engines get ``ocr_model=""``.
        models = [c.ocr_model for c in run.competitors]
        assert models == ["lat", "", ""]

    def test_normalization_and_char_exclude_preserved(self) -> None:
        """``normalization_profile``, ``char_exclude`` and ``output_dir``
        are copied unchanged onto the converted request."""
        run = self._convert(
            corpus_path="/tmp/c",
            engines=["tesseract"],
            normalization_profile="medieval_french",
            char_exclude="',-",
            output_dir="/tmp/out",
            lang="fra",
        )
        assert run.normalization_profile == "medieval_french"
        assert run.char_exclude == "',-"
        assert run.output_dir == "/tmp/out"

    def test_report_lang_preserved(self) -> None:
        """``report_lang`` is preserved for both report languages.

        The original test only checked ``"en"``; ``"fr"`` is exercised
        as well so both values are covered.
        """
        for report_lang in ("fr", "en"):
            run = self._convert(
                corpus_path="/tmp/c",
                engines=["tesseract"],
                output_dir="/tmp/out",
                lang="eng",
                report_lang=report_lang,
            )
            assert run.report_lang == report_lang

    def test_tess_alias_treated_as_tesseract(self) -> None:
        """The ``tess`` alias accepted by the factory must also receive
        ``lang`` as its ``ocr_model``."""
        run = self._convert(
            corpus_path="/tmp/c",
            engines=["tess"],
            output_dir="/tmp/out",
            lang="fra",
        )
        # The alias itself is kept as the engine name (not rewritten).
        assert run.competitors[0].engine_name == "tess"
        assert run.competitors[0].ocr_model == "fra"

    def test_minimal_request_default_values_propagated(self) -> None:
        """A minimal legacy request (only ``corpus_path``) converts
        without raising and carries the model defaults."""
        run = self._convert(corpus_path="/tmp/c")
        assert run.corpus_path == "/tmp/c"
        # Default: engines=["tesseract"] → one tesseract+fra competitor.
        assert len(run.competitors) == 1
        assert run.competitors[0].engine_name == "tesseract"
        assert run.competitors[0].ocr_model == "fra"
|