"""Tests Sprint 91 — A.II.6 : throughput effectif + coût marginal. Couvre : 1. ``compute_effective_throughput`` : formule, garde-fous, cas limite. 2. ``aggregate_effective_throughput`` : agrégation par moteur. 3. ``compute_marginal_cost`` : cas standard, dominé, non comparable. 4. ``compute_marginal_cost_matrix`` : tri, paires, n-engines. 5. Cas réaliste BnF. 6. Vue HTML : adaptive, anti-injection, FR + EN. 7. Complétude i18n FR/EN. """ from __future__ import annotations import json from pathlib import Path import pytest from picarones.evaluation.metrics.marginal_cost import ( compute_marginal_cost, compute_marginal_cost_matrix, ) from picarones.evaluation.metrics.throughput import ( aggregate_effective_throughput, compute_effective_throughput, ) from picarones.reports.html.renderers.throughput import build_throughput_html def _load_labels(lang: str) -> dict: p = ( Path(__file__).parent.parent.parent / "picarones" / "reports" / "i18n" / f"{lang}.json" ) return json.loads(p.read_text(encoding="utf-8")) # ────────────────────────────────────────────────────────────────────────── # 1. 
# compute_effective_throughput
# ──────────────────────────────────────────────────────────────────────────


class TestEffectiveThroughput:
    """Checks on ``compute_effective_throughput``: formula, guards, edge cases."""

    def test_basic_formula(self) -> None:
        # 100 pages in 600 s of OCR + 50 errors × 5 s = 250 s of correction
        # → 850 s in total for 100 pages → 423.53 pages/h
        result = compute_effective_throughput(100, 600, 50)
        assert result is not None
        expected_rate = 100 / 850 * 3600
        assert result["correction_time_seconds"] == 250.0
        assert result["total_seconds"] == 850.0
        assert result["pages_per_hour_effective"] == pytest.approx(
            expected_rate,
        )

    def test_raw_throughput(self) -> None:
        result = compute_effective_throughput(100, 600, 0)
        # No errors → the effective rate equals the raw rate
        effective = result["pages_per_hour_effective"]
        assert effective == result["pages_per_hour_raw"]
        assert result["drag_ratio"] == 0.0

    def test_custom_time_per_error(self) -> None:
        result = compute_effective_throughput(
            100,
            600,
            50,
            time_per_error_seconds=10,
        )
        assert result["correction_time_seconds"] == 500.0

    def test_zero_pages_returns_none(self) -> None:
        outcome = compute_effective_throughput(0, 100, 5)
        assert outcome is None

    def test_negative_raises(self) -> None:
        # Each call carries exactly one negative argument and must be rejected.
        invalid_calls = (
            ((10, -1, 0), {}),
            ((10, 1, -5), {}),
            ((10, 1, 0), {"time_per_error_seconds": -1}),
        )
        for args, kwargs in invalid_calls:
            with pytest.raises(ValueError):
                compute_effective_throughput(*args, **kwargs)

    def test_zero_duration_zero_errors_returns_none(self) -> None:
        # No total time at all → undefined
        outcome = compute_effective_throughput(10, 0, 0)
        assert outcome is None

    def test_drag_ratio_high_when_many_errors(self) -> None:
        result = compute_effective_throughput(100, 200, 200)
        # 200 s OCR + 1000 s correction = 1200 s, drag = 1000/1200 ≈ 0.83
        assert result["drag_ratio"] > 0.8


# ──────────────────────────────────────────────────────────────────────────
# 2.
# aggregate_effective_throughput
# ──────────────────────────────────────────────────────────────────────────


class TestAggregate:
    """Checks on ``aggregate_effective_throughput``."""

    def test_aggregates_multiple(self) -> None:
        runs = [
            {
                "engine_name": "a",
                "n_pages": 10,
                "duration_seconds": 60,
                "n_errors": 5,
            },
            {
                "engine_name": "b",
                "n_pages": 20,
                "duration_seconds": 120,
                "n_errors": 0,
            },
        ]
        summary = aggregate_effective_throughput(runs)
        assert summary is not None
        engine_names = [entry["engine_name"] for entry in summary["engines"]]
        assert engine_names == ["a", "b"]

    def test_empty_returns_none(self) -> None:
        outcome = aggregate_effective_throughput([])
        assert outcome is None

    def test_skips_invalid(self) -> None:
        # n_pages=0 → None, so the first entry is skipped
        zero_page_run = {
            "engine_name": "a",
            "n_pages": 0,
            "duration_seconds": 0,
            "n_errors": 0,
        }
        valid_run = {
            "engine_name": "b",
            "n_pages": 10,
            "duration_seconds": 60,
            "n_errors": 0,
        }
        summary = aggregate_effective_throughput([zero_page_run, valid_run])
        assert len(summary["engines"]) == 1


# ──────────────────────────────────────────────────────────────────────────
# 3. compute_marginal_cost
# ──────────────────────────────────────────────────────────────────────────


class TestMarginalCost:
    """Checks on ``compute_marginal_cost``."""

    def test_standard_case(self) -> None:
        # Tess (0 €, 200 errors) → Mistral (5 €, 50 errors):
        # 5/150 = 0.033 € per avoided error
        result = compute_marginal_cost(0, 200, 5, 50)
        expected_unit_cost = 5 / 150
        assert result["cost_per_avoided_error"] == pytest.approx(
            expected_unit_cost,
        )
        assert result["n_errors_avoided"] == 150
        assert result["dominated"] is False

    def test_dominated_case(self) -> None:
        # B is cheaper AND more accurate → dominated
        result = compute_marginal_cost(10, 100, 8, 60)
        assert result["dominated"] is True
        assert result["cost_per_avoided_error"] < 0

    def test_b_worse_returns_none(self) -> None:
        outcome = compute_marginal_cost(0, 50, 5, 80)
        assert outcome is None

    def test_equal_errors_returns_none(self) -> None:
        # No error reduction → undefined
        outcome = compute_marginal_cost(0, 100, 10, 100)
        assert outcome is None

    def test_invalid_returns_none(self) -> None:
        outcome = compute_marginal_cost(None, 100, 10, 50)
        assert outcome is None


# ──────────────────────────────────────────────────────────────────────────
# 4.
# compute_marginal_cost_matrix
# ──────────────────────────────────────────────────────────────────────────


class TestMarginalMatrix:
    """Checks on ``compute_marginal_cost_matrix``."""

    def test_basic(self) -> None:
        engines = {
            "tess": {"cost": 0, "errors": 200},
            "mistral": {"cost": 5, "errors": 50},
        }
        matrix = compute_marginal_cost_matrix(engines)
        assert matrix is not None
        # Only one valid pair: tess → mistral
        assert len(matrix["pairs"]) == 1
        only_pair = matrix["pairs"][0]
        assert only_pair["engine_a"] == "tess"
        assert only_pair["engine_b"] == "mistral"

    def test_sorted_by_marginal_cost_ascending(self) -> None:
        engines = {
            "a": {"cost": 0, "errors": 100},
            "b": {"cost": 5, "errors": 50},
            "c": {"cost": 50, "errors": 25},
        }
        matrix = compute_marginal_cost_matrix(engines)
        unit_costs = [pair["cost_per_avoided_error"] for pair in matrix["pairs"]]
        assert unit_costs == sorted(unit_costs)

    def test_lt_two_returns_none(self) -> None:
        no_engine = {}
        single_engine = {"a": {"cost": 0, "errors": 0}}
        assert compute_marginal_cost_matrix(no_engine) is None
        assert compute_marginal_cost_matrix(single_engine) is None

    def test_skips_invalid_data(self) -> None:
        engines = {
            "a": {"cost": 0, "errors": 100},
            "b": {"cost": None, "errors": 50},
        }
        matrix = compute_marginal_cost_matrix(engines)
        # Every pair involving b fails
        assert matrix is None


# ──────────────────────────────────────────────────────────────────────────
# 5. Realistic BnF case
# ──────────────────────────────────────────────────────────────────────────


class TestRealistic:
    """Scenario comparing a slow, accurate engine with a fast, error-prone one."""

    def test_local_beats_fast_cloud_on_effective(self) -> None:
        # Local Tesseract: 100 pages in 600 s of OCR, 50 errors
        # Cloud GPT-4o:    100 pages in 200 s of OCR, but 200 errors
        tess = compute_effective_throughput(100, 600, 50)
        gpt = compute_effective_throughput(100, 200, 200)

        # Raw: GPT is faster (200 s vs 600 s of OCR for the same pages)
        raw_tess = tess["pages_per_hour_raw"]
        raw_gpt = gpt["pages_per_hour_raw"]
        assert raw_gpt > raw_tess

        # Effective: Tesseract wins once correction time is counted
        effective_tess = tess["pages_per_hour_effective"]
        effective_gpt = gpt["pages_per_hour_effective"]
        assert effective_tess > effective_gpt


# ──────────────────────────────────────────────────────────────────────────
# 6.
Vue HTML # ────────────────────────────────────────────────────────────────────────── class TestRender: def test_empty_returns_empty(self) -> None: assert build_throughput_html(None) == "" assert build_throughput_html({"engines": []}) == "" def test_renders_table(self) -> None: agg = aggregate_effective_throughput([ {"engine_name": "tess", "n_pages": 100, "duration_seconds": 600, "n_errors": 50}, ]) html = build_throughput_html(agg, _load_labels("fr")) assert "