Spaces:
Sleeping
Sleeping
"""Tests Sprint 91 — A.II.6 : throughput effectif + coût marginal.

Couvre :
1. ``compute_effective_throughput`` : formule, garde-fous, cas
   limite.
2. ``aggregate_effective_throughput`` : agrégation par moteur.
3. ``compute_marginal_cost`` : cas standard, dominé, non
   comparable.
4. ``compute_marginal_cost_matrix`` : tri, paires, n-engines.
5. Cas réaliste BnF.
6. Vue HTML : adaptive, anti-injection, FR + EN.
7. Complétude i18n FR/EN.
"""
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| import pytest | |
| from picarones.evaluation.metrics.marginal_cost import ( | |
| compute_marginal_cost, | |
| compute_marginal_cost_matrix, | |
| ) | |
| from picarones.evaluation.metrics.throughput import ( | |
| aggregate_effective_throughput, | |
| compute_effective_throughput, | |
| ) | |
| from picarones.reports.html.renderers.throughput import build_throughput_html | |
def _load_labels(lang: str) -> dict:
    """Load the report label catalogue for ``lang``.

    The catalogue is read from ``picarones/reports/i18n/<lang>.json``,
    resolved three directory levels above this test module, and decoded
    as UTF-8 JSON.

    Args:
        lang: Language code used as the JSON file stem (e.g. ``"fr"``).

    Returns:
        The decoded JSON document.
    """
    base_dir = Path(__file__).parent.parent.parent
    i18n_dir = base_dir / "picarones" / "reports" / "i18n"
    labels_file = i18n_dir / f"{lang}.json"
    with labels_file.open(encoding="utf-8") as handle:
        return json.load(handle)
# ──────────────────────────────────────────────────────────────────────────
# 1. compute_effective_throughput
# ──────────────────────────────────────────────────────────────────────────
class TestEffectiveThroughput:
    """Checks for ``compute_effective_throughput``.

    Covers the throughput formula, the input guards and the edge cases
    that make the function return ``None``.  Every test that reads the
    result first asserts it is not ``None`` so that a regression fails
    with a clear assertion instead of a ``TypeError`` on subscripting.
    """

    def test_basic_formula(self) -> None:
        # 100 pages in 600 s of OCR + 50 errors x 5 s = 250 s of correction
        # -> 850 s in total for 100 pages, i.e. about 423.53 pages/h.
        r = compute_effective_throughput(100, 600, 50)
        assert r is not None
        assert r["correction_time_seconds"] == 250.0
        assert r["total_seconds"] == 850.0
        assert r["pages_per_hour_effective"] == pytest.approx(
            100 / 850 * 3600,
        )

    def test_raw_throughput(self) -> None:
        r = compute_effective_throughput(100, 600, 0)
        assert r is not None
        # No errors -> the effective throughput equals the raw one.
        assert r["pages_per_hour_effective"] == r["pages_per_hour_raw"]
        assert r["drag_ratio"] == 0.0

    def test_custom_time_per_error(self) -> None:
        r = compute_effective_throughput(
            100, 600, 50, time_per_error_seconds=10,
        )
        assert r is not None
        assert r["correction_time_seconds"] == 500.0

    def test_zero_pages_returns_none(self) -> None:
        assert compute_effective_throughput(0, 100, 5) is None

    # One parametrized case per invalid input, so a missing guard on one
    # argument cannot be hidden by an earlier failure in the same test.
    @pytest.mark.parametrize(
        ("args", "kwargs"),
        [
            pytest.param((10, -1, 0), {}, id="negative-duration"),
            pytest.param((10, 1, -5), {}, id="negative-errors"),
            pytest.param(
                (10, 1, 0),
                {"time_per_error_seconds": -1},
                id="negative-time-per-error",
            ),
        ],
    )
    def test_negative_raises(
        self, args: tuple[int, int, int], kwargs: dict[str, int],
    ) -> None:
        with pytest.raises(ValueError):
            compute_effective_throughput(*args, **kwargs)

    def test_zero_duration_zero_errors_returns_none(self) -> None:
        # No time spent at all -> the throughput is undefined.
        assert compute_effective_throughput(10, 0, 0) is None

    def test_drag_ratio_high_when_many_errors(self) -> None:
        r = compute_effective_throughput(100, 200, 200)
        assert r is not None
        # 200 s OCR + 1000 s correction = 1200 s, drag = 1000/1200 ~= 0.83
        assert r["drag_ratio"] > 0.8
# ──────────────────────────────────────────────────────────────────────────
# 2. aggregate_effective_throughput
# ──────────────────────────────────────────────────────────────────────────
class TestAggregate:
    """Checks for ``aggregate_effective_throughput``."""

    def test_aggregates_multiple(self) -> None:
        runs = [
            {
                "engine_name": "a",
                "n_pages": 10,
                "duration_seconds": 60,
                "n_errors": 5,
            },
            {
                "engine_name": "b",
                "n_pages": 20,
                "duration_seconds": 120,
                "n_errors": 0,
            },
        ]
        result = aggregate_effective_throughput(runs)
        assert result is not None
        engine_names = [entry["engine_name"] for entry in result["engines"]]
        assert engine_names == ["a", "b"]

    def test_empty_returns_none(self) -> None:
        assert aggregate_effective_throughput([]) is None

    def test_skips_invalid(self) -> None:
        # Engine "a" has n_pages=0: its throughput is None, so it is ignored.
        undefined_run = {
            "engine_name": "a",
            "n_pages": 0,
            "duration_seconds": 0,
            "n_errors": 0,
        }
        valid_run = {
            "engine_name": "b",
            "n_pages": 10,
            "duration_seconds": 60,
            "n_errors": 0,
        }
        result = aggregate_effective_throughput([undefined_run, valid_run])
        assert len(result["engines"]) == 1
# ──────────────────────────────────────────────────────────────────────────
# 3. compute_marginal_cost
# ──────────────────────────────────────────────────────────────────────────
class TestMarginalCost:
    """Checks for ``compute_marginal_cost`` on a single engine pair."""

    def test_standard_case(self) -> None:
        # Tess (0 EUR, 200 errors) -> Mistral (5 EUR, 50 errors):
        # 5 / 150 ~= 0.033 EUR per avoided error.
        outcome = compute_marginal_cost(0, 200, 5, 50)
        assert outcome["cost_per_avoided_error"] == pytest.approx(5 / 150)
        assert outcome["n_errors_avoided"] == 150
        assert outcome["dominated"] is False

    def test_dominated_case(self) -> None:
        # B is cheaper AND more accurate -> flagged as dominated.
        outcome = compute_marginal_cost(10, 100, 8, 60)
        assert outcome["dominated"] is True
        assert outcome["cost_per_avoided_error"] < 0

    def test_b_worse_returns_none(self) -> None:
        outcome = compute_marginal_cost(0, 50, 5, 80)
        assert outcome is None

    def test_equal_errors_returns_none(self) -> None:
        # No error reduction -> the marginal cost is undefined.
        outcome = compute_marginal_cost(0, 100, 10, 100)
        assert outcome is None

    def test_invalid_returns_none(self) -> None:
        outcome = compute_marginal_cost(None, 100, 10, 50)
        assert outcome is None
# ──────────────────────────────────────────────────────────────────────────
# 4. compute_marginal_cost_matrix
# ──────────────────────────────────────────────────────────────────────────
class TestMarginalMatrix:
    """Checks for ``compute_marginal_cost_matrix`` over several engines."""

    def test_basic(self) -> None:
        engines = {
            "tess": {"cost": 0, "errors": 200},
            "mistral": {"cost": 5, "errors": 50},
        }
        matrix = compute_marginal_cost_matrix(engines)
        assert matrix is not None
        # Only one valid pair: tess -> mistral.
        assert len(matrix["pairs"]) == 1
        only_pair = matrix["pairs"][0]
        assert only_pair["engine_a"] == "tess"
        assert only_pair["engine_b"] == "mistral"

    def test_sorted_by_marginal_cost_ascending(self) -> None:
        engines = {
            "a": {"cost": 0, "errors": 100},
            "b": {"cost": 5, "errors": 50},
            "c": {"cost": 50, "errors": 25},
        }
        matrix = compute_marginal_cost_matrix(engines)
        marginal_costs = []
        for pair in matrix["pairs"]:
            marginal_costs.append(pair["cost_per_avoided_error"])
        assert marginal_costs == sorted(marginal_costs)

    def test_lt_two_returns_none(self) -> None:
        no_engine = {}
        single_engine = {"a": {"cost": 0, "errors": 0}}
        assert compute_marginal_cost_matrix(no_engine) is None
        assert compute_marginal_cost_matrix(single_engine) is None

    def test_skips_invalid_data(self) -> None:
        engines = {
            "a": {"cost": 0, "errors": 100},
            "b": {"cost": None, "errors": 50},
        }
        # Every pair involving "b" fails, so nothing is left to report.
        assert compute_marginal_cost_matrix(engines) is None
# ──────────────────────────────────────────────────────────────────────────
# 5. Cas réaliste BnF
# ──────────────────────────────────────────────────────────────────────────
class TestRealistic:
    """Realistic scenario: a slow, accurate engine vs a fast, noisy one."""

    def test_local_beats_fast_cloud_on_effective(self) -> None:
        # Local Tesseract: 100 pages in 600 s of OCR, 50 errors.
        local = compute_effective_throughput(100, 600, 50)
        # Cloud GPT-4o: 100 pages in 200 s of OCR but 200 errors.
        cloud = compute_effective_throughput(100, 200, 200)

        # Raw throughput: the cloud engine is the faster one.
        local_raw = local["pages_per_hour_raw"]
        cloud_raw = cloud["pages_per_hour_raw"]
        assert cloud_raw > local_raw

        # Effective throughput: the local engine wins once correction
        # time is accounted for.
        local_effective = local["pages_per_hour_effective"]
        cloud_effective = cloud["pages_per_hour_effective"]
        assert local_effective > cloud_effective
# ──────────────────────────────────────────────────────────────────────────
# 6. Vue HTML
# ──────────────────────────────────────────────────────────────────────────
class TestRender:
    """Checks for the ``build_throughput_html`` renderer.

    Covers the empty-input short-circuit, table rendering, HTML escaping
    of engine names, the English labels and the row ordering.
    """

    def test_empty_returns_empty(self) -> None:
        assert build_throughput_html(None) == ""
        assert build_throughput_html({"engines": []}) == ""

    def test_renders_table(self) -> None:
        agg = aggregate_effective_throughput([
            {"engine_name": "tess", "n_pages": 100,
             "duration_seconds": 600, "n_errors": 50},
        ])
        html = build_throughput_html(agg, _load_labels("fr"))
        assert "<table" in html
        assert "tess" in html
        # The drag (250 s of correction out of 850 s, ~29.4 %) is displayed.
        assert "29.4" in html

    def test_anti_injection(self) -> None:
        agg = aggregate_effective_throughput([
            {"engine_name": "<script>alert(1)</script>",
             "n_pages": 10, "duration_seconds": 60, "n_errors": 0},
        ])
        html = build_throughput_html(agg, _load_labels("fr"))
        # The raw markup must never reach the output...
        assert "<script>alert" not in html
        # ...and the engine name must still be shown, in escaped form.
        # Checking for a literal "<script>" here would demand the very
        # markup the previous assertion forbids.
        assert "&lt;script&gt;" in html

    def test_renders_in_english(self) -> None:
        agg = aggregate_effective_throughput([
            {"engine_name": "tess", "n_pages": 10,
             "duration_seconds": 60, "n_errors": 0},
        ])
        html = build_throughput_html(agg, _load_labels("en"))
        assert "Effective throughput" in html

    def test_sorted_by_effective_descending(self) -> None:
        agg = aggregate_effective_throughput([
            {"engine_name": "slow", "n_pages": 100,
             "duration_seconds": 3600, "n_errors": 0},
            {"engine_name": "fast", "n_pages": 100,
             "duration_seconds": 100, "n_errors": 0},
        ])
        html = build_throughput_html(agg, _load_labels("fr"))
        # The faster engine must be rendered before the slower one.
        assert html.index("fast") < html.index("slow")
# ──────────────────────────────────────────────────────────────────────────
# 7. Complétude i18n
# ──────────────────────────────────────────────────────────────────────────
| _KEYS = { | |
| "throughput_title", "throughput_note", "throughput_engine", | |
| "throughput_raw", "throughput_effective", "throughput_drag", | |
| "throughput_pages", "throughput_errors", | |
| } | |
class TestI18n:
    """Checks that each label catalogue defines every key in ``_KEYS``."""

    def test_fr(self) -> None:
        labels = _load_labels("fr")
        missing = _KEYS - labels.keys()
        assert not missing

    def test_en(self) -> None:
        labels = _load_labels("en")
        missing = _KEYS - labels.keys()
        assert not missing