Spaces:
Running
Running
Claude
test: réorganiser les 110 fichiers tests/test_*.py par cercle architectural
d109222 unverified | """Tests Sprint 81 — A.I.8 : robustesse projetée sur corpus réel. | |
| Couvre : | |
| 1. ``_interpolate_cer`` : | |
| - Niveau exact sur la courbe → CER exact | |
| - Interpolation entre 2 points | |
| - Clip lower/upper | |
| - Pas de cer valide → None | |
| 2. ``_extract_quality_value`` : mapping default + custom. | |
| 3. ``project_robustness_on_corpus`` : | |
| - 1 moteur × 1 dégradation × N docs → projection cohérente | |
| - Multi-moteurs / multi-dégradations | |
| - Document sans qualité → ignoré | |
| - Aucune courbe → projection vide | |
| - Aucun doc → entry omis | |
| - n_docs_above_critical correct | |
| 4. ``aggregate_projection_per_engine`` : | |
| - Total deficit sur N types | |
| - Worst degradation type identifié | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from picarones.measurements.robustness_projection import ( | |
| _extract_quality_value, | |
| _interpolate_cer, | |
| aggregate_projection_per_engine, | |
| project_robustness_on_corpus, | |
| ) | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # 1. _interpolate_cer | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestInterpolate:
    """Exercise ``_interpolate_cer`` on exact, interpolated, clipped and missing data."""

    def test_exact_match(self) -> None:
        # A level that coincides with a curve point should give that point's CER.
        levels = [0, 5, 10, 20]
        cer_values = [0.05, 0.10, 0.20, 0.50]
        assert _interpolate_cer(levels, cer_values, 10) == 0.20

    def test_linear_interpolation(self) -> None:
        # Level 7.5 lies midway between level 5 (CER 0.10) and level 10
        # (CER 0.20), so the expected CER is 0.15.
        midpoint_cer = _interpolate_cer([5, 10], [0.10, 0.20], 7.5)
        assert midpoint_cer == pytest.approx(0.15)

    def test_clip_lower(self) -> None:
        # A level below the lowest curve level is expected to take the CER
        # of that lowest level.
        below_range = _interpolate_cer([5, 10], [0.10, 0.20], -1)
        assert below_range == 0.10

    def test_clip_upper(self) -> None:
        # A level above the highest curve level is expected to take the CER
        # of that highest level.
        above_range = _interpolate_cer([5, 10], [0.10, 0.20], 100)
        assert above_range == 0.20

    def test_empty_levels(self) -> None:
        # An empty curve cannot produce a value.
        assert _interpolate_cer([], [], 5) is None

    def test_all_cer_none(self) -> None:
        # A curve whose CER values are all None has no usable point.
        no_valid_points = _interpolate_cer([0, 5], [None, None], 3)
        assert no_valid_points is None

    def test_some_cer_none_skipped(self) -> None:
        # The None entry is skipped: interpolating between the remaining
        # points (0, 0.10) and (10, 0.30) at level 5 should give 0.20.
        levels = [0, 5, 10]
        cer_values = [0.10, None, 0.30]
        interpolated = _interpolate_cer(levels, cer_values, 5)
        assert interpolated == pytest.approx(0.20)
| # ────────────────────────────────────────────────────────────────────────── | |
| # 2. _extract_quality_value | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestExtractQuality:
    """Exercise ``_extract_quality_value`` with the default and a custom mapping."""

    def test_default_mapping(self) -> None:
        # With no custom mapping, "noise" reads "noise_level" and "blur"
        # reads "blur_score".
        quality = {"noise_level": 15.0, "blur_score": 200.0}
        for degradation, expected in (("noise", 15.0), ("blur", 200.0)):
            assert _extract_quality_value(quality, degradation) == expected

    def test_unknown_degradation(self) -> None:
        # An unrecognised degradation type yields no value.
        extracted = _extract_quality_value({}, "unknown")
        assert extracted is None

    def test_missing_field(self) -> None:
        # A known degradation type whose field is absent yields no value.
        extracted = _extract_quality_value({}, "noise")
        assert extracted is None

    def test_custom_mapping(self) -> None:
        # A caller-supplied mapping redirects "noise" to another field.
        quality = {"my_noise_metric": 22.0}
        mapping = {"noise": "my_noise_metric"}
        extracted = _extract_quality_value(
            quality, "noise", custom_mapping=mapping,
        )
        assert extracted == 22.0
| # ────────────────────────────────────────────────────────────────────────── | |
| # 3. project_robustness_on_corpus | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestProjection:
    """Exercise ``project_robustness_on_corpus`` on small hand-built inputs."""

    def _curve(self, engine: str = "t", deg: str = "noise") -> dict:
        """Build one robustness-curve dict for ``engine`` and degradation ``deg``.

        Every curve shares the same four levels and CER values, so the
        expected figures in the tests below can be worked out by hand.
        """
        return {
            "engine_name": engine,
            "degradation_type": deg,
            "levels": [0, 5, 10, 20],
            "cer_values": [0.05, 0.10, 0.20, 0.50],
            "critical_threshold_level": 10,
            "cer_threshold": 0.20,
        }

    def test_single_curve_single_doc(self) -> None:
        curve = self._curve()
        # A single document at noise level 7.5 -> expected CER 0.15.
        qualities = [{"noise_level": 7.5}]
        result = project_robustness_on_corpus([curve], qualities)
        assert "t" in result
        deg_data = result["t"]["noise"]
        assert deg_data["n_docs"] == 1
        assert deg_data["n_docs_with_data"] == 1
        assert deg_data["expected_cer_mean"] == pytest.approx(0.15)
        assert deg_data["baseline_cer"] == pytest.approx(0.05)
        assert deg_data["deficit_vs_baseline"] == pytest.approx(0.10)

    def test_doc_above_critical(self) -> None:
        curve = self._curve()
        # Three documents: two at level 5 (CER 0.10) and one at level 15
        # (CER 0.35, halfway between the level-10 and level-20 points).
        qualities = [
            {"noise_level": 5}, {"noise_level": 5}, {"noise_level": 15},
        ]
        result = project_robustness_on_corpus([curve], qualities)
        deg = result["t"]["noise"]
        # With a critical CER of 0.20, only the level-15 document is
        # expected to count as above critical.
        assert deg["n_docs_above_critical"] == 1

    def test_doc_without_data_ignored(self) -> None:
        curve = self._curve()
        qualities = [
            {"noise_level": 5},
            {},  # no noise_level field
        ]
        result = project_robustness_on_corpus([curve], qualities)
        deg = result["t"]["noise"]
        # Both documents are counted, but only one contributes data.
        assert deg["n_docs"] == 2
        assert deg["n_docs_with_data"] == 1

    def test_multiple_engines_and_types(self) -> None:
        curves = [
            self._curve("alpha", "noise"),
            self._curve("alpha", "blur"),
            self._curve("beta", "noise"),
        ]
        qualities = [{"noise_level": 5, "blur_score": 5}]
        result = project_robustness_on_corpus(curves, qualities)
        assert "alpha" in result
        assert "beta" in result
        assert "noise" in result["alpha"]
        assert "blur" in result["alpha"]
        # The beta curve is identical to alpha's noise curve apart from the
        # engine name, so its projection must be present as well.
        assert "noise" in result["beta"]

    def test_no_curves_returns_empty(self) -> None:
        assert project_robustness_on_corpus([], [{"noise_level": 5}]) == {}

    def test_no_docs_omits_entry(self) -> None:
        curve = self._curve()
        result = project_robustness_on_corpus([curve], [])
        # No t/noise entry is expected when there are no documents
        # (original author's note: per_doc_cer is empty in that case).
        assert result == {}

    def test_critical_threshold_override(self) -> None:
        curve = self._curve()
        # Level 5 -> CER 0.10, level 10 -> CER 0.20.
        qualities = [{"noise_level": 7}, {"noise_level": 10}]
        # With critical_threshold=0.15, the level-7 document (CER ~0.14)
        # is below the threshold and the level-10 document (CER 0.20) is
        # above it.
        result = project_robustness_on_corpus(
            [curve], qualities, critical_threshold=0.15,
        )
        # NOTE(review): the scenario above implies exactly one document
        # above critical, yet only "at least one" is asserted. Consider
        # tightening to == 1 once confirmed against the implementation.
        assert result["t"]["noise"]["n_docs_above_critical"] >= 1
| # ────────────────────────────────────────────────────────────────────────── | |
| # 4. aggregate_projection_per_engine | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestAggregate:
    """Exercise ``aggregate_projection_per_engine`` on hand-built projections."""

    def test_total_deficit_summed(self) -> None:
        # Deficits of the two degradation types add up to 0.15.
        per_type = {
            "noise": {"deficit_vs_baseline": 0.10},
            "blur": {"deficit_vs_baseline": 0.05},
        }
        summary = aggregate_projection_per_engine({"t": per_type})["t"]
        assert summary["total_expected_deficit"] == pytest.approx(0.15)
        assert summary["n_degradation_types"] == 2

    def test_worst_degradation_identified(self) -> None:
        # "blur" carries the largest deficit of the three types.
        per_type = {
            "noise": {"deficit_vs_baseline": 0.05},
            "blur": {"deficit_vs_baseline": 0.20},
            "rotation": {"deficit_vs_baseline": 0.02},
        }
        summary = aggregate_projection_per_engine({"t": per_type})["t"]
        assert summary["worst_degradation_type"] == "blur"
        assert summary["worst_degradation_deficit"] == 0.20

    def test_none_deficit_skipped(self) -> None:
        # A None deficit is left out of both the total and the type count.
        per_type = {
            "noise": {"deficit_vs_baseline": 0.05},
            "blur": {"deficit_vs_baseline": None},
        }
        summary = aggregate_projection_per_engine({"t": per_type})["t"]
        assert summary["total_expected_deficit"] == pytest.approx(0.05)
        assert summary["n_degradation_types"] == 1

    def test_empty_projection(self) -> None:
        # An empty projection aggregates to an empty result.
        aggregated = aggregate_projection_per_engine({})
        assert aggregated == {}