Spaces:
Running
Running
"""Sprint A14-S13 — ``DefaultEvaluationViewExecutor``.

Tests d'orchestration : la vue + ses dépendances (registries +
payload loader) sur 10+ cas couvrant les chemins critiques.
"""
| from __future__ import annotations | |
| import pytest | |
| from picarones.domain import ( | |
| Artifact, | |
| ArtifactType, | |
| EvaluationView, | |
| MetricSpec, | |
| ProjectionError, | |
| ProjectionSpec, | |
| ) | |
| from picarones.evaluation.projectors import ( | |
| ProjectionReport, | |
| ProjectorRegistry, | |
| ProjectorRegistrationError, | |
| ProjectorNotFoundError, | |
| ) | |
| from picarones.evaluation.registry import MetricRegistry | |
| from picarones.evaluation.views import ( | |
| DefaultEvaluationViewExecutor, | |
| ) | |
# ──────────────────────────────────────────────────────────────────────
# Stubs réutilisables
# ──────────────────────────────────────────────────────────────────────
class _StubProjector:
    """Minimal ALTO-to-text projector used as a test double.

    ``project`` never inspects the source payload: it always hands back the
    canned ``output_payload`` given at construction time, together with a
    fixed, lossy ``ProjectionReport``.
    """

    name = "stub_alto_to_text"
    source_type = ArtifactType.ALTO_XML
    target_type = ArtifactType.RAW_TEXT

    def __init__(self, output_payload: str = "projected text") -> None:
        self.output_payload = output_payload

    def project(self, artifact, params):
        """Return ``(projected_artifact, payload, report)`` for *artifact*.

        ``params`` is accepted for signature compatibility and is not used.
        """
        summary = ProjectionReport(
            source_artifact_id=artifact.id,
            source_type=self.source_type,
            target_type=self.target_type,
            projector_name=self.name,
            lossy=True,
            ignored_dimensions=("geometry", "blocks"),
            # NOTE(review): this literal is mis-encoded in the file (it
            # presumably read "deviné"). It is kept byte-for-byte because
            # TestEvaluator.test_alto_to_text_via_projection asserts on the
            # exact same text.
            warnings=("ordre de lecture devinΓ©",),
        )
        projected = Artifact(
            id=f"{artifact.id}:projected",
            document_id=artifact.document_id,
            type=self.target_type,
        )
        # Sprint S25 — the payload is returned directly alongside the artifact.
        return projected, self.output_payload, summary
def _build_executor(
    payloads: dict[str, object],
    *,
    register_projector: bool = True,
    extra_metrics: dict[str, object] | None = None,
) -> DefaultEvaluationViewExecutor:
    """Assemble an executor wired to stub metrics, a stub projector and a loader.

    Args:
        payloads: Mapping from artifact id to the payload the loader returns.
        register_projector: When true, a ``_StubProjector`` is registered.
        extra_metrics: Optional ``name -> callable`` metrics registered after
            the built-in ``cer`` and ``wer`` stubs, in mapping order.

    Returns:
        An executor built with ``DefaultEvaluationViewExecutor.from_registries``.
    """
    text_pair = (ArtifactType.RAW_TEXT, ArtifactType.RAW_TEXT)

    def _stub_cer(gt, hyp):
        # 0.0 on equality, 0.5 for two strings of equal length, 1.0 otherwise.
        if gt == hyp:
            return 0.0
        both_text = isinstance(gt, str) and isinstance(hyp, str)
        if both_text and len(gt) == len(hyp):
            return 0.5
        return 1.0

    def _stub_wer(gt, hyp):
        if gt == hyp:
            return 0.0
        return 0.5

    # A list of pairs (not a merged dict) keeps the registration order and
    # still registers a caller-supplied "cer"/"wer" a second time.
    registrations = [
        ("cer", _stub_cer),
        ("wer", _stub_wer),
        *(extra_metrics or {}).items(),
    ]

    metric_registry = MetricRegistry()
    for metric_name, metric_fn in registrations:
        metric_registry.register(
            MetricSpec(name=metric_name, input_types=text_pair),
            metric_fn,
        )

    projector_registry = ProjectorRegistry()
    if register_projector:
        projector_registry.register(_StubProjector())

    def loader(artifact: Artifact):
        # Look the payload up by artifact id; unknown ids raise KeyError.
        if artifact.id in payloads:
            return payloads[artifact.id]
        raise KeyError(f"payload manquant : {artifact.id}")

    return DefaultEvaluationViewExecutor.from_registries(
        metric_registry,
        projector_registry,
        loader,
    )
def _text_view(
    *,
    name: str = "text_final",
    candidate_types: frozenset = frozenset({
        ArtifactType.RAW_TEXT,
        ArtifactType.CORRECTED_TEXT,
        ArtifactType.ALTO_XML,
    }),
    projection: ProjectionSpec | None = None,
    normalization_profile: str | None = None,
    metric_names: tuple[str, ...] = ("cer",),
    ignored_dimensions: tuple[str, ...] = (),
    warnings: tuple[str, ...] = (),
) -> EvaluationView:
    """Build an ``EvaluationView`` with text-oriented defaults.

    Every argument is keyword-only and is forwarded unchanged to the
    ``EvaluationView`` field of the same name.
    """
    view_fields = {
        "name": name,
        "candidate_types": candidate_types,
        "projection": projection,
        "normalization_profile": normalization_profile,
        "metric_names": metric_names,
        "ignored_dimensions": ignored_dimensions,
        "warnings": warnings,
    }
    return EvaluationView(**view_fields)
# ──────────────────────────────────────────────────────────────────────
# 10 cas d'évaluation
# ──────────────────────────────────────────────────────────────────────
class TestEvaluator:
    """Ten orchestration cases for ``DefaultEvaluationViewExecutor.evaluate``.

    Each test builds an executor, a view and a candidate / ground-truth
    artifact pair, then checks the returned result or the raised exception.
    """

    def test_text_direct_no_projection(self) -> None:
        """Case 1 — RAW_TEXT candidate, no projection configured."""
        payloads = {"cand": "hello", "gt": "hello"}
        executor = _build_executor(payloads)
        view = _text_view(metric_names=("cer", "wer"))
        cand = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        result = executor.evaluate(view, cand, gt, pipeline_name="test")

        assert result.metric_values["cer"] == 0.0
        assert result.metric_values["wer"] == 0.0
        assert result.projection_report is None
        assert result.failed_metrics == {}

    def test_text_direct_with_difference(self) -> None:
        """Case 2 — RAW_TEXT candidate that differs from the ground truth."""
        payloads = {"cand": "world", "gt": "hello"}
        executor = _build_executor(payloads)
        view = _text_view()
        cand = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        result = executor.evaluate(view, cand, gt, pipeline_name="test")

        assert result.metric_values["cer"] > 0

    def test_alto_to_text_via_projection(self) -> None:
        """Case 3 — ALTO_XML projected to RAW_TEXT; a projection report is set."""
        payloads = {
            "alto:projected": "projected text",
            "gt": "projected text",
        }
        executor = _build_executor(payloads)
        view = _text_view(
            projection=ProjectionSpec(
                source_type=ArtifactType.ALTO_XML,
                target_type=ArtifactType.RAW_TEXT,
                projector_name="stub_alto_to_text",
            ),
        )
        cand = Artifact(id="alto", document_id="d", type=ArtifactType.ALTO_XML)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        result = executor.evaluate(view, cand, gt, pipeline_name="test")

        assert result.projection_report is not None
        assert result.projection_report.projector_name == "stub_alto_to_text"
        assert "geometry" in result.ignored_dimensions
        # This literal must stay byte-identical to the warning emitted by
        # _StubProjector.project, so its (mis-encoded) spelling is left as is.
        assert "ordre de lecture devinΓ©" in result.warnings
        assert result.metric_values["cer"] == 0.0

    def test_view_rejects_wrong_artifact_type(self) -> None:
        """Case 4 — the view does not accept IMAGE, so ValueError is raised."""
        payloads = {}
        executor = _build_executor(payloads)
        view = _text_view(
            candidate_types=frozenset({ArtifactType.RAW_TEXT}),
        )
        cand = Artifact(id="x", document_id="d", type=ArtifactType.IMAGE)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        with pytest.raises(ValueError, match="n'accepte pas"):
            executor.evaluate(view, cand, gt, pipeline_name="test")

    def test_unknown_projector_raises_projection_error(self) -> None:
        """Case 5 — the view names a projector that is not registered."""
        payloads = {"cand": "x", "gt": "x"}
        executor = _build_executor(payloads, register_projector=False)
        view = _text_view(
            projection=ProjectionSpec(
                source_type=ArtifactType.ALTO_XML,
                target_type=ArtifactType.RAW_TEXT,
                projector_name="nonexistent",
            ),
        )
        cand = Artifact(id="cand", document_id="d", type=ArtifactType.ALTO_XML)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        with pytest.raises(ProjectionError, match="introuvable"):
            executor.evaluate(view, cand, gt, pipeline_name="test")

    def test_projector_that_raises_wraps_in_projection_error(self) -> None:
        """Case 6 — an exception inside the projector surfaces as ProjectionError."""

        class _CrashingProjector:
            name = "crash"
            source_type = ArtifactType.ALTO_XML
            target_type = ArtifactType.RAW_TEXT

            def project(self, artifact, params):
                raise RuntimeError("boom interne")

        metrics = MetricRegistry()
        projectors = ProjectorRegistry()
        projectors.register(_CrashingProjector())
        executor = DefaultEvaluationViewExecutor.from_registries(
            metrics, projectors, lambda a: None,
        )
        view = _text_view(
            projection=ProjectionSpec(
                source_type=ArtifactType.ALTO_XML,
                target_type=ArtifactType.RAW_TEXT,
                projector_name="crash",
            ),
            metric_names=(),
        )
        cand = Artifact(id="c", document_id="d", type=ArtifactType.ALTO_XML)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        # The original RuntimeError message must remain visible in the wrapper.
        with pytest.raises(ProjectionError, match="boom interne"):
            executor.evaluate(view, cand, gt, pipeline_name="test")

    def test_metric_that_raises_goes_to_failed_metrics(self) -> None:
        """Case 7 — a raising metric lands in failed_metrics; the others still run."""

        def _broken(gt, hyp):
            raise ValueError("métrique cassée")

        payloads = {"cand": "x", "gt": "x"}
        executor = _build_executor(
            payloads,
            extra_metrics={"broken": _broken},
        )
        view = _text_view(metric_names=("cer", "broken", "wer"))
        cand = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        result = executor.evaluate(view, cand, gt, pipeline_name="test")

        assert "cer" in result.metric_values
        assert "wer" in result.metric_values
        assert "broken" in result.failed_metrics
        assert "métrique cassée" in result.failed_metrics["broken"]

    def test_unknown_metric_goes_to_failed_metrics(self) -> None:
        """Case 8 — an unregistered metric name is reported in failed_metrics."""
        payloads = {"cand": "x", "gt": "x"}
        executor = _build_executor(payloads)
        view = _text_view(metric_names=("cer", "nonexistent_metric"))
        cand = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        result = executor.evaluate(view, cand, gt, pipeline_name="test")

        assert "cer" in result.metric_values
        assert "nonexistent_metric" in result.failed_metrics
        # Encoding repaired: the expected fragment is the accented French text.
        assert "non enregistrée" in result.failed_metrics["nonexistent_metric"]

    def test_normalization_profile_applied(self) -> None:
        """Case 9 — a view with a normalization_profile still yields the metric."""
        # Per the original comment, medieval_french maps ſ → s and u → v
        # (the profile is defined outside this file).
        payloads = {"cand": "afpre", "gt": "aſpre"}
        executor = _build_executor(payloads)
        view = _text_view(normalization_profile="medieval_french")
        cand = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        result = executor.evaluate(view, cand, gt, pipeline_name="test")

        # NOTE(review): the original comment said both payloads normalize to
        # "aspre", yet the candidate is spelled with a plain "f" — confirm the
        # intended fixture. The stub cer returns 0.5 for equal-length strings
        # and 0.0 on strict equality, so this test only checks that the
        # metric was computed at all.
        assert "cer" in result.metric_values

    def test_payload_loader_failure_blocks_all_metrics(self) -> None:
        """Case 10 — a failing loader marks every metric of the view as failed."""
        metrics = MetricRegistry()
        metrics.register(
            MetricSpec(
                name="cer",
                input_types=(ArtifactType.RAW_TEXT, ArtifactType.RAW_TEXT),
            ),
            lambda r, h: 0.0,
        )
        projectors = ProjectorRegistry()

        def _bad_loader(artifact):
            # Fails for every artifact, whatever its id.
            raise FileNotFoundError(f"missing file for {artifact.id}")

        executor = DefaultEvaluationViewExecutor.from_registries(
            metrics, projectors, _bad_loader,
        )
        view = _text_view(metric_names=("cer",))
        cand = Artifact(id="cand", document_id="d", type=ArtifactType.RAW_TEXT)
        gt = Artifact(id="gt", document_id="d", type=ArtifactType.RAW_TEXT)

        result = executor.evaluate(view, cand, gt, pipeline_name="test")

        assert result.metric_values == {}
        assert "cer" in result.failed_metrics
        # Encoding repaired: the expected fragment is the accented French text.
        assert "payload_loader a échoué" in result.failed_metrics["cer"]
# ──────────────────────────────────────────────────────────────────────
# Constructor validation
# ──────────────────────────────────────────────────────────────────────
class TestConstructor:
    """The canonical constructor (S27) expects two engines plus a loader."""

    def test_rejects_non_projection_engine(self) -> None:
        """A non-engine first argument raises TypeError naming projection_engine."""
        from picarones.evaluation.evaluation_engine import EvaluationEngine

        evaluation_engine = EvaluationEngine(MetricRegistry())
        noop_loader = lambda a: None  # noqa: E731

        with pytest.raises(TypeError, match="projection_engine"):
            DefaultEvaluationViewExecutor(
                "not an engine",  # type: ignore[arg-type]
                evaluation_engine,
                noop_loader,
            )

    def test_rejects_non_evaluation_engine(self) -> None:
        """A non-engine second argument raises TypeError naming evaluation_engine."""
        from picarones.evaluation.projection_engine import ProjectionEngine

        projection_engine = ProjectionEngine(ProjectorRegistry())
        noop_loader = lambda a: None  # noqa: E731

        with pytest.raises(TypeError, match="evaluation_engine"):
            DefaultEvaluationViewExecutor(
                projection_engine,
                "nope",  # type: ignore[arg-type]
                noop_loader,
            )

    def test_rejects_non_callable_loader(self) -> None:
        """A loader that is not callable raises TypeError mentioning 'callable'."""
        from picarones.evaluation.evaluation_engine import EvaluationEngine
        from picarones.evaluation.projection_engine import ProjectionEngine

        projection_engine = ProjectionEngine(ProjectorRegistry())
        evaluation_engine = EvaluationEngine(MetricRegistry())

        with pytest.raises(TypeError, match="callable"):
            DefaultEvaluationViewExecutor(
                projection_engine,
                evaluation_engine,
                "not_callable",  # type: ignore[arg-type]
            )

    def test_from_registries_rejects_non_metric_registry(self) -> None:
        """from_registries rejects a first argument that is not a metric registry."""
        projector_registry = ProjectorRegistry()
        noop_loader = lambda a: None  # noqa: E731

        with pytest.raises(TypeError, match="metric_registry"):
            DefaultEvaluationViewExecutor.from_registries(
                "not a registry",  # type: ignore[arg-type]
                projector_registry,
                noop_loader,
            )

    def test_from_registries_rejects_non_projector_registry(self) -> None:
        """from_registries rejects a second argument that is not a projector registry."""
        metric_registry = MetricRegistry()
        noop_loader = lambda a: None  # noqa: E731

        with pytest.raises(TypeError, match="projector_registry"):
            DefaultEvaluationViewExecutor.from_registries(
                metric_registry,
                "nope",  # type: ignore[arg-type]
                noop_loader,
            )

    def test_from_registries_rejects_non_callable_loader(self) -> None:
        """from_registries rejects a loader that is not callable."""
        metric_registry = MetricRegistry()
        projector_registry = ProjectorRegistry()

        with pytest.raises(TypeError, match="callable"):
            DefaultEvaluationViewExecutor.from_registries(
                metric_registry,
                projector_registry,
                "not_callable",  # type: ignore[arg-type]
            )
# ──────────────────────────────────────────────────────────────────────
# ProjectorRegistry — tests directs
# ──────────────────────────────────────────────────────────────────────
class TestProjectorRegistry:
    """Direct tests of ``ProjectorRegistry`` using ``_StubProjector``."""

    def test_register_and_get(self) -> None:
        """A registered projector is found by name and returned by identity."""
        registry = ProjectorRegistry()
        stub = _StubProjector()

        registry.register(stub)

        assert "stub_alto_to_text" in registry
        assert registry.get("stub_alto_to_text") is stub

    def test_register_non_protocol_raises(self) -> None:
        """Registering an object with no projector attributes is refused."""

        class _NotAProjector:
            pass

        registry = ProjectorRegistry()
        with pytest.raises(ProjectorRegistrationError):
            registry.register(_NotAProjector())  # type: ignore[arg-type]

    def test_idempotent_re_registration(self) -> None:
        """Registering the same instance twice keeps a single entry."""
        registry = ProjectorRegistry()
        stub = _StubProjector()

        registry.register(stub)
        registry.register(stub)  # must not raise

        assert len(registry) == 1

    def test_get_unknown_raises(self) -> None:
        """Looking up an unregistered name raises ProjectorNotFoundError."""
        registry = ProjectorRegistry()
        with pytest.raises(ProjectorNotFoundError):
            registry.get("missing")

    def test_two_registries_independent(self) -> None:
        """Registering in one registry does not affect another instance."""
        first = ProjectorRegistry()
        second = ProjectorRegistry()

        first.register(_StubProjector())

        assert "stub_alto_to_text" in first
        assert "stub_alto_to_text" not in second