Spaces:
Sleeping
Sleeping
Claude
test: réorganiser les 110 fichiers tests/test_*.py par cercle architectural
d109222 unverified | """Tests Sprint 33 — Interface module générique (Phase 0.2). | |
| Vérifie : | |
| 1. ``BaseModule`` est instanciable via une sous-classe minimale qui | |
| déclare ses ``input_types`` / ``output_types`` et implémente | |
| ``process``. | |
| 2. La validation des entrées/sorties (``validate_inputs`` / | |
| ``validate_outputs``) lève ``ValueError`` quand un type déclaré est | |
| manquant. | |
| 3. Un ``MockModule`` qui consomme ``TEXT`` et produit ``ALTO`` peut | |
| exister — l'interface n'est pas restreinte aux OCR (critère | |
| explicite du plan). | |
| 4. ``BaseOCREngine`` hérite de ``BaseModule`` et expose | |
| ``input_types=(IMAGE,)``, ``output_types=(TEXT,)``. | |
| 5. La méthode ``process`` d'un moteur OCR existant délègue correctement | |
| à ``run``/``_run_ocr`` et retourne le bon type d'artefact. | |
| 6. Les valeurs string de ``ArtifactType`` correspondent à celles de | |
| ``GTLevel`` pour permettre la conversion triviale. | |
| """ | |
| from __future__ import annotations | |
| from pathlib import Path | |
| from typing import Any | |
| import pytest | |
| from picarones.core.corpus import GTLevel | |
| from picarones.core.modules import ArtifactType, BaseModule | |
| from picarones.engines.base import BaseOCREngine, EngineResult | |
| # ────────────────────────────────────────────────────────────────────────── | |
| # Fixtures de modules de test | |
| # ────────────────────────────────────────────────────────────────────────── | |
class UpperCaseTextModule(BaseModule):
    """Minimal TEXT -> TEXT module used to exercise the base contract.

    Declares a single ``TEXT`` input and a single ``TEXT`` output and
    returns the incoming text upper-cased.
    """

    input_types = (ArtifactType.TEXT,)
    output_types = (ArtifactType.TEXT,)
    execution_mode = "cpu"

    def name(self) -> str:
        """Return the identifier of this module."""
        return "uppercase"

    def process(self, inputs: dict[ArtifactType, Any]) -> dict[ArtifactType, Any]:
        """Validate ``inputs`` and return their ``TEXT`` entry upper-cased."""
        self.validate_inputs(inputs)
        shouted = inputs[ArtifactType.TEXT].upper()
        return {ArtifactType.TEXT: shouted}
class TextToAltoMock(BaseModule):
    """Mock TEXT -> ALTO module: the explicit success criterion of the plan.

    A textbook case for the future BnF ``alto_reconstructor`` (see the
    evolution plan, Sprint B.1).
    """

    input_types = (ArtifactType.TEXT,)
    output_types = (ArtifactType.ALTO,)
    execution_mode = "cpu"

    def name(self) -> str:
        """Return the identifier of this module."""
        return "text_to_alto_mock"

    def process(self, inputs: dict[ArtifactType, Any]) -> dict[ArtifactType, Any]:
        """Wrap the ``TEXT`` input in a trivial single-string ALTO document.

        Args:
            inputs: Mapping that must contain an ``ArtifactType.TEXT`` entry.

        Returns:
            A mapping with a single ``ArtifactType.ALTO`` entry holding the
            XML document as a string.
        """
        # Function-local import: keeps the fix self-contained without
        # touching the module-level import block.
        from xml.sax.saxutils import quoteattr

        self.validate_inputs(inputs)
        text = inputs[ArtifactType.TEXT]
        # quoteattr() escapes &, <, > and quotes and adds the surrounding
        # quotes itself, so arbitrary text cannot break the attribute.
        # str() keeps the previous f-string tolerance for non-str values.
        content_attr = quoteattr(str(text))
        alto = (
            '<?xml version="1.0" encoding="UTF-8"?>'
            '<alto xmlns="http://www.loc.gov/standards/alto/ns-v4#">'
            "<Layout><Page><PrintSpace><TextBlock><TextLine>"
            f"<String CONTENT={content_attr}/>"
            "</TextLine></TextBlock></PrintSpace></Page></Layout>"
            "</alto>"
        )
        return {ArtifactType.ALTO: alto}

    def metadata(self) -> dict:
        """Return the static metadata describing the generation strategy."""
        return {"strategy": "trivial_single_string"}
class FaultyModule(BaseModule):
    """Module that declares an ALTO output but never produces one.

    Exists to exercise output validation.
    """

    input_types = (ArtifactType.TEXT,)
    output_types = (ArtifactType.ALTO,)

    def name(self) -> str:
        """Return the identifier of this module."""
        return "faulty"

    def process(self, inputs: dict[ArtifactType, Any]) -> dict[ArtifactType, Any]:
        """Return a ``TEXT`` artifact, ignoring ``inputs`` entirely."""
        # Deliberately keyed by TEXT rather than the declared ALTO output.
        wrong_output = {ArtifactType.TEXT: "oops"}
        return wrong_output
class FakeOCREngine(BaseOCREngine):
    """Fake OCR engine used to test the ``BaseOCREngine.process`` delegation."""

    def name(self) -> str:
        """Return the identifier of this engine."""
        return "fake_ocr"

    def version(self) -> str:
        """Return the fixed version string of this engine."""
        return "0.1.0"

    def _run_ocr(self, image_path: Path) -> str:
        """Return a canned transcription built from the image file name."""
        file_name = image_path.name
        return f"transcription de {file_name}"
| # ────────────────────────────────────────────────────────────────────────── | |
| # 1 & 2. Contrat BaseModule : instanciation et validation | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestBaseModuleContract:
    """Instantiation and input/output validation of the ``BaseModule`` contract."""

    def test_minimal_module_runs(self) -> None:
        """A minimal subclass can be instantiated and processes its input."""
        m = UpperCaseTextModule()
        out = m.process({ArtifactType.TEXT: "bonjour"})
        assert out == {ArtifactType.TEXT: "BONJOUR"}

    def test_validate_inputs_missing_raises(self) -> None:
        """``validate_inputs`` raises ``ValueError`` when an input is missing."""
        m = UpperCaseTextModule()
        # Literal restored from a mis-encoded "entrΓ©es" (UTF-8 read with
        # the wrong codec), which could never match a French message.
        with pytest.raises(ValueError, match="entrées manquantes"):
            m.validate_inputs({})

    def test_validate_outputs_missing_raises(self) -> None:
        """``validate_outputs`` raises ``ValueError`` when an output is missing."""
        m = UpperCaseTextModule()
        with pytest.raises(ValueError, match="sorties manquantes"):
            m.validate_outputs({})

    def test_validate_outputs_passes_when_complete(self) -> None:
        """``validate_outputs`` accepts a mapping holding every declared type."""
        m = UpperCaseTextModule()
        # Must pass without raising.
        m.validate_outputs({ArtifactType.TEXT: "hello"})

    def test_default_metadata_is_empty(self) -> None:
        """A module that does not override ``metadata`` exposes an empty dict."""
        assert UpperCaseTextModule().metadata() == {}

    def test_repr_shows_io_types(self) -> None:
        """``repr`` mentions the module name and its input/output types."""
        m = UpperCaseTextModule()
        r = repr(m)
        assert "uppercase" in r
        # NOTE(review): the separator was garbled ("textβtext") in the
        # source; restored as an arrow by analogy with the "TEXT → ALTO"
        # wording used elsewhere — confirm against BaseModule.__repr__.
        assert "text→text" in r

    def test_default_execution_mode(self) -> None:
        """A module that declares no ``execution_mode`` reports ``"io"``."""

        # UpperCaseTextModule forces "cpu"; a module that declares nothing
        # inherits "io".
        class IOModule(BaseModule):
            input_types = (ArtifactType.TEXT,)
            output_types = (ArtifactType.TEXT,)

            def name(self) -> str:
                return "io"

            def process(self, inputs):
                return {ArtifactType.TEXT: inputs[ArtifactType.TEXT]}

        assert IOModule.execution_mode == "io"
| # ────────────────────────────────────────────────────────────────────────── | |
| # 3. MockModule TEXT → ALTO (critère explicite du plan) | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestMockTextToAlto:
    """Checks that a non-OCR TEXT -> ALTO module fits the interface."""

    def test_text_to_alto_runs(self) -> None:
        """Processing a text yields an ALTO artifact embedding that text."""
        produced = TextToAltoMock().process({ArtifactType.TEXT: "Hello"})
        assert ArtifactType.ALTO in produced
        alto_xml = produced[ArtifactType.ALTO]
        assert "Hello" in alto_xml
        assert "alto" in alto_xml

    def test_text_to_alto_declares_correct_types(self) -> None:
        """The class-level type declarations are TEXT in, ALTO out."""
        expected_in = (ArtifactType.TEXT,)
        expected_out = (ArtifactType.ALTO,)
        assert TextToAltoMock.input_types == expected_in
        assert TextToAltoMock.output_types == expected_out

    def test_text_to_alto_metadata_exposed(self) -> None:
        """The overridden ``metadata`` is returned as declared."""
        meta = TextToAltoMock().metadata()
        assert meta == {"strategy": "trivial_single_string"}

    def test_validate_inputs_catches_missing_text(self) -> None:
        """Feeding an IMAGE where TEXT is expected raises ``ValueError``."""
        mock = TextToAltoMock()
        # Supplies an IMAGE although the module expects TEXT.
        wrong_inputs = {ArtifactType.IMAGE: Path("/tmp/x.png")}
        with pytest.raises(ValueError):
            mock.process(wrong_inputs)
| # ────────────────────────────────────────────────────────────────────────── | |
| # 4 & 5. BaseOCREngine est rétrocompatible et respecte BaseModule | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestOCREngineAsModule:
    """``BaseOCREngine`` stays backward compatible and honours ``BaseModule``."""

    def test_baseocrengine_is_basemodule(self) -> None:
        """``BaseOCREngine`` is a subclass of ``BaseModule``."""
        assert issubclass(BaseOCREngine, BaseModule)

    def test_baseocrengine_io_types(self) -> None:
        """OCR engines declare IMAGE as input and TEXT as output."""
        assert BaseOCREngine.input_types == (ArtifactType.IMAGE,)
        assert BaseOCREngine.output_types == (ArtifactType.TEXT,)

    def test_fake_engine_run_unchanged(self, tmp_path: Path) -> None:
        """The historical ``run`` API returns an intact ``EngineResult``."""
        image = tmp_path / "doc.png"
        image.write_bytes(b"\x89PNG")
        engine = FakeOCREngine()
        result = engine.run(image)
        assert isinstance(result, EngineResult)
        assert result.success
        assert result.text == "transcription de doc.png"
        assert result.engine_name == "fake_ocr"

    def test_fake_engine_process_returns_text_artifact(self, tmp_path: Path) -> None:
        """``process`` delegates to ``run`` and returns ``{TEXT: ...}``."""
        image = tmp_path / "doc.png"
        image.write_bytes(b"\x89PNG")
        engine = FakeOCREngine()
        outputs = engine.process({ArtifactType.IMAGE: image})
        assert outputs == {ArtifactType.TEXT: "transcription de doc.png"}

    def test_fake_engine_process_validates_missing_image(self) -> None:
        """``process`` rejects inputs that lack the IMAGE artifact."""
        engine = FakeOCREngine()
        # Literal restored from a mis-encoded "entrΓ©es" (UTF-8 read with
        # the wrong codec), which could never match a French message.
        with pytest.raises(ValueError, match="entrées manquantes"):
            engine.process({ArtifactType.TEXT: "wrong artifact"})

    def test_fake_engine_metadata_exposes_version(self) -> None:
        """``metadata`` reports the engine version under ``engine_version``."""
        meta = FakeOCREngine().metadata()
        assert meta == {"engine_version": "0.1.0"}
| # ────────────────────────────────────────────────────────────────────────── | |
| # 6. Cohérence ArtifactType / GTLevel | |
| # ────────────────────────────────────────────────────────────────────────── | |
class TestArtifactTypeGTLevelCoherence:
    """String values of ``GTLevel`` and ``ArtifactType`` must line up."""

    # Without this parametrization pytest treats ``level`` as a fixture
    # name and errors out with "fixture 'level' not found".
    @pytest.mark.parametrize("level", list(GTLevel))
    def test_each_gtlevel_maps_to_artifacttype(self, level: GTLevel) -> None:
        """The ``GTLevel`` -> ``ArtifactType`` conversion must be trivial."""
        # The by-value lookup is the real check: presumably ArtifactType is
        # an Enum, so an unknown value raises instead of returning None.
        assert ArtifactType(level.value) is not None

    def test_image_has_no_gtlevel_counterpart(self) -> None:
        """``IMAGE`` is not a ground-truth level, consistent with the plan."""
        gt_values = {lvl.value for lvl in GTLevel}
        assert ArtifactType.IMAGE.value not in gt_values