Spaces:
Sleeping
Sleeping
Claude
test: réorganiser les 110 fichiers tests/test_*.py par cercle architectural
"""Tests Sprint 70 — CLI pipeline + loader YAML.

Couvre :

1. ``_resolve_class`` : dotted path valide, module manquant,
   classe manquante, chemin invalide.
2. ``load_pipeline_spec_from_dict`` : spec valide, ``name``
   manquant, ``steps`` manquants, étape sans ``module``, args
   invalides, classe non BaseModule, ``inputs_from`` valide,
   ``inputs_from`` avec type inconnu.
3. ``load_pipeline_spec_from_yaml`` : fichier introuvable,
   YAML invalide, document complet.
4. ``load_comparison_specs_*`` : champ ``pipelines`` requis, N
   specs construites.
5. CLI ``picarones pipeline run`` : exécution end-to-end avec un
   MockOCR référencé via dotted path, sortie JSON et HTML.
6. CLI ``picarones pipeline compare`` : comparaison avec ranking
   affiché, output HTML.
"""
| from __future__ import annotations | |
| from pathlib import Path | |
| from typing import Any | |
| import pytest | |
| from click.testing import CliRunner | |
| from picarones.core.modules import ArtifactType, BaseModule | |
| from picarones.measurements.pipeline_spec_loader import ( | |
| PipelineSpecLoadError, | |
| _resolve_class, | |
| load_comparison_specs_from_dict, | |
| load_pipeline_spec_from_dict, | |
| load_pipeline_spec_from_yaml, | |
| ) | |
# Module de test au top-level pour pouvoir être référencé par dotted path.
class _CLIMockOCR(BaseModule):
    """OCR stand-in that answers with the same canned text for any input.

    It lives at module top level so the tests can point at it through the
    dotted path ``tests.cli.test_sprint70_pipeline_cli._CLIMockOCR``.
    """

    input_types = (ArtifactType.IMAGE,)
    output_types = (ArtifactType.TEXT,)
    execution_mode: Any = "io"

    def __init__(self, fixed_text: str = "hello world") -> None:
        # Kept under ``_fixed``: the loader tests read this attribute back
        # to check that constructor arguments were forwarded.
        self._fixed = fixed_text

    def name(self) -> str:
        """Return the fixed identifier of this mock module."""
        return "cli-mock-ocr"

    def process(self, inputs):
        """Ignore ``inputs`` and return the canned text keyed by TEXT."""
        canned = self._fixed
        return {ArtifactType.TEXT: canned}
class _NotABaseModule:
    """Plain class with no base, used as a negative case by the loader tests."""
# ──────────────────────────────────────────────────────────────────────────
# 1. _resolve_class
# ──────────────────────────────────────────────────────────────────────────
class TestResolveClass:
    """Dotted-path resolution by ``_resolve_class`` and its error cases."""

    def test_valid_dotted_path(self) -> None:
        """A well-formed path yields the very class object it names."""
        dotted = "tests.cli.test_sprint70_pipeline_cli._CLIMockOCR"
        assert _resolve_class(dotted) is _CLIMockOCR

    def test_missing_dot(self) -> None:
        """A path without any dot is expected to raise an 'invalide' error."""
        with pytest.raises(PipelineSpecLoadError, match="invalide"):
            _resolve_class("invalid_no_dot")

    def test_module_not_found(self) -> None:
        """An unknown module is expected to raise an 'introuvable' error."""
        with pytest.raises(PipelineSpecLoadError, match="introuvable"):
            _resolve_class("non_existing_module_xyz.Foo")

    def test_class_not_in_module(self) -> None:
        """A missing attribute in an existing module raises 'introuvable'."""
        missing = "tests.cli.test_sprint70_pipeline_cli.DoesNotExist"
        with pytest.raises(PipelineSpecLoadError, match="introuvable"):
            _resolve_class(missing)

    def test_target_is_not_a_class(self) -> None:
        """A path naming a non-class object is expected to be refused."""
        # ``sys.path`` is an attribute of the ``sys`` module, not a class.
        with pytest.raises(PipelineSpecLoadError, match="n'est pas une classe"):
            _resolve_class("sys.path")
# ──────────────────────────────────────────────────────────────────────────
# 2. load_pipeline_spec_from_dict
# ──────────────────────────────────────────────────────────────────────────
class TestLoadFromDict:
    """``load_pipeline_spec_from_dict``: accepted specs and rejected ones."""

    @staticmethod
    def _mock_step(step_name: str, **extra: Any) -> dict[str, Any]:
        """Build a step entry targeting ``_CLIMockOCR``, plus optional keys."""
        step: dict[str, Any] = {
            "name": step_name,
            "module": "tests.cli.test_sprint70_pipeline_cli._CLIMockOCR",
        }
        step.update(extra)
        return step

    @staticmethod
    def _expect_load_error(data: dict[str, Any], pattern: str) -> None:
        """Assert that loading ``data`` raises an error matching ``pattern``."""
        with pytest.raises(PipelineSpecLoadError, match=pattern):
            load_pipeline_spec_from_dict(data)

    def test_valid_minimal(self) -> None:
        """A single-step spec loads and instantiates the referenced module."""
        spec = load_pipeline_spec_from_dict(
            {"name": "ocr_only", "steps": [self._mock_step("ocr")]},
        )
        assert spec.name == "ocr_only"
        assert len(spec.steps) == 1
        first = spec.steps[0]
        assert first.name == "ocr"
        assert isinstance(first.module, _CLIMockOCR)

    def test_with_args(self) -> None:
        """``args`` are forwarded to the module constructor."""
        step = self._mock_step("ocr", args={"fixed_text": "custom output"})
        spec = load_pipeline_spec_from_dict(
            {"name": "ocr_with_args", "steps": [step]},
        )
        assert spec.steps[0].module._fixed == "custom output"

    def test_missing_name(self) -> None:
        """A spec without ``name`` is expected to be rejected."""
        self._expect_load_error(
            {"steps": [{"name": "x", "module": "foo.bar"}]},
            "``name``",
        )

    def test_missing_steps(self) -> None:
        """A spec without ``steps`` is expected to be rejected."""
        self._expect_load_error({"name": "p"}, "``steps``")

    def test_step_without_module(self) -> None:
        """A step without ``module`` is expected to be rejected."""
        self._expect_load_error(
            {"name": "p", "steps": [{"name": "x"}]},
            "``module``",
        )

    def test_step_args_not_dict(self) -> None:
        """``args`` given as a string instead of a mapping is rejected."""
        step = self._mock_step("x", args="not_a_dict")
        self._expect_load_error({"name": "p", "steps": [step]}, "``args``")

    def test_class_not_basemodule(self) -> None:
        """A step pointing at ``_NotABaseModule`` is expected to be rejected."""
        step = {
            "name": "x",
            "module": "tests.cli.test_sprint70_pipeline_cli._NotABaseModule",
        }
        self._expect_load_error({"name": "p", "steps": [step]}, "BaseModule")

    def test_invalid_constructor_args(self) -> None:
        """An unknown constructor keyword surfaces as a load error."""
        step = self._mock_step("x", args={"unknown_arg": 42})
        self._expect_load_error({"name": "p", "steps": [step]}, "instancier")

    def test_inputs_from_valid(self) -> None:
        """String keys of ``inputs_from`` come back as ``ArtifactType`` keys."""
        steps = [
            self._mock_step("ocr"),
            self._mock_step("second", inputs_from={"image": "__initial__"}),
        ]
        spec = load_pipeline_spec_from_dict({"name": "p", "steps": steps})
        assert spec.steps[1].inputs_from == {ArtifactType.IMAGE: "__initial__"}

    def test_inputs_from_unknown_type(self) -> None:
        """An unknown artifact type name in ``inputs_from`` is rejected."""
        step = self._mock_step("x", inputs_from={"unknown_type": "ocr"})
        self._expect_load_error(
            {"name": "p", "steps": [step]},
            "type d'artefact",
        )
# ──────────────────────────────────────────────────────────────────────────
# 3. load_pipeline_spec_from_yaml
# ──────────────────────────────────────────────────────────────────────────
class TestLoadFromYaml:
    """``load_pipeline_spec_from_yaml``: missing file, bad YAML, valid file."""

    def test_file_not_found(self, tmp_path: Path) -> None:
        """A path that does not exist is expected to raise 'introuvable'."""
        with pytest.raises(PipelineSpecLoadError, match="introuvable"):
            load_pipeline_spec_from_yaml(tmp_path / "nope.yaml")

    def test_invalid_yaml(self, tmp_path: Path) -> None:
        """A syntactically broken document is reported as 'YAML invalide'."""
        broken = tmp_path / "broken.yaml"
        # The flow sequence is deliberately left unclosed.
        broken.write_text("name: ok\nsteps: [unclosed", encoding="utf-8")
        with pytest.raises(PipelineSpecLoadError, match="YAML invalide"):
            load_pipeline_spec_from_yaml(broken)

    def test_valid_yaml_round_trip(self, tmp_path: Path) -> None:
        """A well-formed one-step document loads into a one-step spec."""
        spec_file = tmp_path / "spec.yaml"
        # ``module`` must be indented under the ``- name`` list item;
        # otherwise the document is not valid block YAML.
        spec_file.write_text(
            "name: ocr\n"
            "steps:\n"
            "  - name: ocr\n"
            "    module: tests.cli.test_sprint70_pipeline_cli._CLIMockOCR\n",
            encoding="utf-8",
        )
        spec = load_pipeline_spec_from_yaml(spec_file)
        assert spec.name == "ocr"
        assert len(spec.steps) == 1
# ──────────────────────────────────────────────────────────────────────────
# 4. load_comparison_specs
# ──────────────────────────────────────────────────────────────────────────
class TestLoadComparison:
    """``load_comparison_specs_from_dict``: N pipelines in, N specs out."""

    def test_valid(self) -> None:
        """Two declared pipelines come back as two specs, in order."""

        def pipeline(label: str) -> dict[str, Any]:
            # One-step pipeline whose only step is the top-level mock OCR.
            return {
                "name": label,
                "steps": [{
                    "name": "ocr",
                    "module": "tests.cli.test_sprint70_pipeline_cli._CLIMockOCR",
                }],
            }

        specs = load_comparison_specs_from_dict(
            {"pipelines": [pipeline("a"), pipeline("b")]},
        )
        names = [spec.name for spec in specs]
        assert names == ["a", "b"]

    def test_missing_pipelines(self) -> None:
        """An empty mapping is expected to fail on the ``pipelines`` field."""
        with pytest.raises(PipelineSpecLoadError, match="``pipelines``"):
            load_comparison_specs_from_dict({})
# ──────────────────────────────────────────────────────────────────────────
# 5. CLI pipeline run
# ──────────────────────────────────────────────────────────────────────────
def _make_corpus_dir(
    tmp_path: Path,
    *,
    doc_id: str = "doc1",
    gt_text: str = "hello world",
) -> Path:
    """Create a minimal one-document corpus inside ``tmp_path``.

    Two files are written: ``<doc_id>.png`` and ``<doc_id>.gt.txt``.

    Args:
        tmp_path: Directory receiving the files. It is created (with its
            parents) when missing, so callers need not ``mkdir`` first.
        doc_id: Stem shared by the image and ground-truth files.
        gt_text: Ground-truth text, written as UTF-8.

    Returns:
        ``tmp_path``, unchanged, for call chaining.
    """
    tmp_path.mkdir(parents=True, exist_ok=True)
    # Only the 8-byte PNG signature is written; no image chunks follow.
    (tmp_path / f"{doc_id}.png").write_bytes(b"\x89PNG\r\n\x1a\n")
    (tmp_path / f"{doc_id}.gt.txt").write_text(gt_text, encoding="utf-8")
    return tmp_path
class TestPipelineRunCLI:
    """End-to-end checks of ``picarones pipeline run`` through ``CliRunner``."""

    # ``module`` must stay nested under the ``- name`` list item; with the
    # indentation flattened the document is no longer valid block YAML.
    _SPEC_YAML = (
        "name: ocr_only\n"
        "steps:\n"
        "  - name: ocr\n"
        "    module: tests.cli.test_sprint70_pipeline_cli._CLIMockOCR\n"
    )

    @classmethod
    def _write_fixture(cls, tmp_path: Path) -> tuple[Path, Path]:
        """Create the corpus and the spec file; return ``(spec, corpus)``."""
        corpus_dir = tmp_path / "corpus"
        corpus_dir.mkdir()
        _make_corpus_dir(corpus_dir)
        spec_path = tmp_path / "spec.yaml"
        spec_path.write_text(cls._SPEC_YAML, encoding="utf-8")
        return spec_path, corpus_dir

    def test_run_basic(self, tmp_path: Path) -> None:
        """The command exits 0 and reports the pipeline name and 1/1 success."""
        from picarones.cli import cli

        spec_path, corpus_dir = self._write_fixture(tmp_path)
        result = CliRunner().invoke(
            cli,
            ["pipeline", "run", str(spec_path), "--corpus", str(corpus_dir)],
        )
        assert result.exit_code == 0, result.output
        assert "ocr_only" in result.output
        # Both spacings of the success counter are accepted.
        assert "1/1 succès" in result.output or "1 / 1 succès" in result.output

    def test_run_with_outputs(self, tmp_path: Path) -> None:
        """``--output-json`` and ``--output-html`` each produce a file."""
        import json

        from picarones.cli import cli

        spec_path, corpus_dir = self._write_fixture(tmp_path)
        json_out = tmp_path / "out.json"
        html_out = tmp_path / "out.html"
        result = CliRunner().invoke(
            cli,
            [
                "pipeline", "run", str(spec_path),
                "--corpus", str(corpus_dir),
                "--output-json", str(json_out),
                "--output-html", str(html_out),
            ],
        )
        assert result.exit_code == 0, result.output
        assert json_out.exists()
        assert html_out.exists()
        assert "<!doctype html>" in html_out.read_text(encoding="utf-8")
        payload = json.loads(json_out.read_text(encoding="utf-8"))
        assert payload["pipeline_name"] == "ocr_only"
        assert payload["n_docs"] == 1
# ──────────────────────────────────────────────────────────────────────────
# 6. CLI pipeline compare
# ──────────────────────────────────────────────────────────────────────────
class TestPipelineCompareCLI:
    """End-to-end checks of ``picarones pipeline compare`` via ``CliRunner``."""

    # Two one-step pipelines, ``a`` and ``b``. Each nesting level needs its
    # own indentation; flattened, the document is not valid block YAML.
    _SPECS_YAML = (
        "pipelines:\n"
        "  - name: a\n"
        "    steps:\n"
        "      - name: ocr\n"
        "        module: tests.cli.test_sprint70_pipeline_cli._CLIMockOCR\n"
        "  - name: b\n"
        "    steps:\n"
        "      - name: ocr\n"
        "        module: tests.cli.test_sprint70_pipeline_cli._CLIMockOCR\n"
    )

    @classmethod
    def _write_fixture(cls, tmp_path: Path) -> tuple[Path, Path]:
        """Create the corpus and the specs file; return ``(specs, corpus)``."""
        corpus_dir = tmp_path / "corpus"
        corpus_dir.mkdir()
        _make_corpus_dir(corpus_dir)
        specs_path = tmp_path / "specs.yaml"
        specs_path.write_text(cls._SPECS_YAML, encoding="utf-8")
        return specs_path, corpus_dir

    def test_compare_basic(self, tmp_path: Path) -> None:
        """The command exits 0 and prints the comparison and CER ranking."""
        from picarones.cli import cli

        specs_path, corpus_dir = self._write_fixture(tmp_path)
        result = CliRunner().invoke(
            cli,
            [
                "pipeline", "compare",
                str(specs_path),
                "--corpus", str(corpus_dir),
            ],
        )
        assert result.exit_code == 0, result.output
        assert "Comparaison de 2 pipelines" in result.output
        assert "Classement par CER" in result.output

    def test_compare_with_html_and_baseline(self, tmp_path: Path) -> None:
        """``--output-html`` writes a report that marks the baseline."""
        from picarones.cli import cli

        specs_path, corpus_dir = self._write_fixture(tmp_path)
        html_out = tmp_path / "comparison.html"
        result = CliRunner().invoke(
            cli,
            [
                "pipeline", "compare",
                str(specs_path),
                "--corpus", str(corpus_dir),
                "--output-html", str(html_out),
                "--baseline", "a",
            ],
        )
        assert result.exit_code == 0, result.output
        assert html_out.exists()
        content = html_out.read_text(encoding="utf-8")
        assert "<!doctype html>" in content
        # The baseline is flagged in the gain table.
        assert "(référence)" in content
# ──────────────────────────────────────────────────────────────────────────
# 7. CLI help discoverable
# ──────────────────────────────────────────────────────────────────────────
class TestCliHelp:
    """The ``pipeline`` group and its sub-commands are visible in ``--help``.

    Each test also checks the exit code, like the other CLI tests in this
    file, so that an error message which happens to contain the expected
    substring cannot make the test pass.
    """

    def test_pipeline_group_listed(self) -> None:
        """The top-level help mentions the ``pipeline`` group."""
        from picarones.cli import cli

        result = CliRunner().invoke(cli, ["--help"])
        assert result.exit_code == 0, result.output
        assert "pipeline" in result.output

    def test_pipeline_run_help(self) -> None:
        """``pipeline run --help`` lists its argument and options."""
        from picarones.cli import cli

        result = CliRunner().invoke(cli, ["pipeline", "run", "--help"])
        assert result.exit_code == 0, result.output
        for expected in ("SPEC_PATH", "--corpus", "--output-json", "--output-html"):
            assert expected in result.output

    def test_pipeline_compare_help(self) -> None:
        """``pipeline compare --help`` lists its argument and ``--baseline``."""
        from picarones.cli import cli

        result = CliRunner().invoke(cli, ["pipeline", "compare", "--help"])
        assert result.exit_code == 0, result.output
        for expected in ("SPECS_PATH", "--baseline"):
            assert expected in result.output