File size: 4,711 Bytes
b9ff8de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2308fc
 
b9ff8de
f2308fc
b9ff8de
 
 
f2308fc
 
 
b9ff8de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d00572
 
b9ff8de
0d00572
b9ff8de
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""Sprint A14-S6 — ``PipelineStep``, ``PipelineSpec`` (declarative)."""

from __future__ import annotations

import pytest

from picarones.domain import ArtifactType, PicaronesError
from picarones.pipeline import INITIAL_STEP_ID, PipelineSpec, PipelineStep


# ──────────────────────────────────────────────────────────────────────
# PipelineStep — id and field validation
# ──────────────────────────────────────────────────────────────────────


class TestPipelineStep:
    """Construction and validation checks for ``PipelineStep``."""

    def test_minimal_step(self) -> None:
        """``params`` and ``inputs_from`` are empty dicts when not supplied."""
        step = PipelineStep(
            id="ocr",
            kind="ocr",
            adapter_name="tesseract",
            input_types=(ArtifactType.IMAGE,),
            output_types=(ArtifactType.RAW_TEXT,),
        )
        assert step.id == "ocr"
        assert step.params == {}
        assert step.inputs_from == {}

    def test_step_with_inputs_from(self) -> None:
        """An ``inputs_from`` mapping is readable back by artifact type."""
        step = PipelineStep(
            id="correction",
            kind="post_correction",
            adapter_name="openai:gpt-4o",
            input_types=(ArtifactType.RAW_TEXT,),
            output_types=(ArtifactType.CORRECTED_TEXT,),
            inputs_from={ArtifactType.RAW_TEXT: "ocr"},
        )
        assert step.inputs_from[ArtifactType.RAW_TEXT] == "ocr"

    def test_step_with_params(self) -> None:
        """String and integer values passed in ``params`` are kept as given."""
        step = PipelineStep(
            id="ocr",
            kind="ocr",
            adapter_name="tesseract",
            params={"lang": "fra", "psm": 6, "preserve_interword_spaces": True},
        )
        assert step.params["lang"] == "fra"
        assert step.params["psm"] == 6

    def test_id_validation_rejects_space(self) -> None:
        """An id containing a space raises ``PicaronesError``."""
        with pytest.raises(PicaronesError, match="step id invalide"):
            PipelineStep(id="bad id", kind="x", adapter_name="y")

    def test_id_validation_rejects_dot(self) -> None:
        """An id containing a dot raises ``PicaronesError``."""
        with pytest.raises(PicaronesError, match="step id invalide"):
            PipelineStep(id="bad.id", kind="x", adapter_name="y")

    def test_id_validation_rejects_initial_sentinel(self) -> None:
        """``__initial__`` is reserved to designate the runner's initial
        inputs — a step cannot carry that name."""
        # ``match`` is a regex searched in the exception text, so it must be
        # spelled with the real accented characters rather than a
        # mis-decoded byte sequence.
        with pytest.raises(PicaronesError, match="réservé"):
            PipelineStep(id=INITIAL_STEP_ID, kind="x", adapter_name="y")

    def test_id_accepts_alphanum_underscore_dash(self) -> None:
        """An id mixing letters, digits, ``_`` and ``-`` is accepted as-is."""
        step = PipelineStep(id="step_1-final", kind="x", adapter_name="y")
        assert step.id == "step_1-final"

    def test_frozen(self) -> None:
        """Assigning to a field of an existing step raises ``ValidationError``."""
        from pydantic import ValidationError

        step = PipelineStep(id="a", kind="b", adapter_name="c")
        with pytest.raises(ValidationError):
            step.id = "d"  # type: ignore[misc]

    def test_extra_field_rejected(self) -> None:
        """Passing an undeclared keyword raises ``ValidationError``."""
        from pydantic import ValidationError

        with pytest.raises(ValidationError):
            PipelineStep(  # type: ignore[call-arg]
                id="a", kind="b", adapter_name="c", bogus=42,
            )


# ──────────────────────────────────────────────────────────────────────
# PipelineSpec
# ──────────────────────────────────────────────────────────────────────


class TestPipelineSpec:
    """Construction, lookup and immutability checks for ``PipelineSpec``."""

    def test_minimal_spec(self) -> None:
        """A spec given only a name has empty ``steps`` and ``initial_inputs``."""
        spec = PipelineSpec(name="empty")
        assert spec.name == "empty"
        assert spec.steps == ()
        assert spec.initial_inputs == ()

    def test_spec_with_steps(self) -> None:
        """``step_by_id`` finds a declared step and gives ``None`` otherwise."""
        ocr_step = PipelineStep(
            id="ocr",
            kind="ocr",
            adapter_name="tesseract",
            input_types=(ArtifactType.IMAGE,),
            output_types=(ArtifactType.RAW_TEXT,),
        )
        spec = PipelineSpec(
            name="ocr_only",
            initial_inputs=(ArtifactType.IMAGE,),
            steps=(ocr_step,),
        )
        assert len(spec.steps) == 1
        assert spec.step_by_id("ocr") is not None
        assert spec.step_by_id("missing") is None

    def test_frozen(self) -> None:
        """Assigning to a field of an existing spec raises ``ValidationError``."""
        from pydantic import ValidationError

        spec = PipelineSpec(name="x")
        with pytest.raises(ValidationError):
            spec.name = "y"  # type: ignore[misc]