File size: 6,154 Bytes
b9ff8de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2308fc
 
b9ff8de
f2308fc
b9ff8de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2308fc
 
 
b9ff8de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162c559
 
 
b9ff8de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""Sprint A14-S6 β€” protocoles ``StepExecutor`` + types runtime.

VΓ©rifie que :

- une classe minimale satisfait ``StepExecutor`` ;
- ``RunContext``, ``StepResult``, ``PipelineResult`` se construisent
  et sΓ©rialisent ;
- ``isinstance(x, StepExecutor)`` rejette les classes non-conformes.
"""

from __future__ import annotations

import pytest

from picarones.domain import Artifact, ArtifactType
from picarones.pipeline import (
    PipelineResult,
    RunContext,
    StepExecutor,
    StepResult,
)


# ──────────────────────────────────────────────────────────────────────
# RunContext
# ──────────────────────────────────────────────────────────────────────


class TestRunContext:
    """Construction and immutability checks for ``RunContext``."""

    def test_minimal_context(self) -> None:
        """Build a context from the three identifying fields only."""
        required_fields = {
            "document_id": "d1",
            "code_version": "1.0.0",
            "pipeline_name": "ocr_only",
        }
        context = RunContext(**required_fields)
        # No workspace was supplied, so the field is expected to be None.
        assert context.workspace_uri is None

    def test_with_workspace(self) -> None:
        """Build a context with an explicit workspace and read it back."""
        workspace = "/tmp/picarones/runs/abc"
        context = RunContext(
            workspace_uri=workspace,
            pipeline_name="ocr_only",
            code_version="1.0.0",
            document_id="d1",
        )
        assert context.workspace_uri == workspace

    def test_frozen(self) -> None:
        """Check that assigning to a field of a built context raises."""
        from pydantic import ValidationError

        context = RunContext(
            document_id="d",
            code_version="v",
            pipeline_name="p",
        )
        with pytest.raises(ValidationError):
            context.document_id = "x"  # type: ignore[misc]


# ──────────────────────────────────────────────────────────────────────
# StepResult & PipelineResult
# ──────────────────────────────────────────────────────────────────────


class TestStepResult:
    """Checks on ``StepResult`` for success, failure and invalid input."""

    def test_success(self) -> None:
        """Build a succeeded step and check it reports no error."""
        produced = {"raw_text": "d1:ocr:raw_text"}
        outcome = StepResult(
            produced_artifacts=produced,
            duration_seconds=2.5,
            succeeded=True,
            step_id="ocr",
        )
        assert outcome.succeeded
        assert outcome.error is None

    def test_failure(self) -> None:
        """Build a failed step and check its error and empty artifacts."""
        message = "Tesseract introuvable"
        outcome = StepResult(
            error=message,
            duration_seconds=0.1,
            succeeded=False,
            step_id="ocr",
        )
        assert not outcome.succeeded
        # No artifacts were passed in, so the mapping is expected empty.
        assert outcome.produced_artifacts == {}
        assert outcome.error == message

    def test_negative_duration_rejected(self) -> None:
        """Check that a negative ``duration_seconds`` fails validation."""
        from pydantic import ValidationError

        with pytest.raises(ValidationError):
            StepResult(
                step_id="x",
                succeeded=True,
                duration_seconds=-1.0,
            )


class TestPipelineResult:
    """Checks on the lookup helpers exposed by ``PipelineResult``."""

    def test_with_artifacts(self) -> None:
        """Build a one-step result with two artifacts and query it."""
        raw_text = Artifact(
            id="d1:ocr:raw_text",
            document_id="d1",
            type=ArtifactType.RAW_TEXT,
        )
        alto_xml = Artifact(
            id="d1:ocr:alto_xml",
            document_id="d1",
            type=ArtifactType.ALTO_XML,
        )
        ocr_step = StepResult(
            step_id="ocr",
            succeeded=True,
            duration_seconds=1.0,
            produced_artifacts={
                "raw_text": raw_text.id,
                "alto_xml": alto_xml.id,
            },
        )
        pipeline_result = PipelineResult(
            pipeline_name="ocr_only",
            document_id="d1",
            step_results=(ocr_step,),
            succeeded=True,
            duration_seconds=1.05,
            artifacts=(raw_text, alto_xml),
        )

        # Lookup by step id: a known id is found, an unknown one gives None.
        assert pipeline_result.step_result_by_id("ocr") is not None
        assert pipeline_result.step_result_by_id("missing") is None

        # Filtering by type must keep only the RAW_TEXT artifact.
        matching = pipeline_result.artifacts_of_type(ArtifactType.RAW_TEXT)
        assert len(matching) == 1
        assert matching[0].id == raw_text.id


# ──────────────────────────────────────────────────────────────────────
# StepExecutor protocol
# ──────────────────────────────────────────────────────────────────────


class _StubExecutor:
    """Smallest class shaped like a ``StepExecutor``, used as a test double."""

    name = "tesseract"
    execution_mode = "cpu"
    input_types = frozenset({ArtifactType.IMAGE})
    output_types = frozenset({ArtifactType.RAW_TEXT})

    def execute(
        self,
        inputs: dict[ArtifactType, Artifact],
        params: dict[str, str | int | float | bool],
        context: RunContext,
    ) -> dict[ArtifactType, Artifact]:
        """Return one RAW_TEXT artifact for the context's document.

        ``params`` is accepted but not used. ``inputs`` must contain an
        ``ArtifactType.IMAGE`` entry; its value is not otherwise read.
        """
        # The lookup is the point: it fails when the IMAGE input is
        # missing, even though the value found is discarded.
        _ = inputs[ArtifactType.IMAGE]

        doc_id = context.document_id
        raw_text = Artifact(
            id=f"{doc_id}:tesseract:raw_text",
            document_id=doc_id,
            type=ArtifactType.RAW_TEXT,
            produced_by_step="ocr",
        )
        return {ArtifactType.RAW_TEXT: raw_text}


class TestStepExecutorProtocol:
    """Runtime ``isinstance`` checks against the ``StepExecutor`` protocol."""

    def test_stub_satisfies_protocol(self) -> None:
        """Check that the stub is recognised as a ``StepExecutor``."""
        executor = _StubExecutor()
        assert isinstance(executor, StepExecutor)

    def test_non_conforming_does_not_satisfy(self) -> None:
        """Check that an empty class is not seen as a ``StepExecutor``."""

        class _NotAnExecutor:
            """Deliberately empty: defines none of the executor members."""

        candidate = _NotAnExecutor()
        assert not isinstance(candidate, StepExecutor)

    def test_stub_can_execute(self) -> None:
        """Run the stub on an image artifact and inspect its output."""
        executor = _StubExecutor()
        context = RunContext(
            document_id="d1",
            code_version="v",
            pipeline_name="p",
        )
        image = Artifact(
            id="d1:img",
            document_id="d1",
            type=ArtifactType.IMAGE,
        )
        outputs = executor.execute({ArtifactType.IMAGE: image}, {}, context)
        assert ArtifactType.RAW_TEXT in outputs
        assert outputs[ArtifactType.RAW_TEXT].document_id == "d1"