Spaces:
Sleeping
Sleeping
File size: 6,154 Bytes
b9ff8de f2308fc b9ff8de f2308fc b9ff8de f2308fc b9ff8de 162c559 b9ff8de | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | """Sprint A14-S6 β protocoles ``StepExecutor`` + types runtime.
Vérifie que :
- une classe minimale satisfait ``StepExecutor`` ;
- ``RunContext``, ``StepResult``, ``PipelineResult`` se construisent
et sérialisent ;
- ``isinstance(x, StepExecutor)`` rejette les classes non-conformes.
"""
from __future__ import annotations
import pytest
from picarones.domain import Artifact, ArtifactType
from picarones.pipeline import (
PipelineResult,
RunContext,
StepExecutor,
StepResult,
)
# ──────────────────────────────────────────────────────────────────────
# RunContext
# ──────────────────────────────────────────────────────────────────────
class TestRunContext:
    """Construction and immutability checks for ``RunContext``."""

    def test_minimal_context(self) -> None:
        """A context built without ``workspace_uri`` reports it as ``None``."""
        context = RunContext(
            document_id="d1", code_version="1.0.0", pipeline_name="ocr_only"
        )

        assert context.workspace_uri is None

    def test_with_workspace(self) -> None:
        """An explicit ``workspace_uri`` is stored unchanged."""
        workspace = "/tmp/picarones/runs/abc"
        context = RunContext(
            document_id="d1",
            code_version="1.0.0",
            pipeline_name="ocr_only",
            workspace_uri="/tmp/picarones/runs/abc",
        )

        assert context.workspace_uri == workspace

    def test_frozen(self) -> None:
        """Reassigning a field on an existing context must raise."""
        from pydantic import ValidationError

        context = RunContext(
            document_id="d", code_version="v", pipeline_name="p"
        )

        # The assignment itself is the operation under test.
        with pytest.raises(ValidationError):
            context.document_id = "x"  # type: ignore[misc]
# ──────────────────────────────────────────────────────────────────────
# StepResult & PipelineResult
# ──────────────────────────────────────────────────────────────────────
class TestStepResult:
    """Checks on ``StepResult`` for success, failure and validation."""

    def test_success(self) -> None:
        """A successful step carries no error."""
        outcome = StepResult(
            step_id="ocr",
            succeeded=True,
            duration_seconds=2.5,
            produced_artifacts={"raw_text": "d1:ocr:raw_text"},
        )

        assert outcome.succeeded
        assert outcome.error is None

    def test_failure(self) -> None:
        """A failed step keeps its error and has an empty artifact mapping."""
        outcome = StepResult(
            step_id="ocr",
            succeeded=False,
            duration_seconds=0.1,
            error="Tesseract introuvable",
        )

        assert not outcome.succeeded
        # ``produced_artifacts`` was not passed, so this checks its default.
        assert outcome.produced_artifacts == {}
        assert outcome.error == "Tesseract introuvable"

    def test_negative_duration_rejected(self) -> None:
        """Building a result with a negative duration must raise."""
        from pydantic import ValidationError

        with pytest.raises(ValidationError):
            StepResult(step_id="x", succeeded=True, duration_seconds=-1.0)
class TestPipelineResult:
    """Checks on the lookup helpers exposed by ``PipelineResult``."""

    def test_with_artifacts(self) -> None:
        """Step lookup by id and artifact filtering by type both work."""
        raw_text = Artifact(
            id="d1:ocr:raw_text",
            document_id="d1",
            type=ArtifactType.RAW_TEXT,
        )
        alto_xml = Artifact(
            id="d1:ocr:alto_xml",
            document_id="d1",
            type=ArtifactType.ALTO_XML,
        )
        ocr_step = StepResult(
            step_id="ocr",
            succeeded=True,
            duration_seconds=1.0,
            produced_artifacts={
                "raw_text": raw_text.id,
                "alto_xml": alto_xml.id,
            },
        )

        pipeline_result = PipelineResult(
            pipeline_name="ocr_only",
            document_id="d1",
            step_results=(ocr_step,),
            succeeded=True,
            duration_seconds=1.05,
            artifacts=(raw_text, alto_xml),
        )

        # A known step id resolves; an unknown one yields ``None``.
        assert pipeline_result.step_result_by_id("ocr") is not None
        assert pipeline_result.step_result_by_id("missing") is None

        # Only the RAW_TEXT artifact should come back from the type filter.
        matching = pipeline_result.artifacts_of_type(ArtifactType.RAW_TEXT)
        assert len(matching) == 1
        assert matching[0].id == raw_text.id
# ──────────────────────────────────────────────────────────────────────
# StepExecutor protocol
# ──────────────────────────────────────────────────────────────────────
class _StubExecutor:
    """Smallest class intended to satisfy the ``StepExecutor`` protocol."""

    name = "tesseract"
    input_types = frozenset({ArtifactType.IMAGE})
    output_types = frozenset({ArtifactType.RAW_TEXT})
    execution_mode = "cpu"

    def execute(
        self,
        inputs: dict[ArtifactType, Artifact],
        params: dict[str, str | int | float | bool],
        context: RunContext,
    ) -> dict[ArtifactType, Artifact]:
        """Return one RAW_TEXT artifact tied to the context's document.

        ``params`` is accepted but never read.
        """
        # Look the image up without using its value: the lookup is what
        # validates that the required input is present.
        _ = inputs[ArtifactType.IMAGE]

        raw_text = Artifact(
            id=f"{context.document_id}:tesseract:raw_text",
            document_id=context.document_id,
            type=ArtifactType.RAW_TEXT,
            produced_by_step="ocr",
        )
        return {ArtifactType.RAW_TEXT: raw_text}
class TestStepExecutorProtocol:
    """Runtime ``isinstance`` checks against ``StepExecutor``, plus a run."""

    def test_stub_satisfies_protocol(self) -> None:
        """The stub executor is recognised as a ``StepExecutor``."""
        executor = _StubExecutor()

        assert isinstance(executor, StepExecutor)

    def test_non_conforming_does_not_satisfy(self) -> None:
        """A class with no members at all is not a ``StepExecutor``."""

        class _NotAnExecutor:
            pass

        candidate = _NotAnExecutor()

        assert not isinstance(candidate, StepExecutor)

    def test_stub_can_execute(self) -> None:
        """Executing the stub yields a RAW_TEXT artifact for the same document."""
        executor = _StubExecutor()
        context = RunContext(
            document_id="d1", code_version="v", pipeline_name="p"
        )
        image = Artifact(
            id="d1:img", document_id="d1", type=ArtifactType.IMAGE
        )

        outputs = executor.execute({ArtifactType.IMAGE: image}, {}, context)

        assert ArtifactType.RAW_TEXT in outputs
        assert outputs[ArtifactType.RAW_TEXT].document_id == "d1"
|