Spaces:
Sleeping
Sleeping
File size: 5,091 Bytes
"""Sprint A14-S42 — ``CsvReportRenderer``."""
from __future__ import annotations
import csv
import io
from picarones.app.results import RunDocumentResult, RunResult
from picarones.domain import RunManifest, utcnow
from picarones.evaluation.views.base import ViewResult
from picarones.reports.csv import CsvReportRenderer
def _make_minimal_result(
    metric_values: dict | None = None,
    failed_metrics: dict | None = None,
    candidate_artifact_id: str = "doc01:tess:raw_text",
    pipeline_name: str = "tess",
) -> RunResult:
    """Build a one-document ``RunResult`` carrying a single ``ViewResult``.

    The run contains one ``RunDocumentResult`` for document ``"doc01"`` with
    no pipeline results and exactly one view result named ``"text_final"``.

    Args:
        metric_values: Passed as ``ViewResult.metric_values``; a falsy value
            (``None`` or an empty dict) is replaced by a fresh empty dict.
        failed_metrics: Passed as ``ViewResult.failed_metrics``; a falsy
            value is replaced by a fresh empty dict.
        candidate_artifact_id: Candidate artifact id stored on the view
            result.
        pipeline_name: Used both as the only entry of the manifest's
            ``pipeline_names`` and as ``ViewResult.pipeline_name``.

    Returns:
        The assembled ``RunResult``.
    """
    # Two separate clock reads, start first, exactly one per timestamp.
    run_start = utcnow()
    run_end = utcnow()

    run_manifest = RunManifest(
        run_id="run_001",
        corpus_name="demo",
        n_documents=1,
        pipeline_names=(pipeline_name,),
        view_specs=(),
        code_version="1.0.0-s42",
        started_at=run_start,
        completed_at=run_end,
    )

    single_view = ViewResult(
        view_name="text_final",
        pipeline_name=pipeline_name,
        candidate_artifact_id=candidate_artifact_id,
        ground_truth_artifact_id="doc01:gt",
        metric_values=metric_values if metric_values else {},
        failed_metrics=failed_metrics if failed_metrics else {},
    )

    document = RunDocumentResult(
        document_id="doc01",
        pipeline_results=(),
        view_results=(single_view,),
    )

    return RunResult(manifest=run_manifest, document_results=(document,))
# ──────────────────────────────────────────────────────────────────────
# Renderer
# ──────────────────────────────────────────────────────────────────────
class TestCsvRendererHeader:
    """Checks on the header record of the rendered CSV."""

    def test_header_columns_in_order(self) -> None:
        """The first CSV record equals ``CsvReportRenderer.HEADER``, in order."""
        result = _make_minimal_result()
        text = CsvReportRenderer().render(result)
        # Parse the header with the csv module (as the other tests in this
        # file do) instead of a bare ``split(",")``, so that quoting or
        # escaping of a column name cannot make the comparison lie.
        # ``None`` default: empty output fails the assertion below with a
        # readable diff rather than raising from ``next``.
        header_row = next(csv.reader(io.StringIO(text)), None)
        assert header_row == list(CsvReportRenderer.HEADER)
class TestCsvRendererSuccessfulMetrics:
    """Rows rendered for metrics that produced a value."""

    def test_successful_metric_emits_value_and_status_ok(self) -> None:
        """Two metric values give two rows; the ``cer`` row is marked ``ok``."""
        run = _make_minimal_result(metric_values={"cer": 0.12, "wer": 0.25})
        rendered = CsvReportRenderer().render(run)
        records = [*csv.DictReader(io.StringIO(rendered))]
        assert len(records) == 2
        # First record whose metric is "cer"; StopIteration if there is none.
        cer_record = next(
            filter(lambda rec: rec["metric_name"] == "cer", records)
        )
        assert cer_record["status"] == "ok"
        assert cer_record["value"] == "0.120000"
        assert cer_record["pipeline_name"] == "tess"

    def test_value_formatted_to_6_decimals(self) -> None:
        """A value of one third is written with six decimal places."""
        one_third = 1.0 / 3.0
        run = _make_minimal_result(metric_values={"cer": one_third})
        rendered = CsvReportRenderer().render(run)
        records = [*csv.DictReader(io.StringIO(rendered))]
        first_record = records[0]
        assert first_record["value"] == "0.333333"
class TestCsvRendererFailedMetrics:
    """Rows rendered for metrics recorded as failed."""

    def test_failed_metric_emits_empty_value_and_status(self) -> None:
        """A failed metric gives one row: empty value, status ``failed_metric``."""
        run = _make_minimal_result(failed_metrics={"broken": "ValueError: x"})
        rendered = CsvReportRenderer().render(run)
        records = [*csv.DictReader(io.StringIO(rendered))]
        assert len(records) == 1
        only_record = records[0]
        assert only_record["metric_name"] == "broken"
        assert only_record["status"] == "failed_metric"
        assert only_record["value"] == ""
class TestCsvRendererPipelineName:
    """The ``pipeline_name`` column versus the candidate artifact id."""

    def test_pipeline_name_from_view_result_field(self) -> None:
        """``pipeline_name`` is read directly from ``ViewResult.pipeline_name``,
        not inferred by parsing ``candidate_artifact_id``.
        """
        run = _make_minimal_result(
            metric_values={"cer": 0.0},
            candidate_artifact_id="doc01:irrelevant_string:raw_text",
            pipeline_name="my_pipe",
        )
        rendered = CsvReportRenderer().render(run)
        records = [*csv.DictReader(io.StringIO(rendered))]
        first_record = records[0]
        assert first_record["pipeline_name"] == "my_pipe"

    def test_pipeline_name_independent_of_artifact_id(self) -> None:
        """``candidate_artifact_id`` may contain anything —
        ``pipeline_name`` remains the one from the structural field.
        """
        run = _make_minimal_result(
            metric_values={"cer": 0.0},
            candidate_artifact_id="bad_id_no_separators",
            pipeline_name="real_pipeline",
        )
        rendered = CsvReportRenderer().render(run)
        records = [*csv.DictReader(io.StringIO(rendered))]
        first_record = records[0]
        assert first_record["pipeline_name"] == "real_pipeline"
class TestCsvRendererDeterminism:
    """Repeated rendering of the same result."""

    def test_render_twice_yields_same_bytes(self) -> None:
        """Rendering one result twice with one renderer gives equal output."""
        run = _make_minimal_result(
            metric_values={"cer": 0.1, "wer": 0.2, "mer": 0.15},
        )
        csv_renderer = CsvReportRenderer()
        # Two consecutive renders on the same renderer instance.
        first_pass, second_pass = (csv_renderer.render(run) for _ in range(2))
        assert first_pass == second_pass
|