Spaces:
Sleeping
Sleeping
File size: 3,238 Bytes
"""Sprint A14-S4 — multi-level ``DocumentRef`` and ``GroundTruthRef``."""
from __future__ import annotations
import pytest
from picarones.domain import (
ArtifactType,
CorpusSpecError,
DocumentRef,
GroundTruthRef,
)
class TestDocumentRefBasics:
    """Basic construction and id validation of ``DocumentRef``."""

    def test_minimal_document(self) -> None:
        """A document built from an id alone has no image and no ground truth."""
        doc = DocumentRef(id="folio_001")

        assert doc.id == "folio_001"
        assert doc.image_uri is None
        assert doc.ground_truths == ()

    def test_document_with_image_and_text_gt(self) -> None:
        """An image URI and one raw-text ground truth are kept on the document."""
        text_gt = GroundTruthRef(
            type=ArtifactType.RAW_TEXT,
            uri="/corpus/folio_001.gt.txt",
        )
        doc = DocumentRef(
            id="folio_001",
            image_uri="/corpus/folio_001.png",
            ground_truths=(text_gt,),
        )

        assert doc.image_uri == "/corpus/folio_001.png"
        assert len(doc.ground_truths) == 1

    def test_id_validation_rejects_spaces(self) -> None:
        """An id containing a space is rejected with ``CorpusSpecError``."""
        with pytest.raises(CorpusSpecError, match="document id invalide"):
            DocumentRef(id="bad id")
class TestMultiLevelGT:
    """Documents carrying several ground truths of different artifact types."""

    def test_multi_level_gt(self) -> None:
        """Three ground truths of distinct types are all kept, in the order given."""
        raw_text_gt = GroundTruthRef(type=ArtifactType.RAW_TEXT, uri="/x.gt.txt")
        alto_gt = GroundTruthRef(type=ArtifactType.ALTO_XML, uri="/x.gt.alto.xml")
        reading_order_gt = GroundTruthRef(
            type=ArtifactType.READING_ORDER,
            uri="/x.ro.json",
        )
        doc = DocumentRef(
            id="folio_001",
            ground_truths=(raw_text_gt, alto_gt, reading_order_gt),
        )

        expected_types = (
            ArtifactType.RAW_TEXT,
            ArtifactType.ALTO_XML,
            ArtifactType.READING_ORDER,
        )
        assert len(doc.ground_truths) == 3
        assert doc.available_gt_types == expected_types

    def test_gt_for_returns_matching_level(self) -> None:
        """``gt_for`` returns the ground truth whose type matches the request."""
        raw_text_gt = GroundTruthRef(type=ArtifactType.RAW_TEXT, uri="/x.txt")
        alto_gt = GroundTruthRef(type=ArtifactType.ALTO_XML, uri="/x.xml")
        doc = DocumentRef(id="x", ground_truths=(raw_text_gt, alto_gt))

        found = doc.gt_for(ArtifactType.ALTO_XML)

        assert found is not None
        assert found.uri == "/x.xml"

    def test_gt_for_returns_none_when_absent(self) -> None:
        """``gt_for`` returns ``None`` when the document has no such ground truth."""
        doc = DocumentRef(id="x")

        assert doc.gt_for(ArtifactType.RAW_TEXT) is None

    def test_duplicate_gt_type_rejected(self) -> None:
        """Two ground truths of the same type raise ``CorpusSpecError``."""
        # Both refs are built inside the context manager so that the whole
        # construction is covered by the same ``pytest.raises`` scope.
        with pytest.raises(CorpusSpecError, match="GT dupliquée"):
            DocumentRef(
                id="x",
                ground_truths=(
                    GroundTruthRef(type=ArtifactType.RAW_TEXT, uri="/a.txt"),
                    GroundTruthRef(type=ArtifactType.RAW_TEXT, uri="/b.txt"),
                ),
            )
class TestDocumentRefImmutability:
    """Frozen-model behaviour and JSON serialisation of ``DocumentRef``."""

    def test_frozen_blocks_mutation(self) -> None:
        """Assigning to a field of an existing document raises ``ValidationError``."""
        from pydantic import ValidationError

        doc = DocumentRef(id="x")

        with pytest.raises(ValidationError):
            doc.id = "y"  # type: ignore[misc]

    def test_json_roundtrip(self) -> None:
        """Dumping to JSON and validating it back yields an equal document."""
        alto_gt = GroundTruthRef(type=ArtifactType.ALTO_XML, uri="/x.xml")
        original = DocumentRef(
            id="vol_a/folio_001",
            image_uri="/c/folio_001.png",
            ground_truths=(alto_gt,),
        )

        payload = original.model_dump_json()
        restored = DocumentRef.model_validate_json(payload)

        assert original == restored
|