Spaces:
Running
Running
File size: 13,792 Bytes
"""Tests for Sprint 40 — NER extractor backend + runner wiring.

Covers:

1. ``SpacyEntityExtractor`` lazy-imports spaCy; without spaCy installed,
   the extractor returns ``[]`` with an explicit warning.
2. ``is_spacy_available`` reflects the actual state.
3. ``get_extractor(profile)`` accepts a profile key or a direct model
   name.
4. ``DocumentResult.ner_metrics`` is serialized via ``as_dict`` only
   when set, and released by ``compact(drop_analyses=True)``.
5. ``EngineReport.aggregated_ner`` appears in ``as_dict`` when set
   (backward compatible otherwise).
6. Runner wiring with a **mock** extractor (injected callable):
   - ``ner_metrics`` is attached to the DRs whose doc has entity GT;
   - ``aggregated_ner`` is computed on the EngineReport;
   - docs without entity GT are ignored.
7. Without an extractor passed to the runner, nothing is computed
   (backward compatibility).
8. An extractor that raises on a specific doc → warning, other docs
   unchanged.
"""
from __future__ import annotations

from collections.abc import Callable
from pathlib import Path

import pytest

from picarones.core.corpus import Corpus, Document, EntitiesGT, GTLevel, TextGT
from picarones.core.results import DocumentResult, EngineReport
from picarones.measurements.ner_backends import (
    SPACY_PROFILES,
    SpacyEntityExtractor,
    get_extractor,
    is_spacy_available,
)
from picarones.measurements.runner import _aggregate_ner, _attach_ner_metrics
# ──────────────────────────────────────────────────────────────────────────
# 1-3. SpacyEntityExtractor backend
# ──────────────────────────────────────────────────────────────────────────
class TestSpacyExtractor:
    """Tests for the ``SpacyEntityExtractor`` callable."""

    def test_falls_back_silently_without_spacy(
        self, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Check the no-spaCy fallback: empty result, warning, no exception.

        The extractor is always invoked, so an exception fails the test in
        any environment. The remaining assertions only run when
        ``is_spacy_available()`` is false.
        """
        extractor = SpacyEntityExtractor("fr_core_news_sm")
        backend_logger = "picarones.measurements.ner_backends"
        with caplog.at_level("WARNING", logger=backend_logger):
            entities = extractor("Marie de Bourgogne en 1477.")

        if is_spacy_available():
            # With spaCy installed the fallback path is not exercised, so
            # nothing beyond "the call did not raise" is verified here.
            return

        assert entities == []
        mentions_spacy = any(
            "spaCy" in record.message or "spacy" in record.message
            for record in caplog.records
        )
        assert mentions_spacy
        assert extractor.available is False

    def test_empty_text_returns_empty(self) -> None:
        """An empty input text yields an empty entity list."""
        assert SpacyEntityExtractor()("") == []

    def test_idempotent_load(self) -> None:
        """Two calls with an unknown model name leave ``_loaded`` set.

        The intent stated by the original test is that the second call does
        not retry loading; only the ``_loaded`` flag is actually observed.
        """
        extractor = SpacyEntityExtractor("inexistant_model_xyz")
        # First call attempts the load; the second is expected to reuse the
        # recorded outcome.
        for _ in range(2):
            extractor("test")
        assert extractor._loaded is True
class TestProfilesAndFactory:
    """Tests for ``SPACY_PROFILES`` and the ``get_extractor`` factory."""

    def test_known_profiles_listed(self) -> None:
        """The ``fr``, ``en`` and ``multilingual`` keys are all registered."""
        expected_keys = ("fr", "en", "multilingual")
        missing = [key for key in expected_keys if key not in SPACY_PROFILES]
        assert not missing

    def test_get_extractor_with_known_profile(self) -> None:
        """A profile key resolves to the model name registered for it."""
        extractor = get_extractor("fr")
        assert isinstance(extractor, SpacyEntityExtractor)
        assert extractor.model_name == SPACY_PROFILES["fr"]

    def test_get_extractor_with_direct_model_name(self) -> None:
        """A string that is not a profile key is used as the model name."""
        requested = "custom_model_name"
        assert get_extractor(requested).model_name == requested
# ──────────────────────────────────────────────────────────────────────────
# 4-5. DocumentResult / EngineReport serialization
# ──────────────────────────────────────────────────────────────────────────
def _make_document_result(
    doc_id: str = "d1",
    hypothesis: str = "Marie de Bourgogne en 1477.",
    ner_metrics: dict | None = None,
) -> DocumentResult:
    """Build a ``DocumentResult`` fixture with all-zero error metrics.

    Args:
        doc_id: Identifier stored on the result.
        hypothesis: Text stored as the engine output.
        ner_metrics: Value passed through as ``ner_metrics`` (``None`` by
            default).

    Returns:
        A ``DocumentResult`` whose ground truth is the fixed sentence
        ``"Marie de Bourgogne en 1477."`` (27 characters).
    """
    from picarones.measurements.metrics import MetricsResult

    # Every error-rate field is zero; both lengths match the 27-character
    # reference sentence.
    zero_rates = dict.fromkeys(
        ("cer", "cer_nfc", "cer_caseless", "wer", "wer_normalized", "mer", "wil"),
        0.0,
    )
    perfect_metrics = MetricsResult(
        **zero_rates,
        reference_length=27,
        hypothesis_length=27,
    )
    return DocumentResult(
        doc_id=doc_id,
        image_path="/tmp/x.png",
        ground_truth="Marie de Bourgogne en 1477.",
        hypothesis=hypothesis,
        metrics=perfect_metrics,
        duration_seconds=0.1,
        ner_metrics=ner_metrics,
    )
class TestModelSerialization:
    """Serialization of the NER fields on ``DocumentResult`` / ``EngineReport``."""

    def test_ner_metrics_omitted_when_none(self) -> None:
        """``as_dict`` has no ``ner_metrics`` key when the field is ``None``."""
        serialized = _make_document_result(ner_metrics=None).as_dict()
        assert "ner_metrics" not in serialized

    def test_ner_metrics_present_when_set(self) -> None:
        """``as_dict`` carries ``ner_metrics`` through when it is set."""
        serialized = _make_document_result(
            ner_metrics={"global": {"f1": 0.8}},
        ).as_dict()
        assert serialized["ner_metrics"] == {"global": {"f1": 0.8}}

    def test_compact_clears_ner_metrics(self) -> None:
        """``compact(drop_analyses=True)`` resets ``ner_metrics`` to ``None``."""
        # Sprint A14-S1 — A.I.0 P0: ``compact()`` is now a no-op by default
        # (cf. core/results.py). The "erase analyses" behavior is explicitly
        # opt-in via ``drop_analyses=True``.
        result = _make_document_result(ner_metrics={"global": {"f1": 0.8}})
        result.compact(drop_analyses=True)
        assert result.ner_metrics is None

    def test_compact_default_is_noop(self) -> None:
        """Sprint A14-S1 — ``compact()`` without arguments keeps ``ner_metrics``."""
        result = _make_document_result(ner_metrics={"global": {"f1": 0.8}})
        result.compact()
        assert result.ner_metrics == {"global": {"f1": 0.8}}

    def test_engine_report_aggregated_ner_omitted_when_none(self) -> None:
        """``as_dict`` has no ``aggregated_ner`` key when none was supplied."""
        report = EngineReport(
            engine_name="t",
            engine_version="1",
            engine_config={},
            document_results=[_make_document_result()],
        )
        assert "aggregated_ner" not in report.as_dict()

    def test_engine_report_aggregated_ner_included_when_set(self) -> None:
        """``as_dict`` carries ``aggregated_ner`` through when it is set."""
        report = EngineReport(
            engine_name="t",
            engine_version="1",
            engine_config={},
            document_results=[_make_document_result()],
            aggregated_ner={"global": {"f1": 0.75}, "doc_count": 1},
        )
        serialized = report.as_dict()
        assert serialized["aggregated_ner"] == {
            "global": {"f1": 0.75},
            "doc_count": 1,
        }
# ──────────────────────────────────────────────────────────────────────────
# 6. Runner wiring with a mock extractor
# ──────────────────────────────────────────────────────────────────────────
def _mock_extractor_factory(per_text: dict[str, list[dict]]) -> callable:
"""Construit un extracteur qui renvoie une rรฉponse prรฉdรฉfinie par
texte d'entrรฉe โ utile pour tester le cรขblage runner sans dรฉpendance
NLP rรฉelle."""
def _extract(text: str) -> list[dict]:
return per_text.get(text, [])
return _extract
def _corpus_with_entities(tmp_path: Path) -> Corpus:
    """Create a minimal two-document corpus; only the first has entity GT.

    Args:
        tmp_path: Directory in which the two placeholder image files
            (``doc1.png`` and ``doc2.png``) are written.

    Returns:
        A ``Corpus`` named ``"test"`` holding both documents, in order.
    """
    annotated_image, plain_image = (
        tmp_path / filename for filename in ("doc1.png", "doc2.png")
    )
    for image in (annotated_image, plain_image):
        image.write_bytes(b"fake")

    # NOTE(review): in this 27-character sentence "Marie de Bourgogne"
    # occupies indices 0-17 and "1477" indices 22-25, so the DATE start of 21
    # points at the preceding space — confirm the offset convention expected
    # by EntitiesGT.
    ground_truths = {
        GTLevel.TEXT: TextGT(text="Marie de Bourgogne en 1477."),
        GTLevel.ENTITIES: EntitiesGT(entities=[
            {"label": "PER", "start": 0, "end": 17, "text": "Marie de Bourgogne"},
            {"label": "DATE", "start": 21, "end": 25, "text": "1477"},
        ]),
    }
    with_entities = Document(
        image_path=annotated_image,
        ground_truth="Marie de Bourgogne en 1477.",
        ground_truths=ground_truths,
    )
    # The second document deliberately carries no entity ground truth.
    without_entities = Document(
        image_path=plain_image,
        ground_truth="Texte sans GT entitรฉs.",
    )
    return Corpus(name="test", documents=[with_entities, without_entities])
class TestRunnerWiring:
    """Tests for ``_attach_ner_metrics`` and ``_aggregate_ner``."""

    def test_attach_ner_only_for_docs_with_entities(self, tmp_path: Path) -> None:
        """Only the document carrying entity GT receives ``ner_metrics``."""
        corpus = _corpus_with_entities(tmp_path)
        reference = "Marie de Bourgogne en 1477."
        # The mock predicts exactly the ground-truth entities of doc1, so a
        # perfect score is expected there.
        canned_answers = {
            reference: [
                {"label": "PER", "start": 0, "end": 17, "text": "Marie de Bourgogne"},
                {"label": "DATE", "start": 21, "end": 25, "text": "1477"},
            ],
            # Per the original author this entry is never actually looked up.
            "Texte sans GT entitรฉs.": [],
        }
        extractor = _mock_extractor_factory(canned_answers)
        annotated = _make_document_result(doc_id="doc1", hypothesis=reference)
        unannotated = _make_document_result(
            doc_id="doc2",
            hypothesis="Texte sans GT entitรฉs.",
        )

        _attach_ner_metrics(corpus, [annotated, unannotated], extractor)

        # doc1 has entity GT, so its NER metrics are computed.
        assert annotated.ner_metrics is not None
        assert annotated.ner_metrics["global"]["f1"] == pytest.approx(1.0)
        # doc2 has no entity GT, so it is left untouched.
        assert unannotated.ner_metrics is None

    def test_aggregate_ner_combines_doc_metrics(self, tmp_path: Path) -> None:
        """Counts from two per-document metric dicts are pooled.

        ``tmp_path`` is requested but not used by this test.
        """
        partial_recall = {
            "global": {"precision": 1.0, "recall": 0.5, "f1": 2 / 3, "support": 2},
            "per_category": {
                "PER": {"precision": 1.0, "recall": 0.5, "f1": 2 / 3, "support": 2},
            },
            "true_positives": 1, "false_positives": 0, "false_negatives": 1,
            "hallucinated_entities": [], "missed_entities": [{"label": "PER"}],
            "iou_threshold": 0.5,
        }
        full_recall = {
            "global": {"precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 1},
            "per_category": {
                "LOC": {"precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 1},
            },
            "true_positives": 1, "false_positives": 0, "false_negatives": 0,
            "hallucinated_entities": [], "missed_entities": [],
            "iou_threshold": 0.5,
        }
        first = _make_document_result()
        first.ner_metrics = partial_recall
        second = _make_document_result()
        second.ner_metrics = full_recall

        aggregated = _aggregate_ner([first, second])

        assert aggregated is not None
        expected_counts = (
            ("doc_count", 2),
            ("true_positives", 2),
            ("false_negatives", 1),
            ("missed_total", 1),
        )
        for key, expected in expected_counts:
            assert aggregated[key] == expected
        # Global micro F1: TP=2, FP=0, FN=1 → P=1, R=2/3, F1=0.8
        assert aggregated["global"]["f1"] == pytest.approx(0.8)

    def test_aggregate_returns_none_when_no_ner_metrics(self) -> None:
        """With no per-document NER metrics, the aggregate is ``None``."""
        bare_result = _make_document_result(ner_metrics=None)
        assert _aggregate_ner([bare_result]) is None
# ──────────────────────────────────────────────────────────────────────────
# 7. Backward compatibility: without an extractor, nothing changes
# ──────────────────────────────────────────────────────────────────────────
class TestBackwardCompat:
    """Backward-compatibility check on the default ``ner_metrics`` value."""

    def test_no_extractor_no_calculation(self, tmp_path: Path) -> None:
        """A default ``DocumentResult`` has ``ner_metrics`` set to ``None``.

        The stated intent is that, with ``entity_extractor=None``, the runner
        never assigns ``ner_metrics`` on its own.
        """
        # NOTE(review): no runner function is invoked here and ``tmp_path``
        # is unused; only the default value of the model field is checked.
        for doc_id in ("doc1", "doc2"):
            untouched = _make_document_result(doc_id=doc_id)
            assert untouched.ner_metrics is None
# ──────────────────────────────────────────────────────────────────────────
# 8. Robustness: extractor that raises
# ──────────────────────────────────────────────────────────────────────────
class TestRobustness:
    """Behavior of ``_attach_ner_metrics`` when the extractor raises."""

    def test_extractor_raising_does_not_break_others(
        self, tmp_path: Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """An extractor exception must not escape ``_attach_ner_metrics``.

        Only the result for doc1 (the sole document with entity GT) is
        passed in. The test checks that the call returns normally, that
        ``ner_metrics`` stays ``None`` and that a warning mentioning
        ``ner.attach`` is logged.
        """
        corpus = _corpus_with_entities(tmp_path)

        def _exploding_extractor(text: str) -> list[dict]:
            raise RuntimeError("boom")

        failing_result = _make_document_result(
            doc_id="doc1",
            hypothesis="Marie de Bourgogne en 1477.",
        )
        runner_logger = "picarones.measurements.runner"
        with caplog.at_level("WARNING", logger=runner_logger):
            _attach_ner_metrics(corpus, [failing_result], _exploding_extractor)

        # The exception was swallowed, so no metrics were attached.
        assert failing_result.ner_metrics is None
        # An explicit warning was emitted.
        warned = any("ner.attach" in record.message for record in caplog.records)
        assert warned
|