File size: 13,439 Bytes
f6dc855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77d9c47
f6dc855
 
 
 
979f3c3
f6dc855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
979f3c3
f6dc855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
979f3c3
f6dc855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
"""Tests Sprint 61 β€” cΓ’blage backend des mΓ©triques philologiques.

Couvre :

1. Champs ``DocumentResult.philological_metrics`` et
   ``EngineReport.aggregated_philological`` posΓ©s.
2. SΓ©rialisation conditionnelle dans ``as_dict``.
3. LibΓ©ration par ``compact``.
4. ``compute_philological_metrics`` :
   - GT mΓ©diΓ©val dΓ©clenche abbreviations + mufi
   - GT imprimΓ© ancien dΓ©clenche early_modern
   - GT moderne dΓ©clenche modern_archives
   - GT avec numΓ©raux romains dΓ©clenche roman_numerals
   - GT avec caractères hors Basic Latin déclenche unicode_blocks
   - GT en ASCII pur sans marqueur β†’ ``None``
   - GT vide / None β†’ ``None``
5. ``aggregate_philological_metrics`` :
   - Somme correcte des compteurs par module
   - Recalcul correct des scores globaux
   - Doc count cohΓ©rent
   - Aucun document avec signal β†’ ``None``
6. IntΓ©gration runner end-to-end via fixture mock.
"""

from __future__ import annotations

from picarones.measurements.philological_hooks import (
    aggregate_philological_metrics,
    compute_philological_metrics,
)
from picarones.core.results import DocumentResult, EngineReport
from picarones.measurements.metrics import MetricsResult


def _make_doc(
    doc_id: str = "d1",
    gt: str = "",
    hyp: str = "",
    philological: dict | None = None,
) -> DocumentResult:
    """Build a minimal ``DocumentResult`` for the tests.

    Every error rate is zeroed; only the reference / hypothesis lengths
    (taken from ``gt`` / ``hyp``) and the optional ``philological``
    payload vary between calls.
    """
    # All rate-like metrics share the same neutral value.
    zeroed_rates = dict.fromkeys(
        (
            "cer", "cer_nfc", "cer_caseless",
            "wer", "wer_normalized", "mer", "wil",
        ),
        0.0,
    )
    neutral_metrics = MetricsResult(
        **zeroed_rates,
        reference_length=len(gt),
        hypothesis_length=len(hyp),
    )
    return DocumentResult(
        doc_id=doc_id,
        image_path=f"/tmp/{doc_id}.png",
        ground_truth=gt,
        hypothesis=hyp,
        metrics=neutral_metrics,
        duration_seconds=0.1,
        philological_metrics=philological,
    )


# ──────────────────────────────────────────────────────────────────────────
# 1. Fields set on DocumentResult / EngineReport
# ──────────────────────────────────────────────────────────────────────────


class TestFields:
    """The philological fields exist on both result containers."""

    def test_document_result_default_none(self) -> None:
        """A document built without a payload exposes ``None``."""
        assert _make_doc().philological_metrics is None

    def test_document_result_accepts_dict(self) -> None:
        """A payload given at construction is readable back unchanged."""
        doc = _make_doc(philological={"mufi": {"coverage": 0.9}})
        expected = {"mufi": {"coverage": 0.9}}
        assert doc.philological_metrics == expected

    def test_engine_report_default_none(self) -> None:
        """A report built without aggregated data exposes ``None``."""
        bare_kwargs = dict(
            engine_name="test",
            engine_version="1.0",
            engine_config={},
            document_results=[],
        )
        engine_report = EngineReport(**bare_kwargs)
        assert engine_report.aggregated_philological is None

    def test_engine_report_accepts_dict(self) -> None:
        """An aggregated payload given at construction is readable back."""
        engine_report = EngineReport(
            engine_name="test",
            engine_version="1.0",
            engine_config={},
            document_results=[],
            aggregated_philological={"mufi": {"coverage": 0.9}},
        )
        expected = {"mufi": {"coverage": 0.9}}
        assert engine_report.aggregated_philological == expected


# ──────────────────────────────────────────────────────────────────────────
# 2. as_dict serialization
# ──────────────────────────────────────────────────────────────────────────


class TestSerialization:
    """``as_dict`` emits the philological keys only when a payload is set."""

    def test_as_dict_omits_none(self) -> None:
        """No payload on the document: the key is absent from the dict."""
        serialized = _make_doc().as_dict()
        assert "philological_metrics" not in serialized

    def test_as_dict_includes_when_present(self) -> None:
        """A payload on the document is serialized under its own key."""
        doc = _make_doc(philological={"mufi": {"coverage": 1.0}})
        serialized = doc.as_dict()
        assert serialized["philological_metrics"] == {"mufi": {"coverage": 1.0}}

    def test_engine_report_as_dict_omits_none(self) -> None:
        """No aggregated payload on the report: the key is absent."""
        bare_report = EngineReport(
            engine_name="t",
            engine_version="1",
            engine_config={},
            document_results=[],
        )
        serialized = bare_report.as_dict()
        assert "aggregated_philological" not in serialized

    def test_engine_report_as_dict_includes_when_present(self) -> None:
        """An aggregated payload on the report is serialized under its key."""
        filled_report = EngineReport(
            engine_name="t",
            engine_version="1",
            engine_config={},
            document_results=[],
            aggregated_philological={"mufi": {"coverage": 0.5}},
        )
        serialized = filled_report.as_dict()
        assert serialized["aggregated_philological"] == {"mufi": {"coverage": 0.5}}


# ──────────────────────────────────────────────────────────────────────────
# 3. Release by compact()
# ──────────────────────────────────────────────────────────────────────────


class TestCompact:
    """``compact()`` releases the per-document philological payload."""

    def test_compact_clears_philological(self) -> None:
        """After ``compact()`` the field reads back as ``None``."""
        payload = {"mufi": {"coverage": 1.0}}
        doc = _make_doc(philological=payload)
        doc.compact()
        assert doc.philological_metrics is None


# ──────────────────────────────────────────────────────────────────────────
# 4. compute_philological_metrics — adaptive masking
# ──────────────────────────────────────────────────────────────────────────


class TestComputeAdaptive:
    """``compute_philological_metrics`` reports a module only when the
    ground truth (GT) carries the matching markers, and returns ``None``
    when nothing is triggered.

    Every positive test passes the GT as its own hypothesis, so only the
    presence of each module key is checked, not its scores.
    """

    def test_medieval_triggers_abbreviations_and_mufi(self) -> None:
        """A GT with ꝑ, þ and æ reports both abbreviations and mufi."""
        gt = "fait en lan ꝑ regem þæt"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "abbreviations" in m
        assert "mufi" in m

    def test_early_modern_triggers_typography(self) -> None:
        """A GT with long s (ſ), ampersand and ã reports early_modern."""
        gt = "le ſerpent finement & ã"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "early_modern" in m

    def test_modern_archives_triggers_module(self) -> None:
        """A GT with modern abbreviations (Mme, bd, vol.) reports modern_archives."""
        gt = "Mme Dupont au bd Voltaire vol. II"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "modern_archives" in m

    def test_roman_numerals_triggers_module(self) -> None:
        """A GT with Roman numerals reports roman_numerals."""
        gt = "Louis XIV mourut en MDCCXV"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "roman_numerals" in m

    def test_unicode_blocks_triggered_only_outside_basic_latin(self) -> None:
        """Pure ASCII without any marker yields no metrics at all."""
        # Pure ASCII without marker → unicode_blocks omitted (Basic Latin
        # only, trivial breakdown), so the whole result is None.
        m = compute_philological_metrics("hello world", "hello world")
        assert m is None

    def test_unicode_blocks_triggered_with_diacritics(self) -> None:
        """Accented letters outside Basic Latin report unicode_blocks."""
        # é, à and ô are outside Basic Latin → unicode_blocks included.
        gt = "café à é ô"
        m = compute_philological_metrics(gt, gt)
        assert m is not None
        assert "unicode_blocks" in m

    def test_empty_returns_none(self) -> None:
        """Empty strings and ``None`` inputs both yield ``None``."""
        assert compute_philological_metrics("", "") is None
        assert compute_philological_metrics(None, None) is None

    def test_no_signal_returns_none(self) -> None:
        """Pure Basic Latin without any philological marker yields ``None``."""
        m = compute_philological_metrics("hello", "hello")
        assert m is None


# ──────────────────────────────────────────────────────────────────────────
# 5. aggregate_philological_metrics
# ──────────────────────────────────────────────────────────────────────────


class TestAggregation:
    """``aggregate_philological_metrics`` merges per-document results into
    one per-module summary, or returns ``None`` when no document carries
    any signal.
    """

    def test_no_data_returns_none(self) -> None:
        """An empty list or a list of ``None`` entries aggregates to ``None``."""
        assert aggregate_philological_metrics([]) is None
        assert aggregate_philological_metrics([None, None]) is None

    def test_aggregates_only_present_modules(self) -> None:
        """Each module is counted only for the document that produced it."""
        # Doc 1 carries scribal abbreviations, doc 2 only a Roman numeral.
        d1 = compute_philological_metrics("ꝑ ꝓ ꝗ", "per pro qui")
        d2 = compute_philological_metrics("Louis XIV", "Louis 14")
        agg = aggregate_philological_metrics([d1, d2])
        assert agg is not None
        # NOTE(review): the original comment announced a mufi check here
        # (ꝑ/ꝓ/ꝗ being MUFI characters), but only abbreviations and
        # roman_numerals are asserted — confirm whether a mufi assertion
        # is wanted.
        assert "abbreviations" in agg
        assert "roman_numerals" in agg
        # doc_count is tracked per module.
        assert agg["abbreviations"]["doc_count"] == 1
        assert agg["roman_numerals"]["doc_count"] == 1

    def test_aggregation_sums_counters(self) -> None:
        """MUFI counters are summed across documents."""
        # 3 docs with MUFI: "þæt ꝑ" = 3 MUFI characters (þ, æ, ꝑ).
        gt = "þæt ꝑ"
        per_doc = [compute_philological_metrics(gt, gt) for _ in range(3)]
        agg = aggregate_philological_metrics(per_doc)
        assert agg is not None
        assert "mufi" in agg
        # 3 characters × 3 docs = 9
        assert agg["mufi"]["n_mufi_chars_reference"] == 9
        assert agg["mufi"]["n_mufi_chars_preserved"] == 9
        assert agg["mufi"]["coverage"] == 1.0
        assert agg["mufi"]["doc_count"] == 3

    def test_aggregation_recomputes_global_score(self) -> None:
        """The global score is recomputed from summed counters."""
        # Doc1 preserves 100%, Doc2 preserves 0% → weighted average.
        d1 = compute_philological_metrics("XIV", "XIV")
        d2 = compute_philological_metrics("V", "perdu")
        agg = aggregate_philological_metrics([d1, d2])
        # Fail with a clear assertion rather than a TypeError on subscript.
        assert agg is not None
        roman = agg["roman_numerals"]
        # Doc1: 1 strict_preserved (XIV)
        # Doc2: 1 lost (V)
        # Total: 2 numerals, 1 strict → 0.5
        assert roman["n_numerals_reference"] == 2
        assert roman["global_strict_score"] == 0.5

    def test_per_category_aggregation_modern_archives(self) -> None:
        """Per-category results from different documents are merged."""
        # Two docs hitting modern_archives on different categories.
        d1 = compute_philological_metrics("Mme bd", "Mme bd")
        d2 = compute_philological_metrics("vol. p.", "vol. p.")
        agg = aggregate_philological_metrics([d1, d2])
        # Fail with a clear assertion rather than a TypeError on subscript.
        assert agg is not None
        per_cat = agg["modern_archives"]["per_category"]
        # Doc1: civility_titles + address; Doc2: bibliographic
        assert "civility_titles" in per_cat
        assert "address" in per_cat
        assert "bibliographic" in per_cat
        # Hypothesis equals GT in both docs, so every category is perfect.
        for cat in per_cat.values():
            assert cat["strict_score"] == 1.0


# ──────────────────────────────────────────────────────────────────────────
# 6. End-to-end integration (light mock on the runner)
# ──────────────────────────────────────────────────────────────────────────


class TestRunnerIntegration:
    """Checks that ``_compute_document_result`` attaches
    ``philological_metrics`` when the GT carries signal, and leaves the
    field at ``None`` otherwise."""

    @staticmethod
    def _run_with_identical_ocr(tmp_path, text: str):
        """Call ``_compute_document_result`` with a mock OCR result whose
        text equals the ground truth *text*, and return the result."""
        # Imported at call time, in the same order as the tests need them.
        from picarones.measurements.runner import _compute_document_result
        from picarones.engines.base import EngineResult

        # Zero-byte placeholder image. NOTE(review): presumably the
        # image-quality step fails gracefully on it — its outcome is not
        # inspected by these tests.
        placeholder = tmp_path / "doc.png"
        placeholder.write_bytes(b"")
        placeholder_path = str(placeholder)

        mock_output = EngineResult(
            engine_name="mock",
            image_path=placeholder_path,
            text=text,
            duration_seconds=0.1,
            error=None,
        )
        return _compute_document_result(
            doc_id="d1",
            image_path=placeholder_path,
            ground_truth=text,
            ocr_result=mock_output,
            char_exclude=None,
        )

    def test_runner_attaches_philological(self, tmp_path) -> None:
        """A GT with an abbreviation sign and a Roman numeral gets metrics."""
        result = self._run_with_identical_ocr(tmp_path, "ꝑ regem mcclxxxij")
        attached = result.philological_metrics
        assert attached is not None
        assert "abbreviations" in attached
        assert "roman_numerals" in attached

    def test_runner_omits_philological_on_plain_text(self, tmp_path) -> None:
        """Pure ASCII text without philological markers gets no metrics."""
        result = self._run_with_identical_ocr(
            tmp_path, "hello world without any markers"
        )
        assert result.philological_metrics is None