File size: 14,310 Bytes
1d89034
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9011070
1d89034
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d109222
9011070
1d89034
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
"""Tests for Sprint 62 — "Philological profile" HTML view.

Covers:

1. Individual sections: rendered correctly when at least one engine has
   signal for the given module; empty string when none does.
2. Aggregator: all 6 sections present when the 6 modules have signal,
   otherwise only the sections that have signal.
3. Full adaptive masking: no engine has any signal → ``""``.
4. HTML anti-injection: engine names / categories / characters
   containing ``<script>`` are correctly escaped.
5. Cells: colour code applied, values shown in %.
6. No automatic classification (the word
   "diplomatique" / "modernisant" appears only in the explanatory
   note, never as an engine label).
7. Integration into the full HTML report (FR + EN).
8. i18n completeness: every ``philo_*`` key present in FR and EN.
"""

from __future__ import annotations

import json
from pathlib import Path

from picarones.reports.html.renderers.philological import (
    build_abbreviations_section,
    build_early_modern_section,
    build_modern_archives_section,
    build_mufi_section,
    build_philological_profile_html,
    build_roman_numerals_section,
    build_unicode_blocks_section,
)


# ──────────────────────────────────────────────────────────────────────────
# Fixtures
# ──────────────────────────────────────────────────────────────────────────


def _eng_with_unicode(name: str = "Tesseract", acc: float = 0.85) -> dict:
    return {
        "name": name,
        "aggregated_philological": {
            "unicode_blocks": {
                "global_accuracy": acc, "n_chars_total": 1000,
                "per_block": {
                    "Latin Extended-A": {
                        "correct": int(acc * 100), "total": 100,
                        "accuracy": acc,
                    },
                    "Alphabetic Presentation Forms": {
                        "correct": 5, "total": 10, "accuracy": 0.5,
                    },
                },
            },
        },
    }


def _eng_with_mufi(name: str = "Pero", coverage: float = 0.78) -> dict:
    return {
        "name": name,
        "aggregated_philological": {
            "mufi": {"coverage": coverage, "n_mufi_chars_reference": 100},
        },
    }


def _eng_with_abbreviations(name: str = "T", strict: float = 0.6) -> dict:
    return {
        "name": name,
        "aggregated_philological": {
            "abbreviations": {
                "global_strict_score": strict,
                "global_expansion_score": 0.95,
                "n_abbreviations_in_reference": 50,
            },
        },
    }


def _eng_with_early_modern(name: str = "T") -> dict:
    return {
        "name": name,
        "aggregated_philological": {
            "early_modern": {
                "n_markers_reference": 100,
                "n_markers_preserved": 70,
                "global_preservation": 0.7,
                "per_category": {
                    "ligatures": {"total": 30, "preserved": 25, "preservation": 25 / 30},
                    "long_s": {"total": 50, "preserved": 30, "preservation": 0.6},
                    "ampersand": {"total": 20, "preserved": 15, "preservation": 0.75},
                },
            },
        },
    }


def _eng_with_modern_archives(name: str = "T") -> dict:
    return {
        "name": name,
        "aggregated_philological": {
            "modern_archives": {
                "n_markers_reference": 100,
                "n_strict_preserved": 60,
                "n_expansion_preserved": 90,
                "global_strict_score": 0.6,
                "global_expansion_score": 0.9,
                "per_category": {
                    "civility_titles": {
                        "n_total": 30, "n_strict_preserved": 25,
                        "n_expansion_preserved": 28,
                        "strict_score": 25 / 30, "expansion_score": 28 / 30,
                    },
                    "address": {
                        "n_total": 20, "n_strict_preserved": 10,
                        "n_expansion_preserved": 18,
                        "strict_score": 0.5, "expansion_score": 0.9,
                    },
                },
            },
        },
    }


def _eng_with_roman(name: str = "T") -> dict:
    return {
        "name": name,
        "aggregated_philological": {
            "roman_numerals": {
                "n_numerals_reference": 20,
                "per_status": {
                    "strict_preserved": 12, "case_changed": 3,
                    "j_dropped": 2, "converted_to_arabic": 2, "lost": 1,
                },
            },
        },
    }


# ──────────────────────────────────────────────────────────────────────────
# 1. Individual sections
# ──────────────────────────────────────────────────────────────────────────


class TestIndividualSections:
    """Each per-module section builder renders when an engine has signal.

    The expected titles are the French strings the builders are asserted
    to emit when no ``labels`` mapping is passed. They must be stored as
    real UTF-8 text: a mis-decoded literal (``รฉ`` instead of ``é``) can
    never match the rendered HTML.
    """

    def test_unicode_blocks_renders(self) -> None:
        """Title, engine name and block name all appear in the section."""
        html = build_unicode_blocks_section([_eng_with_unicode()])
        assert "Précision par bloc Unicode" in html
        assert "Tesseract" in html
        assert "Latin Extended-A" in html

    def test_unicode_blocks_empty_without_signal(self) -> None:
        """An engine carrying only MUFI data yields an empty string."""
        html = build_unicode_blocks_section([_eng_with_mufi()])
        assert html == ""

    def test_abbreviations_renders(self) -> None:
        """Title and engine name appear in the abbreviations section."""
        # Use a distinctive engine name: the fixture default "T" is a
        # single letter, so ``"T" in html`` could be satisfied by any
        # unrelated capital T in the markup and would prove nothing.
        html = build_abbreviations_section(
            [_eng_with_abbreviations(name="AbbrEngine")],
        )
        assert "Abréviations médiévales" in html
        assert "AbbrEngine" in html

    def test_mufi_renders(self) -> None:
        """Title and engine name appear in the MUFI section."""
        html = build_mufi_section([_eng_with_mufi()])
        assert "Couverture MUFI" in html
        assert "Pero" in html

    def test_early_modern_renders(self) -> None:
        """Title and every fixture category appear in the section."""
        html = build_early_modern_section([_eng_with_early_modern()])
        assert "Marqueurs typographiques" in html
        assert "ligatures" in html
        assert "long_s" in html
        assert "ampersand" in html

    def test_modern_archives_renders(self) -> None:
        """Title and both fixture categories appear in the section."""
        html = build_modern_archives_section([_eng_with_modern_archives()])
        assert "Abréviations des archives modernes" in html
        assert "civility_titles" in html
        assert "address" in html

    def test_roman_numerals_renders(self) -> None:
        """The Roman-numerals title appears in the section."""
        html = build_roman_numerals_section([_eng_with_roman()])
        assert "Numéraux romains" in html


# ──────────────────────────────────────────────────────────────────────────
# 2-3. Aggregator + adaptive masking
# ──────────────────────────────────────────────────────────────────────────


class TestAggregator:
    """``build_philological_profile_html`` includes only sections with signal.

    Section titles are compared as real UTF-8 French text. With mis-decoded
    literals the ``in`` checks could never succeed and, worse, the
    ``not in`` checks would pass vacuously.
    """

    def test_returns_empty_when_no_engine_has_signal(self) -> None:
        """An engine whose philological aggregate is ``None`` yields ``""``."""
        engines = [{"name": "X", "aggregated_philological": None}]
        assert build_philological_profile_html(engines) == ""

    def test_returns_empty_when_engines_summary_empty(self) -> None:
        """An empty engine list yields ``""``."""
        assert build_philological_profile_html([]) == ""

    def test_includes_only_modules_with_signal(self) -> None:
        """With MUFI-only input, only the MUFI section is rendered."""
        # A single engine carrying MUFI data only.
        html = build_philological_profile_html([_eng_with_mufi()])
        assert html != ""
        assert "Couverture MUFI" in html
        # Sections without signal must be absent.
        assert "Précision par bloc Unicode" not in html
        assert "Abréviations médiévales" not in html
        assert "Marqueurs typographiques" not in html
        assert "Abréviations des archives modernes" not in html
        assert "Numéraux romains" not in html

    def test_includes_all_six_when_full_signal(self) -> None:
        """With one engine per module, all six section titles are rendered."""
        engines = [
            _eng_with_unicode(),
            _eng_with_mufi(),
            _eng_with_abbreviations(),
            _eng_with_early_modern(),
            _eng_with_modern_archives(),
            _eng_with_roman(),
        ]
        html = build_philological_profile_html(engines)
        for marker in (
            "Précision par bloc Unicode",
            "Abréviations médiévales",
            "Couverture MUFI",
            "Marqueurs typographiques",
            "Abréviations des archives modernes",
            "Numéraux romains",
        ):
            assert marker in html, f"section absente : {marker}"


# ──────────────────────────────────────────────────────────────────────────
# 4. HTML anti-injection
# ──────────────────────────────────────────────────────────────────────────


class TestAntiInjection:
    """Markup in engine names and labels must come out HTML-escaped."""

    def test_engine_name_with_script_escaped(self) -> None:
        """A ``<script>`` engine name is rendered as escaped entities."""
        hostile_engine = _eng_with_mufi(name="<script>alert(1)</script>")
        rendered = build_mufi_section([hostile_engine])
        assert "<script>" not in rendered
        assert "&lt;script&gt;" in rendered

    def test_section_title_safely_escaped_via_labels(self) -> None:
        """A title override containing tags is rendered as escaped entities."""
        rendered = build_mufi_section(
            [_eng_with_mufi()],
            labels={"philo_mufi_title": "<b>Hack</b>"},
        )
        assert "<b>Hack</b>" not in rendered
        assert "&lt;b&gt;Hack&lt;/b&gt;" in rendered


# ──────────────────────────────────────────────────────────────────────────
# 5. Cells: colour + value in %
# ──────────────────────────────────────────────────────────────────────────


class TestCells:
    """Score cells show a percentage and carry a background colour."""

    def test_score_displayed_in_percent(self) -> None:
        """A coverage of 0.78 is displayed as ``78.0%``."""
        engine = _eng_with_mufi(coverage=0.78)
        rendered = build_mufi_section([engine])
        assert "78.0%" in rendered

    def test_color_present(self) -> None:
        """The rendered section contains an inline background colour."""
        engine = _eng_with_mufi(coverage=0.5)
        rendered = build_mufi_section([engine])
        # The inline style ``background:#...`` must appear.
        assert "background:#" in rendered


# ──────────────────────────────────────────────────────────────────────────
# 6. No imposed classification
# ──────────────────────────────────────────────────────────────────────────


class TestNoForcedClassification:
    """The renderer must not tag an engine with a classification label."""

    def test_engine_not_labeled_as_diplomatic_or_modernizing(self) -> None:
        """No classification word directly follows the engine-name cell."""
        # The engine has strict=1.0 (typical of a diplomatic transcription),
        # yet no "diplomatique" label may be attached to its cell.
        engine = _eng_with_abbreviations(name="DiploEngine", strict=1.0)
        rendered = build_abbreviations_section([engine])
        # "DiploEngine" shows up because it is the engine's name.
        assert "DiploEngine" in rendered
        # The word "diplomatique" may only occur in the explanatory note at
        # the bottom (and may be absent by default). Check that neither
        # label is glued to the engine name inside a table cell.
        for forbidden_label in ("diplomatique", "modernisant"):
            assert f"DiploEngine</td>{forbidden_label}" not in rendered


# ──────────────────────────────────────────────────────────────────────────
# 7. i18n completeness
# ──────────────────────────────────────────────────────────────────────────


class TestI18nCompleteness:
    """Every ``philo_*`` translation key exists in both FR and EN catalogues."""

    def _load(self, lang: str) -> dict:
        """Read and parse the ``<lang>.json`` translation catalogue.

        Args:
            lang: Catalogue name without extension (``"fr"``, ``"en"``).

        Returns:
            The parsed JSON content of
            ``picarones/reports/i18n/<lang>.json``, located three directory
            levels above this test file.
        """
        path = (
            Path(__file__).parent.parent.parent
            / "picarones" / "reports" / "i18n" / f"{lang}.json"
        )
        return json.loads(path.read_text(encoding="utf-8"))

    def test_all_philo_keys_present_fr(self) -> None:
        """The FR catalogue defines every key listed in ``required``."""
        d = self._load("fr")
        required = (
            "philo_profile_title", "philo_profile_note",
            "philo_engine_label", "philo_global_label",
            "philo_strict_label", "philo_expansion_label",
            "philo_n_total_label",
            "philo_unicode_blocks_title", "philo_unicode_blocks_note",
            "philo_abbreviations_title", "philo_abbreviations_note",
            "philo_mufi_title", "philo_mufi_note",
            "philo_mufi_coverage_label",
            "philo_early_modern_title", "philo_early_modern_note",
            "philo_modern_archives_title", "philo_modern_archives_note",
            "philo_roman_numerals_title", "philo_roman_numerals_note",
            "philo_roman_status_strict_preserved",
            "philo_roman_status_case_changed",
            "philo_roman_status_j_dropped",
            "philo_roman_status_converted_to_arabic",
            "philo_roman_status_lost",
        )
        # Collect every missing key so one run reports them all, instead
        # of stopping at the first absent key.
        missing = [key for key in required if key not in d]
        assert not missing, f"manque clé FR : {', '.join(missing)}"

    def test_all_philo_keys_present_en(self) -> None:
        """Every ``philo_*`` key of the FR catalogue also exists in EN."""
        d_fr = self._load("fr")
        d_en = self._load("en")
        missing = [
            key for key in d_fr
            if key.startswith("philo_") and key not in d_en
        ]
        assert not missing, f"manque clé EN : {', '.join(missing)}"