Spaces:
Sleeping
Sleeping
Claude
fix: résoudre les 64 erreurs ruff pré-existantes révélées par le lint actif
6362212 unverified | """Tests Sprint 5 : métriques avancées patrimoniales. | |
| Couvre : | |
| - Matrice de confusion unicode (confusion.py) | |
| - Scores ligatures et diacritiques (char_scores.py) | |
| - Taxonomie des erreurs classes 1-9 (taxonomy.py) | |
| - Analyse structurelle (structure.py) | |
| - Qualité image (image_quality.py) | |
| - Intégration dans les fixtures et le rapport HTML | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| # =========================================================================== | |
| # Tests ConfusionMatrix | |
| # =========================================================================== | |
| from picarones.core.confusion import ( | |
| EMPTY_CHAR, | |
| build_confusion_matrix, | |
| aggregate_confusion_matrices, | |
| top_confused_chars, | |
| ) | |
class TestBuildConfusionMatrix:
    """Checks on the object returned by ``build_confusion_matrix(gt, ocr)``."""

    def test_identical_texts(self):
        """Identical inputs yield zero substitutions, insertions and deletions."""
        result = build_confusion_matrix("abc", "abc")
        assert result.total_substitutions == 0
        assert result.total_insertions == 0
        assert result.total_deletions == 0

    def test_empty_texts(self):
        """Two empty strings yield no error at all."""
        assert build_confusion_matrix("", "").total_errors == 0

    def test_simple_substitution(self):
        """GT 'b' read as 'x' is stored under ``matrix['b']['x']``."""
        table = build_confusion_matrix("abc", "axc").matrix
        assert "b" in table
        row = table["b"]
        assert "x" in row
        assert row["x"] >= 1

    def test_deletion_recorded(self):
        """A GT character dropped by the OCR is mapped to ``EMPTY_CHAR``."""
        table = build_confusion_matrix("abc", "ac").matrix
        assert "b" in table
        assert EMPTY_CHAR in table["b"]

    def test_insertion_recorded(self):
        """A character added by the OCR is stored under the ``EMPTY_CHAR`` row."""
        table = build_confusion_matrix("ac", "abc").matrix
        assert EMPTY_CHAR in table
        assert "b" in table[EMPTY_CHAR]

    def test_no_whitespace_recorded_by_default(self):
        """Spaces must not appear as a GT key in the matrix."""
        table = build_confusion_matrix("a b", "a x").matrix
        assert " " not in table

    def test_as_dict_structure(self):
        """``as_dict()`` exposes the matrix and the three error counters."""
        exported = build_confusion_matrix("hello", "hallo").as_dict()
        assert "matrix" in exported
        assert "total_substitutions" in exported
        assert "total_insertions" in exported
        assert "total_deletions" in exported

    def test_top_confusions(self):
        """Four 'e' -> 'a' substitutions come out as the top confusion."""
        ranking = build_confusion_matrix("eeee", "aaaa").top_confusions(n=5)
        assert len(ranking) >= 1
        first = ranking[0]
        assert first["gt"] == "e"
        assert first["ocr"] == "a"
        assert first["count"] == 4

    def test_medieval_chars_tracked(self):
        """Long s (ſ) confused with 'f' is tracked like any other character."""
        table = build_confusion_matrix("maiſon", "maifon").matrix
        assert "ſ" in table
        assert "f" in table["ſ"]

    def test_as_compact_dict_filters_low_count(self):
        """With ``min_count=2`` no remaining cell may hold a count below 2."""
        # The single 'a' -> 'x' substitution occurs once, so it must be dropped.
        compact = build_confusion_matrix("aab", "axb").as_compact_dict(min_count=2)
        for row in compact["matrix"].values():
            assert all(count >= 2 for count in row.values())
class TestAggregateConfusionMatrices:
    """Checks on ``aggregate_confusion_matrices`` applied to a list of matrices."""

    def test_empty_list(self):
        """Aggregating nothing gives a matrix without errors."""
        assert aggregate_confusion_matrices([]).total_errors == 0

    def test_single_matrix(self):
        """Aggregating one matrix reproduces its content."""
        only = build_confusion_matrix("abc", "axc")
        merged = aggregate_confusion_matrices([only])
        assert merged.matrix == only.matrix

    def test_counts_sum(self):
        """The same 'b' -> 'x' confusion seen twice is counted at least twice."""
        pair = [build_confusion_matrix("abc", "axc") for _ in range(2)]
        merged = aggregate_confusion_matrices(pair)
        assert merged.matrix.get("b", {}).get("x", 0) >= 2

    def test_total_errors_sum(self):
        """The aggregated error total is at least the sum of the parts."""
        first = build_confusion_matrix("abc", "axc")
        second = build_confusion_matrix("def", "dxf")
        merged = aggregate_confusion_matrices([first, second])
        expected_minimum = first.total_errors + second.total_errors
        assert merged.total_errors >= expected_minimum
class TestTopConfusedChars:
    """Checks on ``top_confused_chars``, the ranking of most-confused characters."""

    def test_returns_list(self):
        """The ranking is returned as a plain list."""
        cm = build_confusion_matrix("aaabbb", "aaaxxx")
        assert isinstance(top_confused_chars(cm, n=5), list)

    def test_sorted_by_errors_desc(self):
        """Entries are ordered by ``total_errors``, largest first."""
        cm = aggregate_confusion_matrices([
            build_confusion_matrix("bbb", "xxx"),  # 3 substitutions
            build_confusion_matrix("a", "y"),      # 1 substitution
        ])
        ranking = top_confused_chars(cm, n=10)
        # Check the whole ranking rather than only the first two entries, and do
        # it unconditionally so the test can no longer skip its only assertion.
        totals = [entry["total_errors"] for entry in ranking]
        assert totals == sorted(totals, reverse=True)

    def test_excludes_empty_char(self):
        """With ``exclude_empty=True`` the ``EMPTY_CHAR`` entry is not listed."""
        cm = build_confusion_matrix("abc", "ac")  # 'b' deleted
        ranking = top_confused_chars(cm, exclude_empty=True)
        assert all(entry["char"] != EMPTY_CHAR for entry in ranking)
| # =========================================================================== | |
| # Tests LigatureScore | |
| # =========================================================================== | |
| from picarones.core.char_scores import ( | |
| LIGATURE_TABLE, | |
| LigatureScore, | |
| DiacriticScore, | |
| compute_ligature_score, | |
| compute_diacritic_score, | |
| aggregate_ligature_scores, | |
| aggregate_diacritic_scores, | |
| ) | |
class TestLigatureTable:
    """Checks on the content and shape of ``LIGATURE_TABLE``."""

    def test_fi_ligature_present(self):
        """U+FB01 (fi ligature) is a key of the table."""
        assert "\uFB01" in LIGATURE_TABLE

    def test_fl_ligature_present(self):
        """U+FB02 (fl ligature) is a key of the table."""
        assert "\uFB02" in LIGATURE_TABLE

    def test_oe_ligature_present(self):
        """U+0153 (œ) is a key of the table."""
        assert "\u0153" in LIGATURE_TABLE

    def test_ae_ligature_present(self):
        """U+00E6 (æ) is a key of the table."""
        assert "\u00E6" in LIGATURE_TABLE

    def test_ff_ligature_present(self):
        """U+FB00 (ff ligature) is a key of the table."""
        assert "\uFB00" in LIGATURE_TABLE

    def test_equivalents_are_lists(self):
        """Every table value is a non-empty list of equivalents."""
        for equivalents in LIGATURE_TABLE.values():
            assert isinstance(equivalents, list)
            assert len(equivalents) >= 1
class TestComputeLigatureScore:
    """Checks on ``compute_ligature_score(gt, ocr)``.

    Ligature characters are written as ``\\uXXXX`` escapes so that the ground
    truth really contains the ligature code point even if an editor or a text
    pipeline normalises the file content.
    """

    def test_no_ligatures_in_gt(self):
        """Without any ligature in the GT the score is 1.0 over zero items."""
        result = compute_ligature_score("bonjour monde", "bonjour monde")
        assert result.score == pytest.approx(1.0)
        assert result.total_in_gt == 0

    def test_ligature_correctly_recognized(self):
        """GT holds the U+FB01 ligature and the OCR spells it out as 'fi'."""
        result = compute_ligature_score("\uFB01n", "fin")
        assert result.total_in_gt == 1
        assert result.score == pytest.approx(1.0)

    def test_ligature_unicode_to_unicode(self):
        """GT and OCR both carry the U+FB01 ligature."""
        result = compute_ligature_score("\uFB01n", "\uFB01n")
        assert result.score == pytest.approx(1.0)

    def test_oe_ligature(self):
        """'œ' in the GT rendered as 'oe' by the OCR counts as recognised."""
        result = compute_ligature_score("œuvre", "oeuvre")
        assert result.total_in_gt == 1
        assert result.score == pytest.approx(1.0)

    def test_ae_ligature(self):
        """'æ' in the GT rendered as 'ae' by the OCR counts as recognised."""
        result = compute_ligature_score("æther", "aether")
        assert result.total_in_gt == 1
        assert result.score == pytest.approx(1.0)

    def test_as_dict_structure(self):
        """``as_dict()`` exposes the counters, the score and the detail map."""
        exported = compute_ligature_score("\uFB01n", "fin").as_dict()
        assert "total_in_gt" in exported
        assert "correctly_recognized" in exported
        assert "score" in exported
        assert "per_ligature" in exported

    def test_empty_texts(self):
        """Two empty strings give a score of 1.0 over zero items."""
        result = compute_ligature_score("", "")
        assert result.score == pytest.approx(1.0)
        assert result.total_in_gt == 0
class TestComputeDiacriticScore:
    """Checks on ``compute_diacritic_score(gt, ocr)``."""

    def test_no_diacritics(self):
        """Plain ASCII text scores 1.0 with no diacritic counted in the GT."""
        outcome = compute_diacritic_score("bonjour", "bonjour")
        assert outcome.score == pytest.approx(1.0)
        assert outcome.total_in_gt == 0

    def test_accent_preserved(self):
        """When the OCR keeps every accent, all GT diacritics are recognised."""
        outcome = compute_diacritic_score("été", "été")
        assert outcome.score == pytest.approx(1.0)
        assert outcome.correctly_recognized == outcome.total_in_gt

    def test_accent_lost(self):
        """'é' read as 'e' lowers the recognised count and the score."""
        outcome = compute_diacritic_score("étude", "etude")
        assert outcome.total_in_gt >= 1
        assert outcome.correctly_recognized < outcome.total_in_gt
        assert outcome.score < 1.0

    def test_cedille_tracked(self):
        """'ç' read as 'c' is counted as a lost diacritic."""
        outcome = compute_diacritic_score("façon", "facon")
        assert outcome.total_in_gt >= 1
        assert outcome.score < 1.0

    def test_empty_texts(self):
        """Two empty strings give a score of 1.0."""
        assert compute_diacritic_score("", "").score == pytest.approx(1.0)

    def test_as_dict_structure(self):
        """``as_dict()`` exposes the two counters and the score."""
        exported = compute_diacritic_score("été", "ete").as_dict()
        assert "total_in_gt" in exported
        assert "correctly_recognized" in exported
        assert "score" in exported
class TestAggregateLigatureScores:
    """Checks on ``aggregate_ligature_scores``, which returns a plain mapping."""

    def test_empty_list(self):
        """Aggregating nothing gives a score of 1.0 over zero ligatures."""
        summary = aggregate_ligature_scores([])
        assert summary["score"] == pytest.approx(1.0)
        assert summary["total_in_gt"] == 0

    def test_aggregation(self):
        """Counters are summed and the score is recomputed from the sums."""
        per_document = [
            LigatureScore(total_in_gt=4, correctly_recognized=3, score=0.75),
            LigatureScore(total_in_gt=2, correctly_recognized=2, score=1.0),
        ]
        summary = aggregate_ligature_scores(per_document)
        assert summary["total_in_gt"] == 6
        assert summary["correctly_recognized"] == 5
        # 5 recognised out of 6 overall, not the mean of 0.75 and 1.0.
        assert summary["score"] == pytest.approx(5/6, abs=1e-4)
class TestAggregateDiacriticScores:
    """Checks on ``aggregate_diacritic_scores``, which returns a plain mapping."""

    def test_aggregation(self):
        """Both counters are summed across the per-document scores."""
        per_document = [
            DiacriticScore(total_in_gt=10, correctly_recognized=8, score=0.8),
            DiacriticScore(total_in_gt=5, correctly_recognized=5, score=1.0),
        ]
        summary = aggregate_diacritic_scores(per_document)
        assert summary["total_in_gt"] == 15
        assert summary["correctly_recognized"] == 13
| # =========================================================================== | |
| # Tests TaxonomyResult | |
| # =========================================================================== | |
| from picarones.core.taxonomy import ( | |
| TaxonomyResult, | |
| ERROR_CLASSES, | |
| classify_errors, | |
| aggregate_taxonomy, | |
| ) | |
class TestErrorClasses:
    """Checks on the ``ERROR_CLASSES`` collection of taxonomy class names."""

    def test_nine_classes(self):
        """The taxonomy defines exactly nine classes."""
        assert len(ERROR_CLASSES) == 9

    def test_class_names(self):
        """A handful of expected class names are present."""
        expected_names = (
            "visual_confusion",
            "diacritic_error",
            "case_error",
            "ligature_error",
            "lacuna",
        )
        for name in expected_names:
            assert name in ERROR_CLASSES
class TestClassifyErrors:
    """Checks on ``classify_errors(gt, ocr)`` and the result it returns."""

    def test_identical_texts(self):
        """Identical inputs produce no classified error."""
        result = classify_errors("bonjour monde", "bonjour monde")
        assert result.total_errors == 0

    def test_empty_texts(self):
        """Two empty strings produce no classified error."""
        result = classify_errors("", "")
        assert result.total_errors == 0

    def test_case_error_detected(self):
        """Lower-cased capitals are counted under ``case_error``."""
        result = classify_errors("Bonjour Monde", "bonjour monde")
        assert result.counts["case_error"] >= 1

    def test_diacritic_error_detected(self):
        """Dropped accents are counted under ``diacritic_error``."""
        result = classify_errors("été chez nous", "ete chez nous")
        assert result.counts["diacritic_error"] >= 1

    def test_lacuna_detected(self):
        """Words missing from the end of the OCR are counted under ``lacuna``."""
        result = classify_errors("le chat dort paisiblement", "le chat")
        assert result.counts["lacuna"] >= 1

    def test_segmentation_detected(self):
        """'hello world' merged into 'helloworld' is run through the classifier."""
        result = classify_errors("hello world test", "helloworld test")
        # NOTE(review): ``>= 0`` is always true, so this only checks that the key
        # exists and the call does not raise. The original comment says the
        # error may be filed under the hapax class instead; tighten once the
        # expected class is confirmed.
        assert result.counts["segmentation_error"] >= 0

    def test_ligature_error_detected(self):
        """GT with the U+FB01 ligature vs. OCR with plain 'fi' is classified."""
        # The GT is written with an explicit escape so it really holds the
        # ligature code point; with two identical strings the ligature path
        # would never be exercised.
        result = classify_errors("\uFB01n de siècle", "fin de siècle")
        # NOTE(review): ``>= 0`` is always true. The original comment states the
        # two spellings are equivalent (no error expected); confirm and assert
        # the exact count.
        assert result.total_errors >= 0

    def test_as_dict_structure(self):
        """``as_dict()`` exposes counts, total, distribution and examples."""
        exported = classify_errors("test erreur ici", "test erreur là").as_dict()
        assert "counts" in exported
        assert "total_errors" in exported
        assert "class_distribution" in exported
        assert "examples" in exported

    def test_from_dict_roundtrip(self):
        """``from_dict(as_dict())`` keeps the total and the per-class counts."""
        result = classify_errors("bonjour monde", "Bonjour monde")
        restored = TaxonomyResult.from_dict(result.as_dict())
        assert restored.total_errors == result.total_errors
        assert restored.counts == result.counts

    def test_class_distribution_sums_to_one(self):
        """A non-empty class distribution sums to 1."""
        result = classify_errors("abc def ghi", "xyz uvw rst")
        dist = result.class_distribution
        if dist:
            assert abs(sum(dist.values()) - 1.0) < 1e-6

    def test_all_classes_in_counts(self):
        """Every class of ``ERROR_CLASSES`` has an entry in ``counts``."""
        result = classify_errors("test", "teSt")
        for cls in ERROR_CLASSES:
            assert cls in result.counts
class TestAggregateTaxonomy:
    """Checks on ``aggregate_taxonomy``, which returns a plain mapping."""

    @staticmethod
    def _counts(visual, diacritic):
        """Build a full ``counts`` mapping with every other class set to 0."""
        explicit = ("visual_confusion", "diacritic_error")
        zeros = {name: 0 for name in ERROR_CLASSES if name not in explicit}
        return {"visual_confusion": visual, "diacritic_error": diacritic, **zeros}

    def test_empty(self):
        """Aggregating nothing gives a total of zero errors."""
        assert aggregate_taxonomy([])["total_errors"] == 0

    def test_sums_counts(self):
        """Per-class counts and the grand total are summed across results."""
        first = TaxonomyResult(counts=self._counts(2, 1), total_errors=3)
        second = TaxonomyResult(counts=self._counts(1, 3), total_errors=4)
        summary = aggregate_taxonomy([first, second])
        assert summary["counts"]["visual_confusion"] == 3
        assert summary["counts"]["diacritic_error"] == 4
        assert summary["total_errors"] == 7
| # =========================================================================== | |
| # Tests StructureResult | |
| # =========================================================================== | |
| from picarones.core.structure import ( | |
| StructureResult, | |
| analyze_structure, | |
| aggregate_structure, | |
| ) | |
class TestAnalyzeStructure:
    """Checks on ``analyze_structure(gt, ocr)`` and on ``StructureResult``."""

    def test_identical_single_line(self):
        """One identical line: one line on each side, no fusion, no split."""
        report = analyze_structure("ligne unique", "ligne unique")
        assert report.gt_line_count == 1
        assert report.ocr_line_count == 1
        assert report.line_fusion_count == 0
        assert report.line_fragmentation_count == 0

    def test_empty_texts(self):
        """Empty inputs count zero lines on both sides."""
        report = analyze_structure("", "")
        assert report.gt_line_count == 0
        assert report.ocr_line_count == 0

    def test_multiline_equal(self):
        """Three identical lines are counted as three on both sides."""
        text = "ligne 1\nligne 2\nligne 3"
        report = analyze_structure(text, text)
        assert report.gt_line_count == 3
        assert report.ocr_line_count == 3

    def test_line_fusion_detected(self):
        """Two GT lines merged by the OCR give fewer OCR lines than GT lines."""
        reference = "ligne 1\nligne 2\nligne 3"
        merged = "ligne 1 ligne 2\nligne 3"  # first two lines fused into one
        report = analyze_structure(reference, merged)
        assert report.ocr_line_count < report.gt_line_count

    def test_reading_order_score_perfect(self):
        """Identical text gets a reading-order score above 0.9."""
        sentence = "le chat dort ici"
        report = analyze_structure(sentence, sentence)
        assert report.reading_order_score > 0.9

    def test_reading_order_score_low_for_scrambled(self):
        """Reversed word order gets a reading-order score below 1.0."""
        reference = "le chat dort paisiblement sur le canapé"
        reversed_words = "canapé sur le paisiblement dort chat le"
        report = analyze_structure(reference, reversed_words)
        assert report.reading_order_score < 1.0

    def test_line_accuracy_perfect(self):
        """Identical two-line texts give a line accuracy of 1.0."""
        reference = "ligne 1\nligne 2"
        hypothesis = "ligne 1\nligne 2"
        report = analyze_structure(reference, hypothesis)
        assert report.line_accuracy == pytest.approx(1.0)

    def test_line_accuracy_degraded(self):
        """One OCR line against four GT lines gives a line accuracy below 1.0."""
        reference = "ligne 1\nligne 2\nligne 3\nligne 4"
        report = analyze_structure(reference, "ligne 1")
        assert report.line_accuracy < 1.0

    def test_as_dict_structure(self):
        """``as_dict()`` exposes every structural metric."""
        exported = analyze_structure(
            "ligne 1\nligne 2", "ligne 1\nligne 2"
        ).as_dict()
        expected_keys = (
            "gt_line_count",
            "ocr_line_count",
            "line_fusion_count",
            "line_fragmentation_count",
            "reading_order_score",
            "paragraph_conservation_score",
            "line_accuracy",
        )
        for name in expected_keys:
            assert name in exported

    def test_from_dict_roundtrip(self):
        """``from_dict(as_dict())`` keeps both line counts."""
        report = analyze_structure("a\nb\nc", "a\nb")
        restored = StructureResult.from_dict(report.as_dict())
        assert restored.gt_line_count == report.gt_line_count
        assert restored.ocr_line_count == report.ocr_line_count

    def test_line_fusion_rate_property(self):
        """2 fusions over 10 GT lines gives a fusion rate of 0.2."""
        report = StructureResult(
            gt_line_count=10, ocr_line_count=8, line_fusion_count=2
        )
        assert report.line_fusion_rate == pytest.approx(0.2)

    def test_line_fragmentation_rate_property(self):
        """3 fragmentations over 5 GT lines gives a fragmentation rate of 0.6."""
        report = StructureResult(
            gt_line_count=5, ocr_line_count=8, line_fragmentation_count=3
        )
        assert report.line_fragmentation_rate == pytest.approx(0.6)
class TestAggregateStructure:
    """Checks on ``aggregate_structure``, which returns a plain mapping."""

    def test_empty(self):
        """Aggregating nothing gives an empty mapping."""
        assert aggregate_structure([]) == {}

    def test_single_result(self):
        """With one result, the mean reading-order score is that result's score."""
        single = StructureResult(
            gt_line_count=5,
            ocr_line_count=5,
            reading_order_score=0.9,
            paragraph_conservation_score=1.0,
        )
        summary = aggregate_structure([single])
        assert summary["mean_reading_order_score"] == pytest.approx(0.9)
        assert summary["document_count"] == 1

    def test_mean_fusion_rate(self):
        """Fusion rates of 0.2 and 0.4 average to 0.3."""
        low = StructureResult(gt_line_count=10, ocr_line_count=8, line_fusion_count=2)
        high = StructureResult(gt_line_count=10, ocr_line_count=6, line_fusion_count=4)
        summary = aggregate_structure([low, high])
        assert summary["mean_line_fusion_rate"] == pytest.approx(0.3, rel=1e-3)
| # =========================================================================== | |
| # Tests ImageQualityResult | |
| # =========================================================================== | |
| from picarones.core.image_quality import ( | |
| ImageQualityResult, | |
| generate_mock_quality_scores, | |
| aggregate_image_quality, | |
| _global_quality_score, | |
| ) | |
class TestImageQualityResult:
    """Checks on ``ImageQualityResult``: tiers, export and reconstruction."""

    def test_quality_tier_good(self):
        """A quality score of 0.8 falls in the 'good' tier."""
        sample = ImageQualityResult(quality_score=0.8)
        assert sample.quality_tier == "good"
        assert sample.is_good_quality is True

    def test_quality_tier_medium(self):
        """A quality score of 0.55 falls in the 'medium' tier."""
        sample = ImageQualityResult(quality_score=0.55)
        assert sample.quality_tier == "medium"
        assert sample.is_good_quality is False

    def test_quality_tier_poor(self):
        """A quality score of 0.2 falls in the 'poor' tier."""
        assert ImageQualityResult(quality_score=0.2).quality_tier == "poor"

    def test_as_dict_structure(self):
        """``as_dict()`` exposes every metric plus the derived tier."""
        sample = ImageQualityResult(
            sharpness_score=0.8,
            noise_level=0.1,
            rotation_degrees=0.5,
            contrast_score=0.9,
            quality_score=0.75,
            analysis_method="mock",
        )
        exported = sample.as_dict()
        expected_keys = (
            "sharpness_score",
            "noise_level",
            "rotation_degrees",
            "contrast_score",
            "quality_score",
            "quality_tier",
            "analysis_method",
        )
        for name in expected_keys:
            assert name in exported

    def test_from_dict_roundtrip(self):
        """``from_dict(as_dict())`` keeps the scores and the analysis method."""
        sample = ImageQualityResult(
            sharpness_score=0.7,
            noise_level=0.2,
            rotation_degrees=1.0,
            contrast_score=0.8,
            quality_score=0.65,
            analysis_method="pillow",
        )
        restored = ImageQualityResult.from_dict(sample.as_dict())
        assert restored.sharpness_score == pytest.approx(
            sample.sharpness_score, rel=1e-3
        )
        assert restored.quality_score == pytest.approx(
            sample.quality_score, rel=1e-3
        )
        assert restored.analysis_method == sample.analysis_method

    def test_from_dict_ignores_quality_tier(self):
        """``from_dict`` accepts a payload that carries a ``quality_tier`` key."""
        # quality_tier is a property, not an init parameter, so from_dict has to
        # discard it instead of forwarding it to the constructor.
        payload = {
            "sharpness_score": 0.5,
            "noise_level": 0.3,
            "rotation_degrees": 0.0,
            "contrast_score": 0.6,
            "quality_score": 0.5,
            "analysis_method": "mock",
            "quality_tier": "medium",
        }
        restored = ImageQualityResult.from_dict(payload)
        assert restored.quality_score == pytest.approx(0.5)
class TestGenerateMockQualityScores:
    """Checks on ``generate_mock_quality_scores``."""

    def test_returns_image_quality_result(self):
        """The generator returns an ``ImageQualityResult``."""
        generated = generate_mock_quality_scores("folio_001")
        assert isinstance(generated, ImageQualityResult)

    def test_scores_in_range(self):
        """Quality, sharpness, noise and contrast all lie within [0, 1]."""
        generated = generate_mock_quality_scores("folio_001", seed=42)
        assert 0.0 <= generated.quality_score <= 1.0
        assert 0.0 <= generated.sharpness_score <= 1.0
        assert 0.0 <= generated.noise_level <= 1.0
        assert 0.0 <= generated.contrast_score <= 1.0

    def test_reproducible_with_seed(self):
        """The same document id and seed give the same quality score."""
        first = generate_mock_quality_scores("folio_001", seed=42)
        second = generate_mock_quality_scores("folio_001", seed=42)
        assert first.quality_score == second.quality_score

    def test_analysis_method_mock(self):
        """Generated results are tagged with the 'mock' analysis method."""
        assert generate_mock_quality_scores("folio_001").analysis_method == "mock"

    def test_no_error(self):
        """Generated results carry no error."""
        assert generate_mock_quality_scores("folio_001").error is None
class TestGlobalQualityScore:
    """Checks on the private ``_global_quality_score`` combination function."""

    def test_perfect_input(self):
        """Best value on every component gives a score of 1.0."""
        combined = _global_quality_score(
            sharpness=1.0, noise=0.0, rotation_abs=0.0, contrast=1.0
        )
        assert combined == pytest.approx(1.0)

    def test_worst_input(self):
        """Worst value on every component gives a score of 0.0."""
        combined = _global_quality_score(
            sharpness=0.0, noise=1.0, rotation_abs=10.0, contrast=0.0
        )
        assert combined == pytest.approx(0.0)

    def test_medium_input(self):
        """Mid-range components give a score strictly between 0 and 1."""
        combined = _global_quality_score(
            sharpness=0.5, noise=0.5, rotation_abs=0.0, contrast=0.5
        )
        assert 0.0 < combined < 1.0
class TestAggregateImageQuality:
    """Checks on ``aggregate_image_quality``, which returns a plain mapping."""

    def test_empty_list(self):
        """Aggregating nothing gives an empty mapping."""
        assert aggregate_image_quality([]) == {}

    def test_single_result(self):
        """With one result, the mean quality score is that result's score."""
        single = ImageQualityResult(quality_score=0.75, analysis_method="mock")
        summary = aggregate_image_quality([single])
        assert summary["mean_quality_score"] == pytest.approx(0.75)
        assert summary["document_count"] == 1

    def test_tier_distribution(self):
        """One result per tier gives a count of 1 in each tier."""
        samples = [
            ImageQualityResult(quality_score=score, analysis_method="mock")
            for score in (0.8, 0.5, 0.2)  # good, medium, poor
        ]
        distribution = aggregate_image_quality(samples)["quality_distribution"]
        for tier in ("good", "medium", "poor"):
            assert distribution[tier] == 1

    def test_scores_list_present(self):
        """The summary carries a ``scores`` list with one entry per result."""
        samples = [ImageQualityResult(quality_score=0.6, analysis_method="mock")]
        summary = aggregate_image_quality(samples)
        assert "scores" in summary
        assert len(summary["scores"]) == 1

    def test_errors_excluded(self):
        """A result carrying an error is left out of ``document_count``."""
        valid = ImageQualityResult(quality_score=0.8, analysis_method="mock")
        failed = ImageQualityResult(
            quality_score=0.0, analysis_method="none", error="file not found"
        )
        summary = aggregate_image_quality([valid, failed])
        assert summary["document_count"] == 1
| # =========================================================================== | |
| # Tests d'intégration Sprint 5 (fixtures + rapport) | |
| # =========================================================================== | |
class TestFixturesSprint5:
    """Integration checks: the sample benchmark carries the Sprint 5 metrics."""

    @staticmethod
    def _engine_reports():
        """Generate the sample benchmark and return its engine reports."""
        # Imported lazily, as every test of this class originally did.
        from picarones.fixtures import generate_sample_benchmark

        return generate_sample_benchmark().engine_reports

    def test_doc_result_has_confusion_matrix(self):
        """Document results carry a confusion matrix."""
        for report in self._engine_reports():
            for doc in report.document_results:
                assert doc.confusion_matrix is not None, (
                    f"confusion_matrix manquante pour {report.engine_name}/{doc.doc_id}"
                )
                # NOTE(review): indentation was lost in the reviewed copy; the
                # break is assumed to close the inner loop (first document of
                # each engine only) — confirm against the real file.
                break

    def test_doc_result_has_char_scores(self):
        """The first document of each engine has ligature and diacritic scores."""
        for report in self._engine_reports():
            first_doc = report.document_results[0]
            assert first_doc.char_scores is not None
            assert "ligature" in first_doc.char_scores
            assert "diacritic" in first_doc.char_scores

    def test_doc_result_has_taxonomy(self):
        """The first document of each engine has taxonomy counts and a total."""
        for report in self._engine_reports():
            first_doc = report.document_results[0]
            assert first_doc.taxonomy is not None
            assert "counts" in first_doc.taxonomy
            assert "total_errors" in first_doc.taxonomy

    def test_doc_result_has_structure(self):
        """The first document of each engine has structural metrics."""
        for report in self._engine_reports():
            first_doc = report.document_results[0]
            assert first_doc.structure is not None
            assert "gt_line_count" in first_doc.structure

    def test_doc_result_has_image_quality(self):
        """The first document of each engine has an image quality score."""
        for report in self._engine_reports():
            first_doc = report.document_results[0]
            assert first_doc.image_quality is not None
            assert "quality_score" in first_doc.image_quality

    def test_engine_report_has_aggregated_confusion(self):
        """Each engine report has an aggregated confusion matrix."""
        for report in self._engine_reports():
            assert report.aggregated_confusion is not None
            assert "matrix" in report.aggregated_confusion

    def test_engine_report_has_aggregated_char_scores(self):
        """Each engine report has aggregated ligature and diacritic scores."""
        for report in self._engine_reports():
            assert report.aggregated_char_scores is not None
            assert "ligature" in report.aggregated_char_scores
            assert "diacritic" in report.aggregated_char_scores

    def test_engine_report_ligature_score_property(self):
        """``ligature_score`` is set and lies within [0, 1] for each engine."""
        for report in self._engine_reports():
            value = report.ligature_score
            assert value is not None
            assert 0.0 <= value <= 1.0

    def test_engine_report_diacritic_score_property(self):
        """``diacritic_score`` is set and lies within [0, 1] for each engine."""
        for report in self._engine_reports():
            value = report.diacritic_score
            assert value is not None
            assert 0.0 <= value <= 1.0

    def test_engine_report_has_aggregated_taxonomy(self):
        """Each engine report has an aggregated taxonomy with a total."""
        for report in self._engine_reports():
            assert report.aggregated_taxonomy is not None
            assert "total_errors" in report.aggregated_taxonomy

    def test_engine_report_has_aggregated_structure(self):
        """Each engine report has aggregated structural metrics."""
        for report in self._engine_reports():
            assert report.aggregated_structure is not None
            assert "mean_reading_order_score" in report.aggregated_structure

    def test_engine_report_has_aggregated_image_quality(self):
        """Each engine report has aggregated image quality metrics."""
        for report in self._engine_reports():
            assert report.aggregated_image_quality is not None
            assert "mean_quality_score" in report.aggregated_image_quality

    def test_bad_engine_has_more_errors(self):
        """'ancien_moteur' must show more taxonomy errors than 'pero_ocr'."""
        by_name = {}
        for report in self._engine_reports():
            # Keep the first report for each name, as ``next(...)`` would.
            by_name.setdefault(report.engine_name, report)
        # A missing engine raised StopIteration originally; keep that behaviour.
        for required in ("pero_ocr", "ancien_moteur"):
            if required not in by_name:
                raise StopIteration
        reference_total = by_name["pero_ocr"].aggregated_taxonomy["total_errors"]
        degraded_total = by_name["ancien_moteur"].aggregated_taxonomy["total_errors"]
        assert degraded_total > reference_total
class TestReportSprint5:
    """Integration checks: Sprint 5 metrics reach the report data, HTML and JSON.

    The benchmark/report setup that every test repeated is factored into three
    private helpers; each test still builds its own fresh benchmark.
    """

    @staticmethod
    def _sample_benchmark():
        """Return a freshly generated sample benchmark."""
        # Imported lazily, as every test of this class originally did.
        from picarones.fixtures import generate_sample_benchmark

        return generate_sample_benchmark()

    @classmethod
    def _report_data(cls):
        """Return ``_build_report_data`` output for a fresh sample benchmark."""
        bm = cls._sample_benchmark()
        from picarones.report.generator import _build_report_data

        return _build_report_data(bm, {})

    @classmethod
    def _rendered_html(cls, tmp_path):
        """Generate the HTML report under *tmp_path* and return its text."""
        bm = cls._sample_benchmark()
        from picarones.report.generator import ReportGenerator

        out = tmp_path / "report.html"
        ReportGenerator(bm).generate(out)
        return out.read_text(encoding="utf-8")

    def test_report_data_has_ligature_score(self):
        """Every engine entry of the report data has a ``ligature_score``."""
        for eng in self._report_data()["engines"]:
            assert "ligature_score" in eng, f"ligature_score manquant pour {eng['name']}"

    def test_report_data_has_diacritic_score(self):
        """Every engine entry of the report data has a ``diacritic_score``."""
        for eng in self._report_data()["engines"]:
            assert "diacritic_score" in eng

    def test_report_data_has_aggregated_taxonomy(self):
        """Every engine entry of the report data has an aggregated taxonomy."""
        for eng in self._report_data()["engines"]:
            assert "aggregated_taxonomy" in eng

    def test_report_data_has_aggregated_image_quality(self):
        """Every engine entry of the report data has aggregated image quality."""
        for eng in self._report_data()["engines"]:
            assert "aggregated_image_quality" in eng

    def test_html_has_characters_tab(self, tmp_path):
        """The rendered HTML contains the 'Caractères' label."""
        assert "Caractères" in self._rendered_html(tmp_path)

    def test_html_has_ligatures_column(self, tmp_path):
        """The rendered HTML contains the 'Ligatures' label."""
        assert "Ligatures" in self._rendered_html(tmp_path)

    def test_html_has_diacritiques_column(self, tmp_path):
        """The rendered HTML contains the 'Diacritiques' label."""
        assert "Diacritiques" in self._rendered_html(tmp_path)

    def test_html_has_scatter_plot(self, tmp_path):
        """The rendered HTML contains the 'chart-quality-cer' identifier."""
        assert "chart-quality-cer" in self._rendered_html(tmp_path)

    def test_html_has_taxonomy_chart(self, tmp_path):
        """The rendered HTML contains the 'chart-taxonomy' identifier."""
        assert "chart-taxonomy" in self._rendered_html(tmp_path)

    def test_html_has_confusion_heatmap(self, tmp_path):
        """The rendered HTML mentions the confusion heatmap or matrix."""
        html = self._rendered_html(tmp_path)
        assert "confusion-heatmap" in html or "matrice de confusion" in html.lower()

    def test_doc_results_have_image_quality_in_report(self):
        """At least one engine result of the first document has image quality."""
        doc = self._report_data()["documents"][0]
        has_iq = any("image_quality" in er for er in doc["engine_results"])
        assert has_iq, "Aucun document result n'a de données image_quality"

    def test_json_export_contains_sprint5_data(self, tmp_path):
        """The JSON export carries Sprint 5 data at engine and document level."""
        import json

        bm = self._sample_benchmark()
        out = tmp_path / "results.json"
        bm.to_json(out)
        # Decode explicitly as UTF-8, like the HTML tests, so the result does not
        # depend on the platform's default locale encoding.
        # NOTE(review): assumes ``to_json`` writes UTF-8 (or pure ASCII) — confirm.
        data = json.loads(out.read_text(encoding="utf-8"))
        # Engine-level aggregates.
        er = data["engine_reports"][0]
        assert "aggregated_taxonomy" in er
        assert "aggregated_char_scores" in er
        # Document-level metrics.
        dr = er["document_results"][0]
        assert "taxonomy" in dr
        assert "char_scores" in dr
        assert "structure" in dr