Picarones / tests /measurements /test_sprint84_searchability.py
Claude
test: corriger 4 défauts de classification du chantier B
315a6b9 unverified
Raw
History Blame
8.9 kB
"""Tests Sprint 84 β€” A.II.5 : recherchabilitΓ© fuzzy.
Couvre :
1. ``levenshtein_distance`` : invariants + cas standard.
2. ``compute_searchability`` :
- identitΓ© β†’ recall = 1
- aucun match β†’ recall = 0
- GT vide β†’ recall None
- hypothèse vide → recall = 0
- max_distance = 0 β†’ match exact uniquement
- max_distance large
- case insensitive par dΓ©faut
- case sensitive opt-in
- multiplicitΓ© (un token hyp utilisΓ© une seule fois)
- missed_tokens prΓ©serve la casse GT
- ValueError pour max_distance < 0
3. Cas rΓ©aliste : CER Γ©levΓ© mais findability Γ©levΓ©e.
4. ``searchability_recall_metric`` enregistrΓ© dans le registre typΓ©.
"""
from __future__ import annotations
import pytest
from picarones.measurements.searchability import (
compute_searchability,
levenshtein_distance,
searchability_recall_metric,
)
# ──────────────────────────────────────────────────────────────────────────
# 1. levenshtein_distance
# ──────────────────────────────────────────────────────────────────────────
class TestLevenshtein:
    """Pin ``levenshtein_distance`` on identity, single edits, empty inputs and a textbook pair."""

    def test_identity(self) -> None:
        """A word compared with itself needs no edit."""
        word = "hello"
        assert levenshtein_distance(word, word) == 0

    def test_one_substitution(self) -> None:
        """Replacing a single letter costs one edit."""
        distance = levenshtein_distance("hello", "hallo")
        assert distance == 1

    def test_one_deletion(self) -> None:
        """The second argument lacks one letter of the first."""
        distance = levenshtein_distance("hello", "helo")
        assert distance == 1

    def test_one_insertion(self) -> None:
        """The second argument has one letter more than the first."""
        distance = levenshtein_distance("helo", "hello")
        assert distance == 1

    def test_disjoint(self) -> None:
        """Two three-letter words with no letter in common are three edits apart."""
        source, target = "abc", "xyz"
        assert levenshtein_distance(source, target) == 3

    def test_empty_left(self) -> None:
        """An empty first argument costs the length of the second."""
        word = "abc"
        assert levenshtein_distance("", word) == 3

    def test_empty_right(self) -> None:
        """An empty second argument costs the length of the first."""
        word = "abc"
        assert levenshtein_distance(word, "") == 3

    def test_both_empty(self) -> None:
        """Two empty strings are at distance zero."""
        distance = levenshtein_distance("", "")
        assert distance == 0

    def test_classical_kitten(self) -> None:
        """Standard example from the literature: kitten -> sitting = 3."""
        distance = levenshtein_distance("kitten", "sitting")
        assert distance == 3
# ──────────────────────────────────────────────────────────────────────────
# 2. compute_searchability
# ──────────────────────────────────────────────────────────────────────────
class TestSearchability:
    """Exercise ``compute_searchability`` through the keys of the result it returns."""

    def test_identical_texts(self) -> None:
        """Same text on both sides: every token is found, none is missed."""
        text = "le roi signa"
        result = compute_searchability(text, text)
        assert result["recall"] == 1.0
        assert result["missed_tokens"] == []
        assert result["n_gt_tokens"] == 3
        assert result["n_searchable"] == 3

    def test_completely_different(self) -> None:
        """No GT word is found in the hypothesis: recall is zero."""
        expected_missed = ["alpha", "beta", "gamma"]
        result = compute_searchability("alpha beta gamma", "rouge bleu vert")
        assert result["recall"] == 0.0
        # Sorted before comparing, so the order of missed tokens is not pinned here.
        assert sorted(result["missed_tokens"]) == expected_missed

    def test_empty_gt_returns_none_recall(self) -> None:
        """With an empty GT, recall is ``None`` and the GT token count is zero."""
        result = compute_searchability("", "anything")
        assert result["recall"] is None
        assert result["n_gt_tokens"] == 0

    def test_empty_hypothesis_zero_recall(self) -> None:
        """With an empty hypothesis, every GT token is reported as missed."""
        result = compute_searchability("le roi", "")
        assert result["recall"] == 0.0
        assert result["missed_tokens"] == ["le", "roi"]

    def test_max_distance_zero_requires_exact(self) -> None:
        """``max_distance=0`` only accepts exact matches."""
        # "hallo" is at distance 1 from "hello" -> excluded when max_distance = 0
        gt, hyp = "hello world", "hallo world"
        result = compute_searchability(gt, hyp, max_distance=0)
        assert result["n_searchable"] == 1  # "world" only
        assert "hello" in result["missed_tokens"]

    def test_max_distance_two_default(self) -> None:
        """Without an explicit ``max_distance``, a two-letter swap is still found."""
        # 1 swap -> distance 2
        result = compute_searchability("Charles", "Charlse")
        assert result["recall"] == 1.0

    def test_max_distance_large_matches_loosely(self) -> None:
        """Each hypothesis word lacks its first letter; both are found with ``max_distance=2``."""
        gt, hyp = "completely different", "ompletely ifferent"
        result = compute_searchability(gt, hyp, max_distance=2)
        assert result["recall"] == 1.0

    def test_case_insensitive_by_default(self) -> None:
        """Texts differing only by letter case reach full recall by default."""
        result = compute_searchability("Le Roi", "le roi")
        assert result["recall"] == 1.0

    def test_case_sensitive_opt_in(self) -> None:
        """``case_sensitive=True`` with exact matching rejects case-only variants."""
        # "Le" is at distance 1 from "le" (case) -> excluded when matching exactly
        result = compute_searchability(
            "Le Roi",
            "le roi",
            max_distance=0,
            case_sensitive=True,
        )
        assert result["n_searchable"] == 0

    def test_multiplicity_each_hyp_used_once(self) -> None:
        """A single hypothesis token cannot satisfy two GT tokens."""
        # GT: "le le", hyp: "le" -> only one is matched
        result = compute_searchability("le le", "le")
        assert result["n_searchable"] == 1
        assert result["missed_tokens"] == ["le"]

    def test_missed_tokens_preserve_gt_case(self) -> None:
        """Missed tokens are reported with the GT's original casing."""
        gt_word = "Charlemagne"
        result = compute_searchability(gt_word, "absent")
        assert result["missed_tokens"] == ["Charlemagne"]

    def test_negative_max_distance_raises(self) -> None:
        """A negative ``max_distance`` is rejected with ``ValueError``."""
        with pytest.raises(ValueError):
            compute_searchability("a", "b", max_distance=-1)

    def test_default_max_distance_is_two(self) -> None:
        """The result echoes the ``max_distance`` in effect, which is 2 when omitted."""
        result = compute_searchability("a", "b")
        assert result["max_distance"] == 2
# ──────────────────────────────────────────────────────────────────────────
# 3. Cas rΓ©aliste : findability robuste Γ  un CER Γ©levΓ©
# ──────────────────────────────────────────────────────────────────────────
class TestRealisticCase:
    """Sentence-level scenarios contrasting concentrated and scattered errors."""

    def test_high_cer_low_findability(self) -> None:
        """Errors concentrated on a few words -> low findability."""
        gt = "le roi Charles VII signa la charte royale en 1450"
        # "Charles" vs "Charlemagne": distance 5 -> not found
        # "1450" vs "1480": distance 1 -> found
        # "charte" replaced by "lettre": distance 5 -> not found
        hyp = "le roi Charlemagne VII signa la lettre royale en 1480"
        r = compute_searchability(gt, hyp)
        assert r["n_searchable"] < r["n_gt_tokens"]
        assert "Charles" in r["missed_tokens"]
        assert "charte" in r["missed_tokens"]

    def test_high_cer_high_findability(self) -> None:
        """Errors spread out (at most 2 per word) -> high findability."""
        # NOTE: "écrivit" must contain a real "é" (U+00E9). A mis-decoded
        # two-character sequence in its place would put the word at distance 2
        # rather than the intended 1 from "ecrivit".
        gt = "maistre Pierre du Bois écrivit cette charte"
        # One mistake per altered word, each within distance 2 of its GT word.
        hyp = "maitre Piere du Boys ecrivit cete charte"
        r = compute_searchability(gt, hyp)
        # The CER is not negligible, yet every word stays findable in fuzzy mode.
        assert r["recall"] == 1.0
# ──────────────────────────────────────────────────────────────────────────
# 4. IntΓ©gration registre typΓ©
# ──────────────────────────────────────────────────────────────────────────
class TestRegistry:
    """Check that ``searchability_recall`` is reachable through the metric registry."""

    def test_metric_registered(self) -> None:
        """``select_metrics`` for a (TEXT, TEXT) pair lists the metric by name."""
        from picarones.core.metric_registry import select_metrics
        from picarones.core.modules import ArtifactType

        junction = (ArtifactType.TEXT, ArtifactType.TEXT)
        registered_names = [metric.name for metric in select_metrics(junction)]
        assert "searchability_recall" in registered_names

    def test_metric_callable(self) -> None:
        """The metric function can be called directly on two strings."""
        score = searchability_recall_metric("hello world", "helo world")
        assert score == 1.0

    def test_metric_returns_zero_for_empty_gt(self) -> None:
        """An empty GT yields 0.0 from the metric function."""
        # Convention: the typed registry expects a float, not None
        score = searchability_recall_metric("", "anything")
        assert score == 0.0

    def test_metric_via_compute_at_junction(self) -> None:
        """``compute_at_junction`` reports the metric for identical texts as 1.0."""
        from picarones.core.metric_registry import compute_at_junction
        from picarones.core.modules import ArtifactType

        junction = (ArtifactType.TEXT, ArtifactType.TEXT)
        results = compute_at_junction("le roi", "le roi", junction)
        assert "searchability_recall" in results
        assert results["searchability_recall"] == 1.0