Picarones / tests /evaluation /test_canonical_payload.py
Claude
refactor(architecture): inversion de dépendance reports_v2 + corrections audit
4255304 unverified
raw
history blame
7.61 kB
"""Tests des helpers de :mod:`picarones.evaluation.projectors.canonical`.
Couvre les branches de :func:`canonical_payload_to_text` et
:func:`markdown_to_text` qui n'étaient pas exercées par les tests
des vues canoniques (S14/S16) — payloads dict/list, fallback ``str()``,
patterns markdown variés.
"""
from __future__ import annotations
from picarones.evaluation.projectors.canonical import (
canonical_payload_to_text,
markdown_to_text,
)
# ──────────────────────────────────────────────────────────────────
# markdown_to_text — patterns markdown courants
# ──────────────────────────────────────────────────────────────────
class TestMarkdownToText:
    """Checks that ``markdown_to_text`` removes common markdown syntax.

    Each test feeds small markdown snippets to the helper and compares
    the output with the expected plain text.
    """

    def test_strips_headers(self) -> None:
        """ATX headers of level 1, 2 and 6 are reduced to their title."""
        cases = (
            ("# Titre", "Titre"),
            ("## H2", "H2"),
            ("###### H6", "H6"),
        )
        for raw, expected in cases:
            assert markdown_to_text(raw) == expected

    def test_strips_bullets(self) -> None:
        """The ``-``, ``*`` and ``+`` bullet prefixes are dropped."""
        cases = (
            ("- élément", "élément"),
            ("* étoile", "étoile"),
            ("+ plus", "plus"),
        )
        for raw, expected in cases:
            assert markdown_to_text(raw) == expected

    def test_strips_numbered_lists(self) -> None:
        """Ordered-list prefixes (one or several digits) are dropped."""
        cases = (
            ("1. premier", "premier"),
            ("42. quarante-deux", "quarante-deux"),
        )
        for raw, expected in cases:
            assert markdown_to_text(raw) == expected

    def test_strips_blockquote(self) -> None:
        """The ``>`` prefix is dropped, with or without a following space."""
        cases = (
            ("> citation", "citation"),
            (">sans espace", "sans espace"),
        )
        for raw, expected in cases:
            assert markdown_to_text(raw) == expected

    def test_strips_horizontal_rule(self) -> None:
        """Horizontal rules leave no visible text behind."""
        # The dash form is compared after stripping whitespace; the star
        # form is compared strictly against the empty string.
        dashes = markdown_to_text("---")
        assert dashes.strip() == ""
        stars = markdown_to_text("***")
        assert stars == ""

    def test_strips_bold_italic(self) -> None:
        """Star-based emphasis (bold, italic, both) keeps only the text."""
        cases = (
            ("**gras**", "gras"),
            ("*italique*", "italique"),
            ("***gras-italique***", "gras-italique"),
        )
        for raw, expected in cases:
            assert markdown_to_text(raw) == expected

    def test_strips_underline(self) -> None:
        """Underscore-based emphasis (single and double) keeps only the text."""
        cases = (
            ("_souligné_", "souligné"),
            ("__double__", "double"),
        )
        for raw, expected in cases:
            assert markdown_to_text(raw) == expected

    def test_strips_inline_code(self) -> None:
        """Backticks around inline code are removed."""
        stripped = markdown_to_text("`code`")
        assert stripped == "code"

    def test_strips_code_blocks(self) -> None:
        """Fenced blocks lose their fences but keep their content."""
        fenced = "```python\nprint('hi')\n```"
        assert "print('hi')" in markdown_to_text(fenced)
        assert "```" not in markdown_to_text(fenced)

    def test_strips_links_keeps_text(self) -> None:
        """A link is replaced by its label."""
        stripped = markdown_to_text("[Picarones](https://example.com)")
        assert stripped == "Picarones"

    def test_strips_images_keeps_alt(self) -> None:
        """An image is replaced by its alt text."""
        stripped = markdown_to_text("![alt](img.png)")
        assert stripped == "alt"

    def test_combined(self) -> None:
        """A snippet mixing several constructs keeps its words, not its markup."""
        # Realistic snippet of the kind a VLM would emit.
        snippet = "# Titre\n\n**Bonjour** _le_ `monde`\n\n- item 1\n- item 2"
        plain = markdown_to_text(snippet)

        for word in ("Titre", "Bonjour", "monde", "item 1"):
            assert word in plain

        # No leftover markup. The "- " sequences are removed before the scan
        # (guard against false positives).
        # NOTE(review): because of that replace, the "- " entry below can
        # never be found, so that particular check always passes.
        scrubbed = plain.replace("- ", "")
        leftovers = [
            marker
            for marker in ("**", "##", "* ", "- ", "_", "`")
            if marker in scrubbed
        ]
        assert leftovers == []
# ──────────────────────────────────────────────────────────────────
# canonical_payload_to_text — dispatching par type
# ──────────────────────────────────────────────────────────────────
class TestCanonicalPayloadToText:
    """Checks how ``canonical_payload_to_text`` handles each payload type.

    Covers ``None``, strings, numbers, dicts (known keys, list-valued keys,
    unknown keys, nesting), lists and tuples.
    """

    def test_none_returns_empty(self) -> None:
        """``None`` yields the empty string."""
        rendered = canonical_payload_to_text(None)
        assert rendered == ""

    def test_str_treated_as_markdown(self) -> None:
        """A string payload has its markdown header marker removed."""
        rendered = canonical_payload_to_text("# Titre\n\nBonjour")
        assert rendered == "Titre\n\nBonjour"

    def test_int_falls_back_to_str(self) -> None:
        """An integer payload is rendered as its decimal string."""
        rendered = canonical_payload_to_text(42)
        assert rendered == "42"

    def test_float_falls_back_to_str(self) -> None:
        """A float payload is rendered as its string form."""
        rendered = canonical_payload_to_text(3.14)
        assert rendered == "3.14"

    def test_dict_with_text_key(self) -> None:
        """The ``text`` key supplies the output."""
        payload = {"text": "Bonjour"}
        assert canonical_payload_to_text(payload) == "Bonjour"

    def test_dict_with_content_key(self) -> None:
        """The ``content`` key supplies the output."""
        payload = {"content": "Hello"}
        assert canonical_payload_to_text(payload) == "Hello"

    def test_dict_with_markdown_key(self) -> None:
        """The ``markdown`` key supplies the output, header marker removed."""
        payload = {"markdown": "# Titre"}
        assert canonical_payload_to_text(payload) == "Titre"

    def test_dict_with_plain_key(self) -> None:
        """The ``plain`` key supplies the output."""
        payload = {"plain": "brut"}
        assert canonical_payload_to_text(payload) == "brut"

    def test_dict_with_value_key(self) -> None:
        """The ``value`` key supplies the output."""
        payload = {"value": "v"}
        assert canonical_payload_to_text(payload) == "v"

    def test_dict_with_paragraphs_list(self) -> None:
        """Every entry of a ``paragraphs`` list appears in the output."""
        paragraphs = ["para 1", "para 2", "para 3"]
        rendered = canonical_payload_to_text({"paragraphs": paragraphs})
        for fragment in ("para 1", "para 2", "para 3"):
            assert fragment in rendered

    def test_dict_with_lines_list(self) -> None:
        """Every entry of a ``lines`` list appears in the output."""
        rendered = canonical_payload_to_text({"lines": ["ligne A", "ligne B"]})
        for fragment in ("ligne A", "ligne B"):
            assert fragment in rendered

    def test_dict_fallback_concatenates_string_values(self) -> None:
        """With no standard key present, the dict's string values are used."""
        # Neither key below is one of the recognised standard keys.
        rendered = canonical_payload_to_text(
            {"label1": "valeur 1", "label2": "valeur 2"}
        )
        for fragment in ("valeur 1", "valeur 2"):
            assert fragment in rendered

    def test_dict_fallback_recurses_into_nested_dict(self) -> None:
        """Text held by a nested dict is found through an unknown outer key."""
        rendered = canonical_payload_to_text({"nested": {"text": "inner"}})
        assert "inner" in rendered

    def test_dict_fallback_recurses_into_nested_list(self) -> None:
        """Items of a list stored under an unknown key appear in the output."""
        rendered = canonical_payload_to_text({"items": ["a", "b"]})
        for fragment in ("a", "b"):
            assert fragment in rendered

    def test_list_concatenates_with_newlines(self) -> None:
        """Every item of a list payload appears in the output."""
        rendered = canonical_payload_to_text(["alpha", "beta", "gamma"])
        for fragment in ("alpha", "beta", "gamma"):
            assert fragment in rendered

    def test_list_filters_empty_items(self) -> None:
        """An empty item must not leave a blank line in the output."""
        rendered = canonical_payload_to_text(["alpha", "", "beta"])
        # A double newline would mean the empty item was kept.
        assert "\n\n" not in rendered

    def test_tuple_treated_like_list(self) -> None:
        """Every item of a tuple payload appears in the output."""
        rendered = canonical_payload_to_text(("x", "y"))
        for fragment in ("x", "y"):
            assert fragment in rendered

    def test_list_of_dicts(self) -> None:
        """Text held by each dict of a list appears in the output."""
        entries = [{"text": "premier"}, {"text": "deuxième"}]
        rendered = canonical_payload_to_text(entries)
        for fragment in ("premier", "deuxième"):
            assert fragment in rendered

    def test_priority_text_over_content(self) -> None:
        """``text`` wins when both ``text`` and ``content`` are present."""
        # Keys are expected to be tried in the order text > content > markdown.
        rendered = canonical_payload_to_text(
            {"text": "préféré", "content": "ignoré"}
        )
        assert rendered == "préféré"

    def test_non_str_value_in_known_key_skipped(self) -> None:
        """A non-string ``text`` value is skipped in favour of ``content``."""
        # ``text`` must hold a str to be picked; otherwise the lookup moves
        # on to the following keys or to the fallback.
        rendered = canonical_payload_to_text({"text": 42, "content": "fallback"})
        assert rendered == "fallback"