Spaces:
Running
Running
Claude
refactor(architecture): inversion de dépendance reports_v2 + corrections audit
4255304 unverified | """Tests des helpers de :mod:`picarones.evaluation.projectors.canonical`. | |
| Couvre les branches de :func:`canonical_payload_to_text` et | |
| :func:`markdown_to_text` qui n'étaient pas exercées par les tests | |
| des vues canoniques (S14/S16) — payloads dict/list, fallback ``str()``, | |
| patterns markdown variés. | |
| """ | |
| from __future__ import annotations | |
| from picarones.evaluation.projectors.canonical import ( | |
| canonical_payload_to_text, | |
| markdown_to_text, | |
| ) | |
| # ────────────────────────────────────────────────────────────────── | |
| # markdown_to_text — patterns markdown courants | |
| # ────────────────────────────────────────────────────────────────── | |
| class TestMarkdownToText: | |
| def test_strips_headers(self) -> None: | |
| assert markdown_to_text("# Titre") == "Titre" | |
| assert markdown_to_text("## H2") == "H2" | |
| assert markdown_to_text("###### H6") == "H6" | |
| def test_strips_bullets(self) -> None: | |
| assert markdown_to_text("- élément") == "élément" | |
| assert markdown_to_text("* étoile") == "étoile" | |
| assert markdown_to_text("+ plus") == "plus" | |
| def test_strips_numbered_lists(self) -> None: | |
| assert markdown_to_text("1. premier") == "premier" | |
| assert markdown_to_text("42. quarante-deux") == "quarante-deux" | |
| def test_strips_blockquote(self) -> None: | |
| assert markdown_to_text("> citation") == "citation" | |
| assert markdown_to_text(">sans espace") == "sans espace" | |
| def test_strips_horizontal_rule(self) -> None: | |
| # Les HR sont supprimés. | |
| assert markdown_to_text("---").strip() == "" | |
| assert markdown_to_text("***") == "" | |
| def test_strips_bold_italic(self) -> None: | |
| assert markdown_to_text("**gras**") == "gras" | |
| assert markdown_to_text("*italique*") == "italique" | |
| assert markdown_to_text("***gras-italique***") == "gras-italique" | |
| def test_strips_underline(self) -> None: | |
| assert markdown_to_text("_souligné_") == "souligné" | |
| assert markdown_to_text("__double__") == "double" | |
| def test_strips_inline_code(self) -> None: | |
| assert markdown_to_text("`code`") == "code" | |
| def test_strips_code_blocks(self) -> None: | |
| text = "```python\nprint('hi')\n```" | |
| assert "print('hi')" in markdown_to_text(text) | |
| assert "```" not in markdown_to_text(text) | |
| def test_strips_links_keeps_text(self) -> None: | |
| assert markdown_to_text("[Picarones](https://example.com)") == "Picarones" | |
| def test_strips_images_keeps_alt(self) -> None: | |
| assert markdown_to_text("") == "alt" | |
| def test_combined(self) -> None: | |
| # Snippet réaliste VLM. | |
| md = "# Titre\n\n**Bonjour** _le_ `monde`\n\n- item 1\n- item 2" | |
| result = markdown_to_text(md) | |
| assert "Titre" in result | |
| assert "Bonjour" in result | |
| assert "monde" in result | |
| assert "item 1" in result | |
| # Pas de balise résiduelle. | |
| for marker in ("**", "##", "* ", "- ", "_", "`"): | |
| assert marker not in result.replace("- ", "") # contre-faux-positif | |
| # ────────────────────────────────────────────────────────────────── | |
| # canonical_payload_to_text — dispatching par type | |
| # ────────────────────────────────────────────────────────────────── | |
| class TestCanonicalPayloadToText: | |
| def test_none_returns_empty(self) -> None: | |
| assert canonical_payload_to_text(None) == "" | |
| def test_str_treated_as_markdown(self) -> None: | |
| assert canonical_payload_to_text("# Titre\n\nBonjour") == "Titre\n\nBonjour" | |
| def test_int_falls_back_to_str(self) -> None: | |
| assert canonical_payload_to_text(42) == "42" | |
| def test_float_falls_back_to_str(self) -> None: | |
| assert canonical_payload_to_text(3.14) == "3.14" | |
| def test_dict_with_text_key(self) -> None: | |
| assert canonical_payload_to_text({"text": "Bonjour"}) == "Bonjour" | |
| def test_dict_with_content_key(self) -> None: | |
| assert canonical_payload_to_text({"content": "Hello"}) == "Hello" | |
| def test_dict_with_markdown_key(self) -> None: | |
| assert canonical_payload_to_text({"markdown": "# Titre"}) == "Titre" | |
| def test_dict_with_plain_key(self) -> None: | |
| assert canonical_payload_to_text({"plain": "brut"}) == "brut" | |
| def test_dict_with_value_key(self) -> None: | |
| assert canonical_payload_to_text({"value": "v"}) == "v" | |
| def test_dict_with_paragraphs_list(self) -> None: | |
| payload = {"paragraphs": ["para 1", "para 2", "para 3"]} | |
| result = canonical_payload_to_text(payload) | |
| assert "para 1" in result | |
| assert "para 2" in result | |
| assert "para 3" in result | |
| def test_dict_with_lines_list(self) -> None: | |
| payload = {"lines": ["ligne A", "ligne B"]} | |
| result = canonical_payload_to_text(payload) | |
| assert "ligne A" in result | |
| assert "ligne B" in result | |
| def test_dict_fallback_concatenates_string_values(self) -> None: | |
| # Aucune clé standard reconnue → on concatène les str du dict. | |
| payload = {"label1": "valeur 1", "label2": "valeur 2"} | |
| result = canonical_payload_to_text(payload) | |
| assert "valeur 1" in result | |
| assert "valeur 2" in result | |
| def test_dict_fallback_recurses_into_nested_dict(self) -> None: | |
| payload = {"nested": {"text": "inner"}} | |
| assert "inner" in canonical_payload_to_text(payload) | |
| def test_dict_fallback_recurses_into_nested_list(self) -> None: | |
| payload = {"items": ["a", "b"]} | |
| result = canonical_payload_to_text(payload) | |
| assert "a" in result | |
| assert "b" in result | |
| def test_list_concatenates_with_newlines(self) -> None: | |
| result = canonical_payload_to_text(["alpha", "beta", "gamma"]) | |
| assert "alpha" in result | |
| assert "beta" in result | |
| assert "gamma" in result | |
| def test_list_filters_empty_items(self) -> None: | |
| # Les éléments vides doivent être filtrés (pas de \n\n résiduel). | |
| result = canonical_payload_to_text(["alpha", "", "beta"]) | |
| # Pas de double saut de ligne si on filtre bien les vides. | |
| assert "\n\n" not in result | |
| def test_tuple_treated_like_list(self) -> None: | |
| result = canonical_payload_to_text(("x", "y")) | |
| assert "x" in result | |
| assert "y" in result | |
| def test_list_of_dicts(self) -> None: | |
| payload = [{"text": "premier"}, {"text": "deuxième"}] | |
| result = canonical_payload_to_text(payload) | |
| assert "premier" in result | |
| assert "deuxième" in result | |
| def test_priority_text_over_content(self) -> None: | |
| # Les clés sont essayées dans l'ordre text > content > markdown. | |
| payload = {"text": "préféré", "content": "ignoré"} | |
| assert canonical_payload_to_text(payload) == "préféré" | |
| def test_non_str_value_in_known_key_skipped(self) -> None: | |
| # ``text`` doit être un str pour être pris ; sinon on continue | |
| # vers les clés suivantes ou le fallback. | |
| payload = {"text": 42, "content": "fallback"} | |
| assert canonical_payload_to_text(payload) == "fallback" | |