Spaces:

Ma-Ri-Ba-Ku
/

Picarones

Running

Picarones / tests /docs /test_translation_parity.py

Claude

feat(sprint-A11): doc institutionnelle (déploiement + RGPD + traduction EN)

95cbd83 unverified about 2 months ago

5.24 kB

	"""Tests Sprint A11 — parité de traduction FR/EN (item M-17).

	Garde-fou : pour chaque fichier ``X.md`` listé comme prioritaire,
	la version EN ``X.en.md`` doit exister et avoir des sections de
	premier niveau (``##``) qui se correspondent. Empêche une
	divergence éditoriale silencieuse entre les deux versions.
	"""

	from __future__ import annotations

	from pathlib import Path

	import pytest

	# Directory two levels above the folder that contains this file
	# (``parents[0]`` is that folder). Presumably the repository root —
	# confirm against this file's location in the tree.
	REPO_ROOT = Path(__file__).resolve().parents[2]


	# (FR, EN) pairs to validate, given as paths relative to REPO_ROOT.
	# Delivered by Sprint A11; more pairs are expected in later sprints.
	# NOTE(review): the original comment announced 5 priority pairs, but six
	# are listed below — confirm which is intended.
	TRANSLATION_PAIRS: list[tuple[str, str]] = [
	("docs/user/reading-a-report.md", "docs/user/reading-a-report.en.md"),
	("docs/developer/index.md", "docs/developer/index.en.md"),
	("docs/developer/narrative-engine.md", "docs/developer/narrative-engine.en.md"),
	("docs/developer/extending-glossary.md", "docs/developer/extending-glossary.en.md"),
	("docs/developer/extending-i18n.md", "docs/developer/extending-i18n.en.md"),
	("CONTRIBUTING.md", "CONTRIBUTING.en.md"),
	]


	def _h2_titles(path: Path) -> list[str]:
	    """Return the ordered list of ``##`` heading titles of a Markdown file.

	    Lines inside fenced code blocks (delimited by three backticks or three
	    tildes) are ignored, so that a ``## ...`` line appearing in an example
	    (e.g. a shell comment) is not counted as a section.

	    Args:
	        path: Markdown file to scan; it is read as UTF-8.

	    Returns:
	        The titles in document order, without the leading ``## `` and with
	        surrounding whitespace stripped.
	    """
	    titles: list[str] = []
	    open_fence = ""  # marker of the fence currently open, "" when outside
	    for line in path.read_text(encoding="utf-8").splitlines():
	        stripped = line.lstrip()
	        if open_fence:
	            # Inside a code block: only look for the matching closing fence.
	            if stripped.startswith(open_fence):
	                open_fence = ""
	            continue
	        if stripped.startswith(("```", "~~~")):
	            open_fence = stripped[:3]
	            continue
	        # "## " (with the space) cannot match "###..." headings, so no
	        # extra check is needed to exclude deeper levels.
	        if line.startswith("## "):
	            titles.append(line[3:].strip())
	    return titles


	@pytest.mark.parametrize("fr_path,en_path", TRANSLATION_PAIRS)
	def test_en_version_exists(fr_path: str, en_path: str) -> None:
	    """Check that each listed French source has its English file on disk.

	    The case is skipped when the French source itself is absent.
	    """
	    source_file, translated_file = REPO_ROOT / fr_path, REPO_ROOT / en_path
	    if not source_file.exists():
	        pytest.skip(f"Source FR absente : {fr_path}")
	    translation_present = translated_file.exists()
	    assert translation_present, f"Traduction EN manquante : {en_path}"


	@pytest.mark.parametrize("fr_path,en_path", TRANSLATION_PAIRS)
	def test_en_version_marks_translation_status(
	    fr_path: str, en_path: str
	) -> None:
	    """Check that the EN file declares its status and names the FR file.

	    The English file must contain the text ``translation:`` and the file
	    name of its French counterpart, so that a reader knows the French
	    version prevails when the two diverge. The case is skipped when the
	    English file does not exist.
	    """
	    translated_file = REPO_ROOT / en_path
	    if not translated_file.exists():
	        pytest.skip(f"EN manquant : {en_path}")
	    content = translated_file.read_text(encoding="utf-8")

	    marker_message = (
	        f"{en_path} doit contenir un marqueur HTML "
	        "<!-- translation: ... --> en tête."
	    )
	    assert "translation:" in content, marker_message

	    # The EN file must also mention the canonical FR file by name.
	    fr_basename = Path(fr_path).name
	    link_message = (
	        f"{en_path} doit linker vers la version FR canonique "
	        f"({fr_basename})."
	    )
	    assert fr_basename in content, link_message


	@pytest.mark.parametrize("fr_path,en_path", TRANSLATION_PAIRS)
	def test_section_count_consistent(fr_path: str, en_path: str) -> None:
	    """Check that FR and EN files have a comparable number of ``##`` sections.

	    Allowed gap between the two counts:

	    - 1 section by default (minor editorial adaptation);
	    - 5 sections when the EN file contains the text
	      ``machine + human review pending`` (translation not yet reviewed).

	    Any larger gap fails the test. The case is skipped when either file
	    is missing.
	    """
	    fr_file = REPO_ROOT / fr_path
	    en_file = REPO_ROOT / en_path
	    if not fr_file.exists() or not en_file.exists():
	        pytest.skip(f"Une des deux versions manque : {fr_path} / {en_path}")

	    fr_sections = _h2_titles(fr_file)
	    en_sections = _h2_titles(en_file)
	    diff = abs(len(fr_sections) - len(en_sections))

	    # A pending-review marker anywhere in the EN file widens the tolerance.
	    is_pending_review = (
	        "machine + human review pending" in en_file.read_text(encoding="utf-8")
	    )
	    if is_pending_review:
	        threshold = 5
	    else:
	        threshold = 1

	    assert diff <= threshold, (
	        f"Divergence de structure trop forte : FR a {len(fr_sections)} sections, "
	        f"EN a {len(en_sections)} (diff {diff} > {threshold}). "
	        f"Pending-review : {is_pending_review}.\n"
	        f"FR: {fr_sections}\nEN: {en_sections}"
	    )


	def test_no_orphan_en_translations() -> None:
	    """Ensure no ``*.en.md`` file exists without its canonical FR counterpart.

	    The tree under ``REPO_ROOT`` is searched recursively. For every
	    ``X.en.md`` found, a sibling ``X.md`` must exist in the same directory;
	    otherwise the English file is reported as an orphan.

	    Files whose repository-relative path contains a hidden component
	    (name starting with ``.``), ``__pycache__`` or ``venv`` are ignored.
	    """
	    suffix = ".en.md"
	    skipped_dirs = {"__pycache__", "venv"}
	    orphans: list[str] = []
	    # rglob("*.en.md") searches every directory level; a non-recursive
	    # pattern would leave this test with nothing to check.
	    for en in sorted(REPO_ROOT.rglob("*" + suffix)):
	        rel = en.relative_to(REPO_ROOT)
	        # Filter on repository-relative components only, so that a hidden
	        # directory *above* the checkout does not exclude every file.
	        if any(
	            part.startswith(".") or part in skipped_dirs
	            for part in rel.parts
	        ):
	            continue
	        # FR counterpart: swap only the trailing ".en.md" for ".md".
	        fr = en.with_name(en.name[: -len(suffix)] + ".md")
	        if not fr.exists():
	            orphans.append(str(rel))
	    assert not orphans, (
	        f"Traductions EN orphelines (sans version FR) : {orphans}"
	    )


	def test_translation_pairs_listed_actually_exist() -> None:
	    """Meta-test: every French path listed in ``TRANSLATION_PAIRS`` must exist.

	    Only the French side of each pair is checked here.
	    """
	    missing: list[str] = [
	        fr_rel
	        for fr_rel, _en_rel in TRANSLATION_PAIRS
	        if not (REPO_ROOT / fr_rel).exists()
	    ]
	    assert not missing, (
	        f"Fichiers FR listés mais absents : {missing}. "
	        f"Mettre à jour TRANSLATION_PAIRS si la doc canonique a bougé."
	    )