Claude committed on
Commit
bb9f9b6
·
unverified ·
1 Parent(s): d83b13a

test(rename): dé-sprintage tests/evaluation (53 fichiers, git mv)

Browse files

+ sweep générique des imports inter-tests dans le script (corrige
l'ordre-dépendance qui cassait l'import sprint23→sprint19, attrapé
par la suite). Refs docs/ patchées en lockstep.

https://claude.ai/code/session_01EmLiMPJJuB44QHEFzDWUvF

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. docs/explanation/narrative-engine.en.md +3 -3
  2. docs/explanation/narrative-engine.md +3 -3
  3. docs/migration/option_b_test_inventory.md +6 -6
  4. docs/reference/comparing-views.md +1 -1
  5. scripts/rename_sprint_tests.py +21 -14
  6. tests/evaluation/metrics/{test_sprint56_abbreviations.py → test_abbreviations.py} +0 -0
  7. tests/evaluation/metrics/{test_sprint23_anti_hallucination.py → test_anti_hallucination.py} +1 -1
  8. tests/evaluation/metrics/{test_sprint73_baseline_comparison.py → test_baseline_comparison.py} +0 -0
  9. tests/evaluation/metrics/{test_sprint39_calibration.py → test_calibration.py} +0 -0
  10. tests/evaluation/metrics/{test_sprint79_cost_projection.py → test_cost_projection.py} +0 -0
  11. tests/evaluation/metrics/{test_sprint29_detector_registry.py → test_detector_registry.py} +0 -0
  12. tests/evaluation/metrics/{test_sprint58_early_modern.py → test_early_modern.py} +0 -0
  13. tests/evaluation/metrics/{test_sprint36_ensemble_narrative.py → test_ensemble_narrative.py} +0 -0
  14. tests/evaluation/metrics/{test_sprint78_equivalence_profile.py → test_equivalence_profile.py} +0 -0
  15. tests/evaluation/metrics/{test_sprint10_error_distribution.py → test_error_distribution.py} +0 -0
  16. tests/evaluation/metrics/{test_s5_extreme_inputs.py → test_extreme_inputs.py} +0 -0
  17. tests/evaluation/metrics/{test_sprint18_friedman_nemenyi_cdd.py → test_friedman_nemenyi_cdd.py} +0 -0
  18. tests/evaluation/metrics/{test_sprint93_image_predictive.py → test_image_predictive.py} +0 -0
  19. tests/evaluation/metrics/{test_sprint96_incremental_comparison.py → test_incremental_comparison.py} +0 -0
  20. tests/evaluation/metrics/{test_sprint35_inter_engine.py → test_inter_engine.py} +0 -0
  21. tests/evaluation/metrics/{test_sprint54_layout.py → test_layout.py} +0 -0
  22. tests/evaluation/metrics/{test_sprint15_llm_pipeline_bugs.py → test_llm_pipeline_bugs.py} +0 -0
  23. tests/evaluation/metrics/{test_sprint8_longitudinal_robustness.py → test_longitudinal_robustness.py} +0 -0
  24. tests/evaluation/metrics/{test_sprint44_median_default.py → test_median_default.py} +0 -0
  25. tests/evaluation/metrics/{test_sprint59_modern_archives.py → test_modern_archives.py} +0 -0
  26. tests/evaluation/metrics/{test_sprint97_module_policy.py → test_module_policy.py} +0 -0
  27. tests/evaluation/metrics/{test_sprint57_mufi.py → test_mufi.py} +0 -0
  28. tests/evaluation/metrics/{test_sprint19_narrative_engine.py → test_narrative_engine.py} +0 -0
  29. tests/evaluation/metrics/{test_sprint16_narrative_foundations.py → test_narrative_foundations.py} +0 -0
  30. tests/evaluation/metrics/{test_sprint38_ner_metrics.py → test_ner_metrics.py} +0 -0
  31. tests/evaluation/metrics/{test_sprint_a14_s1_normalization_propagation.py → test_normalization_propagation.py} +0 -0
  32. tests/evaluation/metrics/{test_sprint12_nouvelles_fonctionnalites.py → test_nouvelles_fonctionnalites.py} +0 -0
  33. tests/evaluation/metrics/{test_sprint85_numerical_sequences.py → test_numerical_sequences.py} +0 -0
  34. tests/evaluation/metrics/{test_sprint20_pareto_pricing.py → test_pareto_pricing.py} +0 -0
  35. tests/evaluation/metrics/{test_sprint71_rare_tokens.py → test_rare_tokens.py} +0 -0
  36. tests/evaluation/metrics/{test_sprint52_readability.py → test_readability.py} +0 -0
  37. tests/evaluation/metrics/{test_sprint53_reading_order.py → test_reading_order.py} +0 -0
  38. tests/evaluation/metrics/{test_sprint83_reliability.py → test_reliability.py} +0 -0
  39. tests/evaluation/metrics/{test_sprint81_robustness_projection.py → test_robustness_projection.py} +0 -0
  40. tests/evaluation/metrics/{test_sprint60_roman_numerals.py → test_roman_numerals.py} +0 -0
  41. tests/evaluation/metrics/{test_sprint84_searchability.py → test_searchability.py} +0 -0
  42. tests/evaluation/metrics/{test_sprint45_stratification.py → test_stratification.py} +0 -0
  43. tests/evaluation/metrics/{test_sprint55_unicode_blocks.py → test_unicode_blocks.py} +0 -0
  44. tests/evaluation/{test_sprint_a14_s1_compact_optin.py → test_compact_optin.py} +0 -0
  45. tests/evaluation/{test_s8_corpus_gt_levels.py → test_corpus_gt_levels.py} +0 -0
  46. tests/evaluation/{test_sprint_a14_s27_engines.py → test_engines.py} +0 -0
  47. tests/evaluation/{test_sprint34_metric_registry.py → test_metric_registry.py} +0 -0
  48. tests/evaluation/{test_sprint_a14_s1_metrics_error_returns_none.py → test_metrics_error_returns_none.py} +0 -0
  49. tests/evaluation/{test_sprint32_multi_level_gt.py → test_multi_level_gt.py} +0 -0
  50. tests/evaluation/{test_sprint_a14_s25_projector_payload.py → test_projector_payload.py} +0 -0
docs/explanation/narrative-engine.en.md CHANGED
@@ -92,7 +92,7 @@ In `tests/measurements/`:
92
  output where every number is in the payload.
93
 
94
  Update `tests/integration/test_chantier5.py` and
95
- `tests/measurements/test_sprint29_detector_registry.py` to bump
96
  the detector count.
97
 
98
  ## Editorial rules
@@ -110,8 +110,8 @@ the detector count.
110
  ## Testing the synthesis
111
 
112
  ```bash
113
- pytest tests/measurements/test_sprint19_narrative_engine.py
114
- pytest tests/measurements/test_sprint23_anti_hallucination.py
115
  ```
116
 
117
  The anti-hallucination test parses the rendered synthesis and
 
92
  output where every number is in the payload.
93
 
94
  Update `tests/integration/test_chantier5.py` and
95
+ `tests/measurements/test_detector_registry.py` to bump
96
  the detector count.
97
 
98
  ## Editorial rules
 
110
  ## Testing the synthesis
111
 
112
  ```bash
113
+ pytest tests/measurements/test_narrative_engine.py
114
+ pytest tests/measurements/test_anti_hallucination.py
115
  ```
116
 
117
  The anti-hallucination test parses the rendered synthesis and
docs/explanation/narrative-engine.md CHANGED
@@ -163,7 +163,7 @@ Dans `arbiter.py`, deux choses à considérer :
163
 
164
  Ajoutez au minimum :
165
 
166
- - Un test unitaire dans `tests/test_sprint19_narrative_engine.py` (ou
167
  un nouveau fichier) :
168
 
169
  ```python
@@ -297,7 +297,7 @@ comme tolérance numérique). Cette whitelist est désormais vide :
297
  **Si vous ajoutez un détecteur dont le template référence un nombre
298
  constant** (ex. *« seuil α = 0,05 »*), vous devez **systématiquement**
299
  le mettre dans le `payload`. Le test
300
- `test_sprint19_narrative_engine.py::test_every_number_in_synthesis_is_traceable`
301
  plus le test
302
- `test_sprint23_anti_hallucination.py::TestTemplatesNoHardcodedLiterals`
303
  échoueront sinon.
 
163
 
164
  Ajoutez au minimum :
165
 
166
+ - Un test unitaire dans `tests/test_narrative_engine.py` (ou
167
  un nouveau fichier) :
168
 
169
  ```python
 
297
  **Si vous ajoutez un détecteur dont le template référence un nombre
298
  constant** (ex. *« seuil α = 0,05 »*), vous devez **systématiquement**
299
  le mettre dans le `payload`. Le test
300
+ `test_narrative_engine.py::test_every_number_in_synthesis_is_traceable`
301
  plus le test
302
+ `test_anti_hallucination.py::TestTemplatesNoHardcodedLiterals`
303
  échoueront sinon.
docs/migration/option_b_test_inventory.md CHANGED
@@ -28,8 +28,8 @@ d'instances d'adapter en mémoire.
28
  | A4 | `tests/app/test_character_analysis_in_runner.py` | 246 LOC | 12 | Moyenne | Teste l'analyse caractère par engine. Conversion mécanique. |
29
  | A5 | `tests/app/test_sprint_h2b_canonical_in_runner.py` | 191 LOC | 9 | Moyenne | Teste l'extraction du `CANONICAL_DOCUMENT`. À adapter au nouveau ViewExecutor. |
30
  | A6 | `tests/evaluation/test_public_api.py` | — | 7 | Moyenne | API publique. Inclura un test de présence pour `RunOrchestrator`. |
31
- | A7 | `tests/evaluation/metrics/test_sprint12_nouvelles_fonctionnalites.py` | 288 LOC | 4 | Basse | Conversion mécanique. |
32
- | A8 | `tests/evaluation/metrics/test_sprint_a14_s1_normalization_propagation.py` | — | 2 | Basse | Vérifie `normalization_profile` — valide la Phase B2.5 (propagation via `EvaluationView`). |
33
  | A9 | `tests/evaluation/test_metric_hooks.py` | — | 1 | Basse | Trivial. Conversion en 1 ligne. |
34
  | A10 | `tests/architecture/test_file_budgets.py` | — | (référence uniquement) | Basse | Budgets des modules `_benchmark_*.py` à actualiser après Phase B2/B7. |
35
 
@@ -52,10 +52,10 @@ fait dans une fixture partagée.
52
  | B3 | `tests/reports/test_extra_metrics.py` | Métriques additionnelles attachées au rapport. |
53
  | B4 | `tests/reports/test_sprint72_worst_lines.py` | Worst-N lines (consomme `BenchmarkResult` non-compacté). |
54
  | B5 | `tests/evaluation/metrics/test_results.py` | API `MetricsResult` / `aggregate_metrics`. |
55
- | B6 | `tests/evaluation/metrics/test_sprint36_ensemble_narrative.py` | Narrative engine. Lit `benchmark_data` dict. |
56
- | B7 | `tests/evaluation/metrics/test_sprint44_median_default.py` | Médiane/Pareto. |
57
- | B8 | `tests/evaluation/metrics/test_sprint45_stratification.py` | Stratification du corpus. |
58
- | B9 | `tests/evaluation/test_sprint14_robust_filtering.py` | Filtre robustesse. |
59
  | B10 | `tests/adapters/corpus/test_sprint8_escriptorium_gallica.py` | Importer eScriptorium / Gallica. |
60
  | B11 | `tests/integration/test_importer_fallback_wiring.py` | Fallback importer. Test d'intégration. |
61
  | B12 | `tests/integration/test_s5_disk_full_simulation.py` | Disque plein. |
 
28
  | A4 | `tests/app/test_character_analysis_in_runner.py` | 246 LOC | 12 | Moyenne | Teste l'analyse caractère par engine. Conversion mécanique. |
29
  | A5 | `tests/app/test_sprint_h2b_canonical_in_runner.py` | 191 LOC | 9 | Moyenne | Teste l'extraction du `CANONICAL_DOCUMENT`. À adapter au nouveau ViewExecutor. |
30
  | A6 | `tests/evaluation/test_public_api.py` | — | 7 | Moyenne | API publique. Inclura un test de présence pour `RunOrchestrator`. |
31
+ | A7 | `tests/evaluation/metrics/test_nouvelles_fonctionnalites.py` | 288 LOC | 4 | Basse | Conversion mécanique. |
32
+ | A8 | `tests/evaluation/metrics/test_normalization_propagation.py` | — | 2 | Basse | Vérifie `normalization_profile` — valide la Phase B2.5 (propagation via `EvaluationView`). |
33
  | A9 | `tests/evaluation/test_metric_hooks.py` | — | 1 | Basse | Trivial. Conversion en 1 ligne. |
34
  | A10 | `tests/architecture/test_file_budgets.py` | — | (référence uniquement) | Basse | Budgets des modules `_benchmark_*.py` à actualiser après Phase B2/B7. |
35
 
 
52
  | B3 | `tests/reports/test_extra_metrics.py` | Métriques additionnelles attachées au rapport. |
53
  | B4 | `tests/reports/test_sprint72_worst_lines.py` | Worst-N lines (consomme `BenchmarkResult` non-compacté). |
54
  | B5 | `tests/evaluation/metrics/test_results.py` | API `MetricsResult` / `aggregate_metrics`. |
55
+ | B6 | `tests/evaluation/metrics/test_ensemble_narrative.py` | Narrative engine. Lit `benchmark_data` dict. |
56
+ | B7 | `tests/evaluation/metrics/test_median_default.py` | Médiane/Pareto. |
57
+ | B8 | `tests/evaluation/metrics/test_stratification.py` | Stratification du corpus. |
58
+ | B9 | `tests/evaluation/test_robust_filtering.py` | Filtre robustesse. |
59
  | B10 | `tests/adapters/corpus/test_sprint8_escriptorium_gallica.py` | Importer eScriptorium / Gallica. |
60
  | B11 | `tests/integration/test_importer_fallback_wiring.py` | Fallback importer. Test d'intégration. |
61
  | B12 | `tests/integration/test_s5_disk_full_simulation.py` | Disque plein. |
docs/reference/comparing-views.md CHANGED
@@ -26,7 +26,7 @@ masquerait des informations critiques.
26
  ### Pattern 1 : CER excellent, recherchabilité numérique catastrophique
27
 
28
  Démontré dans le test
29
- `tests/evaluation/test_sprint_a14_s16_views_consistency.py::TestDivergencePattern::test_year_corruption_invisible_to_cer_visible_to_search` :
30
 
31
  - **GT** : *"Charte signée à Paris le 14 juillet 1789 en présence du roi"*
32
  - **Hypothèse** : *"Charte signée à Paris le 14 juillet 1798 en présence du roi"*
 
26
  ### Pattern 1 : CER excellent, recherchabilité numérique catastrophique
27
 
28
  Démontré dans le test
29
+ `tests/evaluation/test_views_consistency.py::TestDivergencePattern::test_year_corruption_invisible_to_cer_visible_to_search` :
30
 
31
  - **GT** : *"Charte signée à Paris le 14 juillet 1789 en présence du roi"*
32
  - **Hypothèse** : *"Charte signée à Paris le 14 juillet 1798 en présence du roi"*
scripts/rename_sprint_tests.py CHANGED
@@ -70,12 +70,6 @@ EXTERNAL_REF_FILES = [
70
  ]
71
  # Docs : nombreuses réfs ``test_s*`` — patchées par lot via grep
72
  # ciblé au moment du renommage du dossier correspondant (cf. --apply).
73
- # Import inter-tests connu (même lot tests/evaluation/metrics) :
74
- INTRA_TEST_IMPORT = (
75
- "tests/evaluation/metrics/test_sprint23_anti_hallucination.py",
76
- "tests.evaluation.metrics.test_sprint19_narrative_engine",
77
- "tests.evaluation.metrics.test_narrative_engine",
78
- )
79
 
80
 
81
  def build_map() -> dict[str, str]:
@@ -158,14 +152,27 @@ def apply_dir(target_dir: str) -> int:
158
  if txt != orig:
159
  sp.write_text(txt, encoding="utf-8")
160
  print(f"patché refs : {src}")
161
- # Import inter-tests connu.
162
- if target_dir.rstrip("/") == "tests/evaluation/metrics":
163
- ip = REPO / INTRA_TEST_IMPORT[0]
164
- if ip.exists():
165
- t = ip.read_text(encoding="utf-8").replace(
166
- INTRA_TEST_IMPORT[1], INTRA_TEST_IMPORT[2])
167
- ip.write_text(t, encoding="utf-8")
168
- print(f"patché import inter-tests : {INTRA_TEST_IMPORT[0]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  return 0
170
 
171
 
 
70
  ]
71
  # Docs : nombreuses réfs ``test_s*`` — patchées par lot via grep
72
  # ciblé au moment du renommage du dossier correspondant (cf. --apply).
 
 
 
 
 
 
73
 
74
 
75
  def build_map() -> dict[str, str]:
 
152
  if txt != orig:
153
  sp.write_text(txt, encoding="utf-8")
154
  print(f"patché refs : {src}")
155
+ # Sweep GÉNÉRIQUE des imports inter-tests : tout module renommé
156
+ # dans ce lot, référencé en dotted-path depuis n'importe quel
157
+ # fichier de ``tests/`` (``from tests.x.y.<old_stem> import`` ou
158
+ # ``import tests.x.y.<old_stem>``), est repointé vers le nouveau
159
+ # stem. Remplace l'ancien cas hardcodé fragile (ordre-dépendant).
160
+ stem_map = {
161
+ Path(old_name).stem: Path(new_name).stem
162
+ for old_name, new_name in renamed
163
+ }
164
+ for tp in TESTS.rglob("*.py"):
165
+ t = tp.read_text(encoding="utf-8")
166
+ orig = t
167
+ for old_stem, new_stem in stem_map.items():
168
+ # Borné par ``.`` (dotted import) — pas de match partiel
169
+ # sur un préfixe de nom plus long.
170
+ t = re.sub(rf"(?<=\.){re.escape(old_stem)}(?=\s|$|\.| import)",
171
+ new_stem, t)
172
+ if t != orig:
173
+ tp.write_text(t, encoding="utf-8")
174
+ print(f"patché import inter-tests : "
175
+ f"{tp.relative_to(REPO).as_posix()}")
176
  return 0
177
 
178
 
tests/evaluation/metrics/{test_sprint56_abbreviations.py → test_abbreviations.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint23_anti_hallucination.py → test_anti_hallucination.py} RENAMED
@@ -163,7 +163,7 @@ class TestEndToEndWithEmptyWhitelist:
163
  def test_every_number_traceable_with_empty_whitelist(self, lang):
164
  from picarones.reports.narrative import extract_numbers
165
 
166
- from tests.evaluation.metrics.test_sprint19_narrative_engine import _numbers_in_payload
167
 
168
  result = build_synthesis(_full_data(), lang)
169
  allowed: set[str] = set()
 
163
  def test_every_number_traceable_with_empty_whitelist(self, lang):
164
  from picarones.reports.narrative import extract_numbers
165
 
166
+ from tests.evaluation.metrics.test_narrative_engine import _numbers_in_payload
167
 
168
  result = build_synthesis(_full_data(), lang)
169
  allowed: set[str] = set()
tests/evaluation/metrics/{test_sprint73_baseline_comparison.py → test_baseline_comparison.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint39_calibration.py → test_calibration.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint79_cost_projection.py → test_cost_projection.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint29_detector_registry.py → test_detector_registry.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint58_early_modern.py → test_early_modern.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint36_ensemble_narrative.py → test_ensemble_narrative.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint78_equivalence_profile.py → test_equivalence_profile.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint10_error_distribution.py → test_error_distribution.py} RENAMED
File without changes
tests/evaluation/metrics/{test_s5_extreme_inputs.py → test_extreme_inputs.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint18_friedman_nemenyi_cdd.py → test_friedman_nemenyi_cdd.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint93_image_predictive.py → test_image_predictive.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint96_incremental_comparison.py → test_incremental_comparison.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint35_inter_engine.py → test_inter_engine.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint54_layout.py → test_layout.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint15_llm_pipeline_bugs.py → test_llm_pipeline_bugs.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint8_longitudinal_robustness.py → test_longitudinal_robustness.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint44_median_default.py → test_median_default.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint59_modern_archives.py → test_modern_archives.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint97_module_policy.py → test_module_policy.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint57_mufi.py → test_mufi.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint19_narrative_engine.py → test_narrative_engine.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint16_narrative_foundations.py → test_narrative_foundations.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint38_ner_metrics.py → test_ner_metrics.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint_a14_s1_normalization_propagation.py → test_normalization_propagation.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint12_nouvelles_fonctionnalites.py → test_nouvelles_fonctionnalites.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint85_numerical_sequences.py → test_numerical_sequences.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint20_pareto_pricing.py → test_pareto_pricing.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint71_rare_tokens.py → test_rare_tokens.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint52_readability.py → test_readability.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint53_reading_order.py → test_reading_order.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint83_reliability.py → test_reliability.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint81_robustness_projection.py → test_robustness_projection.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint60_roman_numerals.py → test_roman_numerals.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint84_searchability.py → test_searchability.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint45_stratification.py → test_stratification.py} RENAMED
File without changes
tests/evaluation/metrics/{test_sprint55_unicode_blocks.py → test_unicode_blocks.py} RENAMED
File without changes
tests/evaluation/{test_sprint_a14_s1_compact_optin.py → test_compact_optin.py} RENAMED
File without changes
tests/evaluation/{test_s8_corpus_gt_levels.py → test_corpus_gt_levels.py} RENAMED
File without changes
tests/evaluation/{test_sprint_a14_s27_engines.py → test_engines.py} RENAMED
File without changes
tests/evaluation/{test_sprint34_metric_registry.py → test_metric_registry.py} RENAMED
File without changes
tests/evaluation/{test_sprint_a14_s1_metrics_error_returns_none.py → test_metrics_error_returns_none.py} RENAMED
File without changes
tests/evaluation/{test_sprint32_multi_level_gt.py → test_multi_level_gt.py} RENAMED
File without changes
tests/evaluation/{test_sprint_a14_s25_projector_payload.py → test_projector_payload.py} RENAMED
File without changes