File size: 15,386 Bytes
b6bdecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
"""Tests Sprint 20 — modélisation coût + vue Pareto.

Sprint 5 du plan rapport. Couvre :
  1. `pricing.py` : chargement de la table, estimation locale vs cloud.
  2. `compute_pareto_front` : cas canoniques + dégénérés.
  3. Intégration `_build_report_data` : coût annoté, front calculé, JSON ok.
  4. Détecteurs narratifs `pareto_alternative` et `cost_outlier`.
  5. Rendu HTML : section Pareto, toggles axes, notes méthodologiques.
"""

from __future__ import annotations

import re
from pathlib import Path

import pytest

from picarones.core.narrative import build_synthesis
from picarones.core.narrative.detectors import (
    detect_cost_outlier,
    detect_pareto_alternative,
)
from picarones.core.narrative.facts import FactType
from picarones.core.pricing import (
    build_costs_for_benchmark,
    estimate_cost,
    load_pricing_database,
)
from picarones.core.statistics import compute_pareto_front


# ---------------------------------------------------------------------------
# 1. Pricing
# ---------------------------------------------------------------------------

class TestLoadPricingDatabase:
    def test_default_file_loads(self):
        defaults, table = load_pricing_database()
        assert defaults.currency == "EUR"
        assert defaults.last_updated  # doit être rempli
        assert "tesseract" in table
        assert "gpt-4o" in table
        assert "google_vision" in table

    def test_missing_file_returns_empty(self, tmp_path):
        missing = tmp_path / "nope.yaml"
        defaults, table = load_pricing_database(missing)
        assert table == {}
        assert defaults.currency == "EUR"  # fallback


class TestEstimateCost:
    def test_cloud_api_uses_listed_price(self):
        cost = estimate_cost("google_vision")
        assert cost.type == "cloud_api"
        assert cost.cost_per_1k_pages_eur > 0
        assert cost.pricing_source_url is not None
        assert cost.api_price_per_1k_pages == cost.cost_per_1k_pages_eur

    def test_local_engine_uses_seconds_times_rate(self):
        cost = estimate_cost("tesseract")
        assert cost.type == "local"
        # 2s/page × 1000 pages / 3600 × 0.08 €/h ≈ 0.044 €
        assert cost.cost_per_1k_pages_eur == pytest.approx(0.044, abs=0.01)
        assert "Temps d'inférence" in " ".join(cost.assumptions)

    def test_measured_seconds_override_indicative(self):
        cost = estimate_cost("tesseract", measured_seconds_per_page=10.0)
        # Rate = 0.08 €/h → 10 × 1000 / 3600 × 0.08 ≈ 0.22 €
        assert cost.cost_per_1k_pages_eur == pytest.approx(0.222, abs=0.01)
        assert "mesuré" in " ".join(cost.assumptions)

    def test_pipeline_prefers_llm_model(self):
        cost = estimate_cost(
            engine_name="tesseract → gpt-4o",
            llm_model="gpt-4o",
            is_pipeline=True,
        )
        assert cost.engine_key == "gpt-4o"
        assert cost.type == "cloud_api"

    def test_unknown_engine_returns_unknown_type(self):
        cost = estimate_cost("totally-not-a-real-engine")
        assert cost.type == "unknown"
        assert cost.cost_per_1k_pages_eur is None
        assert "Aucune entrée" in " ".join(cost.assumptions)

    def test_hourly_rate_override(self):
        cheap = estimate_cost("tesseract", hourly_rate_override_eur=0.01)
        expensive = estimate_cost("tesseract", hourly_rate_override_eur=10.0)
        assert expensive.cost_per_1k_pages_eur > cheap.cost_per_1k_pages_eur

    def test_carbon_estimate_computed(self):
        cost = estimate_cost("gpt-4o")
        assert cost.co2_per_1k_pages_g is not None
        assert cost.co2_per_1k_pages_g > 0
        # kWh × grid intensity → positive et cohérent
        expected = cost.kwh_per_1k_pages * cost.grid_intensity_g_co2_per_kwh
        assert cost.co2_per_1k_pages_g == pytest.approx(expected)


class TestBuildCostsForBenchmark:
    def test_annotates_all_engines(self):
        engines = [
            {"name": "tesseract", "is_pipeline": False, "pipeline_info": {}},
            {"name": "pipeline", "is_pipeline": True,
             "pipeline_info": {"llm_model": "gpt-4o"}},
        ]
        durations = {"tesseract": 1.5, "pipeline": 12.0}
        costs = build_costs_for_benchmark(engines, durations)
        assert "tesseract" in costs
        assert "pipeline" in costs
        assert costs["tesseract"]["type"] == "local"
        assert costs["pipeline"]["type"] == "cloud_api"


# ---------------------------------------------------------------------------
# 2. Pareto
# ---------------------------------------------------------------------------

class TestComputeParetoFront:
    def test_trivial_front(self):
        points = [
            {"engine": "A", "cer": 0.05, "cost": 1.0},  # meilleur CER
            {"engine": "B", "cer": 0.10, "cost": 0.1},  # meilleur coût
            {"engine": "C", "cer": 0.08, "cost": 2.0},  # dominé par A
        ]
        front = compute_pareto_front(points)
        assert set(front) == {"A", "B"}

    def test_empty_input(self):
        assert compute_pareto_front([]) == []

    def test_single_point_is_its_own_front(self):
        assert compute_pareto_front([{"engine": "X", "cer": 0.1, "cost": 1.0}]) == ["X"]

    def test_skips_points_with_missing_values(self):
        points = [
            {"engine": "A", "cer": 0.05, "cost": 1.0},
            {"engine": "B", "cost": 0.5},  # pas de cer
            {"engine": "C", "cer": 0.10},  # pas de cost
        ]
        front = compute_pareto_front(points)
        assert front == ["A"]

    def test_three_dimensional_front(self):
        # 3 objectifs à minimiser — vérifie que le détecteur marche à k>2
        points = [
            {"name": "A", "a": 1, "b": 10, "c": 100},  # meilleur en a
            {"name": "B", "a": 10, "b": 1, "c": 100},  # meilleur en b
            {"name": "C", "a": 10, "b": 10, "c": 1},   # meilleur en c
            {"name": "D", "a": 20, "b": 20, "c": 200}, # dominé partout
        ]
        front = compute_pareto_front(
            points, objectives=("a", "b", "c"), name_key="name",
        )
        assert set(front) == {"A", "B", "C"}
        assert "D" not in front

    def test_mixed_min_max(self):
        # Minimiser CER, maximiser ancrage
        points = [
            {"engine": "A", "cer": 0.05, "anchor": 0.95},  # meilleur partout
            {"engine": "B", "cer": 0.10, "anchor": 0.85},  # dominé
            {"engine": "C", "cer": 0.08, "anchor": 0.99},  # meilleur anchor
        ]
        front = compute_pareto_front(
            points,
            objectives=("cer", "anchor"),
            minimize=(True, False),
        )
        assert set(front) == {"A", "C"}

    def test_minimize_length_mismatch_raises(self):
        with pytest.raises(ValueError):
            compute_pareto_front([{"engine": "A", "cer": 0.1, "cost": 1.0}],
                                 objectives=("cer", "cost"),
                                 minimize=(True,))


# ---------------------------------------------------------------------------
# 3. Détecteurs narratifs Pareto / cost
# ---------------------------------------------------------------------------

def _pareto_data(cost_points, front=None, speed_points=None, co2_points=None):
    return {
        "ranking": [{"engine": p["engine"], "mean_cer": p["cer"],
                     "documents": 10, "failed": 0} for p in cost_points],
        "pareto": {
            "cost": {"points": cost_points, "front": front or [p["engine"] for p in cost_points]},
            "speed": {"points": speed_points or [], "front": []},
            "co2": {"points": co2_points or [], "front": []},
        },
    }


class TestDetectParetoAlternative:
    def test_emits_when_alt_is_cheaper(self):
        data = _pareto_data(
            [
                {"engine": "best", "cer": 0.02, "cost": 5.0},
                {"engine": "cheap", "cer": 0.04, "cost": 0.1},
                {"engine": "dominated", "cer": 0.05, "cost": 3.0},
            ],
            front=["best", "cheap"],
        )
        # Forcer "best" comme leader
        data["ranking"] = [
            {"engine": "best", "mean_cer": 0.02, "documents": 10, "failed": 0},
            {"engine": "cheap", "mean_cer": 0.04, "documents": 10, "failed": 0},
            {"engine": "dominated", "mean_cer": 0.05, "documents": 10, "failed": 0},
        ]
        facts = detect_pareto_alternative(data)
        assert len(facts) == 1
        assert facts[0].payload["engine"] == "cheap"
        assert facts[0].payload["leader"] == "best"
        assert facts[0].payload["cost_saving_ratio"] >= 10

    def test_empty_when_front_has_only_leader(self):
        data = _pareto_data(
            [{"engine": "best", "cer": 0.02, "cost": 5.0}],
            front=["best"],
        )
        assert detect_pareto_alternative(data) == []

    def test_empty_when_no_pareto_section(self):
        assert detect_pareto_alternative({}) == []


class TestDetectCostOutlier:
    def test_flags_expensive_dominated_engine(self):
        data = _pareto_data(
            [
                {"engine": "cheap", "cer": 0.05, "cost": 0.1},
                {"engine": "normal", "cer": 0.08, "cost": 1.0},
                {"engine": "expensive_bad", "cer": 0.15, "cost": 20.0},
            ],
            front=["cheap"],
        )
        facts = detect_cost_outlier(data)
        assert any(f.payload["engine"] == "expensive_bad" for f in facts)

    def test_does_not_flag_expensive_on_front(self):
        # Un moteur cher mais sur le front = coût justifié par qualité unique
        data = _pareto_data(
            [
                {"engine": "cheap", "cer": 0.30, "cost": 0.1},
                {"engine": "normal", "cer": 0.15, "cost": 1.0},
                {"engine": "expensive_best", "cer": 0.02, "cost": 20.0},
            ],
            front=["cheap", "expensive_best"],
        )
        facts = detect_cost_outlier(data)
        names = {f.payload["engine"] for f in facts}
        assert "expensive_best" not in names


# ---------------------------------------------------------------------------
# 4. Intégration rapport HTML
# ---------------------------------------------------------------------------

@pytest.fixture(scope="module")
def benchmark_result():
    from picarones import fixtures
    return fixtures.generate_sample_benchmark(n_docs=8)


class TestReportIntegration:
    def test_report_contains_pareto_card(self, benchmark_result, tmp_path):
        from picarones.report.generator import ReportGenerator
        out = tmp_path / "report.html"
        ReportGenerator(benchmark_result).generate(out)
        html = out.read_text(encoding="utf-8")
        assert 'class="chart-card pareto-card"' in html
        assert 'id="pareto-chart"' in html
        assert 'setParetoAxis(\'cost\')' in html
        assert 'setParetoAxis(\'speed\')' in html
        assert 'setParetoAxis(\'co2\')' in html
        assert "pareto-experimental" in html  # étiquette expérimental

    def test_report_json_contains_pareto_data(self, benchmark_result):
        from picarones.report.generator import _build_report_data
        data = _build_report_data(benchmark_result, images_b64={})
        pareto = data.get("pareto", {})
        assert "cost" in pareto
        assert "speed" in pareto
        assert "co2" in pareto
        assert "pricing_meta" in pareto
        # Les moteurs doivent porter leur champ cost
        for e in data["engines"]:
            assert "cost" in e, f"Moteur {e.get('name')} sans champ cost"

    def test_synthesis_may_include_pareto_sentence(self, benchmark_result, tmp_path):
        # Sur la fixture de démo, pero_ocr + tesseract sont sur le front → la
        # synthèse doit remonter une alternative moins chère
        from picarones.report.generator import ReportGenerator
        out = tmp_path / "report.html"
        ReportGenerator(benchmark_result).generate(out)
        html = out.read_text(encoding="utf-8")
        m = re.search(r'<ul class="synth-list">(.*?)</ul>', html, re.DOTALL)
        assert m
        ul_content = m.group(1)
        # On n'exige pas "compromis" en dur (dépend de l'i18n) — simplement
        # qu'un moteur et "€" apparaissent (signe que pareto_alternative a tiré)
        assert "€" in ul_content or "pero_ocr" in ul_content

    def test_pricing_yaml_is_packaged(self):
        """Garde-fou : le YAML doit être accessible depuis le package."""
        from picarones.core.pricing import _DEFAULT_PRICING_PATH
        assert Path(_DEFAULT_PRICING_PATH).exists()

    def test_english_locale_renders_pareto_labels(self, benchmark_result, tmp_path):
        from picarones.report.generator import ReportGenerator
        out = tmp_path / "report_en.html"
        ReportGenerator(benchmark_result, lang="en").generate(out)
        html = out.read_text(encoding="utf-8")
        assert 'data-i18n="h_pareto"' in html
        assert 'data-i18n="pareto_axis_cost"' in html


# ---------------------------------------------------------------------------
# 5. Traçabilité des nombres (anti-hallucination pour les 2 nouveaux templates)
# ---------------------------------------------------------------------------

class TestAntiHallucinationOnPareto:
    def test_pareto_alternative_numbers_traceable(self):
        data = _pareto_data(
            [
                {"engine": "A", "cer": 0.02, "cost": 5.0},
                {"engine": "B", "cer": 0.04, "cost": 0.25},
            ],
            front=["A", "B"],
        )
        data["ranking"] = [
            {"engine": "A", "mean_cer": 0.02, "documents": 10, "failed": 0},
            {"engine": "B", "mean_cer": 0.04, "documents": 10, "failed": 0},
        ]
        # Autres infos requises par build_synthesis
        data.setdefault("meta", {"document_count": 10})
        data.setdefault("engines", [
            {"name": "A", "cer": 0.02},
            {"name": "B", "cer": 0.04},
        ])
        data.setdefault("statistics", {
            "pairwise_wilcoxon": [], "bootstrap_cis": [],
            "friedman": {}, "nemenyi": {"tied_groups": [], "mean_ranks": {}},
        })
        data.setdefault("documents", [])

        result = build_synthesis(data, "fr")
        # Chercher la phrase pareto
        pareto_sentences = [s for s in result["sentences"] if "compromis" in s or "€" in s]
        assert pareto_sentences
        # Les nombres principaux doivent venir du payload : 4 (cer_pct=4), 0.25 (cost),
        # 2 (leader_cer_pct=2), 5 (leader_cost), 20 (ratio=5/0.25)
        facts_by_type = {f["type"]: f for f in result["facts"]}
        assert FactType.PARETO_ALTERNATIVE.value in facts_by_type
        payload = facts_by_type[FactType.PARETO_ALTERNATIVE.value]["payload"]
        sentence = pareto_sentences[0]
        for k in ("cost", "leader_cost", "cer_pct", "leader_cer_pct", "cost_saving_ratio"):
            val = payload.get(k)
            if val is None:
                continue
            # Au moins une représentation du nombre doit apparaître
            variants = {str(val), str(float(val)), f"{float(val):.1f}", f"{float(val):.2f}"}
            if val == int(val):
                variants.add(str(int(val)))
            assert any(v in sentence for v in variants), (
                f"Valeur {k}={val} absente de la phrase : {sentence!r}"
            )