"""Tests Sprint 3 — Pipelines OCR+LLM, adaptateurs LLM, bibliothèque de prompts, sur-normalisation.

Ces tests couvrent :
- La détection de sur-normalisation LLM (classe 10)
- L'OCRLLMPipeline : modes, chargement de prompts, métadonnées
- Les adaptateurs LLM (instanciation, structure)
- L'intégration dans les fixtures (tesseract → gpt-4o)
- La présence des données pipeline dans le rapport HTML
"""

from __future__ import annotations

from pathlib import Path

import pytest


# ---------------------------------------------------------------------------
# Détection de sur-normalisation (classe 10)
# ---------------------------------------------------------------------------

class TestOverNormalization:
    """Tests for LLM over-normalisation detection (error class 10).

    Every test passes a (ground truth, raw OCR, LLM output) triple to
    ``detect_over_normalization`` and inspects the returned result object.
    """

    def test_no_over_normalization(self):
        """An LLM edit that restores the ground truth yields a zero score."""
        from picarones.pipelines.over_normalization import detect_over_normalization

        reference = "nostre seigneur le roy"
        raw_ocr = "noltre seigneur le roy"    # OCR error on 'nostre'
        corrected = "nostre seigneur le roy"  # the LLM repairs it -> correct
        outcome = detect_over_normalization(reference, raw_ocr, corrected)
        assert outcome.score == 0.0
        assert outcome.over_normalized_count == 0

    def test_perfect_llm_no_over_norm(self):
        """Identical texts: zero score and all four words counted as correct OCR."""
        from picarones.pipelines.over_normalization import detect_over_normalization

        # OCR is already correct and the LLM leaves it untouched.
        sentence = "nostre seigneur le roy"
        outcome = detect_over_normalization(sentence, sentence, sentence)
        assert outcome.score == 0.0
        assert outcome.total_correct_ocr_words == 4

    def test_over_normalization_detected(self):
        """A correct OCR word rewritten by the LLM is reported as a passage."""
        from picarones.pipelines.over_normalization import detect_over_normalization

        reference = "nostre seigneur le roy"
        raw_ocr = "nostre seigneur le roy"    # OCR is correct
        corrected = "notre seigneur le roy"   # LLM turns 'nostre' into 'notre'
        outcome = detect_over_normalization(reference, raw_ocr, corrected)
        assert outcome.over_normalized_count == 1
        assert outcome.score > 0.0
        assert len(outcome.over_normalized_passages) == 1

        first_passage = outcome.over_normalized_passages[0]
        expected_fields = (("gt", "nostre"), ("ocr", "nostre"), ("llm", "notre"))
        for field, word in expected_fields:
            assert first_passage[field] == word

    def test_over_normalization_score_formula(self):
        """The score is the altered-word count over the correct-OCR word count."""
        from picarones.pipelines.over_normalization import detect_over_normalization

        # 4 words, OCR correct on all of them, the LLM alters 2
        # -> score = 2/4 = 0.5
        reference = "maistre jehan nostre dame"
        raw_ocr = "maistre jehan nostre dame"
        corrected = "maître jehan notre dame"
        outcome = detect_over_normalization(reference, raw_ocr, corrected)
        assert outcome.total_correct_ocr_words == 4
        assert outcome.over_normalized_count == 2
        assert outcome.score == pytest.approx(0.5)

    def test_as_dict_keys(self):
        """``as_dict()`` exposes the four expected keys."""
        from picarones.pipelines.over_normalization import detect_over_normalization

        outcome = detect_over_normalization("foo bar", "foo baz", "foo baz")
        payload = outcome.as_dict()
        required_keys = (
            "score",
            "total_correct_ocr_words",
            "over_normalized_count",
            "over_normalized_passages",
        )
        for key in required_keys:
            assert key in payload

    def test_empty_texts(self):
        """Three empty strings produce a zero score."""
        from picarones.pipelines.over_normalization import detect_over_normalization

        outcome = detect_over_normalization("", "", "")
        assert outcome.score == 0.0

    def test_aggregate_over_normalization(self):
        """Aggregation sums the counters; the ``None`` entry is not counted."""
        from picarones.pipelines.over_normalization import (
            OverNormalizationResult,
            aggregate_over_normalization,
        )

        per_document = [
            OverNormalizationResult(total_correct_ocr_words=10, over_normalized_count=1),
            OverNormalizationResult(total_correct_ocr_words=10, over_normalized_count=2),
            None,
        ]
        summary = aggregate_over_normalization(per_document)
        assert summary["total_correct_ocr_words"] == 20
        assert summary["over_normalized_count"] == 3
        assert summary["score"] == pytest.approx(0.15)
        assert summary["document_count"] == 2


# ---------------------------------------------------------------------------
# Bibliothèque de prompts
# ---------------------------------------------------------------------------

class TestPromptsLibrary:
    """Checks on the built-in prompt files stored under ``picarones/prompts``.

    The directory is resolved relative to this test file: two levels up, then
    ``picarones/prompts``.
    """

    _PROMPTS_DIR = Path(__file__).parent.parent / "picarones" / "prompts"

    def _read_prompt(self, filename):
        """Return the UTF-8 text of the prompt file named *filename*."""
        return (self._PROMPTS_DIR / filename).read_text(encoding="utf-8")

    def test_prompts_directory_exists(self):
        """The prompts directory is present on disk."""
        assert self._PROMPTS_DIR.is_dir()

    def test_required_prompt_files_exist(self):
        """Every required prompt file exists; all missing ones are reported at once."""
        expected = [
            "correction_medieval_french.txt",
            "correction_imprime_ancien.txt",
            "correction_image_medieval_french.txt",
            "zero_shot_medieval_french.txt",
            "zero_shot_imprime_ancien.txt",
        ]
        # Collect every missing file so a single run shows the full picture
        # instead of stopping at the first absent prompt.
        missing = [fname for fname in expected if not (self._PROMPTS_DIR / fname).exists()]
        assert not missing, f"Prompt manquant : {', '.join(missing)}"

    def test_correction_prompt_has_ocr_variable(self):
        """The text-only correction prompt contains the ``{ocr_output}`` placeholder."""
        assert "{ocr_output}" in self._read_prompt("correction_medieval_french.txt")

    def test_image_prompt_has_both_variables(self):
        """The text+image correction prompt contains the ``{ocr_output}`` placeholder."""
        # NOTE(review): the test name says "both variables" but only
        # ``{ocr_output}`` is asserted. The name of the second placeholder is
        # not visible from this file — confirm it and add the assertion.
        assert "{ocr_output}" in self._read_prompt("correction_image_medieval_french.txt")

    def test_zero_shot_prompt_has_no_ocr_variable(self):
        """The zero-shot prompt must not contain the ``{ocr_output}`` placeholder."""
        assert "{ocr_output}" not in self._read_prompt("zero_shot_medieval_french.txt")

    def test_prompts_not_empty(self):
        """Every ``*.txt`` prompt holds more than 100 non-blank-padded characters."""
        # Sorted for a deterministic failure order.
        prompt_files = sorted(self._PROMPTS_DIR.glob("*.txt"))
        # Without this guard the loop below would pass vacuously when the glob
        # matches nothing (missing or empty directory).
        assert prompt_files, f"Aucun prompt trouvé dans {self._PROMPTS_DIR}"
        for f in prompt_files:
            assert len(f.read_text(encoding="utf-8").strip()) > 100, f"Prompt trop court : {f.name}"


# ---------------------------------------------------------------------------
# PipelineMode enum
# ---------------------------------------------------------------------------

class TestPipelineMode:
    """Checks on the ``PipelineMode`` enum: member values and lookup by value."""

    def test_enum_values(self):
        """Each mode exposes the expected string value."""
        from picarones.pipelines.base import PipelineMode

        expected_values = (
            ("TEXT_ONLY", "text_only"),
            ("TEXT_AND_IMAGE", "text_and_image"),
            ("ZERO_SHOT", "zero_shot"),
        )
        # Members are resolved one at a time, in the listed order.
        for member_name, raw_value in expected_values:
            assert getattr(PipelineMode, member_name).value == raw_value

    def test_from_string(self):
        """Calling the enum with a value string returns the matching member."""
        from picarones.pipelines.base import PipelineMode

        looked_up = PipelineMode("text_only")
        assert looked_up == PipelineMode.TEXT_ONLY


# ---------------------------------------------------------------------------
# Adaptateurs LLM — structure
# ---------------------------------------------------------------------------

class TestLLMAdapters:
    """Structural checks on the LLM adapters: construction, names and models.

    The only adapter call made is ``_call`` in the missing-API-key test, which
    is expected to raise.
    """

    def test_openai_adapter_structure(self):
        """The OpenAI adapter reports its name and the requested model."""
        from picarones.llm.openai_adapter import OpenAIAdapter

        openai_llm = OpenAIAdapter(model="gpt-4o")
        assert openai_llm.name == "openai"
        assert openai_llm.model == "gpt-4o"

    def test_anthropic_adapter_structure(self):
        """The Anthropic adapter's default model name mentions 'claude'."""
        from picarones.llm.anthropic_adapter import AnthropicAdapter

        anthropic_llm = AnthropicAdapter()
        assert anthropic_llm.name == "anthropic"
        assert "claude" in anthropic_llm.model.lower()

    def test_mistral_adapter_structure(self):
        """The Mistral adapter's default model name mentions 'mistral'."""
        from picarones.llm.mistral_adapter import MistralAdapter

        mistral_llm = MistralAdapter()
        assert mistral_llm.name == "mistral"
        assert "mistral" in mistral_llm.model.lower()

    def test_ollama_adapter_structure(self):
        """The Ollama adapter reports its name and the requested model."""
        from picarones.llm.ollama_adapter import OllamaAdapter

        ollama_llm = OllamaAdapter(model="llama3")
        assert ollama_llm.name == "ollama"
        assert ollama_llm.model == "llama3"

    def test_ollama_custom_base_url(self):
        """A ``base_url`` given in the config ends up in ``_base_url``."""
        from picarones.llm.ollama_adapter import OllamaAdapter

        settings = {"base_url": "http://myserver:11434"}
        ollama_llm = OllamaAdapter(config=settings)
        assert ollama_llm._base_url == "http://myserver:11434"

    def test_llm_result_dataclass(self):
        """``LLMResult.success`` is True without an error and False with one."""
        from picarones.llm.base import LLMResult

        ok_result = LLMResult(model_id="gpt-4o", text="bonjour", duration_seconds=1.2)
        assert ok_result.success is True

        failed_result = LLMResult(
            model_id="gpt-4o", text="", duration_seconds=0.1, error="fail"
        )
        assert failed_result.success is False

    def test_missing_api_key_raises(self):
        """Calling the OpenAI adapter with no API key raises ``RuntimeError``."""
        from picarones.llm.openai_adapter import OpenAIAdapter

        openai_llm = OpenAIAdapter()
        openai_llm._api_key = None  # simulate a missing key
        with pytest.raises(RuntimeError, match="OPENAI_API_KEY"):
            openai_llm._call("test prompt")


# ---------------------------------------------------------------------------
# OCRLLMPipeline — prompt loading, name, steps
# ---------------------------------------------------------------------------

class TestOCRLLMPipeline:
    """Tests for ``OCRLLMPipeline``: prompt loading, naming and step metadata.

    A stub LLM adapter built by ``_mock_llm`` stands in for a real model.
    """

    def _mock_llm(self, response: str = "texte corrigé"):
        """Build a stub LLM adapter whose ``_call`` always returns *response*."""
        from picarones.llm.base import BaseLLMAdapter

        class MockLLM(BaseLLMAdapter):
            @property
            def name(self):
                return "mock"

            @property
            def default_model(self):
                return "mock-v1"

            def _call(self, prompt, image_b64=None):
                return response

        return MockLLM()

    def test_load_builtin_prompt(self):
        """A built-in prompt file name is loaded into ``_prompt_template``."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode

        llm = self._mock_llm()
        pipe = OCRLLMPipeline(
            llm_adapter=llm,
            mode=PipelineMode.TEXT_ONLY,
            prompt="correction_medieval_french.txt",
        )
        assert "{ocr_output}" in pipe._prompt_template

    def test_prompt_substitution_text_only(self):
        """``_build_prompt`` replaces the placeholder with the OCR text."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode

        llm = self._mock_llm()
        pipe = OCRLLMPipeline(
            llm_adapter=llm,
            mode=PipelineMode.TEXT_ONLY,
            prompt="correction_medieval_french.txt",
        )
        rendered = pipe._build_prompt(ocr_text="mon texte ocr")
        assert "mon texte ocr" in rendered
        assert "{ocr_output}" not in rendered

    def test_auto_name_text_only(self):
        """The generated name mentions both the OCR engine and the LLM model."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode
        from picarones.engines.tesseract import TesseractEngine

        engine = TesseractEngine()
        llm = self._mock_llm()
        pipe = OCRLLMPipeline(
            ocr_engine=engine,
            llm_adapter=llm,
            mode=PipelineMode.TEXT_ONLY,
        )
        generated = pipe.name
        assert "tesseract" in generated.lower()
        assert "mock-v1" in pipe.name

    def test_auto_name_zero_shot(self):
        """In zero-shot mode the generated name contains 'zero-shot'."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode

        llm = self._mock_llm()
        pipe = OCRLLMPipeline(llm_adapter=llm, mode=PipelineMode.ZERO_SHOT)
        assert "zero-shot" in pipe.name

    def test_custom_name(self):
        """An explicit ``pipeline_name`` is used verbatim."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode

        llm = self._mock_llm()
        pipe = OCRLLMPipeline(
            llm_adapter=llm,
            mode=PipelineMode.TEXT_ONLY,
            pipeline_name="mon_pipeline_custom",
        )
        assert pipe.name == "mon_pipeline_custom"

    def test_pipeline_steps_without_ocr(self):
        """Without an OCR engine, the steps info holds a single LLM step."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode

        llm = self._mock_llm()
        pipe = OCRLLMPipeline(llm_adapter=llm, mode=PipelineMode.ZERO_SHOT)
        step_list = pipe._build_steps_info()
        assert len(step_list) == 1
        only_step = step_list[0]
        assert only_step["type"] == "llm"
        assert only_step["mode"] == "zero_shot"

    def test_pipeline_steps_with_ocr(self):
        """With an OCR engine, the steps info is an OCR step then an LLM step."""
        from picarones.engines.tesseract import TesseractEngine
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode

        engine = TesseractEngine()
        llm = self._mock_llm()
        pipe = OCRLLMPipeline(
            ocr_engine=engine,
            llm_adapter=llm,
            mode=PipelineMode.TEXT_ONLY,
        )
        step_list = pipe._build_steps_info()
        assert len(step_list) == 2
        for position, kind in enumerate(("ocr", "llm")):
            assert step_list[position]["type"] == kind

    def test_load_nonexistent_prompt_raises(self):
        """Constructing a pipeline with an unknown prompt file raises."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode

        # The adapter is built inside the ``raises`` block, as part of the
        # constructor call's argument evaluation.
        with pytest.raises(FileNotFoundError):
            OCRLLMPipeline(
                llm_adapter=self._mock_llm(),
                mode=PipelineMode.TEXT_ONLY,
                prompt="inexistant_prompt_xyz.txt",
            )

    def test_text_only_requires_ocr_engine(self):
        """``_run_ocr`` without an OCR engine raises a ``ValueError``."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode

        llm = self._mock_llm()
        pipe = OCRLLMPipeline(llm_adapter=llm, mode=PipelineMode.TEXT_ONLY)
        missing_image = Path("/nonexistent/image.jpg")
        with pytest.raises(ValueError, match="ocr_engine"):
            pipe._run_ocr(missing_image)

    def test_is_pipeline_flag(self):
        """A pipeline is an instance of ``BaseOCREngine``."""
        from picarones.pipelines.base import OCRLLMPipeline, PipelineMode
        from picarones.engines.base import BaseOCREngine

        llm = self._mock_llm()
        pipe = OCRLLMPipeline(llm_adapter=llm, mode=PipelineMode.ZERO_SHOT)
        # Must be usable wherever a BaseOCREngine is expected.
        assert isinstance(pipe, BaseOCREngine)


# ---------------------------------------------------------------------------
# Intégration fixtures — pipeline tesseract → gpt-4o
# ---------------------------------------------------------------------------

class TestFixturesPipeline:
    """Integration checks on the sample benchmark's ``tesseract → gpt-4o`` pipeline.

    The benchmark is generated once per class with 3 documents and a fixed
    seed, then inspected through its engine reports.
    """

    # Engine name of the OCR+LLM pipeline entry looked up by most tests.
    _PIPELINE_NAME = "tesseract → gpt-4o"
    # Engine names whose reports are allowed to carry pipeline information.
    _PIPELINE_ENGINES = frozenset({"tesseract → gpt-4o", "gpt-4o-vision (zero-shot)"})

    @pytest.fixture(scope="class")
    def benchmark(self):
        """Sample benchmark shared by every test in this class."""
        from picarones.fixtures import generate_sample_benchmark
        return generate_sample_benchmark(n_docs=3, seed=42)

    def _pipeline_report(self, benchmark):
        """Return the pipeline's engine report, failing clearly if it is absent.

        A plain ``next()`` without a default would surface a bare
        ``StopIteration`` here instead of a readable assertion failure.
        """
        report = next(
            (r for r in benchmark.engine_reports if r.engine_name == self._PIPELINE_NAME),
            None,
        )
        assert report is not None, f"Rapport introuvable pour le moteur {self._PIPELINE_NAME!r}"
        return report

    def _pipeline_documents(self, benchmark):
        """Return the pipeline's document results, asserting there is at least one."""
        document_results = self._pipeline_report(benchmark).document_results
        # Guard against per-document loops passing vacuously on an empty list.
        assert document_results, "Aucun résultat de document dans le rapport pipeline"
        return document_results

    def test_pipeline_engine_present(self, benchmark):
        """The pipeline appears among the benchmark's engine names."""
        names = [r.engine_name for r in benchmark.engine_reports]
        assert self._PIPELINE_NAME in names

    def test_pipeline_report_has_pipeline_info(self, benchmark):
        """The report is flagged as a pipeline and records its mode and model."""
        report = self._pipeline_report(benchmark)
        assert report.is_pipeline
        assert report.pipeline_info.get("pipeline_mode") == "text_and_image"
        assert report.pipeline_info.get("llm_model") == "gpt-4o"

    def test_pipeline_documents_have_ocr_intermediate(self, benchmark):
        """Every document result carries a non-empty intermediate OCR text."""
        for dr in self._pipeline_documents(benchmark):
            assert dr.ocr_intermediate is not None, f"ocr_intermediate manquant sur {dr.doc_id}"
            assert len(dr.ocr_intermediate) > 0

    def test_pipeline_documents_have_over_normalization(self, benchmark):
        """Every document result carries over-normalisation metadata."""
        for dr in self._pipeline_documents(benchmark):
            on = dr.pipeline_metadata.get("over_normalization")
            assert on is not None, f"over_normalization manquant sur {dr.doc_id}"
            assert "score" in on
            assert "total_correct_ocr_words" in on

    def test_pipeline_report_has_aggregated_over_normalization(self, benchmark):
        """The report-level over-normalisation summary covers all 3 documents."""
        on = self._pipeline_report(benchmark).pipeline_info.get("over_normalization")
        assert on is not None
        assert "score" in on
        assert on["document_count"] == 3

    def test_pipeline_pipeline_steps_in_info(self, benchmark):
        """The recorded pipeline steps are an OCR step followed by an LLM step."""
        steps = self._pipeline_report(benchmark).pipeline_info.get("pipeline_steps", [])
        assert len(steps) == 2
        assert steps[0]["type"] == "ocr"
        assert steps[1]["type"] == "llm"

    def test_non_pipeline_reports_empty_pipeline_info(self, benchmark):
        """Reports outside the pipeline set are unflagged with empty pipeline info."""
        # Pipeline competitors (LLM or VLM) have a non-empty pipeline_info,
        # so they are excluded from this check.
        for report in benchmark.engine_reports:
            if report.engine_name not in self._PIPELINE_ENGINES:
                assert not report.is_pipeline
                assert report.pipeline_info == {}


# ---------------------------------------------------------------------------
# Intégration rapport HTML — pipeline dans les données JSON
# ---------------------------------------------------------------------------

class TestReportWithPipeline:
    """Checks that pipeline data reaches the HTML report and its JSON payload.

    ``report_data`` is the structure returned by ``_build_report_data`` for the
    sample benchmark; the tests read its ``"engines"`` and ``"documents"``
    entries.
    """

    # Engine name of the OCR+LLM pipeline entry looked up by most tests.
    _PIPELINE_NAME = "tesseract → gpt-4o"
    # Engine names expected to be flagged ``is_pipeline``.
    _PIPELINE_ENGINES = frozenset({"tesseract → gpt-4o", "gpt-4o-vision (zero-shot)"})

    @pytest.fixture(scope="class")
    def report_data(self):
        """Report data built once per class from a 3-document sample benchmark."""
        from picarones.fixtures import generate_sample_benchmark
        from picarones.report.generator import _build_report_data
        bm = generate_sample_benchmark(n_docs=3, seed=42)
        images_b64 = bm.metadata.get("_images_b64", {})
        return _build_report_data(bm, images_b64)

    def _pipeline_engine(self, report_data):
        """Return the pipeline's engine entry, failing clearly if it is absent.

        A plain ``next()`` without a default would surface a bare
        ``StopIteration`` here instead of a readable assertion failure.
        """
        entry = next(
            (e for e in report_data["engines"] if e["name"] == self._PIPELINE_NAME),
            None,
        )
        assert entry is not None, f"Moteur introuvable dans le rapport : {self._PIPELINE_NAME!r}"
        return entry

    def _pipeline_results(self, report_data):
        """Return the pipeline's engine result for each document.

        Asserts that the report has at least one document and that every
        document contains a result for the pipeline engine.
        """
        documents = report_data["documents"]
        # Guard against per-document loops passing vacuously on an empty list.
        assert documents, "Aucun document dans les données du rapport"
        results = []
        for doc in documents:
            pipeline_er = next(
                (er for er in doc["engine_results"] if er["engine"] == self._PIPELINE_NAME),
                None,
            )
            assert pipeline_er is not None
            results.append(pipeline_er)
        return results

    def test_pipeline_engine_in_data(self, report_data):
        """The pipeline appears among the report's engine names."""
        names = [e["name"] for e in report_data["engines"]]
        assert self._PIPELINE_NAME in names

    def test_pipeline_engine_has_is_pipeline_flag(self, report_data):
        """The pipeline's engine entry is flagged ``is_pipeline``."""
        assert self._pipeline_engine(report_data)["is_pipeline"] is True

    def test_non_pipeline_engines_not_flagged(self, report_data):
        """Engines outside the pipeline set are not flagged ``is_pipeline``."""
        # Pipeline competitors (LLM or zero-shot VLM) are flagged
        # is_pipeline=True, so they are excluded from this check.
        for e in report_data["engines"]:
            if e["name"] not in self._PIPELINE_ENGINES:
                assert e["is_pipeline"] is False

    def test_pipeline_has_over_normalization_in_info(self, report_data):
        """The pipeline's ``pipeline_info`` includes over-normalisation data."""
        pi = self._pipeline_engine(report_data).get("pipeline_info", {})
        assert pi.get("over_normalization") is not None

    def test_document_results_have_ocr_intermediate(self, report_data):
        """Each pipeline result exposes the intermediate OCR text and both diffs."""
        for pipeline_er in self._pipeline_results(report_data):
            assert "ocr_intermediate" in pipeline_er
            assert "ocr_diff" in pipeline_er
            assert "llm_correction_diff" in pipeline_er

    def test_document_results_have_over_normalization(self, report_data):
        """Each pipeline result exposes an ``over_normalization`` entry."""
        for pipeline_er in self._pipeline_results(report_data):
            assert "over_normalization" in pipeline_er

    def test_html_contains_pipeline_tag(self, tmp_path):
        """The generated HTML file mentions both the pipeline and tesseract."""
        from picarones.fixtures import generate_sample_benchmark
        from picarones.report.generator import ReportGenerator
        bm = generate_sample_benchmark(n_docs=3, seed=42)
        out = tmp_path / "report.html"
        ReportGenerator(bm).generate(out)
        html = out.read_text(encoding="utf-8")
        assert "pipeline" in html.lower()
        assert "tesseract" in html