from __future__ import annotations

import sys
from datetime import UTC, datetime
from pathlib import Path

from fastapi.testclient import TestClient


def get_memory_test_client() -> TestClient:
    """Return a ``TestClient`` for the memory-api app after seeding fixture data.

    Side effects:
        * Prepends ``<repo_root>/services/memory-api`` to ``sys.path`` (once) so
          the service's ``app`` package can be imported.
        * Creates any missing tables via ``Base.metadata.create_all``.
        * Inserts the fixture sources, versions, chunks, claims, evidence
          links, assessments and one claim relationship, then commits.

    Seeding is idempotent: each record is inserted only when its identifier is
    not already present, so the helper can be called repeatedly and works on
    empty as well as partially populated databases (including databases
    previously seeded by ``setup_eval_corpus.py``).

    Returns:
        A ``TestClient`` wrapping the memory-api ``app``.
    """
    repo_root = Path(__file__).resolve().parents[2]
    memory_service_root = repo_root / "services" / "memory-api"
    if str(memory_service_root) not in sys.path:
        sys.path.insert(0, str(memory_service_root))
    # Imported lazily: the ``app`` package is only importable once the service
    # root is on ``sys.path``.
    from app.db.base import Base  # type: ignore
    from app.db.models import (  # type: ignore
        ApprovalState,
        Claim,
        ClaimEvidenceLink,
        ClaimRelationship,
        Chunk,
        EvidenceAssessment,
        GraphRelationType,
        Source,
        SourceClass,
        SourceVersion,
        StrengthBand,
        SupportType,
        SensitivityClass,
    )
    from app.db.session import SessionLocal, engine  # type: ignore
    from app.main import app  # type: ignore

    Base.metadata.create_all(bind=engine)

    # -----------------------------------------------------------------------
    # Fixture tables. Only the fields that differ between records are listed;
    # the values shared by every record are filled in by the seeding loops.
    # -----------------------------------------------------------------------

    # One entry per source, bundling its single version and single chunk.
    source_fixtures = [
        {
            "source_id": "LBL-NSCLC-DRUGA-EMA-2024",
            "source_class": SourceClass.LBL,
            "title": "DRUG-A label",
            "audience": ("HCP", "Internal"),
            "sensitivity_class": SensitivityClass.EXTERNAL,
            "version_id": "ver-lbl-1",
            "chunk": {
                "chunk_id": "chk-lbl-1",
                "text": "The recommended dose is 80 mg once daily for first-line metastatic NSCLC. Dose reductions must remain within approved label boundaries.",
                "claim_type": "dose",
                "section_path": "2 POSOLOGY",
                "page_start": 2,
                "page_end": 2,
                "token_count": 18,
            },
        },
        {
            "source_id": "DOC-CSR-NSCLC-014",
            "source_class": SourceClass.DOC_CSR,
            "title": "CSR summary",
            "audience": ("HCP", "Internal"),
            "sensitivity_class": SensitivityClass.EXTERNAL,
            "version_id": "ver-csr-1",
            "chunk": {
                "chunk_id": "chk-csr-1",
                "text": "DRUG-A improves progression-free survival in EGFR-positive NSCLC and supports efficacy interpretation.",
                "claim_type": "efficacy",
                "section_path": "RESULTS",
                "page_start": 5,
                "page_end": 5,
                "token_count": 12,
            },
        },
        {
            "source_id": "SOP-MED-NSCLC-010",
            "source_class": SourceClass.SOP_MED,
            "title": "Medical SOP",
            "audience": ("Internal",),
            "sensitivity_class": SensitivityClass.INTERNAL_ONLY,
            "version_id": "ver-sop-1",
            "chunk": {
                "chunk_id": "chk-sop-1",
                "text": "Internal responders should preserve approved dose boundaries and citation discipline.",
                "claim_type": "dose",
                "section_path": "DOSING GUIDANCE",
                "page_start": 1,
                "page_end": 1,
                "token_count": 10,
            },
        },
        # RMP and PK-SUMMARY are required by all golden and adversarial cases.
        {
            "source_id": "RMP-NSCLC-DRUGA-2024",
            "source_class": SourceClass.RMP,
            "title": "DRUG-A Risk Management Plan",
            "audience": ("HCP", "Internal"),
            "sensitivity_class": SensitivityClass.EXTERNAL,
            "version_id": "ver-rmp-1",
            "chunk": {
                "chunk_id": "chk-rmp-1",
                "text": (
                    "DRUG-A risk management plan: dose modifications must follow EU-approved "
                    "label boundaries. Monitoring for ILD and hepatotoxicity is required. "
                    "Dose adjustment or interruption should adhere to the approved posology."
                ),
                "claim_type": "safety",
                "section_path": "RISK MINIMISATION MEASURES",
                "page_start": 3,
                "page_end": 4,
                "token_count": 32,
            },
        },
        {
            "source_id": "PK-SUMMARY-NSCLC-005",
            "source_class": SourceClass.PK_SUMMARY,
            "title": "DRUG-A Pharmacokinetic Summary",
            "audience": ("HCP", "Internal"),
            "sensitivity_class": SensitivityClass.EXTERNAL,
            "version_id": "ver-pk-1",
            "chunk": {
                "chunk_id": "chk-pk-1",
                "text": (
                    "DRUG-A pharmacokinetics: half-life approximately 48 hours, CYP3A4-mediated "
                    "metabolism. Dose-proportional exposure supports once-daily dosing schedule "
                    "across first-line metastatic NSCLC populations in the EU / EMA region."
                ),
                "claim_type": "dose",
                "section_path": "PHARMACOKINETIC SUMMARY",
                "page_start": 1,
                "page_end": 2,
                "token_count": 34,
            },
        },
    ]

    # (claim_id, canonical_text, claim_type, primary_source_id, evidence_score)
    claim_fixtures = [
        (
            "clm-lbl-1",
            "Dose reductions must remain within approved label boundaries.",
            "dose",
            "LBL-NSCLC-DRUGA-EMA-2024",
            0.92,
        ),
        (
            "clm-csr-1",
            "DRUG-A improves progression-free survival in EGFR-positive NSCLC.",
            "efficacy",
            "DOC-CSR-NSCLC-014",
            0.88,
        ),
        (
            "clm-rmp-1",
            (
                "DRUG-A dose modification and interruption must adhere to EU-approved "
                "label boundaries per the risk management plan."
            ),
            "safety",
            "RMP-NSCLC-DRUGA-2024",
            0.84,
        ),
        (
            "clm-pk-1",
            (
                "DRUG-A once-daily dosing is supported by dose-proportional "
                "pharmacokinetics across first-line metastatic NSCLC populations."
            ),
            "dose",
            "PK-SUMMARY-NSCLC-005",
            0.82,
        ),
    ]

    # (claim_id, chunk_id, source_id, extraction_confidence)
    link_fixtures = [
        ("clm-lbl-1", "chk-lbl-1", "LBL-NSCLC-DRUGA-EMA-2024", 0.99),
        ("clm-csr-1", "chk-csr-1", "DOC-CSR-NSCLC-014", 0.95),
        ("clm-rmp-1", "chk-rmp-1", "RMP-NSCLC-DRUGA-2024", 0.93),
        ("clm-pk-1", "chk-pk-1", "PK-SUMMARY-NSCLC-005", 0.91),
    ]

    # (assessment_id, claim_id, source_prior_score, sme_score, explanation_json)
    assessment_fixtures = [
        ("asm-1", "clm-lbl-1", 0.95, 0.7, {"reasons": ["Label source present"]}),
        ("asm-2", "clm-csr-1", 0.75, 0.6, {"reasons": ["CSR evidence present"]}),
        ("asm-3", "clm-rmp-1", 0.80, 0.65, {"reasons": ["RMP source present", "EU geography aligned"]}),
        ("asm-4", "clm-pk-1", 0.78, 0.65, {"reasons": ["PK summary source present", "dose-proportional exposure confirmed"]}),
    ]

    relationship_id = "rel-1"

    # ---------------------------------------------------------------------------
    # Idempotent fixture seeding — each record is inserted only if absent.
    # ---------------------------------------------------------------------------
    with SessionLocal() as session:
        now = datetime.now(UTC)

        def _present(column, wanted_ids):
            """Return the subset of *wanted_ids* already stored in *column*.

            The lookup is restricted to the fixture identifiers so a large,
            already-populated table is never loaded wholesale.
            """
            rows = session.query(column).filter(column.in_(list(wanted_ids))).all()
            return {row[0] for row in rows}

        # All primary-key existence checks run before anything is added to the
        # session, so they reflect the database state prior to seeding.
        existing_source_ids = _present(
            Source.source_id, [spec["source_id"] for spec in source_fixtures]
        )
        existing_version_ids = _present(
            SourceVersion.version_id, [spec["version_id"] for spec in source_fixtures]
        )
        existing_chunk_ids = _present(
            Chunk.chunk_id, [spec["chunk"]["chunk_id"] for spec in source_fixtures]
        )
        existing_claim_ids = _present(
            Claim.claim_id, [fixture[0] for fixture in claim_fixtures]
        )
        existing_assessment_ids = _present(
            EvidenceAssessment.assessment_id,
            [fixture[0] for fixture in assessment_fixtures],
        )
        existing_relationship_ids = _present(
            ClaimRelationship.relationship_id, [relationship_id]
        )

        records: list = []

        # ---- Sources, each followed by its version and its chunk ----
        for spec in source_fixtures:
            source_id = spec["source_id"]
            version_id = spec["version_id"]
            chunk_fields = spec["chunk"]

            if source_id not in existing_source_ids:
                records.append(Source(
                    source_id=source_id,
                    source_class=spec["source_class"],
                    title=spec["title"],
                    therapy_area="NSCLC",
                    molecule="DRUG-A",
                    geography="EU / EMA",
                    # Fresh list per record so no two rows share a mutable value.
                    audience_scope=list(spec["audience"]),
                    sensitivity_class=spec["sensitivity_class"],
                    approval_state=ApprovalState.APPROVED,
                    current_version_id=version_id,
                    hygiene_status="active",
                    created_at=now,
                    updated_at=now,
                ))
            if version_id not in existing_version_ids:
                records.append(SourceVersion(
                    version_id=version_id,
                    source_id=source_id,
                    version_label="v1",
                    approval_state=ApprovalState.APPROVED,
                    is_latest_approved=True,
                    created_at=now,
                ))
            if chunk_fields["chunk_id"] not in existing_chunk_ids:
                records.append(Chunk(
                    source_id=source_id,
                    version_id=version_id,
                    audience_fit=list(spec["audience"]),
                    geography_fit="EU / EMA",
                    therapy_area="NSCLC",
                    created_at=now,
                    **chunk_fields,
                ))

        # ---- Claims ----
        for claim_id, canonical_text, claim_type, primary_source_id, score in claim_fixtures:
            if claim_id not in existing_claim_ids:
                records.append(Claim(
                    claim_id=claim_id,
                    canonical_text=canonical_text,
                    claim_type=claim_type,
                    molecule_id="DRUG-A",
                    geography_id="EU / EMA",
                    approval_state="approved",
                    primary_source_id=primary_source_id,
                    current_evidence_score=score,
                    strength_band=StrengthBand.HIGH,
                    created_at=now,
                ))

        # Flush sources/versions/chunks/claims before adding FK-dependent records
        if records:
            session.add_all(records)
            session.flush()

        # ---- ClaimEvidenceLinks (checked by claim+chunk pair) ----
        cel_pairs_existing = {
            (row[0], row[1])
            for row in session.query(
                ClaimEvidenceLink.claim_id, ClaimEvidenceLink.chunk_id
            ).filter(
                ClaimEvidenceLink.claim_id.in_([fixture[0] for fixture in link_fixtures])
            ).all()
        }
        link_records = [
            ClaimEvidenceLink(
                claim_id=claim_id,
                chunk_id=chunk_id,
                source_id=source_id,
                support_type=SupportType.PRIMARY,
                extraction_confidence=confidence,
                is_primary_support=True,
            )
            for claim_id, chunk_id, source_id, confidence in link_fixtures
            if (claim_id, chunk_id) not in cel_pairs_existing
        ]
        if link_records:
            session.add_all(link_records)
            session.flush()

        # ---- EvidenceAssessments ----
        # Component scores shared by every fixture assessment; the same values
        # feed both the stored columns and the weighted evidence score.
        recency = 0.90
        approval = 1.0
        consistency = 0.90
        audience_fit = 1.0
        geography_fit = 1.0

        asmt_records: list = []
        for asmt_id, claim_id, src_prior, sme, explanation in assessment_fixtures:
            if asmt_id in existing_assessment_ids:
                continue
            asmt_records.append(EvidenceAssessment(
                assessment_id=asmt_id,
                claim_id=claim_id,
                source_prior_score=src_prior,
                recency_score=recency,
                approval_score=approval,
                sme_score=sme,
                consistency_score=consistency,
                audience_fit_score=audience_fit,
                geography_fit_score=geography_fit,
                penalty_score=0.0,
                # Weighted sum of the component scores (weights total 1.0).
                evidence_score=round(
                    0.30 * src_prior + 0.15 * recency + 0.20 * approval
                    + 0.10 * sme + 0.15 * consistency + 0.05 * audience_fit
                    + 0.05 * geography_fit,
                    2,
                ),
                strength_band=StrengthBand.HIGH,
                explanation_json=explanation,
                scored_at=now,
            ))
        if asmt_records:
            session.add_all(asmt_records)
            session.flush()

        # ---- ClaimRelationships ----
        if relationship_id not in existing_relationship_ids:
            session.add(ClaimRelationship(
                relationship_id=relationship_id,
                from_claim_id="clm-lbl-1",
                to_claim_id="clm-csr-1",
                relation_type=GraphRelationType.SUPPORTED_BY,
                relation_metadata={"reason": "efficacy supports approved use context"},
                created_at=now,
            ))

        session.commit()

    return TestClient(app)