from __future__ import annotations

import sys
from datetime import UTC, datetime
from pathlib import Path

from fastapi.testclient import TestClient


def get_memory_test_client() -> TestClient:
    """Return a ``TestClient`` for the memory-api app backed by seeded fixtures.

    The function:

    1. Puts ``<repo root>/services/memory-api`` on ``sys.path`` (if it is not
       already there) so that the service's ``app`` package can be imported.
    2. Creates all tables declared on ``Base.metadata`` against ``engine``.
    3. Inserts the fixture sources, versions, chunks, claims, evidence links,
       evidence assessments and one claim relationship. Every record is
       inserted only when its identifier is not already present, so the
       function can be called against an empty or a partially seeded database.
    4. Commits and returns a ``TestClient`` wrapping the service's ``app``.
    """
    service_dir = str(
        Path(__file__).resolve().parents[2] / "services" / "memory-api"
    )
    if service_dir not in sys.path:
        sys.path.insert(0, service_dir)

    # Imported lazily: these modules only resolve once the path tweak above
    # has been applied.
    from app.db.base import Base  # type: ignore
    from app.db.models import (  # type: ignore
        ApprovalState,
        Claim,
        ClaimEvidenceLink,
        ClaimRelationship,
        Chunk,
        EvidenceAssessment,
        GraphRelationType,
        Source,
        SourceClass,
        SourceVersion,
        StrengthBand,
        SupportType,
        SensitivityClass,
    )
    from app.db.session import SessionLocal, engine  # type: ignore
    from app.main import app  # type: ignore

    Base.metadata.create_all(bind=engine)

    # Fixture table: one row per source together with its single version and
    # single chunk.  Row layout:
    #   (source_id, source_class, title, audience, sensitivity, version_id,
    #    chunk_id, chunk_text, claim_type, section_path,
    #    page_start, page_end, token_count)
    # The audience tuple feeds both Source.audience_scope and
    # Chunk.audience_fit (as a fresh list each time).
    corpus = (
        (
            "LBL-NSCLC-DRUGA-EMA-2024",
            SourceClass.LBL,
            "DRUG-A label",
            ("HCP", "Internal"),
            SensitivityClass.EXTERNAL,
            "ver-lbl-1",
            "chk-lbl-1",
            "The recommended dose is 80 mg once daily for first-line metastatic NSCLC. "
            "Dose reductions must remain within approved label boundaries.",
            "dose",
            "2 POSOLOGY",
            2,
            2,
            18,
        ),
        (
            "DOC-CSR-NSCLC-014",
            SourceClass.DOC_CSR,
            "CSR summary",
            ("HCP", "Internal"),
            SensitivityClass.EXTERNAL,
            "ver-csr-1",
            "chk-csr-1",
            "DRUG-A improves progression-free survival in EGFR-positive NSCLC and supports efficacy interpretation.",
            "efficacy",
            "RESULTS",
            5,
            5,
            12,
        ),
        (
            "SOP-MED-NSCLC-010",
            SourceClass.SOP_MED,
            "Medical SOP",
            ("Internal",),
            SensitivityClass.INTERNAL_ONLY,
            "ver-sop-1",
            "chk-sop-1",
            "Internal responders should preserve approved dose boundaries and citation discipline.",
            "dose",
            "DOSING GUIDANCE",
            1,
            1,
            10,
        ),
        (
            # RMP source (noted in the original as required by all golden
            # and adversarial cases).
            "RMP-NSCLC-DRUGA-2024",
            SourceClass.RMP,
            "DRUG-A Risk Management Plan",
            ("HCP", "Internal"),
            SensitivityClass.EXTERNAL,
            "ver-rmp-1",
            "chk-rmp-1",
            "DRUG-A risk management plan: dose modifications must follow EU-approved "
            "label boundaries. Monitoring for ILD and hepatotoxicity is required. "
            "Dose adjustment or interruption should adhere to the approved posology.",
            "safety",
            "RISK MINIMISATION MEASURES",
            3,
            4,
            32,
        ),
        (
            # PK summary source (noted in the original as required by all
            # golden and adversarial cases).
            "PK-SUMMARY-NSCLC-005",
            SourceClass.PK_SUMMARY,
            "DRUG-A Pharmacokinetic Summary",
            ("HCP", "Internal"),
            SensitivityClass.EXTERNAL,
            "ver-pk-1",
            "chk-pk-1",
            "DRUG-A pharmacokinetics: half-life approximately 48 hours, CYP3A4-mediated "
            "metabolism. Dose-proportional exposure supports once-daily dosing schedule "
            "across first-line metastatic NSCLC populations in the EU / EMA region.",
            "dose",
            "PHARMACOKINETIC SUMMARY",
            1,
            2,
            34,
        ),
    )

    # (claim_id, canonical_text, claim_type, primary_source_id, evidence_score)
    claim_rows = (
        (
            "clm-lbl-1",
            "Dose reductions must remain within approved label boundaries.",
            "dose",
            "LBL-NSCLC-DRUGA-EMA-2024",
            0.92,
        ),
        (
            "clm-csr-1",
            "DRUG-A improves progression-free survival in EGFR-positive NSCLC.",
            "efficacy",
            "DOC-CSR-NSCLC-014",
            0.88,
        ),
        (
            "clm-rmp-1",
            "DRUG-A dose modification and interruption must adhere to EU-approved "
            "label boundaries per the risk management plan.",
            "safety",
            "RMP-NSCLC-DRUGA-2024",
            0.84,
        ),
        (
            "clm-pk-1",
            "DRUG-A once-daily dosing is supported by dose-proportional "
            "pharmacokinetics across first-line metastatic NSCLC populations.",
            "dose",
            "PK-SUMMARY-NSCLC-005",
            0.82,
        ),
    )

    # (claim_id, chunk_id, source_id, extraction_confidence)
    link_rows = (
        ("clm-lbl-1", "chk-lbl-1", "LBL-NSCLC-DRUGA-EMA-2024", 0.99),
        ("clm-csr-1", "chk-csr-1", "DOC-CSR-NSCLC-014", 0.95),
        ("clm-rmp-1", "chk-rmp-1", "RMP-NSCLC-DRUGA-2024", 0.93),
        ("clm-pk-1", "chk-pk-1", "PK-SUMMARY-NSCLC-005", 0.91),
    )

    # (assessment_id, claim_id, source_prior_score, sme_score, explanation)
    assessment_rows = (
        ("asm-1", "clm-lbl-1", 0.95, 0.7, {"reasons": ["Label source present"]}),
        ("asm-2", "clm-csr-1", 0.75, 0.6, {"reasons": ["CSR evidence present"]}),
        (
            "asm-3",
            "clm-rmp-1",
            0.80,
            0.65,
            {"reasons": ["RMP source present", "EU geography aligned"]},
        ),
        (
            "asm-4",
            "clm-pk-1",
            0.78,
            0.65,
            {
                "reasons": [
                    "PK summary source present",
                    "dose-proportional exposure confirmed",
                ]
            },
        ),
    )

    # Component scores shared by every seeded assessment.
    recency = 0.90
    approval = 1.0
    consistency = 0.90
    audience_fit = 1.0
    geography_fit = 1.0

    with SessionLocal() as session:
        seeded_at = datetime.now(UTC)

        def _known_ids(column) -> set:
            """Collect every value currently stored in *column*."""
            return {row[0] for row in session.query(column).all()}

        # Snapshot what is already stored so that only absent records are
        # inserted below.
        seen_sources = _known_ids(Source.source_id)
        seen_versions = _known_ids(SourceVersion.version_id)
        seen_chunks = _known_ids(Chunk.chunk_id)
        seen_claims = _known_ids(Claim.claim_id)
        seen_assessments = _known_ids(EvidenceAssessment.assessment_id)
        seen_relationships = _known_ids(ClaimRelationship.relationship_id)

        pending: list = []

        # ---- Sources, their versions and their chunks ----
        for (
            source_id,
            source_class,
            title,
            audience,
            sensitivity,
            version_id,
            chunk_id,
            chunk_text,
            chunk_claim_type,
            section_path,
            page_start,
            page_end,
            token_count,
        ) in corpus:
            if source_id not in seen_sources:
                pending.append(
                    Source(
                        source_id=source_id,
                        source_class=source_class,
                        title=title,
                        therapy_area="NSCLC",
                        molecule="DRUG-A",
                        geography="EU / EMA",
                        audience_scope=list(audience),
                        sensitivity_class=sensitivity,
                        approval_state=ApprovalState.APPROVED,
                        current_version_id=version_id,
                        hygiene_status="active",
                        created_at=seeded_at,
                        updated_at=seeded_at,
                    )
                )
            if version_id not in seen_versions:
                pending.append(
                    SourceVersion(
                        version_id=version_id,
                        source_id=source_id,
                        version_label="v1",
                        approval_state=ApprovalState.APPROVED,
                        is_latest_approved=True,
                        created_at=seeded_at,
                    )
                )
            if chunk_id not in seen_chunks:
                pending.append(
                    Chunk(
                        chunk_id=chunk_id,
                        source_id=source_id,
                        version_id=version_id,
                        text=chunk_text,
                        claim_type=chunk_claim_type,
                        section_path=section_path,
                        page_start=page_start,
                        page_end=page_end,
                        token_count=token_count,
                        audience_fit=list(audience),
                        geography_fit="EU / EMA",
                        therapy_area="NSCLC",
                        created_at=seeded_at,
                    )
                )

        # ---- Claims ----
        for claim_id, canonical_text, claim_type, primary_source, score in claim_rows:
            if claim_id in seen_claims:
                continue
            pending.append(
                Claim(
                    claim_id=claim_id,
                    canonical_text=canonical_text,
                    claim_type=claim_type,
                    molecule_id="DRUG-A",
                    geography_id="EU / EMA",
                    approval_state="approved",
                    primary_source_id=primary_source,
                    current_evidence_score=score,
                    strength_band=StrengthBand.HIGH,
                    created_at=seeded_at,
                )
            )

        # Sources, versions, chunks and claims are flushed first; the records
        # added afterwards refer to them by id.
        if pending:
            session.add_all(pending)
            session.flush()

        # ---- ClaimEvidenceLinks (presence is keyed on the claim/chunk pair) ----
        linked_pairs = {
            (row[0], row[1])
            for row in session.query(
                ClaimEvidenceLink.claim_id, ClaimEvidenceLink.chunk_id
            ).all()
        }
        new_links = [
            ClaimEvidenceLink(
                claim_id=claim_id,
                chunk_id=chunk_id,
                source_id=source_id,
                support_type=SupportType.PRIMARY,
                extraction_confidence=confidence,
                is_primary_support=True,
            )
            for claim_id, chunk_id, source_id, confidence in link_rows
            if (claim_id, chunk_id) not in linked_pairs
        ]
        if new_links:
            session.add_all(new_links)
            session.flush()

        # ---- EvidenceAssessments ----
        # evidence_score is the weighted sum of the component scores, rounded
        # to two decimals; the term order is kept fixed so the floating-point
        # result is stable.
        new_assessments = [
            EvidenceAssessment(
                assessment_id=assessment_id,
                claim_id=claim_id,
                source_prior_score=source_prior,
                recency_score=recency,
                approval_score=approval,
                sme_score=sme,
                consistency_score=consistency,
                audience_fit_score=audience_fit,
                geography_fit_score=geography_fit,
                penalty_score=0.0,
                evidence_score=round(
                    0.30 * source_prior
                    + 0.15 * recency
                    + 0.20 * approval
                    + 0.10 * sme
                    + 0.15 * consistency
                    + 0.05 * audience_fit
                    + 0.05 * geography_fit,
                    2,
                ),
                strength_band=StrengthBand.HIGH,
                explanation_json=explanation,
                scored_at=seeded_at,
            )
            for assessment_id, claim_id, source_prior, sme, explanation in assessment_rows
            if assessment_id not in seen_assessments
        ]
        if new_assessments:
            session.add_all(new_assessments)
            session.flush()

        # ---- ClaimRelationships ----
        if "rel-1" not in seen_relationships:
            session.add(
                ClaimRelationship(
                    relationship_id="rel-1",
                    from_claim_id="clm-lbl-1",
                    to_claim_id="clm-csr-1",
                    relation_type=GraphRelationType.SUPPORTED_BY,
                    relation_metadata={
                        "reason": "efficacy supports approved use context"
                    },
                    created_at=seeded_at,
                )
            )

        session.commit()

    return TestClient(app)