"""Lazy ICAR knowledge-base lookup.

The actual data lives in `icar_kb_data.py` (symlinked from the older project's
`icar_knowledge_base.py`, ~5000 lines, 200+ validated entries). We expose a
thin search wrapper that filters by crop+condition keywords and returns a list
of dicts ready to be folded into the LLM context.
"""
from __future__ import annotations

import logging
from typing import List, Optional

logger = logging.getLogger(__name__)

# Module-level cache, filled in place by `_load()` on the first search.
_ENTRIES: List[dict] = []
_LOADED = False


def _load() -> None:
    """Populate ``_ENTRIES`` from ``icar_kb_data`` the first time it is called.

    Loading is best effort: any failure is logged as a warning and the cache
    stays empty, in which case ``search`` returns no results.
    """
    global _LOADED
    if _LOADED:
        return
    # Set the flag before importing so a failed load is not retried on every
    # subsequent call.
    _LOADED = True
    try:
        from . import icar_kb_data  # symlinked module

        # The old module exposes ENTRIES (list of dicts); try the known names
        # and use the first one that exists.
        for name in ("ENTRIES", "ICAR_ENTRIES", "KB_ENTRIES"):
            if hasattr(icar_kb_data, name):
                _ENTRIES.extend(getattr(icar_kb_data, name))
                break
        if not _ENTRIES:
            # Fall back to the first module attribute that is a non-empty list
            # whose first element is a dict with 'crop' + 'condition' keys.
            for value in vars(icar_kb_data).values():
                if (
                    isinstance(value, list)
                    and value
                    and isinstance(value[0], dict)
                    and "crop" in value[0]
                    and "condition" in value[0]
                ):
                    _ENTRIES.extend(value)
                    break
        logger.info("[icar_kb] loaded %d entries", len(_ENTRIES))
    except Exception as e:
        # Deliberately broad: a missing or broken data module must not break
        # the caller; it only means no cards are returned.
        logger.warning("[icar_kb] not loaded: %s", e)


def search(query: str, *, crop: Optional[str] = None, top_k: int = 3) -> List[dict]:
    """Return up to ``top_k`` knowledge-base entries that best match the query.

    Scoring per entry:
      * +3 if ``crop`` is a substring of the entry's crop,
      * +5 if the entry's condition appears verbatim in the query,
      * +1 for each condition word longer than 3 characters found in the query,
      * +5 if the entry's ``condition_hindi`` appears in the query.

    All comparisons are case-insensitive. Entries scoring 0 are dropped; ties
    keep their knowledge-base order (the sort is stable).

    Args:
        query: Free-text user query. ``None`` is treated as an empty string.
        crop: Optional crop name used as an additional match signal.
        top_k: Maximum number of entries to return; values <= 0 yield ``[]``.

    Returns:
        Matching entry dicts, highest score first.
    """
    _load()
    if not _ENTRIES or top_k <= 0:
        return []

    q = (query or "").lower()
    crop_q = (crop or "").lower()

    scored: list = []
    for entry in _ENTRIES:
        if not isinstance(entry, dict):
            continue  # tolerate malformed rows instead of crashing the search
        # `or ""` keeps an explicit None from becoming the searchable text "none".
        entry_crop = str(entry.get("crop") or "").lower()
        cond = str(entry.get("condition") or "").lower()
        cond_h = str(entry.get("condition_hindi") or "").lower()

        score = 0
        if crop_q and crop_q in entry_crop:
            score += 3
        if cond and cond in q:
            score += 5
        # Match individual condition tokens; short words are too noisy.
        score += sum(1 for tok in cond.split() if len(tok) > 3 and tok in q)
        # Compare against the lower-cased query: Devanagari is unaffected by
        # lower(), while any Latin letters in the field now match regardless
        # of the query's case.
        if cond_h and cond_h in q:
            score += 5

        if score > 0:
            scored.append((score, entry))

    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [entry for _, entry in scored[:top_k]]


def format_for_prompt(entries: List[dict]) -> str:
    """Render entries as a text block for inclusion in the LLM prompt.

    Each entry becomes an ``[ICAR-n] crop — condition`` heading followed by the
    optional Chemical / Alt / Timing / IPM lines. A chemical line is emitted
    only when both the chemical and its dose are present.

    Args:
        entries: Entry dicts, e.g. as returned by ``search``.

    Returns:
        The formatted block, or ``""`` when ``entries`` is empty.
    """
    if not entries:
        return ""

    lines = ["[ICAR VERIFIED CARDS — TRUST THESE OVER KCC RETRIEVAL]"]
    for i, e in enumerate(entries, 1):
        # `or ""` so that an explicit None is not rendered as the text "None".
        crop = e.get("crop") or ""
        cond = e.get("condition") or ""
        chem1 = e.get("chemical_1") or ""
        dose1 = e.get("dose_1") or ""
        timing = e.get("timing") or ""
        ipm = e.get("ipm") or ""

        lines.append(f"\n[ICAR-{i}] {crop} — {cond}")
        if chem1 and dose1:
            lines.append(f" Chemical: {chem1} @ {dose1}")
        if e.get("chemical_2") and e.get("dose_2"):
            lines.append(f" Alt: {e['chemical_2']} @ {e['dose_2']}")
        if timing:
            lines.append(f" Timing: {timing}")
        if ipm:
            lines.append(f" IPM: {ipm}")
    return "\n".join(lines)