| """Lazy ICAR knowledge-base lookup. |
| |
| The actual data lives in `icar_kb_data.py` (symlinked from the older project's |
| `icar_knowledge_base.py`, ~5000 lines, 200+ validated entries). |
| |
| We expose a thin search wrapper that filters by crop+condition keywords and |
| returns a list of dicts ready to be folded into the LLM context. |
| """ |
| from __future__ import annotations |
| import logging |
| from typing import List, Optional |
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# In-memory cache of knowledge-base entries; empty until _load() fills it.
_ENTRIES: list = []
# Set to True the first time _load() runs so the import is attempted only once.
_LOADED: bool = False
|
|
|
|
def _load():
    """Fill ``_ENTRIES`` from ``icar_kb_data`` the first time it is called.

    Subsequent calls return immediately. The data module is searched for a
    well-known attribute (``ENTRIES``, ``ICAR_ENTRIES`` or ``KB_ENTRIES``);
    if that yields nothing, the first module-level list whose first element
    is a dict carrying both ``"crop"`` and ``"condition"`` keys is used.
    Any exception raised while importing or reading the module is logged as
    a warning and swallowed, leaving the cache as it was at that point.
    """
    global _ENTRIES, _LOADED
    if _LOADED:
        return
    # Flag is flipped before the import so a failing load is never retried.
    _LOADED = True

    def _looks_like_kb(value) -> bool:
        # Heuristic used only when none of the well-known names produced data.
        return (
            isinstance(value, list)
            and bool(value)
            and isinstance(value[0], dict)
            and "crop" in value[0]
            and "condition" in value[0]
        )

    try:
        from . import icar_kb_data as kb_module

        # First well-known attribute that exists on the module, if any.
        known_name = next(
            (attr for attr in ("ENTRIES", "ICAR_ENTRIES", "KB_ENTRIES")
             if hasattr(kb_module, attr)),
            None,
        )
        if known_name is not None:
            _ENTRIES.extend(getattr(kb_module, known_name))

        if not _ENTRIES:
            fallback = next(
                (val for val in vars(kb_module).values()
                 if _looks_like_kb(val)),
                None,
            )
            if fallback is not None:
                _ENTRIES.extend(fallback)

        logger.info(f"[icar_kb] loaded {len(_ENTRIES)} entries")
    except Exception as exc:
        logger.warning(f"[icar_kb] not loaded: {exc}")
|
|
|
|
def search(query: str, *,
           crop: Optional[str] = None,
           top_k: int = 3) -> List[dict]:
    """Return up to ``top_k`` knowledge-base entries ranked against ``query``.

    Every comparison is a case-insensitive substring check. Each entry is
    scored as follows, and entries scoring 0 are dropped:

    * +3 if ``crop`` is given and occurs inside the entry's ``crop``.
    * +5 if the entry's full ``condition`` occurs in the query.
    * +1 for each whitespace-separated word of ``condition`` longer than
      three characters that occurs in the query.
    * +5 if the entry's ``condition_hindi`` occurs in the query.

    Args:
        query: Free-text question. ``None`` or ``""`` is treated as empty,
            in which case only the crop match can contribute to the score.
        crop: Optional crop name used to boost entries for that crop.
        top_k: Maximum number of entries to return; values <= 0 give ``[]``.

    Returns:
        The matching entry dicts, highest score first. Entries with equal
        scores keep their knowledge-base order (the sort is stable).
    """
    _load()
    if not _ENTRIES or top_k <= 0:
        # A negative top_k would otherwise slice off the *tail* of the
        # ranking and return almost everything.
        return []

    q = (query or "").lower()
    cl = (crop or "").lower()

    def _field(entry: dict, key: str) -> str:
        # ``or ""`` keeps a None value from turning into the text "none",
        # which could then match queries by accident.
        return str(entry.get(key) or "").lower()

    scored: list = []
    for e in _ENTRIES:
        ec = _field(e, "crop")
        cond = _field(e, "condition")
        cond_h = _field(e, "condition_hindi")

        score = 0
        if cl and cl in ec:
            score += 3
        if cond and cond in q:
            score += 5
        # Partial credit for individual condition words; short words are
        # skipped because they match too many unrelated queries.
        score += sum(1 for tok in cond.split() if len(tok) > 3 and tok in q)
        # Compare against the lowercased query: cond_h is lowercased too, so
        # mixed-script names containing Latin capitals still match.
        if cond_h and cond_h in q:
            score += 5

        if score > 0:
            scored.append((score, e))

    scored.sort(key=lambda x: x[0], reverse=True)
    return [e for _, e in scored[:top_k]]
|
|
|
|
def format_for_prompt(entries: List[dict]) -> str:
    """Render knowledge-base entries as a text block for the LLM prompt.

    Returns ``""`` for an empty list. Otherwise the output starts with a
    fixed header line, followed by one numbered card per entry (numbering
    starts at 1). A card always has a ``crop — condition`` title; the
    chemical, alternative, timing and IPM lines are emitted only when the
    corresponding fields are present and truthy. A chemical line requires
    both the chemical name and its dose.
    """
    if not entries:
        return ""

    out = ["[ICAR VERIFIED CARDS — TRUST THESE OVER KCC RETRIEVAL]"]
    for idx, card in enumerate(entries, start=1):
        title_crop = card.get("crop", "")
        title_cond = card.get("condition", "")
        out.append(f"\n[ICAR-{idx}] {title_crop} — {title_cond}")

        primary = card.get("chemical_1", "")
        primary_dose = card.get("dose_1", "")
        if primary and primary_dose:
            out.append(f" Chemical: {primary} @ {primary_dose}")

        alt = card.get("chemical_2")
        alt_dose = card.get("dose_2")
        if alt and alt_dose:
            out.append(f" Alt: {alt} @ {alt_dose}")

        when = card.get("timing", "")
        if when:
            out.append(f" Timing: {when}")

        ipm_note = card.get("ipm", "")
        if ipm_note:
            out.append(f" IPM: {ipm_note}")

    return "\n".join(out)
|
|