kcc-agri / kcc_core /icar_kb.py
hritikm15's picture
Day 9 — v4 merge deploy: kcc_core + advisors + Proof tab + pest heatmap
49818d2 verified
raw
history blame contribute delete
3.2 kB
"""Lazy ICAR knowledge-base lookup.
The actual data lives in `icar_kb_data.py` (symlinked from the older project's
`icar_knowledge_base.py`, ~5000 lines, 200+ validated entries).
We expose a thin search wrapper that filters by crop+condition keywords and
returns a list of dicts ready to be folded into the LLM context.
"""
from __future__ import annotations
import logging
from typing import List, Optional
logger = logging.getLogger(__name__)
# Module-level cache of knowledge-base entries; _load() extends it in place.
_ENTRIES: list = []
# Flipped to True the first time _load() runs, before the import is attempted,
# so a failed load is not retried on later calls.
_LOADED = False
def _load():
    """Fill the module-level entry cache from ``icar_kb_data``, at most once.

    The loaded flag is set before the import is attempted, so a failure is
    logged as a warning and never retried; the cache then simply stays empty.
    """
    global _ENTRIES, _LOADED
    if not _LOADED:
        _LOADED = True
        try:
            from . import icar_kb_data  # symlinked module

            # Preferred path: the first well-known list name the module exposes.
            known_name = next(
                (
                    candidate
                    for candidate in ("ENTRIES", "ICAR_ENTRIES", "KB_ENTRIES")
                    if hasattr(icar_kb_data, candidate)
                ),
                None,
            )
            if known_name is not None:
                _ENTRIES.extend(getattr(icar_kb_data, known_name))

            if not _ENTRIES:
                # Fallback: take the first module attribute that is a non-empty
                # list whose leading element is a dict carrying both 'crop'
                # and 'condition' keys.
                for attr_value in vars(icar_kb_data).values():
                    looks_like_kb = (
                        isinstance(attr_value, list)
                        and attr_value
                        and isinstance(attr_value[0], dict)
                        and "crop" in attr_value[0]
                        and "condition" in attr_value[0]
                    )
                    if looks_like_kb:
                        _ENTRIES.extend(attr_value)
                        break
            logger.info(f"[icar_kb] loaded {len(_ENTRIES)} entries")
        except Exception as e:
            logger.warning(f"[icar_kb] not loaded: {e}")
def search(query: str, *,
           crop: Optional[str] = None,
           top_k: int = 3) -> List[dict]:
    """Return up to ``top_k`` knowledge-base entries ranked by keyword overlap.

    Each entry is scored as follows (all comparisons are case-insensitive
    substring tests):

    * +3 if ``crop`` is given and occurs in the entry's ``crop`` field;
    * +5 if the entry's whole ``condition`` occurs in the query;
    * +1 for every whitespace-separated ``condition`` token longer than three
      characters that occurs in the query;
    * +5 if the entry's ``condition_hindi`` occurs in the query.

    Entries scoring zero are dropped. The sort is stable, so entries with
    equal scores keep their knowledge-base order.

    Args:
        query: Free-text user query. ``None`` or an empty string is treated
            as "no text", in which case only the ``crop`` filter can score.
        crop: Optional crop name used as an additional filter/boost.
        top_k: Maximum number of entries to return; values <= 0 yield ``[]``.

    Returns:
        The matching entry dicts, best score first; ``[]`` when the knowledge
        base is empty or nothing matches.
    """
    _load()
    if not _ENTRIES or top_k <= 0:
        return []
    q = (query or "").lower()
    cl = (crop or "").lower()
    scored: list = []
    for e in _ENTRIES:
        # `or ""` keeps an explicit None value from turning into the literal
        # string "none", which would otherwise match queries containing it.
        ec = str(e.get("crop") or "").lower()
        cond = str(e.get("condition") or "").lower()
        cond_h = str(e.get("condition_hindi") or "").lower()
        score = 0
        if cl and cl in ec:
            score += 3
        if cond and cond in q:
            score += 5
        # Partial credit for individual condition words; very short tokens
        # are skipped because they match too easily as substrings.
        score += sum(1 for tok in cond.split() if len(tok) > 3 and tok in q)
        # Compare against the lower-cased query: Devanagari has no case, so it
        # is unaffected, while any Latin letters in the Hindi label now match
        # regardless of how the user capitalised them.
        if cond_h and cond_h in q:
            score += 5
        if score > 0:
            scored.append((score, e))
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [e for _, e in scored[:top_k]]
def format_for_prompt(entries: List[dict]) -> str:
    """Render knowledge-base entries as a text block for the LLM context.

    The output starts with a fixed header line, followed by one numbered card
    per entry. A card always has a title line (crop and condition) and then,
    when the corresponding fields are present and non-empty:

    * ``Chemical:`` — requires both ``chemical_1`` and ``dose_1``;
    * ``Alt:`` — requires both ``chemical_2`` and ``dose_2``;
    * ``Timing:`` — from ``timing``;
    * ``IPM:`` — from ``ipm``.

    Args:
        entries: Entry dicts, e.g. as returned by ``search``.

    Returns:
        The formatted block, or ``""`` when ``entries`` is empty.
    """
    if not entries:
        return ""
    lines = ["[ICAR VERIFIED CARDS — TRUST THESE OVER KCC RETRIEVAL]"]
    for i, e in enumerate(entries, 1):
        chem1 = e.get("chemical_1", "")
        dose1 = e.get("dose_1", "")
        timing = e.get("timing", "")
        ipm = e.get("ipm", "")
        # Join only the parts that exist: this keeps crop and condition apart
        # (they used to be fused into one word) without leaving a dangling
        # separator or a literal "None" when one of them is missing.
        title = " — ".join(
            str(part) for part in (e.get("crop"), e.get("condition")) if part
        )
        lines.append(f"\n[ICAR-{i}] {title}")
        if chem1 and dose1:
            lines.append(f" Chemical: {chem1} @ {dose1}")
        if e.get("chemical_2") and e.get("dose_2"):
            lines.append(f" Alt: {e['chemical_2']} @ {e['dose_2']}")
        if timing:
            lines.append(f" Timing: {timing}")
        if ipm:
            lines.append(f" IPM: {ipm}")
    return "\n".join(lines)