"""Block-level feature lookup. Reads `data/block_features.parquet` if present (produced by `pipelines/build_block_features.py`). Falls back to coordinate-only lookup from the frontend's blockCoords.json so the API still works before the pipeline is run. Feature columns we aim for (all optional — pipeline fills them in stages): state, district, block, lat, lon, elevation_m, soil_clay_pct, soil_sand_pct, soil_oc_pct, soil_ph, soil_cec, soil_n_total, ndvi_mean_3mo, ndvi_anomaly_30d, nearest_mandi, nearest_mandi_km, historical_crop_top3 (json string) """ from __future__ import annotations import json import logging from functools import lru_cache from pathlib import Path from typing import Optional from kcc_core import config logger = logging.getLogger(__name__) @lru_cache(maxsize=1) def _load_block_coords() -> dict: """Load fallback coords from frontend's blockCoords.json (always present).""" p = Path(__file__).resolve().parents[1] / "frontend" / "src" / "blockCoords.json" if not p.exists(): # Try the rag_chatbot_2 source if frontend hasn't been copied yet. p = Path("/home/hritikm15/Projects/rag_chatbot_2/frontend/src/blockCoords.json") if p.exists(): with open(p, "r") as f: return json.load(f) return {} @lru_cache(maxsize=1) def _load_features_parquet(): fp = config.BLOCK_FEATURES_FILE if not fp.exists(): logger.info(f"[block_features] {fp} not present yet — coords-only mode") return None try: import pandas as pd return pd.read_parquet(fp) except Exception as e: logger.warning(f"[block_features] read failed: {e}") return None def lookup(state: str, district: str, block: Optional[str] = None) -> dict: """Return a dict of features for the given block (or district centroid). Always returns at least lat/lon if known. All other keys are optional. """ out: dict = {"state": state, "district": district, "block": block or ""} # 1. Try the parquet (filled by pipelines/build_block_features.py) df = _load_features_parquet() if df is not None: import pandas as pd m = (df["state"].str.lower() == state.lower()) & \ (df["district"].str.lower() == district.lower()) if block: m = m & (df["block"].str.lower() == block.lower()) sel = df[m] if len(sel) > 0: row = sel.iloc[0].to_dict() out.update({k: row[k] for k in row if pd.notna(row[k])}) return out # 2. Fallback: just lat/lon from blockCoords.json coords = _load_block_coords() sn, dn = state.strip(), district.strip() bn = (block or "").strip() if sn in coords: if dn in coords[sn]: d_entry = coords[sn][dn] if isinstance(d_entry, dict): if bn and bn in d_entry and isinstance(d_entry[bn], dict): out.update({"lat": d_entry[bn].get("lat"), "lon": d_entry[bn].get("lon")}) elif "lat" in d_entry: out.update({"lat": d_entry.get("lat"), "lon": d_entry.get("lon")}) return out def lat_lon(state: str, district: str, block: Optional[str] = None) -> Optional[tuple]: f = lookup(state, district, block) if f.get("lat") is not None and f.get("lon") is not None: return float(f["lat"]), float(f["lon"]) return None