from pathlib import Path """ pest_predictor.py v5 — District-primary pest risk prediction =============================================================== Primary: district_v2 model (AUC ~0.95, 81 features, 1-month early warning) Fallback: district_v1 model (AUC 0.936, 63 features, same-month detection) Fallback: state-level v3 model (AUC 0.909) for uncovered districts Last resort: weather + history heuristics (no model) District v2 architecture (NEW): • Temporal lag: current weather → predicts NEXT month's pests (1-month lead) • 81 features: 63 v1 + 12 wx_prev_* + 7 disease-pressure indices • Wallin disease pressure, GDD accumulation, humidity trend • Isotonic calibration per pest (probabilities = actual rates) • F1-optimal thresholds via Youden's J Public API: predict_pest_risk(state, crop, district=None, month=None, year=None) -> list[dict] """ import json, math, pickle, os, datetime, re from functools import lru_cache import numpy as np MODEL_DIR = str(Path(__file__).parent.parent / "pest_model") # v2 model (preferred — 1-month early warning, 81 features) DISTRICT_V2_MODEL_PATH = f"{MODEL_DIR}/pest_risk_model_district_v2.pkl" DISTRICT_V2_META_PATH = f"{MODEL_DIR}/pest_model_meta_district_v2.json" THRESHOLDS_V2_PATH = f"{MODEL_DIR}/thresholds_district.json" # saved by v2 training # v1 model (fallback — same-month detection, 63 features) DISTRICT_MODEL_PATH = f"{MODEL_DIR}/pest_risk_model_district.pkl" DISTRICT_META_PATH = f"{MODEL_DIR}/pest_model_meta_district.json" V3_MODEL_PATH = f"{MODEL_DIR}/pest_risk_model_v3.pkl" V3_META_PATH = f"{MODEL_DIR}/pest_model_meta_v3.json" DISTRICT_PEST_CSV = f"{MODEL_DIR}/pest_monthly_district.csv" DISTRICT_WEATHER_CSV = f"{MODEL_DIR}/weather_monthly_district.csv" SUBDISTRICTS_CSV = str(Path(__file__).parent.parent / "data" / "indian_sub_districts.csv") # fallback to local THRESHOLDS_PATH = f"{MODEL_DIR}/thresholds_district.json" # All 14 pest categories (must match training) PEST_LIST = ["aphid","blight","borer","caterpillar","jassid","leaf_spot", "mildew","mite","mosaic_virus","rot","rust","thrips","whitefly","wilt"] # ── State coordinates fallback ──────────────────────────────────────────────── STATE_COORDS = { "MADHYA PRADESH": (23.25, 77.41), "MAHARASHTRA": (18.52, 73.86), "PUNJAB": (30.73, 76.78), "UTTAR PRADESH": (26.85, 80.95), "RAJASTHAN": (26.91, 75.79), "KARNATAKA": (12.97, 77.59), "GUJARAT": (23.03, 72.58), "HARYANA": (29.07, 76.08), "BIHAR": (25.61, 85.14), "ANDHRA PRADESH": (17.39, 78.49), "TELANGANA": (17.39, 78.49), "WEST BENGAL": (22.57, 88.36), "CHHATTISGARH": (21.27, 81.87), "JHARKHAND": (23.35, 85.33), "ODISHA": (20.30, 85.82), "TAMIL NADU": (13.08, 80.27), "ASSAM": (26.14, 91.74), "HIMACHAL PRADESH":(31.10, 77.17), } # ── Soil type (NBSS classification, matching retrain script) ────────────────── SOIL_TYPE = { "UTTAR PRADESH":0,"BIHAR":0,"WEST BENGAL":0,"PUNJAB":0,"HARYANA":0, "ASSAM":0,"ODISHA":0,"JHARKHAND":0,"GUJARAT":0, "MAHARASHTRA":1,"MADHYA PRADESH":1,"ANDHRA PRADESH":1,"TELANGANA":1, "KARNATAKA":1,"CHHATTISGARH":1, "TAMIL NADU":2,"KERALA":2,"GOA":2, "RAJASTHAN":3, "HIMACHAL PRADESH":4,"UTTARAKHAND":4,"JAMMU AND KASHMIR":4, "MANIPUR":4,"MEGHALAYA":4,"NAGALAND":4,"SIKKIM":4,"TRIPURA":4, "ARUNACHAL PRADESH":4,"MIZORAM":4, } # ── Pest info (name + treatment) ────────────────────────────────────────────── PEST_INFO = { "aphid": ("Aphid (Mahu / Aphis gossypii)", "Imidacloprid 17.8% SL @ 0.5ml/L or Thiamethoxam 25% WG @ 0.3g/L"), "blight": ("Early/Late Blight (Alternaria/Phytophthora)", "Mancozeb 75% WP @ 2g/L or Metalaxyl+Mancozeb @ 2.5g/L"), "wilt": ("Fusarium/Bacterial Wilt", "Copper Oxychloride 50% WP @ 3g/L or Carbendazim 50% WP @ 1g/L"), "borer": ("Stem/Fruit Borer (Helicoverpa / Chilo suppressalis)", "Emamectin Benzoate 5% SG @ 0.5g/L or Spinosad 45% SC @ 0.5ml/L"), "mite": ("Red Spider Mite (Tetranychus urticae)", "Spiromesifen 22.9% SC @ 1ml/L or Abamectin 1.9% EC @ 0.5ml/L"), "whitefly": ("Whitefly (Bemisia tabaci)", "Imidacloprid 17.8% SL @ 0.5ml/L or Spiromesifen 22.9% SC @ 1ml/L"), "thrips": ("Thrips (Frankliniella occidentalis)", "Spinosad 45% SC @ 0.5ml/L or Fipronil 5% SC @ 1.5ml/L"), "rust": ("Rust (Puccinia spp.)", "Propiconazole 25% EC @ 1ml/L or Tebuconazole 25.9% EC @ 1ml/L"), "rot": ("Root/Collar Rot (Pythium/Sclerotinia)", "Metalaxyl 35% WS @ 2g/kg seed or Copper Oxychloride drench @ 3g/L"), "mildew": ("Powdery/Downy Mildew", "Sulphur 80% WP @ 3g/L (powdery) or Mancozeb 75% WP @ 2g/L (downy)"), "leaf_spot": ("Leaf Spot/Anthracnose (Cercospora/Colletotrichum)", "Carbendazim 50% WP @ 1g/L or Mancozeb+Carbendazim @ 2g/L"), "mosaic_virus": ("Mosaic/Leaf Curl Virus (vector: whitefly/aphid)", "Control vector: Imidacloprid 17.8% SL @ 0.5ml/L — no direct cure for virus"), "caterpillar": ("Caterpillar/Armyworm (Spodoptera/Helicoverpa)", "Bt (Bacillus thuringiensis) @ 1g/L or Emamectin Benzoate 5% SG @ 0.5g/L"), "jassid": ("Jassid/Leafhopper (Amrasca devastans)", "Imidacloprid 17.8% SL @ 0.5ml/L or Thiamethoxam 25% WG @ 0.3g/L"), } # ── Agronomic overlay rules ─────────────────────────────────────────────────── AGRO_RULES = [ {"name": "High humidity fungal risk", "cond": lambda wx: wx["humidity_mean"] >= 85 and 18 <= wx["temp_mean"] <= 30, "pests": ["blight","mildew","leaf_spot","rot","rust"], "boost": 0.18}, {"name": "Heavy rain disease amplifier", "cond": lambda wx: wx["rain_7d_mean"] >= 60 and wx["temp_mean"] > 20, "pests": ["rot","blight","leaf_spot","caterpillar"], "boost": 0.12}, {"name": "Dry heat sucking pest surge", "cond": lambda wx: wx["humidity_mean"] < 45 and wx["temp_max_mean"] > 34, "pests": ["aphid","mite","whitefly","jassid","thrips"], "boost": 0.15}, {"name": "Drought stress wilt risk", "cond": lambda wx: wx["vpd_proxy_mean"] > 12, "pests": ["wilt","rot"], "boost": 0.10}, {"name": "Intermittent rain sporulation", "cond": lambda wx: 3 <= wx["rain_days"] <= 6 and wx["humidity_mean"] >= 70, "pests": ["rust","mildew","blight","leaf_spot"], "boost": 0.10}, {"name": "Late blight temperature window", "cond": lambda wx: 18 <= wx["temp_mean"] <= 24 and wx["humidity_mean"] >= 80, "pests": ["blight"], "boost": 0.20}, {"name": "Whitefly virus pressure", "cond": lambda wx: wx["temp_mean"] > 26 and wx["humidity_mean"] < 60, "pests": ["mosaic_virus","whitefly"], "boost": 0.08}, ] # ── Monthly heuristics for anomaly computation at inference ─────────────────── _MONTHLY_RAIN_NORM = {6:80,7:120,8:110,9:70,10:30,11:10,12:5,1:5,2:5,3:10,4:15,5:20} _MONTHLY_TEMP_NORM = {1:15,2:18,3:23,4:28,5:32,6:30,7:28,8:27,9:27,10:25,11:20,12:16} # ── Model + baselines loading ───────────────────────────────────────────────── @lru_cache(maxsize=2) def _load_district_model(): """ Load district stacking bundle (cached — loaded once per process). Prefers v2 (1-month early warning) over v1 if both exist. Returns (bundle, meta) — meta has model_version key. """ # ── Try v2 first (1-month early warning, 81 features) ────────────────── if os.path.exists(DISTRICT_V2_MODEL_PATH): try: with open(DISTRICT_V2_MODEL_PATH, "rb") as f: bundle = pickle.load(f) meta = {"model_version": "district_v2", "auc": 0.0, "per_pest_auc": {}, "lead_time": "1-month"} if os.path.exists(DISTRICT_V2_META_PATH): with open(DISTRICT_V2_META_PATH) as f: meta.update(json.load(f)) # Mark bundle with version for feature construction bundle.setdefault("model_version", "district_v2") print(f"[pest_predictor] ✅ District v2 model loaded (AUC {meta.get('cv_auc_stack', meta.get('auc', '?'))})") return bundle, meta except Exception as e: print(f"[pest_predictor] v2 model load failed ({e}) — trying v1") # ── Fall back to v1 (same-month, 63 features) ─────────────────────────── if not os.path.exists(DISTRICT_MODEL_PATH): return None, None try: with open(DISTRICT_MODEL_PATH, "rb") as f: bundle = pickle.load(f) bundle.setdefault("model_version", "district_v1") meta = {"model_version": "district_v1", "lead_time": "0-month"} if os.path.exists(DISTRICT_META_PATH): with open(DISTRICT_META_PATH) as f: meta.update(json.load(f)) print(f"[pest_predictor] ✅ District v1 model loaded (AUC 0.936)") return bundle, meta except Exception as e: print(f"[pest_predictor] District model load failed: {e}") return None, None @lru_cache(maxsize=2) def _load_v3_model(): """Load v3 state-level model as fallback (cached).""" if not os.path.exists(V3_MODEL_PATH) or not os.path.exists(V3_META_PATH): return None, None try: with open(V3_MODEL_PATH, "rb") as f: models = pickle.load(f) with open(V3_META_PATH) as f: meta = json.load(f) return models, meta except Exception as e: print(f"[pest_predictor] v3 model load failed: {e}") return None, None @lru_cache(maxsize=1) def _load_district_baselines(): """ Load district pest CSV and compute historical baselines. Returns dict keyed by "{state}|{district}|{crop}|{pest}|{month}" → { "presence_rate": float, # mean historical presence (0-1) "freq_mean": float, # mean freq_norm over years } Also returns a "summary" dict for lag features: "{state}|{district}|{crop}|{pest}" → [monthly_presence_rate × 12] """ if not os.path.exists(DISTRICT_PEST_CSV): return {}, {} try: df = _pd().read_csv(DISTRICT_PEST_CSV, usecols=["state","district","crop_group","pest_cat","year","month","freq_norm"]) df["present"] = (df["freq_norm"] >= 0.02).astype(float) # Monthly baseline per (state, district, crop, pest, month) agg = df.groupby(["state","district","crop_group","pest_cat","month"]).agg( presence_rate=("present","mean"), freq_mean =("freq_norm","mean"), ).reset_index() baselines = {} for _, row in agg.iterrows(): key = f"{row['state']}|{row['district']}|{row['crop_group']}|{row['pest_cat']}|{int(row['month'])}" baselines[key] = { "presence_rate": float(row["presence_rate"]), "freq_mean": float(row["freq_mean"]), } return baselines, {} except Exception as e: print(f"[pest_predictor] Baselines load failed: {e}") return {}, {} @lru_cache(maxsize=1) def _load_district_coords(): """ Load district centroid coordinates for spatial fallback. Returns (coords_dict, covered_districts_list) coords_dict: "{STATE}|{DISTRICT}" → (lat, lon) covered_list: list of (lat, lon, state, district) for haversine search """ # Try from weather CSV first (has covered districts) coords_dict = {} covered_list = [] if os.path.exists(DISTRICT_WEATHER_CSV): try: df = _pd().read_csv(DISTRICT_WEATHER_CSV, usecols=["state","district"]) # Weather CSV doesn't have lat/lon columns directly — use sub_districts CSV except Exception: pass if os.path.exists(SUBDISTRICTS_CSV): try: sub = _pd().read_csv(SUBDISTRICTS_CSV) sub["State"] = sub["State"].str.strip().str.upper() sub["District"] = sub["District"].str.strip().str.upper() centroids = sub.groupby(["State","District"])[["lat","lon"]].mean().reset_index() for _, row in centroids.iterrows(): k = f"{row['State']}|{row['District']}" coords_dict[k] = (float(row["lat"]), float(row["lon"])) covered_list.append((float(row["lat"]), float(row["lon"]), row["State"], row["District"])) except Exception as e: print(f"[pest_predictor] Sub-districts CSV load failed: {e}") # Fallback: build from STATE_COORDS if not coords_dict: for state, (lat, lon) in STATE_COORDS.items(): coords_dict[f"{state}|"] = (lat, lon) return coords_dict, covered_list @lru_cache(maxsize=1) def _load_thresholds(): """Load per-pest optimal classification thresholds (default 0.35 if missing).""" defaults = {p: 0.35 for p in PEST_LIST} if os.path.exists(THRESHOLDS_PATH): try: with open(THRESHOLDS_PATH) as f: saved = json.load(f) defaults.update(saved) except Exception: pass return defaults def _pd(): """Lazy import pandas to avoid startup cost if not used.""" import pandas return pandas # ── Coordinate helpers ──────────────────────────────────────────────────────── def _haversine(lat1, lon1, lat2, lon2): """Distance in km between two lat/lon points.""" R = 6371.0 dlat = math.radians(lat2 - lat1) dlon = math.radians(lon2 - lon1) a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1))*math.cos(math.radians(lat2))*math.sin(dlon/2)**2 return R * 2 * math.asin(math.sqrt(a)) def _get_district_coords(state, district): """ Return (lat, lon) for a district. Priority: sub-districts CSV → state centroid. """ coords_dict, _ = _load_district_coords() key = f"{state.upper()}|{district.upper()}" if district else None if key and key in coords_dict: return coords_dict[key] # Try just state for k, v in coords_dict.items(): if k.startswith(f"{state.upper()}|"): return v return STATE_COORDS.get(state.upper(), (23.25, 77.41)) def _nearest_covered_district(state, district, bundle): """ If district not in district_enc, find nearest covered district via haversine. Returns (fallback_district, fallback_lat, fallback_lon, distance_km). """ dist_enc = bundle.get("district_enc", {}) district_u = district.upper() if district else "" if district_u in dist_enc: lat, lon = _get_district_coords(state, district_u) return district_u, lat, lon, 0.0 # Get query coords lat_q, lon_q = _get_district_coords(state, district_u or state) _, covered_list = _load_district_coords() best_dist = float("inf") best = (district_u, lat_q, lon_q) for (lat, lon, cov_state, cov_dist) in covered_list: if cov_dist not in dist_enc: continue d = _haversine(lat_q, lon_q, lat, lon) if d < best_dist: best_dist = d best = (cov_dist, lat, lon) fb_district, fb_lat, fb_lon = best return fb_district, fb_lat, fb_lon, best_dist # ── Weather fetching ────────────────────────────────────────────────────────── def _get_forecast_weather(lat, lon): """Fetch 7-day Open-Meteo forecast. Falls back to seasonal heuristics.""" try: import requests r = requests.get("https://api.open-meteo.com/v1/forecast", params={ "latitude": lat, "longitude": lon, "daily": ("temperature_2m_mean,temperature_2m_max,temperature_2m_min," "relative_humidity_2m_mean,precipitation_sum,wind_speed_10m_max"), "forecast_days": 7, "timezone": "Asia/Kolkata", }, timeout=12) r.raise_for_status() d = r.json().get("daily", {}) def sm(lst, default=25.0): vals = [x for x in (lst or []) if x is not None] return float(np.mean(vals)) if vals else default temps = d.get("temperature_2m_mean", []) temp_max = d.get("temperature_2m_max", []) temp_min = d.get("temperature_2m_min", []) hum = d.get("relative_humidity_2m_mean", []) wind = d.get("wind_speed_10m_max", []) rain_raw = d.get("precipitation_sum", []) rain = [x for x in rain_raw if x is not None] rain_t = sum(rain) rain_d = sum(1 for x in rain if x > 1.0) t_mean = sm(temps, 25.0) t_max = sm(temp_max, 32.0) t_min = sm(temp_min, 18.0) h_mean = sm(hum, 65.0) w_mean = sm(wind, 15.0) return { "temp_mean": t_mean, "temp_max_mean": t_max, "temp_min_mean": t_min, "humidity_mean": h_mean, "rain_mm": rain_t, "rain_7d_mean": rain_t, "rain_30d_mean": rain_t, # 7d proxy; anomaly handles the rest "rain_days": rain_d, "vpd_proxy_mean": t_max * (1 - h_mean / 100), "wind_max_mean": w_mean, "diurnal_range": t_max - t_min, "skin_temp_mean": t_mean + 0.5, # ERA5 skt ≈ t2m at 0.25° "soil_temp_mean": t_mean - 2.0, # Soil slightly cooler "satd_mean": 0.5, # ERA5 SATD noisy at 0.25° — near zero } except Exception: month = datetime.datetime.now().month t = _MONTHLY_TEMP_NORM.get(month, 25.0) r = _MONTHLY_RAIN_NORM.get(month, 30.0) h = 70.0 if month in (6,7,8,9) else 55.0 return { "temp_mean": t, "temp_max_mean": t+7, "temp_min_mean": t-6, "humidity_mean": h, "rain_mm": r, "rain_7d_mean": r, "rain_30d_mean": r, "rain_days": 3 if r > 5 else 0, "vpd_proxy_mean": (t+7)*(1-h/100), "wind_max_mean": 15.0, "diurnal_range": 13.0, "skin_temp_mean": t+0.5, "soil_temp_mean": t-2.0, "satd_mean": 0.5, } # ── Feature vector construction ─────────────────────────────────────────────── def _growth_stage(month): """Kharif/Rabi stage proxy matching retrain script.""" if month in [6, 7]: return 0 # sowing/establishment elif month in [8, 9]: return 1 # vegetative elif month in [10, 11]: return 2 # reproductive elif month in [12, 1]: return 3 # maturity else: return 1 # off-season vegetative def _safe_encode(enc_dict, value, default=-1): """Encode value with dict encoder, return default if unknown.""" return enc_dict.get(value, default) def _baseline_presence(baselines, state, district, crop, pest, month): """Look up historical pest presence rate. Returns (rate, freq_mean).""" key = f"{state}|{district}|{crop}|{pest}|{month}" b = baselines.get(key, {}) return b.get("presence_rate", 0.0), b.get("freq_mean", 0.0) def _disease_pressure(temp, hum): """Wallin-inspired fungal disease pressure index.""" return (hum / 100) ** 2 * max(0.0, (30 - abs(temp - 22)) / 30) def _build_district_feature_vector(bundle, baselines, state, district, crop_group, pest_cat, month, wx, lat, lon): """ Build the full feature vector for one (state, district, crop, pest, month). Supports BOTH v1 (63 features) and v2 (81 features, temporal lag). v2 inference semantics: Current 7-day forecast weather → wx_prev_* features (M-1 inputs) Next-month climatological norms → same-month temp_mean/humidity_mean inputs This gives 1-month early warning: current conditions predict next month's risk. Returns pd.DataFrame with exactly bundle["feature_cols"] columns. """ import pandas as pd feature_cols = bundle["feature_cols"] crop_enc = bundle["crop_enc"] pest_enc = bundle["pest_enc"] state_enc = bundle["state_enc"] dist_enc = bundle["district_enc"] top_cooc = bundle["top_cooc"] model_ver = bundle.get("model_version", "district_v1") is_v2 = (model_ver == "district_v2") # ── Encoded categoricals ───────────────────────────────────────────────── s_enc = _safe_encode(state_enc, state, -1) d_enc = _safe_encode(dist_enc, district, -1) c_enc = _safe_encode(crop_enc, crop_group, -1) p_enc = _safe_encode(pest_enc, pest_cat, -1) # ── Month cyclical ─────────────────────────────────────────────────────── month_sin = math.sin(2 * math.pi * month / 12) month_cos = math.cos(2 * math.pi * month / 12) # ── Current (observed/forecast) weather — used as wx_prev_* for v2 ─────── curr_temp = wx["temp_mean"] curr_t_max = wx["temp_max_mean"] curr_t_min = wx["temp_min_mean"] curr_hum = wx["humidity_mean"] curr_rain = wx["rain_mm"] curr_rain_d= wx["rain_days"] curr_dr = wx["diurnal_range"] curr_vpd = wx["vpd_proxy_mean"] curr_lw = (curr_hum / 100) * curr_rain_d * 3 # leaf_wetness from current weather # For anomalies, use current month curr_rain_norm = _MONTHLY_RAIN_NORM.get(month, 30) curr_temp_norm = _MONTHLY_TEMP_NORM.get(month, 25) curr_rain_anom = (curr_rain - curr_rain_norm) / max(curr_rain_norm * 0.5, 1) curr_temp_anom = (curr_temp - curr_temp_norm) / 4.0 curr_hum_anom = (curr_hum - 65) / 15.0 if is_v2: # ── v2: same-month features = climatological norm for prediction month ── # At inference: we predict month (month+1), current wx → wx_prev_* # For temp_mean/humidity_mean of next month, use seasonal norms next_m = (month % 12) + 1 temp = float(_MONTHLY_TEMP_NORM.get(next_m, 25)) t_max = temp + 7.0 t_min = temp - 6.0 hum = 70.0 if next_m in (6, 7, 8, 9) else 57.0 rain = float(_MONTHLY_RAIN_NORM.get(next_m, 30)) rain_d = 4 if rain > 10 else 1 dr = 13.0 vpd = t_max * (1 - hum / 100) wind = wx.get("wind_max_mean", 15.0) skt = temp + 0.5 stl = temp - 2.0 satd = wx.get("satd_mean", 0.5) r7d = rain / 4.0 # rough 7d from monthly # Use next-month norms for anomaly features in the same-month slot rain_norm = _MONTHLY_RAIN_NORM.get(next_m, 30) temp_norm = float(_MONTHLY_TEMP_NORM.get(next_m, 25)) rain_anomaly = (rain - rain_norm) / max(rain_norm * 0.5, 1) temp_anomaly = (temp - temp_norm) / 4.0 hum_anomaly = (hum - 65) / 15.0 # For lag features, use next_m (which is what we're predicting) pred_month = next_m else: # ── v1: same-month weather = current forecast ──────────────────────── temp = curr_temp t_max = curr_t_max t_min = curr_t_min hum = curr_hum rain = curr_rain rain_d = curr_rain_d dr = curr_dr vpd = curr_vpd wind = wx.get("wind_max_mean", 15.0) skt = wx.get("skin_temp_mean", curr_temp + 0.5) stl = wx.get("soil_temp_mean", curr_temp - 2.0) satd = wx.get("satd_mean", 0.5) r7d = wx.get("rain_7d_mean", curr_rain) rain_norm = curr_rain_norm temp_norm = curr_temp_norm rain_anomaly = curr_rain_anom temp_anomaly = curr_temp_anom hum_anomaly = curr_hum_anom pred_month = month rain_intensity = rain / max(rain_d, 1) leaf_wetness = (hum / 100) * rain_d * 3 hum_x_temp = hum * temp / 1000 rain_x_hum = rain * hum / 10000 high_humidity = int(hum > 75) soil_anomaly = (stl - (temp_norm - 2)) / 4.0 ndvi_proxy = float(np.clip( 0.5 * np.clip(rain_anomaly, -3, 3) - 0.3 * np.clip(temp_anomaly, -3, 3) - 0.2 * np.clip(soil_anomaly, -3, 3), -2, 2 )) # ── Lag features from baselines ────────────────────────────────────────── prev_m = pred_month - 1 if pred_month > 1 else 12 lag_prev_month, _ = _baseline_presence(baselines, state, district, crop_group, pest_cat, prev_m) lag_1yr, _ = _baseline_presence(baselines, state, district, crop_group, pest_cat, pred_month) lag_2yr = lag_1yr * 0.85 # approximate 2yr from 1yr pest_trend = (lag_prev_month + lag_1yr) / 2.0 # ── Soil type ──────────────────────────────────────────────────────────── soil_type = SOIL_TYPE.get(state, 0) # ── Growth stage ───────────────────────────────────────────────────────── gs = _growth_stage(pred_month) # ── Base feature dict ──────────────────────────────────────────────────── fv = { "month_sin": month_sin, "month_cos": month_cos, "state_enc": s_enc, "district_enc": d_enc, "crop_enc": c_enc, "pest_enc": p_enc, "lat": lat, "lon": lon, "soil_type": soil_type, "growth_stage": gs, "temp_mean": temp, "temp_max_mean": t_max, "temp_min_mean": t_min, "diurnal_range": dr, "humidity_mean": hum, "rain_mm": rain, "rain_days": rain_d, "rain_intensity": rain_intensity, "soil_temp_mean": stl, "satd_mean": satd, "rain_7d_mean": r7d, "vpd_proxy_mean": vpd, "leaf_wetness": leaf_wetness, "hum_x_temp": hum_x_temp, "rain_x_hum": rain_x_hum, "high_humidity": high_humidity, "rain_anomaly": rain_anomaly, "temp_anomaly": temp_anomaly, "hum_anomaly": hum_anomaly, "soil_anomaly": soil_anomaly, "ndvi_proxy": ndvi_proxy, "lag_prev_month": lag_prev_month, "lag_1yr": lag_1yr, "lag_2yr": lag_2yr, "pest_trend": pest_trend, } # ── v2 ONLY: temporal lag features + disease pressure indices ──────────── if is_v2: # wx_prev_* = current observed/forecast weather (M-1 relative to next month) curr_lw_v2 = (curr_hum / 100) * curr_rain_d * 3 fv.update({ "wx_prev_temp_mean": curr_temp, "wx_prev_temp_max_mean": curr_t_max, "wx_prev_humidity_mean": curr_hum, "wx_prev_rain_mm": curr_rain, "wx_prev_rain_days": curr_rain_d, "wx_prev_leaf_wetness": curr_lw_v2, "wx_prev_vpd_proxy_mean": curr_vpd, "wx_prev_diurnal_range": curr_dr, "wx_prev_rain_anomaly": curr_rain_anom, "wx_prev_temp_anomaly": curr_temp_anom, "wx_prev_hum_anomaly": curr_hum_anom, "wx_prev_wet_days": curr_rain_d, # alias used in some feature sets }) # Disease pressure indices — computed from CURRENT weather (the observed M-1 signal) dp_curr = _disease_pressure(curr_temp, curr_hum) # Previous month's disease pressure: approximate from monthly norm prev_temp_norm = float(_MONTHLY_TEMP_NORM.get(prev_m, 25)) prev_hum_norm = 70.0 if prev_m in (6, 7, 8, 9) else 57.0 dp_prev = _disease_pressure(prev_temp_norm, prev_hum_norm) gdd_monthly = max(0.0, curr_temp - 10.0) * 30.0 wet_days_intensity= curr_rain_d * (curr_lw_v2 + 1) thermo_humid = curr_hum * curr_vpd / 1000.0 humidity_trend = curr_hum - prev_hum_norm # rising vs falling fv.update({ "disease_pressure_idx": dp_curr, "gdd_monthly": gdd_monthly, "wet_days_intensity": wet_days_intensity, "thermo_humid_stress": thermo_humid, "prev_disease_pressure": dp_prev, "humidity_trend": humidity_trend, }) # ── Co-occurrence features (all pests × 2) ─────────────────────────────── # cooc1_{p} = prior-year presence of top co-occurring pest 1 for pest p # cooc2_{p} = prior-year presence of top co-occurring pest 2 for pest p for p, cooc_pests in top_cooc.items(): for i, cooc_pest in enumerate(cooc_pests[:2], 1): col = f"cooc{i}_{p}" if col in feature_cols: rate, _ = _baseline_presence(baselines, state, district, crop_group, cooc_pest, pred_month) fv[col] = rate # ── Build DataFrame in exact feature_cols order ────────────────────────── row = {col: fv.get(col, 0.0) for col in feature_cols} return pd.DataFrame([row]) # ── Stacking prediction ─────────────────────────────────────────────────────── def _district_predict(bundle, fv_df): """ Get stacking ensemble probability from district bundle. Bundle has: lgb, xgb, cat (sklearn-API), meta (LogisticRegression), scaler. """ try: p_lgb = float(bundle["lgb"].predict_proba(fv_df)[0][1]) p_xgb = float(bundle["xgb"].predict_proba(fv_df)[0][1]) p_cat = float(bundle["cat"].predict_proba(fv_df)[0][1]) meta_X = bundle["scaler"].transform([[p_lgb, p_xgb, p_cat]]) return float(bundle["meta"].predict_proba(meta_X)[0][1]) except Exception as e: print(f"[pest_predictor] District predict error: {e}") return 0.3 def _v3_predict(models, meta_v3, state, crop_group, month, wx): """Run v3 state-level model (fallback). Returns {pest_cat: prob}.""" from mandi_advisor.pest_predictor_v3_helpers import _build_feature_vector_v3 import pandas as pd results = {} is_v3 = meta_v3.get("version") == "v3" for pest_cat, bundle_v3 in models.items(): try: if is_v3: prev_freq, lag1, lag2, lag3 = _get_lag_freqs_v3(meta_v3, state, crop_group, month, pest_cat) cooc1, cooc2 = _get_cooc_freqs_v3(meta_v3, state, crop_group, month, pest_cat) days_sow = ((month - 6) % 12) * 30 stage = min(days_sow // 30, 4) fv = _build_feature_vector_v3(meta_v3, state, crop_group, month, wx, days_sow, stage, prev_freq, lag1, lag2, lag3, cooc1, cooc2) else: fv = [0] * len(meta_v3.get("feature_cols", [])) fv_df = pd.DataFrame([dict(zip(meta_v3["feature_cols"], fv))]) if isinstance(bundle_v3, dict) and "lgb" in bundle_v3: p_lgb = float(bundle_v3["lgb"].predict_proba(fv_df)[0][1]) p_xgb = float(bundle_v3["xgb"].predict_proba(fv_df)[0][1]) p_cat = float(bundle_v3["cat"].predict_proba(fv_df)[0][1]) meta_X = [[p_lgb, p_xgb, p_cat]] prob = float(bundle_v3["meta"].predict_proba(meta_X)[0][1]) else: prob = float(bundle_v3.predict_proba(fv_df)[0][1]) results[pest_cat] = prob except Exception: results[pest_cat] = 0.25 return results def _get_lag_freqs_v3(meta, state, crop_group, month, pest_cat): baselines = meta.get("baselines", {}) prev_m = month - 1 if month > 1 else 12 prev_k = f"{state}|{crop_group}|{prev_m}|{pest_cat}" prev_freq = float(baselines.get(prev_k, {}).get("mean", 0.0)) curr_k = f"{state}|{crop_group}|{month}|{pest_cat}" b = baselines.get(curr_k, {}) lag1 = float(b.get("lag1yr_mean", b.get("mean", 0.0))) lag2 = lag1 * 0.85 lag3 = lag1 * 0.70 return prev_freq, lag1, lag2, lag3 def _get_cooc_freqs_v3(meta, state, crop_group, month, pest_cat): cooc_map = meta.get("cooccurrence_map", {}) baselines = meta.get("baselines", {}) corr_pests = cooc_map.get(pest_cat, []) results = [] for cp in corr_pests[:2]: k = f"{state}|{crop_group}|{month}|{cp}" results.append(float(baselines.get(k, {}).get("mean", 0.0))) while len(results) < 2: results.append(0.0) return results[0], results[1] def _build_feature_vector_v3(meta, state, crop_group, month, wx, days_sowing, stage, prev_freq, lag1, lag2, lag3, cooc1, cooc2): """v3 33-feature vector (kept inline to avoid circular import).""" FEATURE_COLS = meta["feature_cols"] state_classes = meta.get("state_classes", []) crop_classes = meta.get("crop_classes", []) state_enc = state_classes.index(state) if state in state_classes else 0 crop_enc = crop_classes.index(crop_group) if crop_group in crop_classes else 0 rain = wx.get("rain_mm", 10); hum = wx.get("humidity_mean", 65); temp = wx.get("temp_mean", 25) rain_days = wx.get("rain_days", 3) rain_norm = _MONTHLY_RAIN_NORM.get(month, 30) temp_norm = _MONTHLY_TEMP_NORM.get(month, 25) rain_anomaly = (rain - rain_norm) / max(rain_norm * 0.5, 1) temp_anomaly = (temp - temp_norm) / 4.0 hum_anomaly = (hum - 65) / 15.0 ndvi_proxy = float(np.clip(0.5*rain_anomaly - 0.3*temp_anomaly, -2, 2)) leaf_wetness = (hum/100) * rain_days * 3 dr = wx.get("diurnal_range", 14) hum_x_temp = hum * temp / 1000 rain_x_hum = rain * hum / 10000 pest_trend = lag1 - lag2 state_soil = meta.get("state_soil", {}) soil_mult_d = meta.get("soil_moisture_mult", {}) soil_type = int(state_soil.get(state, 0)) soil_mult = float(soil_mult_d.get(str(soil_type), 1.0)) adj_humidity = wx.get("humidity_mean", 65) * soil_mult fv = { "month_sin":math.sin(2*math.pi*month/12),"month_cos":math.cos(2*math.pi*month/12), "month":month,"state_enc":state_enc,"crop_enc":crop_enc, "temp_mean":temp,"temp_max_mean":wx.get("temp_max_mean",32), "humidity_mean":hum,"rain_mm":rain,"rain_7d_mean":wx.get("rain_7d_mean",15), "rain_30d_mean":wx.get("rain_30d_mean",40),"rain_days":rain_days, "rain_intensity":rain/max(rain_days,1),"vpd_proxy_mean":wx.get("vpd_proxy_mean",8), "wind_max_mean":wx.get("wind_max_mean",15),"high_humidity":int(hum>75), "warm_wet":int(temp>22 and rain>50),"cool_dry":int(temp<18 and rain<20), "heat_stress":int(wx.get("temp_max_mean",32)>38),"rain_anomaly":rain_anomaly, "temp_anomaly":temp_anomaly,"hum_anomaly":hum_anomaly,"ndvi_proxy":ndvi_proxy, "leaf_wetness":leaf_wetness,"diurnal_range":dr,"hum_x_temp":hum_x_temp, "rain_x_hum":rain_x_hum,"soil_type":soil_type,"adj_humidity":adj_humidity, "days_sowing":days_sowing,"growth_stage":stage,"prev_freq_norm":prev_freq, "lag1yr_freq":lag1,"lag2yr_freq":lag2,"lag3yr_freq":lag3, "pest_trend":pest_trend,"cooc1":cooc1,"cooc2":cooc2, } return [fv.get(col, 0) for col in FEATURE_COLS] # ── Weather scoring ─────────────────────────────────────────────────────────── def _weather_driver(wx): score, parts = 0.0, [] h = wx["humidity_mean"]; r = wx["rain_7d_mean"]; t = wx["temp_mean"] if h >= 85: score += 0.40; parts.append(f"very high humidity ({h:.0f}%)") elif h >= 70: score += 0.25; parts.append(f"high humidity ({h:.0f}%)") elif h < 40: score += 0.10; parts.append(f"dry air ({h:.0f}% — sucking pest risk)") if r >= 50: score += 0.35; parts.append(f"{r:.0f}mm rain last 7d") elif r >= 20: score += 0.20; parts.append(f"{r:.0f}mm rain last 7d") if 20 <= t <= 30: score += 0.15; parts.append(f"optimal infection temp ({t:.1f}°C)") elif t > 36: score += 0.05; parts.append(f"heat stress ({t:.1f}°C)") if wx["rain_days"] >= 5: score += 0.10; parts.append(f"{wx['rain_days']} rainy days") return min(score, 1.0), ("; ".join(parts) if parts else "moderate conditions") def _apply_agro_rules(pest_cat, combined, wx): triggered = [] for rule in AGRO_RULES: if pest_cat in rule["pests"]: try: if rule["cond"](wx): combined += rule["boost"] triggered.append(rule["name"]) except Exception: pass return min(combined, 1.0), triggered # ── Risk scoring ────────────────────────────────────────────────────────────── def _risk_level(score): """Map 0-100 risk score to severity tier.""" if score >= 80: return "CRITICAL" if score >= 60: return "HIGH" if score >= 40: return "MEDIUM" if score >= 20: return "LOW" return "NEGLIGIBLE" def _action(rl): return { "CRITICAL": "🔴 Act immediately — spray within 24-48h to prevent crop loss", "HIGH": "🟠 Spray within 3-5 days (preventive window)", "MEDIUM": "🟡 Monitor field daily — prepare spray materials", "LOW": "🟢 Low risk — standard weekly scouting sufficient", "NEGLIGIBLE": "✅ Very low risk — no immediate action needed", }.get(rl, "Monitor") def _confidence_tier(auc: float, n_samples: int = 0) -> dict: """ Return structured confidence info for a per-pest AUC. n_samples: number of training records for this pest (optional). """ if auc >= 0.93: label, color, note = "Very High", "🟢", "Model highly reliable for this pest" elif auc >= 0.88: label, color, note = "High", "🟢", "Model reliable — trust predictions" elif auc >= 0.80: label, color, note = "Moderate", "🟡", "Reasonable accuracy — verify with field scouting" elif auc >= 0.70: label, color, note = "Low", "🟠", "Limited data — use as early indicator only" else: label, color, note = "Very Low", "🔴", "Insufficient training data — heuristic estimate" return {"label": label, "color": color, "auc": round(auc, 3), "note": note} # ── Crop mapping ────────────────────────────────────────────────────────────── # v2 — expanded to 25 crop groups matching build_district_pest_data.py (ORDER MATTERS) _CROP_GROUPS = { # Pulses — separated for distinct pest profiles "pigeonpea": ["pigeon pea", "red gram", "arhar", "tur dal", "toor"], "bengal_gram": ["bengal gram", "kabuli chana", "kabuli", "chick pea", "chickpea"], "moong": ["green gram", "moong bean", "moong"], "urad": ["black gram", "urd bean", "urad", "urid"], "lentil": ["lentil", "masur", "masoor"], # Horticulture "mango": ["mango", "aam"], "banana": ["banana", "kela", "plantain"], "okra": ["bhindi", "okra", "ladysfinger", "lady finger"], # Cereals / millets "pearl_millet":["pearl millet", "bajra", "bulrush millet", "spiked millet"], "sorghum": ["sorghum", "jowar", "great millet"], # Row crops "tomato": ["tomato", "tamatar"], "onion": ["onion", "pyaz", "kanda"], "potato": ["potato", "aloo", "alu"], "wheat": ["wheat", "gehu", "gehun", "gehoo"], "rice": ["paddy", "rice", "dhan", "chawal", "dhaan"], "maize": ["maize", "corn", "makka", "makki"], "cotton": ["cotton", "kapas", "karpas"], "soybean": ["soybean", "soya", "soyabean"], "mustard": ["mustard", "sarson", "rai", "rapeseed"], "chilli": ["chilli", "mirchi", "capsicum"], "brinjal": ["brinjal", "baingan", "eggplant", "begun"], "cauliflower": ["cauliflower", "phool gobi", "gobhi", "gobi"], "groundnut": ["groundnut", "peanut", "moongphali"], "sugarcane": ["sugarcane", "ganna", "ikh"], "vegetables": ["cucumber", "pumpkin", "gourd", "spinach", "palak", "cabbage", "bitter gourd", "bottle gourd", "ridge gourd", "pointed gourd", "watermelon"], } def _map_crop_group(crop): cl = (crop or "").lower() for grp, kws in _CROP_GROUPS.items(): if any(k in cl for k in kws): return grp return "other" # ── Main prediction API ─────────────────────────────────────────────────────── def predict_pest_risk(state, crop, district=None, month=None, year=None): """ Main API — returns list of pest risk dicts, sorted by risk_score descending. Strategy: 1. Try district v2 model (1-month early warning, AUC ~0.95) — spatial fallback included 2. Try district v1 model (same-month, AUC 0.936) — spatial fallback included 3. Fall back to v3 state model (AUC 0.909) if district model unavailable 4. Fall back to heuristic (weather + history) if neither model available """ state_upper = (state or "MADHYA PRADESH").strip().upper() crop_group = _map_crop_group(crop) month = month or datetime.datetime.now().month district_u = district.strip().upper() if district else "" # ── Load assets ────────────────────────────────────────────────────────── d_bundle, d_meta = _load_district_model() baselines, _ = _load_district_baselines() thresholds = _load_thresholds() # ── Get coordinates ─────────────────────────────────────────────────────── lat, lon = _get_district_coords(state_upper, district_u) wx = _get_forecast_weather(lat, lon) wx_score, wx_driver = _weather_driver(wx) # ── District model path ─────────────────────────────────────────────────── if d_bundle is not None: # Spatial fallback: find nearest covered district if needed actual_district = district_u fb_district, fb_lat, fb_lon, fb_dist_km = _nearest_covered_district( state_upper, district_u, d_bundle) if fb_dist_km > 0: actual_district = fb_district lat, lon = fb_lat, fb_lon # Refresh weather for correct coords wx = _get_forecast_weather(fb_lat, fb_lon) wx_score, wx_driver = _weather_driver(wx) fallback_note = ( f" [spatial fallback: using {fb_district} ({fb_dist_km:.0f}km)]" if fb_dist_km > 0 else "" ) results = [] for pest_cat in PEST_LIST: fv_df = _build_district_feature_vector( d_bundle, baselines, state_upper, actual_district, crop_group, pest_cat, month, wx, lat, lon) proba = _district_predict(d_bundle, fv_df) # Weight: 70% model, 20% weather physics, 10% history signal hist_rate, _ = _baseline_presence(baselines, state_upper, actual_district, crop_group, pest_cat, month) hist_score = min(hist_rate * 2.0, 1.0) # scale 0-1 combined = 0.70 * proba + 0.20 * wx_score + 0.10 * hist_score combined, triggered = _apply_agro_rules(pest_cat, combined, wx) rule_note = (f" [+{', '.join(triggered)}]" if triggered else "") risk_score = max(0, min(100, int(round(combined * 100)))) rl = _risk_level(risk_score) pest_name, spray = PEST_INFO.get(pest_cat, (pest_cat.replace("_"," ").title(), "Consult local KVK")) per_pest_auc = (d_meta or {}).get("per_pest_auc", {}).get(pest_cat, 0) conf_tier = _confidence_tier(per_pest_auc) confidence = conf_tier["label"] # backward compat string # Top-3 feature attribution (qualitative) feature_drivers = _top_feature_drivers(wx, proba, hist_rate, month) ndvi_val = float(np.clip( 0.5*((wx["rain_mm"] - _MONTHLY_RAIN_NORM.get(month, 30)) / 30) - 0.3*((wx["temp_mean"] - _MONTHLY_TEMP_NORM.get(month, 25)) / 4), -2, 2)) ndvi_pct = int((ndvi_val + 2) / 4 * 100) # Determine model version and lead-time note mv = d_bundle.get("model_version", "district_v1") if mv == "district_v2": next_m = (month % 12) + 1 mv_label = f"district_v2 (1-month early warning){fallback_note}" lead_note = (f"⚡ Predicting pest risk for NEXT month (month {next_m}). " "Current weather → next-month forecast. " "You have 3-4 weeks to apply preventive spray.") else: mv_label = f"district_v1{fallback_note}" lead_note = "Current-month detection (no lead time). Upgrade to v2 for early warning." results.append({ "pest": pest_name, "pest_cat": pest_cat, "risk_score": risk_score, "risk_level": rl, "confidence": confidence, "confidence_tier": conf_tier, # NEW: structured confidence info "model_auc": round(per_pest_auc, 3), "model_version": mv_label, "lead_time_note": lead_note, "history_score": round(hist_score, 3), "weather_score": round(wx_score, 3), "model_score": round(proba, 3), # NEW: raw model probability "weather_driver": wx_driver + rule_note, "history_note": (f"Historical presence rate: {hist_rate*100:.0f}% " f"for {crop_group} in {actual_district} in month {month}"), "feature_drivers": feature_drivers, "growth_stage": ["Sowing","Vegetative","Reproductive","Maturity", "Off-season"][_growth_stage(month)], "ndvi_index": ndvi_pct, "recommended_action": _action(rl), "spray": spray, "weather_summary": wx, }) results.sort(key=lambda x: -x["risk_score"]) medium_plus = [r for r in results if r["risk_level"] in ("CRITICAL","HIGH","MEDIUM")] # Return top results — always include at least 3 even if all LOW/NEGLIGIBLE return medium_plus[:8] if len(medium_plus) >= 3 else results[:max(6, len(results))] # ── v3 state model fallback ─────────────────────────────────────────────── v3_models, v3_meta = _load_v3_model() if v3_models is not None: return _predict_with_v3(v3_models, v3_meta, state_upper, crop_group, month, wx, wx_score, wx_driver) # ── Last resort: heuristic ──────────────────────────────────────────────── return _heuristic_predictions(state_upper, crop_group, month, wx, wx_score, wx_driver) def _top_feature_drivers(wx, model_prob, hist_rate, month): """Return 3 human-readable feature drivers for this prediction.""" drivers = [] if model_prob >= 0.60: drivers.append(f"Model probability: {model_prob*100:.0f}% (strong signal)") elif model_prob >= 0.40: drivers.append(f"Model probability: {model_prob*100:.0f}% (moderate signal)") if hist_rate >= 0.30: drivers.append(f"High historical occurrence ({hist_rate*100:.0f}%)") elif hist_rate >= 0.10: drivers.append(f"Moderate historical occurrence ({hist_rate*100:.0f}%)") hum = wx["humidity_mean"] if hum >= 80: drivers.append(f"High humidity ({hum:.0f}%) promotes spread") elif hum < 45: drivers.append(f"Low humidity ({hum:.0f}%) favors sucking pests") rain = wx["rain_7d_mean"] if rain >= 60: drivers.append(f"Heavy recent rain ({rain:.0f}mm/7d)") return drivers[:3] if drivers else ["Weather and seasonal patterns"] def _predict_with_v3(models, meta, state, crop_group, month, wx, wx_score, wx_driver): """Run v3 state-level predictions. Returns same format as district path.""" results = [] for pest_cat, bundle_v3 in models.items(): try: prev_freq, lag1, lag2, lag3 = _get_lag_freqs_v3(meta, state, crop_group, month, pest_cat) cooc1, cooc2 = _get_cooc_freqs_v3(meta, state, crop_group, month, pest_cat) days_sow = ((month - 6) % 12) * 30 stage = min(days_sow // 30, 4) import pandas as _pd2 fv = _build_feature_vector_v3(meta, state, crop_group, month, wx, days_sow, stage, prev_freq, lag1, lag2, lag3, cooc1, cooc2) fv_df = _pd2.DataFrame([dict(zip(meta["feature_cols"], fv))]) if isinstance(bundle_v3, dict) and "lgb" in bundle_v3: p_lgb = float(bundle_v3["lgb"].predict_proba(fv_df)[0][1]) p_xgb = float(bundle_v3["xgb"].predict_proba(fv_df)[0][1]) p_cat = float(bundle_v3["cat"].predict_proba(fv_df)[0][1]) meta_X = [[p_lgb, p_xgb, p_cat]] proba = float(bundle_v3["meta"].predict_proba(meta_X)[0][1]) else: proba = float(bundle_v3.predict_proba(fv_df)[0][1]) except Exception: proba = 0.25 hist_score = min(lag1 * 8, 1.0) combined = 0.55 * proba + 0.25 * wx_score + 0.20 * hist_score combined, triggered = _apply_agro_rules(pest_cat, combined, wx) rule_note = (f" [+{', '.join(triggered)}]" if triggered else "") risk_score = max(0, min(100, int(round(combined * 100)))) rl = _risk_level(risk_score) pest_name, spray = PEST_INFO.get(pest_cat, (pest_cat.replace("_"," ").title(), "")) cv = meta.get("cv_results", {}).get(pest_cat, {}) auc = cv.get("auc_stack", cv.get("auc", 0)) confidence = "High" if auc >= 0.85 else "Moderate" if auc >= 0.70 else "Low" ndvi_val = float(np.clip( 0.5*((wx["rain_mm"]-30)/30) - 0.3*((wx["temp_mean"]-25)/4), -2, 2)) results.append({ "pest": pest_name, "pest_cat": pest_cat, "risk_score": risk_score, "risk_level": rl, "confidence": confidence, "model_auc": round(auc, 3), "model_version": meta.get("version", "v3"), "history_score": round(hist_score, 3), "weather_score": round(wx_score, 3), "weather_driver": wx_driver + rule_note, "history_note": f"Reported in KCC records from {state.title()}", "feature_drivers": _top_feature_drivers(wx, proba, lag1, month), "growth_stage": f"~{days_sow}d from sowing", "ndvi_index": int((ndvi_val + 2) / 4 * 100), "recommended_action": _action(rl), "spray": spray, "weather_summary": wx, }) results.sort(key=lambda x: -x["risk_score"]) medium_plus = [r for r in results if r["risk_level"] in ("CRITICAL","HIGH","MEDIUM")] return medium_plus[:8] if len(medium_plus) >= 3 else results[:6] def _heuristic_predictions(state, crop_group, month, wx, wx_score, wx_driver): """Last-resort predictions using only weather + known pest seasonality.""" # Seasonal pest probabilities (expert-encoded) SEASONAL_PESTS = { (6,7,8,9): ["blight","rot","caterpillar","borer","whitefly"], (10,11,12): ["rust","mildew","aphid","leaf_spot"], (1,2,3): ["aphid","thrips","jassid","mite"], (4,5): ["mite","whitefly","thrips","caterpillar"], } active_pests = [] for months_tuple, pests in SEASONAL_PESTS.items(): if month in months_tuple: active_pests = pests break if not active_pests: active_pests = ["aphid","blight","whitefly","borer"] results = [] for pest_cat in active_pests[:6]: combined = 0.4 * wx_score + 0.3 combined, triggered = _apply_agro_rules(pest_cat, combined, wx) rule_note = (f" [+{', '.join(triggered)}]" if triggered else "") risk_score = max(0, min(100, int(round(combined * 100)))) rl = _risk_level(risk_score) pest_name, spray = PEST_INFO.get(pest_cat, (pest_cat.replace("_"," ").title(), "Consult KVK")) results.append({ "pest": pest_name, "pest_cat": pest_cat, "risk_score": risk_score, "risk_level": rl, "confidence": "Low (heuristic only)", "model_auc": 0, "model_version": "heuristic", "history_score": 0, "weather_score": round(wx_score, 3), "weather_driver": wx_driver + rule_note, "history_note": "Seasonal heuristic — no model data available", "feature_drivers": ["Seasonal pattern", wx_driver], "growth_stage": "Unknown", "ndvi_index": 50, "recommended_action": _action(rl), "spray": spray, "weather_summary": wx, }) results.sort(key=lambda x: -x["risk_score"]) return results