from pathlib import Path
"""
pest_predictor.py  v5  —  District-primary pest risk prediction
===============================================================
Primary:  district_v2 model (AUC ~0.95, 81 features, 1-month early warning)
Fallback: district_v1 model (AUC 0.936, 63 features, same-month detection)
Fallback: state-level v3 model (AUC 0.909) for uncovered districts
Last resort: weather + history heuristics (no model)

District v2 architecture (NEW):
  • Temporal lag: current weather → predicts NEXT month's pests (1-month lead)
  • 81 features: 63 v1 + 12 wx_prev_* + 6 disease-pressure indices
  • Wallin disease pressure, GDD accumulation, humidity trend
  • Isotonic calibration per pest (probabilities = actual rates)
  • F1-optimal thresholds via Youden's J

Public API:
    predict_pest_risk(state, crop, district=None, month=None, year=None) -> list[dict]
"""

import json, math, pickle, os, datetime, re
from functools import lru_cache
import numpy as np

# Directory with the trained model bundles, their metadata and the training
# CSVs: "<two levels above this file>/pest_model".
MODEL_DIR = str(Path(__file__).parent.parent / "pest_model")

# v2 model (preferred — 1-month early warning, 81 features)
DISTRICT_V2_MODEL_PATH = f"{MODEL_DIR}/pest_risk_model_district_v2.pkl"
DISTRICT_V2_META_PATH  = f"{MODEL_DIR}/pest_model_meta_district_v2.json"
# NOTE(review): this resolves to the same file as THRESHOLDS_PATH below.
THRESHOLDS_V2_PATH     = f"{MODEL_DIR}/thresholds_district.json"  # saved by v2 training

# v1 model (fallback — same-month detection, 63 features)
DISTRICT_MODEL_PATH  = f"{MODEL_DIR}/pest_risk_model_district.pkl"
DISTRICT_META_PATH   = f"{MODEL_DIR}/pest_model_meta_district.json"

# State-level v3 model (pickle + JSON metadata) and the district-level CSVs.
V3_MODEL_PATH        = f"{MODEL_DIR}/pest_risk_model_v3.pkl"
V3_META_PATH         = f"{MODEL_DIR}/pest_model_meta_v3.json"
DISTRICT_PEST_CSV    = f"{MODEL_DIR}/pest_monthly_district.csv"
DISTRICT_WEATHER_CSV = f"{MODEL_DIR}/weather_monthly_district.csv"
# Sub-district table that provides the lat/lon used for district centroids;
# lives under "<two levels above this file>/data".
SUBDISTRICTS_CSV     = str(Path(__file__).parent.parent / "data" / "indian_sub_districts.csv")  # fallback to local
# Per-pest classification thresholds read by _load_thresholds().
THRESHOLDS_PATH      = f"{MODEL_DIR}/thresholds_district.json"

# All 14 pest categories (must match training).
# These names are also the keys of PEST_INFO and the keys that receive a
# default threshold in _load_thresholds().
PEST_LIST = ["aphid","blight","borer","caterpillar","jassid","leaf_spot",
             "mildew","mite","mosaic_virus","rot","rust","thrips","whitefly","wilt"]

# ── State coordinates fallback ────────────────────────────────────────────────
# Upper-case state name → (lat, lon). Used when no district-level coordinate is
# available; states missing from this table fall back to a hard-coded default
# in _get_district_coords().
# NOTE(review): ANDHRA PRADESH and TELANGANA carry identical coordinates
# (17.39, 78.49) — confirm this is intended.
STATE_COORDS = {
    "MADHYA PRADESH":   (23.25, 77.41), "MAHARASHTRA":     (18.52, 73.86),
    "PUNJAB":           (30.73, 76.78), "UTTAR PRADESH":   (26.85, 80.95),
    "RAJASTHAN":        (26.91, 75.79), "KARNATAKA":       (12.97, 77.59),
    "GUJARAT":          (23.03, 72.58), "HARYANA":         (29.07, 76.08),
    "BIHAR":            (25.61, 85.14), "ANDHRA PRADESH":  (17.39, 78.49),
    "TELANGANA":        (17.39, 78.49), "WEST BENGAL":     (22.57, 88.36),
    "CHHATTISGARH":     (21.27, 81.87), "JHARKHAND":       (23.35, 85.33),
    "ODISHA":           (20.30, 85.82), "TAMIL NADU":      (13.08, 80.27),
    "ASSAM":            (26.14, 91.74), "HIMACHAL PRADESH":(31.10, 77.17),
}

# ── Soil type (NBSS classification, matching retrain script) ──────────────────
# Upper-case state name → integer soil class (0-4), fed to the district model as
# the "soil_type" feature. States not listed here are encoded as 0 by the
# feature builder.
# NOTE(review): the meaning of each integer code is not defined in this file —
# confirm against the retrain script before changing any value.
SOIL_TYPE = {
    "UTTAR PRADESH":0,"BIHAR":0,"WEST BENGAL":0,"PUNJAB":0,"HARYANA":0,
    "ASSAM":0,"ODISHA":0,"JHARKHAND":0,"GUJARAT":0,
    "MAHARASHTRA":1,"MADHYA PRADESH":1,"ANDHRA PRADESH":1,"TELANGANA":1,
    "KARNATAKA":1,"CHHATTISGARH":1,
    "TAMIL NADU":2,"KERALA":2,"GOA":2,
    "RAJASTHAN":3,
    "HIMACHAL PRADESH":4,"UTTARAKHAND":4,"JAMMU AND KASHMIR":4,
    "MANIPUR":4,"MEGHALAYA":4,"NAGALAND":4,"SIKKIM":4,"TRIPURA":4,
    "ARUNACHAL PRADESH":4,"MIZORAM":4,
}

# ── Pest info (name + treatment) ──────────────────────────────────────────────
# Pest category → (display name, recommended treatment text).
# Contains one entry for each of the 14 categories in PEST_LIST; the entry
# order here differs from PEST_LIST, lookups are by key.
PEST_INFO = {
    "aphid":        ("Aphid (Mahu / Aphis gossypii)",
                     "Imidacloprid 17.8% SL @ 0.5ml/L or Thiamethoxam 25% WG @ 0.3g/L"),
    "blight":       ("Early/Late Blight (Alternaria/Phytophthora)",
                     "Mancozeb 75% WP @ 2g/L or Metalaxyl+Mancozeb @ 2.5g/L"),
    "wilt":         ("Fusarium/Bacterial Wilt",
                     "Copper Oxychloride 50% WP @ 3g/L or Carbendazim 50% WP @ 1g/L"),
    "borer":        ("Stem/Fruit Borer (Helicoverpa / Chilo suppressalis)",
                     "Emamectin Benzoate 5% SG @ 0.5g/L or Spinosad 45% SC @ 0.5ml/L"),
    "mite":         ("Red Spider Mite (Tetranychus urticae)",
                     "Spiromesifen 22.9% SC @ 1ml/L or Abamectin 1.9% EC @ 0.5ml/L"),
    "whitefly":     ("Whitefly (Bemisia tabaci)",
                     "Imidacloprid 17.8% SL @ 0.5ml/L or Spiromesifen 22.9% SC @ 1ml/L"),
    "thrips":       ("Thrips (Frankliniella occidentalis)",
                     "Spinosad 45% SC @ 0.5ml/L or Fipronil 5% SC @ 1.5ml/L"),
    "rust":         ("Rust (Puccinia spp.)",
                     "Propiconazole 25% EC @ 1ml/L or Tebuconazole 25.9% EC @ 1ml/L"),
    "rot":          ("Root/Collar Rot (Pythium/Sclerotinia)",
                     "Metalaxyl 35% WS @ 2g/kg seed or Copper Oxychloride drench @ 3g/L"),
    "mildew":       ("Powdery/Downy Mildew",
                     "Sulphur 80% WP @ 3g/L (powdery) or Mancozeb 75% WP @ 2g/L (downy)"),
    "leaf_spot":    ("Leaf Spot/Anthracnose (Cercospora/Colletotrichum)",
                     "Carbendazim 50% WP @ 1g/L or Mancozeb+Carbendazim @ 2g/L"),
    "mosaic_virus": ("Mosaic/Leaf Curl Virus (vector: whitefly/aphid)",
                     "Control vector: Imidacloprid 17.8% SL @ 0.5ml/L — no direct cure for virus"),
    "caterpillar":  ("Caterpillar/Armyworm (Spodoptera/Helicoverpa)",
                     "Bt (Bacillus thuringiensis) @ 1g/L or Emamectin Benzoate 5% SG @ 0.5g/L"),
    "jassid":       ("Jassid/Leafhopper (Amrasca devastans)",
                     "Imidacloprid 17.8% SL @ 0.5ml/L or Thiamethoxam 25% WG @ 0.3g/L"),
}

# ── Agronomic overlay rules ───────────────────────────────────────────────────
# Each rule is a dict with:
#   "name"  – human-readable label
#   "cond"  – predicate taking the weather dict `wx`; it indexes keys directly
#             (temp_mean, temp_max_mean, humidity_mean, rain_7d_mean, rain_days,
#             vpd_proxy_mean), so a missing key raises KeyError
#   "pests" – pest categories the rule applies to
#   "boost" – numeric adjustment associated with the rule
# NOTE(review): how "boost" is combined with model probabilities is decided by
# the consumer of this table, which is not part of this section.
AGRO_RULES = [
    {"name": "High humidity fungal risk",
     "cond": lambda wx: wx["humidity_mean"] >= 85 and 18 <= wx["temp_mean"] <= 30,
     "pests": ["blight","mildew","leaf_spot","rot","rust"], "boost": 0.18},
    {"name": "Heavy rain disease amplifier",
     "cond": lambda wx: wx["rain_7d_mean"] >= 60 and wx["temp_mean"] > 20,
     "pests": ["rot","blight","leaf_spot","caterpillar"], "boost": 0.12},
    {"name": "Dry heat sucking pest surge",
     "cond": lambda wx: wx["humidity_mean"] < 45 and wx["temp_max_mean"] > 34,
     "pests": ["aphid","mite","whitefly","jassid","thrips"], "boost": 0.15},
    {"name": "Drought stress wilt risk",
     "cond": lambda wx: wx["vpd_proxy_mean"] > 12,
     "pests": ["wilt","rot"], "boost": 0.10},
    {"name": "Intermittent rain sporulation",
     "cond": lambda wx: 3 <= wx["rain_days"] <= 6 and wx["humidity_mean"] >= 70,
     "pests": ["rust","mildew","blight","leaf_spot"], "boost": 0.10},
    {"name": "Late blight temperature window",
     "cond": lambda wx: 18 <= wx["temp_mean"] <= 24 and wx["humidity_mean"] >= 80,
     "pests": ["blight"], "boost": 0.20},
    {"name": "Whitefly virus pressure",
     "cond": lambda wx: wx["temp_mean"] > 26 and wx["humidity_mean"] < 60,
     "pests": ["mosaic_virus","whitefly"], "boost": 0.08},
]

# ── Monthly heuristics for anomaly computation at inference ───────────────────
# Month number (1-12) → typical rainfall / mean temperature for that month.
# Used as the reference level for rain/temperature anomalies and as stand-in
# weather when the live forecast cannot be fetched.
# NOTE(review): units are presumably mm and °C, and the values are a single
# country-wide profile rather than per-region norms — confirm against training.
_MONTHLY_RAIN_NORM = {6:80,7:120,8:110,9:70,10:30,11:10,12:5,1:5,2:5,3:10,4:15,5:20}
_MONTHLY_TEMP_NORM = {1:15,2:18,3:23,4:28,5:32,6:30,7:28,8:27,9:27,10:25,11:20,12:16}


# ── Model + baselines loading ─────────────────────────────────────────────────

@lru_cache(maxsize=2)
def _load_district_model():
    """
    Return the district-level model bundle and its metadata.

    The v2 pickle is tried first; if it is absent or anything goes wrong while
    loading it, the v1 pickle is tried next. Whatever this function returns —
    including a (None, None) failure — is cached for the life of the process.

    Returns:
        (bundle, meta) on success, where both carry a "model_version" entry
        ("district_v2" or "district_v1" unless the file already set one);
        (None, None) when no district model could be loaded.
    """
    # NOTE: pickle.load runs arbitrary code from the file — model files must be trusted.
    if os.path.exists(DISTRICT_V2_MODEL_PATH):
        try:
            with open(DISTRICT_V2_MODEL_PATH, "rb") as model_fh:
                v2_bundle = pickle.load(model_fh)
            v2_meta = {
                "model_version": "district_v2",
                "auc": 0.0,
                "per_pest_auc": {},
                "lead_time": "1-month",
            }
            if os.path.exists(DISTRICT_V2_META_PATH):
                with open(DISTRICT_V2_META_PATH) as meta_fh:
                    v2_meta.update(json.load(meta_fh))
            # Feature construction keys off this marker to pick the v2 layout.
            v2_bundle.setdefault("model_version", "district_v2")
            auc_label = v2_meta.get("cv_auc_stack", v2_meta.get("auc", "?"))
            print(f"[pest_predictor] ✅ District v2 model loaded (AUC {auc_label})")
            return v2_bundle, v2_meta
        except Exception as err:
            print(f"[pest_predictor] v2 model load failed ({err}) — trying v1")

    if os.path.exists(DISTRICT_MODEL_PATH):
        try:
            with open(DISTRICT_MODEL_PATH, "rb") as model_fh:
                v1_bundle = pickle.load(model_fh)
            v1_bundle.setdefault("model_version", "district_v1")
            v1_meta = {"model_version": "district_v1", "lead_time": "0-month"}
            if os.path.exists(DISTRICT_META_PATH):
                with open(DISTRICT_META_PATH) as meta_fh:
                    v1_meta.update(json.load(meta_fh))
            print("[pest_predictor] ✅ District v1 model loaded (AUC 0.936)")
            return v1_bundle, v1_meta
        except Exception as err:
            print(f"[pest_predictor] District model load failed: {err}")

    # Neither model file is usable.
    return None, None


@lru_cache(maxsize=2)
def _load_v3_model():
    """
    Return (models, meta) for the state-level v3 fallback model.

    Both the pickle and its JSON metadata must exist; if either is missing or
    fails to load, (None, None) is returned. The result is cached.
    """
    files_present = os.path.exists(V3_MODEL_PATH) and os.path.exists(V3_META_PATH)
    if files_present:
        try:
            # NOTE: pickle.load runs arbitrary code from the file — must be trusted.
            with open(V3_MODEL_PATH, "rb") as model_fh:
                v3_models = pickle.load(model_fh)
            with open(V3_META_PATH) as meta_fh:
                v3_meta = json.load(meta_fh)
            return v3_models, v3_meta
        except Exception as err:
            print(f"[pest_predictor] v3 model load failed: {err}")
    return None, None


@lru_cache(maxsize=1)
def _load_district_baselines():
    """
    Load the district pest CSV and compute historical monthly baselines.

    A row counts as "present" when its freq_norm is >= 0.02. Rows are grouped
    by (state, district, crop_group, pest_cat, month) and averaged.

    Returns:
        (baselines, summary) where
          baselines: "{state}|{district}|{crop}|{pest}|{month}" → {
              "presence_rate": float,   # mean historical presence (0-1)
              "freq_mean": float,       # mean freq_norm over the grouped rows
          }
          summary: always an empty dict — it is kept only so the two-value
              return shape stays stable for callers.
        ({}, {}) is returned when the CSV is missing or cannot be processed.

    The result is cached, so every caller receives the same dict objects;
    treat them as read-only.
    """
    if not os.path.exists(DISTRICT_PEST_CSV):
        return {}, {}
    try:
        df = _pd().read_csv(DISTRICT_PEST_CSV,
                            usecols=["state","district","crop_group","pest_cat","year","month","freq_norm"])
        df["present"] = (df["freq_norm"] >= 0.02).astype(float)

        # Monthly baseline per (state, district, crop, pest, month)
        agg = df.groupby(["state","district","crop_group","pest_cat","month"]).agg(
            presence_rate=("present","mean"),
            freq_mean    =("freq_norm","mean"),
        ).reset_index()

        # Walk the columns in lockstep instead of iterrows(): same keys and
        # values, without building one Series per row.
        baselines = {
            f"{state}|{district}|{crop}|{pest}|{int(month)}": {
                "presence_rate": float(rate),
                "freq_mean":     float(freq),
            }
            for state, district, crop, pest, month, rate, freq in zip(
                agg["state"], agg["district"], agg["crop_group"], agg["pest_cat"],
                agg["month"], agg["presence_rate"], agg["freq_mean"],
            )
        }

        return baselines, {}
    except Exception as e:
        print(f"[pest_predictor] Baselines load failed: {e}")
        return {}, {}


@lru_cache(maxsize=1)
def _load_district_coords():
    """
    Load district centroid coordinates for spatial fallback.

    Centroids are the mean lat/lon of all sub-district rows sharing the same
    (State, District) in the sub-districts CSV, with names stripped and
    upper-cased. If that yields nothing, one "{STATE}|" entry per STATE_COORDS
    state is used instead.

    Returns:
        (coords_dict, covered_list)
        coords_dict: "{STATE}|{DISTRICT}" → (lat, lon)
        covered_list: list of (lat, lon, state, district) for haversine search.
            It holds every district found in the CSV; callers filter it down
            to the districts the model actually knows. It stays empty when the
            STATE_COORDS fallback is used.

    The result is cached; treat the returned objects as read-only.
    """
    coords_dict = {}
    covered_list = []

    if os.path.exists(SUBDISTRICTS_CSV):
        try:
            sub = _pd().read_csv(SUBDISTRICTS_CSV)
            sub["State"]    = sub["State"].str.strip().str.upper()
            sub["District"] = sub["District"].str.strip().str.upper()
            centroids = sub.groupby(["State","District"])[["lat","lon"]].mean().reset_index()
            # Column-wise iteration: same values as iterrows() without the
            # per-row Series overhead.
            for st, dist, lat, lon in zip(centroids["State"], centroids["District"],
                                          centroids["lat"], centroids["lon"]):
                lat, lon = float(lat), float(lon)
                coords_dict[f"{st}|{dist}"] = (lat, lon)
                covered_list.append((lat, lon, st, dist))
        except Exception as e:
            print(f"[pest_predictor] Sub-districts CSV load failed: {e}")

    # Fallback: build from STATE_COORDS
    if not coords_dict:
        for state, (lat, lon) in STATE_COORDS.items():
            coords_dict[f"{state}|"] = (lat, lon)

    return coords_dict, covered_list


@lru_cache(maxsize=1)
def _load_thresholds():
    """
    Load per-pest classification thresholds.

    Every pest in PEST_LIST starts at 0.35; values from the thresholds JSON
    file, when it exists and loads cleanly, override or extend those defaults.

    Returns:
        dict mapping pest name → threshold. On any load failure the plain
        defaults are returned and the problem is reported on stdout.

    The result is cached; treat the returned dict as read-only.
    """
    defaults = {p: 0.35 for p in PEST_LIST}
    if not os.path.exists(THRESHOLDS_PATH):
        return defaults
    try:
        with open(THRESHOLDS_PATH) as f:
            saved = json.load(f)
        # Merge into a copy so a failing update cannot leave the defaults
        # half-overwritten.
        merged = dict(defaults)
        merged.update(saved)
        return merged
    except Exception as e:
        # Still best-effort, but no longer silent: a corrupt thresholds file
        # would otherwise go unnoticed.
        print(f"[pest_predictor] Thresholds load failed ({e}) — using defaults")
        return defaults


def _pd():
    """Lazy import pandas to avoid startup cost if not used."""
    import pandas
    return pandas


# ── Coordinate helpers ────────────────────────────────────────────────────────

def _haversine(lat1, lon1, lat2, lon2):
    """Distance in km between two lat/lon points."""
    R = 6371.0
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat/2)**2 + math.cos(math.radians(lat1))*math.cos(math.radians(lat2))*math.sin(dlon/2)**2
    return R * 2 * math.asin(math.sqrt(a))


def _get_district_coords(state, district):
    """
    Resolve (lat, lon) for a district, degrading gracefully.

    Lookup order:
      1. exact "{STATE}|{DISTRICT}" entry in the loaded coordinate table
      2. the first table entry whose key starts with "{STATE}|"
         (i.e. some district of that state, in table order)
      3. STATE_COORDS for the state, else the default (23.25, 77.41)

    Both names are upper-cased before matching; `district` may be empty/None.
    """
    lookup, _ = _load_district_coords()
    state_u = state.upper()

    if district:
        exact_key = f"{state_u}|{district.upper()}"
        if exact_key in lookup:
            return lookup[exact_key]

    # No exact hit: settle for any entry belonging to the same state.
    state_prefix = f"{state_u}|"
    for table_key in lookup:
        if table_key.startswith(state_prefix):
            return lookup[table_key]

    return STATE_COORDS.get(state_u, (23.25, 77.41))


def _nearest_covered_district(state, district, bundle):
    """
    Map a district onto one the model was trained on.

    If the (upper-cased) district is already in bundle["district_enc"], it is
    returned with its own coordinates and distance 0.0. Otherwise every known
    district centroid whose name is in the encoder is compared by haversine
    distance to the query location and the closest one wins. Candidates are
    matched by district name only, across all states.

    Returns:
        (district_name, lat, lon, distance_km). When no candidate qualifies,
        the query's own name/coordinates are returned with distance inf.
    """
    encoder = bundle.get("district_enc", {})
    name = district.upper() if district else ""

    if name in encoder:
        own_lat, own_lon = _get_district_coords(state, name)
        return name, own_lat, own_lon, 0.0

    # Where is the query? (falls back to the state when no district was given)
    q_lat, q_lon = _get_district_coords(state, name or state)
    candidates = _load_district_coords()[1]

    nearest = (name, q_lat, q_lon)
    nearest_km = float("inf")
    for c_lat, c_lon, _c_state, c_name in candidates:
        if c_name in encoder:
            km = _haversine(q_lat, q_lon, c_lat, c_lon)
            if km < nearest_km:
                nearest_km = km
                nearest = (c_name, c_lat, c_lon)

    return (*nearest, nearest_km)


# ── Weather fetching ──────────────────────────────────────────────────────────

def _get_forecast_weather(lat, lon):
    """
    Summarise the next 7 days of Open-Meteo forecast for a location.

    Daily series are averaged (nulls ignored); rainfall is summed and days
    with more than 1.0 of precipitation are counted as rain days. If anything
    fails — import, network, HTTP status, parsing — a seasonal profile for the
    current calendar month is returned instead.

    Returns:
        dict with keys temp_mean, temp_max_mean, temp_min_mean, humidity_mean,
        rain_mm, rain_7d_mean, rain_30d_mean, rain_days, vpd_proxy_mean,
        wind_max_mean, diurnal_range, skin_temp_mean, soil_temp_mean, satd_mean.
    """

    def _mean_or(series, fallback=25.0):
        # Average of the non-null entries; the fallback when none remain.
        usable = [v for v in (series or []) if v is not None]
        if not usable:
            return fallback
        return float(np.mean(usable))

    try:
        import requests

        query = {
            "latitude": lat, "longitude": lon,
            "daily": ("temperature_2m_mean,temperature_2m_max,temperature_2m_min,"
                      "relative_humidity_2m_mean,precipitation_sum,wind_speed_10m_max"),
            "forecast_days": 7, "timezone": "Asia/Kolkata",
        }
        response = requests.get("https://api.open-meteo.com/v1/forecast",
                                params=query, timeout=12)
        response.raise_for_status()
        daily = response.json().get("daily", {})

        wet_amounts   = [mm for mm in daily.get("precipitation_sum", []) if mm is not None]
        total_rain    = sum(wet_amounts)
        wet_day_count = sum(1 for mm in wet_amounts if mm > 1.0)

        mean_t   = _mean_or(daily.get("temperature_2m_mean", []), 25.0)
        mean_hi  = _mean_or(daily.get("temperature_2m_max", []), 32.0)
        mean_lo  = _mean_or(daily.get("temperature_2m_min", []), 18.0)
        mean_hum = _mean_or(daily.get("relative_humidity_2m_mean", []), 65.0)
        mean_wnd = _mean_or(daily.get("wind_speed_10m_max", []), 15.0)

        return {
            "temp_mean":     mean_t,
            "temp_max_mean": mean_hi,
            "temp_min_mean": mean_lo,
            "humidity_mean": mean_hum,
            "rain_mm":       total_rain,
            "rain_7d_mean":  total_rain,
            "rain_30d_mean": total_rain,      # 7-day total stands in for the 30-day figure
            "rain_days":     wet_day_count,
            "vpd_proxy_mean": mean_hi * (1 - mean_hum / 100),
            "wind_max_mean":  mean_wnd,
            "diurnal_range":  mean_hi - mean_lo,
            "skin_temp_mean": mean_t + 0.5,   # approximated from air temperature
            "soil_temp_mean": mean_t - 2.0,   # approximated as slightly cooler than air
            "satd_mean":      0.5,            # fixed placeholder
        }
    except Exception:
        # Forecast unavailable: fall back to the monthly norm tables.
        now_month = datetime.datetime.now().month
        norm_t = _MONTHLY_TEMP_NORM.get(now_month, 25.0)
        norm_r = _MONTHLY_RAIN_NORM.get(now_month, 30.0)
        norm_h = 70.0 if now_month in (6,7,8,9) else 55.0
        return {
            "temp_mean": norm_t, "temp_max_mean": norm_t+7, "temp_min_mean": norm_t-6,
            "humidity_mean": norm_h, "rain_mm": norm_r, "rain_7d_mean": norm_r,
            "rain_30d_mean": norm_r, "rain_days": 3 if norm_r > 5 else 0,
            "vpd_proxy_mean": (norm_t+7)*(1-norm_h/100), "wind_max_mean": 15.0,
            "diurnal_range": 13.0, "skin_temp_mean": norm_t+0.5,
            "soil_temp_mean": norm_t-2.0, "satd_mean": 0.5,
        }


# ── Feature vector construction ───────────────────────────────────────────────

def _growth_stage(month):
    """Kharif/Rabi stage proxy matching retrain script."""
    if month in [6, 7]:     return 0   # sowing/establishment
    elif month in [8, 9]:   return 1   # vegetative
    elif month in [10, 11]: return 2   # reproductive
    elif month in [12, 1]:  return 3   # maturity
    else:                   return 1   # off-season vegetative


def _safe_encode(enc_dict, value, default=-1):
    """Encode value with dict encoder, return default if unknown."""
    return enc_dict.get(value, default)


def _baseline_presence(baselines, state, district, crop, pest, month):
    """Look up historical pest presence rate. Returns (rate, freq_mean)."""
    key = f"{state}|{district}|{crop}|{pest}|{month}"
    b = baselines.get(key, {})
    return b.get("presence_rate", 0.0), b.get("freq_mean", 0.0)


def _disease_pressure(temp, hum):
    """Wallin-inspired fungal disease pressure index."""
    return (hum / 100) ** 2 * max(0.0, (30 - abs(temp - 22)) / 30)


def _build_district_feature_vector(bundle, baselines, state, district,
                                   crop_group, pest_cat, month, wx,
                                   lat, lon):
    """
    Build the full feature vector for one (state, district, crop, pest, month).
    Supports BOTH v1 (63 features) and v2 (81 features, temporal lag).

    v2 inference semantics:
      Current 7-day forecast weather → wx_prev_* features (M-1 inputs)
      Next-month climatological norms → same-month temp_mean/humidity_mean inputs
      This gives 1-month early warning: current conditions predict next month's risk.

    Args:
        bundle: model bundle dict; must contain "feature_cols", "crop_enc",
            "pest_enc", "state_enc", "district_enc" and "top_cooc".
            bundle["model_version"] == "district_v2" selects the v2 layout,
            anything else (or a missing key) the v1 layout.
        baselines: baseline dict as consumed by _baseline_presence().
        state, district, crop_group, pest_cat: used as-is for encoder,
            baseline and SOIL_TYPE lookups (no case normalisation here);
            unknown values encode to -1 / soil type 0 / baseline 0.0.
        month: current calendar month (1-12).
        wx: weather dict; must contain temp_mean, temp_max_mean,
            temp_min_mean, humidity_mean, rain_mm, rain_days, diurnal_range
            and vpd_proxy_mean. soil_temp_mean, satd_mean and rain_7d_mean
            are optional in the v1 layout (v2 reads only satd_mean).
        lat, lon: coordinates passed through as features.

    Returns:
        pd.DataFrame with one row and exactly bundle["feature_cols"] columns,
        in that order. Columns this function does not compute (for example
        wind or skin temperature, which are not part of the feature dict)
        are filled with 0.0.

    Raises:
        KeyError: if a required bundle or wx key is missing.
    """
    import pandas as pd

    feature_cols = bundle["feature_cols"]
    crop_enc     = bundle["crop_enc"]
    pest_enc     = bundle["pest_enc"]
    state_enc    = bundle["state_enc"]
    dist_enc     = bundle["district_enc"]
    top_cooc     = bundle["top_cooc"]
    model_ver    = bundle.get("model_version", "district_v1")
    is_v2        = (model_ver == "district_v2")

    # ── Encoded categoricals ─────────────────────────────────────────────────
    s_enc = _safe_encode(state_enc,  state,      -1)
    d_enc = _safe_encode(dist_enc,   district,   -1)
    c_enc = _safe_encode(crop_enc,   crop_group, -1)
    p_enc = _safe_encode(pest_enc,   pest_cat,   -1)

    # ── Month cyclical (always the CURRENT month, also for v2) ───────────────
    month_sin = math.sin(2 * math.pi * month / 12)
    month_cos = math.cos(2 * math.pi * month / 12)

    # ── Current (observed/forecast) weather — used as wx_prev_* for v2 ───────
    curr_temp  = wx["temp_mean"]
    curr_t_max = wx["temp_max_mean"]
    curr_t_min = wx["temp_min_mean"]
    curr_hum   = wx["humidity_mean"]
    curr_rain  = wx["rain_mm"]
    curr_rain_d= wx["rain_days"]
    curr_dr    = wx["diurnal_range"]
    curr_vpd   = wx["vpd_proxy_mean"]
    curr_lw    = (curr_hum / 100) * curr_rain_d * 3   # leaf_wetness from current weather

    # Anomalies of the current weather against the current month's norms
    curr_rain_norm = _MONTHLY_RAIN_NORM.get(month, 30)
    curr_temp_norm = _MONTHLY_TEMP_NORM.get(month, 25)
    curr_rain_anom = (curr_rain - curr_rain_norm) / max(curr_rain_norm * 0.5, 1)
    curr_temp_anom = (curr_temp - curr_temp_norm) / 4.0
    curr_hum_anom  = (curr_hum - 65) / 15.0

    if is_v2:
        # ── v2: same-month features = climatological norm for prediction month ──
        # At inference: we predict month (month+1), current wx → wx_prev_*
        # For temp_mean/humidity_mean of next month, use seasonal norms
        next_m = (month % 12) + 1
        temp   = float(_MONTHLY_TEMP_NORM.get(next_m, 25))
        t_max  = temp + 7.0
        t_min  = temp - 6.0
        hum    = 70.0 if next_m in (6, 7, 8, 9) else 57.0
        rain   = float(_MONTHLY_RAIN_NORM.get(next_m, 30))
        rain_d = 4 if rain > 10 else 1
        dr     = 13.0
        vpd    = t_max * (1 - hum / 100)
        stl    = temp - 2.0
        satd   = wx.get("satd_mean", 0.5)
        r7d    = rain / 4.0   # rough 7d from monthly
        # Use next-month norms for anomaly features in the same-month slot.
        # The same-month values ARE those norms, so rain/temp anomalies come
        # out as 0 here; only the humidity anomaly can be non-zero.
        rain_norm   = _MONTHLY_RAIN_NORM.get(next_m, 30)
        temp_norm   = float(_MONTHLY_TEMP_NORM.get(next_m, 25))
        rain_anomaly = (rain - rain_norm) / max(rain_norm * 0.5, 1)
        temp_anomaly = (temp - temp_norm) / 4.0
        hum_anomaly  = (hum - 65) / 15.0
        # For lag features, use next_m (which is what we're predicting)
        pred_month  = next_m
    else:
        # ── v1: same-month weather = current forecast ────────────────────────
        temp   = curr_temp
        t_max  = curr_t_max
        t_min  = curr_t_min
        hum    = curr_hum
        rain   = curr_rain
        rain_d = curr_rain_d
        dr     = curr_dr
        vpd    = curr_vpd
        stl    = wx.get("soil_temp_mean", curr_temp - 2.0)
        satd   = wx.get("satd_mean", 0.5)
        r7d    = wx.get("rain_7d_mean", curr_rain)
        temp_norm    = curr_temp_norm
        rain_anomaly = curr_rain_anom
        temp_anomaly = curr_temp_anom
        hum_anomaly  = curr_hum_anom
        pred_month   = month

    rain_intensity = rain / max(rain_d, 1)
    leaf_wetness   = (hum / 100) * rain_d * 3
    hum_x_temp     = hum * temp / 1000
    rain_x_hum     = rain * hum / 10000
    high_humidity  = int(hum > 75)

    # Soil anomaly is measured against "air-temperature norm minus 2".
    soil_anomaly  = (stl  - (temp_norm - 2)) / 4.0
    ndvi_proxy    = float(np.clip(
        0.5 * np.clip(rain_anomaly, -3, 3)
        - 0.3 * np.clip(temp_anomaly, -3, 3)
        - 0.2 * np.clip(soil_anomaly, -3, 3),
        -2, 2
    ))

    # ── Lag features from baselines ──────────────────────────────────────────
    prev_m = pred_month - 1 if pred_month > 1 else 12
    lag_prev_month, _ = _baseline_presence(baselines, state, district, crop_group, pest_cat, prev_m)
    lag_1yr,        _ = _baseline_presence(baselines, state, district, crop_group, pest_cat, pred_month)
    lag_2yr           = lag_1yr * 0.85    # approximate 2yr from 1yr
    pest_trend        = (lag_prev_month + lag_1yr) / 2.0

    # ── Soil type ────────────────────────────────────────────────────────────
    soil_type = SOIL_TYPE.get(state, 0)

    # ── Growth stage ─────────────────────────────────────────────────────────
    gs = _growth_stage(pred_month)

    # ── Base feature dict ────────────────────────────────────────────────────
    fv = {
        "month_sin":      month_sin,
        "month_cos":      month_cos,
        "state_enc":      s_enc,
        "district_enc":   d_enc,
        "crop_enc":       c_enc,
        "pest_enc":       p_enc,
        "lat":            lat,
        "lon":            lon,
        "soil_type":      soil_type,
        "growth_stage":   gs,
        "temp_mean":      temp,
        "temp_max_mean":  t_max,
        "temp_min_mean":  t_min,
        "diurnal_range":  dr,
        "humidity_mean":  hum,
        "rain_mm":        rain,
        "rain_days":      rain_d,
        "rain_intensity": rain_intensity,
        "soil_temp_mean": stl,
        "satd_mean":      satd,
        "rain_7d_mean":   r7d,
        "vpd_proxy_mean": vpd,
        "leaf_wetness":   leaf_wetness,
        "hum_x_temp":     hum_x_temp,
        "rain_x_hum":     rain_x_hum,
        "high_humidity":  high_humidity,
        "rain_anomaly":   rain_anomaly,
        "temp_anomaly":   temp_anomaly,
        "hum_anomaly":    hum_anomaly,
        "soil_anomaly":   soil_anomaly,
        "ndvi_proxy":     ndvi_proxy,
        "lag_prev_month": lag_prev_month,
        "lag_1yr":        lag_1yr,
        "lag_2yr":        lag_2yr,
        "pest_trend":     pest_trend,
    }

    # ── v2 ONLY: temporal lag features + disease pressure indices ────────────
    if is_v2:
        # wx_prev_* = current observed/forecast weather (M-1 relative to next month)
        fv.update({
            "wx_prev_temp_mean":      curr_temp,
            "wx_prev_temp_max_mean":  curr_t_max,
            "wx_prev_humidity_mean":  curr_hum,
            "wx_prev_rain_mm":        curr_rain,
            "wx_prev_rain_days":      curr_rain_d,
            "wx_prev_leaf_wetness":   curr_lw,
            "wx_prev_vpd_proxy_mean": curr_vpd,
            "wx_prev_diurnal_range":  curr_dr,
            "wx_prev_rain_anomaly":   curr_rain_anom,
            "wx_prev_temp_anomaly":   curr_temp_anom,
            "wx_prev_hum_anomaly":    curr_hum_anom,
            "wx_prev_wet_days":       curr_rain_d,   # alias used in some feature sets
        })

        # Disease pressure indices — computed from CURRENT weather (the observed M-1 signal)
        dp_curr = _disease_pressure(curr_temp, curr_hum)
        # "Previous" pressure is approximated from the norms of prev_m, i.e.
        # the month before the prediction month.
        prev_temp_norm = float(_MONTHLY_TEMP_NORM.get(prev_m, 25))
        prev_hum_norm  = 70.0 if prev_m in (6, 7, 8, 9) else 57.0
        dp_prev = _disease_pressure(prev_temp_norm, prev_hum_norm)

        gdd_monthly       = max(0.0, curr_temp - 10.0) * 30.0
        wet_days_intensity= curr_rain_d * (curr_lw + 1)
        thermo_humid      = curr_hum * curr_vpd / 1000.0
        humidity_trend    = curr_hum - prev_hum_norm   # rising vs falling

        fv.update({
            "disease_pressure_idx":  dp_curr,
            "gdd_monthly":           gdd_monthly,
            "wet_days_intensity":    wet_days_intensity,
            "thermo_humid_stress":   thermo_humid,
            "prev_disease_pressure": dp_prev,
            "humidity_trend":        humidity_trend,
        })

    # ── Co-occurrence features (all pests × 2) ───────────────────────────────
    # cooc1_{p} = baseline presence of top co-occurring pest 1 for pest p
    # cooc2_{p} = baseline presence of top co-occurring pest 2 for pest p
    # Filled for every pest p in top_cooc, not only for pest_cat.
    for p, cooc_pests in top_cooc.items():
        for i, cooc_pest in enumerate(cooc_pests[:2], 1):
            col = f"cooc{i}_{p}"
            if col in feature_cols:
                rate, _ = _baseline_presence(baselines, state, district, crop_group, cooc_pest, pred_month)
                fv[col] = rate

    # ── Build DataFrame in exact feature_cols order ──────────────────────────
    row = {col: fv.get(col, 0.0) for col in feature_cols}
    return pd.DataFrame([row])


# ── Stacking prediction ───────────────────────────────────────────────────────

def _district_predict(bundle, fv_df):
    """
    Get stacking ensemble probability from district bundle.
    Bundle has: lgb, xgb, cat (sklearn-API), meta (LogisticRegression), scaler.
    """
    try:
        p_lgb = float(bundle["lgb"].predict_proba(fv_df)[0][1])
        p_xgb = float(bundle["xgb"].predict_proba(fv_df)[0][1])
        p_cat = float(bundle["cat"].predict_proba(fv_df)[0][1])
        meta_X = bundle["scaler"].transform([[p_lgb, p_xgb, p_cat]])
        return float(bundle["meta"].predict_proba(meta_X)[0][1])
    except Exception as e:
        print(f"[pest_predictor] District predict error: {e}")
        return 0.3


def _v3_predict(models, meta_v3, state, crop_group, month, wx):
    """Run v3 state-level model (fallback). Returns {pest_cat: prob}."""
    from mandi_advisor.pest_predictor_v3_helpers import _build_feature_vector_v3
    import pandas as pd
    results = {}
    is_v3 = meta_v3.get("version") == "v3"
    for pest_cat, bundle_v3 in models.items():
        try:
            if is_v3:
                prev_freq, lag1, lag2, lag3 = _get_lag_freqs_v3(meta_v3, state, crop_group, month, pest_cat)
                cooc1, cooc2 = _get_cooc_freqs_v3(meta_v3, state, crop_group, month, pest_cat)
                days_sow = ((month - 6) % 12) * 30
                stage    = min(days_sow // 30, 4)
                fv = _build_feature_vector_v3(meta_v3, state, crop_group, month, wx,
                                              days_sow, stage, prev_freq, lag1, lag2, lag3, cooc1, cooc2)
            else:
                fv = [0] * len(meta_v3.get("feature_cols", []))
            fv_df = pd.DataFrame([dict(zip(meta_v3["feature_cols"], fv))])
            if isinstance(bundle_v3, dict) and "lgb" in bundle_v3:
                p_lgb = float(bundle_v3["lgb"].predict_proba(fv_df)[0][1])
                p_xgb = float(bundle_v3["xgb"].predict_proba(fv_df)[0][1])
                p_cat = float(bundle_v3["cat"].predict_proba(fv_df)[0][1])
                meta_X = [[p_lgb, p_xgb, p_cat]]
                prob = float(bundle_v3["meta"].predict_proba(meta_X)[0][1])
            else:
                prob = float(bundle_v3.predict_proba(fv_df)[0][1])
            results[pest_cat] = prob
        except Exception:
            results[pest_cat] = 0.25
    return results


def _get_lag_freqs_v3(meta, state, crop_group, month, pest_cat):
    baselines = meta.get("baselines", {})
    prev_m = month - 1 if month > 1 else 12
    prev_k = f"{state}|{crop_group}|{prev_m}|{pest_cat}"
    prev_freq = float(baselines.get(prev_k, {}).get("mean", 0.0))
    curr_k = f"{state}|{crop_group}|{month}|{pest_cat}"
    b = baselines.get(curr_k, {})
    lag1 = float(b.get("lag1yr_mean", b.get("mean", 0.0)))
    lag2 = lag1 * 0.85
    lag3 = lag1 * 0.70
    return prev_freq, lag1, lag2, lag3


def _get_cooc_freqs_v3(meta, state, crop_group, month, pest_cat):
    cooc_map  = meta.get("cooccurrence_map", {})
    baselines = meta.get("baselines", {})
    corr_pests = cooc_map.get(pest_cat, [])
    results = []
    for cp in corr_pests[:2]:
        k = f"{state}|{crop_group}|{month}|{cp}"
        results.append(float(baselines.get(k, {}).get("mean", 0.0)))
    while len(results) < 2:
        results.append(0.0)
    return results[0], results[1]


def _build_feature_vector_v3(meta, state, crop_group, month, wx,
                              days_sowing, stage, prev_freq, lag1, lag2, lag3, cooc1, cooc2):
    """Assemble the v3 model input row (kept inline to avoid circular import).

    Every candidate feature is computed into a dict, then the values are emitted
    in the order given by ``meta["feature_cols"]``; a column that is not computed
    here is filled with 0.

    Args:
        meta:        v3 metadata; reads "feature_cols", "state_classes",
                     "crop_classes", "state_soil" and "soil_moisture_mult".
        state, crop_group: encoded as their index in the class lists
                     (0 when not listed).
        month:       calendar month, used for cyclic encoding and the
                     monthly rain / temperature norms.
        wx:          weather dict; missing keys fall back to the defaults below.
        days_sowing, stage, prev_freq, lag1, lag2, lag3, cooc1, cooc2:
                     passed through as features ("pest_trend" is lag1 - lag2).

    Returns:
        list of feature values, one per entry of ``meta["feature_cols"]``.
    """
    columns = meta["feature_cols"]

    def _encode(value, classes):
        # Index of the class label; unknown labels map to 0.
        return classes.index(value) if value in classes else 0

    state_enc = _encode(state, meta.get("state_classes", []))
    crop_enc = _encode(crop_group, meta.get("crop_classes", []))

    # Raw weather readings with their fallback defaults.
    rain = wx.get("rain_mm", 10)
    hum = wx.get("humidity_mean", 65)
    temp = wx.get("temp_mean", 25)
    rain_days = wx.get("rain_days", 3)
    temp_max = wx.get("temp_max_mean", 32)

    # Deviations from the monthly norms.
    rain_norm = _MONTHLY_RAIN_NORM.get(month, 30)
    temp_norm = _MONTHLY_TEMP_NORM.get(month, 25)
    rain_anomaly = (rain - rain_norm) / max(rain_norm * 0.5, 1)
    temp_anomaly = (temp - temp_norm) / 4.0

    # Soil type of the state scales humidity into "adj_humidity".
    soil_type = int(meta.get("state_soil", {}).get(state, 0))
    soil_mult = float(meta.get("soil_moisture_mult", {}).get(str(soil_type), 1.0))

    angle = 2 * math.pi * month / 12
    features = {}

    # Calendar and categorical encodings.
    features.update({
        "month_sin": math.sin(angle),
        "month_cos": math.cos(angle),
        "month": month,
        "state_enc": state_enc,
        "crop_enc": crop_enc,
    })

    # Direct weather readings.
    features.update({
        "temp_mean": temp,
        "temp_max_mean": temp_max,
        "humidity_mean": hum,
        "rain_mm": rain,
        "rain_7d_mean": wx.get("rain_7d_mean", 15),
        "rain_30d_mean": wx.get("rain_30d_mean", 40),
        "rain_days": rain_days,
        "rain_intensity": rain / max(rain_days, 1),
        "vpd_proxy_mean": wx.get("vpd_proxy_mean", 8),
        "wind_max_mean": wx.get("wind_max_mean", 15),
        "diurnal_range": wx.get("diurnal_range", 14),
    })

    # Threshold flags.
    features.update({
        "high_humidity": int(hum > 75),
        "warm_wet": int(temp > 22 and rain > 50),
        "cool_dry": int(temp < 18 and rain < 20),
        "heat_stress": int(temp_max > 38),
    })

    # Anomalies, proxies and interaction terms.
    features.update({
        "rain_anomaly": rain_anomaly,
        "temp_anomaly": temp_anomaly,
        "hum_anomaly": (hum - 65) / 15.0,
        "ndvi_proxy": float(np.clip(0.5*rain_anomaly - 0.3*temp_anomaly, -2, 2)),
        "leaf_wetness": (hum/100) * rain_days * 3,
        "hum_x_temp": hum * temp / 1000,
        "rain_x_hum": rain * hum / 10000,
        "soil_type": soil_type,
        "adj_humidity": hum * soil_mult,
    })

    # Crop stage and pest-history inputs supplied by the caller.
    features.update({
        "days_sowing": days_sowing,
        "growth_stage": stage,
        "prev_freq_norm": prev_freq,
        "lag1yr_freq": lag1,
        "lag2yr_freq": lag2,
        "lag3yr_freq": lag3,
        "pest_trend": lag1 - lag2,
        "cooc1": cooc1,
        "cooc2": cooc2,
    })

    return [features.get(name, 0) for name in columns]


# ── Weather scoring ───────────────────────────────────────────────────────────

def _weather_driver(wx):
    score, parts = 0.0, []
    h = wx["humidity_mean"]; r = wx["rain_7d_mean"]; t = wx["temp_mean"]
    if h >= 85:   score += 0.40; parts.append(f"very high humidity ({h:.0f}%)")
    elif h >= 70: score += 0.25; parts.append(f"high humidity ({h:.0f}%)")
    elif h < 40:  score += 0.10; parts.append(f"dry air ({h:.0f}% — sucking pest risk)")
    if r >= 50:   score += 0.35; parts.append(f"{r:.0f}mm rain last 7d")
    elif r >= 20: score += 0.20; parts.append(f"{r:.0f}mm rain last 7d")
    if 20 <= t <= 30: score += 0.15; parts.append(f"optimal infection temp ({t:.1f}°C)")
    elif t > 36:  score += 0.05; parts.append(f"heat stress ({t:.1f}°C)")
    if wx["rain_days"] >= 5: score += 0.10; parts.append(f"{wx['rain_days']} rainy days")
    return min(score, 1.0), ("; ".join(parts) if parts else "moderate conditions")


def _apply_agro_rules(pest_cat, combined, wx):
    """Add agronomic rule boosts to a pest's combined score.

    Every entry of AGRO_RULES whose "pests" contains ``pest_cat`` is evaluated:
    when its "cond" callable returns truthy for ``wx``, its "boost" is added to
    the score and its "name" is recorded.

    Returns:
        ``(score, triggered)`` — score capped at 1.0, plus the names of the
        rules that fired, in AGRO_RULES order.
    """
    score = combined
    fired = []
    for rule in AGRO_RULES:
        if pest_cat not in rule["pests"]:
            continue
        try:
            if not rule["cond"](wx):
                continue
            score += rule["boost"]
            fired.append(rule["name"])
        except Exception:
            # Best-effort: a rule that fails to evaluate (e.g. a weather key it
            # needs is missing) is skipped rather than failing the prediction.
            continue
    return min(score, 1.0), fired


# ── Risk scoring ──────────────────────────────────────────────────────────────

def _risk_level(score):
    """Map 0-100 risk score to severity tier."""
    if score >= 80: return "CRITICAL"
    if score >= 60: return "HIGH"
    if score >= 40: return "MEDIUM"
    if score >= 20: return "LOW"
    return "NEGLIGIBLE"


def _action(rl):
    return {
        "CRITICAL":   "🔴 Act immediately — spray within 24-48h to prevent crop loss",
        "HIGH":       "🟠 Spray within 3-5 days (preventive window)",
        "MEDIUM":     "🟡 Monitor field daily — prepare spray materials",
        "LOW":        "🟢 Low risk — standard weekly scouting sufficient",
        "NEGLIGIBLE": "✅ Very low risk — no immediate action needed",
    }.get(rl, "Monitor")


def _confidence_tier(auc: float, n_samples: int = 0) -> dict:
    """
    Return structured confidence info for a per-pest AUC.
    n_samples: number of training records for this pest (optional).
    """
    if auc >= 0.93:
        label, color, note = "Very High", "🟢", "Model highly reliable for this pest"
    elif auc >= 0.88:
        label, color, note = "High", "🟢", "Model reliable — trust predictions"
    elif auc >= 0.80:
        label, color, note = "Moderate", "🟡", "Reasonable accuracy — verify with field scouting"
    elif auc >= 0.70:
        label, color, note = "Low", "🟠", "Limited data — use as early indicator only"
    else:
        label, color, note = "Very Low", "🔴", "Insufficient training data — heuristic estimate"
    return {"label": label, "color": color, "auc": round(auc, 3), "note": note}


# ── Crop mapping ──────────────────────────────────────────────────────────────

# v2 — expanded to 25 crop groups matching build_district_pest_data.py (ORDER MATTERS)
_CROP_GROUPS = {
    # Pulses — separated for distinct pest profiles
    "pigeonpea":   ["pigeon pea", "red gram", "arhar", "tur dal", "toor"],
    "bengal_gram": ["bengal gram", "kabuli chana", "kabuli", "chick pea", "chickpea"],
    "moong":       ["green gram", "moong bean", "moong"],
    "urad":        ["black gram", "urd bean", "urad", "urid"],
    "lentil":      ["lentil", "masur", "masoor"],
    # Horticulture
    "mango":       ["mango", "aam"],
    "banana":      ["banana", "kela", "plantain"],
    "okra":        ["bhindi", "okra", "ladysfinger", "lady finger"],
    # Cereals / millets
    "pearl_millet":["pearl millet", "bajra", "bulrush millet", "spiked millet"],
    "sorghum":     ["sorghum", "jowar", "great millet"],
    # Row crops
    "tomato":      ["tomato", "tamatar"],
    "onion":       ["onion", "pyaz", "kanda"],
    "potato":      ["potato", "aloo", "alu"],
    "wheat":       ["wheat", "gehu", "gehun", "gehoo"],
    "rice":        ["paddy", "rice", "dhan", "chawal", "dhaan"],
    "maize":       ["maize", "corn", "makka", "makki"],
    "cotton":      ["cotton", "kapas", "karpas"],
    "soybean":     ["soybean", "soya", "soyabean"],
    "mustard":     ["mustard", "sarson", "rai", "rapeseed"],
    "chilli":      ["chilli", "mirchi", "capsicum"],
    "brinjal":     ["brinjal", "baingan", "eggplant", "begun"],
    "cauliflower": ["cauliflower", "phool gobi", "gobhi", "gobi"],
    "groundnut":   ["groundnut", "peanut", "moongphali"],
    "sugarcane":   ["sugarcane", "ganna", "ikh"],
    "vegetables":  ["cucumber", "pumpkin", "gourd", "spinach", "palak",
                    "cabbage", "bitter gourd", "bottle gourd", "ridge gourd",
                    "pointed gourd", "watermelon"],
}

def _map_crop_group(crop):
    cl = (crop or "").lower()
    for grp, kws in _CROP_GROUPS.items():
        if any(k in cl for k in kws): return grp
    return "other"


# ── Main prediction API ───────────────────────────────────────────────────────

def predict_pest_risk(state, crop, district=None, month=None, year=None):
    """
    Main API — returns list of pest risk dicts, sorted by risk_score descending.

    Strategy:
      1. Try district v2 model (1-month early warning, AUC ~0.95) — spatial fallback included
      2. Try district v1 model (same-month, AUC 0.936) — spatial fallback included
      3. Fall back to v3 state model (AUC 0.909) if district model unavailable
      4. Fall back to heuristic (weather + history) if neither model available

    Which district model (v2 or v1) is used is decided by _load_district_model();
    this function only reads the bundle's "model_version" to label the output.

    Args:
        state:    State name; None/empty defaults to "MADHYA PRADESH". Upper-cased.
        crop:     Free-text crop name, mapped to a crop group by _map_crop_group().
        district: Optional district name; upper-cased, "" when not given.
        month:    Calendar month; the current month is used when falsy.
        year:     Accepted for API compatibility; not used by this function.

    Returns:
        District path: the MEDIUM-or-worse entries (at most 8) when there are at
        least 3 of them, otherwise the 6 highest-scoring entries. The v3 and
        heuristic paths return whatever their helper returns.
    """
    state_upper  = (state or "MADHYA PRADESH").strip().upper()
    crop_group   = _map_crop_group(crop)
    month        = month or datetime.datetime.now().month
    district_u   = district.strip().upper() if district else ""

    # ── Load assets ──────────────────────────────────────────────────────────
    d_bundle, d_meta = _load_district_model()
    baselines, _     = _load_district_baselines()
    # NOTE(review): thresholds are loaded but not consulted below; the call is
    # kept in case the loader has side effects (e.g. cache warm-up) — confirm.
    thresholds       = _load_thresholds()

    # ── Get coordinates ───────────────────────────────────────────────────────
    lat, lon = _get_district_coords(state_upper, district_u)
    wx        = _get_forecast_weather(lat, lon)
    wx_score, wx_driver = _weather_driver(wx)

    # ── District model path ───────────────────────────────────────────────────
    if d_bundle is not None:
        # Spatial fallback: find nearest covered district if needed
        actual_district = district_u
        fb_district, fb_lat, fb_lon, fb_dist_km = _nearest_covered_district(
            state_upper, district_u, d_bundle)

        if fb_dist_km > 0:
            actual_district = fb_district
            lat, lon = fb_lat, fb_lon
            # Refresh weather for correct coords
            wx        = _get_forecast_weather(fb_lat, fb_lon)
            wx_score, wx_driver = _weather_driver(wx)

        fallback_note = (
            f" [spatial fallback: using {fb_district} ({fb_dist_km:.0f}km)]"
            if fb_dist_km > 0 else ""
        )

        # Everything below depends only on the bundle, the weather and the
        # month, so it is computed once instead of once per pest.

        # Determine model version and lead-time note
        mv = d_bundle.get("model_version", "district_v1")
        if mv == "district_v2":
            next_m = (month % 12) + 1
            mv_label = f"district_v2 (1-month early warning){fallback_note}"
            lead_note = (f"⚡ Predicting pest risk for NEXT month (month {next_m}). "
                         "Current weather → next-month forecast. "
                         "You have 3-4 weeks to apply preventive spray.")
        else:
            mv_label = f"district_v1{fallback_note}"
            lead_note = "Current-month detection (no lead time). Upgrade to v2 for early warning."

        # Vegetation proxy from rain/temperature anomalies, rescaled from
        # the clipped [-2, 2] range to 0-100.
        ndvi_val = float(np.clip(
            0.5*((wx["rain_mm"] - _MONTHLY_RAIN_NORM.get(month, 30)) / 30)
            - 0.3*((wx["temp_mean"] - _MONTHLY_TEMP_NORM.get(month, 25)) / 4),
            -2, 2))
        ndvi_pct = int((ndvi_val + 2) / 4 * 100)

        stage_label = ["Sowing","Vegetative","Reproductive","Maturity",
                       "Off-season"][_growth_stage(month)]

        pest_aucs = (d_meta or {}).get("per_pest_auc", {})

        results = []
        for pest_cat in PEST_LIST:
            fv_df = _build_district_feature_vector(
                d_bundle, baselines,
                state_upper, actual_district, crop_group, pest_cat,
                month, wx, lat, lon)

            proba = _district_predict(d_bundle, fv_df)

            # Weight: 70% model, 20% weather physics, 10% history signal
            hist_rate, _ = _baseline_presence(baselines, state_upper, actual_district,
                                              crop_group, pest_cat, month)
            hist_score   = min(hist_rate * 2.0, 1.0)   # scale 0-1

            combined = 0.70 * proba + 0.20 * wx_score + 0.10 * hist_score
            combined, triggered = _apply_agro_rules(pest_cat, combined, wx)
            rule_note = (f" [+{', '.join(triggered)}]" if triggered else "")

            risk_score = max(0, min(100, int(round(combined * 100))))
            rl = _risk_level(risk_score)

            pest_name, spray = PEST_INFO.get(pest_cat, (pest_cat.replace("_"," ").title(), "Consult local KVK"))
            per_pest_auc = pest_aucs.get(pest_cat, 0)
            conf_tier    = _confidence_tier(per_pest_auc)
            confidence   = conf_tier["label"]   # backward compat string

            # Top-3 feature attribution (qualitative)
            feature_drivers = _top_feature_drivers(wx, proba, hist_rate, month)

            results.append({
                "pest":             pest_name,
                "pest_cat":         pest_cat,
                "risk_score":       risk_score,
                "risk_level":       rl,
                "confidence":       confidence,
                "confidence_tier":  conf_tier,      # structured confidence info
                "model_auc":        round(per_pest_auc, 3),
                "model_version":    mv_label,
                "lead_time_note":   lead_note,
                "history_score":    round(hist_score, 3),
                "weather_score":    round(wx_score, 3),
                "model_score":      round(proba, 3),  # raw model probability
                "weather_driver":   wx_driver + rule_note,
                "history_note":     (f"Historical presence rate: {hist_rate*100:.0f}% "
                                     f"for {crop_group} in {actual_district} in month {month}"),
                "feature_drivers":  feature_drivers,
                "growth_stage":     stage_label,
                "ndvi_index":       ndvi_pct,
                "recommended_action": _action(rl),
                "spray":            spray,
                "weather_summary":  wx,
            })

        results.sort(key=lambda x: -x["risk_score"])
        medium_plus = [r for r in results if r["risk_level"] in ("CRITICAL","HIGH","MEDIUM")]
        if len(medium_plus) >= 3:
            return medium_plus[:8]
        # Fewer than 3 notable risks: return the top 6 overall, as
        # _predict_with_v3 does. The previous max(6, len(results)) slice was
        # a no-op that returned every pest category.
        return results[:6]

    # ── v3 state model fallback ───────────────────────────────────────────────
    v3_models, v3_meta = _load_v3_model()
    if v3_models is not None:
        return _predict_with_v3(v3_models, v3_meta, state_upper, crop_group, month, wx,
                                wx_score, wx_driver)

    # ── Last resort: heuristic ────────────────────────────────────────────────
    return _heuristic_predictions(state_upper, crop_group, month, wx, wx_score, wx_driver)


def _top_feature_drivers(wx, model_prob, hist_rate, month):
    """Return 3 human-readable feature drivers for this prediction."""
    drivers = []
    if model_prob >= 0.60:
        drivers.append(f"Model probability: {model_prob*100:.0f}% (strong signal)")
    elif model_prob >= 0.40:
        drivers.append(f"Model probability: {model_prob*100:.0f}% (moderate signal)")
    if hist_rate >= 0.30:
        drivers.append(f"High historical occurrence ({hist_rate*100:.0f}%)")
    elif hist_rate >= 0.10:
        drivers.append(f"Moderate historical occurrence ({hist_rate*100:.0f}%)")
    hum = wx["humidity_mean"]
    if hum >= 80: drivers.append(f"High humidity ({hum:.0f}%) promotes spread")
    elif hum < 45: drivers.append(f"Low humidity ({hum:.0f}%) favors sucking pests")
    rain = wx["rain_7d_mean"]
    if rain >= 60: drivers.append(f"Heavy recent rain ({rain:.0f}mm/7d)")
    return drivers[:3] if drivers else ["Weather and seasonal patterns"]


def _predict_with_v3(models, meta, state, crop_group, month, wx, wx_score, wx_driver):
    """Run v3 state-level predictions. Returns same format as district path."""
    results = []
    for pest_cat, bundle_v3 in models.items():
        try:
            prev_freq, lag1, lag2, lag3 = _get_lag_freqs_v3(meta, state, crop_group, month, pest_cat)
            cooc1, cooc2 = _get_cooc_freqs_v3(meta, state, crop_group, month, pest_cat)
            days_sow = ((month - 6) % 12) * 30
            stage    = min(days_sow // 30, 4)
            import pandas as _pd2
            fv = _build_feature_vector_v3(meta, state, crop_group, month, wx,
                                          days_sow, stage, prev_freq, lag1, lag2, lag3, cooc1, cooc2)
            fv_df = _pd2.DataFrame([dict(zip(meta["feature_cols"], fv))])
            if isinstance(bundle_v3, dict) and "lgb" in bundle_v3:
                p_lgb = float(bundle_v3["lgb"].predict_proba(fv_df)[0][1])
                p_xgb = float(bundle_v3["xgb"].predict_proba(fv_df)[0][1])
                p_cat = float(bundle_v3["cat"].predict_proba(fv_df)[0][1])
                meta_X = [[p_lgb, p_xgb, p_cat]]
                proba = float(bundle_v3["meta"].predict_proba(meta_X)[0][1])
            else:
                proba = float(bundle_v3.predict_proba(fv_df)[0][1])
        except Exception:
            proba = 0.25

        hist_score = min(lag1 * 8, 1.0)
        combined = 0.55 * proba + 0.25 * wx_score + 0.20 * hist_score
        combined, triggered = _apply_agro_rules(pest_cat, combined, wx)
        rule_note = (f" [+{', '.join(triggered)}]" if triggered else "")
        risk_score = max(0, min(100, int(round(combined * 100))))
        rl = _risk_level(risk_score)
        pest_name, spray = PEST_INFO.get(pest_cat, (pest_cat.replace("_"," ").title(), ""))
        cv = meta.get("cv_results", {}).get(pest_cat, {})
        auc = cv.get("auc_stack", cv.get("auc", 0))
        confidence = "High" if auc >= 0.85 else "Moderate" if auc >= 0.70 else "Low"
        ndvi_val = float(np.clip(
            0.5*((wx["rain_mm"]-30)/30) - 0.3*((wx["temp_mean"]-25)/4), -2, 2))

        results.append({
            "pest": pest_name, "pest_cat": pest_cat,
            "risk_score": risk_score, "risk_level": rl,
            "confidence": confidence, "model_auc": round(auc, 3),
            "model_version": meta.get("version", "v3"),
            "history_score": round(hist_score, 3), "weather_score": round(wx_score, 3),
            "weather_driver": wx_driver + rule_note,
            "history_note": f"Reported in KCC records from {state.title()}",
            "feature_drivers": _top_feature_drivers(wx, proba, lag1, month),
            "growth_stage": f"~{days_sow}d from sowing",
            "ndvi_index": int((ndvi_val + 2) / 4 * 100),
            "recommended_action": _action(rl), "spray": spray, "weather_summary": wx,
        })

    results.sort(key=lambda x: -x["risk_score"])
    medium_plus = [r for r in results if r["risk_level"] in ("CRITICAL","HIGH","MEDIUM")]
    return medium_plus[:8] if len(medium_plus) >= 3 else results[:6]


def _heuristic_predictions(state, crop_group, month, wx, wx_score, wx_driver):
    """Produce model-free predictions from weather and pest seasonality.

    The month selects an expert-encoded list of seasonally active pests. Every
    pest starts from the same base score (0.4 * wx_score + 0.3), which the
    agronomic rules may then boost. ``state`` and ``crop_group`` are accepted
    but not used.

    Returns:
        list of result dicts sorted by risk_score descending, using the same
        keys as the model paths with fixed heuristic placeholders
        (model_auc 0, ndvi_index 50, growth_stage "Unknown").
    """
    # Seasonal pest probabilities (expert-encoded)
    season_table = (
        ((6, 7, 8, 9),  ["blight","rot","caterpillar","borer","whitefly"]),
        ((10, 11, 12),  ["rust","mildew","aphid","leaf_spot"]),
        ((1, 2, 3),     ["aphid","thrips","jassid","mite"]),
        ((4, 5),        ["mite","whitefly","thrips","caterpillar"]),
    )
    in_season = next(
        (pests for months, pests in season_table if month in months),
        None,
    )
    if not in_season:
        # Month outside 1-12: fall back to a generic shortlist.
        in_season = ["aphid","blight","whitefly","borer"]

    def _entry(pest_cat):
        # One result dict for a single pest category.
        score = 0.4 * wx_score + 0.3
        score, fired = _apply_agro_rules(pest_cat, score, wx)
        suffix = f" [+{', '.join(fired)}]" if fired else ""
        risk_score = max(0, min(100, int(round(score * 100))))
        level = _risk_level(risk_score)
        default_info = (pest_cat.replace("_"," ").title(), "Consult KVK")
        pest_name, spray = PEST_INFO.get(pest_cat, default_info)
        return {
            "pest": pest_name,
            "pest_cat": pest_cat,
            "risk_score": risk_score,
            "risk_level": level,
            "confidence": "Low (heuristic only)",
            "model_auc": 0,
            "model_version": "heuristic",
            "history_score": 0,
            "weather_score": round(wx_score, 3),
            "weather_driver": wx_driver + suffix,
            "history_note": "Seasonal heuristic — no model data available",
            "feature_drivers": ["Seasonal pattern", wx_driver],
            "growth_stage": "Unknown",
            "ndvi_index": 50,
            "recommended_action": _action(level),
            "spray": spray,
            "weather_summary": wx,
        }

    entries = [_entry(pest_cat) for pest_cat in in_season[:6]]
    # Stable sort: pests with equal scores keep their seasonal-list order.
    return sorted(entries, key=lambda item: -item["risk_score"])