"""Cheap regex/keyword guard. Blocks non-agriculture before any FAISS/LLM cost.

The guard is purely lexical: a query is accepted when it contains an
agriculture signal (single word or multi-word phrase) and is otherwise
rejected. Very short queries and Devanagari-script queries are allowed by
default unless they contain an obvious off-topic keyword.
"""
from __future__ import annotations

import re

# Lower-case keywords/phrases that mark a query as agriculture-related.
# Single-word entries are matched against the query's word tokens; entries
# containing a space are matched as phrases via _AGRI_PHRASE_RE below.
_AGRI_SIGNALS = {
    # Crops
    "crop", "crops", "plant", "plants", "planting", "seed", "seeds", "soil", "fertilizer",
    "pesticide", "pest", "disease", "farm", "farmer", "farming", "kisan", "kheti", "fasal",
    "gehu", "gehun", "dhan", "kapas", "tamatar", "aloo", "pyaz", "mandi", "bhav", "rate",
    "harvest", "harvesting", "sow", "sown", "sowing", "grow", "grows", "growing", "grown",
    "variety", "varieties", "cultivar", "cultivars", "season", "seasonal", "kharif", "rabi", "zaid",
    "irrigation", "spray", "insecticide", "fungicide", "organic", "yield", "yields", "blight",
    "wheat", "rice", "paddy", "cotton", "maize", "sugarcane", "soybean", "mustard",
    "chilli", "brinjal", "onion", "tomato", "potato", "groundnut", "gram", "pulses", "pulse",
    "barley", "jowar", "bajra", "ragi", "arhar", "tur", "moong", "urad", "masoor", "chana",
    # Pests / diseases
    "aphid", "borer", "mildew", "rust", "wilt", "thrips", "mite", "whitefly",
    "caterpillar", "jassid", "leaf spot", "mosaic", "virus", "rot", "fungus",
    "blast", "armyworm", "bollworm", "helicoverpa", "spodoptera", "girdle",
    # Hindi / regional
    "khaad", "dawai", "beej", "sinchai", "pattiya", "pattiyan", "keeda", "bimari",
    "rog", "upchar", "khet", "paidavar", "safed makhi", "tela", "mahu", "tikda",
    # Weather for crops
    "pala", "thand", "frost", "baarish", "drought", "sukha", "heat stress",
    "ola", "flood", "andhi",
    # Schemes / advisory
    "kvk", "icar", "pm kisan", "fasal bima", "kcc", "kisan call", "advisory",
    # Chemicals (banned-chemical questions ARE agri)
    "endosulfan", "monocrotophos", "chlorpyrifos", "imidacloprid", "emamectin",
    "mancozeb", "propiconazole", "thiamethoxam", "rhizobium", "urea", "dap",
    # Devanagari
    # NOTE: is_agriculture_query() returns before the keyword lookup for any
    # query containing Devanagari characters, so these entries are not
    # consulted by that function as written.
    "फसल", "खेत", "किसान", "खाद", "बीज", "धान", "गेहूं", "कपास",
    "मक्का", "सरसों", "चना", "अरहर", "मूंग", "प्याज", "आलू",
    "कीड़ा", "बीमारी", "दवाइ", "स्प्रे", "उपचार", "सिंचाई",
}

# Multi-word signals ("leaf spot", "safed makhi", ...) can never equal a single
# word token, so they get their own pattern. Words inside a phrase may be
# separated by any run of whitespace. Sorted only to make the pattern stable.
_AGRI_PHRASE_RE = re.compile(
    r"\b(?:"
    + "|".join(
        r"\s+".join(re.escape(part) for part in phrase.split())
        for phrase in sorted(s for s in _AGRI_SIGNALS if " " in s)
    )
    + r")\b"
)

# Word tokenizer used for the single-word signal lookup.
_WORD_RE = re.compile(r"\b\w+\b")

# Obvious off-topic keywords; matched as whole words/phrases, case-insensitive.
_NON_AGRI_RE = re.compile(
    r"\b(stock market|share market|bitcoin|crypto|politics|election|"
    r"movie|cricket|football|recipe|cooking|exam|bank account|insurance claim|"
    r"marriage|divorce|job|salary|relationship|celebrity|news|tv show|"
    r"web series|ipl|bollywood|actor|actress|pakistan|china|war|army|"
    r"love|dating|girlfriend|boyfriend|password|hack|code)\b",
    re.IGNORECASE,
)

# Bilingual (English + romanized Hindi) reply for rejected queries.
OFF_TOPIC_MSG = (
    "I can only help with agriculture-related questions — crops, pests, diseases, "
    "fertilizers, mandi prices, irrigation, seeds, and farming advice. "
    "Please ask a farming question and I'll be happy to help! 🌾\n\n"
    "Main sirf kheti-baadi, fasal, keede-makode, bimari, khaad, mandi bhav, "
    "aur kisan-sambandhit sawalon ka jawab de sakta hoon."
)


def is_agriculture_query(query: str) -> bool:
    """Return True if the query looks like an agriculture question.

    Decision order:

    1. Short queries (<= 3 whitespace-separated words, including the empty
       string) pass when no off-topic keyword is present; they are treated
       as contextual follow-ups such as "ok", "haan", "thanks".
    2. Queries containing any Devanagari character (U+0900..U+097F) pass
       unless an off-topic keyword is present.
    3. Everything else passes only if it contains an agriculture signal:
       either a single-word entry of ``_AGRI_SIGNALS`` as a whole word, or
       one of its multi-word phrases (e.g. "leaf spot", "safed makhi").

    An off-topic keyword does not by itself reject a longer query: a query
    that also carries an agriculture signal is still accepted.

    Args:
        query: Raw user text.

    Returns:
        True when the query should be let through, False when it should be
        answered with ``OFF_TOPIC_MSG``.
    """
    stripped = query.strip()
    lowered = stripped.lower()
    has_non_agri = _NON_AGRI_RE.search(lowered) is not None

    # Short follow-ups bypass the keyword check, but only when no obvious
    # off-topic keyword is present.
    if len(stripped.split()) <= 3 and not has_non_agri:
        return True

    # Devanagari script defaults to allow unless the off-topic regex hits.
    if any("\u0900" <= ch <= "\u097f" for ch in stripped):
        return not has_non_agri

    # Single-word signals: whole-token membership in the signal set.
    tokens = set(_WORD_RE.findall(lowered))
    if not tokens.isdisjoint(_AGRI_SIGNALS):
        return True

    # Multi-word signals: these never equal a single token, so they are
    # searched for as phrases in the lower-cased text.
    return _AGRI_PHRASE_RE.search(lowered) is not None