"""Cheap regex/keyword guard.

Blocks non-agriculture queries before any FAISS/LLM cost is incurred.
"""
| from __future__ import annotations |
| import re |
|
|
# Vocabulary whose presence marks a query as agriculture-related.
# Latin-script entries are lowercase; a few entries are two-word phrases.
_AGRI_SIGNALS = {
    # General farming vocabulary, crop names and market terms
    # (English and romanized Hindi).
    "crop", "crops", "plant", "plants", "planting",
    "seed", "seeds", "soil", "fertilizer",
    "pesticide", "pest", "disease",
    "farm", "farmer", "farming", "kisan", "kheti", "fasal",
    "gehu", "gehun", "dhan", "kapas", "tamatar", "aloo", "pyaz",
    "mandi", "bhav", "rate",
    "harvest", "harvesting", "sow", "sown", "sowing",
    "grow", "grows", "growing", "grown",
    "variety", "varieties", "cultivar", "cultivars",
    "season", "seasonal", "kharif", "rabi", "zaid",
    "irrigation", "spray", "insecticide", "fungicide", "organic",
    "yield", "yields", "blight",
    "wheat", "rice", "paddy", "cotton", "maize", "sugarcane",
    "soybean", "mustard",
    "chilli", "brinjal", "onion", "tomato", "potato", "groundnut",
    "gram", "pulses", "pulse",
    "barley", "jowar", "bajra", "ragi", "arhar", "tur", "moong",
    "urad", "masoor", "chana",

    # Pests and diseases.
    "aphid", "borer", "mildew", "rust", "wilt", "thrips", "mite",
    "whitefly",
    "caterpillar", "jassid", "leaf spot", "mosaic", "virus", "rot",
    "fungus",
    "blast", "armyworm", "bollworm", "helicoverpa", "spodoptera",
    "girdle",

    # Romanized Hindi farming terms.
    "khaad", "dawai", "beej", "sinchai", "pattiya", "pattiyan",
    "keeda", "bimari",
    "rog", "upchar", "khet", "paidavar", "safed makhi", "tela",
    "mahu", "tikda",

    # Weather and climate hazards.
    "pala", "thand", "frost", "baarish", "drought", "sukha",
    "heat stress",
    "ola", "flood", "andhi",

    # Institutions, schemes and advisory services.
    "kvk", "icar", "pm kisan", "fasal bima", "kcc", "kisan call",
    "advisory",

    # Agrochemicals and other inputs.
    "endosulfan", "monocrotophos", "chlorpyrifos", "imidacloprid",
    "emamectin",
    "mancozeb", "propiconazole", "thiamethoxam", "rhizobium", "urea",
    "dap",

    # Devanagari-script terms.
    "फसल", "खेत", "किसान", "खाद", "बीज", "धान", "गेहूं", "कपास",
    "मक्का", "सरसों", "चना", "अरहर", "मूंग", "प्याज", "आलू",
    "कीड़ा", "बीमारी", "दवाइ", "स्प्रे", "उपचार", "सिंचाई",
}
|
|
# Whole-word, case-insensitive matcher for clearly off-topic keywords.
# The alternation is assembled from the ordered term list below.
_NON_AGRI_RE = re.compile(
    r"\b("
    + "|".join(
        (
            "stock market", "share market", "bitcoin", "crypto",
            "politics", "election",
            "movie", "cricket", "football", "recipe", "cooking", "exam",
            "bank account", "insurance claim",
            "marriage", "divorce", "job", "salary", "relationship",
            "celebrity", "news", "tv show",
            "web series", "ipl", "bollywood", "actor", "actress",
            "pakistan", "china", "war", "army",
            "love", "dating", "girlfriend", "boyfriend", "password",
            "hack", "code",
        )
    )
    + r")\b",
    flags=re.IGNORECASE,
)
|
|
# Message stating that only agriculture questions are handled: an English
# paragraph, a blank line, then a romanized-Hindi paragraph.
OFF_TOPIC_MSG = "\n\n".join(
    (
        "I can only help with agriculture-related questions — crops, pests, "
        "diseases, fertilizers, mandi prices, irrigation, seeds, and farming "
        "advice. Please ask a farming question and I'll be happy to help! 🌾",
        "Main sirf kheti-baadi, fasal, keede-makode, bimari, khaad, mandi "
        "bhav, aur kisan-sambandhit sawalon ka jawab de sakta hoon.",
    )
)
|
|
|
|
def is_agriculture_query(query: str) -> bool:
    """Return True if *query* looks like an agriculture question.

    Checks, in order:

    1. A query of at most three whitespace-separated words that contains
       no non-agriculture keyword passes; it is treated as a contextual
       follow-up. Empty or whitespace-only input falls in this case.
    2. A query containing any Devanagari character (U+0900-U+097F) passes
       unless a non-agriculture keyword is present.
    3. Any other query passes only if it contains an entry of
       ``_AGRI_SIGNALS``, matched as a single word or as two adjacent
       words (e.g. "leaf spot"). An agriculture signal therefore
       outweighs a non-agriculture keyword in the same query.
    """
    text = query.strip().lower()
    # Search once; both early exits below depend on the same result.
    off_topic = _NON_AGRI_RE.search(text) is not None

    if not off_topic and len(text.split()) <= 3:
        return True

    if any("\u0900" <= ch <= "\u097f" for ch in text):
        return not off_topic

    tokens = re.findall(r"\b\w+\b", text)
    # Single tokens can never equal a signal that contains a space, so
    # adjacent token pairs are added to let two-word signals match.
    candidates = set(tokens)
    candidates.update(
        f"{left} {right}" for left, right in zip(tokens, tokens[1:])
    )
    return not _AGRI_SIGNALS.isdisjoint(candidates)
|
|