| """HyDE (Hypothetical Document Embeddings) — generate a fake answer with the LLM, |
| then embed *that* and re-search. Doubles recall on rare named diseases / |
| chemical names that the multilingual embedder distributes poorly. |
| |
| Only fires when the top FAISS+BM25+rerank score is below HYDE_TRIGGER_THRESHOLD. |
| Uses the cheapest model in the cascade (gemma2-9b-it) to keep cost ~0. |
| """ |
| from __future__ import annotations |
| from typing import Optional |
|
|
|
|
# Prompt template for the hypothetical-answer LLM call. It is filled with
# str.format(): ``{query}`` takes the farmer's question and ``{ctx}`` takes an
# optional context line (or the empty string).
_HYDE_PROMPT = "\n".join((
    "You are an Indian agricultural extension officer. "
    "A farmer asked the question below. "
    "Write a SHORT (60-120 words) hypothetical expert answer. "
    "The answer must include:",
    "- The likely cause (1 sentence)",
    '- The standard chemical / treatment with EXACT dose and unit (e.g. "Mancozeb 75% WP @ 2g/L")',
    "- Timing and method (spray / drench / soil application)",
    "- One safety / IPM note",
    "",
    'Do NOT add disclaimers. Do NOT say "consult an expert". Just give the answer as if from an ICAR factsheet.',
    "",
    "Farmer question: {query}",
    "{ctx}",
    "Hypothetical expert answer:",
))
|
|
|
|
def expand_query(query: str,
                 crop: Optional[str] = None,
                 problem_type: Optional[str] = None) -> str:
    """Generate a hypothetical expert answer for HyDE retrieval.

    The prompt is built from ``_HYDE_PROMPT`` and sent to ``llm.generate``.
    This function is best-effort: it never raises for LLM or import problems.

    Args:
        query: The farmer's question. Leading/trailing whitespace is ignored.
        crop: Optional crop name; added to the prompt as context when truthy.
        problem_type: Optional problem category; added to the prompt as
            context when truthy and not equal to ``"general"``.

    Returns:
        The stripped LLM output, or ``""`` when the query is blank, the LLM
        returns nothing usable, or any exception occurs while importing or
        calling the LLM.
    """
    import logging

    question = query.strip() if query else ""
    if not question:
        # Nothing to expand: skip the LLM call instead of paying for a
        # hypothetical answer to an empty question.
        return ""

    hints = []
    if crop:
        hints.append(f"Crop: {crop}")
    if problem_type and problem_type != "general":
        hints.append(f"Problem type: {problem_type}")
    # The trailing newline keeps the context on its own template line.
    ctx_str = ("Context: " + "; ".join(hints) + "\n") if hints else ""

    prompt = _HYDE_PROMPT.format(query=question, ctx=ctx_str)

    try:
        # Imported lazily, inside the try, so an import failure degrades to
        # "no expansion" exactly like a failed LLM call.
        from . import llm

        answer = llm.generate(prompt, max_tokens=200, temperature=0.3,
                              prefer_cheap=True)
    except Exception:
        # Still best-effort, but leave a trace so failures are diagnosable.
        logging.getLogger(__name__).warning(
            "HyDE query expansion failed; continuing without it",
            exc_info=True,
        )
        return ""

    # A non-string or whitespace-only result counts as "no answer".
    return answer.strip() if isinstance(answer, str) else ""
|
|