kcc-agri / config.py
hritikm15's picture
KCC AgriAdvisor v1 — code deploy
ca88a2c verified
from pathlib import Path
import os
# ── Load .env if python-dotenv available ──────────────────────────────────────
# python-dotenv is an optional dependency. When it is not installed the import
# fails and configuration is read only from variables already present in the
# process environment (set by the OS / Docker).
try:
    from dotenv import load_dotenv
except ImportError:
    pass  # dotenv optional — env vars can be set by OS / Docker
else:
    # Only the import is guarded, so an error raised while reading the file is
    # not silently swallowed. override=False keeps variables that are already
    # set in the environment instead of replacing them with .env values.
    load_dotenv(Path(__file__).parent / ".env", override=False)
# ── Directories ────────────────────────────────────────────────────────────────
# Everything is anchored on the folder containing this file, so the paths do
# not depend on the current working directory or on a machine-specific prefix.
PROJECT_ROOT: Path = Path(__file__).parent.resolve()
DATA_DIR: Path = PROJECT_ROOT.joinpath("data")
INDEX_DIR: Path = PROJECT_ROOT.joinpath("index")
LOGS_DIR: Path = PROJECT_ROOT.joinpath("logs")

# ── Source data (KCC parquet files) ───────────────────────────────────────────
# Location can be overridden with the KCC_DATA_DIR environment variable;
# otherwise it falls back to <PROJECT_ROOT>/data/kcc_raw.
KCC_DATA_DIR: Path = Path(
    os.getenv("KCC_DATA_DIR", str(DATA_DIR.joinpath("kcc_raw")))
)

# ── Step 1 output ──────────────────────────────────────────────────────────────
CLEAN_DATA_DIR: Path = DATA_DIR.joinpath("clean_years")
CLEAN_DATA_FILE: Path = DATA_DIR.joinpath("kcc_clean.parquet")

# ── Step 2 output ──────────────────────────────────────────────────────────────
FAISS_INDEX_FILE: Path = INDEX_DIR.joinpath("faiss.index")
METADATA_FILE: Path = INDEX_DIR.joinpath("metadata.parquet")
BM25_INDEX_FILE: Path = INDEX_DIR.joinpath("bm25.db")
# ── Embedding model ────────────────────────────────────────────────────────────
EMBEDDING_MODEL: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
# NOTE(review): presumably the output vector size of EMBEDDING_MODEL; keep the
# two in sync if the model is ever swapped — confirm against the index builder.
EMBEDDING_DIM: int = 384
EMBED_BATCH_SIZE: int = 512

# ── Retrieval settings ─────────────────────────────────────────────────────────
TOP_K: int = 5

# ── Cross-encoder reranker ─────────────────────────────────────────────────────
RERANKER_MODEL: str = "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1"
# NOTE(review): looks like the candidate pool size handed to the reranker
# (larger than TOP_K) — verify against the retrieval code.
RERANKER_TOP_N: int = 20
# NOTE(review): distinct from MIN_ANSWER_LEN in the data filters; the exact use
# is not visible in this file.
MIN_ANSWER_CHARS: int = 100
# ── LLM cascade — keys from environment ───────────────────────────────────────
# API keys default to the empty string when the variable is not set.
GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL_PRIMARY: str = "meta-llama/llama-4-scout-17b-16e-instruct"
GROQ_MODEL_FALLBACK: str = "llama-3.3-70b-versatile"
GEMINI_API_KEY: str = os.getenv("GEMINI_API_KEY", "")
GEMINI_MODEL: str = "gemma-3-27b-it"

# ── Mandi price API ───────────────────────────────────────────────────────────
DATA_GOV_API_KEY: str = os.getenv("DATA_GOV_API_KEY", "")

# ── Environment ───────────────────────────────────────────────────────────────
ENVIRONMENT: str = os.getenv("ENVIRONMENT", "development")
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
# Comma-separated list of origins; surrounding whitespace is trimmed and empty
# entries (e.g. from a trailing comma) are dropped.
ALLOWED_ORIGINS = list(
    filter(
        None,
        map(
            str.strip,
            os.getenv(
                "ALLOWED_ORIGINS", "http://localhost:8501,http://localhost:8000"
            ).split(","),
        ),
    )
)
# NOTE(review): the fallback below is a credential committed to source control.
# Set B2B_DEMO_PASSWORD in the environment for any non-demo deployment.
B2B_DEMO_PASSWORD: str = os.getenv("B2B_DEMO_PASSWORD", "DeHaat2025")
# ── Data filters ───────────────────────────────────────────────────────────────
# NOTE(review): presumably minimum character counts for keeping a record —
# confirm against the cleaning step, which is not part of this file.
MIN_QUERY_LEN: int = 10
MIN_ANSWER_LEN: int = 20
START_YEAR: int = 2006
CHUNK_SIZE: int = 100_000
# None presumably means "no per-year sampling" — TODO confirm with the consumer.
SAMPLE_PER_YEAR = None

# ── Column names ───────────────────────────────────────────────────────────────
QUERY_COL: str = "QueryText"
ANSWER_COL: str = "KccAns"
# Create dirs on import
# Importing this module creates the output directories if they are missing.
# exist_ok=True makes repeated imports harmless; parents=True also creates any
# missing intermediate folders. KCC_DATA_DIR (the source data location) is not
# in the list and is therefore never created here.
for _d in [DATA_DIR, CLEAN_DATA_DIR, INDEX_DIR, LOGS_DIR]:
    _d.mkdir(parents=True, exist_ok=True)