| from fastapi import FastAPI, Query |
| from fastapi.middleware.cors import CORSMiddleware |
| import numpy as np |
| import json |
| from sentence_transformers import SentenceTransformer |
| import google.generativeai as genai |
| import os |
| from dotenv import load_dotenv |
|
|
| |
| |
| |
|
|
# ---------------------------------------------------------------------------
# Startup: everything in this section runs once, when the module is imported.
# ---------------------------------------------------------------------------

# Pull settings from a .env file into the process environment.
print("Loading environment variables...")
load_dotenv()

# Song records, later looked up by position in search_songs().
print("Loading songs data...")
with open("songs.json", encoding="utf-8") as songs_file:
    songs = json.load(songs_file)

# Precomputed song embeddings. search_songs() uses a row index of this array
# to index `songs`, so row i is assumed to belong to songs[i] -- TODO confirm
# both files were generated together.
print("Loading embeddings...")
embeddings = np.load("song_embeddings_e5_final.npy")

# Encoder used to embed incoming queries at request time.
print("Loading embedding model...")
model = SentenceTransformer("intfloat/multilingual-e5-large")

# The key is None when GEMINI_API_KEY is unset; /health reports that case.
print("Configuring Gemini API...")
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
gemini_model = genai.GenerativeModel("gemini-2.5-flash")

print("API ready!")
|
|
| |
| |
| |
|
|
# The application object that the route decorators in this file attach to.
app = FastAPI(
    version="2.0.0",
    title="Thirumandiram Search API",
    description="Semantic search and AI-assisted explanations for Thirumandiram verses",
)

# CORS policy: every origin, method and header is accepted.
# NOTE(review): this is fully open -- confirm that is intended for deployment.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
|
|
| |
| |
| |
|
|
def get_payiram(song_number: int) -> str:
    """Return the Payiram label for a song number.

    Song numbers 1 through 3000 are split into nine consecutive ranges, each
    with its own label. Both ends of every range are inclusive.

    Args:
        song_number: The song number to classify.

    Returns:
        The label of the range containing ``song_number``, or
        ``"Unknown Payiram"`` when it lies outside every range.
    """
    # (first song, last song, label) -- ranges are contiguous and inclusive.
    ranges = (
        (1, 336, "First Payiram"),
        (337, 548, "Second Payiram"),
        (549, 883, "Third Payiram"),
        (884, 1033, "Fourth Payiram"),
        (1034, 1560, "Fifth Payiram"),
        (1561, 1783, "Sixth Payiram"),
        (1784, 1980, "Seventh Payiram"),
        (1981, 2121, "Eighth Payiram"),
        (2122, 3000, "Ninth Payiram"),
    )
    for first, last, label in ranges:
        if first <= song_number <= last:
            return label
    return "Unknown Payiram"
|
|
| |
| |
| |
|
|
def search_songs(query: str, top_k: int = 3):
    """Return the songs whose embeddings are most similar to ``query``.

    The query is prefixed with ``"query: "``, encoded with the module-level
    ``model``, and scored by cosine similarity against every row of the
    module-level ``embeddings`` array. The best-scoring row indices are then
    used to look up records in the module-level ``songs`` list.

    Args:
        query: Free-text search query.
        top_k: Number of best matches to return.

    Returns:
        A list of dicts, best match first, each with the keys
        ``song_number``, ``padal``, ``vilakam``, ``vilakam_en``, ``payiram``
        and ``similarity`` (a plain ``float``).
    """
    encoded_query = model.encode(["query: " + query])[0]

    # Cosine similarity: dot product divided by the product of the norms.
    dot_products = np.dot(embeddings, encoded_query)
    norm_products = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(encoded_query)
    scores = dot_products / norm_products

    # argsort is ascending, so negate the scores to rank highest-first.
    best_rows = np.argsort(-scores)[:top_k]

    def as_result(row):
        # Turn one embedding row index into the response record for its song.
        entry = songs[row]
        number = entry["song_number"]
        return {
            "song_number": number,
            "padal": entry["padal"],
            "vilakam": entry["vilakam"],
            "vilakam_en": entry["vilakam_en"],
            "payiram": get_payiram(number),
            "similarity": float(scores[row]),
        }

    return [as_result(row) for row in best_rows]
|
|
| |
| |
| |
|
|
def is_thirumandiram_scope(query: str) -> bool:
    """Ask Gemini whether ``query`` is within the Thirumandiram domain.

    The module-level ``gemini_model`` is prompted to act as a strict YES/NO
    classifier for the query.

    Args:
        query: The raw user query to classify.

    Returns:
        True only when the model's answer is YES. False when it answers NO,
        answers anything else, or returns a response with no text.
    """
    prompt = f"""
You are a strict classifier.

Decide whether the following user query is related to:
- Thirumandiram
- Thirumoolar
- Saivism, Siddha philosophy, Yoga
- Spiritual concepts explained in Thirumandiram verses

Respond with ONLY:
YES or NO

If unsure, respond NO.

User query:
"{query}"
"""
    response = gemini_model.generate_content(prompt)

    try:
        answer = response.text
    except ValueError:
        # `response.text` raises ValueError when the reply carries no usable
        # text part (for example, a blocked response). The prompt's own rule
        # is "if unsure, NO", so treat a missing answer as out of scope
        # instead of letting the request fail.
        return False

    # Accept harmless decoration around the verdict ("YES.", "**YES**",
    # '"Yes"') while still rejecting anything that is not a bare YES.
    verdict = answer.strip().strip(".!\"'*`").strip().upper()
    return verdict == "YES"
|
|
| |
| |
| |
|
|
@app.get("/")
def root():
    # Landing route: reports the API name and version together with a map of
    # the available endpoints and example query strings.
    endpoints = {
        "search": "/search?q=<query>&top_k=3",
        "chat_search": "/chat_search?q=<query>&top_k=3",
        "docs": "/docs",
        "health": "/health",
    }
    return {
        "name": "Thirumandiram Search API",
        "version": "2.0.0",
        "endpoints": endpoints,
    }
|
|
@app.get("/health")
def health():
    # Liveness probe. Reports whether the module-level embedding model object
    # exists and whether GEMINI_API_KEY is present in the environment (the
    # key itself is never returned).
    gemini_key = os.getenv("GEMINI_API_KEY")
    return {
        "status": "healthy",
        "embedding_model_loaded": model is not None,
        "gemini_configured": gemini_key is not None,
    }
|
|
| |
| |
| |
|
|
@app.get("/search")
def search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10),
):
    # Plain semantic search: echoes the query back alongside the `top_k`
    # (1 to 10, default 3) best matches from search_songs().
    matches = search_songs(q, top_k)
    return {"query": q, "results": matches}
|
|
| |
| |
| |
|
|
@app.get("/chat_search")
def chat_search(
    q: str = Query(..., description="Search query in Tamil or English"),
    top_k: int = Query(3, ge=1, le=10),
):
    # Semantic search plus an AI-written explanation.
    #
    # Flow:
    #   1. Ask the classifier whether the query is in scope; if not, return an
    #      out-of-scope payload with no results.
    #   2. Retrieve the `top_k` (1 to 10, default 3) best-matching songs.
    #   3. Ask Gemini to explain how those songs address the query.
    #
    # Response keys: query, out_of_scope, summary, results, plus `message`
    # whenever `summary` is None.

    # Step 1: scope gate.
    if not is_thirumandiram_scope(q):
        return {
            "query": q,
            "out_of_scope": True,
            "message": "The query is not within the scope of Thirumandiram.",
            "summary": None,
            "results": [],
        }

    # Step 2: retrieval.
    results = search_songs(q, top_k)

    # Step 3: build the grounding context from the retrieved songs.
    context = "\n\n".join(
        f"Song {r['song_number']} ({r['payiram']}):\n"
        f"Verse:\n{r['padal']}\n"
        f"Explanation:\n{r['vilakam_en']}"
        for r in results
    )

    prompt = f"""
You are a Thirumandiram expert assistant.
Answer ONLY using Thirumandiram philosophy.

User query:
"{q}"

Relevant verses:
{context}

Explain clearly how these verses address the query.
"""

    response = gemini_model.generate_content(prompt)

    try:
        summary = response.text
    except ValueError:
        # `response.text` raises ValueError when the reply carries no usable
        # text part (for example, a blocked response). The search results are
        # already computed, so return them without a summary rather than
        # failing the whole request.
        return {
            "query": q,
            "out_of_scope": False,
            "message": "The AI summary could not be generated; showing search results only.",
            "summary": None,
            "results": results,
        }

    return {
        "query": q,
        "out_of_scope": False,
        "summary": summary,
        "results": results,
    }
|
|