"""Debug script: dump sample entries and smoke-test semantic search.

Connects to ``platform.db`` located next to this file, prints the first few
rows of the ``entries`` table for one organisation, then encodes every stored
question with a sentence-transformers model and prints the closest matches
for a handful of hard-coded test queries.
"""
import json  # noqa: F401  (unused here; kept because the original file imports it)
import os
import sqlite3
from contextlib import closing
from typing import Optional

import torch

BASE_DIR = os.path.abspath(os.path.dirname(__file__))
DB_PATH = os.path.join(BASE_DIR, "platform.db")

# Defaults reproduce the values that were hard-coded in the original script.
ORG_ID = 2
SAMPLE_LIMIT = 15
TOP_K = 3
MODEL_NAME = "paraphrase-multilingual-MiniLM-L12-v2"
TEST_QUERIES = (
    "comment bloquer un paiement",
    "ouvrir un compte bancaire",
    "faire une reclamation",
    "demande de credit",
    "carte bancaire perdue",
)


def fetch_entries(conn: sqlite3.Connection, org_id: int = ORG_ID,
                  limit: Optional[int] = None) -> list:
    """Return the ``entries`` rows of *org_id*, ordered by ``id``.

    Args:
        conn: Open SQLite connection. Callers that index rows by column name
            must set ``conn.row_factory = sqlite3.Row`` beforehand.
        org_id: Organisation whose entries are selected.
        limit: Maximum number of rows to return; ``None`` returns all rows.

    Returns:
        A list of rows with columns id, question, processus, intent, response.
    """
    sql = (
        "SELECT id, question, processus, intent, response "
        "FROM entries WHERE org_id=? ORDER BY id"
    )
    params = [org_id]
    if limit is not None:
        sql += " LIMIT ?"
        params.append(limit)
    # Bound parameters instead of values baked into the SQL string.
    return conn.execute(sql, params).fetchall()


def preview(text: Optional[str], limit: int) -> str:
    """Return the first *limit* characters of *text*, treating ``None`` as ``""``."""
    return (text or "")[:limit]


def top_k_indices(scores: torch.Tensor, k: int = TOP_K) -> list:
    """Return the indices of the *k* highest values in the 1-D tensor *scores*.

    ``torch.topk`` raises when ``k`` exceeds the number of elements, so ``k``
    is clamped to the tensor length; an empty tensor yields an empty list.
    Indices are ordered from highest to lowest score.
    """
    k = min(k, scores.shape[-1])
    if k <= 0:
        return []
    return torch.topk(scores, k).indices.tolist()


def print_sample_entries(conn: sqlite3.Connection, org_id: int = ORG_ID,
                         limit: int = SAMPLE_LIMIT) -> None:
    """Print the first *limit* entries of *org_id*, one blank line after each."""
    print(f"=== SAMPLE ENTRIES (first {limit}, org_id={org_id}) ===")
    for row in fetch_entries(conn, org_id, limit):
        print(f"[{row['id']}] Q: {row['question']}")
        print(f" processus={row['processus']} | intent={row['intent']}")
        print(f" R: {preview(row['response'], 100)}")
        print()


def run_live_search(conn: sqlite3.Connection, queries=TEST_QUERIES,
                    org_id: int = ORG_ID, top_k: int = TOP_K) -> None:
    """Encode every stored question and print the best matches per query.

    Args:
        conn: Open SQLite connection with ``row_factory = sqlite3.Row``.
        queries: Iterable of query strings to test.
        org_id: Organisation whose entries form the search corpus.
        top_k: Number of matches printed per query (clamped to corpus size).
    """
    print("=== LIVE SEARCH TEST ===")
    entries = fetch_entries(conn, org_id)
    if not entries:
        # Encoding an empty corpus and taking top-k of it would fail.
        print(f"No entries found for org_id={org_id}; skipping search.")
        return

    # Imported here rather than at module top so the database dump and the
    # helpers above remain usable when sentence-transformers is not installed.
    from sentence_transformers import SentenceTransformer, util

    model = SentenceTransformer(MODEL_NAME, device="cpu")
    # A NULL question would break encode(); substitute an empty string.
    questions = [row["question"] or "" for row in entries]
    corpus_emb = model.encode(
        questions,
        convert_to_tensor=True,
        normalize_embeddings=True,
        batch_size=64,
        show_progress_bar=False,
    )

    for query in queries:
        query_emb = model.encode(
            query, convert_to_tensor=True, normalize_embeddings=True
        )
        # Row 0 holds the similarity of the single query to each stored question.
        scores = util.pytorch_cos_sim(query_emb, corpus_emb)[0]
        print(f"QUERY: '{query}'")
        for idx in top_k_indices(scores, top_k):
            row = entries[idx]
            print(f" [{float(scores[idx])*100:.1f}%] {row['question']} | intent={row['intent']}")
            print(f" R: {preview(row['response'], 80)}")
        print()


def main() -> None:
    """Open the database, run both reports, and always close the connection."""
    # sqlite3's own context manager only scopes a transaction; closing()
    # guarantees the connection itself is released, even on error.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.row_factory = sqlite3.Row
        print_sample_entries(conn)
        run_live_search(conn)


if __name__ == "__main__":
    main()