"""Small Flask front end for a locally loaded ``microsoft/phi-2`` model.

Routes:
    GET  /     -> serves ``index.html`` from the directory containing this file.
    POST /ask  -> takes JSON ``{"question": "..."}`` and replies with JSON
                  ``{"answer": "..."}``.

The model is loaded once at import time, so importing this module is slow.
"""
import os

import torch
from flask import Flask, jsonify, request, send_from_directory
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = Flask(__name__)

# --- Load the model ---
model_name = "microsoft/phi-2"

# Kept as a module-level name for anything that imports it. It is deliberately
# NOT passed to ``pipeline`` below: the model is loaded with
# ``device_map="auto"``, and transformers rejects or ignores (depending on the
# version) an explicit ``device`` for a model that was placed that way.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Loading model... this may take a few minutes.")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
print("Model loaded.")


# --- Serve your custom HTML interface ---
@app.route("/")
def home():
    """Serve ``index.html`` from the directory that contains this file."""
    # abspath guards against a relative ``__file__`` (possible on older
    # interpreters when the script is started as ``python app.py``).
    base_dir = os.path.dirname(os.path.abspath(__file__))
    return send_from_directory(base_dir, "index.html")


# --- API endpoint for AI ---
@app.route("/ask", methods=["POST"])
def ask():
    """Generate a completion for the posted question.

    Expects a JSON object with a string ``"question"`` field. Always replies
    with a JSON object holding a single ``"answer"`` string: the generated
    text on success, or a fixed message when the question is missing/invalid
    or generation fails. The HTTP status is left at the default in every case
    so existing clients that only read ``answer`` keep working.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body; the isinstance check also covers valid JSON that is not an object
    # (e.g. a list or a bare string), which has no ``.get``.
    payload = request.get_json(silent=True)
    question = payload.get("question", "") if isinstance(payload, dict) else ""

    if not isinstance(question, str) or not question.strip():
        return jsonify({"answer": "Please ask a valid question."})

    try:
        # max_new_tokens bounds only the generated continuation. The previous
        # max_length=200 also counted the prompt tokens, so a long question
        # left little or no room for an answer.
        response = generator(
            question,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
        )
        answer = response[0]["generated_text"]
    except Exception:
        # Top-level request boundary: record the full traceback server-side
        # and give the client a generic message rather than a 500 page.
        app.logger.exception("Text generation failed")
        return jsonify({"answer": "⚠️ There was an error generating the answer."})

    return jsonify({"answer": answer})


if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: anyone who can reach it can run arbitrary code.
    # Debug mode also starts the reloader, which imports this module (and so
    # loads the model) a second time. Opt in explicitly with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)