"""Manual smoke test: POST to a local streaming endpoint and echo the stream."""

import json
import time

import requests


def verify_sse():
    """POST a sample payload to the streaming endpoint and print each line.

    Prints the HTTP status code; on a 200 response, prints every non-empty
    line of the streamed body between "Stream Start" / "Stream End" markers.
    On any other status, prints the response body instead. Request-level
    failures (connection errors, timeouts, broken streams) are reported on
    stdout rather than raised.

    Returns:
        None.
    """
    url = "http://localhost:7860/generate_generic?stream=true"
    payload = {
        "text": "The patient complains of severe headache and nausea.",
        "custom_prompt": "List the symptoms.",
        "model_name": "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf",
        "model_type": "gguf",
    }

    print(f"Connecting to {url}...")
    try:
        # stream=True keeps the body unread so lines can be consumed as
        # they arrive; the context manager releases the connection even
        # if iteration stops early.
        with requests.post(url, json=payload, stream=True, timeout=60) as response:
            print(f"Status Code: {response.status_code}")

            if response.status_code != 200:
                print("Error Response:", response.text)
                return

            print("--- Stream Start ---")
            for line in response.iter_lines():
                # iter_lines() yields empty byte strings for blank
                # separator/keep-alive lines; skip those.
                if not line:
                    continue
                # errors="replace" so one malformed byte sequence cannot
                # abort the whole verification run.
                decoded_line = line.decode("utf-8", errors="replace")
                print(decoded_line)
                # Optional: attempt partial parse if it matches SSE format
                # data: {...}
            print("--- Stream End ---")
    except requests.RequestException as e:
        # Covers connection failures, timeouts and mid-stream errors;
        # anything else is a programming error and should surface.
        print(f"Request failed: {e}")


if __name__ == "__main__":
    # Wait a bit if server is just starting
    time.sleep(2)
    verify_sse()