Spaces:
Running
Running
| import gradio as gr | |
| from transformers import pipeline | |
# 1. System Prompt / Personality for Hermes Agent
# The persona text is prepended to every conversation sent to the model.
# It is assembled from adjacent string literals (one per sentence or clause)
# so the source stays readable; the resulting value is a single line.
HERMES_SYSTEM_PROMPT = (
    "You are Hermes Agent, a helpful, fast, and practical multi-purpose assistant. "
    "You are professional, calm, and user-friendly. "
    "You can answer questions, reason through tasks step-by-step, plan, summarize, "
    "provide coding help, offer research-style explanations, and break down complex tasks. "
    "Do not pretend to have abilities you do not possess. "
    "Always strive for clarity and conciseness."
)
# 2. Model Integration
# 'distilgpt2' is a small, CPU-friendly model chosen for demonstration on the
# Hugging Face Spaces free tier. Larger instruction-tuned models such as
# 'HuggingFaceH4/zephyr-7b-beta' or 'mistralai/Mistral-7B-Instruct-v0.2' give
# better answers but would require a GPU-enabled Space.
def _load_generator():
    """Build the text-generation pipeline, or return None if that fails.

    Any failure is reported on stdout rather than raised, so the app can
    still start and report the problem to the user at request time.
    """
    try:
        # trust_remote_code=True may be necessary for some models, but should
        # be avoided unless explicitly needed; it is not used here.
        return pipeline('text-generation', model='distilgpt2')
    except Exception as e:
        print(f"Error loading model: {e}")
        return None


# Module-level handle used by the chatbot logic; None means loading failed.
generator = _load_generator()
# 3. Chatbot Logic
_USER_TAG = "User:"
_AGENT_TAG = "Hermes Agent:"
_MAX_NEW_TOKENS = 150


def _build_prompt(message, turns):
    """Render the system prompt, prior (user, agent) turns and the new message."""
    parts = [HERMES_SYSTEM_PROMPT + "\n\n"]
    for human, agent in turns:
        # A pending turn may carry None as the reply; render it as empty text
        # instead of the literal string "None".
        agent_text = "" if agent is None else agent
        parts.append(f"{_USER_TAG} {human}\n{_AGENT_TAG} {agent_text}\n")
    parts.append(f"{_USER_TAG} {message}\n{_AGENT_TAG}")
    return "".join(parts)


def _fits_context(prompt):
    """Return True if prompt plus the generation budget fits the model window.

    When the pipeline exposes no tokenizer or no length limit, the prompt is
    assumed to fit.
    """
    tokenizer = getattr(generator, "tokenizer", None)
    limit = getattr(tokenizer, "model_max_length", None)
    if tokenizer is None or not limit:
        return True
    return len(tokenizer.encode(prompt)) + _MAX_NEW_TOKENS <= limit


def _extract_reply(generated_text, prompt):
    """Isolate the agent's reply for the current turn from the model output."""
    if generated_text.startswith(prompt):
        # The output echoes the prompt; everything after it is new text.
        reply = generated_text[len(prompt):]
    else:
        # Prompt not echoed verbatim: fall back to the last agent marker.
        _, marker, tail = generated_text.rpartition(_AGENT_TAG)
        reply = tail if marker else generated_text

    # Small models tend to keep writing further dialogue turns; keep only the
    # text that precedes the first such invented turn.
    for tag in (_USER_TAG, _AGENT_TAG):
        cut = reply.find("\n" + tag)
        if cut != -1:
            reply = reply[:cut]
    reply = reply.strip()

    # The model often stops mid-sentence, so trim back to the last
    # sentence-ending punctuation mark when there is one.
    last_punctuation = max(reply.rfind(mark) for mark in ".?!")
    if last_punctuation != -1:
        reply = reply[:last_punctuation + 1]
    return reply


def predict(message, history):
    """Generate Hermes Agent's reply to ``message``.

    Args:
        message: The user's new message.
        history: Earlier conversation as an iterable of (user, agent) pairs;
            ``None`` or empty means no prior turns.

    Returns:
        The reply text, or a human-readable error string if the model is not
        loaded or inference fails. This function does not raise.
    """
    if generator is None:
        return "Error: Model could not be loaded. Please check the backend logs."

    try:
        turns = list(history or [])
        # Drop the oldest turns until the prompt fits. truncation=True alone
        # would presumably cut the END of the prompt (the current question),
        # assuming the tokenizer's default right-side truncation.
        first_kept = 0
        prompt = _build_prompt(message, turns)
        while first_kept < len(turns) and not _fits_context(prompt):
            first_kept += 1
            prompt = _build_prompt(message, turns[first_kept:])

        # max_new_tokens bounds the reply length; num_return_sequences=1 asks
        # for a single candidate; truncation=True remains as a last resort.
        response = generator(
            prompt,
            max_new_tokens=_MAX_NEW_TOKENS,
            num_return_sequences=1,
            truncation=True,
        )
        return _extract_reply(response[0]['generated_text'], prompt)
    except Exception as e:
        return f"An error occurred during model inference: {e}"
# 4. Gradio Web UI
with gr.Blocks() as demo:
    gr.Markdown("# Hermes Agent")
    gr.Markdown("""
Hermes Agent is a helpful, fast, and practical multi-purpose AI assistant.
It can answer questions, reason through tasks, plan, summarize, and provide coding help.
""")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
    clear = gr.Button("Clear")

    def respond(message, history):
        """Handle a submitted message for the [msg, chatbot] output pair.

        predict() returns only the reply text, but the submit event has two
        outputs. This adapter returns one value per output: an empty string
        to clear the textbox, and the history extended with the new turn.
        """
        # Copy so the list handed in by Gradio is never mutated in place.
        turns = list(history or [])
        if not message or not message.strip():
            # Nothing to send; leave the conversation as it is.
            return "", turns
        reply = predict(message, turns)
        # History is kept as (user, agent) pairs, the same shape predict()
        # iterates over. NOTE(review): assumes the Chatbot component uses the
        # pair/tuple history format; confirm for the installed Gradio version.
        turns.append((message, reply))
        return "", turns

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # One value per output component: reset both the textbox and the chat.
    clear.click(lambda: (None, None), None, [msg, chatbot], queue=False)
# Launch the Gradio app
def main():
    """Start the Gradio server for the Hermes Agent interface."""
    # debug=True is meant for local development; set it to False for
    # production. share=True (not used here) would create a public link,
    # which is useful for testing but should stay off when deployed on Spaces.
    demo.launch(debug=True)


# On Hugging Face Spaces the app runs automatically when app.py is present.
if __name__ == "__main__":
    main()