hermes-agent / app.py
ovokeomovie's picture
Upload app.py with huggingface_hub
c8e766c verified
Raw
History Blame Contribute Delete
4.18 kB
import gradio as gr
from transformers import pipeline
# 1. System Prompt/Personality for Hermes Agent
# Persona text that predict() places at the start of every prompt.
# The adjacent literals below are joined at compile time into one single-line
# string (no embedded newlines); each piece ends with the separating space.
HERMES_SYSTEM_PROMPT = (
    "You are Hermes Agent, a helpful, fast, and practical multi-purpose assistant. "
    "You are professional, calm, and user-friendly. "
    "You can answer questions, reason through tasks step-by-step, plan, summarize, "
    "provide coding help, offer research-style explanations, and break down complex tasks. "
    "Do not pretend to have abilities you do not possess. "
    "Always strive for clarity and conciseness."
)
# 2. Model Integration
# Using a small, CPU-friendly model for demonstration on Hugging Face Spaces free tier.
# For better performance and more complex tasks, a larger model with GPU would be recommended.
# Example: 'distilgpt2' is a good starting point for CPU inference.
# For more capable models, consider 'HuggingFaceH4/zephyr-7b-beta' or 'mistralai/Mistral-7B-Instruct-v0.2'
# which would require a GPU-enabled Space.
# Start from the "no model" state so `generator` is always defined;
# predict() checks for None and reports the failure to the user.
generator = None
try:
    # Build a text-generation pipeline around the small distilgpt2 checkpoint.
    # trust_remote_code is deliberately not passed; only enable it for models
    # that explicitly require it.
    generator = pipeline('text-generation', model='distilgpt2')
except Exception as load_error:
    # Loading is best-effort: report the problem on stdout and keep the app
    # importable so the UI can still start and show an error message.
    print(f"Error loading model: {load_error}")
# 3. Chatbot Logic
def predict(message, history):
    """Generate Hermes Agent's reply to ``message`` given the chat so far.

    The conversation is flattened into a plain-text transcript
    (system prompt, then alternating ``User:`` / ``Hermes Agent:`` lines)
    because distilgpt2 has no chat template.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_text, agent_text)`` pairs, oldest
            first. ``None`` or an empty sequence means a fresh conversation.

    Returns:
        The reply text. When the model is unavailable or generation fails,
        a human-readable error string is returned instead; this function
        does not raise for those cases.
    """
    if generator is None:
        return "Error: Model could not be loaded. Please check the backend logs."

    # Build the transcript the model is asked to continue.
    pieces = [HERMES_SYSTEM_PROMPT + "\n\n"]
    for human, agent in history or []:
        # A turn that is still awaiting its answer may carry None for the
        # agent side; render it as empty rather than the literal "None".
        pieces.append(f"User: {human}\nHermes Agent: {agent or ''}\n")
    pieces.append(f"User: {message}\nHermes Agent:")
    conversation = "".join(pieces)

    try:
        # max_new_tokens bounds the reply length; num_return_sequences=1
        # requests a single candidate; truncation=True lets the tokenizer
        # shorten over-long prompts instead of failing.
        response = generator(
            conversation,
            max_new_tokens=150,
            num_return_sequences=1,
            truncation=True,
        )
        generated_text = response[0]['generated_text']
    except Exception as e:
        # UI boundary: surface the failure as chat text instead of crashing.
        return f"An error occurred during model inference: {e}"

    return _extract_agent_reply(generated_text, conversation)


def _extract_agent_reply(generated_text, prompt):
    """Return only the agent's newest reply from the pipeline's full output.

    Args:
        generated_text: Text returned by the pipeline (prompt + continuation).
        prompt: The exact transcript that was sent to the pipeline.

    Returns:
        The cleaned reply, cut at the last sentence-ending punctuation mark
        when one is present. May be an empty string.
    """
    agent_marker = "Hermes Agent:"

    if generated_text.startswith(prompt):
        # Normal case: everything after the prompt is the model's continuation.
        # Searching for the *last* marker instead would pick up a later turn
        # that the model invented on its own.
        reply = generated_text[len(prompt):]
    else:
        # The echoed prompt differs from what was sent; fall back to the
        # marker heuristic.
        marker_at = generated_text.rfind(agent_marker)
        if marker_at != -1:
            reply = generated_text[marker_at + len(agent_marker):]
        else:
            reply = generated_text

    # Small models tend to keep writing the dialogue. Stop at the first
    # speaker marker so invented follow-up turns are not shown to the user.
    cut_points = [
        pos for pos in (reply.find("User:"), reply.find(agent_marker)) if pos != -1
    ]
    if cut_points:
        reply = reply[:min(cut_points)]
    reply = reply.strip()

    # Generation often stops mid-sentence; keep text up to the last full stop,
    # question mark, or exclamation mark when there is one.
    last_punctuation = max(reply.rfind('.'), reply.rfind('?'), reply.rfind('!'))
    if last_punctuation != -1:
        reply = reply[:last_punctuation + 1]
    return reply
# 4. Gradio Web UI
with gr.Blocks() as demo:
    gr.Markdown("# Hermes Agent")
    gr.Markdown("""
Hermes Agent is a helpful, fast, and practical multi-purpose AI assistant.
It can answer questions, reason through tasks, plan, summarize, and provide coding help.
""")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Adapt predict() to the two outputs wired to the submit event.

        predict() returns only the reply string, but the event updates both
        the textbox and the chatbot, so one value per output must be returned.

        Args:
            message: Text currently in the textbox.
            chat_history: Chatbot value as (user, agent) pairs, the same
                shape predict() iterates over; may be None before any turn.

        Returns:
            A pair: an empty string to clear the textbox, and the history
            extended with the new (message, reply) turn.
        """
        turns = list(chat_history or [])
        reply = predict(message, turns)
        turns.append((message, reply))
        return "", turns

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # Two outputs, so return two values: empty textbox and empty chat.
    clear.click(lambda: ("", None), None, [msg, chatbot], queue=False)

# Launch the Gradio app when this file is executed as a script.
# share is left at its default; no public share link is requested here.
if __name__ == "__main__":
    demo.launch(debug=True)  # debug=True for local development, set to False for production