Spaces:

ovokeomovie
/

hermes-agent

Running

App Files Files Community

hermes-agent / app.py

ovokeomovie

Upload app.py with huggingface_hub

c8e766c verified 5 days ago

Raw

History Blame Contribute Delete

4.18 kB


	import gradio as gr
	from transformers import pipeline

	# 1. System Prompt/Personality for Hermes Agent
	# Persona text for the assistant; predict() places it at the start of every prompt.
	HERMES_SYSTEM_PROMPT = (
	    "You are Hermes Agent, a helpful, fast, and practical multi-purpose assistant. "
	    "You are professional, calm, and user-friendly. "
	    "You can answer questions, reason through tasks step-by-step, plan, summarize, "
	    "provide coding help, offer research-style explanations, and break down complex tasks. "
	    "Do not pretend to have abilities you do not possess. "
	    "Always strive for clarity and conciseness."
	)

	# 2. Model Integration
	# Using a small, CPU-friendly model for demonstration on Hugging Face Spaces free tier.
	# For better performance and more complex tasks, a larger model with GPU would be recommended.
	# Example: 'distilgpt2' is a good starting point for CPU inference.
	# For more capable models, consider 'HuggingFaceH4/zephyr-7b-beta' or 'mistralai/Mistral-7B-Instruct-v0.2'
	# which would require a GPU-enabled Space.

	def _load_generator():
	    """Build the distilgpt2 text-generation pipeline, or return None on failure.

	    Any exception raised while constructing the pipeline is reported with
	    print() and swallowed, so importing this module never fails because of
	    the model.
	    """
	    try:
	        # No trust_remote_code is passed: only enable it for models that
	        # explicitly require it.
	        return pipeline('text-generation', model='distilgpt2')
	    except Exception as load_error:
	        print(f"Error loading model: {load_error}")
	        return None


	# Created once at import time; predict() checks for None before using it.
	generator = _load_generator()

	# 3. Chatbot Logic
	# Speaker labels used both to render the transcript and to find turn boundaries.
	_USER_TAG = "User:"
	_AGENT_TAG = "Hermes Agent:"


	def _build_prompt(message, history):
	    """Render the system prompt, the prior turns and the new message as one transcript."""
	    parts = [HERMES_SYSTEM_PROMPT, "\n\n"]
	    for human, agent in history or []:
	        # A missing side of a turn (None) is skipped rather than rendered as
	        # the literal text "None".
	        if human is not None:
	            parts.append(f"{_USER_TAG} {human}\n")
	        if agent is not None:
	            parts.append(f"{_AGENT_TAG} {agent}\n")
	    # The transcript ends on the agent label so the model continues as the agent.
	    parts.append(f"{_USER_TAG} {message}\n{_AGENT_TAG}")
	    return "".join(parts)


	def _extract_reply(generated_text, prompt):
	    """Return only the agent's first reply from the generator's output text."""
	    if generated_text.startswith(prompt):
	        # The output echoes the prompt; everything after it is the continuation.
	        completion = generated_text[len(prompt):]
	    else:
	        # Fallback heuristic: take the text after the last agent label, if any.
	        marker_at = generated_text.rfind(_AGENT_TAG)
	        if marker_at == -1:
	            completion = generated_text
	        else:
	            completion = generated_text[marker_at + len(_AGENT_TAG):]

	    # The model may keep writing further "User:" / "Hermes Agent:" turns on its
	    # own. Keep only the text before the first such invented turn.
	    turn_starts = [
	        pos
	        for pos in (
	            completion.find("\n" + _USER_TAG),
	            completion.find("\n" + _AGENT_TAG),
	        )
	        if pos != -1
	    ]
	    if turn_starts:
	        completion = completion[:min(turn_starts)]
	    completion = completion.strip()

	    # Drop a trailing incomplete sentence by cutting after the last '.', '?' or '!'.
	    last_punctuation = max(completion.rfind(mark) for mark in ".?!")
	    if last_punctuation != -1:
	        completion = completion[:last_punctuation + 1]
	    return completion


	def predict(message, history):
	    """Generate Hermes Agent's reply to `message` given the prior chat `history`.

	    Args:
	        message: The user's newest message.
	        history: Earlier turns as (user_text, agent_text) pairs. None or an
	            empty list means there are no earlier turns.

	    Returns:
	        The reply text. If the model failed to load, or inference raises, a
	        human-readable error string is returned instead of raising.
	    """
	    if generator is None:
	        return "Error: Model could not be loaded. Please check the backend logs."

	    prompt = _build_prompt(message, history)

	    try:
	        # max_new_tokens bounds the reply length; a single sequence is requested.
	        # NOTE(review): truncation=True is kept from the original call. Confirm
	        # which end of an over-long prompt the pipeline drops, so the newest
	        # turns are not the part that is lost.
	        response = generator(prompt, max_new_tokens=150, num_return_sequences=1, truncation=True)
	        generated_text = response[0]['generated_text']
	        return _extract_reply(generated_text, prompt)
	    except Exception as e:
	        # Boundary with the UI: report the failure as text instead of raising.
	        return f"An error occurred during model inference: {e}"

	# 4. Gradio Web UI
	def _respond(message, history):
	    """Handle one textbox submit.

	    Returns one value per output component wired to the submit event: an
	    empty string that clears the textbox, and the chat history with the new
	    (message, reply) turn appended.
	    """
	    turns = list(history or [])
	    if not message or not message.strip():
	        # Nothing to answer; leave the conversation unchanged.
	        return "", turns
	    reply = predict(message, turns)
	    # NOTE(review): history is kept as [user, agent] pairs, the shape predict()
	    # iterates over. Confirm the installed Gradio Chatbot accepts pair-format
	    # history.
	    turns.append([message, reply])
	    return "", turns


	with gr.Blocks() as demo:
	    gr.Markdown("# Hermes Agent")
	    gr.Markdown("""
	Hermes Agent is a helpful, fast, and practical multi-purpose AI assistant.
	It can answer questions, reason through tasks, plan, summarize, and provide coding help.
	""")

	    chatbot = gr.Chatbot(height=400)
	    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
	    clear = gr.Button("Clear")

	    # predict() returns only the reply string, while this event has two outputs
	    # (textbox, chatbot). _respond adapts it to return a value for each.
	    msg.submit(_respond, [msg, chatbot], [msg, chatbot])
	    # Two outputs, so return two values: None resets both the textbox and the chat.
	    clear.click(lambda: (None, None), None, [msg, chatbot], queue=False)

	# Start the Gradio server when this file is executed directly.
	# share is left at its default: a public share link is only useful for local
	# testing and is not wanted when the app is deployed on Hugging Face Spaces.
	# debug=True suits local development; switch it to False for production.
	if __name__ == "__main__":
	    launch_options = {"debug": True}
	    demo.launch(**launch_options)