""" AgentWorld Demo — Gradio app for HuggingFace Spaces. Demonstrates Language World Model capabilities: simulates Terminal, Search, MCP/Tool, and Web environments using the HF Inference API. Uses Qwen2.5-7B-Instruct (free HF inference) with AgentWorld system prompts. For full fidelity, swap to Qwen-AgentWorld-35B-A3B on a paid Inference Endpoint. """ import json import os import gradio as gr from huggingface_hub import InferenceClient # ── Configuration ────────────────────────────────────────────────────────── MODEL_ID = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct") ENDPOINT_URL = os.environ.get("INFERENCE_ENDPOINT_URL", None) # On HF Spaces, the token is auto-injected. Try multiple env var names. HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("API_TOKEN") or None if ENDPOINT_URL: client = InferenceClient(base_url=ENDPOINT_URL, token=HF_TOKEN) else: # No explicit provider — let huggingface_hub auto-route to the correct backend client = InferenceClient(model=MODEL_ID, token=HF_TOKEN) def call_model(messages: list[dict], max_tokens: int = 2048) -> str: """Call the HF Inference API and return the response text.""" response = client.chat_completion( messages=messages, max_tokens=max_tokens, temperature=0.6, top_p=0.95, ) return response.choices[0].message.content # ── Domain system prompts (abridged from Qwen-AgentWorld repo) ───────────── SYSTEM_PROMPTS = { "terminal": """You are a Terminal World Model — a precise terminal state simulator. Predict the exact output of a Linux terminal after executing commands. Rules: - Track working directory, files, and environment variables across turns - Commands need newline to execute - Show command echo, output, and new prompt - Handle errors: command not found, permission denied, file not found - Be consistent: if a file was deleted, cat on it should error - Use prompt format: user@hostname:/current/path$ Input format: JSON array of command objects [{"keystrokes": "ls -la\\n", "duration": 0.1}] Output ONLY the terminal output — no explanations, no markdown wrapping.""", "search": """You are a Search World Model — a precise web search simulator. Simulate search engine results and webpage content. Tools: - web_search: returns 10 results per query with [N] URL, Title, Snippet, Date - web_extractor: returns full page content in markdown for a given URL - dict_memory: key-value memory store (add/update/retrieve/remove) Rules: - Factuality first — use real-world knowledge, don't fabricate - URLs should match query topics realistically - Source diversity: Wikipedia, news, official sites, forums - Extracted content must match prior search snippets Input format: JSON tool call {"name": "web_search", "arguments": {"queries": ["query"]}} Output ONLY the simulated tool response — no explanations.""", "mcp": """You are a Tool World Model — simulate tool/API call execution. Given a JSON tool call with name and arguments, return realistic results. Rules: - Track state: created resources, session context, variable bindings - Query tools return data, create tools return new resource IDs - Delete tools make subsequent access return errors - Match output format to tool type (JSON for data APIs, text for listings, CSV for exports) - Handle errors: not found, permission denied, timeout Input format: JSON tool call {"name": "get_weather", "arguments": {"city": "Tokyo", "units": "celsius"}} Output ONLY the simulated tool response — no explanations.""", "web": """You are a Web World Model — a browser state simulator. Predict the next page state after a browser action. Actions: goto(url), click(bid), fill(bid, value), scroll(dx, dy), go_back(), go_forward(), keyboard_type(text), hover(bid), new_tab(), tab_close() Rules: - Page-changing actions (goto, click on links) update full page state - Local-edit actions (fill, hover) modify only affected elements - Track navigation history, form values, focused elements - Preserve untouched page elements verbatim Input format: JSON browser action {"action": "goto", "url": "https://example.com"} Output ONLY the predicted page state — no explanations.""", } # ── Domain examples for the UI ───────────────────────────────────────────── EXAMPLES = { "terminal": [ ['[{"keystrokes": "ls -la\\n", "duration": 0.1}]'], ['[{"keystrokes": "echo Hello World\\n", "duration": 0.1}]'], ['[{"keystrokes": "cat /etc/hostname\\n", "duration": 0.1}]'], ['[{"keystrokes": "python3 -c \\"print(2**10)\\"\\n", "duration": 0.5}]'], ['[{"keystrokes": "mkdir project && cd project && pwd\\n", "duration": 0.2}]'], ], "search": [ ['{"name": "web_search", "arguments": {"queries": ["Apollo 11 moon landing date"]}}'], ['{"name": "web_search", "arguments": {"queries": ["largest ocean on Earth"]}}'], ['{"name": "web_search", "arguments": {"queries": ["Python 3.13 new features"]}}'], ['{"name": "web_extractor", "arguments": {"url": "https://en.wikipedia.org/wiki/Python_(programming_language)"}}'], ['{"name": "dict_memory", "arguments": {"operation": "add", "dict": {"favorite_color": "blue"}}}'], ], "mcp": [ ['{"name": "get_weather", "arguments": {"city": "Tokyo", "units": "celsius"}}'], ['{"name": "create_user", "arguments": {"username": "alice", "email": "alice@example.com", "role": "admin"}}'], ['{"name": "search_files", "arguments": {"pattern": "*.py", "directory": "/home/user/project"}}'], ['{"name": "get_user", "arguments": {"user_id": "usr_nonexistent"}}'], ['{"name": "list_databases", "arguments": {}}'], ], "web": [ ['{"action": "goto", "url": "https://example-shop.com"}'], ['{"action": "click", "bid": "add-to-cart-1"}'], ['{"action": "fill", "bid": "search-input", "value": "wireless headphones"}'], ['{"action": "scroll", "delta_x": 0, "delta_y": 300}'], ['{"action": "go_back"}'], ], } # ── Core simulation function ─────────────────────────────────────────────── def simulate(domain: str, action: str, history_state: list) -> tuple: """ Send action + history to the model and return the predicted environment response. history_state is a list of {"role": "...", "content": "..."} dicts (Gradio 6.x messages format). """ if not action or not action.strip(): return history_state, history_state, "⚠️ Please enter an action." system_prompt = SYSTEM_PROMPTS.get(domain, SYSTEM_PROMPTS["terminal"]) # Build messages: system + history + new action messages = [{"role": "system", "content": system_prompt}] # Convert Gradio chatbot history (list of {"role":..., "content":...} dicts) to messages for entry in history_state: if entry.get("content"): messages.append({"role": entry["role"], "content": entry["content"]}) # Add current action messages.append({"role": "user", "content": action.strip()}) try: predicted = call_model(messages, max_tokens=2048) except Exception as e: predicted = f"❌ Error: {str(e)}" # Append to chatbot history (messages format) new_history = history_state + [ {"role": "user", "content": action.strip()}, {"role": "assistant", "content": predicted}, ] # Also return as plain text for the raw output box return new_history, new_history, predicted def reset_session(): """Clear the conversation history.""" return [], [], "Session reset. Start a new simulation." def update_examples(domain: str): """Return examples for the selected domain.""" return gr.Dataset(samples=EXAMPLES.get(domain, EXAMPLES["terminal"])) # ── Gradio UI ────────────────────────────────────────────────────────────── with gr.Blocks( title="AgentWorld Demo — Language World Model Simulator", ) as demo: gr.Markdown(""" # 🌍 AgentWorld Demo **Language World Model Simulator** — predicts how environments (terminal, search, tools, web) would respond to agent actions. Powered by the [Qwen-AgentWorld](https://huggingface.co/Qwen/Qwen-AgentWorld-35B-A3B) system prompts running on Qwen2.5-7B-Instruct. **How it works:** Pick a domain, enter an action in JSON format, and the model simulates what the environment would return — no real terminal, browser, or API needed. """) with gr.Row(): with gr.Column(scale=1): domain = gr.Dropdown( choices=["terminal", "search", "mcp", "web"], value="terminal", label="🌐 Domain", info="Which environment to simulate", ) action_input = gr.Textbox( label="⚡ Action (JSON format)", placeholder='[{"keystrokes": "ls -la\\n", "duration": 0.1}]', lines=4, elem_classes="response-box", ) with gr.Row(): submit_btn = gr.Button("▶ Simulate", variant="primary") reset_btn = gr.Button("↺ Reset Session", variant="stop") example_btn = gr.Button("📋 Load Example") with gr.Column(scale=2): chatbot = gr.Chatbot( label="📜 Simulation History", height=400, ) with gr.Accordion("📄 Raw Response", open=False): raw_output = gr.Textbox( label="Last predicted observation", lines=12, elem_classes="response-box", interactive=False, ) # State to hold conversation history history_state = gr.State([]) # ── Event handlers ────────────────────────────────────────────────── submit_btn.click( fn=simulate, inputs=[domain, action_input, history_state], outputs=[chatbot, history_state, raw_output], ).then( fn=lambda: "", outputs=[action_input], ) reset_btn.click( fn=reset_session, inputs=[], outputs=[chatbot, history_state, raw_output], ) # Load example into the action input def load_example(domain_val): examples = EXAMPLES.get(domain_val, EXAMPLES["terminal"]) return examples[0][0] if examples else "" example_btn.click( fn=load_example, inputs=[domain], outputs=[action_input], ) # Update examples when domain changes domain.change( fn=load_example, inputs=[domain], outputs=[action_input], ) # ── Description ───────────────────────────────────────────────────── gr.Markdown(""" --- ### 📖 Domain Reference | Domain | Action Format | Example | |---|---|---| | **Terminal** | `[{"keystrokes": "cmd\\n", "duration": 0.1}]` | `ls -la`, `cat file.txt`, `mkdir dir` | | **Search** | `{"name": "web_search", "arguments": {"queries": ["q"]}}` | Search queries, extract pages, memory ops | | **MCP / Tools** | `{"name": "tool_name", "arguments": {...}}` | API calls, CRUD operations, file search | | **Web** | `{"action": "goto", "url": "..."}` | Navigate, click, fill forms, scroll | ### ⚠️ Note This demo uses **Qwen2.5-7B-Instruct** (free HF inference) with AgentWorld system prompts. For full simulation fidelity, deploy the actual [Qwen-AgentWorld-35B-A3B](https://huggingface.co/Qwen/Qwen-AgentWorld-35B-A3B) model on a paid Inference Endpoint and set the `INFERENCE_ENDPOINT_URL` environment variable. Built by [geraldamasi](https://huggingface.co/geraldamasi) · [Source on GitHub](https://github.com/QwenLM/Qwen-AgentWorld) """) if __name__ == "__main__": demo.launch( server_name="0.0.0.0", theme=gr.themes.Soft(), css=""" .response-box textarea { font-family: 'SF Mono', 'Menlo', 'Monaco', monospace !important; font-size: 13px !important; } footer { display: none !important; } """, )