""" Student Agent for Text Adventure Games This is your submission file. Implement the StudentAgent class to play text adventure games using the MCP server you also implement. Your agent should: 1. Connect to the MCP server via the provided client 2. Use the ReAct pattern (Thought -> Action -> Observation) 3. Call MCP tools to interact with the game 4. Maximize the game score within the step limit Required method: async def run(self, client, game, max_steps, seed, verbose) -> RunResult The 'client' is a FastMCP Client already connected to your MCP server. Use it to call tools like: await client.call_tool("play_action", {"action": "look"}) Tips: - Start by looking around and understanding your environment - Keep track of visited locations to avoid loops - Pick up useful items (lamp, sword, etc.) - The seed parameter should be used to set your LLM's seed for reproducibility """ import json import os import re from dataclasses import dataclass, field from typing import Optional from dotenv import load_dotenv from huggingface_hub import InferenceClient # Load environment variables load_dotenv() # ============================================================================= # LLM Configuration - DO NOT MODIFY # ============================================================================= # Model to use (fixed for fair evaluation) LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct" # Initialize the LLM client (uses HF_TOKEN from environment) _hf_token = os.getenv("HF_TOKEN") if not _hf_token: raise ValueError("HF_TOKEN not found. Set it in your .env file.") LLM_CLIENT = InferenceClient(token=_hf_token) def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str: """ Call the LLM with the given prompt. Use this function in your agent. Args: prompt: The user prompt (current game state, history, etc.) system_prompt: The system prompt (instructions for the agent) seed: Random seed for reproducibility max_tokens: Maximum tokens in response (default: 300) Returns: The LLM's response text Example: response = call_llm( prompt="You are in a forest. What do you do?", system_prompt=SYSTEM_PROMPT, seed=42, ) """ messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}, ] response = LLM_CLIENT.chat.completions.create( model=LLM_MODEL, messages=messages, temperature=0.0, # Deterministic for reproducibility max_tokens=max_tokens, seed=seed, ) return response.choices[0].message.content @dataclass class RunResult: """Result of running the agent. Do not modify this class.""" final_score: int max_score: int moves: int locations_visited: set[str] game_completed: bool error: Optional[str] = None history: list[tuple[str, str, str]] = field(default_factory=list) # ============================================================================= # System Prompt - Customize this for your agent # ============================================================================= SYSTEM_PROMPT = """You are playing a classic text adventure game. GOAL: Explore the world, solve puzzles, and maximize your score. AVAILABLE TOOLS (use via MCP): - play_action: Execute a game command (north, take lamp, open mailbox, etc.) - memory: Get current game state and history - inventory: Check what you're carrying - get_map: Get a map of explored locations and their connections CRITICAL RULES TO AVOID LOOPS: 1. NEVER repeat the same action twice in a row unless it succeeded 2. If you visit a location more than once, you MUST try a different direction 3. If an action fails, NEVER try it again in the same location 4. Prioritize unexplored areas over familiar ones 5. If stuck with 0 score for 20+ moves, try something completely new 6. If a door, window, or anything is locked, DO NOT get stuck trying to open it. Leave the area to find a key or look for another entrance. VALID GAME COMMANDS for play_action: - Movement: north, south, east, west, up, down, enter, exit - Objects: take , drop , open , close , examine - Other: look, inventory, read , turn on lamp RESPOND IN THIS EXACT FORMAT (no markdown): THOUGHT: TOOL: ARGS: Example: THOUGHT: I should look around to see where I am. TOOL: play_action ARGS: {"action": "look"} """ # ============================================================================= # Student Agent - IMPLEMENT THIS CLASS # ============================================================================= class StudentAgent: """ State Machine Agent for Text Adventures. Combines ReAct with systematic exploration and auto-extraction of actions. """ def __init__(self): self.history = [] self.visited_locations = set() self.current_location = None self.last_known_inventory = "Unknown. You should use the 'inventory' tool to check." self.inventory = [] self.previous_location = None self.score = 0 self.map = {} # --- NEW: State Machine Tracking --- self.locations_data = {} # { "Room Name": {"actions_tried": [], "promising_actions": []} } self.consecutive_turns = 0 # Compteur pour le biais d'exploration def _extract_promising_actions(self, observation: str, seed: int) -> list[str]: """ Appel LLM spécifique pour extraire les objets interactifs d'une nouvelle pièce. """ sys_prompt = "You are an expert text adventure parser. Extract interactive objects from the observation and return a JSON list of 1 to 4 simple verb-noun commands to try (e.g., ['open mailbox', 'climb tree', 'move leaves', 'read leaflet', 'take sword']). Ignore simple directional movement commands (north, south, etc.). Return ONLY the JSON list." prompt = f"OBSERVATION:\n{observation}\n\nJSON LIST:" try: # Appel rapide (max_tokens faible) response = call_llm(prompt, sys_prompt, seed, max_tokens=100) # Parsing robuste de la liste JSON match = re.search(r'\[.*?\]', response, re.DOTALL) if match: actions = json.loads(match.group(0)) # Nettoyage et filtrage return [str(a).lower().strip() for a in actions if isinstance(a, str)] except Exception: pass # Si le LLM échoue, on retourne juste une liste vide return [] async def run( self, client, game: str, max_steps: int, seed: int, verbose: bool = True, ) -> RunResult: self.history = [] self.visited_locations = set() self.current_location = None self.score = 0 self.consecutive_turns = 0 moves = 0 game_completed = False error = None try: # 1. Get initial observation result = await client.call_tool("play_action", {"action": "look"}) observation = result.content[0].text if result else "No response" self._extract_location(observation, action_taken=None) # Initialiser les données du premier lieu if self.current_location: new_promising = self._extract_promising_actions(observation, seed) self.locations_data[self.current_location] = { "actions_tried": [], "promising_actions": new_promising } if verbose: print(f"=== Initial Observation ===\n{observation}\n") # 2. Main Loop for step in range(max_steps): moves = step + 1 # Récupérer les données de la pièce actuelle loc_data = self.locations_data.get(self.current_location, {}) pending_actions = loc_data.get("promising_actions", []) prompt = self._build_prompt(observation, self.history) response = call_llm(prompt, SYSTEM_PROMPT, seed) thought, tool_name, args = self._parse_response(response) if verbose: print(f"\nPrompt:\n{prompt}\n") print(f"=== Step {step + 1} ===") print(f"LLM Response:\n{response}\n") # Mémoriser où on était avant l'action location_before_action = self.current_location action_name = args.get("action", "unknown") if args else "unknown" # Exécuter l'outil if tool_name: try: result = await client.call_tool(tool_name, args if args else {}) tool_result = result.content[0].text if result else "No response" if tool_name == "play_action": observation = tool_result if action_name.lower() in ["i", "inv", "inventory"]: self.last_known_inventory = tool_result elif tool_name == "inventory": self.last_known_inventory = tool_result observation = f"[Inventory checked]\n{tool_result}\n\n[Game state remains:]\n{observation}" action_name = "check inventory" except Exception as e: observation = f"Error executing tool: {str(e)}" action_name = f"{tool_name} (error)" else: result = await client.call_tool("play_action", {"action": "look"}) observation = result.content[0].text if result else "No response" action_name = "look" # Enregistrer le résultat dans le registre de la pièce if location_before_action in self.locations_data: # On garde juste la première phrase de l'obs pour ne pas surcharger la mémoire short_res = observation.split('\n')[0][:80] self.locations_data[location_before_action]["actions_tried"].append((action_name, short_res)) self._extract_score(observation) self.history.append((thought, action_name, observation)) self._extract_location(observation, action_taken=action_name) # --- GESTION DU BIAIS D'EXPLORATION ET NOUVEAUX LIEUX --- if self.current_location == location_before_action: self.consecutive_turns += 1 else: self.consecutive_turns = 0 # On a bougé, on reset le compteur ! # Si c'est un nouveau lieu jamais visité if self.current_location and self.current_location not in self.locations_data: new_promising = self._extract_promising_actions(observation, seed) self.locations_data[self.current_location] = { "actions_tried": [], "promising_actions": new_promising } if verbose: print(f"[NEW LOCATION DETECTED] Auto-extracted actions: {new_promising}") if self._is_game_over(observation): game_completed = True break except Exception as e: error = str(e) if verbose: print(f"Error: {error}") return RunResult( final_score=self.score, max_score=350, moves=moves, locations_visited=self.visited_locations, game_completed=game_completed, history=self.history, ) def _build_prompt(self, observation: str, history: list) -> str: history_text = "" loop_warning = "" loc_log = "" exploration_warning = "" unexplored_dirs = "" suggestions_text = "" if history: recent = history[-5:] history_text = "RECENT HISTORY:\n" actions = [a for _, a, _ in history[-4:]] if len(actions) >= 3 and len(set(actions)) == 1: loop_warning = "\n!!! CRITICAL WARNING: You are repeating the exact same action. YOU MUST do something else. !!!\n" elif len(actions) == 4 and actions[0] == actions[2] and actions[1] == actions[3]: loop_warning = f"\n!!! WARNING: You are stuck in a loop going between {actions[0]} and {actions[1]}. Pick a completely different direction. !!!\n" for thought, action, obs in recent: history_text += f"- Action: {action}\n" short_obs = obs[:200] + "..." if len(obs) > 200 else obs history_text += f" Result: {short_obs}\n" # --- NEW: Inject Location specific logs --- if self.current_location in self.locations_data: tried = self.locations_data[self.current_location]["actions_tried"] if tried: loc_log = "ACTIONS ALREADY TRIED IN THIS LOCATION:\n" for a, res in tried[-5:]: # On montre les 5 dernières actions faites ICI loc_log += f"- Tried: '{a}' -> Result: '{res}'\n" pending = self.locations_data[self.current_location].get("promising_actions", []) # On retire de la liste des suggestions les actions qu'il a déjà tentées tried_actions = [a.lower() for a, _ in tried] valid_suggestions = [p for p in pending if p.lower() not in tried_actions] if valid_suggestions: suggestions_text = f"INTERACTIVE OBJECTS SUGGESTIONS: You might want to try these actions here: {', '.join(valid_suggestions)}.\n" if self.consecutive_turns >= 4: exploration_warning = "\n!!! SYSTEM OVERRIDE: You have spent too many turns in this location without progressing. You MUST use a movement command (north, south, east, west, up, down, enter, exit) to explore elsewhere immediately. !!!\n" prompt = f"""CURRENT OBSERVATION: {observation} {exploration_warning} {loop_warning} {unexplored_dirs} {suggestions_text} KNOWN MAP (Spatial Graph): {self._format_map()} CURRENT INVENTORY: {self.last_known_inventory} {loc_log} {history_text} CURRENT SCORE: {self.score} Based on the observation, inventory, and history, decide your next action. Remember to explore new areas, pick up items, use them when appropriate, and solve puzzles to maximize score. """ return prompt def _parse_response(self, response: str) -> tuple[str, str, dict]: thought = "" tool_name = "" args = {} thought_match = re.search(r'THOUGHT:\s*(.+?)(?=TOOL:|$)', response, re.DOTALL | re.IGNORECASE) if thought_match: thought = thought_match.group(1).strip() tool_match = re.search(r'TOOL:\s*(\w+)', response, re.IGNORECASE) if tool_match: tool_name = tool_match.group(1).strip() args_match = re.search(r'ARGS:\s*(?:```json\n?)?(\{.*?\})(?:\n?```)?', response, re.DOTALL | re.IGNORECASE) if args_match: try: json_str = args_match.group(1).strip() args = json.loads(json_str) except json.JSONDecodeError: action_match = re.search(r'"action"\s*:\s*"([^"]+)"', response) if action_match: args = {"action": action_match.group(1)} if not tool_name: tool_name = "play_action" args = {"action": "look"} elif not args: if tool_name == "play_action": args = {"action": "look"} else: args = {} return thought, tool_name, args def _extract_location(self, observation: str, action_taken: str = None): lines = [line.strip() for line in observation.split('\n') if line.strip()] if not lines: return None first_line = lines[0] if first_line.endswith(('.', '!', '?')): return None skip_patterns = [ 'taken', 'opening', "you can't", "i don't", 'welcome', "there's nothing", 'score:', '[score:', 'it is already', 'the door is', 'you are carrying', 'dropped', 'done', 'closed', 'open', 'locked', 'the grating', 'you are' ] for pattern in skip_patterns: if pattern.lower() in first_line.lower(): return None if len(first_line) < 40 and first_line[0].isupper(): new_location = first_line if new_location not in self.map: self.map[new_location] = {} movement_commands = ["north", "south", "east", "west", "up", "down", "ne", "nw", "se", "sw", "n", "s", "e", "w", "enter", "exit", "in", "out"] if self.current_location and action_taken: clean_action = action_taken.lower().strip() if clean_action in movement_commands and self.current_location != new_location: self.map[self.current_location][clean_action] = new_location reverse_dirs = { "north": "south", "south": "north", "east": "west", "west": "east", "up": "down", "down": "up", "in": "out", "out": "in", "n": "s", "s": "n", "e": "w", "w": "e" } if clean_action in reverse_dirs: self.map[new_location][reverse_dirs[clean_action]] = self.current_location self.previous_location = self.current_location self.current_location = new_location self.visited_locations.add(new_location) return new_location return None def _extract_score(self, observation: str): score_match = re.search(r'[Ss]core[:\s]+(\d+)', observation) if score_match: self.score = int(score_match.group(1)) def _is_game_over(self, observation: str) -> bool: game_over_patterns = [ r'\*\*\*\s*You have died\s*\*\*\*', r'\*\*\*\s*You have won\s*\*\*\*', r'game over', r'The End', r'RESTART, RESTORE, or QUIT', ] for pattern in game_over_patterns: if re.search(pattern, observation, re.IGNORECASE): return True return False def _format_map(self) -> str: if not self.map: return "Empty map (no locations explored yet)." map_text = "" for room, connections in self.map.items(): conn_list = [f"[{dir} -> {dest}]" for dir, dest in connections.items()] conn_str = ", ".join(conn_list) if conn_list else "No known exits" marker = " <=== YOU ARE HERE" if room == self.current_location else "" map_text += f" - {room}{marker}: {conn_str}\n" return map_text.rstrip() # ============================================================================= # For local testing # ============================================================================= async def test_agent(): """Test the agent locally.""" from fastmcp import Client # Path to your MCP server server_path = "mcp_server.py" agent = StudentAgent() async with Client(server_path) as client: result = await agent.run( client=client, game="zork1", max_steps=10, seed=42, verbose=True, ) print(f"\nFinal Score: {result.final_score}") print(f"Moves: {result.moves}") print(f"Locations: {result.locations_visited}") if __name__ == "__main__": import asyncio asyncio.run(test_agent())