""" ReAct Agent for Text Adventure Games Architecture: - REASONING/ACTION format (from the hint prompt) for tighter LLM outputs - Heavy context management: per-location failed-action memory, global attempted-action set, recent-N observation window - Loop/deadlock detection with escalating escape strategies - Structured logging throughout for easy debugging - Score-delta awareness to prefer rewarding actions - `look` as universal safe fallback """ import json import logging import os import re from collections import defaultdict from dataclasses import dataclass, field from typing import Optional from dotenv import load_dotenv from huggingface_hub import InferenceClient load_dotenv() # ============================================================================== # Logging # ============================================================================== logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", handlers=[ logging.FileHandler("agent.log", mode="w"), logging.StreamHandler(), ], ) logger = logging.getLogger("agent") # ============================================================================== # LLM Configuration — DO NOT MODIFY # ============================================================================== LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct" _hf_token = os.getenv("HF_TOKEN") if not _hf_token: raise ValueError("HF_TOKEN not found. Set it in your .env file.") LLM_CLIENT = InferenceClient(token=_hf_token) def call_llm( prompt: str, system_prompt: str, seed: int, max_tokens: int = 300, ) -> str: """Call the LLM with the given prompt.""" logger.debug(f"[LLM] Calling model seed={seed}, max_tokens={max_tokens}") logger.debug(f"[LLM] Prompt (first 300 chars): {prompt[:300]!r}") messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}, ] response = LLM_CLIENT.chat.completions.create( model=LLM_MODEL, messages=messages, temperature=0.0, max_tokens=max_tokens, seed=seed, ) content = response.choices[0].message.content logger.debug(f"[LLM] Response: {content!r}") return content # ============================================================================== # Data Structures # ============================================================================== @dataclass class RunResult: """Result of running the agent. Do not modify this class.""" final_score: int max_score: int moves: int locations_visited: set[str] game_completed: bool error: Optional[str] = None history: list[tuple[str, str, str]] = field(default_factory=list) # ============================================================================== # System Prompt # ============================================================================== SYSTEM_PROMPT = """\ You are an expert text adventure game player. Your goal is to explore, collect \ treasures, solve puzzles, and MAXIMIZE your score. AVAILABLE MCP TOOLS: - play_action : Execute a game command (costs a game turn) - memory : Get full game state, score, recent history, failed actions - get_map : See explored locations and connections - inventory : Check what you are carrying - get_failed_actions: See what has already failed at the current location VALID GAME COMMANDS for play_action: Movement : north, south, east, west, up, down, ne, nw, se, sw, enter, exit Objects : take , drop , open , close Examine : examine , look, read , look in , \ look under , look behind Senses : listen, smell, wait Light : turn on , turn off Combat : attack with Interact : push , pull , turn , move Give/Put : give to , put in , \ put on FORBIDDEN verbs (will confuse the parser): check, inspect, search, grab, use, \ help, investigate EXPLORATION STRATEGY (follow in each new room): 1. "look" to understand the room fully. 2. Examine ALL objects, items, and features mentioned in the description. 3. Take everything that can be picked up. 4. Try "listen" or "read" if there are sounds or signs or clues. 5. Try all exits before backtracking to already-explored rooms. 6. Use items from inventory to solve puzzles or overcome obstacles. 7. If stuck, try "push", "pull", "move", "open", or "turn" on objects. 8. Use "get_map" to plan navigation and avoid revisiting areas. ANTI-LOOP RULES — these are MANDATORY: 1. NEVER repeat an action that already FAILED at this location. 2. NEVER repeat the same action three times in a row. 3. If stuck 4+ turns, call memory then try something completely new. 4. Prefer actions that have scored points in the past. 5. If all obvious actions fail, try: "listen", "wait", "look under ", \ "look behind ", "push ". RESPOND IN THIS EXACT FORMAT (no markdown, no extra lines): REASONING: <1-2 sentence explanation of your choice> TOOL: ARGS: Example: REASONING: I should look around to understand my surroundings. TOOL: play_action ARGS: {"action": "look"} """ # ============================================================================== # Student Agent # ============================================================================== class StudentAgent: """ ReAct agent with: - REASONING/TOOL/ARGS response format (tight, parseable) - Per-location attempted & failed action memory - Global loop/deadlock detector with escalating escapes - Score-delta tracking - Comprehensive logging """ # Consecutive identical-action threshold before forcing a redirect LOOP_THRESHOLD = 3 # Consecutive steps at same location before triggering escape STUCK_THRESHOLD = 10 # Recent observation window sent to LLM HISTORY_WINDOW = 10 def __init__(self): # Per-location tracking self.attempted: dict[str, set[str]] = defaultdict(set) self.failed: dict[str, set[str]] = defaultdict(set) # Recent step history for context self.step_history: list[dict] = [] # Loop / stuck detection self.recent_actions: list[str] = [] self.location_streak: int = 0 self.prev_location: str = "" # Score tracking self.current_score: int = 0 self.best_score: int = 0 # Escape rotation for when the agent is stuck self._escape_idx: int = 0 self._escape_actions = [ "look", "north", "south", "east", "west", "up", "down", "ne", "nw", "se", "sw", "listen", "wait", "examine room", ] logger.info("[Agent] Initialized") # ── Main entry point ─────────────────────────────────────────────────────── async def run( self, client, game: str, max_steps: int, seed: int, verbose: bool = False, ) -> RunResult: """Run the full ReAct loop.""" logger.info(f"[Agent] Starting game={game}, max_steps={max_steps}, seed={seed}") locations_visited: set[str] = set() result_history: list[tuple[str, str, str]] = [] moves = 0 game_completed = False error_msg = None # Discover available tools try: tools = await client.list_tools() tool_names = [t.name for t in tools] logger.info(f"[Agent] Available tools: {tool_names}") except Exception as e: logger.error(f"[Agent] Failed to list tools: {e}") tool_names = ["play_action", "memory", "get_map", "inventory", "get_failed_actions"] # ── Bootstrap: get initial observation ──────────────────────────────── try: obs_result = await client.call_tool("play_action", {"action": "look"}) observation = self._extract_text(obs_result) logger.info(f"[Agent] Initial observation: {observation[:120]!r}") except Exception as e: logger.error(f"[Agent] Failed to get initial observation: {e}") observation = "You are in a dark room." location = self._extract_location(observation) locations_visited.add(location) self.prev_location = location # ── Main ReAct loop ──────────────────────────────────────────────────── for step in range(1, max_steps + 1): logger.info(f"[Agent] ── Step {step}/{max_steps} ── Location: {location} | Score: {self.current_score}") # Build prompt prompt = self._build_prompt(observation, location, step) # Call LLM try: llm_response = call_llm( prompt=prompt, system_prompt=SYSTEM_PROMPT, seed=seed + step, max_tokens=200, ) except Exception as e: logger.error(f"[Agent] LLM call failed at step {step}: {e}") llm_response = "REASONING: LLM failed, falling back.\nTOOL: play_action\nARGS: {\"action\": \"look\"}" # Parse LLM response reasoning, tool_name, tool_args = self._parse_response(llm_response, tool_names) logger.info(f"[Agent] Parsed → tool={tool_name}, args={tool_args}") logger.debug(f"[Agent] Reasoning: {reasoning}") # Validate / fix the tool call tool_name, tool_args = self._validate_and_fix(tool_name, tool_args, tool_names, location) # Loop / stuck detection & override if tool_name == "play_action": action = tool_args.get("action", "look") action, tool_args = self._anti_loop(action, location, tool_args, verbose) tool_args["action"] = action moves += 1 # Execute tool try: raw_result = await client.call_tool(tool_name, tool_args) observation = self._extract_text(raw_result) logger.debug(f"[Agent] Tool result: {observation[:120]!r}") except Exception as e: observation = f"Error executing {tool_name}: {e}" logger.error(f"[Agent] Tool call error: {e}") # Update tracking & location (only for play_action) if tool_name == "play_action": acted_action = tool_args.get("action", "look").lower() self.attempted[location].add(acted_action) # Score tracking prev_score = self.current_score self._update_score(observation) # Detect room name for location & failure logic detected_room = self._detect_room_name(observation) new_location = detected_room if detected_room else location # Failure detection — skip if room name detected (action succeeded) if not detected_room and self._is_failed(observation): self.failed[location].add(acted_action) logger.warning(f"[Agent] Action '{acted_action}' FAILED at '{location}'") # Location & streak update if new_location != location: logger.info(f"[Agent] Location change: '{location}' → '{new_location}'") self.location_streak = 0 location = new_location elif self.current_score > prev_score: self.location_streak = 0 # Score increase = progress else: self.location_streak += 1 logger.debug(f"[Agent] Stuck streak={self.location_streak} at '{location}'") locations_visited.add(location) self.prev_location = location # Non-play_action tools don't affect location tracking or streak # Store step self.step_history.append({ "step": step, "reasoning": reasoning, "tool": tool_name, "args": tool_args, "observation": observation, "score": self.current_score, "location": location, }) if len(self.step_history) > 30: self.step_history = self.step_history[-30:] result_history.append((reasoning, f"{tool_name}({tool_args})", observation[:150])) if verbose: print(f"\n── Step {step} ──") print(f"[REASONING] {reasoning}") print(f"[TOOL] {tool_name}({tool_args})") print(f"[OBS] {observation[:200]}") print(f"[SCORE] {self.current_score} | Location: {location}") # Game over check if self._is_game_over(observation): game_completed = True logger.info(f"[Agent] Game over detected at step {step}") break logger.info( f"[Agent] Run complete. Score={self.current_score}, Moves={moves}, " f"Locations={len(locations_visited)}, Completed={game_completed}" ) return RunResult( final_score=self.current_score, max_score=self._get_max_score(game), moves=moves, locations_visited=locations_visited, game_completed=game_completed, error=error_msg, history=result_history, ) # Prompt builder def _build_prompt(self, observation: str, location: str, step: int) -> str: """ Build a rich, anti-loop-focused prompt. Includes: - Current observation - Recent action history (last N) - Per-location failed actions - Per-location all-attempted actions - Score and step info - Loop warnings when needed """ parts: list[str] = [] parts.append(f"=== GAME STATUS (step {step}) ===") parts.append(f"Score: {self.current_score} | Location: {location}") # Recent history if self.step_history: parts.append("\n--- RECENT ACTIONS ---") for e in self.step_history[-self.HISTORY_WINDOW:]: if e["tool"] == "play_action": obs_short = e["observation"].replace("\n", " ")[:200] reward_note = "" if "pts!" in e["observation"]: reward_note = " ✓SCORED" parts.append(f" > {e['args'].get('action', '?')}{reward_note} → {obs_short}") else: parts.append(f" > [{e['tool']}] called") # Anti-loop: failed actions at current location failed_here = self.failed.get(location, set()) attempted_here = self.attempted.get(location, set()) if failed_here: parts.append( f"\n FAILED ACTIONS at '{location}' (DO NOT REPEAT): " + ", ".join(sorted(failed_here)) ) if attempted_here: parts.append( f"Already tried at '{location}': " + ", ".join(sorted(attempted_here)) ) # Loop warning if len(self.recent_actions) >= self.LOOP_THRESHOLD: last = self.recent_actions[-self.LOOP_THRESHOLD:] if len(set(last)) == 1: parts.append( f"\n LOOP DETECTED: '{last[0]}' repeated {self.LOOP_THRESHOLD} times! " "Choose something COMPLETELY DIFFERENT." ) # Stuck warning if self.location_streak >= self.STUCK_THRESHOLD: parts.append( f"\n STUCK WARNING: You've been in '{location}' for " f"{self.location_streak} turns. Try a new direction or use get_map." ) parts.append("\n--- CURRENT OBSERVATION ---") parts.append(observation) parts.append("\nWhat do you do next?") prompt = "\n".join(parts) logger.debug(f"[Agent] Built prompt ({len(prompt)} chars)") return prompt # ── Response parser ──────────────────────────────────────────────────────── def _parse_response( self, response: str, valid_tools: list[str] ) -> tuple[str, str, dict]: """ Parse LLM response in REASONING/TOOL/ARGS format. Falls back gracefully to play_action("look"). """ reasoning = "No reasoning provided" tool_name = "play_action" tool_args: dict = {"action": "look"} if not response or not isinstance(response, str): logger.warning("[Parser] Empty or non-string response") return reasoning, tool_name, tool_args lines = response.strip().split("\n") for line in lines: stripped = line.strip() upper = stripped.upper() if upper.startswith("REASONING:"): reasoning = stripped.split(":", 1)[1].strip() elif upper.startswith("THOUGHT:"): # Accept legacy THOUGHT: format too reasoning = stripped.split(":", 1)[1].strip() elif upper.startswith("TOOL:"): raw = stripped.split(":", 1)[1].strip().lower() raw = re.sub(r"[`*\[\]]", "", raw).split()[0] if raw else "play_action" tool_name = raw elif upper.startswith("ACTION:"): # Handle old REASONING/ACTION format from the hint action_val = stripped.split(":", 1)[1].strip() action_val = re.sub(r"[`*\[\]]", "", action_val).strip() tool_name = "play_action" tool_args = {"action": action_val} elif upper.startswith("ARGS:"): args_str = stripped.split(":", 1)[1].strip() try: args_str = args_str.replace("'", '"') tool_args = json.loads(args_str) except json.JSONDecodeError: # Try extracting action from malformed JSON m = re.search(r'"action"\s*:\s*"([^"]+)"', args_str) if m: tool_args = {"action": m.group(1)} else: logger.warning(f"[Parser] Could not parse ARGS: {args_str!r}, using look") tool_args = {"action": "look"} logger.debug(f"[Parser] reasoning={reasoning!r}, tool={tool_name}, args={tool_args}") return reasoning, tool_name, tool_args # ── Validation & fixing ──────────────────────────────────────────────────── def _validate_and_fix( self, tool_name: str, tool_args: dict, valid_tools: list[str], location: str, ) -> tuple[str, dict]: """Validate tool name and fix forbidden action verbs.""" # Fix tool name tool_aliases = { "action": "play_action", "do": "play_action", "command": "play_action", "map": "get_map", "location": "get_map", "mem": "memory", "state": "memory", "status": "memory", "inv": "inventory", "items": "inventory", "failed": "get_failed_actions", } if tool_name not in valid_tools: fixed = tool_aliases.get(tool_name, "play_action") logger.warning(f"[Validate] Unknown tool '{tool_name}' → '{fixed}'") tool_name = fixed # Fix forbidden action verbs if tool_name == "play_action": action = tool_args.get("action", "look") forbidden_map = { "check": "examine", "inspect": "examine", "search": "look", "grab": "take", "pick": "take", "use": "examine", "investigate": "examine", "help": "look", } words = action.lower().split() if words and words[0] in forbidden_map: old_verb = words[0] words[0] = forbidden_map[old_verb] action = " ".join(words) logger.debug(f"[Validate] Verb fix: '{old_verb}' → '{words[0]}'") # Clean up action = re.sub(r"[`*\[\]]", "", action).strip().lower() action = " ".join(action.split()) tool_args["action"] = action logger.debug(f"[Validate] Cleaned action: {action!r}") return tool_name, tool_args # Anti-loop engine def _anti_loop( self, action: str, location: str, tool_args: dict, verbose: bool, ) -> tuple[str, dict]: """ Detect and break loops/deadlocks with escalating strategies. Strategy levels: 1. Same action 3× → force a random unexplored direction 2. Already attempted at this location → pick from unexplored directions 3. Stuck for 4+ turns → rotate through escape actions """ self.recent_actions.append(action) if len(self.recent_actions) > 10: self.recent_actions = self.recent_actions[-10:] attempted_here = self.attempted.get(location, set()) failed_here = self.failed.get(location, set()) # Immediate consecutive repetition if ( len(self.recent_actions) >= self.LOOP_THRESHOLD and len(set(self.recent_actions[-self.LOOP_THRESHOLD:])) == 1 ): forced = self._pick_escape(location) logger.warning( f"[AntiLoop] L1 – '{action}' repeated {self.LOOP_THRESHOLD}× → forcing '{forced}'" ) if verbose: print(f"[ANTI-LOOP] Repeated action → forcing '{forced}'") action = forced self.recent_actions.append(action) # Action already failed at this location elif action.lower() in failed_here: forced = self._pick_escape(location) logger.warning( f"[AntiLoop] L2 – '{action}' already FAILED at '{location}' → forcing '{forced}'" ) if verbose: print(f"[ANTI-LOOP] Already failed → forcing '{forced}'") action = forced self.recent_actions.append(action) # Level 3: Stuck at same location too long elif self.location_streak >= self.STUCK_THRESHOLD: forced = self._pick_escape(location) logger.warning( f"[AntiLoop] L3 – Stuck {self.location_streak} turns → forcing '{forced}'" ) if verbose: print(f"[ANTI-LOOP] Stuck {self.location_streak} turns → forcing '{forced}'") action = forced self.location_streak = 0 # Reset after escape attempt self.recent_actions.append(action) tool_args["action"] = action return action, tool_args def _pick_escape(self, location: str) -> str: """ Pick an escape action not yet attempted at this location. Rotates through the escape list; falls back to 'look'. """ attempted_here = self.attempted.get(location, set()) for _ in range(len(self._escape_actions)): candidate = self._escape_actions[self._escape_idx % len(self._escape_actions)] self._escape_idx += 1 if candidate.lower() not in attempted_here: logger.debug(f"[AntiLoop] Escape → '{candidate}'") return candidate logger.debug("[AntiLoop] All escapes tried, falling back to 'look'") return "look" # Utility helpers def _extract_text(self, result) -> str: """Extract plain text from a FastMCP tool result.""" if hasattr(result, "content") and result.content: return result.content[0].text if isinstance(result, list) and result: item = result[0] return item.text if hasattr(item, "text") else str(item) return str(result) def _detect_room_name(self, observation: str) -> str: """Detect a room name in observation text. Z-machine games output room descriptions as: [optional flavor text]\\n\\n\\n A room name appears after a blank line, is short, and has no sentence-ending punctuation. Returns room name or empty string. """ lines = observation.split("\n") prev_blank = True # Treat start of text as "after blank" for line in lines: stripped = line.strip() if not stripped: prev_blank = True continue if prev_blank: if (2 < len(stripped) < 60 and not stripped.endswith(('.', '!', '?', ':', ',', ';', '*', '"')) and not stripped.startswith(('[', '>')) ): return stripped prev_blank = False return "" def _extract_location(self, observation: str) -> str: """Extract room name, falling back to previous location.""" detected = self._detect_room_name(observation) return detected if detected else (self.prev_location or "Unknown") def _is_failed(self, observation: str) -> bool: """Detect parser-rejected or no-effect responses.""" fail_phrases = [ "i don't understand", "i don't know the word", "that's not a verb", "you can't go that way", "there is no", "nothing happens", "that doesn't work", "you can't", "it is fixed", "doesn't open", "you don't see", "i beg your pardon", "what do you want to", "huh?", "you can't do that", "already", "not see that", "not know how", "not have", "won't budge", "there's nothing", "that's not something", "not allowed", ] obs_lower = observation.lower() return any(p in obs_lower for p in fail_phrases) def _update_score(self, text: str) -> None: """Parse score from play_action output.""" m = re.search(r"\[Score:\s*(\d+)", text) if m: new_score = int(m.group(1)) if new_score != self.current_score: logger.info(f"[Agent] Score update: {self.current_score} → {new_score}") self.current_score = max(self.current_score, new_score) self.best_score = max(self.best_score, self.current_score) def _is_game_over(self, text: str) -> bool: """Detect game-over conditions.""" phrases = [ "game over", "you have died", "you are dead", "*** you have died ***", "*** game over ***", "you have won", "*** you have won ***", ] text_lower = text.lower() result = any(p in text_lower for p in phrases) if result: logger.info(f"[Agent] Game-over phrase detected in: {text[:80]!r}") return result def _get_max_score(self, game: str) -> int: """Return known max scores for common games.""" max_scores = { "zork1": 350, "zork2": 400, "zork3": 7, "hitchhiker": 400, "trinity": 100, "enchanter": 400, "planetfall": 80, "infidel": 400, "suspect": 360, "starcross": 430, "deadline": 350, "witness": 100, "lurking": 100, "lostpig": 7, } score = max_scores.get(game.lower(), 350) logger.debug(f"[Agent] Max score for '{game}': {score}") return score # ============================================================================== # Local Testing # ============================================================================== async def test_agent(): from fastmcp import Client agent = StudentAgent() async with Client("mcp_server.py") as client: result = await agent.run( client=client, game="zork1", max_steps=30, seed=42, verbose=True, ) print(f"\n{'=' * 50}") print(f"Final Score : {result.final_score} / {result.max_score}") print(f"Moves : {result.moves}") print(f"Locations : {len(result.locations_visited)}") print(f"Completed : {result.game_completed}") if __name__ == "__main__": import asyncio asyncio.run(test_agent())