text-adventure-agent

Sleeping

Felix Lebel

fix again

76e7160 about 2 months ago

38.3 kB

	"""
	Student Agent for Text Adventure Games

	This is your submission file. Implement the StudentAgent class to play
	text adventure games using the MCP server you also implement.

	Your agent should:
	1. Connect to the MCP server via the provided client
	2. Use the ReAct pattern (Thought -> Action -> Observation)
	3. Call MCP tools to interact with the game
	4. Maximize the game score within the step limit

	Required method:
	async def run(self, client, game, max_steps, seed, verbose) -> RunResult

	The 'client' is a FastMCP Client already connected to your MCP server.
	Use it to call tools like: await client.call_tool("play_action", {"action": "look"})

	Tips:
	- Start by looking around and understanding your environment
	- Keep track of visited locations to avoid loops
	- Pick up useful items (lamp, sword, etc.)
	- The seed parameter should be used to set your LLM's seed for reproducibility
	"""

	import json
	import os
	import re
	from dataclasses import dataclass, field
	from typing import Optional
	import numpy as np

	from dotenv import load_dotenv

	# Load environment variables
	load_dotenv()

	# =============================================================================
	# LLM Configuration - DO NOT MODIFY
	# =============================================================================

	# Model requested from the hosted inference API (used when no local model is active).
	LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

	# Opt-in switch: a local pipeline is only attempted when the environment
	# variable USE_LOCAL_MODEL is exactly "true" (case-insensitive).
	USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "false").lower() == "true"
	_local_pipeline = None

	if USE_LOCAL_MODEL:
	    try:
	        # Imported lazily so that `transformers` is only needed in local mode.
	        from transformers import pipeline
	        LOCAL_MODEL = os.getenv("LOCAL_MODEL", "Qwen/Qwen2.5-3B-Instruct")
	        _local_pipeline = pipeline("text-generation", model=LOCAL_MODEL, device_map="auto")
	    except Exception:
	        # Any failure while importing or building the pipeline silently
	        # falls back to the hosted client configured below.
	        USE_LOCAL_MODEL = False

	if not USE_LOCAL_MODEL:
	    from huggingface_hub import InferenceClient
	    _hf_token = os.getenv("HF_TOKEN")
	    if not _hf_token:
	        # Importing this module fails fast when no token is configured.
	        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
	    # LLM_CLIENT only exists on this branch; call_llm() uses it when the
	    # local pipeline is not active.
	    LLM_CLIENT = InferenceClient(token=_hf_token)


	def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
	    """
	    Call the LLM with the given prompt. Use this function in your agent.

	    The request is sent to the local pipeline when one was successfully
	    created at import time, otherwise to the hosted inference client.

	    Args:
	        prompt: The user prompt (current game state, history, etc.)
	        system_prompt: The system prompt (instructions for the agent)
	        seed: Random seed for reproducibility. It is forwarded to the hosted
	            client only; the local pipeline call below does not receive it.
	        max_tokens: Maximum tokens in response (default: 300)

	    Returns:
	        The LLM's response text
	    """
	    # Chat-style message list shared by both back ends.
	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": prompt},
	    ]

	    if USE_LOCAL_MODEL and _local_pipeline is not None:
	        outputs = _local_pipeline(
	            messages,
	            max_new_tokens=max_tokens,
	            temperature=0.0001,
	            do_sample=True,
	        )
	        # The last entry of the returned conversation is read as the reply.
	        return outputs[0]["generated_text"][-1]["content"]

	    response = LLM_CLIENT.chat.completions.create(
	        model=LLM_MODEL,
	        messages=messages,
	        temperature=0.0,
	        max_tokens=max_tokens,
	        seed=seed,
	    )

	    return response.choices[0].message.content


	@dataclass
	class RunResult:
	    """Result of running the agent. Do not modify this class."""
	    # Score reported at the end of the run.
	    final_score: int
	    # Maximum achievable score for the game.
	    max_score: int
	    # Number of agent steps (tool calls) taken.
	    moves: int
	    # Distinct location labels seen during the run.
	    locations_visited: set[str]
	    # True when the final observation indicated the game had ended.
	    game_completed: bool
	    # Optional error description; None when no error is reported.
	    error: Optional[str] = None
	    # One (thought, tool call, observation excerpt) tuple per step.
	    history: list[tuple[str, str, str]] = field(default_factory=list)


	# =============================================================================
	# System Prompt
	# =============================================================================


	# System prompt for the main ReAct loop: describes the MCP tools, the accepted
	# game commands, the reply format (THOUGHT / TOOL / ARGS) and the layout of the
	# context block the agent builds before each LLM call.
	SYSTEM_PROMPT = """You are playing a classic text adventure game. Your goal is to EXPLORE widely, COLLECT treasures and MAXIMIZE your score.

	AVAILABLE TOOLS (use via MCP):
	- play_action: Execute a game command (north, take lamp, open mailbox, etc.)
	- location_log: See what actions were tried at the current location, their outcomes and the promising actions to try.
	- memory: Get a current game state summary including current location (number of visits, actions tried, promising actions), recent actions and current observation
	- get_map: Get a map of explored locations, connections and exits. It also helps you remember what you've tried at the current location and their outcomes, so you can avoid repeating failed actions and focus on promising ones.
	- inventory: Have a look at what you're currently carrying.

	VALID GAME COMMANDS for play_action:
	- Movement: north, south, east, west, northeast, northwest, southeast, southwest, up, down, enter, exit
	- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
	- Light: turn on lamp, turn off lamp
	- Combat: attack <enemy> with <weapon>
	- Other: inventory, look, read <thing>, wait
	- Other verbs: look, examine, listen, speak, take, drop, empty, fill, inventory, climb, swim, open, close, set, turn, push, pull, push [direction], throw at, eat, drink, wear, take off, burn, dig, kick, destroy, read, ask for, give, feed, show, ask about, tell about, talk to, kiss, attack, wake, answer, wave, rub, squeeze, jump, jump over, wait, sleep, sing, yell, think, pray

	FORBIDDEN (will NOT work): check, inspect, search, grab, use, help

	RESPOND IN THIS EXACT FORMAT (no markdown):
	THOUGHT: <your reasoning about what to do next>
	TOOL: <tool_name>
	ARGS: <JSON arguments, e.g., {"action": "look"}>

	EXPLORATION STRATEGY (follow this priority):
	1. EXPLORE a lot! Try new locations and exits frequently (north, south, east, west, northeast, northwest, southeast, southwest, up, down, enter, exit)
	2. ALWAYS EXAMINE everything that could be interesting, especially details in objects, rooms... EXAMINE where you could find some loot or useful items, or clues for puzzles. INTERACT with characters and objects to discover new possibilities.
	3. ALWAYS take items that seem useful (lamp, sword, key, etc.)
	4. Open containers (mailbox, cases, doors, windows)
	5. Try ALL exits from a location before moving on
	6. Use get_map and location_log frequently to plan which unexplored exits to try, and what actions to take. It also helps you remember what you've tried at the current location and their outcomes, so you can avoid repeating failed actions and focus on promising ones.
	7. Use memory to check if you're repeating yourself
	8. If you've been in the same location for 3+ turns, MOVE to a new location

	HERE IS THE STRUCTURE OF THE GAME OUTPUT you receive after each action and tool call:
	<BEGIN GAME OUTPUT>
	- CURRENT LOCATION: <location name>
	- STEPS AT THIS LOCATION: <number of steps taken at this location>

	- RECENT ACTIONS:
	[<location name>] > action -> outcome
	[<other location name>] > other action -> other outcome
	...
	[<other location name>] > other action -> other outcome

	- CURRENT SITUATION:
	<text describing the current location, visible objects, characters, exits, inventory, map, etc.>
	or <map description>

	- ACTIONS ALREADY TRIED AT THIS LOCATION:
	> action -> outcome
	> other action -> other outcome

	- ACTIONS SUGGESTED: action1, action2, action3
	<END GAME OUTPUT>


	"CURRENT SITUATION" is the most important part of the output, it is the direct consequence of your last action and the most up-to-date description of the world. Focus on it to find new interactions, objects, exits, and details to examine.
	"RECENT ACTIONS" is a summary of what you've done recently and their outcomes. Use it to avoid repeating failed actions and to focus on promising ones.
	DON'T SUGGEST ACTIONS YOU'VE ALREADY TRIED AT THIS LOCATION. If there are too many ACTIONS ALREADY TRIED AT THIS LOCATION, move to another place (use look to see the exits).


	IMPORTANT:
	- DO NOT repeat the same action multiple times in a row
	- If an action doesn't work, try something DIFFERENT or EXAMINE more (precisely) to find new possibilities

	Examples:

	THOUGHT: I need to remember what I've tried here before. Let me check the location log.
	TOOL: location_log
	ARGS: {}

	THOUGHT: I see an interesting object. Let me examine it.
	TOOL: play_action
	ARGS: {"action": "examine mailbox"}

	THOUGHT: I should check the map to find unexplored exits and to remember what I've tried here before.
	TOOL: get_map
	ARGS: {}

	THOUGHT: Look around to find more details about the room and possible interactions.
	TOOL: play_action
	ARGS: {"action": "look"}
	"""

	# =============================================================================
	# Prompt for extracting promising actions from observations
	# =============================================================================

	# System prompt for the helper LLM call that turns a game-output block into a
	# JSON list of candidate actions.
	EXTRACT_ACTIONS_PROMPT = """You are analyzing text adventure game output. Extract promising actions the player should try.

	Here is the structure of the GAME OUTPUT you receive:
	<BEGIN GAME OUTPUT>
	- CURRENT LOCATION: <location name>
	- STEPS AT THIS LOCATION: <number of steps taken at this location>

	- RECENT ACTIONS:
	[<location name>] > action -> outcome
	[<other location name>] > other action -> other outcome
	...
	[<other location name>] > other action -> other outcome

	- CURRENT SITUATION:
	<text describing the current location, visible objects, characters, exits, inventory, map, etc.>
	or <map description>

	- ACTIONS ALREADY TRIED AT THIS LOCATION:
	> action -> outcome
	> other action -> other outcome

	- ACTIONS SUGGESTED: action1, action2, action3
	<END GAME OUTPUT>


	Given the GAME OUTPUT, output a JSON list of action strings. Focus on:
	- Objects mentioned in CURRENT SITUATION that can be TAKEN, examined, or opened
	- Objects or places to examine mentioned in CURRENT SITUATION that could reveal new information or items
	- Directions/exits mentioned in CURRENT SITUATION
	- Interactive elements in CURRENT SITUATION (doors, containers, levers, buttons). Suggest interacting with them to discover new possibilities.
	- Items that might be useful in CURRENT SITUATION
	- Exploration if there is no interesting object to interact with mentioned in CURRENT SITUATION

	Follow these additional guidelines:
	- "CURRENT SITUATION" is the most important part of the output, it is the direct consequence of your last action and the most up-to-date description of the world. Focus on it to find new interactions, objects, exits, and details to examine.
	- "RECENT ACTIONS" is a summary of what you've done recently and their outcomes. Use it to avoid repeating failed actions and to focus on promising ones.
	- DON'T SUGGEST ACTIONS YOU'VE ALREADY TRIED AT THIS LOCATION. If there are too many ACTIONS ALREADY TRIED AT THIS LOCATION, move to another place (use look to see the exits).
	- ACTIONS SUGGESTED are additionally useful, but make sure to focus on the CURRENT SITUATION and RECENT ACTIONS to find promising actions that are relevant to the current context.

	IMPORTANT: If there is a warning 'WARNING', 'EXPLORATION HINT' or 'URGENT' in the GAME OUTPUT, prioritize suggesting actions that address those warnings.

	VALID COMMANDS include:
	- Movement: north, south, east, west, northeast, northwest, southeast, southwest, up, down, enter, exit
	- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
	- Light: turn on lamp, turn off lamp
	- Combat: attack <enemy> with <weapon>
	- Other: inventory, look, read <thing>, wait
	- Other verbs: look, examine, listen, speak, take, drop, empty, fill, inventory, climb, swim, open, close, set, turn, push, pull, push [direction], throw at, eat, drink, wear, take off, burn, dig, kick, destroy, read, ask for, give, feed, show, ask about, tell about, talk to, kiss, attack, wake, answer, wave, rub, squeeze, jump, jump over, wait, sleep, sing, yell, think, pray
	KEEP VALID COMMANDS SIMPLE (e.g., "examine picture" instead of "examine picture on east wall").
	SUGGEST look when you need more information.

	Output ONLY a JSON list, no explanation. Example: ["examine table", "take key", "open door", "north"]
	If nothing stands out, output: []"""


	# Variant of the extraction prompt used once the agent has lingered at one
	# location: it asks only for exits/directions (or look / get_map).
	EXTRACT_ACTIONS_PROMPT_EXIT = """You are analyzing text adventure game output. Extract promising actions or directions the player should try.

	Here is the structure of the GAME OUTPUT you receive:
	<BEGIN GAME OUTPUT>
	- CURRENT LOCATION: <location name>
	- STEPS AT THIS LOCATION: <number of steps taken at this location>

	- RECENT ACTIONS:
	[<location name>] > action -> outcome
	[<other location name>] > other action -> other outcome
	...
	[<other location name>] > other action -> other outcome

	- CURRENT SITUATION:
	<text describing the current location, visible objects, characters, exits, inventory, map, etc.>
	or <map description>

	- ACTIONS ALREADY TRIED AT THIS LOCATION:
	> action -> outcome
	> other action -> other outcome

	<END GAME OUTPUT>

	GUIDELINES:
	The player needs to move to a different location. TRY TO DISCOVER NEW PLACES AND EXITS TO EXPLORE (look at RECENT ACTIONS to avoid going in the same direction again).
	If no exits or directions are mentioned in the CURRENT SITUATION, suggest: look, get_map.
	Otherwise, suggest exits and directions mentioned in the CURRENT SITUATION among the valid commands: north, south, east, west, northeast, northwest, southeast, southwest.

	Output ONLY a JSON list, no explanation. Example: ["north", "look", "southwest", "east"]
	If nothing stands out, output: []"""


	# =============================================================================
	# Student Agent
	# =============================================================================

	# Commands the agent's bookkeeping treats as "moving or re-orienting": they are
	# not written to the per-location action log. Note that "look" is included and
	# "enter"/"exit" are not.
	MVMT_COMMANDS = {"look", "north", "south", "east", "west", "up", "down", "northeast", "northwest", "southeast", "southwest"}


	class StudentAgent:
	    """
	    ReAct agent with enhanced exploration and location-aware reasoning.

	    On every step the agent builds a context block from its own bookkeeping
	    (recent actions, actions already tried at the current location, suggested
	    actions), asks the LLM for a THOUGHT / TOOL / ARGS reply, sanitises the
	    requested tool call and executes it through the MCP client.
	    """

	    # Compass directions recorded as "not tried yet" when a location is first seen.
	    _COMPASS_DIRECTIONS = (
	        "north", "south", "east", "west",
	        "northeast", "northwest", "southeast", "southwest",
	    )

	    # Tool names the LLM may invent, mapped to the MCP tool they stand for.
	    _TOOL_ALIASES = {
	        "action": "play_action",
	        "do": "play_action",
	        "command": "play_action",
	        "play": "play_action",
	        "map": "get_map",
	        "location": "get_map",
	        "locations": "get_map",
	        "mem": "memory",
	        "state": "memory",
	        "status": "memory",
	        "history": "memory",
	        "inv": "inventory",
	        "items": "inventory",
	        "carrying": "inventory",
	        "valid": "get_valid_actions",
	        "valid_actions": "get_valid_actions",
	        "actions": "get_valid_actions",
	        "possible_actions": "get_valid_actions",
	        "log": "location_log",
	        "loc_log": "location_log",
	        "location_history": "location_log",
	    }

	    # Verbs/phrases this agent rewrites before sending a command to the game.
	    _VERB_FIXES = {
	        "check": "examine",
	        "inspect": "examine",
	        "search": "look",
	        "grab": "take",
	        "pick": "take",
	        "pick up": "take",
	        "get": "take",
	        "collect": "take",
	        "use": "turn on",
	        "switch on": "turn on",
	        "go north": "north",
	        "go south": "south",
	        "go east": "east",
	        "go west": "west",
	        "go up": "up",
	        "go down": "down",
	        "move north": "north",
	        "move south": "south",
	        "move east": "east",
	        "move west": "west",
	    }
	    # Longest phrases first, so "pick up lamp" is matched by "pick up" and not
	    # by the shorter "pick" (which would yield "take up lamp").
	    _VERB_FIXES_BY_LENGTH = tuple(sorted(_VERB_FIXES, key=len, reverse=True))

	    def __init__(self):
	        """Initialize your agent here."""
	        self._reset_state()

	    def _reset_state(self) -> None:
	        """Reset all per-game bookkeeping so one agent can play several games."""
	        self.history_agent: list[dict] = []  # chronological log of recent tool calls (run() keeps the last 15)
	        self.history_location: dict[str, list[dict]] = {}  # location -> non-movement actions tried there and their outcomes
	        self.remaining_directions: dict[str, set[str]] = {}  # location -> compass directions not tried from there yet
	        self.recent_actions: list[str] = []  # recent play_action commands, for loop detection
	        self.score: int = 0  # highest score parsed from any observation so far
	        self.previous_location: str = ""  # location before the last tool call
	        self.current_location: str = ""  # location after the last tool call
	        self.steps_at_current_location: int = 0  # consecutive steps spent at the current location
	        self.visited_locations: dict[str, int] = {}  # location -> visit count
	        self.promising_actions: list[str] = []  # suggestions extracted when a new location is entered
	        self.is_new_location: bool = False  # whether the last observation was tagged as a new location

	    async def run(
	        self,
	        client,
	        game: str,
	        max_steps: int,
	        seed: int,
	        verbose: bool = False,
	    ) -> RunResult:
	        """
	        Run the agent for a game session.

	        Args:
	            client: Connected MCP client; must provide ``list_tools`` and ``call_tool``.
	            game: Name of the game (not used by this implementation).
	            max_steps: Maximum number of LLM-driven steps to take.
	            seed: Base seed; step ``i`` uses ``seed + i`` for its LLM calls.
	            verbose: Print the thought, tool call and result of every step.

	        Returns:
	            A RunResult summarising score, moves, visited locations and history.
	        """
	        # Start from a clean slate so state from a previous run cannot leak in.
	        self._reset_state()

	        locations_visited = set()
	        history = []
	        moves = 0

	        # Get list of available tools
	        tools = await client.list_tools()
	        tool_names = [t.name for t in tools]

	        # Get initial observation
	        result = await client.call_tool("play_action", {"action": "look"})
	        observation, location, is_new_location = self._extract_result(result)

	        # Unique-location counter for the final report: keyed on the first line
	        # of the observation, which is not necessarily the parsed location name.
	        dummy_location = observation.split("\n")[0] if observation else "Unknown"
	        locations_visited.add(dummy_location)

	        # In-game location name parsed from the tool output.
	        self.current_location = location
	        self.previous_location = location
	        self.visited_locations[location] = 1
	        self.remaining_directions[location] = set(self._COMPASS_DIRECTIONS)

	        if verbose:
	            print(f"\n{observation}")

	        # Extract promising actions from initial observation
	        self.promising_actions = self._extract_promising_actions(observation, seed, EXTRACT_ACTIONS_PROMPT)
	        if self.promising_actions and verbose:
	            print(f"[PROMISING] {self.promising_actions}")

	        # Main ReAct loop
	        for step in range(1, max_steps + 1):
	            # Build prompt with context and ask the LLM what to do
	            prompt = self._build_prompt(observation, seed + step)
	            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
	            thought, tool_name, tool_args = self._parse_response(response)

	            if verbose:
	                print(f"\n--- Step {step} ---")
	                print(f"[THOUGHT] {thought}")
	                print(f"[TOOL] {tool_name}({tool_args})")

	            # Validate and fix common issues
	            tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)

	            # Loop detection for play_action
	            if tool_name == "play_action":
	                self.recent_actions.append(tool_args.get("action", "look"))
	                if len(self.recent_actions) > 7:
	                    self.recent_actions = self.recent_actions[-7:]

	                # Same command three times in a row: override it.
	                if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
	                    if verbose:
	                        print(f"[WARNING] Loop detected - forcing exploration")
	                    tool_name, tool_args = self._break_loop(tool_names)
	                    self.recent_actions.append(tool_args.get("action", "look"))

	            # Read the action only now: the loop breaker may have replaced the
	            # tool call, and the bookkeeping below must see what is really sent.
	            action = tool_args.get("action", "")

	            # NOTE(review): this branch only logs; it does not call
	            # _force_movement(). The pressure to move comes from the URGENT hint
	            # that _build_prompt() adds. Confirm whether forcing was intended.
	            if (
	                self.steps_at_current_location >= 5
	                and tool_name == "play_action"
	                and action not in MVMT_COMMANDS
	            ):
	                if verbose:
	                    print(f"[EXPLORATION BIAS] Been here {self.steps_at_current_location} steps, forcing movement")

	            moves += 1

	            # Location the command is issued from; per-location records belong here.
	            origin_location = self.current_location

	            # Execute the tool
	            observation = ""
	            new_location = self.current_location
	            is_new_location = False
	            try:
	                result = await client.call_tool(tool_name, tool_args)
	                observation, new_location, is_new_location = self._extract_result(result)

	                if verbose:
	                    print(f"[RESULT] {observation[:200]}...")

	            except Exception as e:
	                # A failed tool call is reported to the LLM as the observation.
	                observation = f"Error: {e}"
	                if verbose:
	                    print(f"[ERROR] {e}")
	            self.is_new_location = is_new_location

	            # Detect location changes
	            self.previous_location = self.current_location
	            self.current_location = new_location
	            if is_new_location:
	                self.steps_at_current_location = 0

	                # Extract promising actions from new location
	                self.promising_actions = self._extract_promising_actions(observation, seed + step, EXTRACT_ACTIONS_PROMPT)
	                if self.promising_actions and verbose:
	                    print(f"[PROMISING at new location] {self.promising_actions}")
	            else:
	                self.steps_at_current_location += 1
	                self.promising_actions = []  # Clear promising actions if we haven't moved

	            # Track number of visits to this location
	            if self._has_moved():
	                self.visited_locations[self.current_location] = self.visited_locations.get(self.current_location, 0) + 1
	                self.steps_at_current_location = 0

	            # Unique-location counter for the final report (see above).
	            dummy_location = observation.split("\n")[0] if observation else "Unknown"
	            locations_visited.add(dummy_location)

	            # General, chronological history; kept short.
	            self.history_agent.append({
	                "step": step,
	                "thought": thought,
	                "tool": tool_name,
	                "args": tool_args,
	                "result": observation[:200],
	                "location": self.current_location,
	            })
	            if len(self.history_agent) > 15:
	                self.history_agent = self.history_agent[-15:]

	            # Make sure the location we ended up in has its records.
	            self.history_location.setdefault(self.current_location, [])
	            self.remaining_directions.setdefault(self.current_location, set(self._COMPASS_DIRECTIONS))

	            if action not in MVMT_COMMANDS:
	                # Non-movement actions (and non-game tools) are logged under the
	                # location where they were tried, even if they moved the player.
	                self.history_location.setdefault(origin_location, []).append({
	                    "step": step,
	                    "thought": thought,
	                    "tool": tool_name,
	                    "args": tool_args,
	                    "result": observation,
	                })
	            else:
	                # A direction has now been tried from the location it was issued at.
	                self.remaining_directions.setdefault(
	                    origin_location, set(self._COMPASS_DIRECTIONS)
	                ).discard(action)

	            # Track score from observation
	            self._update_score(observation)

	            # Record in result history (for final output)
	            history.append((thought, f"{tool_name}({tool_args})", observation[:100]))

	            # Check for game over
	            if self._is_game_over(observation):
	                if verbose:
	                    print("\n* GAME OVER *")
	                break

	        return RunResult(
	            final_score=self.score,
	            max_score=350,  # NOTE(review): fixed value, independent of `game`
	            moves=moves,
	            locations_visited=locations_visited,
	            game_completed=self._is_game_over(observation),
	            history=history,
	        )

	    def _has_moved(self) -> bool:
	        """Return True when the last tool call changed the current location."""
	        return self.current_location != self.previous_location

	    def _parse_location_from_observation(self, observation: str) -> tuple[str, bool]:
	        """Extract the location name from the first line of a tool output.

	        The first line is expected to look like ``[NEW LOCATION: name]`` or
	        ``[CURRENT LOCATION: name]``.

	        Returns:
	            ``(location, is_new_location)``. An empty observation yields
	            ``("Unknown", False)``. When no tag is found, the whole first line is
	            returned as the location and a diagnostic is printed.
	        """
	        if not observation:
	            return "Unknown", False
	        first_line = observation.split("\n")[0].strip()
	        is_new_location = first_line.startswith("[NEW LOCATION:")
	        # Plain "|" is required for alternation; an escaped "\|" would only
	        # match a literal pipe character.
	        match = re.search(r'\[(?:NEW|CURRENT) LOCATION: (.+?)\]', first_line)

	        if match:
	            return match.group(1).strip(), is_new_location

	        print(f"[ERROR] Could not parse location from observation. Defaulting to first line as location. Observation: \n{observation[:100]}...")
	        return first_line, is_new_location

	    def _parse_observation_wo_score(self, observation: str) -> str:
	        """Return the observation text that precedes the first "[Score:" marker."""
	        if not observation:
	            return ""
	        return observation.split("[Score:")[0].strip()

	    def _extract_promising_actions(self, observation: str, seed: int, prompt: str) -> list[str]:
	        """
	        Use the LLM to extract promising actions from an observation.

	        Args:
	            observation: Text sent as the user message.
	            seed: Seed forwarded to the LLM call.
	            prompt: System prompt describing the extraction task.

	        Returns:
	            The string items of the first JSON list found in the reply, or an
	            empty list when the call or the parsing fails.
	        """
	        try:
	            response = call_llm(
	                prompt=f"{observation}",
	                system_prompt=prompt,
	                seed=seed,
	                max_tokens=150,
	            )
	            # Find the first JSON array in the response
	            match = re.search(r'\[.*?\]', response, re.DOTALL)
	            if match:
	                actions = json.loads(match.group(0))
	                if isinstance(actions, list):
	                    return [str(a) for a in actions if isinstance(a, str)]
	        except Exception:
	            # Suggestions are optional: any failure here means "no suggestions".
	            pass
	        return []

	    def _first_untried_direction(self, directions) -> Optional[str]:
	        """Return the first direction absent from the last five recent actions."""
	        recent = set(self.recent_actions[-5:])
	        return next((d for d in directions if d not in recent), None)

	    def _break_loop(self, tool_names: list[str]) -> tuple[str, dict]:
	        """Break out of a loop by choosing a direction not used recently.

	        Falls back to ``get_map`` when that tool is available, else to ``look``.
	        """
	        direction = self._first_untried_direction((
	            "north", "south", "east", "west", "up", "down",
	            "northeast", "northwest", "southeast", "southwest",
	        ))
	        if direction is not None:
	            return "play_action", {"action": direction}

	        # All directions were used recently.
	        if "get_map" in tool_names:
	            return "get_map", {}

	        return "play_action", {"action": "look"}

	    def _force_movement(self) -> tuple[str, dict]:
	        """Pick a movement command not used recently; fall back to north."""
	        direction = self._first_untried_direction((
	            "north", "south", "east", "west", "up", "down",
	            "enter", "northeast", "northwest", "southeast", "southwest",
	        ))
	        return "play_action", {"action": direction if direction is not None else "north"}

	    def _one_line_result(self, entry: dict, limit: int) -> str:
	        """Format a history entry's result as a single score-free line of at most ``limit`` chars (plus "...")."""
	        text = self._parse_observation_wo_score(entry.get("result", ""))
	        text = text.replace("\n", " ")
	        return text[:limit] + "..." if len(text) > limit else text

	    def _build_prompt(self, observation: str, seed: int = 0) -> str:
	        """
	        Build the prompt for the LLM with rich context.

	        Args:
	            observation: Latest tool output (without its location tag line).
	            seed: Seed for the suggestion step (LLM call or random fallback).

	        Returns:
	            The context block followed by "What do you do next?".
	        """
	        parts = []

	        parts.append(f"- CURRENT LOCATION: {self.current_location}")
	        parts.append(f"- STEPS AT THIS LOCATION: {self.steps_at_current_location}")

	        # Recent history
	        if self.history_agent:
	            parts.append("\n- RECENT ACTIONS:")
	            for entry in self.history_agent[-5:]:
	                loc = entry.get("location", "?")
	                action = entry.get("args", {}).get("action", entry["tool"])
	                result_short = self._one_line_result(entry, 80)
	                parts.append(f" [{loc}] > {action} -> {result_short}")

	        # Warn about repeated actions
	        if self.recent_actions and len(self.recent_actions) >= 4 and len(set(self.recent_actions[-3:])) == 1:
	            parts.append(f"\n[WARNING: You've been doing '{self.recent_actions[-1]}' repeatedly. TRY SOMETHING COMPLETELY DIFFERENT!]")

	        # Exploration pressure
	        if self.steps_at_current_location >= 4:
	            parts.append(f"\n[EXPLORATION HINT: You have been at '{self.current_location}' for {self.steps_at_current_location} steps. Consider moving to a NEW location soon! Use 'look' to find exits of the room, or 'get_map' to see the discovered map.]")
	        if self.steps_at_current_location >= 5:
	            parts.append(f"\n[URGENT: You MUST move to a different location NOW. Pick a direction and go.]")

	        parts.append(f"\n- CURRENT SITUATION:\n{observation}")

	        # Actions already tried at this location (only shown on a revisit)
	        visits = self.visited_locations.get(self.current_location, 0)
	        location_history = self.history_location.get(self.current_location, [])
	        if visits > 1:
	            parts.append(f"\n- ACTIONS ALREADY TRIED AT THIS LOCATION ({self.current_location}):")
	            for entry in location_history[-20:]:
	                action = entry.get("args", {}).get("action", entry["tool"])
	                result_short = self._one_line_result(entry, 100)
	                parts.append(f" > {action} -> {result_short}")

	        # Suggested actions
	        if self.promising_actions:
	            parts.append(f"\n- ACTIONS SUGGESTED AT NEW LOCATION: {', '.join(self.promising_actions)}")
	        else:
	            if len(location_history) >= 7 or visits >= 7:
	                # We've been here a lot: suggest four random ways out. The draw
	                # is seeded so a run is reproducible, and the LLM is not asked
	                # for suggestions that would be thrown away.
	                directions = ['look', 'get_map', 'north', 'south', 'east', 'west', 'northeast', 'northwest', 'southeast', 'southwest', 'up', 'down', 'enter', 'exit']
	                rng = np.random.default_rng(abs(int(seed)))
	                promising_actions = rng.choice(directions, size=min(4, len(directions)), replace=False).tolist()
	            else:
	                prompt = EXTRACT_ACTIONS_PROMPT_EXIT if self.steps_at_current_location >= 5 else EXTRACT_ACTIONS_PROMPT
	                promising_actions = self._extract_promising_actions("\n".join(parts), seed=seed, prompt=prompt)
	            if promising_actions:
	                parts.append(f"\n- ACTIONS SUGGESTED: {', '.join(promising_actions)}")

	        parts.append("\nWhat do you do next?")

	        return "\n".join(parts)

	    def _parse_response(self, response: str) -> tuple[str, str, dict]:
	        """
	        Parse LLM response to extract thought, tool name, and arguments.

	        Lines starting with THOUGHT:, TOOL: and ARGS: (any case) are read; when a
	        label occurs several times the last occurrence wins.

	        Returns:
	            ``(thought, tool_name, tool_args)``; missing parts default to
	            "No reasoning provided", "play_action" and ``{"action": "look"}``.
	        """
	        thought = "No reasoning provided"
	        tool_name = "play_action"
	        tool_args = {"action": "look"}

	        # An empty/None reply simply yields the defaults.
	        for line in (response or "").strip().split("\n"):
	            line_clean = line.strip()
	            line_upper = line_clean.upper()

	            if line_upper.startswith("THOUGHT:"):
	                thought = line_clean.split(":", 1)[1].strip()

	            elif line_upper.startswith("TOOL:"):
	                raw_tool = line_clean.split(":", 1)[1].strip().lower()
	                # Drop markdown emphasis / code markers around the name.
	                tool_name = raw_tool.replace("*", "").replace("`", "").strip()

	            elif line_upper.startswith("ARGS:"):
	                raw_args = line_clean.split(":", 1)[1].strip()
	                raw_args = raw_args.replace("*", "").replace("`", "")
	                try:
	                    parsed = json.loads(raw_args)
	                    if isinstance(parsed, dict):
	                        tool_args = parsed
	                except json.JSONDecodeError:
	                    # Try to extract action from malformed JSON; whitespace
	                    # around the colon is optional.
	                    match = re.search(r'"action"\s*:\s*"([^"]+)"', raw_args)
	                    if match:
	                        tool_args = {"action": match.group(1)}
	                    else:
	                        # Try bare string
	                        clean = raw_args.strip().strip('"').strip("'")
	                        if clean:
	                            tool_args = {"action": clean}

	        return thought, tool_name, tool_args

	    def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
	        """Validate and fix common tool call issues.

	        Unknown tool names are mapped through known aliases and otherwise fall
	        back to ``play_action``. For ``play_action`` the command's leading verb
	        is rewritten when it is one of the verbs this agent treats as invalid.
	        ``tool_args`` is updated in place.

	        Returns:
	            ``(tool_name, tool_args)`` after correction.
	        """
	        # Fix tool name
	        if tool_name not in valid_tools:
	            tool_name = self._TOOL_ALIASES.get(tool_name, "play_action")
	            # The alias target may not be offered by this server either.
	            if tool_name not in valid_tools:
	                tool_name = "play_action"

	        # Fix action verbs
	        if tool_name == "play_action":
	            action = tool_args.get("action", "look")
	            if not isinstance(action, str):
	                # The LLM may emit a non-string JSON value.
	                action = str(action)

	            action_lower = action.lower().strip()
	            if action_lower in self._VERB_FIXES:
	                action = self._VERB_FIXES[action_lower]
	            else:
	                # Check if action starts with an invalid verb (longest first)
	                for invalid in self._VERB_FIXES_BY_LENGTH:
	                    if action_lower.startswith(invalid + " "):
	                        remainder = action_lower[len(invalid):].strip()
	                        action = f"{self._VERB_FIXES[invalid]} {remainder}"
	                        break

	            tool_args["action"] = action

	        return tool_name, tool_args

	    def _extract_result(self, result) -> tuple[str, str, bool]:
	        """Extract observation, location, and new-location flag from an MCP tool result.

	        Returns:
	            ``(observation, location, is_new_location)``. For multi-line output
	            the first line (the location tag) is removed from the observation;
	            single-line output is returned unchanged.
	        """
	        if hasattr(result, 'content') and result.content:
	            obs = result.content[0].text
	        elif isinstance(result, list) and result:
	            obs = result[0].text if hasattr(result[0], 'text') else str(result[0])
	        else:
	            obs = str(result)
	        location, is_new_location = self._parse_location_from_observation(obs)

	        # obs without the first line
	        obs_without_first_line = "\n".join(obs.split("\n")[1:]).strip() if "\n" in obs else obs

	        return obs_without_first_line, location, is_new_location

	    def _update_score(self, text: str) -> None:
	        """Raise ``self.score`` to the largest score found in ``text`` (never lowers it)."""
	        patterns = [
	            r'Score:\s*(\d+)',
	            r'score[:\s]+(\d+)',
	            r'\[Score:\s*(\d+)',
	            r'Total:\s*(\d+)',
	        ]

	        for pattern in patterns:
	            match = re.search(pattern, text, re.IGNORECASE)
	            if match:
	                self.score = max(self.score, int(match.group(1)))

	    def _is_game_over(self, text: str) -> bool:
	        """Return True when ``text`` contains one of the end-of-game phrases (case-insensitive)."""
	        game_over_phrases = [
	            "game over",
	            "you have died",
	            "you are dead",
	            "* you have died *",
	            "* you have won *",
	        ]
	        text_lower = text.lower()
	        return any(phrase in text_lower for phrase in game_over_phrases)

	    def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
	        """Convenience wrapper for call_llm()."""
	        return call_llm(prompt, system_prompt, seed)


	# =============================================================================
	# For local testing
	# =============================================================================

	async def test_agent():
	    """Play a short zork1 session against the local MCP server and print a summary."""
	    from fastmcp import Client

	    # The agent is created first, then the client is opened on the server script.
	    student = StudentAgent()
	    mcp_server_script = "mcp_server.py"

	    async with Client(mcp_server_script) as mcp_client:
	        outcome = await student.run(
	            client=mcp_client,
	            game="zork1",
	            max_steps=10,
	            seed=42,
	            verbose=True,
	        )

	        # Report the headline numbers of the session.
	        print(f"\nFinal Score: {outcome.final_score}")
	        print(f"Moves: {outcome.moves}")
	        print(f"Locations: {outcome.locations_visited}")


	if __name__ == "__main__":
	    import asyncio

	    # Drive the coroutine to completion when the file is run as a script.
	    asyncio.run(test_agent())