"""
Student Agent for Text Adventure Games

This is your submission file. Implement the StudentAgent class to play
text adventure games using the MCP server you also implement.

Your agent should:
1. Connect to the MCP server via the provided client
2. Use the ReAct pattern (Thought -> Action -> Observation)
3. Call MCP tools to interact with the game
4. Maximize the game score within the step limit

Required method:
    async def run(self, client, game, max_steps, seed, verbose) -> RunResult

The 'client' is a FastMCP Client already connected to your MCP server.
Use it to call tools like: await client.call_tool("play_action", {"action": "look"})

Tips:
- Start by looking around and understanding your environment
- Keep track of visited locations to avoid loops
- Pick up useful items (lamp, sword, etc.)
- The seed parameter should be used to set your LLM's seed for reproducibility
"""

import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables (the error below points users at a .env file
# as the place to define HF_TOKEN).
load_dotenv()

# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================

# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# Initialize the LLM client (uses HF_TOKEN from environment)
_hf_token = os.getenv("HF_TOKEN")
# Fail fast at import time: without a token no LLM call can be made, so the
# module refuses to load rather than failing later in the middle of a run.
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

# Single module-level client shared by every call_llm() invocation.
LLM_CLIENT = InferenceClient(token=_hf_token)


def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    The request is a two-message chat (system + user) sent to LLM_MODEL
    through the module-level LLM_CLIENT, with temperature 0.0 and the given
    seed. Exceptions raised by the client are not caught here.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text (content of the first returned choice)

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    # The system message always comes first, followed by the single user turn;
    # no earlier conversation turns are sent.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    
    # Only the first choice is used.
    # NOTE(review): content may presumably be None for some responses, which
    # would contradict the declared ``str`` return type - confirm against the
    # client documentation.
    return response.choices[0].message.content


@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    # Score held by the agent when the run ended.
    final_score: int
    # Maximum achievable score reported for the game.
    max_score: int
    # Number of agent steps that were started during the run.
    moves: int
    # Names of the rooms the agent recorded as visited.
    locations_visited: set[str]
    # True when the run stopped because game-over text was detected.
    game_completed: bool
    # Error message for a failed run; None when no error is reported.
    error: Optional[str] = None
    # One (thought, action, observation) tuple per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)


# =============================================================================
# System Prompt - Customize this for your agent
# =============================================================================

# The THOUGHT / TOOL / ARGS labels requested below are the ones
# StudentAgent._parse_response searches for; keep both in sync when editing.
SYSTEM_PROMPT = """You are playing a classic text adventure game.

GOAL: Explore the world, solve puzzles, and maximize your score.

AVAILABLE TOOLS (use via MCP):
- play_action: Execute a game command (north, take lamp, open mailbox, etc.)
- memory: Get current game state and history
- inventory: Check what you're carrying
- get_map: Get a map of explored locations and their connections

CRITICAL RULES TO AVOID LOOPS:
1. NEVER repeat the same action twice in a row unless it succeeded
2. If you visit a location more than once, you MUST try a different direction
3. If an action fails, NEVER try it again in the same location
4. Prioritize unexplored areas over familiar ones
5. If stuck with 0 score for 20+ moves, try something completely new
6. If a door, window, or anything is locked, DO NOT get stuck trying to open it. Leave the area to find a key or look for another entrance.

VALID GAME COMMANDS for play_action:
- Movement: north, south, east, west, up, down, enter, exit
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
- Other: look, inventory, read <thing>, turn on lamp

RESPOND IN THIS EXACT FORMAT (no markdown):
THOUGHT: <your reasoning about what to do next>
TOOL: <tool_name>
ARGS: <JSON arguments, e.g., {"action": "look"}>

Example:
THOUGHT: I should look around to see where I am.
TOOL: play_action
ARGS: {"action": "look"}
"""

# =============================================================================
# Student Agent - IMPLEMENT THIS CLASS
# =============================================================================

class StudentAgent:
    """
    State Machine Agent for Text Adventures.
    Combines ReAct with systematic exploration and auto-extraction of actions.
    """
    
    def __init__(self):
        self.history = []
        self.visited_locations = set()
        self.current_location = None
        self.last_known_inventory = "Unknown. You should use the 'inventory' tool to check."
        self.inventory = []
        self.previous_location = None
        self.score = 0
        self.map = {}
        
        # --- NEW: State Machine Tracking ---
        self.locations_data = {}  # { "Room Name": {"actions_tried": [], "promising_actions": []} }
        self.consecutive_turns = 0 # Compteur pour le biais d'exploration
    
    def _extract_promising_actions(self, observation: str, seed: int) -> list[str]:
        """
        Appel LLM spécifique pour extraire les objets interactifs d'une nouvelle pièce.
        """
        sys_prompt = "You are an expert text adventure parser. Extract interactive objects from the observation and return a JSON list of 1 to 4 simple verb-noun commands to try (e.g., ['open mailbox', 'climb tree', 'move leaves', 'read leaflet', 'take sword']). Ignore simple directional movement commands (north, south, etc.). Return ONLY the JSON list."
        prompt = f"OBSERVATION:\n{observation}\n\nJSON LIST:"
        
        try:
            # Appel rapide (max_tokens faible)
            response = call_llm(prompt, sys_prompt, seed, max_tokens=100)
            
            # Parsing robuste de la liste JSON
            match = re.search(r'\[.*?\]', response, re.DOTALL)
            if match:
                actions = json.loads(match.group(0))
                # Nettoyage et filtrage
                return [str(a).lower().strip() for a in actions if isinstance(a, str)]
        except Exception:
            pass # Si le LLM échoue, on retourne juste une liste vide
            
        return []

    async def run(
        self,
        client,
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = True,
    ) -> RunResult:
        
        self.history = []
        self.visited_locations = set()
        self.current_location = None
        self.score = 0
        self.consecutive_turns = 0
        moves = 0
        game_completed = False
        error = None
        
        try:
            # 1. Get initial observation
            result = await client.call_tool("play_action", {"action": "look"})
            observation = result.content[0].text if result else "No response"

            self._extract_location(observation, action_taken=None)
            
            # Initialiser les données du premier lieu
            if self.current_location:
                new_promising = self._extract_promising_actions(observation, seed)
                self.locations_data[self.current_location] = {
                    "actions_tried": [],
                    "promising_actions": new_promising
                }

            if verbose:
                print(f"=== Initial Observation ===\n{observation}\n")
            
            # 2. Main Loop
            for step in range(max_steps):
                moves = step + 1
                
                # Récupérer les données de la pièce actuelle
                loc_data = self.locations_data.get(self.current_location, {})
                pending_actions = loc_data.get("promising_actions", [])
                
                prompt = self._build_prompt(observation, self.history)
                response = call_llm(prompt, SYSTEM_PROMPT, seed)
                thought, tool_name, args = self._parse_response(response)
                
                if verbose:
                    print(f"\nPrompt:\n{prompt}\n")
                    print(f"=== Step {step + 1} ===")
                    print(f"LLM Response:\n{response}\n")

                # Mémoriser où on était avant l'action
                location_before_action = self.current_location
                action_name = args.get("action", "unknown") if args else "unknown"

                # Exécuter l'outil
                if tool_name:
                    try:
                        result = await client.call_tool(tool_name, args if args else {})
                        tool_result = result.content[0].text if result else "No response"
                        
                        if tool_name == "play_action":
                            observation = tool_result
                            
                            if action_name.lower() in ["i", "inv", "inventory"]:
                                self.last_known_inventory = tool_result
                                
                        elif tool_name == "inventory":
                            self.last_known_inventory = tool_result
                            observation = f"[Inventory checked]\n{tool_result}\n\n[Game state remains:]\n{observation}"
                            action_name = "check inventory"
                            
                    except Exception as e:
                            observation = f"Error executing tool: {str(e)}"
                            action_name = f"{tool_name} (error)"
                else:
                    result = await client.call_tool("play_action", {"action": "look"})
                    observation = result.content[0].text if result else "No response"
                    action_name = "look"

                # Enregistrer le résultat dans le registre de la pièce
                if location_before_action in self.locations_data:
                    # On garde juste la première phrase de l'obs pour ne pas surcharger la mémoire
                    short_res = observation.split('\n')[0][:80]
                    self.locations_data[location_before_action]["actions_tried"].append((action_name, short_res))

                self._extract_score(observation)
                self.history.append((thought, action_name, observation))
                self._extract_location(observation, action_taken=action_name)
                
                # --- GESTION DU BIAIS D'EXPLORATION ET NOUVEAUX LIEUX ---
                if self.current_location == location_before_action:
                    self.consecutive_turns += 1
                else:
                    self.consecutive_turns = 0 # On a bougé, on reset le compteur !
                    
                # Si c'est un nouveau lieu jamais visité
                if self.current_location and self.current_location not in self.locations_data:
                    new_promising = self._extract_promising_actions(observation, seed)
                    self.locations_data[self.current_location] = {
                        "actions_tried": [],
                        "promising_actions": new_promising
                    }
                    if verbose:
                        print(f"[NEW LOCATION DETECTED] Auto-extracted actions: {new_promising}")

                if self._is_game_over(observation):
                    game_completed = True
                    break
                    
        except Exception as e:
            error = str(e)
            if verbose:
                print(f"Error: {error}")
                
        return RunResult(
            final_score=self.score,
            max_score=350,
            moves=moves,
            locations_visited=self.visited_locations,
            game_completed=game_completed,
            history=self.history,
        )
    
    def _build_prompt(self, observation: str, history: list) -> str:
        history_text = ""
        loop_warning = ""
        loc_log = ""
        exploration_warning = ""
        unexplored_dirs = ""
        suggestions_text = ""

        if history:
            recent = history[-5:]
            history_text = "RECENT HISTORY:\n"
            actions = [a for _, a, _ in history[-4:]]
            
            if len(actions) >= 3 and len(set(actions)) == 1:
                loop_warning = "\n!!! CRITICAL WARNING: You are repeating the exact same action. YOU MUST do something else. !!!\n"
            elif len(actions) == 4 and actions[0] == actions[2] and actions[1] == actions[3]:
                loop_warning = f"\n!!! WARNING: You are stuck in a loop going between {actions[0]} and {actions[1]}. Pick a completely different direction. !!!\n"
            
            for thought, action, obs in recent:
                history_text += f"- Action: {action}\n"
                short_obs = obs[:200] + "..." if len(obs) > 200 else obs
                history_text += f"  Result: {short_obs}\n"

        # --- NEW: Inject Location specific logs ---
        if self.current_location in self.locations_data:
            tried = self.locations_data[self.current_location]["actions_tried"]
            if tried:
                loc_log = "ACTIONS ALREADY TRIED IN THIS LOCATION:\n"
                for a, res in tried[-5:]: # On montre les 5 dernières actions faites ICI
                    loc_log += f"- Tried: '{a}' -> Result: '{res}'\n"

            pending = self.locations_data[self.current_location].get("promising_actions", [])
            # On retire de la liste des suggestions les actions qu'il a déjà tentées
            tried_actions = [a.lower() for a, _ in tried]
            valid_suggestions = [p for p in pending if p.lower() not in tried_actions]
            
            if valid_suggestions:
                suggestions_text = f"INTERACTIVE OBJECTS SUGGESTIONS: You might want to try these actions here: {', '.join(valid_suggestions)}.\n"

        if self.consecutive_turns >= 4:
            exploration_warning = "\n!!! SYSTEM OVERRIDE: You have spent too many turns in this location without progressing. You MUST use a movement command (north, south, east, west, up, down, enter, exit) to explore elsewhere immediately. !!!\n"

        prompt = f"""CURRENT OBSERVATION:
{observation}
{exploration_warning}
{loop_warning}
{unexplored_dirs}
{suggestions_text}

KNOWN MAP (Spatial Graph):
{self._format_map()}

CURRENT INVENTORY:
{self.last_known_inventory}

{loc_log}
{history_text}
CURRENT SCORE: {self.score}

Based on the observation, inventory, and history, decide your next action.
Remember to explore new areas, pick up items, use them when appropriate, and solve puzzles to maximize score. 
"""
        return prompt
    
    def _parse_response(self, response: str) -> tuple[str, str, dict]:
        thought = ""
        tool_name = ""
        args = {}
        
        thought_match = re.search(r'THOUGHT:\s*(.+?)(?=TOOL:|$)', response, re.DOTALL | re.IGNORECASE)
        if thought_match:
            thought = thought_match.group(1).strip()
        
        tool_match = re.search(r'TOOL:\s*(\w+)', response, re.IGNORECASE)
        if tool_match:
            tool_name = tool_match.group(1).strip()
        
        args_match = re.search(r'ARGS:\s*(?:```json\n?)?(\{.*?\})(?:\n?```)?', response, re.DOTALL | re.IGNORECASE)
        if args_match:
            try:
                json_str = args_match.group(1).strip()
                args = json.loads(json_str)
            except json.JSONDecodeError:
                action_match = re.search(r'"action"\s*:\s*"([^"]+)"', response)
                if action_match:
                    args = {"action": action_match.group(1)}
        
        if not tool_name:
            tool_name = "play_action"
            args = {"action": "look"}
        elif not args:
            if tool_name == "play_action":
                args = {"action": "look"}
            else:
                args = {}
                
        return thought, tool_name, args
    
    def _extract_location(self, observation: str, action_taken: str = None):
        lines = [line.strip() for line in observation.split('\n') if line.strip()]
        if not lines:
            return None
            
        first_line = lines[0]
        
        if first_line.endswith(('.', '!', '?')):
            return None
            
        skip_patterns = [
            'taken', 'opening', "you can't", "i don't", 'welcome',
            "there's nothing", 'score:', '[score:', 'it is already',
            'the door is', 'you are carrying', 'dropped', 'done',
            'closed', 'open', 'locked', 'the grating', 'you are'
        ]
        
        for pattern in skip_patterns:
            if pattern.lower() in first_line.lower():
                return None
                
        if len(first_line) < 40 and first_line[0].isupper():
            new_location = first_line
            
            if new_location not in self.map:
                self.map[new_location] = {}
                
            movement_commands = ["north", "south", "east", "west", "up", "down", 
                                "ne", "nw", "se", "sw", "n", "s", "e", "w", 
                                "enter", "exit", "in", "out"]
            
            if self.current_location and action_taken:
                clean_action = action_taken.lower().strip()
                if clean_action in movement_commands and self.current_location != new_location:
                    self.map[self.current_location][clean_action] = new_location
                    reverse_dirs = {
                        "north": "south", "south": "north", "east": "west", "west": "east",
                        "up": "down", "down": "up", "in": "out", "out": "in",
                        "n": "s", "s": "n", "e": "w", "w": "e"
                    }
                    if clean_action in reverse_dirs:
                        self.map[new_location][reverse_dirs[clean_action]] = self.current_location

            self.previous_location = self.current_location
            self.current_location = new_location
            self.visited_locations.add(new_location)
            
            return new_location
            
        return None

    def _extract_score(self, observation: str):
        score_match = re.search(r'[Ss]core[:\s]+(\d+)', observation)
        if score_match:
            self.score = int(score_match.group(1))
    
    def _is_game_over(self, observation: str) -> bool:
        game_over_patterns = [
            r'\*\*\*\s*You have died\s*\*\*\*',
            r'\*\*\*\s*You have won\s*\*\*\*',
            r'game over',
            r'The End',
            r'RESTART, RESTORE, or QUIT',
        ]
        for pattern in game_over_patterns:
            if re.search(pattern, observation, re.IGNORECASE):
                return True
        return False
    
    def _format_map(self) -> str:
        if not self.map:
            return "Empty map (no locations explored yet)."
        map_text = ""
        for room, connections in self.map.items():
            conn_list = [f"[{dir} -> {dest}]" for dir, dest in connections.items()]
            conn_str = ", ".join(conn_list) if conn_list else "No known exits"
            marker = " <=== YOU ARE HERE" if room == self.current_location else ""
            map_text += f"  - {room}{marker}: {conn_str}\n"
        return map_text.rstrip()
    
# =============================================================================
# For local testing
# =============================================================================

async def test_agent():
    """Run a short local session of the agent against the MCP server script."""
    from fastmcp import Client

    # Script that implements the MCP server; handed to the client as-is.
    mcp_server_script = "mcp_server.py"
    student = StudentAgent()

    async with Client(mcp_server_script) as session:
        run_kwargs = {
            "client": session,
            "game": "zork1",
            "max_steps": 10,
            "seed": 42,
            "verbose": True,
        }
        outcome = await student.run(**run_kwargs)

        # Short summary of the finished run.
        print(f"\nFinal Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {outcome.locations_visited}")


# Script entry point: when the file is executed directly, run the local
# test session on a fresh asyncio event loop.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())