"""
ReAct Agent for Text Adventure Games

Architecture:
- REASONING/TOOL/ARGS response format for tighter LLM outputs (legacy
  REASONING/ACTION replies are still accepted by the parser)
- Heavy context management: per-location attempted- and failed-action
  memory, recent-N step window in the prompt
- Loop/deadlock detection with a rotating list of escape actions
- Structured logging throughout for easy debugging
- Score-delta awareness: score gains reset the stuck counter and rewarded
  actions are flagged in the prompt
- `look` as universal safe fallback
"""

import json
import logging
import os
import re
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Optional

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

load_dotenv()

# ==============================================================================
# Logging
# ==============================================================================

# Configure logging once at import time: records at INFO or above are written
# both to "agent.log" and to the console. With this configuration the
# logger.debug(...) calls in this module are not emitted unless the level is
# lowered to DEBUG.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    handlers=[
        logging.FileHandler("agent.log", mode="w"),  # mode="w": truncated on every start
        logging.StreamHandler(),  # console output
    ],
)
# Module-wide logger; its records carry the name "agent".
logger = logging.getLogger("agent")

# ==============================================================================
# LLM Configuration — DO NOT MODIFY
# ==============================================================================

# Model identifier passed to every chat-completion request in call_llm().
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# The token is read from the environment (load_dotenv() above has already run,
# so a .env file is honoured). Importing this module fails fast without it.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

# Single shared client, created at import time and reused by call_llm().
LLM_CLIENT = InferenceClient(token=_hf_token)


def call_llm(
    prompt: str,
    system_prompt: str,
    seed: int,
    max_tokens: int = 300,
) -> str:
    """Send one system + user message pair to the model and return its reply.

    The request is made synchronously through the module-level ``LLM_CLIENT``
    with ``temperature=0.0`` and the given ``seed``.

    Args:
        prompt: Content of the user message.
        system_prompt: Content of the system message.
        seed: Seed forwarded to the completion request.
        max_tokens: Upper bound on generated tokens (default 300).

    Returns:
        The ``content`` of the first choice's message, returned as-is.
        NOTE(review): presumably a ``str``; confirm whether the client can
        return ``None`` here, since callers treat it as text.

    Any exception raised by the client propagates to the caller.
    """
    # Only the first 300 characters of the prompt are logged, and only at DEBUG.
    logger.debug(f"[LLM] Calling model seed={seed}, max_tokens={max_tokens}")
    logger.debug(f"[LLM] Prompt (first 300 chars): {prompt[:300]!r}")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,
        max_tokens=max_tokens,
        seed=seed,
    )

    # Only the first choice is used.
    content = response.choices[0].message.content
    logger.debug(f"[LLM] Response: {content!r}")
    return content


# ==============================================================================
# Data Structures
# ==============================================================================

@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    # Score the agent holds when the run ends (StudentAgent.run passes its
    # current_score).
    final_score: int
    # Maximum attainable score for the game (looked up by game name in run()).
    max_score: int
    # Number of play_action calls issued during the step loop.
    moves: int
    # Distinct location names recorded during the run.
    locations_visited: set[str]
    # True when a game-over phrase was detected in an observation.
    game_completed: bool
    # Optional error description; None when no error is reported.
    error: Optional[str] = None
    # One (reasoning, "tool(args)", truncated observation) tuple per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)


# ==============================================================================
# System Prompt
# ==============================================================================

# System message sent with every LLM call made by StudentAgent.run().
# This is a runtime string: the tool names, the REASONING/TOOL/ARGS reply
# format and the verb lists below are what StudentAgent._parse_response and
# StudentAgent._validate_and_fix expect, so keep them in step when editing.
# A trailing backslash inside the literal joins two source lines into one
# prompt line.
# NOTE(review): anti-loop rule 3 says "4+ turns" while
# StudentAgent.STUCK_THRESHOLD is 10 - confirm whether these should match.
SYSTEM_PROMPT = """\
You are an expert text adventure game player. Your goal is to explore, collect \
treasures, solve puzzles, and MAXIMIZE your score.

AVAILABLE MCP TOOLS:
- play_action       : Execute a game command (costs a game turn)
- memory            : Get full game state, score, recent history, failed actions
- get_map           : See explored locations and connections
- inventory         : Check what you are carrying
- get_failed_actions: See what has already failed at the current location

VALID GAME COMMANDS for play_action:
  Movement  : north, south, east, west, up, down, ne, nw, se, sw, enter, exit
  Objects   : take <item>, drop <item>, open <thing>, close <thing>
  Examine   : examine <thing>, look, read <thing>, look in <thing>, \
look under <thing>, look behind <thing>
  Senses    : listen, smell, wait
  Light     : turn on <light_source>, turn off <light_source>
  Combat    : attack <enemy> with <weapon>
  Interact  : push <thing>, pull <thing>, turn <thing>, move <thing>
  Give/Put  : give <item> to <person>, put <item> in <container>, \
put <item> on <surface>

FORBIDDEN verbs (will confuse the parser): check, inspect, search, grab, use, \
help, investigate

EXPLORATION STRATEGY (follow in each new room):
1. "look" to understand the room fully.
2. Examine ALL objects, items, and features mentioned in the description.
3. Take everything that can be picked up.
4. Try "listen" or "read" if there are sounds or signs or clues.
5. Try all exits before backtracking to already-explored rooms.
6. Use items from inventory to solve puzzles or overcome obstacles.
7. If stuck, try "push", "pull", "move", "open", or "turn" on objects.
8. Use "get_map" to plan navigation and avoid revisiting areas.

ANTI-LOOP RULES — these are MANDATORY:
1. NEVER repeat an action that already FAILED at this location.
2. NEVER repeat the same action three times in a row.
3. If stuck 4+ turns, call memory then try something completely new.
4. Prefer actions that have scored points in the past.
5. If all obvious actions fail, try: "listen", "wait", "look under <thing>", \
"look behind <thing>", "push <thing>".

RESPOND IN THIS EXACT FORMAT (no markdown, no extra lines):
REASONING: <1-2 sentence explanation of your choice>
TOOL: <tool_name>
ARGS: <JSON object, e.g. {"action": "look"}>

Example:
REASONING: I should look around to understand my surroundings.
TOOL: play_action
ARGS: {"action": "look"}
"""


# ==============================================================================
# Student Agent
# ==============================================================================

class StudentAgent:
    """
    ReAct agent with:
    - REASONING/TOOL/ARGS response format (tight, parseable)
    - Per-location attempted & failed action memory
    - Loop/deadlock detector with a rotating list of escape actions
    - Score tracking
    - Comprehensive logging
    """

    # Consecutive identical-action threshold before forcing a redirect
    LOOP_THRESHOLD = 3
    # Consecutive steps at same location before triggering escape
    STUCK_THRESHOLD = 10
    # Recent observation window sent to LLM
    HISTORY_WINDOW = 10

    def __init__(self):
        # Per-location tracking
        self.attempted: dict[str, set[str]] = defaultdict(set)
        self.failed: dict[str, set[str]] = defaultdict(set)

        # Recent step history for context
        self.step_history: list[dict] = []

        # Loop / stuck detection
        self.recent_actions: list[str] = []
        self.location_streak: int = 0
        self.prev_location: str = ""

        # Score tracking
        self.current_score: int = 0
        self.best_score: int = 0

        # Escape rotation for when the agent is stuck
        self._escape_idx: int = 0
        self._escape_actions = [
            "look", "north", "south", "east", "west",
            "up", "down", "ne", "nw", "se", "sw",
            "listen", "wait", "examine room",
        ]

        logger.info("[Agent] Initialized")

    # ── Main entry point ───────────────────────────────────────────────────────

    async def run(
        self,
        client,
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> RunResult:
        """Run the full ReAct loop for one game and summarise the outcome.

        Each step builds a prompt from the latest observation, asks the LLM
        for a REASONING/TOOL/ARGS reply, sanitises the requested tool call,
        applies the anti-loop override to game commands, executes the tool
        and updates per-location bookkeeping.

        Args:
            client: Connected MCP client exposing async ``list_tools()`` and
                ``call_tool(name, args)``.
            game: Game identifier; used for logging and the max-score lookup.
            max_steps: Maximum number of LLM/tool iterations.
            seed: Base LLM seed; step ``i`` calls the model with ``seed + i``.
            verbose: When true, print a per-step trace to stdout.

        Returns:
            A ``RunResult`` holding the final score, the number of
            ``play_action`` calls, the visited location names, whether a
            game-over phrase was seen, and one history tuple per step.
        """
        logger.info(f"[Agent] Starting game={game}, max_steps={max_steps}, seed={seed}")

        locations_visited: set[str] = set()
        result_history: list[tuple[str, str, str]] = []
        moves = 0
        game_completed = False
        # LLM and tool failures are logged and the run continues, so nothing
        # below assigns this; it is reported as None.
        error_msg = None

        # Discover available tools
        try:
            tools = await client.list_tools()
            tool_names = [t.name for t in tools]
            logger.info(f"[Agent] Available tools: {tool_names}")
        except Exception as e:
            logger.error(f"[Agent] Failed to list tools: {e}")
            tool_names = ["play_action", "memory", "get_map", "inventory", "get_failed_actions"]

        # ── Bootstrap: get initial observation ────────────────────────────────
        # This initial "look" is not counted in `moves`.
        try:
            obs_result = await client.call_tool("play_action", {"action": "look"})
            observation = self._extract_text(obs_result)
            logger.info(f"[Agent] Initial observation: {observation[:120]!r}")
        except Exception as e:
            logger.error(f"[Agent] Failed to get initial observation: {e}")
            observation = "You are in a dark room."

        location = self._extract_location(observation)
        locations_visited.add(location)
        self.prev_location = location

        # ── Main ReAct loop ────────────────────────────────────────────────────
        for step in range(1, max_steps + 1):
            logger.info(f"[Agent] ── Step {step}/{max_steps} ── Location: {location} | Score: {self.current_score}")

            # Build prompt
            prompt = self._build_prompt(observation, location, step)

            # Call LLM; on failure substitute a canned reply that plays "look".
            try:
                llm_response = call_llm(
                    prompt=prompt,
                    system_prompt=SYSTEM_PROMPT,
                    seed=seed + step,
                    max_tokens=200,
                )
            except Exception as e:
                logger.error(f"[Agent] LLM call failed at step {step}: {e}")
                llm_response = "REASONING: LLM failed, falling back.\nTOOL: play_action\nARGS: {\"action\": \"look\"}"

            # Parse LLM response
            reasoning, tool_name, tool_args = self._parse_response(llm_response, tool_names)
            logger.info(f"[Agent] Parsed → tool={tool_name}, args={tool_args}")
            logger.debug(f"[Agent] Reasoning: {reasoning}")

            # Validate / fix the tool call
            tool_name, tool_args = self._validate_and_fix(tool_name, tool_args, tool_names, location)

            # Loop / stuck detection & override (game commands only).
            # _anti_loop writes the final command back into tool_args.
            if tool_name == "play_action":
                action = tool_args.get("action", "look")
                _, tool_args = self._anti_loop(action, location, tool_args, verbose)
                moves += 1

            # Execute tool
            tool_ok = True
            try:
                raw_result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_text(raw_result)
                logger.debug(f"[Agent] Tool result: {observation[:120]!r}")
            except Exception as e:
                tool_ok = False
                observation = f"Error executing {tool_name}: {e}"
                logger.error(f"[Agent] Tool call error: {e}")

            # Update tracking & location (only for play_action that actually
            # returned game output). The synthetic error string above is not
            # game text: parsing it could register a bogus room name or a
            # bogus failed action, so it is skipped here.
            if tool_name == "play_action" and tool_ok:
                acted_action = tool_args.get("action", "look").lower()
                self.attempted[location].add(acted_action)

                # Score tracking
                prev_score = self.current_score
                self._update_score(observation)

                # Detect room name for location & failure logic
                detected_room = self._detect_room_name(observation)
                new_location = detected_room if detected_room else location

                # Failure detection — skip if room name detected (action succeeded)
                if not detected_room and self._is_failed(observation):
                    self.failed[location].add(acted_action)
                    logger.warning(f"[Agent] Action '{acted_action}' FAILED at '{location}'")

                # Location & streak update
                if new_location != location:
                    logger.info(f"[Agent] Location change: '{location}' → '{new_location}'")
                    self.location_streak = 0
                    location = new_location
                elif self.current_score > prev_score:
                    self.location_streak = 0  # Score increase = progress
                else:
                    self.location_streak += 1
                    logger.debug(f"[Agent] Stuck streak={self.location_streak} at '{location}'")

                locations_visited.add(location)
                self.prev_location = location
            # Non-play_action tools don't affect location tracking or streak

            # Store step (only the 30 most recent records are kept)
            self.step_history.append({
                "step": step,
                "reasoning": reasoning,
                "tool": tool_name,
                "args": tool_args,
                "observation": observation,
                "score": self.current_score,
                "location": location,
            })
            if len(self.step_history) > 30:
                self.step_history = self.step_history[-30:]

            result_history.append((reasoning, f"{tool_name}({tool_args})", observation[:150]))

            if verbose:
                print(f"\n── Step {step} ──")
                print(f"[REASONING] {reasoning}")
                print(f"[TOOL]      {tool_name}({tool_args})")
                print(f"[OBS]       {observation[:200]}")
                print(f"[SCORE]     {self.current_score} | Location: {location}")

            # Game over check — only on real tool output, never on the
            # synthetic error message.
            if tool_ok and self._is_game_over(observation):
                game_completed = True
                logger.info(f"[Agent] Game over detected at step {step}")
                break

        logger.info(
            f"[Agent] Run complete. Score={self.current_score}, Moves={moves}, "
            f"Locations={len(locations_visited)}, Completed={game_completed}"
        )

        return RunResult(
            final_score=self.current_score,
            max_score=self._get_max_score(game),
            moves=moves,
            locations_visited=locations_visited,
            game_completed=game_completed,
            error=error_msg,
            history=result_history,
        )

    # Prompt builder 

    def _build_prompt(self, observation: str, location: str, step: int) -> str:
        """
        Build a rich, anti-loop-focused prompt.
        Includes:
        - Current observation
        - Recent action history (last N)
        - Per-location failed actions
        - Per-location all-attempted actions
        - Score and step info
        - Loop warnings when needed
        """
        parts: list[str] = []

        parts.append(f"=== GAME STATUS (step {step}) ===")
        parts.append(f"Score: {self.current_score} | Location: {location}")

        # Recent history
        if self.step_history:
            parts.append("\n--- RECENT ACTIONS ---")
            for e in self.step_history[-self.HISTORY_WINDOW:]:
                if e["tool"] == "play_action":
                    obs_short = e["observation"].replace("\n", " ")[:200]
                    reward_note = ""
                    if "pts!" in e["observation"]:
                        reward_note = " ✓SCORED"
                    parts.append(f"  > {e['args'].get('action', '?')}{reward_note} → {obs_short}")
                else:
                    parts.append(f"  > [{e['tool']}] called")

        # Anti-loop: failed actions at current location
        failed_here = self.failed.get(location, set())
        attempted_here = self.attempted.get(location, set())
        if failed_here:
            parts.append(
                f"\n FAILED ACTIONS at '{location}' (DO NOT REPEAT): "
                + ", ".join(sorted(failed_here))
            )
        if attempted_here:
            parts.append(
                f"Already tried at '{location}': "
                + ", ".join(sorted(attempted_here))
            )

        # Loop warning
        if len(self.recent_actions) >= self.LOOP_THRESHOLD:
            last = self.recent_actions[-self.LOOP_THRESHOLD:]
            if len(set(last)) == 1:
                parts.append(
                    f"\n LOOP DETECTED: '{last[0]}' repeated {self.LOOP_THRESHOLD} times! "
                    "Choose something COMPLETELY DIFFERENT."
                )

        # Stuck warning
        if self.location_streak >= self.STUCK_THRESHOLD:
            parts.append(
                f"\n STUCK WARNING: You've been in '{location}' for "
                f"{self.location_streak} turns. Try a new direction or use get_map."
            )

        parts.append("\n--- CURRENT OBSERVATION ---")
        parts.append(observation)

        parts.append("\nWhat do you do next?")

        prompt = "\n".join(parts)
        logger.debug(f"[Agent] Built prompt ({len(prompt)} chars)")
        return prompt

    # ── Response parser ────────────────────────────────────────────────────────

    def _parse_response(
        self, response: str, valid_tools: list[str]
    ) -> tuple[str, str, dict]:
        """
        Parse LLM response in REASONING/TOOL/ARGS format.
        Falls back gracefully to play_action("look").
        """
        reasoning = "No reasoning provided"
        tool_name = "play_action"
        tool_args: dict = {"action": "look"}

        if not response or not isinstance(response, str):
            logger.warning("[Parser] Empty or non-string response")
            return reasoning, tool_name, tool_args

        lines = response.strip().split("\n")
        for line in lines:
            stripped = line.strip()
            upper = stripped.upper()

            if upper.startswith("REASONING:"):
                reasoning = stripped.split(":", 1)[1].strip()

            elif upper.startswith("THOUGHT:"):
                # Accept legacy THOUGHT: format too
                reasoning = stripped.split(":", 1)[1].strip()

            elif upper.startswith("TOOL:"):
                raw = stripped.split(":", 1)[1].strip().lower()
                raw = re.sub(r"[`*\[\]]", "", raw).split()[0] if raw else "play_action"
                tool_name = raw

            elif upper.startswith("ACTION:"):
                # Handle old REASONING/ACTION format from the hint
                action_val = stripped.split(":", 1)[1].strip()
                action_val = re.sub(r"[`*\[\]]", "", action_val).strip()
                tool_name = "play_action"
                tool_args = {"action": action_val}

            elif upper.startswith("ARGS:"):
                args_str = stripped.split(":", 1)[1].strip()
                try:
                    args_str = args_str.replace("'", '"')
                    tool_args = json.loads(args_str)
                except json.JSONDecodeError:
                    # Try extracting action from malformed JSON
                    m = re.search(r'"action"\s*:\s*"([^"]+)"', args_str)
                    if m:
                        tool_args = {"action": m.group(1)}
                    else:
                        logger.warning(f"[Parser] Could not parse ARGS: {args_str!r}, using look")
                        tool_args = {"action": "look"}

        logger.debug(f"[Parser] reasoning={reasoning!r}, tool={tool_name}, args={tool_args}")
        return reasoning, tool_name, tool_args

    # ── Validation & fixing ────────────────────────────────────────────────────

    def _validate_and_fix(
        self,
        tool_name: str,
        tool_args: dict,
        valid_tools: list[str],
        location: str,
    ) -> tuple[str, dict]:
        """Validate tool name and fix forbidden action verbs."""

        # Fix tool name
        tool_aliases = {
            "action": "play_action",
            "do": "play_action",
            "command": "play_action",
            "map": "get_map",
            "location": "get_map",
            "mem": "memory",
            "state": "memory",
            "status": "memory",
            "inv": "inventory",
            "items": "inventory",
            "failed": "get_failed_actions",
        }
        if tool_name not in valid_tools:
            fixed = tool_aliases.get(tool_name, "play_action")
            logger.warning(f"[Validate] Unknown tool '{tool_name}' → '{fixed}'")
            tool_name = fixed

        # Fix forbidden action verbs
        if tool_name == "play_action":
            action = tool_args.get("action", "look")
            forbidden_map = {
                "check": "examine",
                "inspect": "examine",
                "search": "look",
                "grab": "take",
                "pick": "take",
                "use": "examine",
                "investigate": "examine",
                "help": "look",
            }
            words = action.lower().split()
            if words and words[0] in forbidden_map:
                old_verb = words[0]
                words[0] = forbidden_map[old_verb]
                action = " ".join(words)
                logger.debug(f"[Validate] Verb fix: '{old_verb}' → '{words[0]}'")

            # Clean up
            action = re.sub(r"[`*\[\]]", "", action).strip().lower()
            action = " ".join(action.split())
            tool_args["action"] = action
            logger.debug(f"[Validate] Cleaned action: {action!r}")

        return tool_name, tool_args

    # Anti-loop engine

    def _anti_loop(
        self,
        action: str,
        location: str,
        tool_args: dict,
        verbose: bool,
    ) -> tuple[str, dict]:
        """
        Detect and break loops/deadlocks with escalating strategies.

        Strategy levels:
        1. Same action 3× → force a random unexplored direction
        2. Already attempted at this location → pick from unexplored directions
        3. Stuck for 4+ turns → rotate through escape actions
        """
        self.recent_actions.append(action)
        if len(self.recent_actions) > 10:
            self.recent_actions = self.recent_actions[-10:]

        attempted_here = self.attempted.get(location, set())
        failed_here = self.failed.get(location, set())

        # Immediate consecutive repetition
        if (
            len(self.recent_actions) >= self.LOOP_THRESHOLD
            and len(set(self.recent_actions[-self.LOOP_THRESHOLD:])) == 1
        ):
            forced = self._pick_escape(location)
            logger.warning(
                f"[AntiLoop] L1 – '{action}' repeated {self.LOOP_THRESHOLD}× → forcing '{forced}'"
            )
            if verbose:
                print(f"[ANTI-LOOP] Repeated action → forcing '{forced}'")
            action = forced
            self.recent_actions.append(action)

        # Action already failed at this location
        elif action.lower() in failed_here:
            forced = self._pick_escape(location)
            logger.warning(
                f"[AntiLoop] L2 – '{action}' already FAILED at '{location}' → forcing '{forced}'"
            )
            if verbose:
                print(f"[ANTI-LOOP] Already failed → forcing '{forced}'")
            action = forced
            self.recent_actions.append(action)

        # Level 3: Stuck at same location too long
        elif self.location_streak >= self.STUCK_THRESHOLD:
            forced = self._pick_escape(location)
            logger.warning(
                f"[AntiLoop] L3 – Stuck {self.location_streak} turns → forcing '{forced}'"
            )
            if verbose:
                print(f"[ANTI-LOOP] Stuck {self.location_streak} turns → forcing '{forced}'")
            action = forced
            self.location_streak = 0  # Reset after escape attempt
            self.recent_actions.append(action)

        tool_args["action"] = action
        return action, tool_args

    def _pick_escape(self, location: str) -> str:
        """
        Pick an escape action not yet attempted at this location.
        Rotates through the escape list; falls back to 'look'.
        """
        attempted_here = self.attempted.get(location, set())
        for _ in range(len(self._escape_actions)):
            candidate = self._escape_actions[self._escape_idx % len(self._escape_actions)]
            self._escape_idx += 1
            if candidate.lower() not in attempted_here:
                logger.debug(f"[AntiLoop] Escape → '{candidate}'")
                return candidate
        logger.debug("[AntiLoop] All escapes tried, falling back to 'look'")
        return "look"

    # Utility helpers

    def _extract_text(self, result) -> str:
        """Extract plain text from a FastMCP tool result."""
        if hasattr(result, "content") and result.content:
            return result.content[0].text
        if isinstance(result, list) and result:
            item = result[0]
            return item.text if hasattr(item, "text") else str(item)
        return str(result)

    def _detect_room_name(self, observation: str) -> str:
        """Detect a room name in observation text.

        Z-machine games output room descriptions as:
            [optional flavor text]\\n\\n<Room Name>\\n<Description>
        A room name appears after a blank line, is short, and has no
        sentence-ending punctuation. Returns room name or empty string.
        """
        lines = observation.split("\n")
        prev_blank = True  # Treat start of text as "after blank"
        for line in lines:
            stripped = line.strip()
            if not stripped:
                prev_blank = True
                continue
            if prev_blank:
                if (2 < len(stripped) < 60
                    and not stripped.endswith(('.', '!', '?', ':', ',', ';', '*', '"'))
                    and not stripped.startswith(('[', '>'))
                ):
                    return stripped
            prev_blank = False
        return ""

    def _extract_location(self, observation: str) -> str:
        """Extract room name, falling back to previous location."""
        detected = self._detect_room_name(observation)
        return detected if detected else (self.prev_location or "Unknown")

    def _is_failed(self, observation: str) -> bool:
        """Detect parser-rejected or no-effect responses."""
        fail_phrases = [
            "i don't understand",
            "i don't know the word",
            "that's not a verb",
            "you can't go that way",
            "there is no",
            "nothing happens",
            "that doesn't work",
            "you can't",
            "it is fixed",
            "doesn't open",
            "you don't see",
            "i beg your pardon",
            "what do you want to",
            "huh?",
            "you can't do that",
            "already",
            "not see that",
            "not know how",
            "not have",
            "won't budge",
            "there's nothing",
            "that's not something",
            "not allowed",
        ]
        obs_lower = observation.lower()
        return any(p in obs_lower for p in fail_phrases)

    def _update_score(self, text: str) -> None:
        """Parse score from play_action output."""
        m = re.search(r"\[Score:\s*(\d+)", text)
        if m:
            new_score = int(m.group(1))
            if new_score != self.current_score:
                logger.info(f"[Agent] Score update: {self.current_score} → {new_score}")
            self.current_score = max(self.current_score, new_score)
            self.best_score = max(self.best_score, self.current_score)

    def _is_game_over(self, text: str) -> bool:
        """Detect game-over conditions."""
        phrases = [
            "game over",
            "you have died",
            "you are dead",
            "*** you have died ***",
            "*** game over ***",
            "you have won",
            "*** you have won ***",
        ]
        text_lower = text.lower()
        result = any(p in text_lower for p in phrases)
        if result:
            logger.info(f"[Agent] Game-over phrase detected in: {text[:80]!r}")
        return result

    def _get_max_score(self, game: str) -> int:
        """Return known max scores for common games."""
        max_scores = {
            "zork1": 350,
            "zork2": 400,
            "zork3": 7,
            "hitchhiker": 400,
            "trinity": 100,
            "enchanter": 400,
            "planetfall": 80,
            "infidel": 400,
            "suspect": 360,
            "starcross": 430,
            "deadline": 350,
            "witness": 100,
            "lurking": 100,
            "lostpig": 7,
        }
        score = max_scores.get(game.lower(), 350)
        logger.debug(f"[Agent] Max score for '{game}': {score}")
        return score


# ==============================================================================
# Local Testing
# ==============================================================================

async def test_agent():
    """Run a 30-step zork1 session through a fastmcp Client on "mcp_server.py" and print a summary."""
    from fastmcp import Client

    settings = dict(game="zork1", max_steps=30, seed=42, verbose=True)
    agent = StudentAgent()

    async with Client("mcp_server.py") as client:
        result = await agent.run(client=client, **settings)
        report = (
            f"\n{'=' * 50}",
            f"Final Score   : {result.final_score} / {result.max_score}",
            f"Moves         : {result.moves}",
            f"Locations     : {len(result.locations_visited)}",
            f"Completed     : {result.game_completed}",
        )
        for row in report:
            print(row)


# Script entry point: when executed directly (not imported), run the
# test_agent() coroutine to completion with asyncio.run().
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())