"""Agent class for DeFi simulation."""

import json
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass, field

from api.minimax_client import MiniMaxClient
from config import INITIAL_TOKENS


@dataclass
class Agent:
    """DeFi trading agent powered by MiniMax."""

    name: str
    token_a: float = INITIAL_TOKENS
    token_b: float = INITIAL_TOKENS
    trade_history: List[Dict] = field(default_factory=list)
    learning_summary: str = ""
    alliances: Dict[str, str] = field(default_factory=dict)
    alliance_proposals: Dict[str, int] = field(default_factory=dict)  # Track proposals per partner
    consecutive_inaction: int = 0  # Track boredom
    total_boredom_penalty: float = 0  # Accumulated penalty

    # Boredom penalty config - MORE AGGRESSIVE
    BOREDOM_THRESHOLD: int = 1  # Start penalizing after 1 inaction (immediate!)
    BOREDOM_PENALTY_PER_TURN: float = 10.0  # Lose 10 tokens per turn of inaction

    def __post_init__(self):
        self.client = MiniMaxClient()

    def get_state(self) -> Dict:
        """Get current state for decision making."""
        return {
            "name": self.name,
            "token_a": round(self.token_a, 2),
            "token_b": round(self.token_b, 2),
            "profit": round(self.calculate_profit(), 2),
            "alliances": self.alliances,
            "consecutive_inaction": self.consecutive_inaction,
            "boredom_penalty": round(self.total_boredom_penalty, 2)
        }

    def decide(self, observation: Dict, pool_state: Dict, other_agents: List["Agent"], turn: int) -> Tuple[Dict, str]:
        """
        Ask MiniMax for a decision based on current state.

        Returns:
            Tuple of (decision_dict, thinking_text)
        """
        prompt = self._build_prompt(observation, pool_state, other_agents, turn)

        system_prompt = """You are a strategic DeFi trader in an automated market simulation.
Analyze the market state and make optimal trading decisions.
Output ONLY valid JSON with your reasoning."""

        decision, thinking = self.client.call(prompt, system_prompt)

        # Log the decision
        self.trade_history.append({
            "turn": turn,
            "action": decision.get("action", decision.get("action_type", "unknown")),
            "reasoning": decision.get("reasoning", ""),
            "thinking": thinking
        })

        return decision, thinking

    def _build_prompt(self, observation: Dict, pool_state: Dict, other_agents: List["Agent"], turn: int) -> str:
        """Build the decision prompt."""
        other_states = [a.get_state() for a in other_agents if a.name != self.name]

        # Find allied agents
        allied_names = [name for name, status in self.alliances.items() if status == 'success']
        allied_info = ""
        if allied_names:
            allied_info = f"\nYour ALLIES: {', '.join(allied_names)} - Coordinate with them for BONUS REWARDS!"

        # Boredom warning
        boredom_warning = ""
        if self.consecutive_inaction >= self.BOREDOM_THRESHOLD:
            penalty = (self.consecutive_inaction - self.BOREDOM_THRESHOLD + 1) * self.BOREDOM_PENALTY_PER_TURN
            boredom_warning = f"""
!!! URGENT: You have been inactive for {self.consecutive_inaction} consecutive turns.
You are losing {penalty:.1f} tokens per turn due to boredom penalty.
ACT NOW to avoid further losses!"""

        prompt = f"""
You are {self.name}, an AI agent in a DeFi market simulation.

=== YOUR STATE ===
Token A: {self.token_a:.2f}
Token B: {self.token_b:.2f}
Profit: {self.calculate_profit():.2f}
Consecutive inaction: {self.consecutive_inaction}
{allied_info}
{boredom_warning}

=== MARKET STATE ===
Pool reserves: A={pool_state.get('reserve_a', 0):.2f}, B={pool_state.get('reserve_b', 0):.2f}
Price (A/B): {pool_state.get('price_ab', 0):.4f}
Total liquidity: {pool_state.get('total_liquidity', 0):.2f}

=== OTHER AGENTS ===
{json.dumps(other_states, indent=2)}

=== YOUR LEARNING ===
{self.learning_summary if self.learning_summary else "No previous runs yet."}

=== REWARDS FOR ACTIONS ===
- SWAP: Active trading +3 tokens, profitable swap +5 extra!
- PROVIDE_LIQUIDITY: Earns fees from all swaps, +8 bonus tokens
- PROPOSE_ALLIANCE: If they accept, you BOTH get +4 bonus tokens (repeating gives less!)
- COORDINATED TRADES: Trade during volatility +5 bonus tokens!
- POSITIVE PROFIT: End turn with profit = +15 bonus tokens!
- ESCAPE VELOCITY: TOP AGENT gets 2x on ALL bonuses!

=== AVAILABLE ACTIONS ===
1. "swap": Trade tokens (specify from, to, amount) - ACTIVE TRADING
2. "provide_liquidity": Add liquidity to pool (specify amounts) - EARNS FEES + BONUS
3. "propose_alliance": Suggest collaboration (specify agent name) - CAN GIVE BONUS
4. "do_nothing": Wait - CAUSES BOREDOM PENALTY!

Output JSON:
{{
    "action": "swap|provide_liquidity|propose_alliance|do_nothing",
    "reasoning": "your reasoning",
    "payload": {{...action specific data...}}
}}
"""
        return prompt

    def calculate_profit(self) -> float:
        """Calculate profit from initial state."""
        return (self.token_a + self.token_b) - (INITIAL_TOKENS * 2)

    def apply_boredom_penalty(self) -> float:
        """
        Apply boredom penalty for inaction.
        Returns the penalty amount applied.
        """
        if self.consecutive_inaction >= self.BOREDOM_THRESHOLD:
            # Calculate penalty based on how long they've been inactive
            penalty_turns = self.consecutive_inaction - self.BOREDOM_THRESHOLD + 1
            penalty = penalty_turns * self.BOREDOM_PENALTY_PER_TURN
            self.token_a -= penalty
            self.total_boredom_penalty += penalty
            return penalty
        return 0

    def reset_inaction_counter(self):
        """Reset inaction counter when taking active action."""
        self.consecutive_inaction = 0

    def increment_inaction_counter(self):
        """Increment inaction counter for do_nothing."""
        self.consecutive_inaction += 1

    def get_alliance_fatigue(self, partner: str) -> float:
        """
        Calculate alliance fatigue penalty.
        Repeated proposals to same partner give diminishing returns.
        Returns multiplier (1.0 = no fatigue, 0.0 = max fatigue).
        """
        proposals = self.alliance_proposals.get(partner, 0)
        # First proposal: 100% bonus
        # Second: 50% bonus
        # Third+: 0% bonus
        if proposals == 0:
            return 1.0
        elif proposals == 1:
            return 0.5
        else:
            return 0.0

    def record_alliance_proposal(self, partner: str):
        """Record that we proposed alliance to this partner."""
        self.alliance_proposals[partner] = self.alliance_proposals.get(partner, 0) + 1

    def infer_strategy(self) -> str:
        """Infer the agent's strategy from recent actions."""
        if not self.trade_history:
            return "unknown"

        recent = self.trade_history[-10:]
        actions = [h["action"] for h in recent if "action" in h]

        if not actions:
            return "unknown"

        # Return most common action
        from collections import Counter
        return Counter(actions).most_common(1)[0][0]

    def update_learning(self, run_number: int, metrics: Dict):
        """Extract learnings after a run completes."""
        prompt = f"""
You just completed run {run_number}.

Your performance: Profit={self.calculate_profit():.2f}, Strategy={self.infer_strategy()}
Market metrics: Gini={metrics.get('gini_coefficient', 0):.3f}, Avg Profit={metrics.get('avg_agent_profit', 0):.2f}

What did you learn in 1-2 sentences?
Output JSON: {{"learning": "your learning"}}
"""

        try:
            response, _ = self.client.call(prompt)
            self.learning_summary = response.get("learning", "")
        except Exception:
            self.learning_summary = "Learning extraction failed."

    def execute_action(self, decision: Dict, pool: "Pool") -> bool:
        """Execute the decided action on the pool."""
        action = decision.get("action", decision.get("action_type", ""))
        payload = decision.get("payload", {})

        if action == "swap":
            return self._execute_swap(payload, pool)
        elif action == "provide_liquidity":
            return self._execute_liquidity(payload, pool)
        elif action == "propose_alliance":
            return self._execute_alliance(payload)
        else:
            # do_nothing or unknown action - always succeeds
            return True

    def _execute_swap(self, payload: Dict, pool: "Pool") -> bool:
        """Execute a swap action."""
        amount = payload.get("amount", 0)
        from_token = payload.get("from", "a")

        if from_token == "a" and self.token_a >= amount:
            output, fee = pool.swap("a", amount, self.name)
            self.token_a -= amount
            self.token_b += output
            return True
        elif from_token == "b" and self.token_b >= amount:
            output, fee = pool.swap("b", amount, self.name)
            self.token_b -= amount
            self.token_a += output
            return True

        return False

    def _execute_liquidity(self, payload: Dict, pool: "Pool") -> bool:
        """Execute a provide liquidity action."""
        amount_a = payload.get("amount_a", 0)
        amount_b = payload.get("amount_b", 0)

        if self.token_a >= amount_a and self.token_b >= amount_b:
            pool.provide_liquidity(amount_a, amount_b, self.name)
            self.token_a -= amount_a
            self.token_b -= amount_b
            return True

        return False

    def _execute_alliance(self, payload: Dict) -> bool:
        """Record an alliance proposal."""
        agent_name = payload.get("agent_name", "")
        if agent_name:
            self.alliances[agent_name] = "proposed"
            return True
        return False


def test_agent():
    """Test the Agent class."""
    from core.defi_mechanics import Pool

    print("Testing Agent class...")

    # Create agent
    agent = Agent("TestAgent")
    print(f"Created agent: {agent.name}")
    print(f"Initial state: {agent.get_state()}")

    # Create pool
    pool = Pool(reserve_a=1000, reserve_b=1000)

    # Get decision
    observation = {"turn": 0, "event": "test"}
    pool_state = pool.__dict__

    print("\nGetting decision from MiniMax...")
    decision, thinking = agent.decide(observation, pool_state, [], 0)

    print(f"Decision: {json.dumps(decision, indent=2)}")
    print(f"Thinking length: {len(thinking)}")
    print(f"Profit: {agent.calculate_profit():.2f}")
    print(f"Strategy: {agent.infer_strategy()}")

    # Test action execution
    if decision.get("action") == "swap":
        agent.execute_action(decision, pool)
        print(f"After swap: {agent.get_state()}")

    print("\nAgent test complete!")


if __name__ == "__main__":
    test_agent()