#!/usr/bin/env python3
"""
HybriKo-117M Linux Function Calling Demo
Usage: python scripts/demo_linux_fc.py [--checkpoint PATH] [--query TEXT]

Example:
  [사용자] 현재 폴더의 파일 목록을 보여줘
  [HybriKo] Thought: 디렉토리 내용을 확인합니다.
           Action: ls_command
           Action Input: {"path": ".", "options": "-l"}
"""

import torch
import sentencepiece as spm
import sys
import json
import re
import argparse

sys.path.insert(0, ".")
from hybridko.model import HybriKoModel, HybriKoConfig


# Exact system prompt used during training (the tool schemas are pretty-printed
# JSON with 2-space indentation). create_prompt() interpolates this string
# verbatim into the system turn, so every character of it, whitespace included,
# becomes part of the model input.
# NOTE(review): presumably any edit here (wording, tool order, indentation)
# moves the prompt away from what the model was fine-tuned on -- confirm
# against the training data before changing anything.
SYSTEM_PROMPT = """You are a Linux command assistant. You can use many tools (functions) to help users with their Linux tasks.
At each step, you need to give your thought to analyze the status now and what to do next, with a function call to actually execute your step. Your output should follow this format:
Thought:
Action
Action Input:

After the call, you will get the call result, and you are now in a new state.
Then you will analyze your status now, then decide what to do next...
After many (Thought-call) pairs, you finally perform the task, then you can give your final answer.

Remember:
1. The state change is irreversible, you can't go back to one of the former state.
2. All the thought is short, at most in 5 sentences.
3. ALWAYS call "Finish" function at the end of the task.
4. If you cannot handle the task with the available tools, say you don't know and call Finish with give_answer.

You have access of the following tools:
[
  {
    "name": "ls_command",
    "description": "List directory contents.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {
          "type": "string"
        },
        "options": {
          "type": "string"
        }
      },
      "required": [
        "path"
      ]
    }
  },
  {
    "name": "cd_command",
    "description": "Change the current working directory.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {
          "type": "string"
        }
      },
      "required": [
        "path"
      ]
    }
  },
  {
    "name": "mkdir_command",
    "description": "Create a new directory.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {
          "type": "string"
        }
      },
      "required": [
        "path"
      ]
    }
  },
  {
    "name": "rm_command",
    "description": "Remove files or directories.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {
          "type": "string"
        },
        "recursive": {
          "type": "boolean"
        }
      },
      "required": [
        "path"
      ]
    }
  },
  {
    "name": "cp_command",
    "description": "Copy files or directories.",
    "parameters": {
      "type": "object",
      "properties": {
        "source": {
          "type": "string"
        },
        "destination": {
          "type": "string"
        }
      },
      "required": [
        "source",
        "destination"
      ]
    }
  },
  {
    "name": "mv_command",
    "description": "Move or rename files.",
    "parameters": {
      "type": "object",
      "properties": {
        "source": {
          "type": "string"
        },
        "destination": {
          "type": "string"
        }
      },
      "required": [
        "source",
        "destination"
      ]
    }
  },
  {
    "name": "find_command",
    "description": "Find files by name pattern.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {
          "type": "string"
        },
        "name": {
          "type": "string"
        }
      },
      "required": [
        "path",
        "name"
      ]
    }
  },
  {
    "name": "cat_command",
    "description": "Display file contents.",
    "parameters": {
      "type": "object",
      "properties": {
        "file": {
          "type": "string"
        }
      },
      "required": [
        "file"
      ]
    }
  },
  {
    "name": "grep_command",
    "description": "Search for patterns in files.",
    "parameters": {
      "type": "object",
      "properties": {
        "pattern": {
          "type": "string"
        },
        "file": {
          "type": "string"
        }
      },
      "required": [
        "pattern",
        "file"
      ]
    }
  },
  {
    "name": "head_command",
    "description": "Display first lines of a file.",
    "parameters": {
      "type": "object",
      "properties": {
        "file": {
          "type": "string"
        },
        "lines": {
          "type": "integer"
        }
      },
      "required": [
        "file"
      ]
    }
  },
  {
    "name": "tail_command",
    "description": "Display last lines of a file.",
    "parameters": {
      "type": "object",
      "properties": {
        "file": {
          "type": "string"
        },
        "lines": {
          "type": "integer"
        }
      },
      "required": [
        "file"
      ]
    }
  },
  {
    "name": "wc_command",
    "description": "Count lines, words, and bytes.",
    "parameters": {
      "type": "object",
      "properties": {
        "file": {
          "type": "string"
        }
      },
      "required": [
        "file"
      ]
    }
  },
  {
    "name": "ps_command",
    "description": "Display running processes.",
    "parameters": {
      "type": "object",
      "properties": {
        "options": {
          "type": "string"
        }
      },
      "required": []
    }
  },
  {
    "name": "df_command",
    "description": "Display disk space usage.",
    "parameters": {
      "type": "object",
      "properties": {
        "options": {
          "type": "string"
        }
      },
      "required": []
    }
  },
  {
    "name": "du_command",
    "description": "Display directory space usage.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {
          "type": "string"
        },
        "options": {
          "type": "string"
        }
      },
      "required": [
        "path"
      ]
    }
  },
  {
    "name": "top_command",
    "description": "Display system processes in real-time.",
    "parameters": {
      "type": "object",
      "properties": {},
      "required": []
    }
  },
  {
    "name": "ping_command",
    "description": "Test network connectivity.",
    "parameters": {
      "type": "object",
      "properties": {
        "host": {
          "type": "string"
        },
        "count": {
          "type": "integer"
        }
      },
      "required": [
        "host"
      ]
    }
  },
  {
    "name": "curl_command",
    "description": "Transfer data from URL.",
    "parameters": {
      "type": "object",
      "properties": {
        "url": {
          "type": "string"
        },
        "options": {
          "type": "string"
        }
      },
      "required": [
        "url"
      ]
    }
  },
  {
    "name": "chmod_command",
    "description": "Change file permissions.",
    "parameters": {
      "type": "object",
      "properties": {
        "mode": {
          "type": "string"
        },
        "file": {
          "type": "string"
        }
      },
      "required": [
        "mode",
        "file"
      ]
    }
  },
  {
    "name": "tar_command",
    "description": "Archive or extract files.",
    "parameters": {
      "type": "object",
      "properties": {
        "options": {
          "type": "string"
        },
        "archive": {
          "type": "string"
        },
        "files": {
          "type": "string"
        }
      },
      "required": [
        "options",
        "archive"
      ]
    }
  },
  {
    "name": "Finish",
    "description": "Complete the task.",
    "parameters": {
      "type": "object",
      "properties": {
        "give_answer": {
          "type": "string"
        }
      },
      "required": [
        "give_answer"
      ]
    }
  }
]"""


def load_model(checkpoint_path="checkpoints/linux_fc_sft/checkpoint_epoch_15.pt",
               tokenizer_path="tokenizer/HybriKo_tok.model"):
    """Load the HybriKo model and its SentencePiece tokenizer.

    Args:
        checkpoint_path: Path to a checkpoint readable by ``torch.load`` that
            stores the weights under the ``"model_state_dict"`` key.
        tokenizer_path: Path to the SentencePiece model file. Defaults to the
            location this demo has always used.

    Returns:
        A ``(model, sp, device)`` tuple: the model in eval mode and moved to
        ``device``, the loaded ``SentencePieceProcessor``, and the device
        string (``"cuda"`` when available, otherwise ``"cpu"``).

    Raises:
        KeyError: If the checkpoint has no ``"model_state_dict"`` entry.
    """
    print("Loading tokenizer...")
    sp = spm.SentencePieceProcessor()
    sp.Load(tokenizer_path)

    print("Loading model...")
    # NOTE(review): these hyperparameters presumably mirror the configuration
    # the checkpoint was trained with -- load_state_dict() below will fail on
    # a shape mismatch if they do not.
    config = HybriKoConfig(
        d_model=768, n_layers=12, vocab_size=32000,
        n_heads=12, n_kv_heads=3, ff_mult=3,
        max_seq_len=6144, dropout=0.0
    )
    model = HybriKoModel(config)

    # SECURITY: weights_only=False makes torch.load unpickle arbitrary Python
    # objects, which can execute code. Only load checkpoints from a trusted
    # source. Loading on CPU first keeps this working on machines without the
    # GPU the checkpoint was saved from.
    checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model_state_dict"])

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device).eval()
    print(f"Model loaded on {device}\n")
    return model, sp, device


def _json_object_end(text):
    """Return the index just past the first balanced ``{...}`` in ``text``.

    Braces inside double-quoted JSON strings (including after backslash
    escapes) are ignored. Returns ``None`` while the object is still open.
    """
    depth = 0
    in_string = False
    escaped = False
    for idx, ch in enumerate(text):
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return idx + 1
    return None


def generate(model, tokenizer, prompt, device, max_new_tokens=150):
    """Greedily decode a response and stop once the tool call is complete.

    Decoding ends at the first of:
      * the ``<|im_end|>`` token, when the tokenizer knows that piece;
      * a complete JSON object following ``Action Input:``; the text is then
        cut right after the closing brace;
      * ``max_new_tokens`` generated tokens.

    Args:
        model: Callable taking a ``(1, seq)`` tensor of token ids and
            returning either a dict with a ``"logits"`` entry or an object
            with a ``.logits`` attribute.
        tokenizer: Object exposing ``EncodeAsIds``, ``DecodeIds``,
            ``PieceToId`` and ``unk_id`` (SentencePiece-style API).
        prompt: Full prompt text to condition on.
        device: Device on which the input tensor is created.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text of the newly generated tokens (prompt excluded).
    """
    input_ids = tokenizer.EncodeAsIds(prompt)
    prompt_len = len(input_ids)
    generated = torch.tensor([input_ids], dtype=torch.long, device=device)

    # An unknown piece maps to the unk id, meaning there is no usable EOS.
    eos_id = tokenizer.PieceToId("<|im_end|>")
    if eos_id == tokenizer.unk_id():
        eos_id = None

    marker = "Action Input:"
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # The whole sequence is re-run every step (no cache is used).
            outputs = model(generated)
            logits = outputs["logits"] if isinstance(outputs, dict) else outputs.logits
            next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
            generated = torch.cat([generated, next_token], dim=1)

            # Compare against None explicitly: token id 0 is a valid EOS id.
            if eos_id is not None and next_token.item() == eos_id:
                break

            new_text = tokenizer.DecodeIds(generated[0, prompt_len:].tolist())
            marker_idx = new_text.find(marker)
            if marker_idx == -1:
                continue

            # Locate the first non-whitespace character after the marker in
            # new_text itself so the final slice uses correct offsets.
            payload_start = marker_idx + len(marker)
            payload = new_text[payload_start:]
            json_start = payload_start + len(payload) - len(payload.lstrip())
            if not new_text.startswith("{", json_start):
                continue

            json_len = _json_object_end(new_text[json_start:])
            if json_len is not None:
                # Drop anything the last token emitted after the closing brace.
                return new_text[:json_start + json_len]

    return tokenizer.DecodeIds(generated[0, prompt_len:].tolist())


def create_prompt(user_input):
    """Wrap ``user_input`` in a ChatML conversation ready for generation.

    The prompt consists of the system turn (carrying ``SYSTEM_PROMPT``), the
    user turn, and an opened assistant turn for the model to complete.
    """
    system_turn = f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"
    user_turn = f"<|im_start|>user\n{user_input}<|im_end|>\n"
    assistant_header = "<|im_start|>assistant\n"
    return system_turn + user_turn + assistant_header


def parse_response(response):
    """Split a raw model response into thought, action and action input.

    Args:
        response: Decoded model output, possibly followed by chat-template
            markers and trailing text.

    Returns:
        A dict with the keys:
          * ``"thought"``: text after ``Thought:`` up to ``Action:`` (or the
            end of the response), or ``None``.
          * ``"action"``: the identifier after ``Action:``, or ``None``.
          * ``"action_input"``: the decoded JSON object after
            ``Action Input:``; the raw ``{...}`` text when it is not valid
            JSON; ``None`` when no object is present.
          * ``"raw"``: the response cut at the first chat-template marker.
    """
    # Keep only the first assistant turn: anything after <|im_end|>, or after
    # a new <|im_start|>, is the model running past its answer.
    for marker in ("<|im_end|>", "<|im_start|>"):
        response = response.split(marker)[0]

    result = {"thought": None, "action": None, "action_input": None, "raw": response}

    # Extract Thought
    thought_match = re.search(r"Thought:\s*(.+?)(?=\s*Action:|\s*$)", response, re.DOTALL)
    if thought_match:
        result["thought"] = thought_match.group(1).strip()

    # Extract Action ("Action Input:" cannot match: no colon follows "Action")
    action_match = re.search(r"Action:\s*(\w+)", response)
    if action_match:
        result["action"] = action_match.group(1)

    # Extract Action Input. A real JSON decoder handles what a brace regex
    # cannot: empty objects ("{}"), nested objects, and "}" inside strings.
    input_match = re.search(r"Action Input:\s*(?=\{)", response)
    if input_match:
        start = input_match.end()
        try:
            parsed, _end = json.JSONDecoder().raw_decode(response, start)
        except json.JSONDecodeError:
            # Not valid JSON: fall back to the raw brace-delimited text so the
            # caller can still show what the model produced.
            raw_match = re.compile(r"\{[^}]+\}").match(response, start)
            if raw_match:
                result["action_input"] = raw_match.group(0)
        else:
            result["action_input"] = parsed

    return result


def run_single(model, tokenizer, device, user_input):
    """Answer one user request: build the prompt, generate, parse the result."""
    return parse_response(
        generate(model, tokenizer, create_prompt(user_input), device)
    )


def _ensure_utf8(stream):
    """Return ``stream`` set up for UTF-8 text with undecodable bytes replaced.

    The stream is returned untouched when it already uses UTF-8 (compared
    case-insensitively) or when it cannot be re-encoded at all.
    """
    import io

    encoding = (getattr(stream, "encoding", None) or "").lower().replace("_", "-")
    if encoding in ("utf-8", "utf8"):
        return stream

    # Prefer in-place reconfiguration; it keeps the same stream object.
    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding="utf-8", errors="replace")
        return stream

    buffer = getattr(stream, "buffer", None)
    if buffer is None:
        # No underlying binary buffer to wrap; leave the stream as it is.
        return stream
    return io.TextIOWrapper(buffer, encoding="utf-8", errors="replace")


def _print_result(result):
    """Print the parsed thought, action and action input that are present."""
    if result["thought"]:
        print(f"Thought: {result['thought']}")
    if result["action"]:
        print(f"Action:  {result['action']}")
    # Compare against None so an empty argument object ({}) is still shown.
    if result["action_input"] is not None:
        print(f"Input:   {json.dumps(result['action_input'], ensure_ascii=False)}")


def main():
    """Run the demo from the command line.

    With ``--query`` a single request is answered and the program exits.
    Otherwise an interactive loop reads requests from stdin until EOF,
    Ctrl-C, or one of ``quit`` / ``exit`` / ``q``.
    """
    # Korean input/output needs UTF-8 regardless of the terminal's locale.
    sys.stdin = _ensure_utf8(sys.stdin)
    sys.stdout = _ensure_utf8(sys.stdout)

    parser = argparse.ArgumentParser(description="HybriKo Linux FC Demo")
    parser.add_argument("--checkpoint", default="checkpoints/linux_fc_sft/checkpoint_epoch_15.pt")
    parser.add_argument("--query", type=str, help="Single query mode (non-interactive)")
    args = parser.parse_args()

    print("=" * 60)
    print("  HybriKo-117M Linux Function Calling Demo")
    print("=" * 60)

    model, tokenizer, device = load_model(args.checkpoint)

    # Single query mode
    if args.query:
        result = run_single(model, tokenizer, device, args.query)
        print(f"Input: {args.query}")
        print("-" * 40)
        _print_result(result)
        return

    # Interactive mode
    print("Supported commands:")
    print("  ls, cd, mkdir, rm, cp, mv, find, cat, grep, head,")
    print("  tail, wc, ps, df, du, top, ping, curl, chmod, tar")
    print("\nType 'quit' or 'exit' to exit")
    print("=" * 60)

    while True:
        try:
            print("\n[User] ", end="", flush=True)
            user_input = sys.stdin.readline()
            if not user_input:  # EOF
                break
            user_input = user_input.strip()
            if not user_input:
                continue
            if user_input.lower() in ["quit", "exit", "q"]:
                print("Goodbye!")
                break

            result = run_single(model, tokenizer, device, user_input)

            print("\n[HybriKo]")
            print("-" * 40)
            _print_result(result)
            print("-" * 40)

        except KeyboardInterrupt:
            print("\nGoodbye!")
            break
        except UnicodeDecodeError as e:
            print(f"\n[Error] Encoding issue: {e}")
            print("Try using --query option instead: python scripts/demo_linux_fc.py --query \"your query\"")
            continue


# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()