#!/usr/bin/env python3
"""
HybriKo-117M Linux Function Calling Demo (CPU Version)

CPU-optimized version that auto-downloads from HuggingFace.
Works on Windows/Linux/macOS without GPU.

Usage:
  pip install torch sentencepiece huggingface_hub
  python demo_cpu.py --query "현재 폴더의 파일 목록을 보여줘"
  python demo_cpu.py  # Interactive mode
"""

import torch
import sentencepiece as spm
import sys
import json
import re
import argparse
import os
import io

# Force UTF-8 on the console streams so Korean queries and the emoji status
# line survive on consoles whose default code page is not UTF-8 (typically
# Windows).
#
# `reconfigure()` changes the encoding in place, which keeps the stream's
# existing buffering mode; building a fresh TextIOWrapper does not. The
# wrapper path is kept only as a fallback for stream objects that lack
# `reconfigure()` but still expose a binary `.buffer`. Streams that are None
# or offer neither attribute are left untouched instead of raising.
for _stream_name in ("stdout", "stdin"):
    _stream = getattr(sys, _stream_name)
    # The encoding may be reported as "UTF-8", "utf-8", "utf8", or None.
    _encoding = (getattr(_stream, "encoding", None) or "").lower()
    if _encoding in ("utf-8", "utf8"):
        continue
    if hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8", errors="replace")
    elif hasattr(_stream, "buffer"):
        setattr(
            sys,
            _stream_name,
            io.TextIOWrapper(_stream.buffer, encoding="utf-8", errors="replace"),
        )


def download_files():
    """Download the model code, weights, and tokenizer from HuggingFace.

    Each file is fetched into the current working directory, and only when a
    file of that name does not already exist there, so repeated runs reuse
    the local copies.

    If ``huggingface_hub`` is not importable, it is installed with pip first.

    Raises:
        ImportError: if ``huggingface_hub`` is missing and cannot be
            installed or imported afterwards.
    """
    try:
        from huggingface_hub import hf_hub_download
    except ImportError:
        import importlib
        import subprocess

        print("Installing huggingface_hub...")
        # Run pip through the interpreter that is executing this script: a
        # bare `pip` found on PATH may belong to a different Python, in which
        # case the retry import below would still fail.
        completed = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-q", "huggingface_hub"],
            check=False,
        )
        if completed.returncode != 0:
            raise ImportError(
                "huggingface_hub is not installed and "
                f"'pip install huggingface_hub' exited with status {completed.returncode}"
            )
        # Let the import system notice the package that was just installed.
        importlib.invalidate_caches()
        from huggingface_hub import hf_hub_download

    repo_id = "Yaongi/HybriKo-117M-LinuxFC-SFT-v2"
    files_to_download = [
        "configuration_hybridko.py",
        "modeling_hybridko.py",
        "pytorch_model.pt",
        "HybriKo_tok.model",
    ]

    for filename in files_to_download:
        # Existence check is relative to the current working directory,
        # which is also the download target (local_dir=".").
        if not os.path.exists(filename):
            print(f"Downloading {filename}...")
            hf_hub_download(repo_id, filename, local_dir=".")


# Import-time bootstrap. The order of the three steps below matters:
# 1. Fetch the model's Python sources, weights, and tokenizer into the current
#    working directory (files already present there are skipped).
download_files()

# 2. Put the current working directory first on the module search path so the
#    freshly downloaded configuration_hybridko.py / modeling_hybridko.py are
#    importable.
sys.path.insert(0, ".")

# 3. Import the model classes; this can only succeed after steps 1 and 2.
from configuration_hybridko import HybriKoConfig
from modeling_hybridko import HybriKoModel


# System prompt used during training (per the original author's note).
# create_prompt() embeds it verbatim as the ChatML system turn, so the text
# below is runtime data: editing even whitespace changes what the model sees.
# Contents: the Thought / Action / Action Input output format, four rules, and
# JSON schemas for 20 Linux command tools plus the terminating "Finish" tool.
# NOTE(review): the format section shows "Action" without a trailing colon,
# while parse_response() searches for "Action:" — presumably the model emits
# the colon form; confirm against the training data before changing either.
SYSTEM_PROMPT = """You are a Linux command assistant. You can use many tools (functions) to help users with their Linux tasks.
At each step, you need to give your thought to analyze the status now and what to do next, with a function call to actually execute your step. Your output should follow this format:
Thought:
Action
Action Input:

After the call, you will get the call result, and you are now in a new state.
Then you will analyze your status now, then decide what to do next...
After many (Thought-call) pairs, you finally perform the task, then you can give your final answer.

Remember:
1. The state change is irreversible, you can't go back to one of the former state.
2. All the thought is short, at most in 5 sentences.
3. ALWAYS call "Finish" function at the end of the task.
4. If you cannot handle the task with the available tools, say you don't know and call Finish with give_answer.

You have access of the following tools:
[
  {
    "name": "ls_command",
    "description": "List directory contents.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {"type": "string"},
        "options": {"type": "string"}
      },
      "required": ["path"]
    }
  },
  {
    "name": "cd_command",
    "description": "Change the current working directory.",
    "parameters": {
      "type": "object",
      "properties": {"path": {"type": "string"}},
      "required": ["path"]
    }
  },
  {
    "name": "mkdir_command",
    "description": "Create a new directory.",
    "parameters": {
      "type": "object",
      "properties": {"path": {"type": "string"}},
      "required": ["path"]
    }
  },
  {
    "name": "rm_command",
    "description": "Remove files or directories.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {"type": "string"},
        "recursive": {"type": "boolean"}
      },
      "required": ["path"]
    }
  },
  {
    "name": "cp_command",
    "description": "Copy files or directories.",
    "parameters": {
      "type": "object",
      "properties": {
        "source": {"type": "string"},
        "destination": {"type": "string"}
      },
      "required": ["source", "destination"]
    }
  },
  {
    "name": "mv_command",
    "description": "Move or rename files.",
    "parameters": {
      "type": "object",
      "properties": {
        "source": {"type": "string"},
        "destination": {"type": "string"}
      },
      "required": ["source", "destination"]
    }
  },
  {
    "name": "find_command",
    "description": "Find files by name pattern.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {"type": "string"},
        "name": {"type": "string"}
      },
      "required": ["path", "name"]
    }
  },
  {
    "name": "cat_command",
    "description": "Display file contents.",
    "parameters": {
      "type": "object",
      "properties": {"file": {"type": "string"}},
      "required": ["file"]
    }
  },
  {
    "name": "grep_command",
    "description": "Search for patterns in files.",
    "parameters": {
      "type": "object",
      "properties": {
        "pattern": {"type": "string"},
        "file": {"type": "string"}
      },
      "required": ["pattern", "file"]
    }
  },
  {
    "name": "head_command",
    "description": "Display first lines of a file.",
    "parameters": {
      "type": "object",
      "properties": {
        "file": {"type": "string"},
        "lines": {"type": "integer"}
      },
      "required": ["file"]
    }
  },
  {
    "name": "tail_command",
    "description": "Display last lines of a file.",
    "parameters": {
      "type": "object",
      "properties": {
        "file": {"type": "string"},
        "lines": {"type": "integer"}
      },
      "required": ["file"]
    }
  },
  {
    "name": "wc_command",
    "description": "Count lines, words, and bytes.",
    "parameters": {
      "type": "object",
      "properties": {"file": {"type": "string"}},
      "required": ["file"]
    }
  },
  {
    "name": "ps_command",
    "description": "Display running processes.",
    "parameters": {
      "type": "object",
      "properties": {"options": {"type": "string"}},
      "required": []
    }
  },
  {
    "name": "df_command",
    "description": "Display disk space usage.",
    "parameters": {
      "type": "object",
      "properties": {"options": {"type": "string"}},
      "required": []
    }
  },
  {
    "name": "du_command",
    "description": "Display directory space usage.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {"type": "string"},
        "options": {"type": "string"}
      },
      "required": ["path"]
    }
  },
  {
    "name": "top_command",
    "description": "Display system processes in real-time.",
    "parameters": {
      "type": "object",
      "properties": {},
      "required": []
    }
  },
  {
    "name": "ping_command",
    "description": "Test network connectivity.",
    "parameters": {
      "type": "object",
      "properties": {
        "host": {"type": "string"},
        "count": {"type": "integer"}
      },
      "required": ["host"]
    }
  },
  {
    "name": "curl_command",
    "description": "Transfer data from URL.",
    "parameters": {
      "type": "object",
      "properties": {
        "url": {"type": "string"},
        "options": {"type": "string"}
      },
      "required": ["url"]
    }
  },
  {
    "name": "chmod_command",
    "description": "Change file permissions.",
    "parameters": {
      "type": "object",
      "properties": {
        "mode": {"type": "string"},
        "file": {"type": "string"}
      },
      "required": ["mode", "file"]
    }
  },
  {
    "name": "tar_command",
    "description": "Archive or extract files.",
    "parameters": {
      "type": "object",
      "properties": {
        "options": {"type": "string"},
        "archive": {"type": "string"},
        "files": {"type": "string"}
      },
      "required": ["options", "archive"]
    }
  },
  {
    "name": "Finish",
    "description": "Complete the task.",
    "parameters": {
      "type": "object",
      "properties": {"give_answer": {"type": "string"}},
      "required": ["give_answer"]
    }
  }
]"""


def load_model(num_threads=-1):
    """Build the tokenizer and the HybriKo model for CPU-only inference.

    Args:
        num_threads: Number of intra-op threads to hand to torch. Zero or a
            negative value selects ``os.cpu_count()``, falling back to 4 when
            the core count cannot be determined.

    Returns:
        A ``(model, tokenizer)`` tuple: the model in eval mode with weights
        loaded from ``pytorch_model.pt``, and a SentencePiece processor
        loaded from ``HybriKo_tok.model`` (both read from the working
        directory).
    """
    detected_cpus = os.cpu_count()
    thread_count = num_threads if num_threads > 0 else (detected_cpus or 4)
    torch.set_num_threads(thread_count)
    print(f"Using {thread_count} CPU threads (max: {detected_cpus})")

    print("Loading tokenizer...")
    tokenizer = spm.SentencePieceProcessor()
    tokenizer.Load("HybriKo_tok.model")

    print("Loading model (CPU mode)...")
    model = HybriKoModel(
        HybriKoConfig(
            d_model=768, n_layers=12, vocab_size=32000,
            n_heads=12, n_kv_heads=3, ff_mult=3,
            max_seq_len=6144
        )
    )
    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
    # objects from a file that was downloaded from the network. Only run this
    # against a checkpoint you trust; switching to weights_only=True needs a
    # check that the checkpoint holds nothing but tensors and plain containers.
    state_dict = torch.load(
        "pytorch_model.pt", map_location="cpu", weights_only=False
    )["model_state_dict"]
    model.load_state_dict(state_dict)

    # Turn off the flash-attention path on every submodule that exposes the
    # flag (the original author notes that Flash Attention requires CUDA).
    for submodule in model.modules():
        if hasattr(submodule, 'use_flash_attention'):
            submodule.use_flash_attention = False

    model.eval()
    param_millions = sum(p.numel() for p in model.parameters()) / 1e6
    print(f"✅ Model loaded on CPU ({param_millions:.1f}M params)\n")
    return model, tokenizer


def _action_input_complete(text):
    """Return True once *text* holds a brace-balanced ``Action Input:`` object.

    Braces inside double-quoted JSON strings (including strings containing
    escaped quotes) are ignored, so an argument value such as ``"}"`` does not
    end the object early.
    """
    marker = "Action Input:"
    marker_pos = text.find(marker)
    if marker_pos == -1:
        return False
    payload = text[marker_pos + len(marker):].strip()
    if not payload.startswith("{"):
        return False

    depth = 0
    in_string = False
    escaped = False
    for ch in payload:
        if in_string:
            if escaped:
                escaped = False          # this character is escaped; skip it
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return True
    return False


@torch.inference_mode()
def generate(model, tokenizer, prompt, max_new_tokens=150):
    """Greedily decode a completion for *prompt* on CPU.

    The model is re-run on the full sequence at every step and the arg-max
    token is appended. After each step the newly generated ids are decoded
    and generation stops as soon as one of these holds:

    * the text contains ``<|im_end|>``;
    * the text contains ``Action Input:`` followed by a complete,
      brace-balanced JSON object;
    * ``max_new_tokens`` tokens have been produced.

    Args:
        model: Callable taking a ``(1, seq_len)`` LongTensor of token ids and
            returning either a dict with a ``"logits"`` entry or an object
            with a ``.logits`` attribute; the last position's logits are used.
        tokenizer: Object providing ``EncodeAsIds(str)`` and ``DecodeIds(list)``.
        prompt: Full prompt text to condition on.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded text of the generated tokens only (the prompt is
        excluded). A terminating ``<|im_end|>`` is left in the text if the
        model produced it.
    """
    input_ids = tokenizer.EncodeAsIds(prompt)
    prompt_len = len(input_ids)
    generated = torch.tensor([input_ids], dtype=torch.long)

    new_text = ""
    for _ in range(max_new_tokens):
        outputs = model(generated)
        logits = outputs["logits"] if isinstance(outputs, dict) else outputs.logits
        next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
        generated = torch.cat([generated, next_token], dim=1)

        # Decode everything generated so far: stop markers can span several
        # tokens, so they are only reliably visible in the joined text.
        new_text = tokenizer.DecodeIds(generated[0, prompt_len:].tolist())
        if "<|im_end|>" in new_text or _action_input_complete(new_text):
            break

    return new_text


def create_prompt(user_input):
    """Wrap *user_input* in a ChatML conversation ready for generation.

    The result is the system turn (``SYSTEM_PROMPT``), the user turn, and an
    opened assistant turn for the model to continue.
    """
    turns = (
        f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n",
        f"<|im_start|>user\n{user_input}<|im_end|>\n",
        "<|im_start|>assistant\n",
    )
    return "".join(turns)


def parse_response(response):
    """Split a raw model completion into thought, action, and action input.

    Args:
        response: Text generated by the model for the assistant turn. It may
            still carry a trailing ``<|im_end|>`` or a spurious
            ``<|im_start|>`` continuation.

    Returns:
        A dict with these keys:

        * ``"thought"`` - text after ``Thought:`` up to ``Action:`` or the
          end of the text, or ``None``.
        * ``"action"`` - the identifier after ``Action:``, or ``None``.
        * ``"action_input"`` - the decoded JSON object after
          ``Action Input:``; the raw ``{...}`` text if it is not valid JSON;
          or ``None`` when no object is present.
        * ``"raw"`` - the response with everything from the first ChatML
          marker onwards removed.
    """
    # Keep only the assistant's own turn: drop the end-of-turn marker and
    # anything after it, plus any new turn the model started by itself.
    for marker in ("<|im_end|>", "<|im_start|>"):
        response = response.split(marker)[0]

    result = {"thought": None, "action": None, "action_input": None, "raw": response}

    thought_match = re.search(r"Thought:\s*(.+?)(?=\s*Action:|\s*$)", response, re.DOTALL)
    if thought_match:
        result["thought"] = thought_match.group(1).strip()

    action_match = re.search(r"Action:\s*(\w+)", response)
    if action_match:
        result["action"] = action_match.group(1)

    input_match = re.search(r"Action Input:\s*(?=\{)", response)
    if input_match:
        payload = response[input_match.end():]
        try:
            # raw_decode parses one JSON value from the start of the text and
            # ignores what follows, so nested objects and braces inside
            # strings are handled correctly.
            result["action_input"], _ = json.JSONDecoder().raw_decode(payload)
        except json.JSONDecodeError:
            # Not valid JSON (e.g. single-quoted keys): fall back to handing
            # the caller the raw brace-delimited text.
            loose_match = re.match(r"\{[^}]+\}", payload, re.DOTALL)
            if loose_match:
                result["action_input"] = loose_match.group(0)

    return result


def run_single(model, tokenizer, user_input):
    """Answer one user query: build the prompt, generate, and parse.

    Returns the dict produced by ``parse_response`` for the generated text.
    """
    return parse_response(generate(model, tokenizer, create_prompt(user_input)))


def _print_parsed_fields(result):
    """Print the thought, action, and action input that were parsed, if any."""
    thought = result["thought"]
    action = result["action"]
    action_input = result["action_input"]
    if thought:
        print(f"Thought: {thought}")
    if action:
        print(f"Action:  {action}")
    if action_input:
        print(f"Input:   {json.dumps(action_input, ensure_ascii=False)}")


def main():
    """Command-line entry point for the demo.

    With ``--query`` a single request is answered and the program returns.
    Without it, queries are read from stdin line by line until end of input,
    Ctrl-C, or one of ``quit`` / ``exit`` / ``q``. ``--threads`` is passed
    through to ``load_model``.
    """
    cli = argparse.ArgumentParser(description="HybriKo Linux FC Demo (CPU)")
    cli.add_argument("--query", type=str, help="Query to process")
    cli.add_argument("--threads", type=int, default=-1, help="Number of CPU threads (-1 = auto max)")
    args = cli.parse_args()

    banner_rule = "=" * 60
    print(banner_rule)
    print("  HybriKo-117M Linux Function Calling Demo (CPU)")
    print(banner_rule)

    model, tokenizer = load_model(args.threads)
    section_rule = "-" * 40

    if args.query:
        # One-shot mode: answer the query given on the command line and stop.
        parsed = run_single(model, tokenizer, args.query)
        print(f"Input: {args.query}")
        print(section_rule)
        _print_parsed_fields(parsed)
        if not (parsed["thought"] or parsed["action"]):
            # Nothing recognisable was parsed; show the start of the raw text.
            print(f"[Raw]: {parsed['raw'][:500]}")
        print(section_rule)
        return

    # Interactive mode: read one query per line from stdin.
    print("Supported: ls, cd, mkdir, rm, cp, mv, find, cat, grep, head, tail, wc, ps, df, du, top, ping, curl, chmod, tar")
    print("Type 'quit' to exit\n")

    try:
        while True:
            print("[User] ", end="", flush=True)
            line = sys.stdin.readline()
            if not line:
                # readline() returns an empty string only at end of input.
                break
            query = line.strip()
            if not query:
                continue
            if query.lower() in ["quit", "exit", "q"]:
                break

            parsed = run_single(model, tokenizer, query)
            print("\n[HybriKo]")
            print(section_rule)
            _print_parsed_fields(parsed)
            print(section_rule + "\n")
    except (KeyboardInterrupt, EOFError):
        # Either one simply ends the session.
        pass

    print("Goodbye!")


# Script entry point: start the demo CLI only when this file is executed
# directly. Note that the module-level download_files() call and the model
# imports near the top of the file still run on a plain `import` of this module.
if __name__ == "__main__":
    main()