"""
Unsloth Training Hub - LLM Fine-tuning & RL Platform
Supports: SFT, GRPO, GSPO, DPO, Dr-GRPO, DAPO, BNPO
"""
import gradio as gr
import os
import json
from datetime import datetime

# Hugging Face model IDs offered in the "Model" dropdown; create_ui() uses
# the first entry as the default selection.
MODELS = [
    "unsloth/Qwen2.5-7B-Instruct",
    "unsloth/Qwen2.5-3B-Instruct",
    "unsloth/Qwen2.5-14B-Instruct",
    "unsloth/Meta-Llama-3.1-8B-Instruct",
    "unsloth/DeepSeek-R1-Distill-Qwen-7B",
    "unsloth/gemma-3-4b-it",
    "unsloth/Phi-4-mini-instruct",
]

# Choices for the "RL Method" dropdown (shown only when the mode is "rl").
RL_METHODS = [
    "grpo",
    "gspo",
    "dr_grpo",
    "dapo",
    "bnpo",
    "dpo",
]

# Choices for the "Preset" radio group in the "Training Config" tab.
PRESETS = [
    "test_run",
    "small_run",
    "medium_run",
    "large_run",
    "grokking_run",
]

def get_status():
    """Probe the runtime for CUDA and the optional training libraries.

    Every probe is best-effort: if importing or querying a library fails,
    the corresponding entry keeps its default value and no error is raised.

    Returns:
        dict: A mapping with the keys
            "cuda"    -- result of ``torch.cuda.is_available()``, or False
                         when torch could not be imported or queried;
            "gpu"     -- name of CUDA device 0 when CUDA is available,
                         otherwise the string "None";
            "unsloth" -- True when ``import unsloth`` succeeded;
            "vllm"    -- True when ``import vllm`` succeeded.
    """
    s = {"cuda": False, "gpu": "None", "unsloth": False, "vllm": False}

    # ``except Exception`` (not a bare ``except``) so that KeyboardInterrupt
    # and SystemExit still propagate. It is deliberately broader than
    # ImportError: any ordinary failure while importing or querying a
    # library should simply leave that entry at its default.
    try:
        import torch
        s["cuda"] = torch.cuda.is_available()
        if s["cuda"]:
            s["gpu"] = torch.cuda.get_device_name(0)
    except Exception:
        pass

    try:
        import unsloth
        s["unsloth"] = True
    except Exception:
        pass

    try:
        import vllm
        s["vllm"] = True
    except Exception:
        pass

    return s

def create_ui():
    """Build the Gradio Blocks interface for the training hub.

    The page consists of a title, a one-line environment summary taken from
    get_status(), three tabs ("Model & Mode", "Training Config", "Output"),
    a Markdown output area, and a button that fills that area with a summary
    of the selected options.

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here.
    """
    with gr.Blocks(title="Unsloth Training Hub", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Unsloth Training Hub")
        gr.Markdown("Comprehensive LLM Fine-tuning & RL Platform")

        # The environment is probed once, when the UI is built.
        status = get_status()
        gr.Markdown(f"**CUDA**: {status['cuda']} | **GPU**: {status['gpu']} | **Unsloth**: {status['unsloth']} | **vLLM**: {status['vllm']}")

        with gr.Tabs():
            with gr.Tab("Model & Mode"):
                model = gr.Dropdown(choices=MODELS, value=MODELS[0], label="Model")
                mode = gr.Radio(choices=["sft", "rl"], value="sft", label="Training Mode")
                rl_method = gr.Dropdown(choices=RL_METHODS, value="grpo", label="RL Method", visible=False)
                # Show the RL method selector only while the mode is "rl".
                mode.change(lambda m: gr.Dropdown(visible=m=="rl"), mode, rl_method)

            with gr.Tab("Training Config"):
                preset = gr.Radio(choices=PRESETS, value="small_run", label="Preset")
                lora_rank = gr.Dropdown(choices=[8,16,32,64,128], value=32, label="LoRA Rank")
                lr = gr.Number(value=5e-6, label="Learning Rate")

            with gr.Tab("Output"):
                hub_id = gr.Textbox(value="wheattoast11/trained-model", label="Hub Model ID")
                push = gr.Checkbox(value=True, label="Push to Hub")

        output = gr.Markdown("Configure and click Generate")
        btn = gr.Button("Generate Training Script", variant="primary")

        def generate(model, mode, rl_method, preset, lora_rank, lr, hub_id, push):
            """Return a Markdown summary of the selected configuration.

            rl_method, hub_id and push are accepted because the button passes
            all eight inputs, but they are not included in the summary.
            """
            # A single-quoted string cannot span physical lines, so the line
            # breaks are written as explicit "\n" escapes.
            return (
                f"**Model**: {model}\n"
                f"**Mode**: {mode}\n"
                f"**Preset**: {preset}\n"
                f"**LoRA**: {lora_rank}\n"
                f"**LR**: {lr}"
            )

        btn.click(generate, [model, mode, rl_method, preset, lora_rank, lr, hub_id, push], output)
        gr.Markdown("---\n**Intuition Labs** | L40S ~$1.80/hr - PAUSE when not training!")
    return demo

if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on every network
    # interface (0.0.0.0) at port 7860.
    demo = create_ui()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )