| """ |
| Unsloth Training Hub - LLM Fine-tuning & RL Platform |
| Supports: SFT, GRPO, GSPO, DPO, Dr-GRPO, DAPO, BNPO |
| """ |
| import gradio as gr |
| import os |
| import json |
| from datetime import datetime |
|
|
# Checkpoints offered in the "Model" dropdown. The first entry is used as the
# dropdown's initial value, so keep the preferred default at the top.
MODELS = [
    # Qwen 2.5 instruct variants
    "unsloth/Qwen2.5-7B-Instruct",
    "unsloth/Qwen2.5-3B-Instruct",
    "unsloth/Qwen2.5-14B-Instruct",
    # Other model families
    "unsloth/Meta-Llama-3.1-8B-Instruct",
    "unsloth/DeepSeek-R1-Distill-Qwen-7B",
    "unsloth/gemma-3-4b-it",
    "unsloth/Phi-4-mini-instruct",
]


# Identifiers listed in the "RL Method" dropdown (shown only in "rl" mode).
RL_METHODS = [
    "grpo",
    "gspo",
    "dr_grpo",
    "dapo",
    "bnpo",
    "dpo",
]

# Preset names listed in the "Preset" radio group.
PRESETS = [
    "test_run",
    "small_run",
    "medium_run",
    "large_run",
    "grokking_run",
]
|
|
def get_status():
    """Probe the runtime for CUDA and the optional training backends.

    Every probe is best-effort: an ordinary failure (missing package, a
    driver error raised while importing, ...) leaves that entry at its
    default instead of propagating. ``KeyboardInterrupt`` and ``SystemExit``
    are deliberately not intercepted.

    Returns:
        dict with four keys:
            "cuda"    -- result of ``torch.cuda.is_available()``, or False
                         if torch could not be imported or queried.
            "gpu"     -- name of CUDA device 0, or the string "None".
            "unsloth" -- True if ``unsloth`` imported successfully.
            "vllm"    -- True if ``vllm`` imported successfully.
    """
    import importlib

    status = {"cuda": False, "gpu": "None", "unsloth": False, "vllm": False}

    try:
        import torch

        status["cuda"] = torch.cuda.is_available()
        if status["cuda"]:
            status["gpu"] = torch.cuda.get_device_name(0)
    except Exception:
        # Importing or querying torch can fail with more than ImportError;
        # whatever was filled in before the failure is kept.
        pass

    for package in ("unsloth", "vllm"):
        try:
            importlib.import_module(package)
        except Exception:
            # Not installed, or it raised while importing: report unavailable.
            continue
        status[package] = True

    return status
|
|
def create_ui():
    """Build the Gradio Blocks interface for the training hub.

    The page consists of a title, a one-line environment status, three tabs
    ("Model & Mode", "Training Config", "Output"), a Markdown summary area
    with a "Generate Training Script" button, and a footer.

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here; the
        caller decides how to serve it.
    """
    with gr.Blocks(title="Unsloth Training Hub", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Unsloth Training Hub")
        gr.Markdown("Comprehensive LLM Fine-tuning & RL Platform")

        # Probed once while the UI is being built, so the status line does
        # not refresh afterwards.
        status = get_status()
        gr.Markdown(
            f"**CUDA**: {status['cuda']} | **GPU**: {status['gpu']} | "
            f"**Unsloth**: {status['unsloth']} | **vLLM**: {status['vllm']}"
        )

        with gr.Tabs():
            with gr.Tab("Model & Mode"):
                model = gr.Dropdown(choices=MODELS, value=MODELS[0], label="Model")
                mode = gr.Radio(choices=["sft", "rl"], value="sft", label="Training Mode")
                rl_method = gr.Dropdown(
                    choices=RL_METHODS, value="grpo", label="RL Method", visible=False
                )
                # Reveal the RL method picker only while "rl" mode is selected.
                mode.change(lambda m: gr.Dropdown(visible=m == "rl"), mode, rl_method)

            with gr.Tab("Training Config"):
                preset = gr.Radio(choices=PRESETS, value="small_run", label="Preset")
                lora_rank = gr.Dropdown(choices=[8, 16, 32, 64, 128], value=32, label="LoRA Rank")
                lr = gr.Number(value=5e-6, label="Learning Rate")

            with gr.Tab("Output"):
                hub_id = gr.Textbox(value="wheattoast11/trained-model", label="Hub Model ID")
                push = gr.Checkbox(value=True, label="Push to Hub")

        # Summary area and trigger button sit below the tabs rather than
        # inside one, so they stay reachable whichever tab is open.
        output = gr.Markdown("Configure and click Generate")
        btn = gr.Button("Generate Training Script", variant="primary")

        def generate(model, mode, rl_method, preset, lora_rank, lr, hub_id, push):
            """Return a Markdown summary of the selected configuration.

            ``rl_method``, ``hub_id`` and ``push`` are accepted because the
            click handler passes all eight inputs, but they are not part of
            the summary text.
            """
            # Explicit "\n" escapes: a quoted string literal may not contain
            # raw line breaks.
            # NOTE(review): single newlines may render as one paragraph in
            # Markdown -- confirm whether blank lines were intended.
            return (
                f"**Model**: {model}\n"
                f"**Mode**: {mode}\n"
                f"**Preset**: {preset}\n"
                f"**LoRA**: {lora_rank}\n"
                f"**LR**: {lr}"
            )

        btn.click(
            generate,
            [model, mode, rl_method, preset, lora_rank, lr, hub_id, push],
            output,
        )
        gr.Markdown(
            "---\n"
            "**Intuition Labs** | L40S ~$1.80/hr - PAUSE when not training!"
        )
    return demo
|
|
if __name__ == "__main__":
    # Script entry point: build the interface, then serve it.
    demo = create_ui()
    # "0.0.0.0" binds to every network interface; the port is fixed at 7860.
    demo.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )