# wheattoast11's picture
# Create app.py
# 1dc6e01 verified
# raw
# history blame
# 3.02 kB
"""
Unsloth Training Hub - LLM Fine-tuning & RL Platform
Supports: SFT, GRPO, GSPO, DPO, Dr-GRPO, DAPO, BNPO
"""
import gradio as gr
import os
import json
from datetime import datetime
# Checkpoints offered in the "Model" dropdown; the first entry is the
# default selection.
MODELS = [
    "unsloth/Qwen2.5-7B-Instruct",
    "unsloth/Qwen2.5-3B-Instruct",
    "unsloth/Qwen2.5-14B-Instruct",
    "unsloth/Meta-Llama-3.1-8B-Instruct",
    "unsloth/DeepSeek-R1-Distill-Qwen-7B",
    "unsloth/gemma-3-4b-it",
    "unsloth/Phi-4-mini-instruct",
]

# Identifiers offered in the "RL Method" dropdown.
RL_METHODS = [
    "grpo",
    "gspo",
    "dr_grpo",
    "dapo",
    "bnpo",
    "dpo",
]

# Names offered in the "Preset" radio group.
PRESETS = [
    "test_run",
    "small_run",
    "medium_run",
    "large_run",
    "grokking_run",
]
def get_status():
    """Probe the runtime for CUDA, the GPU name, Unsloth and vLLM.

    Each probe is best-effort: a package that is missing, or that fails
    while being imported or queried, is reported as unavailable instead of
    raising.

    Returns:
        dict: ``{"cuda": bool, "gpu": str, "unsloth": bool, "vllm": bool}``.
        ``"gpu"`` stays the string ``"None"`` unless CUDA is available and
        the name of device 0 could be read.
    """
    import logging

    log = logging.getLogger(__name__)
    status = {"cuda": False, "gpu": "None", "unsloth": False, "vllm": False}

    # ``except Exception`` (not a bare ``except``) keeps the probes
    # best-effort while letting KeyboardInterrupt / SystemExit propagate.
    # ImportError alone would be too narrow: these packages may fail during
    # import for other reasons as well.
    try:
        import torch

        status["cuda"] = torch.cuda.is_available()
        if status["cuda"]:
            status["gpu"] = torch.cuda.get_device_name(0)
    except Exception:
        log.debug("torch / CUDA probe failed", exc_info=True)

    try:
        import unsloth  # noqa: F401 - imported only to test availability

        status["unsloth"] = True
    except Exception:
        log.debug("unsloth import failed", exc_info=True)

    try:
        import vllm  # noqa: F401 - imported only to test availability

        status["vllm"] = True
    except Exception:
        log.debug("vllm import failed", exc_info=True)

    return status
def create_ui():
    """Build the Gradio Blocks interface for the training hub.

    The layout is a header with an environment status line (from
    ``get_status()``), three tabs of inputs (model/mode, training config,
    output), a Markdown result area, a button that fills that area with a
    summary of the chosen settings, and a footer.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    with gr.Blocks(title="Unsloth Training Hub", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# Unsloth Training Hub")
        gr.Markdown("Comprehensive LLM Fine-tuning & RL Platform")

        # The environment is probed once, while the UI is being built.
        status = get_status()
        gr.Markdown(f"**CUDA**: {status['cuda']} | **GPU**: {status['gpu']} | **Unsloth**: {status['unsloth']} | **vLLM**: {status['vllm']}")

        with gr.Tabs():
            with gr.Tab("Model & Mode"):
                model = gr.Dropdown(choices=MODELS, value=MODELS[0], label="Model")
                mode = gr.Radio(choices=["sft", "rl"], value="sft", label="Training Mode")
                rl_method = gr.Dropdown(choices=RL_METHODS, value="grpo", label="RL Method", visible=False)
                # Show the RL method selector only while the "rl" mode is selected.
                mode.change(lambda m: gr.Dropdown(visible=m == "rl"), mode, rl_method)

            with gr.Tab("Training Config"):
                preset = gr.Radio(choices=PRESETS, value="small_run", label="Preset")
                lora_rank = gr.Dropdown(choices=[8, 16, 32, 64, 128], value=32, label="LoRA Rank")
                lr = gr.Number(value=5e-6, label="Learning Rate")

            with gr.Tab("Output"):
                hub_id = gr.Textbox(value="wheattoast11/trained-model", label="Hub Model ID")
                push = gr.Checkbox(value=True, label="Push to Hub")

        # NOTE(review): the original nesting was lost; the result area and
        # button are assumed to sit below the tabs so they stay visible from
        # every tab - confirm against the intended layout.
        output = gr.Markdown("Configure and click Generate")
        btn = gr.Button("Generate Training Script", variant="primary")

        def generate(model, mode, rl_method, preset, lora_rank, lr, hub_id, push):
            """Return a Markdown summary of the selected configuration."""
            fields = [
                f"**Model**: {model}",
                f"**Mode**: {mode}",
            ]
            # The RL method only applies (and is only visible) in "rl" mode.
            if mode == "rl":
                fields.append(f"**RL Method**: {rl_method}")
            fields.extend(
                [
                    f"**Preset**: {preset}",
                    f"**LoRA**: {lora_rank}",
                    f"**LR**: {lr}",
                ]
            )
            # Report the target repository only when pushing is requested.
            if push:
                fields.append(f"**Hub Model ID**: {hub_id}")
            # Blank lines between fields so each one renders as its own
            # paragraph; single newlines are not rendered as line breaks by
            # default in Markdown.
            return "\n\n".join(fields)

        btn.click(generate, [model, mode, rl_method, preset, lora_rank, lr, hub_id, push], output)

        gr.Markdown("---\n**Intuition Labs** | L40S ~$1.80/hr - PAUSE when not training!")

    return demo
if __name__ == "__main__":
    # Script entry point: build the interface, then serve it on every
    # network interface at port 7860.
    demo = create_ui()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )