| """ |
| Unsloth Training Hub - Comprehensive LLM Fine-tuning & RL Platform |
| Supports: SFT, GRPO, GSPO, DPO, Dr-GRPO, DAPO, BNPO |
| Models: All Unsloth-optimized models (LLM, VLM, Embedding, Multimodal) |
| """ |
|
|
| import gradio as gr |
| import os |
| import json |
| from datetime import datetime |
|
|
| |
| |
| |
|
|
| UNSLOTH_MODELS = { |
| "text_llm": { |
| "Qwen3": [ |
| "unsloth/Qwen3-0.6B", |
| "unsloth/Qwen3-1.7B", |
| "unsloth/Qwen3-4B", |
| "unsloth/Qwen3-8B", |
| "unsloth/Qwen3-14B", |
| "unsloth/Qwen3-32B", |
| "unsloth/Qwen3-30B-A3B", |
| "unsloth/Qwen3-235B-A22B", |
| ], |
| "Qwen2.5": [ |
| "unsloth/Qwen2.5-0.5B-Instruct", |
| "unsloth/Qwen2.5-1.5B-Instruct", |
| "unsloth/Qwen2.5-3B-Instruct", |
| "unsloth/Qwen2.5-7B-Instruct", |
| "unsloth/Qwen2.5-14B-Instruct", |
| "unsloth/Qwen2.5-32B-Instruct", |
| "unsloth/Qwen2.5-72B-Instruct", |
| ], |
| "Qwen2.5-Coder": [ |
| "unsloth/Qwen2.5-Coder-0.5B-Instruct", |
| "unsloth/Qwen2.5-Coder-1.5B-Instruct", |
| "unsloth/Qwen2.5-Coder-3B-Instruct", |
| "unsloth/Qwen2.5-Coder-7B-Instruct", |
| "unsloth/Qwen2.5-Coder-14B-Instruct", |
| "unsloth/Qwen2.5-Coder-32B-Instruct", |
| ], |
| "Llama-4": [ |
| "unsloth/Llama-4-Scout-17B-16E-Instruct", |
| "unsloth/Llama-4-Maverick-17B-128E-Instruct", |
| ], |
| "Llama-3.3": [ |
| "unsloth/Llama-3.3-70B-Instruct", |
| ], |
| "Llama-3.1": [ |
| "unsloth/Meta-Llama-3.1-8B-Instruct", |
| "unsloth/Meta-Llama-3.1-70B-Instruct", |
| "unsloth/Meta-Llama-3.1-405B-Instruct", |
| ], |
| "Llama-3.2": [ |
| "unsloth/Llama-3.2-1B-Instruct", |
| "unsloth/Llama-3.2-3B-Instruct", |
| ], |
| "DeepSeek-R1": [ |
| "unsloth/DeepSeek-R1-Distill-Qwen-1.5B", |
| "unsloth/DeepSeek-R1-Distill-Qwen-7B", |
| "unsloth/DeepSeek-R1-Distill-Qwen-14B", |
| "unsloth/DeepSeek-R1-Distill-Qwen-32B", |
| "unsloth/DeepSeek-R1-Distill-Llama-8B", |
| "unsloth/DeepSeek-R1-Distill-Llama-70B", |
| ], |
| "Gemma-3": [ |
| "unsloth/gemma-3-1b-it", |
| "unsloth/gemma-3-4b-it", |
| "unsloth/gemma-3-12b-it", |
| "unsloth/gemma-3-27b-it", |
| ], |
| "Mistral": [ |
| "unsloth/Mistral-Small-3.2-24B-Instruct-2506", |
| "unsloth/mistral-7b-instruct-v0.3", |
| "unsloth/Mistral-Nemo-Instruct-2407", |
| ], |
| "Phi-4": [ |
| "unsloth/Phi-4-mini-instruct", |
| "unsloth/Phi-4-Instruct", |
| ], |
| "GLM": [ |
| "unsloth/GLM-4.7-Flash", |
| "unsloth/GLM-4.5-Air", |
| ], |
| "Nemotron": [ |
| "unsloth/Nemotron-3-Nano-30B-A3B", |
| ], |
| }, |
| "vision_vlm": { |
| "Qwen3-VL": [ |
| "unsloth/Qwen3-VL-2B-Instruct", |
| "unsloth/Qwen3-VL-4B-Instruct", |
| "unsloth/Qwen3-VL-8B-Instruct", |
| "unsloth/Qwen3-VL-32B-Instruct", |
| ], |
| "Qwen2.5-VL": [ |
| "unsloth/Qwen2.5-VL-3B-Instruct", |
| "unsloth/Qwen2.5-VL-7B-Instruct", |
| "unsloth/Qwen2.5-VL-32B-Instruct", |
| "unsloth/Qwen2.5-VL-72B-Instruct", |
| ], |
| "Llama-Vision": [ |
| "unsloth/Llama-3.2-11B-Vision-Instruct", |
| "unsloth/Llama-3.2-90B-Vision-Instruct", |
| ], |
| "Pixtral": [ |
| "unsloth/Pixtral-12B-2409", |
| ], |
| "Gemma-3-Vision": [ |
| "unsloth/gemma-3-4b-it", |
| "unsloth/gemma-3-12b-it", |
| "unsloth/gemma-3-27b-it", |
| ], |
| }, |
| "embedding": [ |
| "unsloth/Qwen3-Embedding-0.6B", |
| "unsloth/Qwen3-Embedding-4B", |
| "unsloth/Qwen3-Embedding-8B", |
| "unsloth/embeddinggemma-300m", |
| "unsloth/bge-m3", |
| "unsloth/ModernBERT-base", |
| "unsloth/ModernBERT-large", |
| ], |
| "multimodal_omni": [ |
| "unsloth/Qwen2.5-Omni-3B", |
| "unsloth/Qwen2.5-Omni-7B", |
| ], |
| } |
|
|
| |
| |
| |
|
|
| RL_METHODS = { |
| "grpo": { |
| "name": "GRPO (Group Relative Policy Optimization)", |
| "description": "Token-level importance sampling. Default DeepSeek method.", |
| "config": {"loss_type": "grpo", "importance_sampling_level": "token"}, |
| }, |
| "gspo": { |
| "name": "GSPO (Group Sequence Policy Optimization)", |
| "description": "Sequence-level importance sampling. Qwen team variant.", |
| "config": {"loss_type": "grpo", "importance_sampling_level": "sequence"}, |
| }, |
| "dr_grpo": { |
| "name": "Dr-GRPO (Difficulty-Resilient GRPO)", |
| "description": "Avoids difficulty bias in training.", |
| "config": {"loss_type": "dr_grpo", "scale_rewards": False}, |
| }, |
| "dapo": { |
| "name": "DAPO (Direct Advantage Policy Optimization)", |
| "description": "Token-level normalization for long chain-of-thought.", |
| "config": {"loss_type": "dapo", "mask_truncated_completions": True}, |
| }, |
| "bnpo": { |
| "name": "BNPO (Bounded Natural Policy Optimization)", |
| "description": "Asymmetric clipping for better exploration.", |
| "config": {"loss_type": "bnpo", "epsilon": 0.2, "epsilon_high": 0.28, "delta": 1.5}, |
| }, |
| "dpo": { |
| "name": "DPO (Direct Preference Optimization)", |
| "description": "Preference-based training without reward model.", |
| "config": {"method": "dpo"}, |
| }, |
| } |
|
|
| |
| |
| |
|
|
| SAMPLE_PRESETS = { |
| "test_run": {"samples": 100, "max_steps": 50, "description": "Quick test (5-10 min)"}, |
| "small_run": {"samples": 1000, "max_steps": 250, "description": "Small training (30-60 min)"}, |
| "medium_run": {"samples": 5000, "max_steps": 1000, "description": "Medium training (2-4 hours)"}, |
| "large_run": {"samples": 25000, "max_steps": 5000, "description": "Large training (8-12 hours)"}, |
| "grokking_run": {"samples": 100000, "max_steps": 50000, "description": "Grokking/extended (24+ hours)"}, |
| } |
|
|
| |
| |
| |
|
|
| REWARD_PACKS = { |
| "reasoning_xml": { |
| "name": "XML Reasoning Format", |
| "description": "Rewards <reasoning>...</reasoning><answer>...</answer> format", |
| "functions": ["xmlcount_reward", "soft_format_reward", "strict_format_reward"], |
| }, |
| "code_quality": { |
| "name": "Code Quality", |
| "description": "Rewards syntactically correct, well-formatted code", |
| "functions": ["syntax_reward", "docstring_reward", "type_hint_reward"], |
| }, |
| "math_accuracy": { |
| "name": "Math Accuracy", |
| "description": "Rewards correct numerical answers with step verification", |
| "functions": ["correctness_reward", "int_reward", "step_count_reward"], |
| }, |
| "instruction_following": { |
| "name": "Instruction Following", |
| "description": "Rewards adherence to specific output formats", |
| "functions": ["format_reward", "length_reward", "keyword_reward"], |
| }, |
| "safety_alignment": { |
| "name": "Safety & Alignment", |
| "description": "Rewards helpful, harmless, honest outputs", |
| "functions": ["helpfulness_reward", "safety_reward", "factuality_reward"], |
| }, |
| } |
|
|
|
|
| def get_environment_status(): |
| """Check environment and return status.""" |
| import subprocess |
|
|
| status = { |
| "cuda_available": False, |
| "gpu_name": "Not detected", |
| "gpu_memory": "Unknown", |
| "unsloth_installed": False, |
| "vllm_installed": False, |
| "trl_installed": False, |
| "anthropic_key": bool(os.environ.get("ANTHROPIC_API_KEY")), |
| "hf_token": bool(os.environ.get("HF_TOKEN")), |
| } |
|
|
| try: |
| import torch |
| status["cuda_available"] = torch.cuda.is_available() |
| if status["cuda_available"]: |
| status["gpu_name"] = torch.cuda.get_device_name(0) |
| status["gpu_memory"] = f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB" |
| except: |
| pass |
|
|
| try: |
| import unsloth |
| status["unsloth_installed"] = True |
| except: |
| pass |
|
|
| try: |
| import vllm |
| status["vllm_installed"] = True |
| except: |
| pass |
|
|
| try: |
| import trl |
| status["trl_installed"] = True |
| except: |
| pass |
|
|
| return status |
|
|
|
|
| def format_status_markdown(status): |
| """Format status as markdown.""" |
| lines = [ |
| "## Environment Status\n", |
| f"- **CUDA**: {'Available' if status['cuda_available'] else 'Not available'}", |
| f"- **GPU**: {status['gpu_name']} ({status['gpu_memory']})", |
| f"- **Unsloth**: {'Installed' if status['unsloth_installed'] else 'Not installed'}", |
| f"- **vLLM**: {'Installed' if status['vllm_installed'] else 'Not installed'}", |
| f"- **TRL**: {'Installed' if status['trl_installed'] else 'Not installed'}", |
| f"- **ANTHROPIC_API_KEY**: {'Set' if status['anthropic_key'] else 'Not set'}", |
| f"- **HF_TOKEN**: {'Set' if status['hf_token'] else 'Not set'}", |
| ] |
| return "\n".join(lines) |
|
|
|
|
| def get_model_list(model_type): |
| """Get flat list of models for given type.""" |
| if model_type == "text_llm": |
| models = [] |
| for family, family_models in UNSLOTH_MODELS["text_llm"].items(): |
| models.extend(family_models) |
| return models |
| elif model_type == "vision_vlm": |
| models = [] |
| for family, family_models in UNSLOTH_MODELS["vision_vlm"].items(): |
| models.extend(family_models) |
| return models |
| elif model_type == "embedding": |
| return UNSLOTH_MODELS["embedding"] |
| elif model_type == "multimodal": |
| return UNSLOTH_MODELS["multimodal_omni"] |
| return [] |
|
|
|
|
| def start_training( |
| model_name, |
| model_type, |
| training_mode, |
| rl_method, |
| sample_preset, |
| reward_pack, |
| custom_reward_code, |
| lora_rank, |
| learning_rate, |
| num_generations, |
| temperature, |
| max_seq_length, |
| hub_model_id, |
| push_to_hub, |
| ): |
| """Start training with selected configuration.""" |
|
|
| timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") |
| run_dir = f"/app/runs/{training_mode}_{timestamp}" |
|
|
| config = { |
| "model_name": model_name, |
| "model_type": model_type, |
| "training_mode": training_mode, |
| "rl_method": rl_method if training_mode == "rl" else None, |
| "sample_preset": sample_preset, |
| "reward_pack": reward_pack if training_mode == "rl" else None, |
| "lora_rank": lora_rank, |
| "learning_rate": learning_rate, |
| "num_generations": num_generations if training_mode == "rl" else None, |
| "temperature": temperature, |
| "max_seq_length": max_seq_length, |
| "hub_model_id": hub_model_id, |
| "push_to_hub": push_to_hub, |
| "run_dir": run_dir, |
| "timestamp": timestamp, |
| } |
|
|
| |
| if training_mode == "sft": |
| script = generate_sft_script(config) |
| else: |
| script = generate_rl_script(config) |
|
|
| return f""" |
| ## Training Configuration Saved |
| |
| **Run Directory**: `{run_dir}` |
| **Timestamp**: {timestamp} |
| |
| ### Configuration: |
| ```json |
| {json.dumps(config, indent=2)} |
| ``` |
| |
| ### Generated Training Script: |
| ```python |
| {script[:2000]}... |
| ``` |
| |
| **Status**: Ready to execute. Click 'Execute Training' to start. |
| """ |
|
|
|
|
| def generate_sft_script(config): |
| """Generate SFT training script.""" |
| preset = SAMPLE_PRESETS[config["sample_preset"]] |
|
|
| return f''' |
| # Unsloth SFT Training Script |
| # Generated: {config["timestamp"]} |
| |
| from unsloth import FastLanguageModel |
| from trl import SFTTrainer, SFTConfig |
| from datasets import load_dataset |
| |
| max_seq_length = {config["max_seq_length"]} |
| lora_rank = {config["lora_rank"]} |
| |
| # Load model with Unsloth optimizations |
| model, tokenizer = FastLanguageModel.from_pretrained( |
| model_name="{config["model_name"]}", |
| max_seq_length=max_seq_length, |
| load_in_4bit=True, |
| dtype=None, |
| ) |
| |
| # Add LoRA adapters |
| model = FastLanguageModel.get_peft_model( |
| model, |
| r=lora_rank, |
| target_modules=["q_proj", "k_proj", "v_proj", "o_proj", |
| "gate_proj", "up_proj", "down_proj"], |
| lora_alpha=lora_rank, |
| lora_dropout=0, |
| bias="none", |
| use_gradient_checkpointing="unsloth", |
| random_state=3407, |
| ) |
| |
| # Load and prepare dataset |
| dataset = load_dataset("your_dataset", split="train") |
| |
| # Configure trainer |
| trainer = SFTTrainer( |
| model=model, |
| tokenizer=tokenizer, |
| train_dataset=dataset, |
| args=SFTConfig( |
| per_device_train_batch_size=2, |
| gradient_accumulation_steps=4, |
| warmup_steps=10, |
| max_steps={preset["max_steps"]}, |
| learning_rate={config["learning_rate"]}, |
| optim="adamw_8bit", |
| packing=True, |
| max_length=max_seq_length, |
| output_dir="{config["run_dir"]}", |
| report_to="none", |
| ), |
| ) |
| |
| # Train |
| trainer.train() |
| |
| # Save |
| model.save_pretrained_merged("{config["run_dir"]}/merged", tokenizer, save_method="merged_16bit") |
| ''' |
|
|
|
|
| def generate_rl_script(config): |
| """Generate RL training script.""" |
| preset = SAMPLE_PRESETS[config["sample_preset"]] |
| rl_config = RL_METHODS[config["rl_method"]]["config"] |
|
|
| return f''' |
| # Unsloth RL Training Script ({config["rl_method"].upper()}) |
| # Generated: {config["timestamp"]} |
| |
| from unsloth import FastLanguageModel, PatchFastRL |
| PatchFastRL("GRPO", FastLanguageModel) # CRITICAL: Must be BEFORE trl import |
| |
| from trl import GRPOConfig, GRPOTrainer |
| from datasets import load_dataset |
| |
| max_seq_length = {config["max_seq_length"]} |
| lora_rank = {config["lora_rank"]} |
| |
| # Load model with Unsloth optimizations + vLLM fast inference |
| model, tokenizer = FastLanguageModel.from_pretrained( |
| model_name="{config["model_name"]}", |
| max_seq_length=max_seq_length, |
| load_in_4bit=True, |
| fast_inference=True, |
| max_lora_rank=lora_rank, |
| gpu_memory_utilization=0.6, |
| ) |
| |
| # Add LoRA adapters |
| model = FastLanguageModel.get_peft_model( |
| model, |
| r=lora_rank, |
| target_modules=["q_proj", "k_proj", "v_proj", "o_proj", |
| "gate_proj", "up_proj", "down_proj"], |
| lora_alpha=lora_rank, |
| use_gradient_checkpointing="unsloth", |
| random_state=3407, |
| ) |
| |
| # Reward functions from pack: {config["reward_pack"]} |
| def xmlcount_reward_func(completions, **kwargs): |
| def count_xml(text): |
| count = 0.0 |
| if text.count("<reasoning>\\n") == 1: count += 0.125 |
| if text.count("\\n</reasoning>\\n") == 1: count += 0.125 |
| if text.count("\\n<answer>\\n") == 1: count += 0.125 |
| if text.count("\\n</answer>") == 1: count += 0.125 |
| return count |
| return [count_xml(c[0]["content"]) for c in completions] |
| |
| def correctness_reward_func(prompts, completions, answer, **kwargs): |
| def extract_answer(text): |
| if "<answer>" in text and "</answer>" in text: |
| return text.split("<answer>")[-1].split("</answer>")[0].strip() |
| return text.strip() |
| responses = [c[0]["content"] for c in completions] |
| extracted = [extract_answer(r) for r in responses] |
| return [2.0 if r == a else 0.0 for r, a in zip(extracted, answer)] |
| |
| # Load dataset |
| dataset = load_dataset("openai/gsm8k", "main", split="train") |
| |
| # Configure GRPO trainer |
| training_args = GRPOConfig( |
| output_dir="{config["run_dir"]}", |
| learning_rate={config["learning_rate"]}, |
| per_device_train_batch_size=1, |
| gradient_accumulation_steps=4, |
| num_generations={config["num_generations"]}, |
| max_prompt_length=256, |
| max_completion_length={config["max_seq_length"]} - 256, |
| max_steps={preset["max_steps"]}, |
| temperature={config["temperature"]}, |
| loss_type="{rl_config.get("loss_type", "grpo")}", |
| importance_sampling_level="{rl_config.get("importance_sampling_level", "token")}", |
| optim="adamw_8bit", |
| warmup_ratio=0.1, |
| lr_scheduler_type="cosine", |
| max_grad_norm=0.1, |
| report_to="none", |
| ) |
| |
| # Initialize trainer |
| trainer = GRPOTrainer( |
| model=model, |
| processing_class=tokenizer, |
| reward_funcs=[xmlcount_reward_func, correctness_reward_func], |
| args=training_args, |
| train_dataset=dataset, |
| ) |
| |
| # Train |
| trainer.train() |
| |
| # Save |
| model.save_pretrained("{config["run_dir"]}/lora") |
| tokenizer.save_pretrained("{config["run_dir"]}/lora") |
| ''' |
|
|
|
|
| |
| |
| |
|
|
| def create_ui(): |
| """Create Gradio interface.""" |
|
|
| with gr.Blocks(title="Unsloth Training Hub", theme=gr.themes.Soft()) as demo: |
| gr.Markdown("# Unsloth Training Hub") |
| gr.Markdown("Comprehensive LLM Fine-tuning & RL Platform") |
|
|
| |
| status = get_environment_status() |
| gr.Markdown(format_status_markdown(status)) |
|
|
| with gr.Tabs(): |
| |
| with gr.Tab("1. Model Selection"): |
| model_type = gr.Radio( |
| choices=["text_llm", "vision_vlm", "embedding", "multimodal"], |
| value="text_llm", |
| label="Model Type", |
| ) |
|
|
| model_dropdown = gr.Dropdown( |
| choices=get_model_list("text_llm"), |
| value="unsloth/Qwen2.5-7B-Instruct", |
| label="Select Model", |
| filterable=True, |
| ) |
|
|
| def update_models(model_type): |
| models = get_model_list(model_type) |
| return gr.Dropdown(choices=models, value=models[0] if models else None) |
|
|
| model_type.change(update_models, model_type, model_dropdown) |
|
|
| |
| with gr.Tab("2. Training Mode"): |
| training_mode = gr.Radio( |
| choices=["sft", "rl"], |
| value="sft", |
| label="Training Mode", |
| info="SFT: Supervised Fine-Tuning | RL: Reinforcement Learning" |
| ) |
|
|
| with gr.Group(visible=False) as rl_options: |
| rl_method = gr.Dropdown( |
| choices=list(RL_METHODS.keys()), |
| value="grpo", |
| label="RL Method", |
| ) |
|
|
| rl_info = gr.Markdown(RL_METHODS["grpo"]["description"]) |
|
|
| def update_rl_info(method): |
| return RL_METHODS[method]["description"] |
|
|
| rl_method.change(update_rl_info, rl_method, rl_info) |
|
|
| reward_pack = gr.Dropdown( |
| choices=list(REWARD_PACKS.keys()), |
| value="reasoning_xml", |
| label="Reward Pack", |
| ) |
|
|
| custom_reward = gr.Code( |
| label="Custom Reward Function (Optional)", |
| language="python", |
| value="# def custom_reward(completions, **kwargs):\n# return [1.0 for _ in completions]", |
| ) |
|
|
| num_generations = gr.Slider( |
| minimum=2, maximum=16, value=8, step=2, |
| label="Generations per Prompt", |
| ) |
|
|
| temperature = gr.Slider( |
| minimum=0.1, maximum=2.0, value=1.0, step=0.1, |
| label="Generation Temperature", |
| ) |
|
|
| def toggle_rl_options(mode): |
| return gr.Group(visible=(mode == "rl")) |
|
|
| training_mode.change(toggle_rl_options, training_mode, rl_options) |
|
|
| |
| with gr.Tab("3. Training Config"): |
| sample_preset = gr.Radio( |
| choices=list(SAMPLE_PRESETS.keys()), |
| value="small_run", |
| label="Sample Size Preset", |
| ) |
|
|
| preset_info = gr.Markdown( |
| f"**{SAMPLE_PRESETS['small_run']['description']}** - " |
| f"{SAMPLE_PRESETS['small_run']['samples']} samples, " |
| f"{SAMPLE_PRESETS['small_run']['max_steps']} steps" |
| ) |
|
|
| def update_preset_info(preset): |
| p = SAMPLE_PRESETS[preset] |
| return f"**{p['description']}** - {p['samples']} samples, {p['max_steps']} steps" |
|
|
| sample_preset.change(update_preset_info, sample_preset, preset_info) |
|
|
| with gr.Row(): |
| lora_rank = gr.Dropdown( |
| choices=[8, 16, 32, 64, 128], |
| value=32, |
| label="LoRA Rank", |
| ) |
|
|
| learning_rate = gr.Number( |
| value=5e-6, |
| label="Learning Rate", |
| ) |
|
|
| max_seq_length = gr.Dropdown( |
| choices=[512, 1024, 2048, 4096, 8192, 16384, 32768], |
| value=2048, |
| label="Max Sequence Length", |
| ) |
|
|
| |
| with gr.Tab("4. Output & Hub"): |
| hub_model_id = gr.Textbox( |
| value="wheattoast11/unsloth-trained-model", |
| label="HuggingFace Hub Model ID", |
| ) |
|
|
| push_to_hub = gr.Checkbox( |
| value=True, |
| label="Push to HuggingFace Hub after training", |
| ) |
|
|
| output_format = gr.CheckboxGroup( |
| choices=["merged_16bit", "merged_4bit", "lora", "gguf_q4_k_m", "gguf_q8_0"], |
| value=["merged_16bit", "lora"], |
| label="Output Formats", |
| ) |
|
|
| |
| gr.Markdown("---") |
|
|
| with gr.Row(): |
| start_btn = gr.Button("Generate Training Config", variant="primary", scale=2) |
| execute_btn = gr.Button("Execute Training", variant="secondary", scale=1) |
|
|
| output = gr.Markdown("Configure your training and click 'Generate Training Config'") |
|
|
| start_btn.click( |
| start_training, |
| inputs=[ |
| model_dropdown, |
| model_type, |
| training_mode, |
| rl_method, |
| sample_preset, |
| reward_pack, |
| custom_reward, |
| lora_rank, |
| learning_rate, |
| num_generations, |
| temperature, |
| max_seq_length, |
| hub_model_id, |
| push_to_hub, |
| ], |
| outputs=output, |
| ) |
|
|
| gr.Markdown("---") |
| gr.Markdown("**Intuition Labs** | Unsloth Training Hub | L40S ~$1.80/hr - PAUSE when not training!") |
|
|
| return demo |
|
|
|
|
| if __name__ == "__main__": |
| demo = create_ui() |
| demo.launch(server_name="0.0.0.0", server_port=7860) |