Spaces:

ltx-community
/

ltx2-lora-trainer

Running

linoyts HF Staff committed 7 days ago

Commit

cbeb747

1 Parent(s): 8dd5e45

Upgrade to PR#237 trainer (torch 2.9.1/cu128) + enable RTX PRO 6000 + 6h timeout (#1)

- Upgrade to PR#237 trainer (torch 2.9.1/cu128) + enable RTX PRO 6000 + 6h timeout (55f8a3416ffa507ebdf6c6fcd108c1c9a990deb4)

Files changed (2) hide show

app.py CHANGED Viewed

@@ -16,16 +16,16 @@ import gradio as gr
 import jobs
-# Single-GPU flavors that fit the 22B model AND work with the trainer's pinned PyTorch (cu126).
-# NOTE: rtx-pro-6000 is excluded — it's Blackwell (sm_100/120) and cu126 has no kernels for it
-# ("CUDA error: no kernel image is available"). Multi-GPU flavors are omitted (single-GPU job).
-FLAVORS = ["l40sx1", "a100-large", "h200"]
 FLAVOR_GUIDE = """**Which GPU?** You're billed per-minute of actual runtime.
 | Flavor | VRAM | $/h | Best for |
 |---|---|---|---|
 | `l40sx1` | 48 GB | $1.80 | cheapest — pair with the **Low-VRAM** profile (int8) |
 | `a100-large` | 80 GB | $2.50 | **recommended** — fits the 22B model in bf16 |
 | `h200` | 141 GB | $5.00 | fastest — best for higher resolution / longer clips |
 """
 MAX_LOG = 60_000
@@ -356,7 +356,7 @@ with gr.Blocks(title="LTX-2.3 LoRA Trainer") as demo:
                 flavor = gr.Dropdown(FLAVORS, value=jobs.DEFAULT_FLAVOR, label="GPU flavor",
                                      info="Auto-set from your profile (Quality → a100-large · Low-VRAM → l40sx1); "
                                           "change it freely. See the guide below.")
-                timeout = gr.Textbox(label="Timeout", value="4h",
                                      info="Max job runtime (e.g. 4h). First run spends ~minutes downloading the model.")
             with gr.Accordion("GPU guide — which flavor fits when", open=False):
                 gr.Markdown(FLAVOR_GUIDE)

 import jobs
+# Single-GPU flavors that fit the 22B model. The v2 trainer pins torch 2.9.1/cu128, which has
+# Blackwell (sm_120) kernels, so rtx-pro-6000 works here. Multi-GPU flavors omitted (single-GPU job).
+FLAVORS = ["l40sx1", "a100-large", "rtx-pro-6000", "h200"]
 FLAVOR_GUIDE = """**Which GPU?** You're billed per-minute of actual runtime.
 | Flavor | VRAM | $/h | Best for |
 |---|---|---|---|
 | `l40sx1` | 48 GB | $1.80 | cheapest — pair with the **Low-VRAM** profile (int8) |
 | `a100-large` | 80 GB | $2.50 | **recommended** — fits the 22B model in bf16 |
+| `rtx-pro-6000` | 96 GB | $2.75 | a bit more headroom (Blackwell) |
 | `h200` | 141 GB | $5.00 | fastest — best for higher resolution / longer clips |
 """
 MAX_LOG = 60_000
                 flavor = gr.Dropdown(FLAVORS, value=jobs.DEFAULT_FLAVOR, label="GPU flavor",
                                      info="Auto-set from your profile (Quality → a100-large · Low-VRAM → l40sx1); "
                                           "change it freely. See the guide below.")
+                timeout = gr.Textbox(label="Timeout", value="6h",
                                      info="Max job runtime (e.g. 4h). First run spends ~minutes downloading the model.")
             with gr.Accordion("GPU guide — which flavor fits when", open=False):
                 gr.Markdown(FLAVOR_GUIDE)

jobs.py CHANGED Viewed

@@ -28,7 +28,7 @@ from pathlib import Path
 import yaml
 from huggingface_hub import HfApi
-SRC_BUCKET = os.environ.get("LTX_SRC_BUCKET", "ltx-community/ltx2-trainer-src")
 DEFAULT_FLAVOR = "a100-large"  # recommended default: 80 GB fits the 22B model in bf16 (not the largest flavor offered; h200 has 141 GB)
 VIDEO_EXTS = {".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"}

 import yaml
 from huggingface_hub import HfApi
+SRC_BUCKET = os.environ.get("LTX_SRC_BUCKET", "ltx-community/ltx2-trainer-src-v2")
 DEFAULT_FLAVOR = "a100-large"  # recommended default: 80 GB fits the 22B model in bf16 (not the largest flavor offered; h200 has 141 GB)
 VIDEO_EXTS = {".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"}