linoyts HF Staff committed on
Commit
cbeb747
·
1 Parent(s): 8dd5e45

Upgrade to PR#237 trainer (torch 2.9.1/cu128) + enable RTX PRO 6000 + 6h timeout (#1)

Browse files

- Upgrade to PR#237 trainer (torch 2.9.1/cu128) + enable RTX PRO 6000 + 6h timeout (55f8a3416ffa507ebdf6c6fcd108c1c9a990deb4)

Files changed (2) hide show
  1. app.py +5 -5
  2. jobs.py +1 -1
app.py CHANGED
@@ -16,16 +16,16 @@ import gradio as gr
16
 
17
  import jobs
18
 
19
- # Single-GPU flavors that fit the 22B model AND work with the trainer's pinned PyTorch (cu126).
20
- # NOTE: rtx-pro-6000 is excluded — it's Blackwell (sm_100/120) and cu126 has no kernels for it
21
- # ("CUDA error: no kernel image is available"). Multi-GPU flavors are omitted (single-GPU job).
22
- FLAVORS = ["l40sx1", "a100-large", "h200"]
23
  FLAVOR_GUIDE = """**Which GPU?** You're billed per-minute of actual runtime.
24
 
25
  | Flavor | VRAM | $/h | Best for |
26
  |---|---|---|---|
27
  | `l40sx1` | 48 GB | $1.80 | cheapest — pair with the **Low-VRAM** profile (int8) |
28
  | `a100-large` | 80 GB | $2.50 | **recommended** — fits the 22B model in bf16 |
 
29
  | `h200` | 141 GB | $5.00 | fastest — best for higher resolution / longer clips |
30
  """
31
  MAX_LOG = 60_000
@@ -356,7 +356,7 @@ with gr.Blocks(title="LTX-2.3 LoRA Trainer") as demo:
356
  flavor = gr.Dropdown(FLAVORS, value=jobs.DEFAULT_FLAVOR, label="GPU flavor",
357
  info="Auto-set from your profile (Quality → a100-large · Low-VRAM → l40sx1); "
358
  "change it freely. See the guide below.")
359
- timeout = gr.Textbox(label="Timeout", value="4h",
360
  info="Max job runtime (e.g. 4h). First run spends ~minutes downloading the model.")
361
  with gr.Accordion("GPU guide — which flavor fits when", open=False):
362
  gr.Markdown(FLAVOR_GUIDE)
 
16
 
17
  import jobs
18
 
19
+ # Single-GPU flavors that fit the 22B model. The v2 trainer pins torch 2.9.1/cu128, which has
20
+ # Blackwell (sm_120) kernels, so rtx-pro-6000 works here. Multi-GPU flavors omitted (single-GPU job).
21
+ FLAVORS = ["l40sx1", "a100-large", "rtx-pro-6000", "h200"]
 
22
  FLAVOR_GUIDE = """**Which GPU?** You're billed per-minute of actual runtime.
23
 
24
  | Flavor | VRAM | $/h | Best for |
25
  |---|---|---|---|
26
  | `l40sx1` | 48 GB | $1.80 | cheapest — pair with the **Low-VRAM** profile (int8) |
27
  | `a100-large` | 80 GB | $2.50 | **recommended** — fits the 22B model in bf16 |
28
+ | `rtx-pro-6000` | 96 GB | $2.75 | a bit more headroom (Blackwell) |
29
  | `h200` | 141 GB | $5.00 | fastest — best for higher resolution / longer clips |
30
  """
31
  MAX_LOG = 60_000
 
356
  flavor = gr.Dropdown(FLAVORS, value=jobs.DEFAULT_FLAVOR, label="GPU flavor",
357
  info="Auto-set from your profile (Quality → a100-large · Low-VRAM → l40sx1); "
358
  "change it freely. See the guide below.")
359
+ timeout = gr.Textbox(label="Timeout", value="6h",
360
  info="Max job runtime (e.g. 4h). First run spends ~minutes downloading the model.")
361
  with gr.Accordion("GPU guide — which flavor fits when", open=False):
362
  gr.Markdown(FLAVOR_GUIDE)
jobs.py CHANGED
@@ -28,7 +28,7 @@ from pathlib import Path
28
  import yaml
29
  from huggingface_hub import HfApi
30
 
31
- SRC_BUCKET = os.environ.get("LTX_SRC_BUCKET", "ltx-community/ltx2-trainer-src")
32
  DEFAULT_FLAVOR = "a100-large" # the largest single-GPU flavor `hf jobs uv run` accepts (80GB)
33
  VIDEO_EXTS = {".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"}
34
 
 
28
  import yaml
29
  from huggingface_hub import HfApi
30
 
31
+ SRC_BUCKET = os.environ.get("LTX_SRC_BUCKET", "ltx-community/ltx2-trainer-src-v2")
32
  DEFAULT_FLAVOR = "a100-large" # the largest single-GPU flavor `hf jobs uv run` accepts (80GB)
33
  VIDEO_EXTS = {".mp4", ".mov", ".mkv", ".webm", ".avi", ".m4v"}
34