Buckets:

ltx-community
/

ltx2-trainer-src

Files

xet

ltx-community/ltx2-trainer-src / packages /ltx-pipelines /src /ltx_pipelines /utils /args.py

linoyts

13 days ago

download

raw

29.1 kB

	import argparse
	import json
	from collections.abc import Sequence
	from pathlib import Path
	from typing import Any, NamedTuple

	from ltx_core.loader import LTXV_LORA_COMFY_RENAMING_MAP, LoraPathStrengthAndSDOps
	from ltx_core.model.transformer.compiling import CompilationConfig
	from ltx_core.quantization import QuantizationPolicy
	from ltx_pipelines.utils.constants import (
	DEFAULT_IMAGE_CRF,
	DEFAULT_LORA_STRENGTH,
	DEFAULT_NEGATIVE_PROMPT,
	LTX_2_3_HQ_PARAMS,
	LTX_2_3_PARAMS,
	PipelineParams,
	)
	from ltx_pipelines.utils.quantization_factory import QuantizationKind
	from ltx_pipelines.utils.types import OffloadMode


	class ImageConditioningInput(NamedTuple):
	    """One image-conditioning request, as built by ``ImageAction``.

	    Populated from the ``PATH FRAME_IDX STRENGTH [CRF]`` values of a single
	    ``--image`` occurrence.
	    """

	    # Path of the image file.
	    path: str
	    # Target frame index for the image.
	    frame_idx: int
	    # Conditioning strength.
	    strength: float
	    # Compression quality; DEFAULT_IMAGE_CRF is used when the caller omits it.
	    crf: int = DEFAULT_IMAGE_CRF


	class VideoConditioningAction(argparse.Action):
	def __call__(
	self,
	parser: argparse.ArgumentParser, # noqa: ARG002
	namespace: argparse.Namespace,
	values: list[str],
	option_string: str \| None = None, # noqa: ARG002
	) -> None:
	path, strength_str = values
	resolved_path = resolve_existing_path(path)
	strength = float(strength_str)
	current = getattr(namespace, self.dest) or []
	current.append((resolved_path, strength))
	setattr(namespace, self.dest, current)


	class VideoMaskConditioningAction(argparse.Action):
	"""Parse ``--conditioning-attention-mask PATH STRENGTH``.
	Stores a ``(mask_path, strength)`` tuple on the namespace. The mask video
	should be grayscale with pixel values in [0, 1] controlling per-region
	conditioning attention strength. The scalar STRENGTH is multiplied with
	the spatial mask before it is applied.
	"""

	def __call__(
	self,
	parser: argparse.ArgumentParser, # noqa: ARG002
	namespace: argparse.Namespace,
	values: list[str],
	option_string: str \| None = None,
	) -> None:
	if len(values) != 2:
	msg = f"{option_string} requires exactly 2 arguments (MASK_PATH STRENGTH), got {len(values)}"
	raise argparse.ArgumentError(self, msg)

	mask_path = resolve_existing_path(values[0])
	strength = float(values[1])
	setattr(namespace, self.dest, (mask_path, strength))


	class ImageAction(argparse.Action):
	def __call__(
	self,
	parser: argparse.ArgumentParser, # noqa: ARG002
	namespace: argparse.Namespace,
	values: list[str],
	option_string: str \| None = None,
	) -> None:
	if len(values) not in (3, 4):
	msg = f"{option_string} requires 3 or 4 arguments (PATH FRAME_IDX STRENGTH [CRF]), got {len(values)}"
	raise argparse.ArgumentError(self, msg)

	conditioning = ImageConditioningInput(
	path=resolve_existing_path(values[0]),
	frame_idx=int(values[1]),
	strength=float(values[2]),
	crf=int(values[3]) if len(values) > 3 else DEFAULT_IMAGE_CRF,
	)
	current = getattr(namespace, self.dest) or []
	current.append(conditioning)
	setattr(namespace, self.dest, current)


	class LoraAction(argparse.Action):
	def __call__(
	self,
	parser: argparse.ArgumentParser, # noqa: ARG002
	namespace: argparse.Namespace,
	values: list[str],
	option_string: str \| None = None,
	) -> None:
	if len(values) > 2:
	msg = f"{option_string} accepts at most 2 arguments (PATH and optional STRENGTH), got {len(values)} values"
	raise argparse.ArgumentError(self, msg)

	path = values[0]
	strength_str = values[1] if len(values) > 1 else str(DEFAULT_LORA_STRENGTH)

	resolved_path = resolve_existing_path(path)
	strength = float(strength_str)

	current = getattr(namespace, self.dest) or []
	current.append(LoraPathStrengthAndSDOps(resolved_path, strength, LTXV_LORA_COMFY_RENAMING_MAP))
	setattr(namespace, self.dest, current)


	class CompileAction(argparse.Action):
	"""Parse ``--compile [KEY=VALUE ...]`` into a :class:`CompilationConfig`.
	The flag is absent -> ``args.compile`` stays at its default (``None``).
	The flag is passed alone -> ``CompilationConfig()`` (vanilla torch defaults).
	The flag is passed with args -> ``CompilationConfig`` with the given fields overridden.
	Errors (unknown key, malformed value, duplicate key, empty value) raise
	:class:`argparse.ArgumentError` so argparse formats them as friendly CLI
	messages rather than uncaught tracebacks.
	"""

	_ALLOWED_KEYS = frozenset({"mode", "backend", "fullgraph", "dynamic", "inductor_config", "dynamo_config"})

	def __call__(
	self,
	parser: argparse.ArgumentParser, # noqa: ARG002
	namespace: argparse.Namespace,
	values: list[str],
	option_string: str \| None = None, # noqa: ARG002
	) -> None:
	overrides: dict[str, object] = {}
	for item in values:
	if "=" not in item:
	raise argparse.ArgumentError(self, f"expects KEY=VALUE pairs, got: {item!r}")
	key, _, raw = item.partition("=")
	key = key.strip()
	if key not in self._ALLOWED_KEYS:
	raise argparse.ArgumentError(
	self,
	f"{key!r} is not a CompilationConfig field; valid keys: {sorted(self._ALLOWED_KEYS)}",
	)
	if key in overrides:
	raise argparse.ArgumentError(self, f"{key} given more than once")
	if key == "mode":
	overrides[key] = self._parse_mode(raw)
	elif key == "backend":
	overrides[key] = self._parse_non_empty(key, raw)
	elif key == "fullgraph":
	overrides[key] = self._parse_bool(key, raw)
	elif key == "dynamic":
	overrides[key] = self._parse_dynamic(raw)
	elif key in ("inductor_config", "dynamo_config"):
	overrides[key] = self._parse_json_dict(key, raw)
	setattr(namespace, self.dest, CompilationConfig(**overrides))

	def _parse_mode(self, raw: str) -> str \| None:
	stripped = raw.strip()
	if not stripped:
	raise argparse.ArgumentError(self, "mode=... value cannot be empty (use mode=none to clear)")
	if stripped.lower() == "none":
	return None
	return stripped

	def _parse_non_empty(self, key: str, raw: str) -> str:
	stripped = raw.strip()
	if not stripped:
	raise argparse.ArgumentError(self, f"{key}=... value cannot be empty")
	return stripped

	def _parse_bool(self, key: str, raw: str) -> bool:
	normalized = raw.strip().lower()
	if normalized in ("true", "1"):
	return True
	if normalized in ("false", "0"):
	return False
	raise argparse.ArgumentError(self, f"{key}=... must be true or false; got {raw!r}")

	def _parse_dynamic(self, raw: str) -> bool \| None:
	normalized = raw.strip().lower()
	if normalized in ("auto", "none"):
	return None
	if normalized in ("true", "1"):
	return True
	if normalized in ("false", "0"):
	return False
	raise argparse.ArgumentError(self, f"dynamic=... must be auto/true/false; got {raw!r}")

	def _parse_json_dict(self, key: str, raw: str) -> dict[str, Any]:
	# Inline JSON object starts with '{'; otherwise treat the value as a path to a JSON file.
	stripped = raw.strip()
	if not stripped:
	raise argparse.ArgumentError(self, f"{key}=... value cannot be empty")
	if stripped.startswith("{"):
	source = stripped
	else:
	path = Path(stripped).expanduser()
	if not path.is_file():
	raise argparse.ArgumentError(
	self, f"{key}=... must be a JSON object or a path to a JSON file; got {raw!r}"
	)
	source = path.read_text()
	try:
	value = json.loads(source)
	except json.JSONDecodeError as e:
	raise argparse.ArgumentError(self, f"{key}=... must be a JSON object; got {raw!r} ({e.msg})") from None
	if not isinstance(value, dict):
	raise argparse.ArgumentError(self, f"{key}=... must decode to a JSON object; got {type(value).__name__}")
	return value


	def resolve_path(path: str) -> str:
	    """Return ``path`` user-expanded and resolved, as a forward-slash string.

	    The path is not required to exist.
	    """
	    absolute = Path(path).expanduser().resolve()
	    return str(absolute.as_posix())


	def resolve_existing_path(path: str) -> str:
	    """Resolve ``path`` via ``resolve_path`` and require that it exists.

	    Raises:
	        argparse.ArgumentError: (with no associated action) when nothing exists
	            at the resolved location.
	    """
	    resolved = resolve_path(path)
	    if Path(resolved).exists():
	        return resolved
	    raise argparse.ArgumentError(None, f"Path not found: {resolved}")


	# String values of every QuantizationKind member; used as the accepted --quantization choices.
	QUANTIZATION_POLICIES = tuple(member.value for member in QuantizationKind)


	def _resolve_quantization(namespace: argparse.Namespace) -> None:
	# Resolution is deferred until after parse_args because fp8-scaled-mm needs the
	# checkpoint path, which isn't on the namespace when the --quantization argument
	# is parsed.
	name = getattr(namespace, "quantization", None)
	if name is None or isinstance(name, QuantizationPolicy):
	return
	try:
	kind = QuantizationKind(name)
	except ValueError:
	return
	ckpt = getattr(namespace, "checkpoint_path", None) or getattr(namespace, "distilled_checkpoint_path", None)
	if ckpt is None:
	raise SystemExit(f"--quantization {kind.value} requires --checkpoint-path (or --distilled-checkpoint-path).")
	namespace.quantization = kind.to_policy(checkpoint_path=ckpt)


	class _PipelineArgumentParser(argparse.ArgumentParser):
	    """ArgumentParser whose ``parse_args`` also resolves ``--quantization``."""

	    def parse_args(  # type: ignore[override]
	        self,
	        args: Sequence[str] | None = None,
	        namespace: argparse.Namespace | None = None,
	    ) -> argparse.Namespace:
	        """Parse as usual, then run ``_resolve_quantization`` on the resulting namespace."""
	        parsed = super().parse_args(args, namespace)
	        _resolve_quantization(parsed)
	        return parsed


	def detect_checkpoint_path(distilled: bool = False) -> str:
	    """Pre-parse argv to extract the checkpoint path before building the full parser.

	    Args:
	        distilled: read ``--distilled-checkpoint-path`` instead of ``--checkpoint-path``.

	    Returns:
	        The resolved checkpoint path; the selected flag is required and must
	        point at an existing path.
	    """
	    if distilled:
	        flag, attr = "--distilled-checkpoint-path", "distilled_checkpoint_path"
	    else:
	        flag, attr = "--checkpoint-path", "checkpoint_path"

	    # Help is disabled and unknown options are tolerated: this parser only knows one flag.
	    pre_parser = argparse.ArgumentParser(add_help=False)
	    pre_parser.add_argument(flag, type=resolve_existing_path, required=True)
	    known_args, _unparsed = pre_parser.parse_known_args()
	    return getattr(known_args, attr)


	def basic_arg_parser(
	    params: PipelineParams = LTX_2_3_PARAMS,
	    distilled: bool = False,
	) -> argparse.ArgumentParser:
	    """Build the base CLI parser that the other parser builders in this module extend.

	    Args:
	        params: supplies the defaults for ``--num-inference-steps`` and ``--seed``.
	        distilled: when true, register ``--distilled-checkpoint-path`` instead of
	            ``--checkpoint-path``.

	    Returns:
	        A ``_PipelineArgumentParser`` with the checkpoint, sampling, text-encoder,
	        prompt, output, LoRA, offload, batch-size, quantization and compile options.
	    """
	    parser = _PipelineArgumentParser()
	    # Exactly one of the two checkpoint flags is registered, and it is required.
	    if distilled:
	        parser.add_argument(
	            "--distilled-checkpoint-path",
	            type=resolve_existing_path,
	            required=True,
	            help="Path to LTX-2 distilled model checkpoint (.safetensors file).",
	        )
	    else:
	        parser.add_argument(
	            "--checkpoint-path",
	            type=resolve_existing_path,
	            required=True,
	            help="Path to LTX-2 model checkpoint (.safetensors file).",
	        )
	    parser.add_argument(
	        "--num-inference-steps",
	        type=int,
	        default=params.num_inference_steps,
	        help=(
	            f"Number of denoising steps in the diffusion sampling process. "
	            f"Higher values improve quality but increase generation time (default: {params.num_inference_steps})."
	        ),
	    )
	    parser.add_argument(
	        "--gemma-root",
	        type=resolve_existing_path,
	        required=True,
	        help="Path to the root directory containing the Gemma text encoder model files.",
	    )
	    parser.add_argument(
	        "--prompt",
	        type=str,
	        required=True,
	        help="Text prompt describing the desired video content to be generated by the model.",
	    )
	    # The output file need not exist yet, so it is resolved without an existence check.
	    parser.add_argument(
	        "--output-path",
	        type=resolve_path,
	        required=True,
	        help="Path to the output video file (MP4 format).",
	    )
	    parser.add_argument(
	        "--seed",
	        type=int,
	        default=params.seed,
	        help=f"Random seed for reproducible generation (default: {params.seed}).",
	    )
	    parser.add_argument(
	        "--lora",
	        dest="lora",
	        action=LoraAction,
	        nargs="+",  # Accept 1-2 arguments per use (path and optional strength); validation is handled in LoraAction
	        metavar=("PATH", "STRENGTH"),
	        default=[],
	        help=(
	            "LoRA (Low-Rank Adaptation) model: path to model file and optional strength "
	            f"(default strength: {DEFAULT_LORA_STRENGTH}). Can be specified multiple times. "
	            "Example: --lora path/to/lora1.safetensors 0.8 --lora path/to/lora2.safetensors"
	        ),
	    )

	    parser.add_argument("--enhance-prompt", action="store_true")

	    def _positive_int(value: str) -> int:
	        """argparse ``type`` callable accepting only integers >= 1."""
	        try:
	            int_value = int(value)
	            # ArgumentTypeError is not a ValueError, so it passes through the except clause below.
	            if int_value < 1:
	                raise argparse.ArgumentTypeError("must be >= 1")
	            return int_value
	        except ValueError as e:
	            raise argparse.ArgumentTypeError(f"must be an integer, got {value}") from e

	    # Weight offloading
	    parser.add_argument(
	        "--offload",
	        dest="offload_mode",
	        type=OffloadMode,
	        default=OffloadMode.NONE,
	        choices=list(OffloadMode),
	        help=(
	            "Weight offloading strategy. "
	            "'none' keeps all weights on GPU (default). "
	            "'cpu' pins weights in CPU RAM, streams to GPU per layer. "
	            "'disk' reads weights from disk on demand (lowest memory). "
	            "Example: --offload cpu"
	        ),
	    )

	    parser.add_argument(
	        "--max-batch-size",
	        type=_positive_int,
	        default=1,
	        metavar="N",
	        help=(
	            "Maximum batch size per transformer forward pass. "
	            "Guided denoisers batch up to 4 guidance passes into a single call. "
	            "Default 1 runs passes sequentially. Set to 4 to batch all passes "
	            "together, which reduces layer-streaming PCIe transfers. "
	            "Example: --max-batch-size 4"
	        ),
	    )

	    # Stored as the policy name here; _PipelineArgumentParser.parse_args converts it afterwards.
	    parser.add_argument(
	        "--quantization",
	        choices=QUANTIZATION_POLICIES,
	        default=None,
	        help=(
	            f"Quantization policy: {', '.join(QUANTIZATION_POLICIES)}. "
	            "fp8-cast uses FP8 casting with upcasting during inference. "
	            "fp8-scaled-mm uses FP8 scaled matrix multiplication; the layer set is auto-discovered "
	            "from the checkpoint's .weight_scale tensors. "
	            "Example: --quantization fp8-cast or --quantization fp8-scaled-mm"
	        ),
	    )
	    # nargs="*" lets the flag be passed alone or with KEY=VALUE items; CompileAction parses them.
	    parser.add_argument(
	        "--compile",
	        nargs="*",
	        action=CompileAction,
	        default=None,
	        metavar="KEY=VALUE",
	        help=(
	            "Enable torch.compile for transformer blocks. Pass alone for defaults, "
	            "or with KEY=VALUE overrides for any CompilationConfig field. "
	            "Keys: mode, backend, fullgraph, dynamic, inductor_config, dynamo_config. "
	            "inductor_config/dynamo_config take JSON objects (inline or a path to a .json file) "
	            "that fully replace the defaults. "
	            "Examples: --compile or --compile mode=reduce-overhead or "
	            "--compile mode=reduce-overhead fullgraph=true backend=eager or "
	            "--compile inductor_config='{\"max_autotune\": true}'"
	        ),
	    )
	    return parser


	def new_video_gen_arg_parser(
	    params: PipelineParams = LTX_2_3_PARAMS,
	    distilled: bool = False,
	) -> argparse.ArgumentParser:
	    """Extend ``basic_arg_parser`` with the options for generating a new video.

	    Adds ``--height``, ``--width``, ``--num-frames``, ``--frame-rate`` (defaults
	    taken from ``params``) and the repeatable ``--image`` conditioning option.
	    """
	    parser = basic_arg_parser(params=params, distilled=distilled)

	    height_help = f"Video height in pixels, divisible by 32 (default: {params.stage_1_height})."
	    parser.add_argument("--height", type=int, default=params.stage_1_height, help=height_help)

	    width_help = (
	        f"Width of the generated video in pixels, should be divisible by 32 (default: {params.stage_1_width})."
	    )
	    parser.add_argument("--width", type=int, default=params.stage_1_width, help=width_help)

	    num_frames_help = (
	        "Number of frames to generate in the output video sequence, num-frames = (8 x K) + 1, "
	        f"where k is a non-negative integer (default: {params.num_frames})."
	    )
	    parser.add_argument("--num-frames", type=int, default=params.num_frames, help=num_frames_help)

	    frame_rate_help = f"Frame rate of the generated video (fps) (default: {params.frame_rate})."
	    parser.add_argument("--frame-rate", type=float, default=params.frame_rate, help=frame_rate_help)

	    # nargs="+" accepts any count; ImageAction enforces the 3-or-4 value rule.
	    image_help = (
	        "Image conditioning input: PATH FRAME_IDX STRENGTH [CRF]. "
	        "PATH is the image file, FRAME_IDX is the target frame index, "
	        "STRENGTH is the conditioning strength (all three required). "
	        f"CRF is the optional H.264 compression quality (0=lossless, default: {DEFAULT_IMAGE_CRF}). "
	        "Can be specified multiple times. Example: --image path/to/image1.jpg 0 0.8 "
	        "--image path/to/image2.jpg 160 0.9 0"
	    )
	    parser.add_argument(
	        "--image",
	        dest="images",
	        action=ImageAction,
	        nargs="+",
	        metavar="ARG",
	        default=[],
	        help=image_help,
	    )

	    return parser


	def video_editing_arg_parser(
	    distilled: bool = True,
	) -> argparse.ArgumentParser:
	    """Base argument parser for video-editing pipelines (retake, extension, inpainting, sticker movement).

	    Starts from ``basic_arg_parser`` and adds only what editing needs: the source
	    video and the time window to regenerate. There are no height/width/num-frames
	    options because the resolution comes from the input video. The distilled
	    checkpoint is the default.
	    """
	    parser = basic_arg_parser(distilled=distilled)
	    parser.add_argument(
	        "--video-path",
	        type=resolve_existing_path,
	        required=True,
	        help="Path to the source video.",
	    )
	    # Both ends of the time window are mandatory floats, in seconds.
	    parser.add_argument(
	        "--start-time",
	        type=float,
	        required=True,
	        help="Start time of the region to regenerate (s).",
	    )
	    parser.add_argument(
	        "--end-time",
	        type=float,
	        required=True,
	        help="End time of the region to regenerate (s).",
	    )
	    return parser


	def lipdub_arg_parser(
	    params: PipelineParams = LTX_2_3_PARAMS,
	) -> argparse.ArgumentParser:
	    """Argument parser for the lip-dub pipeline.

	    Frame count and frame rate are derived from the reference video at runtime (the frame count
	    is silently snapped down to the nearest 8k+1), so this parser intentionally omits
	    --num-frames, --frame-rate, and --image. Distilled checkpoint only.

	    Args:
	        params: supplies the ``--height`` / ``--width`` defaults (stage-2 resolution).

	    Returns:
	        The ``basic_arg_parser`` (distilled) extended with resolution, spatial
	        upsampler, and reference-video options.
	    """
	    parser = basic_arg_parser(params=params, distilled=True)
	    parser.add_argument(
	        "--height",
	        type=int,
	        default=params.stage_2_height,
	        help=(
	            f"Height of the generated video in pixels, should be divisible by 64 (default: {params.stage_2_height})."
	        ),
	    )
	    parser.add_argument(
	        "--width",
	        type=int,
	        default=params.stage_2_width,
	        help=f"Width of the generated video in pixels, should be divisible by 64 (default: {params.stage_2_width}).",
	    )
	    # Both paths below are inputs that must already exist; checking at parse time matches
	    # the other parsers in this module and reports a missing file as a CLI error up front.
	    parser.add_argument(
	        "--spatial-upsampler-path",
	        type=resolve_existing_path,
	        required=True,
	        help=(
	            "Path to the spatial upsampler model used to increase the resolution "
	            "of the generated video in the latent space."
	        ),
	    )
	    parser.add_argument(
	        "--reference-video",
	        type=resolve_existing_path,
	        required=True,
	        help="Reference video file (video + audio track used for IC-LoRA and audio identity).",
	    )
	    parser.add_argument(
	        "--reference-strength",
	        type=float,
	        default=1.0,
	        help="Strength for IC-LoRA video reference conditioning (default: 1.0).",
	    )
	    return parser


	def default_1_stage_arg_parser(params: PipelineParams = LTX_2_3_PARAMS) -> argparse.ArgumentParser:
	    """Extend ``new_video_gen_arg_parser`` with the negative prompt and guidance options.

	    Args:
	        params: its ``video_guider_params`` and ``audio_guider_params`` supply the
	            defaults of the video and audio guidance options respectively.

	    Returns:
	        The extended parser. The video and audio option groups mirror each other:
	        cfg, stg, rescale, stg-blocks, cross-modality scale, and skip-step.
	    """
	    video_guider = params.video_guider_params
	    audio_guider = params.audio_guider_params
	    parser = new_video_gen_arg_parser(params=params)
	    parser.add_argument(
	        "--negative-prompt",
	        type=str,
	        default=DEFAULT_NEGATIVE_PROMPT,
	        help=(
	            "Negative prompt describing what should not appear in the generated video, "
	            "used to guide the diffusion process away from unwanted content. "
	            "Default: a comprehensive negative prompt covering common artifacts and quality issues."
	        ),
	    )
	    # --- Video guidance options (defaults from params.video_guider_params) ---
	    parser.add_argument(
	        "--video-cfg-guidance-scale",
	        type=float,
	        default=video_guider.cfg_scale,
	        help=(
	            f"Classifier-free guidance (CFG) scale controlling how strongly "
	            f"the model adheres to the video prompt. Higher values increase prompt "
	            f"adherence but may reduce diversity. 1.0 means no effect "
	            f"(default: {video_guider.cfg_scale})."
	        ),
	    )
	    parser.add_argument(
	        "--video-stg-guidance-scale",
	        type=float,
	        default=video_guider.stg_scale,
	        help=(
	            f"STG (Spatio-Temporal Guidance) scale controlling how strongly "
	            f"the model reacts to the perturbation of the video modality. Higher values increase "
	            f"the effect but may reduce quality. 0.0 means no effect "
	            f"(default: {video_guider.stg_scale})."
	        ),
	    )
	    parser.add_argument(
	        "--video-rescale-scale",
	        type=float,
	        default=video_guider.rescale_scale,
	        help=(
	            f"Rescale scale controlling how strongly "
	            f"the model rescales the video modality after applying other guidance. Higher values tend to decrease "
	            f"oversaturation effects. 0.0 means no effect (default: {video_guider.rescale_scale})."
	        ),
	    )
	    # nargs="*" collects zero or more block indices into a list of ints.
	    parser.add_argument(
	        "--video-stg-blocks",
	        type=int,
	        nargs="*",
	        default=video_guider.stg_blocks,
	        help=(f"Which transformer blocks to perturb for STG. Default: {video_guider.stg_blocks}."),
	    )
	    # The A2V scale defaults to the video guider's modality_scale.
	    parser.add_argument(
	        "--a2v-guidance-scale",
	        type=float,
	        default=video_guider.modality_scale,
	        help=(
	            f"A2V (Audio-to-Video) guidance scale controlling how strongly "
	            f"the model reacts to the perturbation of the audio-to-video cross-attention. Higher values may increase "
	            f"lipsync quality. 1.0 means no effect (default: {video_guider.modality_scale})."
	        ),
	    )
	    # argparse %-formats help strings, so the literal percent sign below is written as %%.
	    parser.add_argument(
	        "--video-skip-step",
	        type=int,
	        default=video_guider.skip_step,
	        help=(
	            "Video skip step N controls periodic skipping during the video diffusion process: "
	            "only steps where step_index %% (N + 1) == 0 are processed, all others are skipped "
	            f"(e.g., 0 = no skipping; 1 = skip every other step; 2 = skip 2 of every 3 steps; "
	            f"default: {video_guider.skip_step})."
	        ),
	    )
	    # --- Audio guidance options (defaults from params.audio_guider_params) ---
	    parser.add_argument(
	        "--audio-cfg-guidance-scale",
	        type=float,
	        default=audio_guider.cfg_scale,
	        help=(
	            f"Audio CFG (Classifier-free guidance) scale controlling how strongly "
	            f"the model adheres to the audio prompt. Higher values increase prompt "
	            f"adherence but may reduce diversity. 1.0 means no effect "
	            f"(default: {audio_guider.cfg_scale})."
	        ),
	    )
	    parser.add_argument(
	        "--audio-stg-guidance-scale",
	        type=float,
	        default=audio_guider.stg_scale,
	        help=(
	            f"Audio STG (Spatio-Temporal Guidance) scale controlling how strongly "
	            f"the model reacts to the perturbation of the audio modality. Higher values increase "
	            f"the effect but may reduce quality. 0.0 means no effect "
	            f"(default: {audio_guider.stg_scale})."
	        ),
	    )
	    parser.add_argument(
	        "--audio-rescale-scale",
	        type=float,
	        default=audio_guider.rescale_scale,
	        help=(
	            f"Audio rescale scale controlling how strongly "
	            f"the model rescales the audio modality after applying other guidance. "
	            f"Experimental. 0.0 means no effect (default: {audio_guider.rescale_scale})."
	        ),
	    )
	    parser.add_argument(
	        "--audio-stg-blocks",
	        type=int,
	        nargs="*",
	        default=audio_guider.stg_blocks,
	        help=(f"Which transformer blocks to perturb for Audio STG. Default: {audio_guider.stg_blocks}."),
	    )
	    # The V2A scale defaults to the audio guider's modality_scale.
	    parser.add_argument(
	        "--v2a-guidance-scale",
	        type=float,
	        default=audio_guider.modality_scale,
	        help=(
	            f"V2A (Video-to-Audio) guidance scale controlling how strongly "
	            f"the model reacts to the perturbation of the video-to-audio cross-attention. Higher values may increase "
	            f"lipsync quality. 1.0 means no effect (default: {audio_guider.modality_scale})."
	        ),
	    )
	    # As above, %% yields a literal percent sign in the rendered help.
	    parser.add_argument(
	        "--audio-skip-step",
	        type=int,
	        default=audio_guider.skip_step,
	        help=(
	            "Audio skip step N controls periodic skipping during the audio diffusion process: "
	            "only steps where step_index %% (N + 1) == 0 are processed, all others are skipped "
	            f"(e.g., 0 = no skipping; 1 = skip every other step; 2 = skip 2 of every 3 steps; "
	            f"default: {audio_guider.skip_step})."
	        ),
	    )
	    return parser


	def default_2_stage_arg_parser(params: PipelineParams = LTX_2_3_PARAMS) -> argparse.ArgumentParser:
	    """Extend ``default_1_stage_arg_parser`` for the two-stage pipeline.

	    Switches the ``--height`` / ``--width`` defaults (and their help text) to the
	    stage-2 resolution from ``params``, and adds the required ``--distilled-lora``
	    and ``--spatial-upsampler-path`` options.
	    """
	    parser = default_1_stage_arg_parser(params=params)
	    parser.set_defaults(height=params.stage_2_height, width=params.stage_2_width)

	    # set_defaults does not touch help strings, so rewrite them to show the 2-stage defaults.
	    help_overrides = {
	        "--height": (
	            "Height of the generated video in pixels, should be divisible by 64 "
	            f"(default: {params.stage_2_height})."
	        ),
	        "--width": (
	            f"Width of the generated video in pixels, should be divisible by 64 (default: {params.stage_2_width})."
	        ),
	    }
	    for registered in parser._actions:
	        for flag, text in help_overrides.items():
	            if flag in registered.option_strings:
	                registered.help = text

	    distilled_lora_help = (
	        "Distilled LoRA (Low-Rank Adaptation) model used in the second stage (upscaling and refinement): "
	        f"path to model file and optional strength (default strength: {DEFAULT_LORA_STRENGTH}). "
	        "The second stage upsamples the video by 2x resolution and refines it using a distilled "
	        "denoising schedule (fewer steps, no CFG). The distilled LoRA is specifically trained "
	        "for this refinement process to improve quality at higher resolutions. "
	        "Example: --distilled-lora path/to/distilled_lora.safetensors 0.8"
	    )
	    parser.add_argument(
	        "--distilled-lora",
	        dest="distilled_lora",
	        action=LoraAction,
	        nargs="+",  # Accept 1-2 arguments per use (path and optional strength); validation is handled in LoraAction
	        metavar=("PATH", "STRENGTH"),
	        required=True,
	        help=distilled_lora_help,
	    )

	    upsampler_help = (
	        "Path to the spatial upsampler model used to increase the resolution "
	        "of the generated video in the latent space."
	    )
	    parser.add_argument(
	        "--spatial-upsampler-path",
	        type=resolve_existing_path,
	        required=True,
	        help=upsampler_help,
	    )
	    return parser


	def hq_2_stage_arg_parser(params: PipelineParams = LTX_2_3_HQ_PARAMS) -> argparse.ArgumentParser:
	    """Extend ``default_2_stage_arg_parser`` with per-stage distilled-LoRA strength options."""
	    parser = default_2_stage_arg_parser(params=params)
	    # (flag, default strength, stage wording used in the help text)
	    stage_options = (
	        ("--distilled-lora-strength-stage-1", 0.25, "first"),
	        ("--distilled-lora-strength-stage-2", 0.5, "second"),
	    )
	    for flag, strength, stage in stage_options:
	        parser.add_argument(
	            flag,
	            type=float,
	            default=strength,
	            help=f"Strength of the distilled LoRA used in the {stage} stage (default: {strength}).",
	        )
	    return parser


	def default_2_stage_distilled_arg_parser(params: PipelineParams = LTX_2_3_PARAMS) -> argparse.ArgumentParser:
	    """Build the parser for the two-stage pipeline that runs on the distilled checkpoint.

	    Starts from ``new_video_gen_arg_parser(distilled=True)``, switches the
	    ``--height`` / ``--width`` defaults (and help text) to the stage-2 resolution
	    from ``params``, and adds the required ``--spatial-upsampler-path``.
	    """
	    parser = new_video_gen_arg_parser(params=params, distilled=True)
	    parser.set_defaults(height=params.stage_2_height, width=params.stage_2_width)

	    # set_defaults leaves help strings alone, so refresh them to show the 2-stage defaults.
	    stage_2_help = {
	        "--height": (
	            "Height of the generated video in pixels, should be divisible by 64 "
	            f"(default: {params.stage_2_height})."
	        ),
	        "--width": (
	            f"Width of the generated video in pixels, should be divisible by 64 (default: {params.stage_2_width})."
	        ),
	    }
	    for registered in parser._actions:
	        for flag, text in stage_2_help.items():
	            if flag in registered.option_strings:
	                registered.help = text

	    upsampler_help = (
	        "Path to the spatial upsampler model used to increase the resolution "
	        "of the generated video in the latent space."
	    )
	    parser.add_argument(
	        "--spatial-upsampler-path",
	        type=resolve_existing_path,
	        required=True,
	        help=upsampler_help,
	    )
	    return parser

Xet Storage Details

Size:: 29.1 kB
Xet hash:: 7a07cf99dc093fe0cd661096c35c57334498d4e507f17ef3ac5dda762fe32575

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.