ropedia-xperience-10m-task-baselines / scripts /omni /omni_finetune_runbook.py
cy0307's picture
Polish reader-facing project language
04c0bde verified
Raw
History Blame
7.66 kB
#!/usr/bin/env python3
"""Write a staged Xperience-10M -> Qwen3-Omni pilot runbook and comparisons."""
from __future__ import annotations
import argparse
import json
import platform
import subprocess
from pathlib import Path
# Metric keys this script cares about: load_metrics() copies each of these
# keys out of every metrics.json it reads, and main() records the list under
# "primary_metrics" in the generated runbook.
PRIMARY_METRICS: list[str] = [
    "action_macro_f1",
    "subtask_accuracy",
    "transition_accuracy",
    "next_action_accuracy",
    "object_micro_f1",
    "json_validity_rate",
]
def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the runbook generator.

    The default workspace is the directory two levels above the directory
    containing this file; the default manifest path is derived from that
    same default workspace. ``--output-dir`` has no default (``None``) and
    ``--metric-file`` may be repeated to collect several paths.

    Returns:
        The populated ``argparse.Namespace``.
    """
    repo_root = Path(__file__).resolve().parents[2]
    default_manifest = repo_root / "results/omni_finetune/xperience10m_omni_dataset/dataset_manifest.json"

    # (flag, add_argument keyword options), registered in this order.
    option_specs = (
        ("--workspace", {"type": Path, "default": repo_root}),
        ("--run-id", {"default": "xperience10m_qwen3_omni_32ep"}),
        ("--episodes", {"type": int, "default": 32}),
        ("--next-scale-episodes", {"type": int, "default": 64}),
        ("--manifest", {"type": Path, "default": default_manifest}),
        ("--output-dir", {"type": Path}),
        ("--metric-file", {"type": Path, "action": "append", "help": "metrics.json files to compare."}),
    )

    cli = argparse.ArgumentParser(
        description="Create omni fine-tuning runbook and optional metric comparison.",
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def command_output(cmd: list[str]) -> str:
    """Run *cmd* and return its combined stdout/stderr, stripped of surrounding whitespace.

    This is a best-effort probe: any failure to obtain output yields the
    sentinel string ``"unknown"`` instead of raising.

    Args:
        cmd: Argument vector to execute (no shell is involved).

    Returns:
        The stripped command output, or ``"unknown"`` when *cmd* is empty,
        the program cannot be started, or it exits with a non-zero status.
    """
    # An empty argv cannot be executed; treat it like any other failed probe.
    if not cmd:
        return "unknown"
    try:
        # errors="replace" keeps undecodable bytes in tool output from raising
        # UnicodeDecodeError out of a probe that is meant to never fail.
        raw = subprocess.check_output(
            cmd,
            text=True,
            errors="replace",
            stderr=subprocess.STDOUT,
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError covers FileNotFoundError as well as PermissionError and
        # similar launch failures (e.g. a non-executable file on PATH).
        return "unknown"
    return raw.strip()
def preflight_snapshot() -> dict:
    """Collect a best-effort description of the machine generating the runbook.

    Returns:
        A dict with the host name, Python version, and platform string from
        the ``platform`` module, plus the output of several external probes
        run through ``command_output`` (``nvidia-smi``, a bash expansion of
        ``CUDA_VISIBLE_DEVICES``, ``ffmpeg -version``, and ``df -h .``).
        A probe that fails contributes the string ``"unknown"``.
    """
    # Only the first line of the ffmpeg banner is kept. splitlines() on an
    # empty string returns [], so fall back to "unknown" rather than indexing
    # into an empty list when the command succeeds but prints nothing.
    ffmpeg_lines = command_output(["ffmpeg", "-version"]).splitlines()
    ffmpeg_banner = ffmpeg_lines[0] if ffmpeg_lines else "unknown"

    return {
        "host": platform.node(),
        "python": platform.python_version(),
        "platform": platform.platform(),
        "nvidia_smi": command_output(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"]),
        # Expanded by a login bash shell; prints "unset" when the variable is unset or empty.
        "cuda_visible_devices": command_output(["bash", "-lc", "printf %s \"${CUDA_VISIBLE_DEVICES:-unset}\""]),
        "ffmpeg": ffmpeg_banner,
        # Disk usage for the filesystem holding the current working directory.
        "disk_workdir": command_output(["df", "-h", "."]),
    }
def stage_commands(run_id: str, manifest_path: Path) -> list[dict]:
    """Build the ordered list of runbook phases with their shell commands.

    Each phase is a dict with ``"phase"``, ``"goal"``, and ``"commands"``
    keys. The commands are strings intended to be pasted into a POSIX shell,
    so every value derived from *run_id* or *manifest_path* is passed through
    ``shlex.quote``; values made only of shell-safe characters are emitted
    unchanged.

    Note: the episode counts in the phases (1, 3, 32, and the "64" in the
    final goal) are literals in this function and are not derived from any
    argument.

    Args:
        run_id: Identifier used to name datasets, checkpoints, and eval runs.
        manifest_path: Path written by the manifest builder and read by the
            dataset exporter.

    Returns:
        A list of five phase dicts, from preflight through the scale decision.
    """
    import shlex  # local import: only this function needs it

    def quoted(value: object) -> str:
        """Return *value* as a single, safely quoted shell word."""
        return shlex.quote(str(value))

    manifest = quoted(manifest_path)
    dataset_jsonl = quoted(f"results/omni_finetune/{run_id}_dataset/dataset.jsonl")
    three_ep_jsonl = quoted(f"results/omni_finetune/{run_id}_3ep_dataset/dataset.jsonl")
    adapter_dir = quoted(f"checkpoints/{run_id}_lora/adapter_lora")
    eval_metrics = quoted(f"results/omni_finetune/{run_id}_eval/metrics.json")

    return [
        {
            "phase": "phase_0_preflight",
            "goal": "Confirm accelerator runtime, local Qwen weights, dataset access, ffmpeg, and HOMIE loader.",
            "commands": [
                "nvidia-smi",
                "ffmpeg -version",
                "python -c \"from transformers import Qwen3OmniMoeForConditionalGeneration, Qwen3OmniMoeProcessor; print('qwen imports ok')\"",
            ],
        },
        {
            "phase": "phase_1_one_episode_setup_check",
            "goal": "Reproduce the adapter setup check and validate JSONL/media generation.",
            "commands": [
                f"python scripts/omni/build_episode_manifest.py --data-root /path/to/xperience10m_data --max-episodes 1 --output {manifest}",
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest} --max-windows-per-episode 16 --run-id {quoted(f'{run_id}_dataset')}",
                f"python scripts/omni/qwen3_omni_inference_smoke.py --dataset-jsonl {dataset_jsonl} --sample-limit 3 --run-id {quoted(f'{run_id}_zero_shot')}",
            ],
        },
        {
            "phase": "phase_2_three_episode_overfit",
            "goal": "Train adapter-only and Qwen LoRA on 3 episodes; require decreasing loss and >=98% JSON validity.",
            "commands": [
                f"python scripts/omni/build_episode_manifest.py --data-root /path/to/xperience10m_data --max-episodes 3 --output {manifest}",
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest} --run-id {quoted(f'{run_id}_3ep_dataset')}",
                f"python scripts/omni/train_qwen3_omni_lora.py --dataset-jsonl {three_ep_jsonl} --run-id {quoted(f'{run_id}_3ep_lora')} --max-train-samples 256",
            ],
        },
        {
            "phase": "phase_3_32_episode_pilot",
            "goal": "Run adapter-only, frozen Qwen, Qwen LoRA video/audio/text, and Qwen LoRA plus sensor bridge.",
            "commands": [
                f"python scripts/omni/build_episode_manifest.py --data-root /path/to/xperience10m_data --max-episodes 32 --output {manifest}",
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest} --run-id {quoted(f'{run_id}_dataset')}",
                f"python scripts/omni/train_qwen3_omni_lora.py --dataset-jsonl {dataset_jsonl} --run-id {quoted(f'{run_id}_lora')}",
                f"python scripts/omni/eval_qwen3_omni_lora.py --dataset-jsonl {dataset_jsonl} --adapter-dir {adapter_dir} --run-id {quoted(f'{run_id}_eval')}",
            ],
        },
        {
            "phase": "phase_4_scale_decision",
            "goal": "Scale to 64 only after stability, disk headroom, and sensor bridge improvements are confirmed.",
            "commands": [
                f"python scripts/omni/omni_finetune_runbook.py --run-id {quoted(run_id)} --metric-file {eval_metrics}",
            ],
        },
    ]
def load_metrics(paths: list[Path] | None) -> list[dict]:
    """Read metrics JSON files and reduce each to a comparison row.

    Args:
        paths: Files to read as UTF-8 JSON, or ``None``/empty for no files.

    Returns:
        One dict per input file, in input order, holding the file path
        (``"path"``), the name of its parent directory (``"run"``), and one
        entry per name in ``PRIMARY_METRICS`` (``None`` when the file does
        not contain that key).
    """
    if not paths:
        return []

    def summarize(metric_path: Path) -> dict:
        """Build the comparison row for a single metrics file."""
        parsed = json.loads(metric_path.read_text(encoding="utf-8"))
        summary = {"path": str(metric_path), "run": metric_path.parent.name}
        summary.update((name, parsed.get(name)) for name in PRIMARY_METRICS)
        return summary

    return [summarize(metric_path) for metric_path in paths]
def main() -> int:
    """Generate the runbook artifacts for one run and echo the runbook.

    Parses the CLI options, ensures the output directory exists (defaulting
    to ``<workspace>/results/omni_finetune/<run_id>``), then writes
    ``runbook.json`` and ``config.yaml`` into it and prints the runbook JSON
    to stdout.

    Returns:
        ``0`` on completion.
    """
    args = parse_args()

    workspace = args.workspace.expanduser().resolve()
    out_dir = args.output_dir
    if out_dir is None:
        out_dir = workspace / "results" / "omni_finetune" / args.run_id
    out_dir.mkdir(parents=True, exist_ok=True)

    backbone = "Qwen/Qwen3-Omni-30B-A3B-Instruct"
    download_priority = ["ModelScope", "Hugging Face fallback"]

    # Key order here is the key order of the emitted JSON document.
    payload = {
        "run_id": args.run_id,
        "goal": "Fine-tune Qwen3-Omni-Instruct for Xperience-10M episode understanding JSON QA.",
        "default_scale": {
            "pilot_episodes": args.episodes,
            "next_scale_episodes": args.next_scale_episodes,
            "do_not_start_with": "10000 episodes",
        },
        "backbone": backbone,
        "download_priority": download_priority,
        "training_unit": "sampled window-centered clips",
        "split_unit": "held-out episodes",
        "primary_metrics": PRIMARY_METRICS,
        "preflight_snapshot": preflight_snapshot(),
        "stages": stage_commands(args.run_id, args.manifest),
        "comparisons": load_metrics(args.metric_file),
        "scale_acceptance": [
            "Full pipeline completes from downloaded subset to metrics.",
            "No train/test episode leakage.",
            "JSON validity >= 0.98.",
            "Sensor bridge beats video/audio/text-only LoRA on at least 3 primary metrics.",
            "Commands, model ID, dataset manifest, GPU info, and split file are recorded.",
        ],
    }

    # Serialize once; the same text goes to the file and to stdout.
    runbook_text = json.dumps(payload, indent=2)
    (out_dir / "runbook.json").write_text(runbook_text, encoding="utf-8")

    # Flat "key: value" lines, one per entry, each newline-terminated.
    config_fields = {
        "run_id": args.run_id,
        "objective": "xperience10m_episode_understanding_json_qa",
        "backbone": backbone,
        "pilot_episodes": args.episodes,
        "next_scale_episodes": args.next_scale_episodes,
        "download_priority": f"[{', '.join(download_priority)}]",
        "full_parameter_finetune": "false",
    }
    config_text = "".join(f"{key}: {value}\n" for key, value in config_fields.items())
    (out_dir / "config.yaml").write_text(config_text, encoding="utf-8")

    print(runbook_text)
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())