| |
| """Write a staged Xperience-10M -> Qwen3-Omni pilot runbook and comparisons.""" |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import platform |
| import subprocess |
| from pathlib import Path |
|
|
|
|
# Metric keys read from every metrics.json when building the comparison rows,
# and echoed into the runbook under "primary_metrics". Order is preserved in
# the emitted JSON.
PRIMARY_METRICS: list[str] = [
    "action_macro_f1",
    "subtask_accuracy",
    "transition_accuracy",
    "next_action_accuracy",
    "object_micro_f1",
    "json_validity_rate",
]
|
|
|
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the runbook writer.

    Args:
        argv: Explicit argument list to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, so existing zero-argument callers
            behave as before.

    Returns:
        Namespace with ``workspace``, ``run_id``, ``episodes``,
        ``next_scale_episodes``, ``manifest``, ``output_dir`` and
        ``metric_file``. ``output_dir`` and ``metric_file`` are ``None`` when
        the corresponding option is not given.
    """
    manifest_relpath = "results/omni_finetune/xperience10m_omni_dataset/dataset_manifest.json"

    # The workspace root is two directories above this script's directory.
    # Fall back to the current directory when the script lives too close to
    # the filesystem root for that ancestor to exist.
    script_path = Path(__file__).resolve()
    try:
        workspace_default = script_path.parents[2]
    except IndexError:
        workspace_default = Path.cwd()

    parser = argparse.ArgumentParser(description="Create omni fine-tuning runbook and optional metric comparison.")
    parser.add_argument("--workspace", type=Path, default=workspace_default)
    parser.add_argument("--run-id", default="xperience10m_qwen3_omni_32ep")
    parser.add_argument("--episodes", type=int, default=32)
    parser.add_argument("--next-scale-episodes", type=int, default=64)
    parser.add_argument(
        "--manifest",
        type=Path,
        default=None,
        help=f"Dataset manifest path (default: <workspace>/{manifest_relpath}).",
    )
    parser.add_argument("--output-dir", type=Path)
    parser.add_argument("--metric-file", type=Path, action="append", help="metrics.json files to compare.")

    args = parser.parse_args(argv)
    # Resolve the manifest default only after parsing so it follows a
    # user-supplied --workspace instead of always pointing at the script's tree.
    if args.manifest is None:
        args.manifest = args.workspace / manifest_relpath
    return args
|
|
|
|
def command_output(cmd: list[str], timeout: float | None = 30.0) -> str:
    """Run *cmd* and return its combined stdout/stderr, stripped.

    This is a best-effort probe: any failure to obtain output yields the
    string ``"unknown"`` instead of raising.

    Args:
        cmd: Program and arguments, executed without a shell.
        timeout: Seconds to wait before giving up; ``None`` waits forever.

    Returns:
        The stripped output text, or ``"unknown"`` when *cmd* is empty, the
        program cannot be started (missing, not executable, ...), it exits
        with a non-zero status, or it exceeds *timeout*.
    """
    if not cmd:
        return "unknown"
    try:
        output = subprocess.check_output(
            cmd,
            text=True,
            stderr=subprocess.STDOUT,
            timeout=timeout,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers FileNotFoundError/PermissionError at launch time;
        # SubprocessError covers CalledProcessError and TimeoutExpired.
        return "unknown"
    return output.strip()
|
|
|
|
def preflight_snapshot() -> dict:
    """Collect a best-effort description of the host for the runbook.

    Returns:
        Dict with the keys ``host``, ``python``, ``platform``, ``nvidia_smi``,
        ``cuda_visible_devices``, ``ffmpeg`` and ``disk_workdir``. Values that
        come from external commands are whatever ``command_output`` returns
        (``"unknown"`` on failure).
    """
    import os  # local import so this block does not depend on file-level imports

    # Read the variable from this process' environment rather than spawning a
    # login shell: profile scripts can print to the captured stream, and bash
    # may not exist. Empty and unset both map to "unset", as `${VAR:-unset}` did.
    cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES") or "unset"

    # Only the first line (the version banner) is kept; guard against a
    # command that succeeds but prints nothing.
    ffmpeg_lines = command_output(["ffmpeg", "-version"]).splitlines()
    ffmpeg_banner = ffmpeg_lines[0] if ffmpeg_lines else "unknown"

    return {
        "host": platform.node(),
        "python": platform.python_version(),
        "platform": platform.platform(),
        "nvidia_smi": command_output(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"]),
        "cuda_visible_devices": cuda_visible_devices,
        "ffmpeg": ffmpeg_banner,
        "disk_workdir": command_output(["df", "-h", "."]),
    }
|
|
|
|
def stage_commands(
    run_id: str,
    manifest_path: Path,
    pilot_episodes: int = 32,
    next_scale_episodes: int = 64,
) -> list[dict]:
    """Build the ordered list of pilot phases and their shell commands.

    Every value derived from *run_id* or *manifest_path* is quoted for a POSIX
    shell, so paths or ids containing spaces or metacharacters still produce
    copy-pasteable commands. Values made only of shell-safe characters are
    emitted unchanged.

    Args:
        run_id: Identifier used to derive dataset, checkpoint and eval names.
        manifest_path: Episode manifest path passed to the build/export steps.
        pilot_episodes: Episode count for the phase-3 pilot (default 32).
        next_scale_episodes: Episode count named in the phase-4 goal
            (default 64).

    Returns:
        A list of dicts, one per phase, each with ``phase``, ``goal`` and
        ``commands`` (a list of command strings).
    """
    import shlex  # local import so this block does not depend on file-level imports

    def quoted(value: object) -> str:
        """Return *value* as a single shell-safe token."""
        return shlex.quote(str(value))

    manifest_arg = quoted(manifest_path)
    dataset_jsonl = quoted(f"results/omni_finetune/{run_id}_dataset/dataset.jsonl")

    def build_manifest(max_episodes: int) -> str:
        """Return the manifest-building command for *max_episodes* episodes."""
        return (
            "python scripts/omni/build_episode_manifest.py --data-root /path/to/xperience10m_data "
            f"--max-episodes {max_episodes} --output {manifest_arg}"
        )

    return [
        {
            "phase": "phase_0_preflight",
            "goal": "Confirm accelerator runtime, local Qwen weights, dataset access, ffmpeg, and HOMIE loader.",
            "commands": [
                "nvidia-smi",
                "ffmpeg -version",
                "python -c \"from transformers import Qwen3OmniMoeForConditionalGeneration, Qwen3OmniMoeProcessor; print('qwen imports ok')\"",
            ],
        },
        {
            "phase": "phase_1_one_episode_setup_check",
            "goal": "Reproduce the adapter setup check and validate JSONL/media generation.",
            "commands": [
                build_manifest(1),
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest_arg} --max-windows-per-episode 16 --run-id {quoted(f'{run_id}_dataset')}",
                f"python scripts/omni/qwen3_omni_inference_smoke.py --dataset-jsonl {dataset_jsonl} --sample-limit 3 --run-id {quoted(f'{run_id}_zero_shot')}",
            ],
        },
        {
            "phase": "phase_2_three_episode_overfit",
            "goal": "Train adapter-only and Qwen LoRA on 3 episodes; require decreasing loss and >=98% JSON validity.",
            "commands": [
                build_manifest(3),
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest_arg} --run-id {quoted(f'{run_id}_3ep_dataset')}",
                "python scripts/omni/train_qwen3_omni_lora.py "
                f"--dataset-jsonl {quoted(f'results/omni_finetune/{run_id}_3ep_dataset/dataset.jsonl')} "
                f"--run-id {quoted(f'{run_id}_3ep_lora')} --max-train-samples 256",
            ],
        },
        {
            "phase": f"phase_3_{pilot_episodes}_episode_pilot",
            "goal": "Run adapter-only, frozen Qwen, Qwen LoRA video/audio/text, and Qwen LoRA plus sensor bridge.",
            "commands": [
                build_manifest(pilot_episodes),
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest_arg} --run-id {quoted(f'{run_id}_dataset')}",
                f"python scripts/omni/train_qwen3_omni_lora.py --dataset-jsonl {dataset_jsonl} --run-id {quoted(f'{run_id}_lora')}",
                "python scripts/omni/eval_qwen3_omni_lora.py "
                f"--dataset-jsonl {dataset_jsonl} "
                f"--adapter-dir {quoted(f'checkpoints/{run_id}_lora/adapter_lora')} "
                f"--run-id {quoted(f'{run_id}_eval')}",
            ],
        },
        {
            "phase": "phase_4_scale_decision",
            "goal": f"Scale to {next_scale_episodes} only after stability, disk headroom, and sensor bridge improvements are confirmed.",
            "commands": [
                "python scripts/omni/omni_finetune_runbook.py "
                f"--run-id {quoted(run_id)} "
                f"--metric-file {quoted(f'results/omni_finetune/{run_id}_eval/metrics.json')}",
            ],
        },
    ]
|
|
|
|
def load_metrics(paths: list[Path] | None) -> list[dict]:
    """Read each metrics file and reduce it to one comparison row.

    Args:
        paths: JSON metrics files to load; ``None`` or an empty list yields
            an empty result.

    Returns:
        One dict per input file, in input order, holding ``path`` (the file
        path as a string), ``run`` (the name of the file's parent directory)
        and one entry per name in ``PRIMARY_METRICS`` (``None`` when the file
        has no such key).
    """

    def to_row(metrics_path: Path) -> dict:
        """Return the comparison row for a single metrics file."""
        data = json.loads(metrics_path.read_text(encoding="utf-8"))
        return {
            "path": str(metrics_path),
            "run": metrics_path.parent.name,
            **{name: data.get(name) for name in PRIMARY_METRICS},
        }

    sources = paths if paths else []
    return [to_row(metrics_path) for metrics_path in sources]
|
|
|
|
def main() -> int:
    """Write ``runbook.json`` and ``config.yaml`` for the run and echo the runbook.

    The output directory defaults to ``<workspace>/results/omni_finetune/<run_id>``
    and is created if missing. An explicit ``--output-dir`` gets the same
    ``~`` expansion and resolution as ``--workspace``.

    Returns:
        ``0`` after both files are written and the runbook JSON is printed.
    """
    args = parse_args()
    workspace = args.workspace.expanduser().resolve()
    if args.output_dir is None:
        output_dir = workspace / "results" / "omni_finetune" / args.run_id
    else:
        # Treat a user-supplied directory like the workspace so a literal
        # "~" (e.g. from --output-dir=~/runs) is not created as a directory.
        output_dir = args.output_dir.expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    payload = {
        "run_id": args.run_id,
        "goal": "Fine-tune Qwen3-Omni-Instruct for Xperience-10M episode understanding JSON QA.",
        "default_scale": {
            "pilot_episodes": args.episodes,
            "next_scale_episodes": args.next_scale_episodes,
            "do_not_start_with": "10000 episodes",
        },
        "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
        "download_priority": ["ModelScope", "Hugging Face fallback"],
        "training_unit": "sampled window-centered clips",
        "split_unit": "held-out episodes",
        "primary_metrics": PRIMARY_METRICS,
        "preflight_snapshot": preflight_snapshot(),
        "stages": stage_commands(args.run_id, args.manifest),
        "comparisons": load_metrics(args.metric_file),
        "scale_acceptance": [
            "Full pipeline completes from downloaded subset to metrics.",
            "No train/test episode leakage.",
            "JSON validity >= 0.98.",
            "Sensor bridge beats video/audio/text-only LoRA on at least 3 primary metrics.",
            "Commands, model ID, dataset manifest, GPU info, and split file are recorded.",
        ],
    }
    # Serialize once; the same text goes to disk and to stdout.
    payload_json = json.dumps(payload, indent=2)
    (output_dir / "runbook.json").write_text(payload_json, encoding="utf-8")

    config_lines = [
        f"run_id: {args.run_id}",
        "objective: xperience10m_episode_understanding_json_qa",
        "backbone: Qwen/Qwen3-Omni-30B-A3B-Instruct",
        f"pilot_episodes: {args.episodes}",
        f"next_scale_episodes: {args.next_scale_episodes}",
        "download_priority: [ModelScope, Hugging Face fallback]",
        "full_parameter_finetune: false",
    ]
    (output_dir / "config.yaml").write_text("\n".join(config_lines) + "\n", encoding="utf-8")

    print(payload_json)
    return 0
|
|
|
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|
|