File size: 7,658 Bytes
f590d7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfd29be
f590d7e
 
 
 
 
 
 
 
 
cfd29be
f590d7e
 
 
 
 
 
 
04c0bde
 
f590d7e
cfd29be
f590d7e
 
 
 
 
 
 
 
cfd29be
f590d7e
 
 
 
 
 
 
 
cfd29be
f590d7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env python3
"""Write a staged Xperience-10M -> Qwen3-Omni pilot runbook and comparisons."""

from __future__ import annotations

import argparse
import json
import platform
import subprocess
from pathlib import Path


# Metric keys that load_metrics() copies out of each metrics.json (missing keys
# become None) and that main() lists in the runbook under "primary_metrics".
PRIMARY_METRICS = [
    "action_macro_f1",
    "subtask_accuracy",
    "transition_accuracy",
    "next_action_accuracy",
    "object_micro_f1",
    "json_validity_rate",
]


def parse_args() -> argparse.Namespace:
    """Parse the runbook generator's command-line options from ``sys.argv``.

    The default workspace is the directory two levels above the directory
    containing this file, and the default manifest path is derived from that
    default workspace.

    Returns:
        The populated ``argparse.Namespace``. ``output_dir`` and
        ``metric_file`` are ``None`` when their flags are not supplied.
    """
    default_root = Path(__file__).resolve().parents[2]
    default_manifest = default_root / "results/omni_finetune/xperience10m_omni_dataset/dataset_manifest.json"

    # (flag, add_argument keyword arguments), in the order shown by --help.
    option_table = (
        ("--workspace", {"type": Path, "default": default_root}),
        ("--run-id", {"default": "xperience10m_qwen3_omni_32ep"}),
        ("--episodes", {"type": int, "default": 32}),
        ("--next-scale-episodes", {"type": int, "default": 64}),
        ("--manifest", {"type": Path, "default": default_manifest}),
        ("--output-dir", {"type": Path}),
        ("--metric-file", {"type": Path, "action": "append", "help": "metrics.json files to compare."}),
    )

    cli = argparse.ArgumentParser(description="Create omni fine-tuning runbook and optional metric comparison.")
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    return cli.parse_args()


def command_output(cmd: list[str], timeout: float = 30.0) -> str:
    """Run *cmd* and return its combined stdout/stderr, stripped.

    This is a best-effort probe: any failure to launch or complete the
    command is reported as the string ``"unknown"`` instead of raising.

    Args:
        cmd: Argument vector to execute (no shell is involved).
        timeout: Seconds to wait before giving up on the command.

    Returns:
        The stripped output text, or ``"unknown"`` when *cmd* is empty, the
        executable cannot be started, it exits non-zero, or it times out.
    """
    if not cmd:
        # An empty argv would otherwise surface as an IndexError from subprocess.
        return "unknown"
    try:
        raw = subprocess.check_output(
            cmd,
            text=True,
            errors="replace",  # undecodable bytes must not abort a best-effort probe
            stderr=subprocess.STDOUT,
            timeout=timeout,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing or non-executable binary; SubprocessError
        # covers both a non-zero exit status and an expired timeout.
        return "unknown"
    return raw.strip()


def preflight_snapshot() -> dict:
    """Collect a best-effort description of the host for the runbook.

    External tools are probed through ``command_output``, so a tool that is
    missing or fails is recorded as ``"unknown"`` rather than aborting.

    Returns:
        A dict with ``host``, ``python``, ``platform``, ``nvidia_smi``,
        ``cuda_visible_devices``, ``ffmpeg`` and ``disk_workdir`` entries.
    """
    # Only the first line of `ffmpeg -version` is kept. A probe that succeeds
    # with empty output has no lines at all, so fall back instead of indexing.
    ffmpeg_lines = command_output(["ffmpeg", "-version"]).splitlines()
    ffmpeg_first_line = ffmpeg_lines[0] if ffmpeg_lines else "unknown"
    return {
        "host": platform.node(),
        "python": platform.python_version(),
        "platform": platform.platform(),
        "nvidia_smi": command_output(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"]),
        "cuda_visible_devices": command_output(["bash", "-lc", "printf %s \"${CUDA_VISIBLE_DEVICES:-unset}\""]),
        "ffmpeg": ffmpeg_first_line,
        "disk_workdir": command_output(["df", "-h", "."]),
    }


def stage_commands(run_id: str, manifest_path: Path) -> list[dict]:
    dataset_dir = f"results/omni_finetune/{run_id}_dataset"
    dataset_jsonl = f"{dataset_dir}/dataset.jsonl"
    return [
        {
            "phase": "phase_0_preflight",
            "goal": "Confirm accelerator runtime, local Qwen weights, dataset access, ffmpeg, and HOMIE loader.",
            "commands": [
                "nvidia-smi",
                "ffmpeg -version",
                "python -c \"from transformers import Qwen3OmniMoeForConditionalGeneration, Qwen3OmniMoeProcessor; print('qwen imports ok')\"",
            ],
        },
        {
            "phase": "phase_1_one_episode_setup_check",
            "goal": "Reproduce the adapter setup check and validate JSONL/media generation.",
            "commands": [
                f"python scripts/omni/build_episode_manifest.py --data-root /path/to/xperience10m_data --max-episodes 1 --output {manifest_path}",
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest_path} --max-windows-per-episode 16 --run-id {run_id}_dataset",
                f"python scripts/omni/qwen3_omni_inference_smoke.py --dataset-jsonl {dataset_jsonl} --sample-limit 3 --run-id {run_id}_zero_shot",
            ],
        },
        {
            "phase": "phase_2_three_episode_overfit",
            "goal": "Train adapter-only and Qwen LoRA on 3 episodes; require decreasing loss and >=98% JSON validity.",
            "commands": [
                f"python scripts/omni/build_episode_manifest.py --data-root /path/to/xperience10m_data --max-episodes 3 --output {manifest_path}",
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest_path} --run-id {run_id}_3ep_dataset",
                f"python scripts/omni/train_qwen3_omni_lora.py --dataset-jsonl results/omni_finetune/{run_id}_3ep_dataset/dataset.jsonl --run-id {run_id}_3ep_lora --max-train-samples 256",
            ],
        },
        {
            "phase": "phase_3_32_episode_pilot",
            "goal": "Run adapter-only, frozen Qwen, Qwen LoRA video/audio/text, and Qwen LoRA plus sensor bridge.",
            "commands": [
                f"python scripts/omni/build_episode_manifest.py --data-root /path/to/xperience10m_data --max-episodes 32 --output {manifest_path}",
                f"python scripts/omni/export_qwen3_omni_action_dataset.py --manifest {manifest_path} --run-id {run_id}_dataset",
                f"python scripts/omni/train_qwen3_omni_lora.py --dataset-jsonl {dataset_jsonl} --run-id {run_id}_lora",
                f"python scripts/omni/eval_qwen3_omni_lora.py --dataset-jsonl {dataset_jsonl} --adapter-dir checkpoints/{run_id}_lora/adapter_lora --run-id {run_id}_eval",
            ],
        },
        {
            "phase": "phase_4_scale_decision",
            "goal": "Scale to 64 only after stability, disk headroom, and sensor bridge improvements are confirmed.",
            "commands": [
                f"python scripts/omni/omni_finetune_runbook.py --run-id {run_id} --metric-file results/omni_finetune/{run_id}_eval/metrics.json",
            ],
        },
    ]


def load_metrics(paths: list[Path] | None) -> list[dict]:
    rows = []
    for path in paths or []:
        payload = json.loads(path.read_text(encoding="utf-8"))
        row = {"path": str(path), "run": path.parent.name}
        for metric in PRIMARY_METRICS:
            row[metric] = payload.get(metric)
        rows.append(row)
    return rows


def main() -> int:
    args = parse_args()
    args.workspace = args.workspace.expanduser().resolve()
    if args.output_dir is None:
        args.output_dir = args.workspace / "results" / "omni_finetune" / args.run_id
    args.output_dir.mkdir(parents=True, exist_ok=True)

    payload = {
        "run_id": args.run_id,
        "goal": "Fine-tune Qwen3-Omni-Instruct for Xperience-10M episode understanding JSON QA.",
        "default_scale": {
            "pilot_episodes": args.episodes,
            "next_scale_episodes": args.next_scale_episodes,
            "do_not_start_with": "10000 episodes",
        },
        "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
        "download_priority": ["ModelScope", "Hugging Face fallback"],
        "training_unit": "sampled window-centered clips",
        "split_unit": "held-out episodes",
        "primary_metrics": PRIMARY_METRICS,
        "preflight_snapshot": preflight_snapshot(),
        "stages": stage_commands(args.run_id, args.manifest),
        "comparisons": load_metrics(args.metric_file),
        "scale_acceptance": [
            "Full pipeline completes from downloaded subset to metrics.",
            "No train/test episode leakage.",
            "JSON validity >= 0.98.",
            "Sensor bridge beats video/audio/text-only LoRA on at least 3 primary metrics.",
            "Commands, model ID, dataset manifest, GPU info, and split file are recorded.",
        ],
    }
    (args.output_dir / "runbook.json").write_text(json.dumps(payload, indent=2), encoding="utf-8")
    lines = [
        f"run_id: {args.run_id}",
        "objective: xperience10m_episode_understanding_json_qa",
        "backbone: Qwen/Qwen3-Omni-30B-A3B-Instruct",
        f"pilot_episodes: {args.episodes}",
        f"next_scale_episodes: {args.next_scale_episodes}",
        "download_priority: [ModelScope, Hugging Face fallback]",
        "full_parameter_finetune: false",
    ]
    (args.output_dir / "config.yaml").write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(json.dumps(payload, indent=2))
    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())