Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| #!/usr/bin/env python3 | |
| """Write a staged Xperience-10M -> Qwen3-Omni pilot runbook and comparisons.""" | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import platform | |
| import subprocess | |
| from pathlib import Path | |
# Metric keys looked up in every metrics.json passed via --metric-file and
# recorded verbatim under "primary_metrics" in the generated runbook.
PRIMARY_METRICS: list[str] = [
    "action_macro_f1",
    "subtask_accuracy",
    "transition_accuracy",
    "next_action_accuracy",
    "object_micro_f1",
    "json_validity_rate",
]
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the runbook generator's command-line options.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what a plain ``parse_args()`` call
            did before this parameter existed; passing a list allows
            programmatic use and testing.

    Returns:
        Namespace with ``workspace``, ``run_id``, ``episodes``,
        ``next_scale_episodes``, ``manifest``, ``output_dir`` (``None`` unless
        given) and ``metric_file`` (``None``, or a list with one path per
        ``--metric-file`` occurrence).
    """
    ancestors = Path(__file__).resolve().parents
    # The default workspace is the third ancestor of this script. Clamp the
    # index so a script placed close to the filesystem root falls back to the
    # top-most ancestor instead of raising IndexError.
    workspace_default = ancestors[min(2, len(ancestors) - 1)]

    parser = argparse.ArgumentParser(description="Create omni fine-tuning runbook and optional metric comparison.")
    parser.add_argument("--workspace", type=Path, default=workspace_default)
    parser.add_argument("--run-id", default="xperience10m_qwen3_omni_32ep")
    parser.add_argument("--episodes", type=int, default=32)
    parser.add_argument("--next-scale-episodes", type=int, default=64)
    # NOTE: this default is anchored at workspace_default, so overriding
    # --workspace alone does not move the manifest path.
    parser.add_argument(
        "--manifest",
        type=Path,
        default=workspace_default / "results/omni_finetune/xperience10m_omni_dataset/dataset_manifest.json",
    )
    parser.add_argument("--output-dir", type=Path)
    parser.add_argument("--metric-file", type=Path, action="append", help="metrics.json files to compare.")
    return parser.parse_args(argv)
def command_output(cmd: list[str]) -> str:
    """Run *cmd* and return its stripped output, or ``"unknown"`` on failure.

    stderr is merged into stdout, so the returned text may contain both.

    Args:
        cmd: Program and arguments, executed without a shell.

    Returns:
        The command's combined output with surrounding whitespace removed, or
        the literal string ``"unknown"`` when the program cannot be started or
        exits with a non-zero status.
    """
    try:
        raw = subprocess.check_output(cmd, text=True, stderr=subprocess.STDOUT)
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing binary (FileNotFoundError) as well as
        # PermissionError / NotADirectoryError; this probe is best-effort and
        # must never abort runbook generation.
        return "unknown"
    return raw.strip()
def preflight_snapshot() -> dict:
    """Collect a best-effort description of the current host.

    Returns:
        Dict with the keys ``host``, ``python``, ``platform``, ``nvidia_smi``,
        ``cuda_visible_devices``, ``ffmpeg`` and ``disk_workdir``. Values that
        come from external commands are whatever ``command_output`` returns
        for them (``"unknown"`` when the command fails).
    """
    # Only the first line of `ffmpeg -version` is kept. Guard against a
    # successful run that prints nothing, where indexing [0] would raise.
    ffmpeg_lines = command_output(["ffmpeg", "-version"]).splitlines()
    ffmpeg_banner = ffmpeg_lines[0] if ffmpeg_lines else "unknown"

    return {
        "host": platform.node(),
        "python": platform.python_version(),
        "platform": platform.platform(),
        "nvidia_smi": command_output(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"]),
        # Read through a login shell; prints "unset" when the variable is
        # unset or empty.
        "cuda_visible_devices": command_output(["bash", "-lc", "printf %s \"${CUDA_VISIBLE_DEVICES:-unset}\""]),
        "ffmpeg": ffmpeg_banner,
        # `df` is run against the current working directory.
        "disk_workdir": command_output(["df", "-h", "."]),
    }
def stage_commands(run_id: str, manifest_path: Path) -> list[dict]:
    """Build the ordered list of pilot phases with their shell commands.

    Every value interpolated into a command string is passed through
    ``shlex.quote`` so that a manifest path or run id containing spaces or
    shell metacharacters still yields a command that can be pasted into a
    shell. Values made only of shell-safe characters are emitted unchanged.

    The episode counts (1, 3, 32) and the "64" in the last goal are fixed text
    in this function; they are not derived from any argument.

    Args:
        run_id: Prefix used for dataset, training, and evaluation run names.
        manifest_path: Path written by the manifest builder and read by the
            dataset exporter in each phase.

    Returns:
        Five dicts (phase_0 .. phase_4), each with ``phase``, ``goal`` and
        ``commands`` keys; ``commands`` is a list of shell command strings.
    """
    import shlex  # needed only here; kept local so the file's imports are untouched

    quote = shlex.quote
    manifest_arg = quote(str(manifest_path))

    # Run names derived from run_id.
    pilot_dataset_run = quote(f"{run_id}_dataset")
    overfit_dataset_run = quote(f"{run_id}_3ep_dataset")

    # Paths derived from run_id (relative to the directory the commands run in).
    dataset_dir = f"results/omni_finetune/{run_id}_dataset"
    dataset_jsonl = quote(f"{dataset_dir}/dataset.jsonl")
    overfit_jsonl = quote(f"results/omni_finetune/{run_id}_3ep_dataset/dataset.jsonl")
    adapter_dir = quote(f"checkpoints/{run_id}_lora/adapter_lora")
    eval_metrics = quote(f"results/omni_finetune/{run_id}_eval/metrics.json")

    build_manifest = "python scripts/omni/build_episode_manifest.py --data-root /path/to/xperience10m_data"
    export_dataset = "python scripts/omni/export_qwen3_omni_action_dataset.py"
    train_lora = "python scripts/omni/train_qwen3_omni_lora.py"

    return [
        {
            "phase": "phase_0_preflight",
            "goal": "Confirm accelerator runtime, local Qwen weights, dataset access, ffmpeg, and HOMIE loader.",
            "commands": [
                "nvidia-smi",
                "ffmpeg -version",
                "python -c \"from transformers import Qwen3OmniMoeForConditionalGeneration, Qwen3OmniMoeProcessor; print('qwen imports ok')\"",
            ],
        },
        {
            "phase": "phase_1_one_episode_setup_check",
            "goal": "Reproduce the adapter setup check and validate JSONL/media generation.",
            "commands": [
                f"{build_manifest} --max-episodes 1 --output {manifest_arg}",
                f"{export_dataset} --manifest {manifest_arg} --max-windows-per-episode 16 --run-id {pilot_dataset_run}",
                f"python scripts/omni/qwen3_omni_inference_smoke.py --dataset-jsonl {dataset_jsonl} --sample-limit 3 --run-id {quote(f'{run_id}_zero_shot')}",
            ],
        },
        {
            "phase": "phase_2_three_episode_overfit",
            "goal": "Train adapter-only and Qwen LoRA on 3 episodes; require decreasing loss and >=98% JSON validity.",
            "commands": [
                f"{build_manifest} --max-episodes 3 --output {manifest_arg}",
                f"{export_dataset} --manifest {manifest_arg} --run-id {overfit_dataset_run}",
                f"{train_lora} --dataset-jsonl {overfit_jsonl} --run-id {quote(f'{run_id}_3ep_lora')} --max-train-samples 256",
            ],
        },
        {
            "phase": "phase_3_32_episode_pilot",
            "goal": "Run adapter-only, frozen Qwen, Qwen LoRA video/audio/text, and Qwen LoRA plus sensor bridge.",
            "commands": [
                f"{build_manifest} --max-episodes 32 --output {manifest_arg}",
                f"{export_dataset} --manifest {manifest_arg} --run-id {pilot_dataset_run}",
                f"{train_lora} --dataset-jsonl {dataset_jsonl} --run-id {quote(f'{run_id}_lora')}",
                f"python scripts/omni/eval_qwen3_omni_lora.py --dataset-jsonl {dataset_jsonl} --adapter-dir {adapter_dir} --run-id {quote(f'{run_id}_eval')}",
            ],
        },
        {
            "phase": "phase_4_scale_decision",
            "goal": "Scale to 64 only after stability, disk headroom, and sensor bridge improvements are confirmed.",
            "commands": [
                f"python scripts/omni/omni_finetune_runbook.py --run-id {quote(run_id)} --metric-file {eval_metrics}",
            ],
        },
    ]
def load_metrics(paths: list[Path] | None) -> list[dict]:
    """Summarise each metrics file into one comparison row.

    Args:
        paths: Metrics JSON files to read; ``None`` or an empty list yields no
            rows.

    Returns:
        One dict per input path, in input order, holding ``path`` (the path as
        a string), ``run`` (the name of the file's parent directory) and one
        entry per name in ``PRIMARY_METRICS`` (``None`` when the file has no
        such key).
    """
    if not paths:
        return []

    def summarise(metrics_path: Path) -> dict:
        # Files are decoded as UTF-8 and parsed as JSON.
        document = json.loads(metrics_path.read_text(encoding="utf-8"))
        summary = {"path": str(metrics_path), "run": metrics_path.parent.name}
        summary.update({name: document.get(name) for name in PRIMARY_METRICS})
        return summary

    return [summarise(metrics_path) for metrics_path in paths]
def main() -> int:
    """Write ``runbook.json`` and ``config.yaml`` for a pilot run and echo the runbook.

    The output directory defaults to ``<workspace>/results/omni_finetune/<run_id>``
    and is created if missing. The runbook payload combines the CLI options, a
    host snapshot, the staged commands, and any metric comparisons; the same
    JSON text is written to disk and printed to stdout.

    Returns:
        ``0`` once both files have been written.
    """
    args = parse_args()
    workspace = args.workspace.expanduser().resolve()

    out_dir = args.output_dir
    if out_dir is None:
        out_dir = workspace / "results" / "omni_finetune" / args.run_id
    out_dir.mkdir(parents=True, exist_ok=True)

    backbone = "Qwen/Qwen3-Omni-30B-A3B-Instruct"
    scale_plan = {
        "pilot_episodes": args.episodes,
        "next_scale_episodes": args.next_scale_episodes,
        "do_not_start_with": "10000 episodes",
    }
    acceptance_criteria = [
        "Full pipeline completes from downloaded subset to metrics.",
        "No train/test episode leakage.",
        "JSON validity >= 0.98.",
        "Sensor bridge beats video/audio/text-only LoRA on at least 3 primary metrics.",
        "Commands, model ID, dataset manifest, GPU info, and split file are recorded.",
    ]

    # Key order is the order the fields appear in runbook.json.
    payload = {
        "run_id": args.run_id,
        "goal": "Fine-tune Qwen3-Omni-Instruct for Xperience-10M episode understanding JSON QA.",
        "default_scale": scale_plan,
        "backbone": backbone,
        "download_priority": ["ModelScope", "Hugging Face fallback"],
        "training_unit": "sampled window-centered clips",
        "split_unit": "held-out episodes",
        "primary_metrics": PRIMARY_METRICS,
        "preflight_snapshot": preflight_snapshot(),
        "stages": stage_commands(args.run_id, args.manifest),
        "comparisons": load_metrics(args.metric_file),
        "scale_acceptance": acceptance_criteria,
    }

    # Serialise once; the identical text goes to the file and to stdout.
    runbook_text = json.dumps(payload, indent=2)
    (out_dir / "runbook.json").write_text(runbook_text, encoding="utf-8")

    # config.yaml is a flat key: value summary assembled by hand.
    config_text = "\n".join(
        (
            f"run_id: {args.run_id}",
            "objective: xperience10m_episode_understanding_json_qa",
            f"backbone: {backbone}",
            f"pilot_episodes: {args.episodes}",
            f"next_scale_episodes: {args.next_scale_episodes}",
            "download_priority: [ModelScope, Hugging Face fallback]",
            "full_parameter_finetune: false",
        )
    )
    (out_dir / "config.yaml").write_text(config_text + "\n", encoding="utf-8")

    print(runbook_text)
    return 0
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)