Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
File size: 6,836 Bytes
7faed79 0995310 7faed79 a07660e 7faed79 d9be7c0 7faed79 146ae33 45c1706 7faed79 a07660e c614c4e 146ae33 c614c4e 2ebe45d 146ae33 c614c4e a07660e 2c5b88c 7faed79 2c5b88c 7faed79 4173e02 0f9a8e2 7faed79 45c1706 2d80be0 0995310 7faed79 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | {
"title": "Ropedia Xperience-10M Task Suite Reproducibility Matrix",
"version": "2026-06-14",
"scope": "one public Xperience-10M sample episode plus owner-side private staged Qwen3-Omni v6 reproduction",
"python_target": "3.12",
"public_raw_data_redistributed": false,
"last_exact_metric_audit": {
"date": "2026-05-30",
"timezone": "Asia/Singapore",
"evidence": "notes/reproducibility_audit.md",
"status": "pass",
"matched_artifacts": [
"results/min_action_model/metrics.json",
"results/min_subtask_model/metrics.json",
"results/min_all_modalities_action_model/metrics.json",
"results/min_all_modalities_subtask_model/metrics.json",
"results/episode_task_suite/summary_report.json",
"results/episode_task_suite/feature_manifest.json",
"results/episode_task_suite/available_modalities.json",
"results/episode_task_suite/*/metrics.json"
]
},
"steps": [
{
"id": "download_sample",
"status": "reproducible",
"command": "hf download ropedia-ai/xperience-10m-sample --repo-type dataset --local-dir data/sample/xperience-10m-sample",
"expected": "annotation.hdf5 plus public sample MP4 streams under data/sample/xperience-10m-sample; optional visualization.rrd can be inspected with Rerun 0.29.0",
"boundary": "sample card lists cc-by-nc-4.0; raw sample data is downloaded from upstream, not redistributed here"
},
{
"id": "minimal_baselines",
"status": "reproducible",
"command": "python scripts/train_min_action_model.py --workspace $WORKSPACE && python scripts/train_all_modalities_model.py --workspace $WORKSPACE",
"expected": "minimal baseline metrics and model weights under results/min_*",
"boundary": "single-episode chronological split"
},
{
"id": "original_task_suite",
"status": "reproducible",
"command": "python scripts/episode_task_suite.py --workspace $WORKSPACE --include-neural",
"expected": "walkthrough-backed task metrics, predictions, manifests, and neural_mlp task-head artifacts",
"boundary": "8,546-dimensional multimodal window contract"
},
{
"id": "research_direction_outputs",
"status": "reproducible",
"command": "python scripts/research_direction_taxonomy.py && python scripts/research_direction_extension_tasks.py && python scripts/task_walkthroughs.py",
"expected": "research-direction taxonomy, extension probes, and task walkthrough artifacts",
"boundary": "single-episode probes, not full research-direction solutions"
},
{
"id": "unified_20_task_index",
"status": "reproducible",
"command": "python scripts/tier2_task_suite.py && python scripts/build_unified_task_suite.py && python scripts/build_unified_task_model_radar.py",
"expected": "unified 20-task metrics, prediction/rank artifacts, TASK_SUITE_20.md, docs/data/task_suite_20.json, docs/data/tier2_task_suite.json, docs/assets/charts/tier2_task_suite.svg, docs/data/unified_task_model_radar.json, and docs/assets/charts/unified_task_model_radar.svg",
"boundary": "requires local public-sample annotation.hdf5 plus HOMIE Toolkit or h5py for full public-task regeneration; raw HDF5 and MP4 files are not redistributed"
},
{
"id": "source_alignment_audit",
"status": "reproducible",
"command": "python scripts/validate_source_alignment.py",
"expected": "SOURCE_ALIGNMENT_AUDIT.md and docs/data/source_alignment_audit.json",
"boundary": "offline committed-fact audit; does not fetch private gated data"
},
{
"id": "evaluation_protocol",
"status": "reproducible",
"command": "python scripts/build_evaluation_protocol.py",
"expected": "EVALUATION_PROTOCOL.md and docs/data/evaluation_protocol.json",
"boundary": "defines single-episode task evaluation rules; does not add cross-episode model quality"
},
{
"id": "figures_and_dashboard_data",
"status": "reproducible",
"command": "python scripts/generate_visualizations.py && python scripts/render_overview_figures.py && python scripts/render_task_suite_infographic.py && python scripts/export_modality_atlas_assets.py && python scripts/build_brand_assets.py && python scripts/build_figure_index.py",
"expected": "website JSON bundles, charts, overview figures, task-suite infographic, responsive modality atlas assets, brand logo derivatives, FIGURE_INDEX.md, docs/data/brand_assets.json, and docs/data/figure_index.json",
"boundary": "figures are generated presentation layers over committed metrics and sample thumbnails"
},
{
"id": "publication_validation",
"status": "reproducible",
"command": "python scripts/validate_website_integrity.py && python scripts/validate_task_surface.py && python scripts/validate_scope_claims.py && python scripts/build_artifact_index.py && python scripts/validate_mirror_parity.py && python scripts/validate_publication_package.py",
"expected": "docs/data/website_integrity.json, docs/data/task_surface_integrity.json, docs/data/scope_claims_audit.json, docs/data/artifact_index.json, docs/data/mirror_parity.json, and docs/data/publication_audit.json",
"boundary": "checks local website integrity plus public repo, prepared HF bundles, and prepared mirror parity"
},
{
"id": "qwen3_omni_multi_episode_pilot",
"status": "verified_final_diagnostic_result_not_publicly_rerunnable_without_gated_data",
"command": "scripts/omni/run_dense_multiscale_qwen_v5_h20.sh, scripts/omni/train_qwen3_omni_lora.py, and scripts/omni/run_qwen3_omni_lora_eval_sharded.sh on the selected gated episodes",
"expected": "verified v5/v6 diagnostic LoRA packages; the latest v6 package records 34,269 exported multiscale windows and 4,032 held-out test predictions",
"boundary": "the public package records metrics and manifests, but rerunning requires gated Xperience-10M episode access and base-model weights; v6 improves some structured metrics over v5 but remains a diagnostic baseline rather than a strong action/subtask model"
},
{
"id": "owner_gpu_qwen3_v6_reproduction",
"status": "reproducible_on_private_gpu_staging",
"command": "cd <staged-repo-root> && CUDA_VISIBLE_DEVICES=0,1,2,3 RUN_ID=a100_repro_qwen_v6_eval_smoke1_manual SAMPLE_LIMIT=1 MAX_NEW_TOKENS=1 scripts/omni/run_private_gpu_qwen3_v6_repro_smoke.sh",
"expected": "One-sample Qwen3-Omni v6 eval smoke writes progress.jsonl, predictions, metrics.json, and exit_code.txt with exit_code 0",
"boundary": "owner-side private staging only; depends on exported media cache, path-rewritten dataset_a100_eval.jsonl, Qwen3-Omni base model cache, v6 LoRA adapter, and the compatible Transformers video-feature patch"
}
]
}
|