Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
File size: 11,468 Bytes
f45f1a0 fcaf77a 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 86e7cb2 f45f1a0 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | {
"current_public_matrix_row": "qwen3_omni_v6_lora",
"generated_at_utc": "2026-06-21T11:47:45+00:00",
"interpretation_rule": "Do not confuse the Qwen run versions with the project evidence lines. The project has two evidence lines: the single public sample episode and the selected 128-episode artifacts. Qwen v1-v6 are only the Qwen3-Omni run lineage inside the selected-128 line. The 20-task matrix uses Qwen3-Omni v6 LoRA; v5 remains the pinned prior release; v1-v4 are lineage and ablation evidence.",
"pinned_prior_release": "v5",
"related_engineering_artifacts": [
{
"name": "Full-parameter gates",
"path": "results/omni_finetune/QWEN3_FULL_PARAMETER_GATES_20260609.md",
"role": "Feasibility and short-train gates; not a public 20-task matrix method row."
},
{
"name": "Alternate fullsplit v6 package",
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full",
"role": "Verified alternate no-validation/fullsplit artifact retained for audit, not the current matrix row."
}
],
"runs": [
{
"change_from_previous": "First verified Qwen3-Omni selected-128 LoRA run.",
"dataset_contract": "xperience10m_episode_json_qa_v1",
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
"eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
"eval_samples": 448,
"metrics": {
"action_macro_f1": 0.0026621494447581404,
"contact_accuracy": 0.6450892857142857,
"json_validity_rate": 0.875,
"next_action_accuracy": 0.024553571428571428,
"object_micro_f1": 0.22299431459254582,
"subtask_accuracy": 0.006696428571428571,
"transition_accuracy": 0.8504464285714286
},
"package": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
"purpose": "Prove that the selected-128 split, LoRA training, held-out eval, validation, and public packaging loop works end to end.",
"reader_use": "Use only as lineage evidence for the first working pipeline.",
"role": "First verified 96/16/16 selected-episode Qwen3-Omni LoRA package; establishes dataset, training, eval, and packaging plumbing.",
"status": "verified",
"title": "Selected-128 validation-aware LoRA baseline",
"train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora",
"version": "v1"
},
{
"change_from_previous": "Reused the selected-128 split with a stricter structured-JSON answer contract and full 8-GPU LoRA training.",
"dataset_contract": "xperience10m_episode_json_qa_v1",
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
"eval_samples": 448,
"metrics": {
"action_macro_f1": 0.0024331644885523347,
"contact_accuracy": 0.71875,
"json_validity_rate": 0.9977678571428571,
"next_action_accuracy": 0.029017857142857144,
"object_micro_f1": 0.30160427807486634,
"subtask_accuracy": 0.002232142857142857,
"transition_accuracy": 0.9709821428571429
},
"package": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full",
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
"purpose": "Make the answer format schema-checked and reduce invalid JSON before expanding scale.",
"reader_use": "Use as evidence that schema-constrained evaluation improved validity and contact accuracy over v1.",
"role": "Reuses the selected-128 split with a stricter structured JSON answer contract and full 8-GPU LoRA training.",
"status": "verified",
"title": "Structured-JSON reuse full-8-GPU LoRA",
"train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora",
"version": "v2"
},
{
"change_from_previous": "Evaluated the v2 adapter with stricter labels and prompts; no new adapter training.",
"dataset_contract": "xperience10m_episode_json_qa_v1",
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
"eval_samples": 448,
"metrics": {
"action_macro_f1": 0.0021983997167007384,
"contact_accuracy": 0.7209821428571429,
"json_validity_rate": 1.0,
"next_action_accuracy": 0.03125,
"object_micro_f1": 0.30688228657389993,
"subtask_accuracy": 0.002232142857142857,
"transition_accuracy": 0.9732142857142857
},
"package": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full",
"public_matrix_role": "superseded prompt/eval lineage evidence",
"purpose": "Separate prompt/eval formatting effects from adapter-training effects.",
"reader_use": "Use as prompt/eval ablation evidence, not as a separate trained model.",
"role": "Strict-label prompt/eval pass over the v2 adapter; improves JSON validity without introducing a new adapter training run.",
"status": "verified",
"title": "Strict-label prompt evaluation",
"train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora",
"version": "v3"
},
{
"change_from_previous": "Trained a new four-epoch full-8-GPU LoRA adapter on the structured-JSON setup.",
"dataset_contract": "xperience10m_episode_json_qa_v1",
"dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
"eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
"eval_samples": 448,
"metrics": {
"action_macro_f1": 0.0018678269676001454,
"contact_accuracy": 0.7299107142857143,
"json_validity_rate": 1.0,
"next_action_accuracy": 0.033482142857142856,
"object_micro_f1": 0.31099781500364165,
"subtask_accuracy": 0.0,
"transition_accuracy": 0.9732142857142857
},
"package": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
"public_matrix_role": "superseded lineage evidence, not the current 20-task Qwen row",
"purpose": "Test whether longer structured-JSON LoRA training improves the same selected split.",
"reader_use": "Use as overfit and metric-tradeoff evidence before the multiscale export.",
"role": "Four-epoch full-8-GPU LoRA run on the same selected split; useful for overfit/metric tradeoff analysis.",
"status": "verified",
"title": "Four-epoch structured-JSON LoRA",
"train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora",
"version": "v4"
},
{
"change_from_previous": "Introduced the multiscale cap96 export and larger held-out evaluation surface.",
"dataset_contract": "xperience10m_episode_json_qa_v1",
"dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
"eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
"eval_samples": 4032,
"metrics": {
"action_macro_f1": 0.002289711036077459,
"contact_accuracy": 0.7864583333333334,
"json_validity_rate": 1.0,
"next_action_accuracy": 0.053618594823032224,
"object_micro_f1": 0.31614599936244814,
"subtask_accuracy": 0.011194029850746268,
"transition_accuracy": 0.9908234126984127
},
"package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
"public_matrix_role": "pinned prior release row and comparison baseline",
"purpose": "Move from the 448-sample compact eval to a denser multiscale 4,032-sample held-out eval.",
"reader_use": "Use as the pinned prior release; it remains stronger than v6 on JSON validity, subtask, next-action, object, and transition metrics.",
"role": "Dense/multiscale selected-128 run with 4,032 held-out predictions; kept as the pinned prior release because several metrics remain stronger than v6.",
"status": "verified",
"title": "Multiscale cap96 LoRA",
"train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
"version": "v5"
},
{
"change_from_previous": "Kept the multiscale setup, changed LoRA rank/lr to rank64/lr5e-5, and added verified task-specific probes for full 20-task coverage.",
"dataset_contract": "xperience10m_episode_json_qa_v1",
"dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
"eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
"eval_samples": 4032,
"metrics": {
"action_macro_f1": 0.0028830723979596335,
"contact_accuracy": 0.8177083333333334,
"json_validity_rate": 0.9990079365079365,
"next_action_accuracy": 0.04305335446381405,
"object_micro_f1": 0.3064982378331287,
"subtask_accuracy": 0.0037313432835820895,
"transition_accuracy": 0.9898313492063492
},
"package": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
"package_path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
"public_matrix_role": "current public 20-task Qwen3-Omni v6 LoRA row",
"purpose": "Promote the current public Qwen3-Omni 20-task row with multiscale LoRA plus task-specific probes.",
"reader_use": "Use as the current public 20-task Qwen row; it improves action macro-F1 and contact accuracy over v5, which remains the prior comparator.",
"role": "Current verified Qwen3-Omni row: rank64/lr5e-5 multiscale LoRA plus task-specific probe artifacts used for the 20/20 Qwen matrix coverage.",
"status": "verified",
"title": "Rank64 lr5e-5 multiscale LoRA",
"train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora",
"version": "v6"
}
],
"scope": "Verified public-safe Qwen3-Omni LoRA/eval packages over the selected Xperience-10M 128-episode surface.",
"status": "pass",
"title": "Qwen3-Omni v1-v6 Run Lineage"
}
|