Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
File size: 4,213 Bytes
{
"title": "Verified Qwen3-Omni LoRA Validation-Aware Held-Out Pilot",
"status": "verified_validation_aware_diagnostic_pilot",
"status_date": "2026-06-06",
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
"adapter": "Qwen3-Omni LoRA",
"dataset": "Ropedia Xperience-10M selected 128-episode pilot",
"split_policy": {
"unit": "episode",
"selected_episode_counts": {
"train": 96,
"val": 16,
"test": 16
},
"exported_window_counts": {
"train": 2848,
"val": 512,
"test": 448
},
"exported_episode_counts": {
"train": 89,
"val": 16,
"test": 14
},
"skipped_selected_episodes": 9,
"leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation."
},
"training": {
"num_processes": 8,
"epochs": 1,
"lora_rank": 16,
"lora_alpha": 32,
"lora_dropout": 0.05,
"num_train_samples": 2848,
"num_val_samples": 512,
"history": [
{
"epoch": 1,
"train_loss": 0.41304643672440994,
"val_loss": 0.0330660454928875,
"global_step": 356
}
],
"loss": "answer-token cross entropy over supervised JSON tokens",
"note": "This validation-aware run uses the selected validation split during training and preserves the held-out test split for final evaluation."
},
"evaluation": {
"split": "test",
"num_samples": 448,
"held_out_episode_count": 14,
"json_validity_rate": 0.875,
"action_macro_f1": 0.0026621494447581404,
"subtask_accuracy": 0.006696428571428571,
"transition_accuracy": 0.8504464285714286,
"next_action_accuracy": 0.024553571428571428,
"contact_accuracy": 0.6450892857142857,
"object_micro_f1": 0.22299431459254582,
"quality_target": {
"json_validity_rate": 0.98,
"status": "not_met"
},
"previous_diagnostic_json_validity_rate": 0.8526785714285714
},
"interpretation": "This is a real held-out multi-episode validation-aware diagnostic pilot proving the export, LoRA training with validation monitoring, evaluation, validation, and public-safe packaging loop. JSON validity improved over the earlier no-validation diagnostic run, but task-quality metrics remain weak, so it should be used as a baseline and error-analysis starting point rather than a strong Xperience-10M model.",
"public_package": {
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
"audit_status": "pass",
"contains_raw_xperience10m_data": false,
"contains_qwen_base_weights": false,
"contains_lora_weights": false,
"error_analysis": {
"status": "pass",
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/error_analysis_summary.json",
"markdown_report": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md",
"groupings": [
"episode",
"action_family",
"train_seen_status",
"required_modality_state",
"object_category"
],
"key_readouts": {
"parsed_prediction_rate": 0.8772321428571429,
"weakest_action_family": "locomotion",
"weakest_action_family_samples": 23,
"weakest_action_family_parsed_prediction_rate": 0.2608695652173913,
"seen_action_exact_rate": 0.04580152671755725,
"unseen_action_exact_rate": 0.015772870662460567,
"required_modality_state": "rrd_missing_only_required_modalities_present"
}
}
},
"required_next_steps": [
"Improve JSON-format reliability through prompt, decoding, constrained parsing, or target formatting changes.",
"Use the published held-out error analysis to prioritize JSON constraints, action/subtask formatting, object vocabulary handling, and missing-modality robustness.",
"Run a second validation-aware Qwen3-Omni pass only after the JSON/output contract is tightened.",
"Keep the same verified package contract for Cosmos-style world-model and VLA/policy branches."
]
}
|