Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "title": "Ropedia Xperience-10M Current Result Versions and Model Groups", | |
| "generated_at_utc": "2026-06-18T12:52:47+00:00", | |
| "status": "pass", | |
| "version_count": 3, | |
| "model_group_count": 5, | |
| "comparison_rule": "Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the three Cosmos3 targets answer different questions: Nano future-window retrieval, Super structured JSON Reasoner evaluation, and Super Forward-Dynamics LoRA camera-pose-conditioned future vision velocity loss.", | |
| "version_reading_notes": [ | |
| "Version 1 is the public-sample 12-task harness with minimal and neural heads.", | |
| "Version 2 is the selected 128-episode same-split simple/NN baseline alignment.", | |
| "Version 3 is the verified model-branch layer: the current final Qwen3-Omni LoRA package is the JSON-task diagnostic result, Cosmos3-Nano is a future-window compatibility result, Cosmos3-Super Reasoner is a base-weight JSON-task evaluation, and Cosmos3-Super Forward-Dynamics LoRA is the first Super fine-tuned adapter branch." | |
| ], | |
| "versions": [ | |
| { | |
| "id": "v1_single_episode_public_sample", | |
| "title": "Single-Episode Public-Sample Task Suite", | |
| "status": "verified", | |
| "scope": "one public Xperience-10M sample episode", | |
| "source": "results/episode_task_suite/summary_report.json", | |
| "split": "chronological 70/30 within one episode", | |
| "counts": { | |
| "episodes": 1, | |
| "windows": 1161, | |
| "frames": 5821, | |
| "feature_dim": 8546, | |
| "task_count": 12, | |
| "neural_task_count": 12 | |
| }, | |
| "models": [ | |
| "minimal task heads", | |
| "compact neural MLP task heads" | |
| ], | |
| "task_metrics": [ | |
| { | |
| "task": "caption_grounding", | |
| "task_display_name": "Language Grounding", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "mrr", | |
| "simple_primary_score": 0.016023479050338015, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "mrr", | |
| "neural_primary_score": 0.01684125567132316 | |
| }, | |
| { | |
| "task": "contact_prediction", | |
| "task_display_name": "Contact State Prediction", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 1.0, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 1.0 | |
| }, | |
| { | |
| "task": "cross_modal_retrieval", | |
| "task_display_name": "Cross-Modal Retrieval", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "mrr", | |
| "simple_primary_score": 0.26925966892956127, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "mrr", | |
| "neural_primary_score": 0.1299971898648288 | |
| }, | |
| { | |
| "task": "hand_trajectory_forecast", | |
| "task_display_name": "Hand Trajectory Forecasting", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "mpjpe", | |
| "simple_primary_score": 0.8646570444107056, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "mpjpe", | |
| "neural_primary_score": 0.10785018652677536 | |
| }, | |
| { | |
| "task": "misalignment_detection", | |
| "task_display_name": "Multimodal Synchronization Detection", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "f1", | |
| "simple_primary_score": 0.5051698670605613, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "f1", | |
| "neural_primary_score": 0.7152682255845944 | |
| }, | |
| { | |
| "task": "modality_reconstruction", | |
| "task_display_name": "Cross-Modal Reconstruction", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "r2", | |
| "simple_primary_score": -0.015271898913936655, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "r2", | |
| "neural_primary_score": -0.010171410134180991 | |
| }, | |
| { | |
| "task": "next_action", | |
| "task_display_name": "Next-Action Prediction", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.05925925925925927, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.04186046511627907 | |
| }, | |
| { | |
| "task": "object_relevance", | |
| "task_display_name": "Object Relevance Prediction", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "micro_f1", | |
| "simple_primary_score": 0.18034382095361662, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "micro_f1", | |
| "neural_primary_score": 0.1679279279279279 | |
| }, | |
| { | |
| "task": "temporal_order", | |
| "task_display_name": "Temporal Order Verification", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "accuracy", | |
| "simple_primary_score": 0.4540229885057471, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "accuracy", | |
| "neural_primary_score": 0.8577586206896551 | |
| }, | |
| { | |
| "task": "timeline_action", | |
| "task_display_name": "Action Recognition", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.05, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.014814814814814814 | |
| }, | |
| { | |
| "task": "timeline_subtask", | |
| "task_display_name": "Procedure Step Recognition", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.05056355513846935, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.02810810810810811 | |
| }, | |
| { | |
| "task": "transition_detection", | |
| "task_display_name": "Action Boundary Detection", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.6118237590630229, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.5862068965517241 | |
| } | |
| ], | |
| "interpretation": "This layer verifies the 12 task contracts and raw multimodal feature pipeline on the public sample. It is not a cross-episode benchmark." | |
| }, | |
| { | |
| "id": "v2_multi_episode_128_aligned_metadata_baselines", | |
| "title": "128-Episode Aligned Simple/NN Baselines", | |
| "status": "pass", | |
| "scope": "selected 128-episode 96/16/16 split", | |
| "source": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md", | |
| "split": "train/val/test by selected episode/session", | |
| "counts": { | |
| "rows": 3808, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "episode_counts": { | |
| "test": 16, | |
| "train": 96, | |
| "val": 16 | |
| }, | |
| "task_count": 12, | |
| "simple_supported_task_count": 8, | |
| "neural_supported_task_count": 6 | |
| }, | |
| "models": [ | |
| "metadata/text simple baselines", | |
| "metadata/text neural MLP baselines" | |
| ], | |
| "task_metrics": [ | |
| { | |
| "task": "timeline_action", | |
| "task_display_name": "Action Recognition", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.00017511601435951318, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.0 | |
| }, | |
| { | |
| "task": "timeline_subtask", | |
| "task_display_name": "Procedure Step Recognition", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.0, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.0 | |
| }, | |
| { | |
| "task": "transition_detection", | |
| "task_display_name": "Action Boundary Detection", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.5219803670507895, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.45822172492907925 | |
| }, | |
| { | |
| "task": "next_action", | |
| "task_display_name": "Next-Action Prediction", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.00019966057701906761, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.0 | |
| }, | |
| { | |
| "task": "hand_trajectory_forecast", | |
| "task_display_name": "Hand Trajectory Forecasting", | |
| "simple_status": "unsupported_without_raw_128_feature_blocks", | |
| "simple_primary_metric": "mpjpe", | |
| "simple_primary_score": null, | |
| "neural_status": "not_run", | |
| "neural_primary_metric": "", | |
| "neural_primary_score": null | |
| }, | |
| { | |
| "task": "contact_prediction", | |
| "task_display_name": "Contact State Prediction", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "macro_f1", | |
| "simple_primary_score": 0.5167950693374422, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "macro_f1", | |
| "neural_primary_score": 0.21951219512195122 | |
| }, | |
| { | |
| "task": "object_relevance", | |
| "task_display_name": "Object Relevance Prediction", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "micro_f1", | |
| "simple_primary_score": 0.18221614227086183, | |
| "neural_status": "pass", | |
| "neural_primary_metric": "micro_f1", | |
| "neural_primary_score": 0.1053878034339846 | |
| }, | |
| { | |
| "task": "caption_grounding", | |
| "task_display_name": "Language Grounding", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "mrr", | |
| "simple_primary_score": 0.012785504572093487, | |
| "neural_status": "not_run", | |
| "neural_primary_metric": "", | |
| "neural_primary_score": null | |
| }, | |
| { | |
| "task": "cross_modal_retrieval", | |
| "task_display_name": "Cross-Modal Retrieval", | |
| "simple_status": "unsupported_without_raw_128_feature_blocks", | |
| "simple_primary_metric": "mrr", | |
| "simple_primary_score": null, | |
| "neural_status": "not_run", | |
| "neural_primary_metric": "", | |
| "neural_primary_score": null | |
| }, | |
| { | |
| "task": "modality_reconstruction", | |
| "task_display_name": "Cross-Modal Reconstruction", | |
| "simple_status": "unsupported_without_raw_128_feature_blocks", | |
| "simple_primary_metric": "r2", | |
| "simple_primary_score": null, | |
| "neural_status": "not_run", | |
| "neural_primary_metric": "", | |
| "neural_primary_score": null | |
| }, | |
| { | |
| "task": "temporal_order", | |
| "task_display_name": "Temporal Order Verification", | |
| "simple_status": "pass", | |
| "simple_primary_metric": "f1", | |
| "simple_primary_score": 0.32713178294573647, | |
| "neural_status": "not_run", | |
| "neural_primary_metric": "", | |
| "neural_primary_score": null | |
| }, | |
| { | |
| "task": "misalignment_detection", | |
| "task_display_name": "Multimodal Synchronization Detection", | |
| "simple_status": "unsupported_without_raw_128_feature_blocks", | |
| "simple_primary_metric": "f1", | |
| "simple_primary_score": null, | |
| "neural_status": "not_run", | |
| "neural_primary_metric": "", | |
| "neural_primary_score": null | |
| } | |
| ], | |
| "interpretation": "This layer aligns the previous simple and neural baseline framing to the same selected 96/16/16 split used by the model branches. It uses public-safe JSONL metadata/text features, so raw-feature-only tasks remain explicitly unsupported until 128-run sensor feature blocks exist." | |
| }, | |
| { | |
| "id": "v3_multi_episode_foundation_model_branches", | |
| "title": "128-Episode Foundation-Model Branches", | |
| "status": "partial_verified", | |
| "scope": "selected 128-episode split and compatible derived windows", | |
| "source": "results/omni_finetune/verified_public/", | |
| "split": "episode/session held-out split; exact task target depends on backbone contract", | |
| "counts": { | |
| "verified_branch_count": 10, | |
| "qwen3_verified_package_count": 7, | |
| "cosmos3_verified_package_count": 3, | |
| "cosmos3_nano_verified_package_count": 1, | |
| "cosmos3_super_verified_package_count": 2 | |
| }, | |
| "models": [ | |
| "Qwen3-Omni LoRA", | |
| "Cosmos3-Nano future-window compatibility branch", | |
| "Cosmos3-Super Reasoner base-weight evaluation", | |
| "Cosmos3-Super forward-dynamics LoRA" | |
| ], | |
| "branches": [ | |
| { | |
| "id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full", | |
| "title": "Cosmos3-Nano Future-Window World Model", | |
| "status": "verified", | |
| "backbone": "cosmos_world_model", | |
| "dataset_contract": "xperience10m_future_window_world_model_v0", | |
| "training_objective": "future_window_and_action_conditioned_world_modeling", | |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat", | |
| "train_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter", | |
| "eval_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3213, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2403, | |
| "test": 378, | |
| "val": 432 | |
| }, | |
| "train_samples": 2403, | |
| "val_samples": 432, | |
| "eval_samples": 378, | |
| "held_out_episode_count": 14, | |
| "num_processes": 1 | |
| }, | |
| "primary_metrics": { | |
| "future_retrieval_mrr": 0.022138720585222767, | |
| "future_retrieval_recall_at_5": 0.015873015873015872, | |
| "temporal_consistency": 0.09523809523809523, | |
| "feature_reconstruction_error": 3479.218317102503, | |
| "transition_accuracy": 0.9682539682539683, | |
| "contact_accuracy": 0.7433862433862434, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 0, | |
| "train_loss": null, | |
| "val_loss": null, | |
| "note": "closed-form mean-delta adapter; no Cosmos diffusion weights fine-tuned in this compatibility run" | |
| } | |
| ], | |
| "is_current": true, | |
| "weights_repository": "planned separate Cosmos3 model repo after a real Cosmos diffusion/LoRA fine-tune exists; current result remains artifacts-only" | |
| }, | |
| { | |
| "id": "xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp", | |
| "title": "Cosmos3-Super Forward-Dynamics LoRA", | |
| "status": "verified", | |
| "backbone": "cosmos3_super_forward_dynamics", | |
| "dataset_contract": "xperience10m_camera_pose_forward_dynamics_v1", | |
| "training_objective": "camera_pose_conditioned_future_vision_velocity_lora", | |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_cosmos3_camera_pose_targets_20260608", | |
| "train_run_id": "xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608", | |
| "eval_run_id": "xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "test": 448, | |
| "train": 2848, | |
| "val": 512 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "adapter_parameter_numel": 26214400, | |
| "held_out_episode_count": 14, | |
| "test_forward_dynamics_mse": 3.6853174321087345, | |
| "train_final_loss": 1.0785235166549683, | |
| "val_forward_dynamics_mse": 4.008244896889664 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "note": "FSDP 8-GPU LoRA over camera-pose-conditioned future vision velocity loss; adapter weights are excluded from this public package.", | |
| "train_loss": 1.0785235166549683, | |
| "val_loss": 4.008244896889664 | |
| } | |
| ], | |
| "is_current": true, | |
| "weights_repository": "https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep" | |
| }, | |
| { | |
| "id": "xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607", | |
| "title": "Cosmos3-Super Reasoner", | |
| "status": "verified", | |
| "backbone": "cosmos3_super_reasoner", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "zero_shot_structured_episode_understanding_json_qa_via_vllm_reasoner", | |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_cosmos3_super_reasoner_base_vllm_8gpu_20260607", | |
| "eval_run_id": "xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.5111607142857143, | |
| "action_macro_f1": 0.0008284021201089245, | |
| "subtask_accuracy": 0.0, | |
| "transition_accuracy": 0.36830357142857145, | |
| "next_action_accuracy": 0.013392857142857142, | |
| "contact_accuracy": 0.32142857142857145, | |
| "object_micro_f1": 0.13704276146316333, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [], | |
| "is_current": true, | |
| "weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.875, | |
| "action_macro_f1": 0.0026621494447581404, | |
| "subtask_accuracy": 0.006696428571428571, | |
| "transition_accuracy": 0.8504464285714286, | |
| "next_action_accuracy": 0.024553571428571428, | |
| "contact_accuracy": 0.6450892857142857, | |
| "object_micro_f1": 0.22299431459254582, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.41304643672440994, | |
| "val_loss": 0.0330660454928875, | |
| "global_step": 356 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 0, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.8526785714285714, | |
| "action_macro_f1": 0.00213753459655099, | |
| "subtask_accuracy": 0.004464285714285714, | |
| "transition_accuracy": 0.828125, | |
| "next_action_accuracy": 0.022321428571428572, | |
| "contact_accuracy": 0.6517857142857143, | |
| "object_micro_f1": 0.23062730627306272, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.4121775626560694, | |
| "val_loss": null, | |
| "global_step": 356 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 34269, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "test": 4032, | |
| "train": 25629, | |
| "val": 4608 | |
| }, | |
| "train_samples": 25629, | |
| "val_samples": 1024, | |
| "eval_samples": 4032, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 1.0, | |
| "action_macro_f1": 0.002289711036077459, | |
| "subtask_accuracy": 0.011194029850746268, | |
| "transition_accuracy": 0.9908234126984127, | |
| "next_action_accuracy": 0.053618594823032224, | |
| "contact_accuracy": 0.7864583333333334, | |
| "object_micro_f1": 0.31614599936244814, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.06255286606544624, | |
| "val_loss": 0.02668904885649681, | |
| "global_step": 3204 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 34269, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "test": 4032, | |
| "train": 25629, | |
| "val": 4608 | |
| }, | |
| "train_samples": 25629, | |
| "val_samples": 2048, | |
| "eval_samples": 4032, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.9990079365079365, | |
| "action_macro_f1": 0.0028830723979596335, | |
| "subtask_accuracy": 0.0037313432835820895, | |
| "transition_accuracy": 0.9898313492063492, | |
| "next_action_accuracy": 0.04305335446381405, | |
| "contact_accuracy": 0.8177083333333334, | |
| "object_micro_f1": 0.3064982378331287, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.05208605339353295, | |
| "val_loss": 0.026512427255511284, | |
| "global_step": 3204 | |
| }, | |
| { | |
| "epoch": 2, | |
| "train_loss": 0.013760763933660042, | |
| "val_loss": 0.032345958054065704, | |
| "global_step": 6408 | |
| } | |
| ], | |
| "is_current": true, | |
| "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.9977678571428571, | |
| "action_macro_f1": 0.0024331644885523347, | |
| "subtask_accuracy": 0.002232142857142857, | |
| "transition_accuracy": 0.9709821428571429, | |
| "next_action_accuracy": 0.029017857142857144, | |
| "contact_accuracy": 0.71875, | |
| "object_micro_f1": 0.30160427807486634, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.41282760031950355, | |
| "val_loss": 0.03288277983665466, | |
| "global_step": 356 | |
| }, | |
| { | |
| "epoch": 2, | |
| "train_loss": 0.027745448225544075, | |
| "val_loss": 0.027823254466056824, | |
| "global_step": 712 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 1.0, | |
| "action_macro_f1": 0.0021983997167007384, | |
| "subtask_accuracy": 0.002232142857142857, | |
| "transition_accuracy": 0.9732142857142857, | |
| "next_action_accuracy": 0.03125, | |
| "contact_accuracy": 0.7209821428571429, | |
| "object_micro_f1": 0.30688228657389993, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.41282760031950355, | |
| "val_loss": 0.03288277983665466, | |
| "global_step": 356 | |
| }, | |
| { | |
| "epoch": 2, | |
| "train_loss": 0.027745448225544075, | |
| "val_loss": 0.027823254466056824, | |
| "global_step": 712 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 1.0, | |
| "action_macro_f1": 0.0018678269676001454, | |
| "subtask_accuracy": 0.0, | |
| "transition_accuracy": 0.9732142857142857, | |
| "next_action_accuracy": 0.033482142857142856, | |
| "contact_accuracy": 0.7299107142857143, | |
| "object_micro_f1": 0.31099781500364165, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.40796751019628613, | |
| "val_loss": 0.03258896619081497, | |
| "global_step": 356 | |
| }, | |
| { | |
| "epoch": 2, | |
| "train_loss": 0.027628723937453012, | |
| "val_loss": 0.027754632756114006, | |
| "global_step": 712 | |
| }, | |
| { | |
| "epoch": 3, | |
| "train_loss": 0.02446955946807781, | |
| "val_loss": 0.026343274861574173, | |
| "global_step": 1068 | |
| }, | |
| { | |
| "epoch": 4, | |
| "train_loss": 0.022728607045444712, | |
| "val_loss": 0.025629229843616486, | |
| "global_step": 1424 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| } | |
| ], | |
| "interpretation": "This layer contains the held-out foundation-model packages. Qwen3-Omni packages evaluate structured JSON task prediction; Cosmos3-Nano evaluates a future-window world-model compatibility adapter; Cosmos3-Super Reasoner evaluates staged base weights through vLLM on the JSON task; Cosmos3-Super Forward-Dynamics LoRA is the first Super adapter branch and evaluates camera-pose-conditioned future vision velocity loss." | |
| } | |
| ], | |
| "model_groups": [ | |
| { | |
| "id": "task_head_baselines", | |
| "model_family": "Minimal and Neural Task Heads", | |
| "model_type": "lightweight supervised/self-supervised task heads", | |
| "weight_repository": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines", | |
| "one_episode_runs": [ | |
| { | |
| "id": "task_heads_single_episode_public_sample", | |
| "title": "Single-Episode Public-Sample Task Suite", | |
| "scope": "one public Xperience-10M sample episode", | |
| "status": "verified", | |
| "source": "results/episode_task_suite/summary_report.json", | |
| "split": "chronological 70/30 within one episode", | |
| "counts": { | |
| "episodes": 1, | |
| "windows": 1161, | |
| "frames": 5821, | |
| "feature_dim": 8546, | |
| "task_count": 12, | |
| "neural_task_count": 12 | |
| }, | |
| "weights": "baseline model files in the baseline model repo; no foundation-model weights", | |
| "interpretation": "Raw multimodal feature task harness on the public sample." | |
| } | |
| ], | |
| "multi_episode_128_runs": [ | |
| { | |
| "id": "task_heads_128_episode_metadata_baselines", | |
| "title": "128-Episode Aligned Simple/NN Baselines", | |
| "scope": "selected 128-episode 96/16/16 split", | |
| "status": "pass", | |
| "source": "results/omni_finetune/multi_episode_128_task_baselines/BASELINE_ALIGNMENT_REPORT.md", | |
| "split": "train/val/test by selected episode/session", | |
| "counts": { | |
| "rows": 3808, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "episode_counts": { | |
| "test": 16, | |
| "train": 96, | |
| "val": 16 | |
| }, | |
| "task_count": 12, | |
| "simple_supported_task_count": 8, | |
| "neural_supported_task_count": 6 | |
| }, | |
| "weights": "metadata/text baseline artifacts; raw 128-episode sensor-feature model weights not yet complete", | |
| "interpretation": "Same selected 96/16/16 split and task ids as the model branches, but metadata/text features only." | |
| } | |
| ], | |
| "comparison_note": "This is the cleanest 1-episode versus 128-episode grouping for the same simple/NN task-head family, but the feature surface changes from raw public-sample features to public-safe 128-episode metadata/text features." | |
| }, | |
| { | |
| "id": "qwen3_omni_lora", | |
| "model_family": "Qwen3-Omni LoRA", | |
| "model_type": "PEFT LoRA adapter over Qwen/Qwen3-Omni-30B-A3B-Instruct", | |
| "weight_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep", | |
| "one_episode_runs": [ | |
| { | |
| "id": "qwen3_omni_sensor_adapter_smoke_1ep", | |
| "title": "Qwen3-Omni Sensor-Adapter Smoke", | |
| "scope": "one public Xperience-10M sample episode", | |
| "status": "verified_smoke", | |
| "source": "results/omni_exploration/qwen3_adapter_smoke/metrics.json", | |
| "split": "single_episode_chronological", | |
| "counts": { | |
| "episodes": 1, | |
| "windows": 59, | |
| "train_windows": 41, | |
| "test_windows": 18, | |
| "feature_dim": 4262, | |
| "adapter_tokens": 11 | |
| }, | |
| "primary_metrics": { | |
| "accuracy": 0.0, | |
| "macro_f1": 0.0, | |
| "train_final_loss": 1.4479121318677577 | |
| }, | |
| "base_model_target": "Qwen/Qwen3-Omni-30B-A3B-Thinking", | |
| "qwen3_loaded": false, | |
| "weights": "no Qwen3 base weights or LoRA adapter weights; adapter-token readiness smoke only", | |
| "interpretation": "This validates the sensor-adapter token path on one real episode before loading or LoRA-tuning Qwen3-Omni. It is not comparable to the 128-episode held-out LoRA result." | |
| } | |
| ], | |
| "readiness_runs": [ | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullparam_smoke_preemptible_8gpu_20260609", | |
| "title": "Full-Parameter 1-Step Feasibility Smoke", | |
| "scope_label": "full-param gate", | |
| "scope": "1 optimizer step over 8 train samples", | |
| "status": "passed", | |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_smoke_preemptible_8gpu_20260609/fullparam_feasibility_summary.json", | |
| "split": "selected 128-episode train split", | |
| "counts": { | |
| "samples": 8, | |
| "steps": 1, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "full_parameter_gate": "passed", | |
| "observed_train_steps": 1, | |
| "final_step_loss": 1.2726006507873535, | |
| "epoch_train_loss": 1.2726006507873535, | |
| "checkpoint_saved": false | |
| }, | |
| "weights": "no full-parameter checkpoint or public weights; save_mode=none", | |
| "interpretation": "Full-parameter FSDP feasibility evidence only. This gate is not a held-out model result, full fine-tune, checkpoint release, or public weight package." | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullparam_shorttrain8_preemptible_8gpu_20260609", | |
| "title": "Full-Parameter 8-Step Short Train", | |
| "scope_label": "full-param gate", | |
| "scope": "8 optimizer steps over 64 train samples", | |
| "status": "passed", | |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_shorttrain8_preemptible_8gpu_20260609/fullparam_shorttrain8_summary.json", | |
| "split": "selected 128-episode train split", | |
| "counts": { | |
| "samples": 64, | |
| "steps": 8, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "full_parameter_gate": "passed", | |
| "observed_train_steps": 8, | |
| "final_step_loss": 1.180522084236145, | |
| "epoch_train_loss": 1.2190196067094803, | |
| "checkpoint_saved": false | |
| }, | |
| "weights": "no full-parameter checkpoint or public weights; save_mode=none", | |
| "interpretation": "Full-parameter FSDP feasibility evidence only. This gate is not a held-out model result, full fine-tune, checkpoint release, or public weight package." | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullparam_pilot32_preemptible_8gpu_20260609", | |
| "title": "Full-Parameter 32-Step Pilot", | |
| "scope_label": "full-param gate", | |
| "scope": "32 optimizer steps over 256 train samples", | |
| "status": "passed", | |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot32_preemptible_8gpu_20260609/fullparam_pilot32_summary.json", | |
| "split": "selected 128-episode train split", | |
| "counts": { | |
| "samples": 256, | |
| "steps": 32, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "full_parameter_gate": "passed", | |
| "observed_train_steps": 32, | |
| "final_step_loss": 0.2206273376941681, | |
| "epoch_train_loss": 0.8451133379712701, | |
| "checkpoint_saved": false | |
| }, | |
| "weights": "no full-parameter checkpoint or public weights; save_mode=none", | |
| "interpretation": "Full-parameter FSDP feasibility evidence only. This gate is not a held-out model result, full fine-tune, checkpoint release, or public weight package." | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullparam_pilot64_preemptible_8gpu_20260609", | |
| "title": "Full-Parameter 64-Step Pilot", | |
| "scope_label": "full-param gate", | |
| "scope": "64 optimizer steps over 512 train samples", | |
| "status": "passed", | |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot64_preemptible_8gpu_20260609/fullparam_pilot64_summary.json", | |
| "split": "selected 128-episode train split", | |
| "counts": { | |
| "samples": 512, | |
| "steps": 64, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "full_parameter_gate": "passed", | |
| "observed_train_steps": 64, | |
| "final_step_loss": 0.011219973675906658, | |
| "epoch_train_loss": 0.4434075650788145, | |
| "checkpoint_saved": false | |
| }, | |
| "weights": "no full-parameter checkpoint or public weights; save_mode=none", | |
| "interpretation": "Full-parameter FSDP feasibility evidence only. This gate is not a held-out model result, full fine-tune, checkpoint release, or public weight package." | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullparam_pilot128_preemptible_8gpu_20260609", | |
| "title": "Full-Parameter 128-Step Opportunistic Pilot", | |
| "scope_label": "full-param gate", | |
| "scope": "planned 128 optimizer steps over 1024 train samples; preempted for Qwen v5 handoff", | |
| "status": "preempted_for_qwen_v5_handoff", | |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot128_preemptible_8gpu_20260609/fullparam_pilot128_summary.json", | |
| "split": "selected 128-episode train split", | |
| "counts": { | |
| "samples": 1024, | |
| "steps": 0, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "full_parameter_gate": "preempted_for_qwen_v5_handoff", | |
| "observed_train_steps": 0, | |
| "final_step_loss": null, | |
| "epoch_train_loss": null, | |
| "checkpoint_saved": false | |
| }, | |
| "weights": "no full-parameter checkpoint or public weights; save_mode=none", | |
| "interpretation": "Full-parameter FSDP feasibility evidence only. This gate is not a held-out model result, full fine-tune, checkpoint release, or public weight package." | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullparam_pilot128_after_qwen_v5_preemptible_8gpu_20260609", | |
| "title": "Full-Parameter 128-Step Post-Qwen-v5 Pilot", | |
| "scope_label": "full-param gate", | |
| "scope": "128 optimizer steps over 1024 train samples after verified Qwen v5 handoff", | |
| "status": "passed", | |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot128_after_qwen_v5_preemptible_8gpu_20260609/training_metadata.json", | |
| "split": "selected 128-episode train split", | |
| "counts": { | |
| "samples": 1024, | |
| "steps": 128, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "full_parameter_gate": "passed", | |
| "observed_train_steps": 128, | |
| "final_step_loss": 0.0136940386146307, | |
| "epoch_train_loss": 0.21579630990163423, | |
| "checkpoint_saved": false | |
| }, | |
| "weights": "no full-parameter checkpoint or public weights; save_mode=none", | |
| "interpretation": "Full-parameter FSDP feasibility evidence only. This gate is not a held-out model result, full fine-tune, checkpoint release, or public weight package." | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullparam_pilot256_after_qwen_v6_preemptible_8gpu_20260611", | |
| "title": "Full-Parameter 256-Step Post-Qwen-v6 Pilot", | |
| "scope_label": "full-param gate", | |
| "scope": "256 optimizer steps over 2048 train samples after verified Qwen v6 handoff", | |
| "status": "passed", | |
| "source": "results/omni_finetune/xperience10m_qwen3_omni_128ep_fullparam_pilot256_after_qwen_v6_preemptible_8gpu_20260611/training_metadata.json", | |
| "split": "selected 128-episode train split", | |
| "counts": { | |
| "samples": 2048, | |
| "steps": 256, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "full_parameter_gate": "passed", | |
| "observed_train_steps": 256, | |
| "final_step_loss": 0.009560374543070793, | |
| "epoch_train_loss": 0.1157912792496063, | |
| "checkpoint_saved": false | |
| }, | |
| "weights": "no full-parameter checkpoint or public weights; save_mode=none", | |
| "interpretation": "Full-parameter FSDP feasibility evidence only. This gate is not a held-out model result, full fine-tune, checkpoint release, or public weight package." | |
| } | |
| ], | |
| "multi_episode_128_runs": [ | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.875, | |
| "action_macro_f1": 0.0026621494447581404, | |
| "subtask_accuracy": 0.006696428571428571, | |
| "transition_accuracy": 0.8504464285714286, | |
| "next_action_accuracy": 0.024553571428571428, | |
| "contact_accuracy": 0.6450892857142857, | |
| "object_micro_f1": 0.22299431459254582, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.41304643672440994, | |
| "val_loss": 0.0330660454928875, | |
| "global_step": 356 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_noval_tail_logits_fullstatesave_v6_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 0, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.8526785714285714, | |
| "action_macro_f1": 0.00213753459655099, | |
| "subtask_accuracy": 0.004464285714285714, | |
| "transition_accuracy": 0.828125, | |
| "next_action_accuracy": 0.022321428571428572, | |
| "contact_accuracy": 0.6517857142857143, | |
| "object_micro_f1": 0.23062730627306272, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.4121775626560694, | |
| "val_loss": null, | |
| "global_step": 356 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 34269, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "test": 4032, | |
| "train": 25629, | |
| "val": 4608 | |
| }, | |
| "train_samples": 25629, | |
| "val_samples": 1024, | |
| "eval_samples": 4032, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 1.0, | |
| "action_macro_f1": 0.002289711036077459, | |
| "subtask_accuracy": 0.011194029850746268, | |
| "transition_accuracy": 0.9908234126984127, | |
| "next_action_accuracy": 0.053618594823032224, | |
| "contact_accuracy": 0.7864583333333334, | |
| "object_micro_f1": 0.31614599936244814, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.06255286606544624, | |
| "val_loss": 0.02668904885649681, | |
| "global_step": 3204 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 34269, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "test": 4032, | |
| "train": 25629, | |
| "val": 4608 | |
| }, | |
| "train_samples": 25629, | |
| "val_samples": 2048, | |
| "eval_samples": 4032, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.9990079365079365, | |
| "action_macro_f1": 0.0028830723979596335, | |
| "subtask_accuracy": 0.0037313432835820895, | |
| "transition_accuracy": 0.9898313492063492, | |
| "next_action_accuracy": 0.04305335446381405, | |
| "contact_accuracy": 0.8177083333333334, | |
| "object_micro_f1": 0.3064982378331287, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.05208605339353295, | |
| "val_loss": 0.026512427255511284, | |
| "global_step": 3204 | |
| }, | |
| { | |
| "epoch": 2, | |
| "train_loss": 0.013760763933660042, | |
| "val_loss": 0.032345958054065704, | |
| "global_step": 6408 | |
| } | |
| ], | |
| "is_current": true, | |
| "weights_repository": "https://huggingface.co/cy0307/ropedia-qwen3-omni-lora-128ep" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.9977678571428571, | |
| "action_macro_f1": 0.0024331644885523347, | |
| "subtask_accuracy": 0.002232142857142857, | |
| "transition_accuracy": 0.9709821428571429, | |
| "next_action_accuracy": 0.029017857142857144, | |
| "contact_accuracy": 0.71875, | |
| "object_micro_f1": 0.30160427807486634, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.41282760031950355, | |
| "val_loss": 0.03288277983665466, | |
| "global_step": 356 | |
| }, | |
| { | |
| "epoch": 2, | |
| "train_loss": 0.027745448225544075, | |
| "val_loss": 0.027823254466056824, | |
| "global_step": 712 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 1.0, | |
| "action_macro_f1": 0.0021983997167007384, | |
| "subtask_accuracy": 0.002232142857142857, | |
| "transition_accuracy": 0.9732142857142857, | |
| "next_action_accuracy": 0.03125, | |
| "contact_accuracy": 0.7209821428571429, | |
| "object_micro_f1": 0.30688228657389993, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.41282760031950355, | |
| "val_loss": 0.03288277983665466, | |
| "global_step": 356 | |
| }, | |
| { | |
| "epoch": 2, | |
| "train_loss": 0.027745448225544075, | |
| "val_loss": 0.027823254466056824, | |
| "global_step": 712 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| }, | |
| { | |
| "id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full", | |
| "title": "Qwen3-Omni LoRA", | |
| "status": "verified", | |
| "backbone": "qwen3_omni_lora", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "structured_episode_understanding_json_qa", | |
| "source": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora", | |
| "eval_run_id": "xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 1.0, | |
| "action_macro_f1": 0.0018678269676001454, | |
| "subtask_accuracy": 0.0, | |
| "transition_accuracy": 0.9732142857142857, | |
| "next_action_accuracy": 0.033482142857142856, | |
| "contact_accuracy": 0.7299107142857143, | |
| "object_micro_f1": 0.31099781500364165, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "train_loss": 0.40796751019628613, | |
| "val_loss": 0.03258896619081497, | |
| "global_step": 356 | |
| }, | |
| { | |
| "epoch": 2, | |
| "train_loss": 0.027628723937453012, | |
| "val_loss": 0.027754632756114006, | |
| "global_step": 712 | |
| }, | |
| { | |
| "epoch": 3, | |
| "train_loss": 0.02446955946807781, | |
| "val_loss": 0.026343274861574173, | |
| "global_step": 1068 | |
| }, | |
| { | |
| "epoch": 4, | |
| "train_loss": 0.022728607045444712, | |
| "val_loss": 0.025629229843616486, | |
| "global_step": 1424 | |
| } | |
| ], | |
| "is_current": false, | |
| "weights_repository": "historical diagnostic package; keep separate from the final 128-episode adapter repo" | |
| } | |
| ], | |
| "comparison_note": "The one-episode Qwen entry is only a sensor-adapter smoke test run without loading any Qwen3 weights. The 128-episode entries are real held-out LoRA diagnostics; the current final adapter belongs in the separate Qwen model repo. The full-parameter rows are feasibility gates only and intentionally publish no checkpoints or full-parameter weights." | |
| }, | |
| { | |
| "id": "cosmos3_nano_world_model", | |
| "model_family": "Cosmos3-Nano Future-Window World Model", | |
| "model_type": "world-model/future-window branch", | |
| "weight_repository": "planned: cy0307/ropedia-cosmos3-nano-future-window-lora-128ep after real adapter weights exist", | |
| "one_episode_runs": [ | |
| { | |
| "id": "cosmos3_nano_one_episode", | |
| "title": "Cosmos3-Nano One-Episode Fine-Tune", | |
| "scope": "one public Xperience-10M sample episode", | |
| "status": "not_run", | |
| "source": null, | |
| "weights": "none", | |
| "interpretation": "No Cosmos3 one-episode adapter or diffusion-weight fine-tune is currently published. Use the public-sample task suite only as model-agnostic evidence." | |
| } | |
| ], | |
| "multi_episode_128_runs": [ | |
| { | |
| "id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full", | |
| "title": "Cosmos3-Nano Future-Window World Model", | |
| "status": "verified", | |
| "backbone": "cosmos_world_model", | |
| "dataset_contract": "xperience10m_future_window_world_model_v0", | |
| "training_objective": "future_window_and_action_conditioned_world_modeling", | |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat", | |
| "train_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter", | |
| "eval_run_id": "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full", | |
| "counts": { | |
| "dataset_samples": 3213, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2403, | |
| "test": 378, | |
| "val": 432 | |
| }, | |
| "train_samples": 2403, | |
| "val_samples": 432, | |
| "eval_samples": 378, | |
| "held_out_episode_count": 14, | |
| "num_processes": 1 | |
| }, | |
| "primary_metrics": { | |
| "future_retrieval_mrr": 0.022138720585222767, | |
| "future_retrieval_recall_at_5": 0.015873015873015872, | |
| "temporal_consistency": 0.09523809523809523, | |
| "feature_reconstruction_error": 3479.218317102503, | |
| "transition_accuracy": 0.9682539682539683, | |
| "contact_accuracy": 0.7433862433862434, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 0, | |
| "train_loss": null, | |
| "val_loss": null, | |
| "note": "closed-form mean-delta adapter; no Cosmos diffusion weights fine-tuned in this compatibility run" | |
| } | |
| ], | |
| "is_current": true, | |
| "weights_repository": "planned separate Cosmos3 model repo after a real Cosmos diffusion/LoRA fine-tune exists; current result remains artifacts-only" | |
| } | |
| ], | |
| "comparison_note": "The current 128-episode Cosmos result is a public-safe future-window compatibility adapter. It is not yet a full Cosmos diffusion/LoRA weight release." | |
| }, | |
| { | |
| "id": "cosmos3_super_reasoner", | |
| "model_family": "Cosmos3-Super Reasoner", | |
| "model_type": "base-weight vLLM Reasoner evaluation over nv-community/Cosmos3-Super", | |
| "weight_repository": "none for this run; staged base weights only, no new fine-tuned weights", | |
| "one_episode_runs": [ | |
| { | |
| "id": "cosmos3_super_one_episode", | |
| "title": "Cosmos3-Super One-Episode Fine-Tune", | |
| "scope": "one public Xperience-10M sample episode", | |
| "status": "not_run", | |
| "source": null, | |
| "weights": "none", | |
| "interpretation": "No one-episode Cosmos3-Super adapter or fine-tuned weight run is published. The available Super result is the 128-episode held-out base-weight evaluation." | |
| } | |
| ], | |
| "readiness_runs": [ | |
| { | |
| "id": "xperience10m_cosmos3_super_training_readiness_20260607", | |
| "title": "Cosmos3-Super Training Readiness Probe", | |
| "scope": "selected 128-episode 96/16/16 JSON-task dataset and staged Cosmos3-Super runtime", | |
| "status": "blocked_until_trainer_implemented", | |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_training_readiness_20260607/training_readiness.json", | |
| "split": "train/val/test by selected episode/session", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "split_counts": { | |
| "test": { | |
| "samples": 448, | |
| "episodes": 14, | |
| "actions": 189 | |
| }, | |
| "train": { | |
| "samples": 2848, | |
| "episodes": 89, | |
| "actions": 885 | |
| }, | |
| "val": { | |
| "samples": 512, | |
| "episodes": 16, | |
| "actions": 223 | |
| } | |
| } | |
| }, | |
| "primary_metrics": { | |
| "diffusers_runtime_supported": true, | |
| "chat_sft_supported": false, | |
| "weights_updated": false | |
| }, | |
| "weights": "none; readiness audit only, no adapter checkpoint", | |
| "interpretation": "This probe confirms the staged Cosmos3-Super Diffusers/GPU runtime and the same JSON QA dataset are visible. It predates the camera-pose action-target export, so use the 20260608 contract audit for the current trainer-readiness status." | |
| }, | |
| { | |
| "id": "xperience10m_cosmos3_super_training_readiness_metadata_a100_20260609", | |
| "title": "Cosmos3-Super Remote Staging Readiness Probe", | |
| "scope_label": "staging readiness", | |
| "scope": "secondary 4-GPU staging tree, JSON-task dataset visibility, and metadata-only Cosmos3-Super runtime probe", | |
| "status": "blocked_until_trainer_implemented", | |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_training_readiness_metadata_a100_20260609/training_readiness.json", | |
| "split": "train/val/test by selected episode/session", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "split_counts": { | |
| "test": { | |
| "samples": 448, | |
| "episodes": 14, | |
| "actions": 189 | |
| }, | |
| "train": { | |
| "samples": 2848, | |
| "episodes": 89, | |
| "actions": 885 | |
| }, | |
| "val": { | |
| "samples": 512, | |
| "episodes": 16, | |
| "actions": 223 | |
| } | |
| } | |
| }, | |
| "primary_metrics": { | |
| "model_files_visible": false, | |
| "diffusers_runtime_supported": false, | |
| "cuda_device_count": 4, | |
| "weights_updated": false | |
| }, | |
| "weights": "none; staging readiness audit only, no adapter checkpoint", | |
| "interpretation": "This metadata-only probe checks the secondary 4-GPU staging tree without loading the model pipeline or updating weights. It confirms the JSON task dataset is present, but the Cosmos3-Super model files and Diffusers runtime are not staged there yet, so real Super training should wait for model/runtime staging or run on the already prepared main host." | |
| }, | |
| { | |
| "id": "xperience10m_cosmos3_super_training_contract_audit_camera_pose_20260608", | |
| "title": "Cosmos3-Super Camera-Pose Target Audit", | |
| "scope_label": "action target contract", | |
| "scope": "selected 128-episode 96/16/16 dataset augmented with camera_pose proxy cosmos_action_target records", | |
| "status": "ready_for_forward_dynamics_trainer", | |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_training_contract_audit_camera_pose_20260608/training_contract_audit.json", | |
| "split": "train/val/test by selected episode/session", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "rows_with_action_target": 3808, | |
| "valid_action_targets": 3808, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "episode_split_counts": { | |
| "test": 14, | |
| "train": 89, | |
| "val": 16 | |
| } | |
| }, | |
| "primary_metrics": { | |
| "domain_name": "camera_pose", | |
| "raw_action_dim": 9, | |
| "mode": "forward_dynamics", | |
| "valid_action_targets": 3808, | |
| "weights_updated": false | |
| }, | |
| "weights": "none; action-target contract audit only, no adapter checkpoint", | |
| "interpretation": "The selected dataset now has valid Cosmos3 camera_pose forward_dynamics targets for an egocentric camera-motion proxy. These remove the target-schema blocker for action-conditioned world-model training, but they supervise noisy vision tokens rather than preds_action. The remaining work is a trainable Cosmos3-Super implementation that can backpropagate through this loss surface at the required memory scale; action-token prediction needs a separate policy or inverse-dynamics target export." | |
| }, | |
| { | |
| "id": "xperience10m_cosmos3_super_action_packer_schema_smoke_20260608", | |
| "title": "Cosmos3-Super Action Batch Packer Smoke", | |
| "scope_label": "batch packer", | |
| "scope": "one selected train row from the camera_pose forward_dynamics augmented JSONL", | |
| "status": "pass", | |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_action_packer_schema_smoke_20260608/packer_summary.json", | |
| "split": "train", | |
| "counts": { | |
| "samples": 1, | |
| "raw_action_rows": 8, | |
| "raw_action_dim": 9 | |
| }, | |
| "primary_metrics": { | |
| "mode": "forward_dynamics", | |
| "loss_surface": "vision_velocity_conditioned_on_camera_pose", | |
| "pipeline_loaded": false, | |
| "weights_updated": false | |
| }, | |
| "weights": "none; schema-only packer smoke, no adapter checkpoint", | |
| "interpretation": "The selected row maps to a camera_pose forward_dynamics contract. In the installed Cosmos3 pipeline this uses raw actions as conditioning and supervises noisy vision tokens; it does not supervise preds_action." | |
| } | |
| ], | |
| "multi_episode_128_runs": [ | |
| { | |
| "id": "xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607", | |
| "title": "Cosmos3-Super Reasoner", | |
| "status": "verified", | |
| "backbone": "cosmos3_super_reasoner", | |
| "dataset_contract": "xperience10m_episode_json_qa_v1", | |
| "training_objective": "zero_shot_structured_episode_understanding_json_qa_via_vllm_reasoner", | |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605", | |
| "train_run_id": "xperience10m_cosmos3_super_reasoner_base_vllm_8gpu_20260607", | |
| "eval_run_id": "xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "train": 2848, | |
| "val": 512, | |
| "test": 448 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "json_validity_rate": 0.5111607142857143, | |
| "action_macro_f1": 0.0008284021201089245, | |
| "subtask_accuracy": 0.0, | |
| "transition_accuracy": 0.36830357142857145, | |
| "next_action_accuracy": 0.013392857142857142, | |
| "contact_accuracy": 0.32142857142857145, | |
| "object_micro_f1": 0.13704276146316333, | |
| "held_out_episode_count": 14 | |
| }, | |
| "history": [], | |
| "is_current": true, | |
| "weights_repository": "none for this run: staged base nv-community/Cosmos3-Super weights were evaluated through vLLM; create a separate repo only after new adapter or fine-tuned weights exist" | |
| } | |
| ], | |
| "comparison_note": "Cosmos3-Super is now represented by a verified 448-window held-out Reasoner evaluation on the same JSON task as Qwen3. It uses staged base weights through vLLM, so it is a model-branch diagnostic, not a weight release. A camera-pose proxy forward-dynamics target export now passes the contract audit and schema-only packer smoke; the separate Forward-Dynamics LoRA group records the trainable adapter run and loss-based held-out evaluation." | |
| }, | |
| { | |
| "id": "cosmos3_super_forward_dynamics", | |
| "model_family": "Cosmos3-Super Forward-Dynamics LoRA", | |
| "model_type": "PEFT LoRA over nv-community/Cosmos3-Super for camera-pose-conditioned future vision velocity", | |
| "weight_repository": "https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep", | |
| "one_episode_runs": [ | |
| { | |
| "id": "cosmos3_super_forward_dynamics_overfit_smoke", | |
| "title": "Cosmos3-Super Forward-Dynamics Overfit Smoke", | |
| "scope": "small overfit smoke before 128-episode scale-up", | |
| "status": "verified_smoke", | |
| "source": "results/omni_finetune/xperience10m_cosmos3_super_forward_dynamics_lora_overfit_after_qwen_v4_20260608_fsdp8_attn256_gradfix_savefix2/", | |
| "weights": "local repaired LoRA smoke adapter, not publicly packaged as final", | |
| "interpretation": "Validated the trainable adapter path, FSDP save repair, and Diffusers load before the full 128-episode run." | |
| } | |
| ], | |
| "multi_episode_128_runs": [ | |
| { | |
| "id": "xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp", | |
| "title": "Cosmos3-Super Forward-Dynamics LoRA", | |
| "status": "verified", | |
| "backbone": "cosmos3_super_forward_dynamics", | |
| "dataset_contract": "xperience10m_camera_pose_forward_dynamics_v1", | |
| "training_objective": "camera_pose_conditioned_future_vision_velocity_lora", | |
| "source": "results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/verified_result_summary.json", | |
| "dataset_run_id": "xperience10m_cosmos3_camera_pose_targets_20260608", | |
| "train_run_id": "xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608", | |
| "eval_run_id": "xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp", | |
| "counts": { | |
| "dataset_samples": 3808, | |
| "dataset_episodes": 119, | |
| "split_counts": { | |
| "test": 448, | |
| "train": 2848, | |
| "val": 512 | |
| }, | |
| "train_samples": 2848, | |
| "val_samples": 512, | |
| "eval_samples": 448, | |
| "held_out_episode_count": 14, | |
| "num_processes": 8 | |
| }, | |
| "primary_metrics": { | |
| "adapter_parameter_numel": 26214400, | |
| "held_out_episode_count": 14, | |
| "test_forward_dynamics_mse": 3.6853174321087345, | |
| "train_final_loss": 1.0785235166549683, | |
| "val_forward_dynamics_mse": 4.008244896889664 | |
| }, | |
| "history": [ | |
| { | |
| "epoch": 1, | |
| "note": "FSDP 8-GPU LoRA over camera-pose-conditioned future vision velocity loss; adapter weights are excluded from this public package.", | |
| "train_loss": 1.0785235166549683, | |
| "val_loss": 4.008244896889664 | |
| } | |
| ], | |
| "is_current": true, | |
| "weights_repository": "https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep" | |
| } | |
| ], | |
| "comparison_note": "This is the first verified Cosmos3-Super fine-tuned adapter branch. Its metric is forward-dynamics MSE, so compare it to world-model loss or future-prediction targets, not to Qwen JSON classification accuracy." | |
| } | |
| ], | |
| "model_group_reading_notes": [ | |
| "Use model_groups when comparing one-episode and 128-episode artifacts within the same model family.", | |
| "Task-head baselines have both a one-episode public-sample run and a 128-episode same-split metadata/text run.", | |
| "Qwen3-Omni has a one-episode sensor-adapter smoke test, full-parameter feasibility gates, and separate 128-episode LoRA diagnostic packages; the newest verified full-eval 128-episode adapter belongs in the Qwen LoRA model repo.", | |
| "Cosmos3-Nano has a 128-episode future-window compatibility package.", | |
| "Cosmos3-Super now has both a 128-episode base-weight Reasoner evaluation on the JSON task and a fine-tuned forward-dynamics LoRA branch over camera-pose proxy targets." | |
| ], | |
| "pending": [ | |
| "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.", | |
| "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses slightly on subtask accuracy, next-action accuracy, object micro-F1, and JSON validity." | |
| ] | |
| } | |