| { |
| "run_id": "xperience10m_qwen3_omni_32ep_dataset", |
| "dataset_path": "/path/to/ropedia_workspace/ropedia-episode-task-suite/results/omni_finetune/xperience10m_qwen3_omni_32ep_dataset/dataset.jsonl", |
| "num_samples": 128, |
| "num_episodes": 1, |
| "split_counts": { |
| "train": 128 |
| }, |
| "label_counts": { |
| "Close bottle cap": 9, |
| "Pick up kettle": 8, |
| "Position kettle to pour": 8, |
| "Move kettle": 8, |
| "Hold coffee carafe": 8, |
| "Grasp coffee scoop": 8, |
| "Transfer coffee to dripper": 8, |
| "Hold gooseneck kettle": 8, |
| "Lift gooseneck kettle": 8, |
| "Move kettle away": 8, |
| "Wait/Prepare for pouring": 8, |
| "Pour coffee": 8, |
| "Grasp gooseneck kettle": 6, |
| "Place kettle on table": 6, |
| "Pick up white bottle": 6, |
| "Pour liquid from white bottle": 6, |
| "Place item on table": 6, |
| "Pour milk into coffee": 1 |
| }, |
| "action_options": [ |
| "Close bottle cap", |
| "Grasp coffee scoop", |
| "Grasp gooseneck kettle", |
| "Hold coffee carafe", |
| "Hold gooseneck kettle", |
| "Lift gooseneck kettle", |
| "Move kettle", |
| "Move kettle away", |
| "Pick up kettle", |
| "Pick up white bottle", |
| "Place item on table", |
| "Place kettle on table", |
| "Position kettle to pour", |
| "Pour coffee", |
| "Pour liquid from white bottle", |
| "Pour milk into coffee", |
| "Transfer coffee to dripper", |
| "Wait/Prepare for pouring" |
| ], |
| "subtask_options": [ |
| "Handle gooseneck kettle", |
| "Lift gooseneck kettle", |
| "Move kettle", |
| "Pick up and position kettle", |
| "Pour and close white bottle", |
| "Pour coffee", |
| "Pour milk into coffee", |
| "Prepare coffee equipment and scoop grounds", |
| "Prepare for pouring", |
| "Set down kettle and retrieve white bottle", |
| "Transfer coffee grounds to dripper" |
| ], |
| "clip_policy": { |
| "label_window_frames": 20, |
| "qwen_context_frames": 120, |
| "max_video_frames": 16, |
| "audio_span": "same_as_video_context", |
| "mosaic": "2x3 multi-camera grid" |
| }, |
| "feature_manifest": [ |
| { |
| "name": "hand_left_joints", |
| "start": 0, |
| "end": 441, |
| "dim": 441 |
| }, |
| { |
| "name": "hand_right_joints", |
| "start": 441, |
| "end": 882, |
| "dim": 441 |
| }, |
| { |
| "name": "body_joints", |
| "start": 882, |
| "end": 1974, |
| "dim": 1092 |
| }, |
| { |
| "name": "body_contacts", |
| "start": 1974, |
| "end": 2121, |
| "dim": 147 |
| }, |
| { |
| "name": "camera_translation", |
| "start": 2121, |
| "end": 2142, |
| "dim": 21 |
| }, |
| { |
| "name": "camera_rotation_matrix", |
| "start": 2142, |
| "end": 2205, |
| "dim": 63 |
| }, |
| { |
| "name": "imu_accel_gyro", |
| "start": 2205, |
| "end": 2247, |
| "dim": 42 |
| }, |
| { |
| "name": "depth_confidence", |
| "start": 2247, |
| "end": 3227, |
| "dim": 980 |
| }, |
| { |
| "name": "caption_objects_interaction_text", |
| "start": 3227, |
| "end": 4123, |
| "dim": 896 |
| }, |
| { |
| "name": "slam_point_cloud", |
| "start": 4123, |
| "end": 4145, |
| "dim": 22 |
| }, |
| { |
| "name": "calibration", |
| "start": 4145, |
| "end": 4262, |
| "dim": 117 |
| } |
| ], |
| "available_modalities": [ |
| { |
| "episode_id": "xperience-10m-sample", |
| "modalities": [ |
| { |
| "modality": "depth_confidence", |
| "shape": [ |
| 5821, |
| 140 |
| ] |
| }, |
| { |
| "modality": "caption_text", |
| "shape": [ |
| 5821, |
| 128 |
| ], |
| "fields": "objects,interaction" |
| }, |
| { |
| "modality": "slam_point_cloud_static", |
| "shape": [ |
| 22 |
| ] |
| }, |
| { |
| "modality": "calibration_static", |
| "shape": [ |
| 117 |
| ] |
| } |
| ] |
| } |
| ], |
| "notes": [ |
| "Assistant answers are strict JSON for episode understanding, not robot-control policies.", |
| "Sensor features are stored as NPZ pointers; raw annotation.hdf5 is not copied into the dataset records." |
| ] |
| } |