cy0307's picture
Publish Ropedia Xperience-10M task baseline cards
9371cfb verified
Raw
History Blame
16.5 kB
{
"status": "pass",
"generated_at_utc": "2026-06-03T15:11:33+00:00",
"repo_id": "ropedia-ai/xperience-10m",
"download_policy": "annotation.hdf5 only; no videos or visualization.rrd downloaded",
"cache_note": "raw annotation files were cached outside the published repo",
"probes": [
{
"repo_filename": "9cecac72-8874-4b97-9541-18d4858f8e43/ep10/annotation.hdf5",
"inspection": {
"cache_note": "annotation file cached outside the published repo",
"local_bytes": 6687192,
"local_human": "6.38 MiB",
"top_level_keys": [
"calibration",
"caption",
"depth",
"full_body_mocap",
"hand_mocap",
"imu",
"metadata",
"slam",
"video"
],
"dataset_count": 65,
"dataset_first_dim_histogram_top20": {
"20": 27,
"4": 14,
"190": 3,
"47": 1
},
"top_group_stats": {
"calibration": {
"dataset_count": 23,
"max_first_dim": 4,
"first_dim_values": {
"4": 14
}
},
"caption": {
"dataset_count": 1,
"max_first_dim": 0,
"first_dim_values": {}
},
"depth": {
"dataset_count": 5,
"max_first_dim": 20,
"first_dim_values": {
"20": 2
}
},
"full_body_mocap": {
"dataset_count": 9,
"max_first_dim": 20,
"first_dim_values": {
"20": 9
}
},
"hand_mocap": {
"dataset_count": 10,
"max_first_dim": 20,
"first_dim_values": {
"20": 10
}
},
"imu": {
"dataset_count": 4,
"max_first_dim": 190,
"first_dim_values": {
"190": 3,
"20": 1
}
},
"metadata": {
"dataset_count": 6,
"max_first_dim": 0,
"first_dim_values": {}
},
"slam": {
"dataset_count": 4,
"max_first_dim": 47,
"first_dim_values": {
"20": 3,
"47": 1
}
},
"video": {
"dataset_count": 3,
"max_first_dim": 20,
"first_dim_values": {
"20": 2
}
}
},
"max_first_dim_dataset": {
"path": "imu/accel_xyz",
"shape": [
190,
3
],
"dtype": "float64",
"first_dim": 190,
"storage_bytes": 4560,
"storage_human": "4.45 KiB"
},
"text_action_interaction_related_datasets": [
{
"path": "caption",
"shape": [],
"dtype": "object",
"first_dim": null,
"storage_bytes": 16,
"storage_human": "16.00 B",
"sample_values": [
"{\"config\": {\"segment_sec\": 20, \"sample_fps\": 0.5, \"total_tokens\": 2047, \"Main Task\": \"Packing items into a plastic bin. The person is placing various items into a clear plastic storage container.\"}, \"segments\": [{\"segment_id\": 0, \"start_frame\": \"82777404821554\", \"end_frame\": \"frame_0000021\", \"Sub Task\": \"Packing items into a plastic bin\", \"Current Action\": [{\"label\": \"Arrange items in bin\", \"description\": \"The person adjusts the position of items inside the plastic storage container to ensure they are organized.\", \"start_frame\": 82777404821554, \"end_frame\": 82777404821554}], \"sampled_frames\": {\"Image 1\": 82777404821554}, \"objects\": {\"82777404821554\": [\"plastic storage bin\", \"hand\", \"cardboard box\"]}, \"interaction\": {\"82777404821554\": \"The hand is reaching into and organizing items inside the plastic storage bin.\"}, \"api_call_start\": \"2026-03-12T19:33:43.280472\", \"api_call_end\": \"2026-03-12T19:33:44.979810\", \"tokens_in\": 1842, \"tokens_out\": 205}], \"global_summary\": \"The video depicts the process of organizing and packing various personal items into a plastic storage container. It focuses on the practical task of tidying up or preparing belongings for storage.\"}"
]
}
],
"caption_json_summary": {
"parse_status": "ok",
"json_bytes": 1178,
"top_keys": [
"config",
"segments",
"global_summary"
],
"config": {
"segment_sec": 20,
"sample_fps": 0.5,
"total_tokens": 2047,
"Main Task": "Packing items into a plastic bin. The person is placing various items into a clear plastic storage container."
},
"segment_count": 1,
"current_action_count": 1,
"unique_sub_task_count": 1,
"unique_action_label_count": 1,
"object_frame_count": 1,
"interaction_frame_count": 1,
"sampled_frame_count": 1,
"unique_object_count": 3,
"sub_tasks": [
"Packing items into a plastic bin"
],
"action_labels": [
"Arrange items in bin"
],
"objects": [
"cardboard box",
"hand",
"plastic storage bin"
],
"global_summary_preview": "The video depicts the process of organizing and packing various personal items into a plastic storage container. It focuses on the practical task of tidying up or preparing belongings for storage."
}
}
},
{
"repo_filename": "cdc1ae12-a460-48ac-a892-7d314095c4b1/ep23/annotation.hdf5",
"inspection": {
"cache_note": "annotation file cached outside the published repo",
"local_bytes": 6687256,
"local_human": "6.38 MiB",
"top_level_keys": [
"calibration",
"caption",
"depth",
"full_body_mocap",
"hand_mocap",
"imu",
"metadata",
"slam",
"video"
],
"dataset_count": 65,
"dataset_first_dim_histogram_top20": {
"20": 27,
"4": 14,
"188": 3,
"128": 1
},
"top_group_stats": {
"calibration": {
"dataset_count": 23,
"max_first_dim": 4,
"first_dim_values": {
"4": 14
}
},
"caption": {
"dataset_count": 1,
"max_first_dim": 0,
"first_dim_values": {}
},
"depth": {
"dataset_count": 5,
"max_first_dim": 20,
"first_dim_values": {
"20": 2
}
},
"full_body_mocap": {
"dataset_count": 9,
"max_first_dim": 20,
"first_dim_values": {
"20": 9
}
},
"hand_mocap": {
"dataset_count": 10,
"max_first_dim": 20,
"first_dim_values": {
"20": 10
}
},
"imu": {
"dataset_count": 4,
"max_first_dim": 188,
"first_dim_values": {
"188": 3,
"20": 1
}
},
"metadata": {
"dataset_count": 6,
"max_first_dim": 0,
"first_dim_values": {}
},
"slam": {
"dataset_count": 4,
"max_first_dim": 128,
"first_dim_values": {
"20": 3,
"128": 1
}
},
"video": {
"dataset_count": 3,
"max_first_dim": 20,
"first_dim_values": {
"20": 2
}
}
},
"max_first_dim_dataset": {
"path": "imu/accel_xyz",
"shape": [
188,
3
],
"dtype": "float64",
"first_dim": 188,
"storage_bytes": 4512,
"storage_human": "4.41 KiB"
},
"text_action_interaction_related_datasets": [
{
"path": "caption",
"shape": [],
"dtype": "object",
"first_dim": null,
"storage_bytes": 16,
"storage_human": "16.00 B",
"sample_values": [
"{\"config\": {\"segment_sec\": 20, \"sample_fps\": 0.5, \"total_tokens\": 2035, \"Main Task\": \"Putting on socks. The person is standing in a bathroom and putting on their socks.\"}, \"segments\": [{\"segment_id\": 0, \"start_frame\": \"78968504788029\", \"end_frame\": \"78969405629613\", \"Sub Task\": \"Putting on socks\", \"Current Action\": [{\"label\": \"Pulling up sock\", \"description\": \"The person is manually adjusting and pulling up a sock on their foot.\", \"start_frame\": 78968504788029, \"end_frame\": 78968504788029}], \"sampled_frames\": {\"Image 1\": 78968504788029}, \"objects\": {\"78968504788029\": [\"sock\", \"feet\", \"bathroom floor\", \"toilet\"]}, \"interaction\": {\"78968504788029\": \"The person's hands are gripping and pulling on the fabric of the sock to adjust it over their foot.\"}, \"api_call_start\": \"2026-03-11T07:58:15.838321\", \"api_call_end\": \"2026-03-11T07:58:17.411279\", \"tokens_in\": 1839, \"tokens_out\": 196}], \"global_summary\": \"The video focuses on the simple, everyday task of putting on socks. It provides a brief look at this routine action as the central theme.\"}"
]
}
],
"caption_json_summary": {
"parse_status": "ok",
"json_bytes": 1051,
"top_keys": [
"config",
"segments",
"global_summary"
],
"config": {
"segment_sec": 20,
"sample_fps": 0.5,
"total_tokens": 2035,
"Main Task": "Putting on socks. The person is standing in a bathroom and putting on their socks."
},
"segment_count": 1,
"current_action_count": 1,
"unique_sub_task_count": 1,
"unique_action_label_count": 1,
"object_frame_count": 1,
"interaction_frame_count": 1,
"sampled_frame_count": 1,
"unique_object_count": 4,
"sub_tasks": [
"Putting on socks"
],
"action_labels": [
"Pulling up sock"
],
"objects": [
"bathroom floor",
"feet",
"sock",
"toilet"
],
"global_summary_preview": "The video focuses on the simple, everyday task of putting on socks. It provides a brief look at this routine action as the central theme."
}
}
},
{
"repo_filename": "10282b64-a955-461e-9ef9-a1ddf8dc619a/ep5/annotation.hdf5",
"inspection": {
"cache_note": "annotation file cached outside the published repo",
"local_bytes": 6706448,
"local_human": "6.40 MiB",
"top_level_keys": [
"calibration",
"caption",
"depth",
"full_body_mocap",
"hand_mocap",
"imu",
"metadata",
"slam",
"video"
],
"dataset_count": 65,
"dataset_first_dim_histogram_top20": {
"20": 27,
"4": 14,
"190": 3,
"837": 1
},
"top_group_stats": {
"calibration": {
"dataset_count": 23,
"max_first_dim": 4,
"first_dim_values": {
"4": 14
}
},
"caption": {
"dataset_count": 1,
"max_first_dim": 0,
"first_dim_values": {}
},
"depth": {
"dataset_count": 5,
"max_first_dim": 20,
"first_dim_values": {
"20": 2
}
},
"full_body_mocap": {
"dataset_count": 9,
"max_first_dim": 20,
"first_dim_values": {
"20": 9
}
},
"hand_mocap": {
"dataset_count": 10,
"max_first_dim": 20,
"first_dim_values": {
"20": 10
}
},
"imu": {
"dataset_count": 4,
"max_first_dim": 190,
"first_dim_values": {
"190": 3,
"20": 1
}
},
"metadata": {
"dataset_count": 6,
"max_first_dim": 0,
"first_dim_values": {}
},
"slam": {
"dataset_count": 4,
"max_first_dim": 837,
"first_dim_values": {
"20": 3,
"837": 1
}
},
"video": {
"dataset_count": 3,
"max_first_dim": 20,
"first_dim_values": {
"20": 2
}
}
},
"max_first_dim_dataset": {
"path": "slam/point_cloud",
"shape": [
837,
3
],
"dtype": "float64",
"first_dim": 837,
"storage_bytes": 20088,
"storage_human": "19.62 KiB"
},
"text_action_interaction_related_datasets": [
{
"path": "caption",
"shape": [],
"dtype": "object",
"first_dim": null,
"storage_bytes": 16,
"storage_human": "16.00 B",
"sample_values": [
"{\"config\": {\"segment_sec\": 20, \"sample_fps\": 0.5, \"total_tokens\": 2060, \"Main Task\": \"walking through a retail store. The video shows a first-person perspective of someone walking through a retail aisle lined with shelves of products, while other people are seated nearby.\"}, \"segments\": [{\"segment_id\": 0, \"start_frame\": \"78307554787048\", \"end_frame\": \"frame_0000021\", \"Sub Task\": \"walking through a retail store\", \"Current Action\": [{\"label\": \"Walk down retail aisle\", \"description\": \"The camera operator is walking along a retail aisle while observing merchandise on the shelves and other people in the store.\", \"start_frame\": \"78307554787048\", \"end_frame\": \"78307554787048\"}], \"sampled_frames\": {\"Image 1\": 78307554787048}, \"objects\": {\"78307554787048\": [\"retail shelf\", \"product packaging\", \"shopping bags\", \"person seated\"]}, \"interaction\": {\"78307554787048\": \"The individual is walking through the store environment, passing by shelved products and people.\"}, \"api_call_start\": \"2026-03-12T13:50:32.723710\", \"api_call_end\": \"2026-03-12T13:50:34.677914\", \"tokens_in\": 1856, \"tokens_out\": 204}], \"global_summary\": \"The video captures a casual, observational stroll through a retail store environment. It focuses on the experience of navigating the aisles and browsing the products on display.\"}"
]
}
],
"caption_json_summary": {
"parse_status": "ok",
"json_bytes": 1299,
"top_keys": [
"config",
"segments",
"global_summary"
],
"config": {
"segment_sec": 20,
"sample_fps": 0.5,
"total_tokens": 2060,
"Main Task": "walking through a retail store. The video shows a first-person perspective of someone walking through a retail aisle lined with shelves of products, while other people are seated nearby."
},
"segment_count": 1,
"current_action_count": 1,
"unique_sub_task_count": 1,
"unique_action_label_count": 1,
"object_frame_count": 1,
"interaction_frame_count": 1,
"sampled_frame_count": 1,
"unique_object_count": 4,
"sub_tasks": [
"walking through a retail store"
],
"action_labels": [
"Walk down retail aisle"
],
"objects": [
"person seated",
"product packaging",
"retail shelf",
"shopping bags"
],
"global_summary_preview": "The video captures a casual, observational stroll through a retail store environment. It focuses on the experience of navigating the aisles and browsing the products on display."
}
}
}
]
}