Publish Ropedia Xperience-10M task baseline cards

9371cfb verified 29 days ago

16.5 kB

	{
	"status": "pass",
	"generated_at_utc": "2026-06-03T15:11:33+00:00",
	"repo_id": "ropedia-ai/xperience-10m",
	"download_policy": "annotation.hdf5 only; no videos or visualization.rrd downloaded",
	"cache_note": "raw annotation files were cached outside the published repo",
	"probes": [
	{
	"repo_filename": "9cecac72-8874-4b97-9541-18d4858f8e43/ep10/annotation.hdf5",
	"inspection": {
	"cache_note": "annotation file cached outside the published repo",
	"local_bytes": 6687192,
	"local_human": "6.38 MiB",
	"top_level_keys": [
	"calibration",
	"caption",
	"depth",
	"full_body_mocap",
	"hand_mocap",
	"imu",
	"metadata",
	"slam",
	"video"
	],
	"dataset_count": 65,
	"dataset_first_dim_histogram_top20": {
	"20": 27,
	"4": 14,
	"190": 3,
	"47": 1
	},
	"top_group_stats": {
	"calibration": {
	"dataset_count": 23,
	"max_first_dim": 4,
	"first_dim_values": {
	"4": 14
	}
	},
	"caption": {
	"dataset_count": 1,
	"max_first_dim": 0,
	"first_dim_values": {}
	},
	"depth": {
	"dataset_count": 5,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 2
	}
	},
	"full_body_mocap": {
	"dataset_count": 9,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 9
	}
	},
	"hand_mocap": {
	"dataset_count": 10,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 10
	}
	},
	"imu": {
	"dataset_count": 4,
	"max_first_dim": 190,
	"first_dim_values": {
	"190": 3,
	"20": 1
	}
	},
	"metadata": {
	"dataset_count": 6,
	"max_first_dim": 0,
	"first_dim_values": {}
	},
	"slam": {
	"dataset_count": 4,
	"max_first_dim": 47,
	"first_dim_values": {
	"20": 3,
	"47": 1
	}
	},
	"video": {
	"dataset_count": 3,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 2
	}
	}
	},
	"max_first_dim_dataset": {
	"path": "imu/accel_xyz",
	"shape": [
	190,
	3
	],
	"dtype": "float64",
	"first_dim": 190,
	"storage_bytes": 4560,
	"storage_human": "4.45 KiB"
	},
	"text_action_interaction_related_datasets": [
	{
	"path": "caption",
	"shape": [],
	"dtype": "object",
	"first_dim": null,
	"storage_bytes": 16,
	"storage_human": "16.00 B",
	"sample_values": [
	"{\"config\": {\"segment_sec\": 20, \"sample_fps\": 0.5, \"total_tokens\": 2047, \"Main Task\": \"Packing items into a plastic bin. The person is placing various items into a clear plastic storage container.\"}, \"segments\": [{\"segment_id\": 0, \"start_frame\": \"82777404821554\", \"end_frame\": \"frame_0000021\", \"Sub Task\": \"Packing items into a plastic bin\", \"Current Action\": [{\"label\": \"Arrange items in bin\", \"description\": \"The person adjusts the position of items inside the plastic storage container to ensure they are organized.\", \"start_frame\": 82777404821554, \"end_frame\": 82777404821554}], \"sampled_frames\": {\"Image 1\": 82777404821554}, \"objects\": {\"82777404821554\": [\"plastic storage bin\", \"hand\", \"cardboard box\"]}, \"interaction\": {\"82777404821554\": \"The hand is reaching into and organizing items inside the plastic storage bin.\"}, \"api_call_start\": \"2026-03-12T19:33:43.280472\", \"api_call_end\": \"2026-03-12T19:33:44.979810\", \"tokens_in\": 1842, \"tokens_out\": 205}], \"global_summary\": \"The video depicts the process of organizing and packing various personal items into a plastic storage container. It focuses on the practical task of tidying up or preparing belongings for storage.\"}"
	]
	}
	],
	"caption_json_summary": {
	"parse_status": "ok",
	"json_bytes": 1178,
	"top_keys": [
	"config",
	"segments",
	"global_summary"
	],
	"config": {
	"segment_sec": 20,
	"sample_fps": 0.5,
	"total_tokens": 2047,
	"Main Task": "Packing items into a plastic bin. The person is placing various items into a clear plastic storage container."
	},
	"segment_count": 1,
	"current_action_count": 1,
	"unique_sub_task_count": 1,
	"unique_action_label_count": 1,
	"object_frame_count": 1,
	"interaction_frame_count": 1,
	"sampled_frame_count": 1,
	"unique_object_count": 3,
	"sub_tasks": [
	"Packing items into a plastic bin"
	],
	"action_labels": [
	"Arrange items in bin"
	],
	"objects": [
	"cardboard box",
	"hand",
	"plastic storage bin"
	],
	"global_summary_preview": "The video depicts the process of organizing and packing various personal items into a plastic storage container. It focuses on the practical task of tidying up or preparing belongings for storage."
	}
	}
	},
	{
	"repo_filename": "cdc1ae12-a460-48ac-a892-7d314095c4b1/ep23/annotation.hdf5",
	"inspection": {
	"cache_note": "annotation file cached outside the published repo",
	"local_bytes": 6687256,
	"local_human": "6.38 MiB",
	"top_level_keys": [
	"calibration",
	"caption",
	"depth",
	"full_body_mocap",
	"hand_mocap",
	"imu",
	"metadata",
	"slam",
	"video"
	],
	"dataset_count": 65,
	"dataset_first_dim_histogram_top20": {
	"20": 27,
	"4": 14,
	"188": 3,
	"128": 1
	},
	"top_group_stats": {
	"calibration": {
	"dataset_count": 23,
	"max_first_dim": 4,
	"first_dim_values": {
	"4": 14
	}
	},
	"caption": {
	"dataset_count": 1,
	"max_first_dim": 0,
	"first_dim_values": {}
	},
	"depth": {
	"dataset_count": 5,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 2
	}
	},
	"full_body_mocap": {
	"dataset_count": 9,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 9
	}
	},
	"hand_mocap": {
	"dataset_count": 10,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 10
	}
	},
	"imu": {
	"dataset_count": 4,
	"max_first_dim": 188,
	"first_dim_values": {
	"188": 3,
	"20": 1
	}
	},
	"metadata": {
	"dataset_count": 6,
	"max_first_dim": 0,
	"first_dim_values": {}
	},
	"slam": {
	"dataset_count": 4,
	"max_first_dim": 128,
	"first_dim_values": {
	"20": 3,
	"128": 1
	}
	},
	"video": {
	"dataset_count": 3,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 2
	}
	}
	},
	"max_first_dim_dataset": {
	"path": "imu/accel_xyz",
	"shape": [
	188,
	3
	],
	"dtype": "float64",
	"first_dim": 188,
	"storage_bytes": 4512,
	"storage_human": "4.41 KiB"
	},
	"text_action_interaction_related_datasets": [
	{
	"path": "caption",
	"shape": [],
	"dtype": "object",
	"first_dim": null,
	"storage_bytes": 16,
	"storage_human": "16.00 B",
	"sample_values": [
	"{\"config\": {\"segment_sec\": 20, \"sample_fps\": 0.5, \"total_tokens\": 2035, \"Main Task\": \"Putting on socks. The person is standing in a bathroom and putting on their socks.\"}, \"segments\": [{\"segment_id\": 0, \"start_frame\": \"78968504788029\", \"end_frame\": \"78969405629613\", \"Sub Task\": \"Putting on socks\", \"Current Action\": [{\"label\": \"Pulling up sock\", \"description\": \"The person is manually adjusting and pulling up a sock on their foot.\", \"start_frame\": 78968504788029, \"end_frame\": 78968504788029}], \"sampled_frames\": {\"Image 1\": 78968504788029}, \"objects\": {\"78968504788029\": [\"sock\", \"feet\", \"bathroom floor\", \"toilet\"]}, \"interaction\": {\"78968504788029\": \"The person's hands are gripping and pulling on the fabric of the sock to adjust it over their foot.\"}, \"api_call_start\": \"2026-03-11T07:58:15.838321\", \"api_call_end\": \"2026-03-11T07:58:17.411279\", \"tokens_in\": 1839, \"tokens_out\": 196}], \"global_summary\": \"The video focuses on the simple, everyday task of putting on socks. It provides a brief look at this routine action as the central theme.\"}"
	]
	}
	],
	"caption_json_summary": {
	"parse_status": "ok",
	"json_bytes": 1051,
	"top_keys": [
	"config",
	"segments",
	"global_summary"
	],
	"config": {
	"segment_sec": 20,
	"sample_fps": 0.5,
	"total_tokens": 2035,
	"Main Task": "Putting on socks. The person is standing in a bathroom and putting on their socks."
	},
	"segment_count": 1,
	"current_action_count": 1,
	"unique_sub_task_count": 1,
	"unique_action_label_count": 1,
	"object_frame_count": 1,
	"interaction_frame_count": 1,
	"sampled_frame_count": 1,
	"unique_object_count": 4,
	"sub_tasks": [
	"Putting on socks"
	],
	"action_labels": [
	"Pulling up sock"
	],
	"objects": [
	"bathroom floor",
	"feet",
	"sock",
	"toilet"
	],
	"global_summary_preview": "The video focuses on the simple, everyday task of putting on socks. It provides a brief look at this routine action as the central theme."
	}
	}
	},
	{
	"repo_filename": "10282b64-a955-461e-9ef9-a1ddf8dc619a/ep5/annotation.hdf5",
	"inspection": {
	"cache_note": "annotation file cached outside the published repo",
	"local_bytes": 6706448,
	"local_human": "6.40 MiB",
	"top_level_keys": [
	"calibration",
	"caption",
	"depth",
	"full_body_mocap",
	"hand_mocap",
	"imu",
	"metadata",
	"slam",
	"video"
	],
	"dataset_count": 65,
	"dataset_first_dim_histogram_top20": {
	"20": 27,
	"4": 14,
	"190": 3,
	"837": 1
	},
	"top_group_stats": {
	"calibration": {
	"dataset_count": 23,
	"max_first_dim": 4,
	"first_dim_values": {
	"4": 14
	}
	},
	"caption": {
	"dataset_count": 1,
	"max_first_dim": 0,
	"first_dim_values": {}
	},
	"depth": {
	"dataset_count": 5,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 2
	}
	},
	"full_body_mocap": {
	"dataset_count": 9,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 9
	}
	},
	"hand_mocap": {
	"dataset_count": 10,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 10
	}
	},
	"imu": {
	"dataset_count": 4,
	"max_first_dim": 190,
	"first_dim_values": {
	"190": 3,
	"20": 1
	}
	},
	"metadata": {
	"dataset_count": 6,
	"max_first_dim": 0,
	"first_dim_values": {}
	},
	"slam": {
	"dataset_count": 4,
	"max_first_dim": 837,
	"first_dim_values": {
	"20": 3,
	"837": 1
	}
	},
	"video": {
	"dataset_count": 3,
	"max_first_dim": 20,
	"first_dim_values": {
	"20": 2
	}
	}
	},
	"max_first_dim_dataset": {
	"path": "slam/point_cloud",
	"shape": [
	837,
	3
	],
	"dtype": "float64",
	"first_dim": 837,
	"storage_bytes": 20088,
	"storage_human": "19.62 KiB"
	},
	"text_action_interaction_related_datasets": [
	{
	"path": "caption",
	"shape": [],
	"dtype": "object",
	"first_dim": null,
	"storage_bytes": 16,
	"storage_human": "16.00 B",
	"sample_values": [
	"{\"config\": {\"segment_sec\": 20, \"sample_fps\": 0.5, \"total_tokens\": 2060, \"Main Task\": \"walking through a retail store. The video shows a first-person perspective of someone walking through a retail aisle lined with shelves of products, while other people are seated nearby.\"}, \"segments\": [{\"segment_id\": 0, \"start_frame\": \"78307554787048\", \"end_frame\": \"frame_0000021\", \"Sub Task\": \"walking through a retail store\", \"Current Action\": [{\"label\": \"Walk down retail aisle\", \"description\": \"The camera operator is walking along a retail aisle while observing merchandise on the shelves and other people in the store.\", \"start_frame\": \"78307554787048\", \"end_frame\": \"78307554787048\"}], \"sampled_frames\": {\"Image 1\": 78307554787048}, \"objects\": {\"78307554787048\": [\"retail shelf\", \"product packaging\", \"shopping bags\", \"person seated\"]}, \"interaction\": {\"78307554787048\": \"The individual is walking through the store environment, passing by shelved products and people.\"}, \"api_call_start\": \"2026-03-12T13:50:32.723710\", \"api_call_end\": \"2026-03-12T13:50:34.677914\", \"tokens_in\": 1856, \"tokens_out\": 204}], \"global_summary\": \"The video captures a casual, observational stroll through a retail store environment. It focuses on the experience of navigating the aisles and browsing the products on display.\"}"
	]
	}
	],
	"caption_json_summary": {
	"parse_status": "ok",
	"json_bytes": 1299,
	"top_keys": [
	"config",
	"segments",
	"global_summary"
	],
	"config": {
	"segment_sec": 20,
	"sample_fps": 0.5,
	"total_tokens": 2060,
	"Main Task": "walking through a retail store. The video shows a first-person perspective of someone walking through a retail aisle lined with shelves of products, while other people are seated nearby."
	},
	"segment_count": 1,
	"current_action_count": 1,
	"unique_sub_task_count": 1,
	"unique_action_label_count": 1,
	"object_frame_count": 1,
	"interaction_frame_count": 1,
	"sampled_frame_count": 1,
	"unique_object_count": 4,
	"sub_tasks": [
	"walking through a retail store"
	],
	"action_labels": [
	"Walk down retail aisle"
	],
	"objects": [
	"person seated",
	"product packaging",
	"retail shelf",
	"shopping bags"
	],
	"global_summary_preview": "The video captures a casual, observational stroll through a retail store environment. It focuses on the experience of navigating the aisles and browsing the products on display."
	}
	}
	}
	]
	}