cy0307's picture
Add files using upload-large-folder tool
3c21768 verified
Raw
History Blame Contribute Delete
27.5 kB
{
"omni_relay": {
"status": "verified_full_128_episode_diagnostic_result",
"dataset": "ropedia-ai/xperience-10m",
"staging": "verified_public_package_and_adapter_publication",
"training_target": "action_subtask_quality_and_unseen_label_error_analysis",
"selection_strategy": "stratified_round_robin_by_top_level_session",
"target_episodes": 128,
"selected_sessions": 128,
"candidate_scan_top_level_sessions": 802,
"valid_candidates": 12102,
"estimated_bytes": 298188841943,
"exclude": [
"visualization.rrd"
],
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
},
"models": {
"motion_action": {
"accuracy": 0.9828178694158075,
"balanced_accuracy": 0.9643518518518519,
"macro_f1": 0.96884342657456,
"weighted_f1": 0.9824311468352843,
"num_eval_windows": 291,
"num_classes": 18,
"majority_baseline_accuracy": 0.13745704467353953,
"train_final_accuracy": 1.0,
"train_final_loss": 0.019042566418647766
},
"motion_subtask": {
"accuracy": 0.9758620689655172,
"balanced_accuracy": 0.9783924095954172,
"macro_f1": 0.9528048001232955,
"weighted_f1": 0.9778836359351952,
"num_eval_windows": 290,
"num_classes": 14,
"majority_baseline_accuracy": 0.14482758620689656,
"train_final_accuracy": 1.0,
"train_final_loss": 0.02664567530155182
},
"all_modalities_action": {
"accuracy": 0.9862542955326461,
"balanced_accuracy": 0.9856481481481482,
"macro_f1": 0.9828810433408773,
"weighted_f1": 0.9862660597416385,
"num_eval_windows": 291,
"num_classes": 18,
"majority_baseline_accuracy": 0.13745704467353953,
"train_final_accuracy": 1.0,
"train_final_loss": 0.014677195809781551,
"feature_dim": 8546,
"num_windows": 1144
},
"all_modalities_subtask": {
"accuracy": 0.9827586206896551,
"balanced_accuracy": 0.9505102040816327,
"macro_f1": 0.9173189771658273,
"weighted_f1": 0.9841228382209077,
"num_eval_windows": 290,
"num_classes": 14,
"majority_baseline_accuracy": 0.14482758620689656,
"train_final_accuracy": 1.0,
"train_final_loss": 0.012834250926971436,
"feature_dim": 8546,
"num_windows": 1147
}
},
"suite": {
"annotation": "data/sample/xperience-10m-sample/annotation.hdf5",
"num_frames": 5821,
"num_windows": 1161,
"feature_dim": 8546,
"window_frames": 20,
"stride_frames": 5,
"tasks": {
"timeline_action": {
"accuracy": 0.029154518950437316,
"balanced_accuracy": 0.03125,
"macro_f1": 0.05,
"weighted_f1": 0.04664723032069971,
"num_eval_windows": 343,
"num_classes": 18,
"task": "timeline_action",
"input": "all modalities -> current action label",
"split": "chronological",
"num_windows": 1144,
"num_train_windows": 801,
"num_test_windows": 343,
"feature_dim": 8546,
"majority_baseline_accuracy": 0.0,
"train_final_accuracy": 1.0,
"train_final_loss": 0.016824405640363693,
"unseen_test_classes": [
"Place item on table",
"Pour coffee",
"Pour milk into coffee",
"Wait/Prepare for pouring"
],
"task_display_name": "Action Recognition"
},
"timeline_subtask": {
"accuracy": 0.05813953488372093,
"balanced_accuracy": 0.05376979652090881,
"macro_f1": 0.05056355513846935,
"weighted_f1": 0.06827161211620246,
"num_eval_windows": 344,
"num_classes": 14,
"task": "timeline_subtask",
"input": "all modalities -> current subtask label",
"split": "chronological",
"num_windows": 1147,
"num_train_windows": 803,
"num_test_windows": 344,
"feature_dim": 8546,
"majority_baseline_accuracy": 0.0,
"train_final_accuracy": 1.0,
"train_final_loss": 0.014138756319880486,
"unseen_test_classes": [
"Move bottle to coffee equipment",
"Pour coffee",
"Pour milk into coffee",
"Prepare for pouring"
],
"task_display_name": "Procedure Step Recognition"
},
"transition_detection": {
"accuracy": 0.9080459770114943,
"balanced_accuracy": 0.6543674698795181,
"macro_f1": 0.6118237590630229,
"weighted_f1": 0.9197389592989339,
"num_eval_windows": 348,
"num_classes": 2,
"task": "transition_detection",
"input": "all modalities -> action boundary/steady",
"split": "chronological",
"num_windows": 1161,
"num_train_windows": 813,
"num_test_windows": 348,
"feature_dim": 8546,
"majority_baseline_accuracy": 0.9540229885057471,
"train_final_accuracy": 1.0,
"train_final_loss": 0.007154403254389763,
"unseen_test_classes": [],
"boundary_precision": 0.07142857142857142,
"boundary_recall": 0.5,
"boundary_f1": 0.125,
"matched_boundaries": 2,
"true_boundaries": 4,
"predicted_boundaries": 28,
"mean_abs_timing_error_frames": 3.5,
"task_display_name": "Action Boundary Detection"
},
"next_action": {
"accuracy": 0.034482758620689655,
"balanced_accuracy": 0.04,
"macro_f1": 0.05925925925925927,
"weighted_f1": 0.05108556832694764,
"num_eval_windows": 348,
"num_classes": 18,
"task": "next_action",
"input": "all modalities at t -> action at t+20 frames",
"split": "chronological",
"num_windows": 1161,
"num_train_windows": 813,
"num_test_windows": 348,
"feature_dim": 8546,
"majority_baseline_accuracy": 0.0,
"train_final_accuracy": 1.0,
"train_final_loss": 0.01754833571612835,
"unseen_test_classes": [
"Place item on table",
"Pour coffee",
"Pour milk into coffee",
"Wait/Prepare for pouring"
],
"task_display_name": "Next-Action Prediction"
},
"hand_trajectory_forecast": {
"mse": 14.956222534179688,
"mae": 0.420173317193985,
"r2": -1763.3831383277447,
"task": "hand_trajectory_forecast",
"input": "all modalities at t -> future left/right hand 3D joints",
"split": "chronological",
"num_windows": 1159,
"num_train_windows": 811,
"num_test_windows": 348,
"forecast_frames": 10,
"mpjpe": 0.8646570444107056,
"final_frame_mpjpe": 1.0330793857574463,
"target_dim": 1260,
"task_display_name": "Hand Trajectory Forecasting"
},
"contact_prediction": {
"accuracy": 1.0,
"balanced_accuracy": 1.0,
"macro_f1": 1.0,
"weighted_f1": 1.0,
"num_eval_windows": 348,
"num_classes": 1,
"task": "contact_prediction",
"input": "all non-contact/non-caption-label modalities -> any body contact",
"split": "chronological",
"num_windows": 1161,
"num_train_windows": 813,
"num_test_windows": 348,
"feature_dim": 7503,
"majority_baseline_accuracy": 1.0,
"train_final_accuracy": 1.0,
"train_final_loss": 0.0006056802230887115,
"unseen_test_classes": [],
"task_display_name": "Contact State Prediction"
},
"object_relevance": {
"micro_f1": 0.18034382095361662,
"macro_f1": 0.06329638076675959,
"exact_match": 0.005747126436781609,
"precision": 0.16106604866743918,
"recall": 0.20486366985998525,
"task": "object_relevance",
"input": "all non-caption modalities -> current relevant object set",
"split": "chronological",
"num_windows": 1161,
"num_train_windows": 813,
"num_test_windows": 348,
"num_objects": 34,
"task_display_name": "Object Relevance Prediction"
},
"caption_grounding": {
"mrr": 0.016023479050338015,
"median_rank": 172.0,
"mean_rank": 174.67816091954023,
"num_queries": 348,
"top1_accuracy": 0.0028735632183908046,
"top5_accuracy": 0.011494252873563218,
"top10_accuracy": 0.014367816091954023,
"task": "caption_grounding",
"input": "caption objects/interaction text query + candidate sensor windows",
"output": "matching time window",
"split": "chronological",
"num_train_windows": 813,
"num_test_windows": 348,
"task_display_name": "Language Grounding"
},
"cross_modal_retrieval": {
"mrr": 0.26925966892956127,
"median_rank": 14.0,
"mean_rank": 43.34770114942529,
"num_queries": 348,
"top1_accuracy": 0.16379310344827586,
"top5_accuracy": 0.367816091954023,
"top10_accuracy": 0.47126436781609193,
"task": "cross_modal_retrieval",
"input": "motion/IMU/camera/audio query",
"output": "matching depth/video window",
"split": "chronological",
"num_train_windows": 813,
"num_test_windows": 348,
"task_display_name": "Cross-Modal Retrieval"
},
"modality_reconstruction": {
"mse": 1358.1593017578125,
"mae": 0.29572129249572754,
"r2": -0.015271898913936655,
"task": "modality_reconstruction",
"input": "motion/IMU/camera/audio",
"output": "depth/video feature vector",
"split": "chronological",
"num_train_windows": 813,
"num_test_windows": 348,
"target_dim": 5096,
"task_display_name": "Cross-Modal Reconstruction"
},
"temporal_order": {
"accuracy": 0.4540229885057471,
"precision": 0.4665271966527197,
"recall": 0.6408045977011494,
"f1": 0.5399515738498789,
"tp": 223,
"tn": 93,
"fp": 255,
"fn": 125,
"positive_rate_true": 0.5,
"positive_rate_pred": 0.6867816091954023,
"task": "temporal_order",
"input": "two adjacent windows -> whether order is correct",
"split": "chronological",
"num_samples": 2320,
"num_train_samples": 1624,
"num_test_samples": 696,
"train_final_accuracy": 0.5086206896551724,
"task_display_name": "Temporal Order Verification"
},
"misalignment_detection": {
"accuracy": 0.5158959537572254,
"precision": 0.5166163141993958,
"recall": 0.49421965317919075,
"f1": 0.5051698670605613,
"tp": 171,
"tn": 186,
"fp": 160,
"fn": 175,
"positive_rate_true": 0.5,
"positive_rate_pred": 0.47832369942196534,
"task": "misalignment_detection",
"input": "motion+visual/audio pair -> aligned vs shifted by 8 windows",
"split": "chronological",
"num_samples": 2306,
"num_train_samples": 1614,
"num_test_samples": 692,
"train_final_accuracy": 0.49380421313506817,
"task_display_name": "Multimodal Synchronization Detection"
}
},
"neural_model": {
"name": "neural_mlp",
"type": "lightweight PyTorch MLP over shared window features",
"epochs": 80,
"hidden_dim": 128,
"batch_size": 128,
"learning_rate": 0.001,
"weight_decay": 0.0001,
"dropout": 0.1,
"device": "auto"
},
"neural_tasks": {
"timeline_action": {
"accuracy": 0.008746355685131196,
"balanced_accuracy": 0.009375,
"macro_f1": 0.014814814814814814,
"weighted_f1": 0.013821401576503616,
"num_eval_windows": 343,
"num_classes": 18,
"task": "timeline_action",
"input": "all modalities -> current action label",
"split": "chronological",
"num_windows": 1144,
"num_train_windows": 801,
"num_test_windows": 343,
"feature_dim": 8546,
"majority_baseline_accuracy": 0.0,
"unseen_test_classes": [
"Place item on table",
"Pour coffee",
"Pour milk into coffee",
"Wait/Prepare for pouring"
],
"model": "neural_mlp",
"head": "z-score -> MLP softmax",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.04246756529782,
"train_final_accuracy": 0.9875156054931336,
"task_display_name": "Action Recognition"
},
"timeline_subtask": {
"accuracy": 0.0377906976744186,
"balanced_accuracy": 0.045614035087719294,
"macro_f1": 0.02810810810810811,
"weighted_f1": 0.023287240729101197,
"num_eval_windows": 344,
"num_classes": 14,
"task": "timeline_subtask",
"input": "all modalities -> current subtask label",
"split": "chronological",
"num_windows": 1147,
"num_train_windows": 803,
"num_test_windows": 344,
"feature_dim": 8546,
"majority_baseline_accuracy": 0.0,
"unseen_test_classes": [
"Move bottle to coffee equipment",
"Pour coffee",
"Pour milk into coffee",
"Prepare for pouring"
],
"model": "neural_mlp",
"head": "z-score -> MLP softmax",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 5.4104819144748596e-05,
"train_final_accuracy": 1.0,
"task_display_name": "Procedure Step Recognition"
},
"transition_detection": {
"accuracy": 0.8735632183908046,
"balanced_accuracy": 0.666039156626506,
"macro_f1": 0.5862068965517241,
"weighted_f1": 0.8993261989694807,
"num_eval_windows": 348,
"num_classes": 2,
"task": "transition_detection",
"input": "all modalities -> action boundary/steady",
"split": "chronological",
"num_windows": 1161,
"num_train_windows": 813,
"num_test_windows": 348,
"feature_dim": 8546,
"majority_baseline_accuracy": 0.9540229885057471,
"unseen_test_classes": [],
"model": "neural_mlp",
"head": "z-score -> MLP softmax",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.029138497962572854,
"train_final_accuracy": 0.990159901599016,
"boundary_precision": 0.07142857142857142,
"boundary_recall": 0.75,
"boundary_f1": 0.13043478260869565,
"matched_boundaries": 3,
"true_boundaries": 4,
"predicted_boundaries": 42,
"mean_abs_timing_error_frames": 2.6666666666666665,
"task_display_name": "Action Boundary Detection"
},
"next_action": {
"accuracy": 0.02586206896551724,
"balanced_accuracy": 0.03,
"macro_f1": 0.04186046511627907,
"weighted_f1": 0.03608660785886127,
"num_eval_windows": 348,
"num_classes": 18,
"task": "next_action",
"input": "all modalities at t -> action at t+20 frames",
"split": "chronological",
"num_windows": 1161,
"num_train_windows": 813,
"num_test_windows": 348,
"feature_dim": 8546,
"majority_baseline_accuracy": 0.0,
"unseen_test_classes": [
"Place item on table",
"Pour coffee",
"Pour milk into coffee",
"Wait/Prepare for pouring"
],
"model": "neural_mlp",
"head": "z-score -> MLP softmax",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.000416612956025105,
"train_final_accuracy": 1.0,
"task_display_name": "Next-Action Prediction"
},
"hand_trajectory_forecast": {
"mse": 0.004775360692292452,
"mae": 0.05433763191103935,
"r2": 0.43665148265771614,
"task": "hand_trajectory_forecast",
"input": "all modalities at t -> future left/right hand 3D joints",
"split": "chronological",
"num_windows": 1159,
"num_train_windows": 811,
"num_test_windows": 348,
"forecast_frames": 10,
"mpjpe": 0.10785018652677536,
"final_frame_mpjpe": 0.11407545953989029,
"target_dim": 1260,
"model": "neural_mlp",
"head": "z-score -> MLP regression",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.055699273420247435,
"task_display_name": "Hand Trajectory Forecasting"
},
"contact_prediction": {
"accuracy": 1.0,
"balanced_accuracy": 1.0,
"macro_f1": 1.0,
"weighted_f1": 1.0,
"num_eval_windows": 348,
"num_classes": 1,
"task": "contact_prediction",
"input": "all non-contact/non-caption-label modalities -> any body contact",
"split": "chronological",
"num_windows": 1161,
"num_train_windows": 813,
"num_test_windows": 348,
"feature_dim": 7503,
"majority_baseline_accuracy": 1.0,
"unseen_test_classes": [],
"model": "neural_mlp",
"head": "z-score -> MLP softmax",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.0,
"train_final_accuracy": 1.0,
"task_display_name": "Contact State Prediction"
},
"object_relevance": {
"micro_f1": 0.1679279279279279,
"macro_f1": 0.048883162556964774,
"exact_match": 0.014367816091954023,
"precision": 0.16431593794076163,
"recall": 0.17170228445099484,
"task": "object_relevance",
"input": "all non-caption modalities -> current relevant object set",
"split": "chronological",
"num_windows": 1161,
"num_train_windows": 813,
"num_test_windows": 348,
"num_objects": 34,
"feature_dim": 7650,
"model": "neural_mlp",
"head": "z-score -> MLP sigmoid multilabel",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.003651880362182214,
"task_display_name": "Object Relevance Prediction"
},
"caption_grounding": {
"mrr": 0.01684125567132316,
"median_rank": 180.5,
"mean_rank": 178.382183908046,
"num_queries": 348,
"top1_accuracy": 0.0028735632183908046,
"top5_accuracy": 0.014367816091954023,
"top10_accuracy": 0.020114942528735632,
"task": "caption_grounding",
"input": "caption objects/interaction text query + candidate sensor windows",
"split": "chronological",
"num_train_windows": 813,
"num_test_windows": 348,
"target_dim": 896,
"output": "matching time window",
"model": "neural_mlp",
"head": "z-score -> MLP projection/regression",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.06317874967483723,
"task_display_name": "Language Grounding"
},
"cross_modal_retrieval": {
"mrr": 0.1299971898648288,
"median_rank": 40.0,
"mean_rank": 66.60057471264368,
"num_queries": 348,
"top1_accuracy": 0.05172413793103448,
"top5_accuracy": 0.19827586206896552,
"top10_accuracy": 0.2413793103448276,
"task": "cross_modal_retrieval",
"input": "motion/IMU/camera/audio query",
"split": "chronological",
"num_train_windows": 813,
"num_test_windows": 348,
"target_dim": 5096,
"output": "matching depth/video window",
"model": "neural_mlp",
"head": "z-score -> MLP projection/regression",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.21891545446596464,
"task_display_name": "Cross-Modal Retrieval"
},
"modality_reconstruction": {
"mse": 1351.3363037109375,
"mae": 0.10379635542631149,
"r2": -0.010171410134180991,
"task": "modality_reconstruction",
"input": "motion/IMU/camera/audio",
"split": "chronological",
"num_train_windows": 813,
"num_test_windows": 348,
"target_dim": 5096,
"output": "depth/video feature vector",
"model": "neural_mlp",
"head": "z-score -> MLP projection/regression",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.21891545446596464,
"task_display_name": "Cross-Modal Reconstruction"
},
"temporal_order": {
"accuracy": 0.8577586206896551,
"precision": 0.8878504672897196,
"recall": 0.8189655172413793,
"f1": 0.8520179372197308,
"tp": 285,
"tn": 312,
"fp": 36,
"fn": 63,
"positive_rate_true": 0.5,
"positive_rate_pred": 0.46120689655172414,
"task": "temporal_order",
"input": "two adjacent windows -> whether order is correct",
"split": "chronological",
"num_samples": 2320,
"num_train_samples": 1624,
"num_test_samples": 696,
"feature_dim": 25638,
"model": "neural_mlp",
"head": "z-score -> MLP binary softmax",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.0005108328477586757,
"train_final_accuracy": 1.0,
"task_display_name": "Temporal Order Verification"
},
"misalignment_detection": {
"accuracy": 0.7008670520231214,
"precision": 0.6824146981627297,
"recall": 0.7514450867052023,
"f1": 0.7152682255845944,
"tp": 260,
"tn": 225,
"fp": 121,
"fn": 86,
"positive_rate_true": 0.5,
"positive_rate_pred": 0.5505780346820809,
"task": "misalignment_detection",
"input": "motion+visual/audio pair -> aligned vs shifted by 8 windows",
"split": "chronological",
"num_samples": 2306,
"num_train_samples": 1614,
"num_test_samples": 692,
"feature_dim": 7511,
"model": "neural_mlp",
"head": "z-score -> MLP binary softmax",
"neural_epochs": 80,
"neural_hidden_dim": 128,
"neural_batch_size": 128,
"neural_learning_rate": 0.001,
"neural_weight_decay": 0.0001,
"neural_dropout": 0.1,
"neural_device": "cpu",
"train_final_loss": 0.010604870708167664,
"train_final_accuracy": 0.9956629491945477,
"task_display_name": "Multimodal Synchronization Detection"
}
},
"task_display_names": {
"timeline_action": "Action Recognition",
"timeline_subtask": "Procedure Step Recognition",
"transition_detection": "Action Boundary Detection",
"next_action": "Next-Action Prediction",
"hand_trajectory_forecast": "Hand Trajectory Forecasting",
"contact_prediction": "Contact State Prediction",
"object_relevance": "Object Relevance Prediction",
"caption_grounding": "Language Grounding",
"cross_modal_retrieval": "Cross-Modal Retrieval",
"modality_reconstruction": "Cross-Modal Reconstruction",
"temporal_order": "Temporal Order Verification",
"misalignment_detection": "Multimodal Synchronization Detection"
}
},
"unified_task_count": 20,
"feature_manifest": [
{
"name": "hand left joints",
"start": 0,
"end": 441,
"dim": 441
},
{
"name": "hand right joints",
"start": 441,
"end": 882,
"dim": 441
},
{
"name": "body joints",
"start": 882,
"end": 1974,
"dim": 1092
},
{
"name": "body contacts",
"start": 1974,
"end": 2121,
"dim": 147
},
{
"name": "camera translation",
"start": 2121,
"end": 2142,
"dim": 21
},
{
"name": "camera rotation matrix",
"start": 2142,
"end": 2205,
"dim": 63
},
{
"name": "imu accel gyro",
"start": 2205,
"end": 2247,
"dim": 42
},
{
"name": "depth confidence",
"start": 2247,
"end": 3227,
"dim": 980
},
{
"name": "video fisheye cam0",
"start": 3227,
"end": 3913,
"dim": 686
},
{
"name": "video fisheye cam1",
"start": 3913,
"end": 4599,
"dim": 686
},
{
"name": "video fisheye cam2",
"start": 4599,
"end": 5285,
"dim": 686
},
{
"name": "video fisheye cam3",
"start": 5285,
"end": 5971,
"dim": 686
},
{
"name": "video stereo left",
"start": 5971,
"end": 6657,
"dim": 686
},
{
"name": "video stereo right",
"start": 6657,
"end": 7343,
"dim": 686
},
{
"name": "audio",
"start": 7343,
"end": 7511,
"dim": 168
},
{
"name": "language text",
"start": 7511,
"end": 8407,
"dim": 896
},
{
"name": "slam point cloud",
"start": 8407,
"end": 8429,
"dim": 22
},
{
"name": "calibration",
"start": 8429,
"end": 8546,
"dim": 117
}
]
}