File size: 3,797 Bytes
3e04138
 
 
 
 
a8124a8
3e04138
45c1706
3e04138
 
 
 
 
 
 
 
 
 
 
540e67a
 
3e04138
 
 
 
 
 
 
 
 
 
45c1706
3e04138
 
540e67a
 
3e04138
 
a8124a8
3e04138
 
 
 
 
 
 
45c1706
3e04138
 
540e67a
 
3e04138
 
 
 
 
 
 
 
 
 
 
 
 
540e67a
 
3e04138
 
 
 
 
 
 
 
 
 
 
 
 
540e67a
 
3e04138
 
 
 
 
 
 
 
 
 
 
 
 
540e67a
 
3e04138
 
 
 
 
 
 
 
 
 
 
 
 
540e67a
 
3e04138
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
{
  "title": "Xperience-10M Public Sample Modality Atlas",
  "source_sample_reference": "ropedia-ai/xperience-10m-sample public episode",
  "raw_data_redistributed": false,
  "notes": [
    "Images are lightweight derived thumbnails for review and website presentation.",
    "Raw MP4, HDF5, and RRD files remain excluded from the public repo and Hugging Face bundles.",
    "The public sample modalities are mapped into the current 8,546-dimensional task representation."
  ],
  "modalities": [
    {
      "index": 1,
      "id": "video",
      "name": "video",
      "type": "visual stream",
      "sample_contains": "6 synchronized camera MP4 streams",
      "current_baseline_use": "RGB/fisheye/stereo frame statistics",
      "image": "assets/modalities/video.jpg",
      "image_size": [
        880,
        520
      ],
      "source": "Derived thumbnail from the public Xperience-10M sample episode.",
      "feature_status": "featurized_or_label_source"
    },
    {
      "index": 2,
      "id": "audio",
      "name": "audio",
      "type": "acoustic stream",
      "sample_contains": "AAC stream embedded in MP4",
      "current_baseline_use": "acoustic signal",
      "image": "assets/modalities/audio.png",
      "image_size": [
        880,
        520
      ],
      "source": "Derived thumbnail from the public Xperience-10M sample episode.",
      "feature_status": "featurized_or_label_source"
    },
    {
      "index": 3,
      "id": "depth",
      "name": "depth",
      "type": "geometry map",
      "sample_contains": "depth map + confidence channel",
      "current_baseline_use": "spatial geometry signal",
      "image": "assets/modalities/depth.jpg",
      "image_size": [
        880,
        520
      ],
      "source": "Derived thumbnail from the public Xperience-10M sample episode.",
      "feature_status": "featurized_or_label_source"
    },
    {
      "index": 4,
      "id": "pose_slam",
      "name": "pose / SLAM",
      "type": "camera pose",
      "sample_contains": "trajectory + sparse SLAM map",
      "current_baseline_use": "position + orientation features",
      "image": "assets/modalities/pose_slam.png",
      "image_size": [
        880,
        520
      ],
      "source": "Derived thumbnail from the public Xperience-10M sample episode.",
      "feature_status": "featurized_or_label_source"
    },
    {
      "index": 5,
      "id": "motion_capture",
      "name": "motion capture",
      "type": "human motion",
      "sample_contains": "body + hand joint tracks",
      "current_baseline_use": "3D mocap feature statistics",
      "image": "assets/modalities/motion_capture.png",
      "image_size": [
        880,
        520
      ],
      "source": "Derived thumbnail from the public Xperience-10M sample episode.",
      "feature_status": "featurized_or_label_source"
    },
    {
      "index": 6,
      "id": "inertial",
      "name": "inertial",
      "type": "wearable sensor",
      "sample_contains": "accelerometer + gyroscope",
      "current_baseline_use": "wearable motion statistics",
      "image": "assets/modalities/inertial.png",
      "image_size": [
        880,
        520
      ],
      "source": "Derived thumbnail from the public Xperience-10M sample episode.",
      "feature_status": "featurized_or_label_source"
    },
    {
      "index": 7,
      "id": "language",
      "name": "language",
      "type": "semantic annotation",
      "sample_contains": "object tags + action captions",
      "current_baseline_use": "task labels + semantic targets",
      "image": "assets/modalities/language.png",
      "image_size": [
        880,
        520
      ],
      "source": "Derived thumbnail from the public Xperience-10M sample episode.",
      "feature_status": "featurized_or_label_source"
    }
  ]
}