File size: 30,576 Bytes
eea471e
367c357
b7334ff
 
367c357
b7334ff
 
367c357
 
 
 
b7334ff
 
 
 
 
367c357
b7334ff
 
a8124a8
 
b7334ff
367c357
 
b7334ff
 
 
 
367c357
b7334ff
 
 
 
 
 
367c357
b7334ff
 
367c357
 
b7334ff
367c357
b7334ff
 
 
367c357
b7334ff
 
367c357
b7334ff
 
 
 
 
a8124a8
b7334ff
 
 
 
 
 
a8124a8
367c357
b7334ff
 
 
 
 
a8124a8
b7334ff
 
 
 
 
 
a8124a8
367c357
b7334ff
 
 
 
 
a8124a8
b7334ff
 
 
 
 
 
a8124a8
 
367c357
b7334ff
 
 
 
 
a8124a8
b7334ff
 
 
 
 
 
a8124a8
367c357
 
 
 
b7334ff
367c357
a8124a8
367c357
b7334ff
 
367c357
b7334ff
367c357
a8124a8
367c357
b7334ff
 
 
 
 
a8124a8
b7334ff
 
 
 
 
 
 
 
367c357
b7334ff
 
 
 
 
a8124a8
b7334ff
 
 
 
 
 
a8124a8
367c357
b7334ff
 
 
 
 
a8124a8
b7334ff
 
 
 
 
 
a8124a8
367c357
b7334ff
 
 
 
 
a8124a8
 
 
 
 
 
 
 
 
367c357
 
 
 
b7334ff
367c357
a8124a8
367c357
b7334ff
 
367c357
b7334ff
367c357
a8124a8
367c357
b7334ff
 
 
 
 
a8124a8
b7334ff
 
 
 
 
 
a8124a8
367c357
b7334ff
 
 
 
 
a8124a8
 
b7334ff
 
 
 
 
 
a8124a8
367c357
b7334ff
540e67a
eea471e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
<svg xmlns="http://www.w3.org/2000/svg" width="1500" height="1840" viewBox="0 0 1500 1840" aria-labelledby="archTitle2" aria-describedby="archDesc2">
<title id="archTitle2">Minimal Architectures for 12 Ropedia Xperience-10M Tasks</title>
<desc id="archDesc2">Diagram of a four-stage pipeline (shared episode windows, feature vector, reusable heads, artifacts), four head types, and twelve task cards that each list an input, head, output and metric.</desc>
<!-- The title/desc pair above gives the graphic an accessible name and summary.
     role="img" is deliberately not set, so the text inside the cards stays readable by assistive technology. -->
<!-- Shared definitions: the dotted background pattern and the arrowhead marker used by the pipeline connectors. -->
<defs>
  <pattern id="dotgrid2" width="18" height="18" patternUnits="userSpaceOnUse">
    <circle cx="2" cy="2" r="1.2" fill="#ccffa0" opacity="0.18"/>
  </pattern>
  <marker id="arrow2" viewBox="0 0 10 10" refX="8" refY="5" markerWidth="7" markerHeight="7" orient="auto-start-reverse">
    <path d="M 0 0 L 10 5 L 0 10 z" fill="#ccffa0" fill-opacity="0.72"/>
  </marker>
</defs>
<!-- Backdrop, painted bottom to top: solid fill, dot-grid overlay, faint highlight disc near the top-right corner. -->
<rect width="100%" height="100%" fill="#020502"/>
<rect width="100%" height="100%" fill="url(#dotgrid2)" opacity="0.58"/>
<circle cx="1190" cy="150" r="210" fill="#ccffa0" opacity="0.08"/>
<!-- Visible heading and subtitle. -->
<text x="60" y="56" font-family="Inter Tight, Arial, sans-serif" font-size="34" font-weight="800" fill="#f4f8ef">Minimal Architectures for 12 Ropedia Xperience-10M Tasks</text>
<text x="60" y="88" font-family="Space Grotesk, Arial, sans-serif" font-size="16" fill="#a5afa2">Generated from scripts/episode_task_suite.py semantics and committed summary metrics. These are minimal baselines, not deep foundation models.</text>
<!-- Connectors between the four pipeline cards; each sits in the 40px gap between neighbouring cards. -->
<g stroke="#ccffa0" stroke-opacity="0.54" stroke-width="3" marker-end="url(#arrow2)">
  <line x1="382" y1="177" x2="396" y2="177"/>
  <line x1="732" y1="177" x2="746" y2="177"/>
  <line x1="1092" y1="177" x2="1106" y2="177"/>
</g>
<!-- Stage card: shared episode windows. Body-line typography is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="60" y="122" width="310" height="110" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.26" stroke-width="2"/>
  <rect x="60" y="122" width="8" height="110" rx="4" fill="#9bdfff"/>
  <text x="84" y="153" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#f4f8ef">Shared episode windows</text>
  <text x="84" y="180">5,821 frames -&gt; 1,161 windows</text>
  <text x="84" y="198">20-frame window, 5-frame stride</text>
  <text x="84" y="216">chronological 70/30 split</text>
</g>
<!-- Stage card: feature vector. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="410" y="122" width="310" height="110" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.26" stroke-width="2"/>
  <rect x="410" y="122" width="8" height="110" rx="4" fill="#7ae5c3"/>
  <text x="434" y="153" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#f4f8ef">Feature vector</text>
  <text x="434" y="180">X_all = 8,546 dimensions</text>
  <text x="434" y="198">18 named blocks incl. audio</text>
  <text x="434" y="216">mean/std fit on train only</text>
</g>
<!-- Stage card: reusable heads. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="760" y="122" width="320" height="110" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.26" stroke-width="2"/>
  <rect x="760" y="122" width="8" height="110" rx="4" fill="#ccffa0"/>
  <text x="784" y="153" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#f4f8ef">Reusable heads</text>
  <text x="784" y="180">linear softmax classifier</text>
  <text x="784" y="198">dual ridge regression/projection</text>
  <text x="784" y="216">multi-label logistic + cosine rank</text>
</g>
<!-- Stage card: artifacts.
     The card spans y=122..232 and carries four body lines, one more than its sibling cards.
     At the siblings' 18px pitch the last baseline landed at y=234, outside the box, so this card uses a 16px pitch (176..224) to keep all four lines inside the border. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="1120" y="122" width="320" height="110" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.26" stroke-width="2"/>
  <rect x="1120" y="122" width="8" height="110" rx="4" fill="#d8f4a5"/>
  <text x="1144" y="153" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#f4f8ef">Artifacts</text>
  <text x="1144" y="176">metrics.json, predictions.csv/npz</text>
  <text x="1144" y="192">model.npz with scaler and weights</text>
  <text x="1144" y="208">summary_report.json source of</text>
  <text x="1144" y="224">numbers</text>
</g>
<!-- Head legend: a 2x2 grid of cards, one per head type. -->
<!-- Card frames (all four share fill and border). -->
<g fill="#071207" stroke="#ccffa0" stroke-opacity="0.22">
  <rect x="60" y="270" width="660" height="100" rx="8"/>
  <rect x="780" y="270" width="660" height="100" rx="8"/>
  <rect x="60" y="394" width="660" height="100" rx="8"/>
  <rect x="780" y="394" width="660" height="100" rx="8"/>
</g>
<!-- Card headings, each in its head-type accent colour. -->
<g font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800">
  <text x="78" y="303" fill="#9bdfff">Softmax classifier</text>
  <text x="798" y="303" fill="#ccffa0">Ridge regression/projection</text>
  <text x="78" y="427" fill="#7ae5c3">Ridge + cosine ranking</text>
  <text x="798" y="427" fill="#d8f4a5">Multi-label logistic</text>
</g>
<!-- One- or two-line descriptions under each heading. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <text x="78" y="330">logits = z(X)W + b; CE + L2; class weights for classifiers</text>
  <text x="798" y="330">closed-form dual ridge on z(X), z(Y); used for forecast and reconstruction</text>
  <text x="78" y="454">project one modality into another feature space, then rank candidates by</text>
  <text x="78" y="472">cosine</text>
  <text x="798" y="454">sigmoid heads for object vocabulary; threshold 0.5 with top-1 fallback</text>
</g>
<!-- Task card: timeline_action (softmax badge). -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="60" y="540" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="60" y="540" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="80" y="558" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="128" y="575" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="80" y="612" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">timeline_action</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#9bdfff">
    <text x="80" y="644">INPUT</text>
    <text x="80" y="669">HEAD</text>
    <text x="80" y="711">OUTPUT</text>
    <text x="80" y="736">METRIC</text>
  </g>
  <!-- Row values (right column); a wrapped value continues 17px below its first line. -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="152" y="644">X_all window, 8,546d</text>
    <text x="152" y="669">minimal linear softmax; optional NN MLP</text>
    <text x="152" y="686">softmax</text>
    <text x="152" y="711">current action class, 18 classes</text>
    <text x="152" y="736">min macro-F1 0.0500; NN macro-F1 0.0148</text>
  </g>
</g>
<!-- Task card: timeline_subtask (softmax badge). -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="530" y="540" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="530" y="540" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="550" y="558" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="598" y="575" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="550" y="612" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">timeline_subtask</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#9bdfff">
    <text x="550" y="644">INPUT</text>
    <text x="550" y="669">HEAD</text>
    <text x="550" y="711">OUTPUT</text>
    <text x="550" y="736">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="622" y="644">X_all window, 8,546d</text>
    <text x="622" y="669">minimal linear softmax; optional NN MLP</text>
    <text x="622" y="686">softmax</text>
    <text x="622" y="711">current subtask class, 14 classes</text>
    <text x="622" y="736">min macro-F1 0.0506; NN macro-F1 0.0281</text>
  </g>
</g>
<!-- Task card: transition_detection (softmax badge). The METRIC value wraps onto a second line. -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="1000" y="540" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="1000" y="540" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="1020" y="558" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="1068" y="575" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="1020" y="612" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">transition_detection</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#9bdfff">
    <text x="1020" y="644">INPUT</text>
    <text x="1020" y="669">HEAD</text>
    <text x="1020" y="711">OUTPUT</text>
    <text x="1020" y="736">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="1092" y="644">X_all window, 8,546d</text>
    <text x="1092" y="669">minimal linear softmax; optional NN MLP</text>
    <text x="1092" y="686">softmax</text>
    <text x="1092" y="711">steady vs transition near action boundary</text>
    <text x="1092" y="736">min macro-F1 0.6118; NN macro-F1 0.5862;</text>
    <text x="1092" y="753">boundary-F1 0.1250</text>
  </g>
</g>
<!-- Task card: next_action (softmax badge). -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="60" y="818" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="60" y="818" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="80" y="836" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="128" y="853" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="80" y="890" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">next_action</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#9bdfff">
    <text x="80" y="922">INPUT</text>
    <text x="80" y="947">HEAD</text>
    <text x="80" y="989">OUTPUT</text>
    <text x="80" y="1014">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="152" y="922">X_all at time t, 8,546d</text>
    <text x="152" y="947">minimal linear softmax; optional NN MLP</text>
    <text x="152" y="964">softmax</text>
    <text x="152" y="989">action at t+20 frames</text>
    <text x="152" y="1014">min macro-F1 0.0593; NN macro-F1 0.0419</text>
  </g>
</g>
<!-- Task card: hand_trajectory_forecast (ridge badge).
     The output dimension is written "1,260d" so it uses the same thousands separator as every other dimension count in the diagram (8,546d, 7,503d, 5,096d, ...). -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="530" y="818" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="530" y="818" width="8" height="248" rx="4" fill="#ccffa0"/>
  <rect x="550" y="836" width="96" height="24" rx="6" fill="#071207" stroke="#ccffa0" stroke-opacity="0.72"/>
  <text x="598" y="853" text-anchor="middle" font-size="11" font-weight="800" fill="#ccffa0">ridge</text>
  <text x="550" y="890" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">hand_trajectory_forecast</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#ccffa0">
    <text x="550" y="922">INPUT</text>
    <text x="550" y="947">HEAD</text>
    <text x="550" y="989">OUTPUT</text>
    <text x="550" y="1014">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="622" y="922">X_all at time t, 8,546d</text>
    <text x="622" y="947">minimal dual ridge; optional NN MLP</text>
    <text x="622" y="964">regression</text>
    <text x="622" y="989">future hand joints, 1,260d</text>
    <text x="622" y="1014">min MPJPE 0.8647; NN MPJPE 0.1079</text>
  </g>
</g>
<!-- Task card: contact_prediction (softmax badge). The OUTPUT value wraps, pushing METRIC down to y=1031. -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="1000" y="818" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="1000" y="818" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="1020" y="836" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="1068" y="853" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="1020" y="890" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">contact_prediction</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#9bdfff">
    <text x="1020" y="922">INPUT</text>
    <text x="1020" y="947">HEAD</text>
    <text x="1020" y="989">OUTPUT</text>
    <text x="1020" y="1031">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="1092" y="922">X without contact/text leakage, 7,503d</text>
    <text x="1092" y="947">minimal linear softmax; optional NN MLP</text>
    <text x="1092" y="964">softmax</text>
    <text x="1092" y="989">any body contact in window; degenerate</text>
    <text x="1092" y="1006">one-class sample</text>
    <text x="1092" y="1031">min macro-F1 1.0000; NN macro-F1 1.0000</text>
  </g>
</g>
<!-- Task card: object_relevance (multilabel badge). -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="60" y="1096" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="60" y="1096" width="8" height="248" rx="4" fill="#d8f4a5"/>
  <rect x="80" y="1114" width="96" height="24" rx="6" fill="#071207" stroke="#d8f4a5" stroke-opacity="0.72"/>
  <text x="128" y="1131" text-anchor="middle" font-size="11" font-weight="800" fill="#d8f4a5">multilabel</text>
  <text x="80" y="1168" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">object_relevance</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#d8f4a5">
    <text x="80" y="1200">INPUT</text>
    <text x="80" y="1225">HEAD</text>
    <text x="80" y="1267">OUTPUT</text>
    <text x="80" y="1292">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="152" y="1200">X without caption text, 7,650d</text>
    <text x="152" y="1225">minimal sigmoid logistic; optional NN MLP</text>
    <text x="152" y="1242">multilabel</text>
    <text x="152" y="1267">multi-hot object set, 34 objects</text>
    <text x="152" y="1292">min micro-F1 0.1803; NN micro-F1 0.1679</text>
  </g>
</g>
<!-- Task card: caption_grounding (ridge+rank badge). -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="530" y="1096" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="530" y="1096" width="8" height="248" rx="4" fill="#7ae5c3"/>
  <rect x="550" y="1114" width="96" height="24" rx="6" fill="#071207" stroke="#7ae5c3" stroke-opacity="0.72"/>
  <text x="598" y="1131" text-anchor="middle" font-size="11" font-weight="800" fill="#7ae5c3">ridge+rank</text>
  <text x="550" y="1168" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">caption_grounding</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#7ae5c3">
    <text x="550" y="1200">INPUT</text>
    <text x="550" y="1225">HEAD</text>
    <text x="550" y="1267">OUTPUT</text>
    <text x="550" y="1292">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="622" y="1200">sensor 7,650d -&gt; text space 896d</text>
    <text x="622" y="1225">minimal ridge or NN MLP projection, then</text>
    <text x="622" y="1242">cosine rank</text>
    <text x="622" y="1267">text query retrieves matching time window</text>
    <text x="622" y="1292">min MRR 0.0160; NN MRR 0.0168</text>
  </g>
</g>
<!-- Task card: cross_modal_retrieval (ridge+rank badge). The INPUT value wraps, so every later row sits 17px lower than in the neighbouring cards. -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="1000" y="1096" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="1000" y="1096" width="8" height="248" rx="4" fill="#7ae5c3"/>
  <rect x="1020" y="1114" width="96" height="24" rx="6" fill="#071207" stroke="#7ae5c3" stroke-opacity="0.72"/>
  <text x="1068" y="1131" text-anchor="middle" font-size="11" font-weight="800" fill="#7ae5c3">ridge+rank</text>
  <text x="1020" y="1168" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">cross_modal_retrieval</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#7ae5c3">
    <text x="1020" y="1200">INPUT</text>
    <text x="1020" y="1242">HEAD</text>
    <text x="1020" y="1284">OUTPUT</text>
    <text x="1020" y="1309">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="1092" y="1200">motion/IMU/camera/audio 2,415d -&gt; visual</text>
    <text x="1092" y="1217">5,096d</text>
    <text x="1092" y="1242">minimal ridge or NN MLP projection, then</text>
    <text x="1092" y="1259">cosine rank</text>
    <text x="1092" y="1284">retrieve matching depth/video window</text>
    <text x="1092" y="1309">min top-5 0.3678; NN top-5 0.1983</text>
  </g>
</g>
<!-- Task card: modality_reconstruction (ridge badge). -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="60" y="1374" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="60" y="1374" width="8" height="248" rx="4" fill="#ccffa0"/>
  <rect x="80" y="1392" width="96" height="24" rx="6" fill="#071207" stroke="#ccffa0" stroke-opacity="0.72"/>
  <text x="128" y="1409" text-anchor="middle" font-size="11" font-weight="800" fill="#ccffa0">ridge</text>
  <text x="80" y="1446" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">modality_reconstruction</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#ccffa0">
    <text x="80" y="1478">INPUT</text>
    <text x="80" y="1503">HEAD</text>
    <text x="80" y="1545">OUTPUT</text>
    <text x="80" y="1570">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="152" y="1478">motion/IMU/camera/audio 2,415d</text>
    <text x="152" y="1503">minimal dual ridge; optional NN MLP</text>
    <text x="152" y="1520">regression</text>
    <text x="152" y="1545">depth/video feature vector, 5,096d</text>
    <text x="152" y="1570">min R2 -0.0153; NN R2 -0.0102</text>
  </g>
</g>
<!-- Task card: temporal_order (softmax badge). -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="530" y="1374" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="530" y="1374" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="550" y="1392" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="598" y="1409" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="550" y="1446" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">temporal_order</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#9bdfff">
    <text x="550" y="1478">INPUT</text>
    <text x="550" y="1503">HEAD</text>
    <text x="550" y="1545">OUTPUT</text>
    <text x="550" y="1570">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="622" y="1478">concat[x_t, x_t+1, diff], 25,638d</text>
    <text x="622" y="1503">minimal binary softmax; optional NN MLP</text>
    <text x="622" y="1520">softmax</text>
    <text x="622" y="1545">correct vs reversed adjacent windows</text>
    <text x="622" y="1570">min F1 0.5400; NN F1 0.8520</text>
  </g>
</g>
<!-- Task card: misalignment_detection (softmax badge). The INPUT value wraps, so every later row sits 17px lower than in the neighbouring cards. -->
<g font-family="Space Grotesk, Arial, sans-serif">
  <!-- Frame, accent bar and head-type badge. -->
  <rect x="1000" y="1374" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="1000" y="1374" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="1020" y="1392" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="1068" y="1409" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="1020" y="1446" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">misalignment_detection</text>
  <!-- Row labels (left column). -->
  <g font-size="12" font-weight="800" fill="#9bdfff">
    <text x="1020" y="1478">INPUT</text>
    <text x="1020" y="1520">HEAD</text>
    <text x="1020" y="1562">OUTPUT</text>
    <text x="1020" y="1587">METRIC</text>
  </g>
  <!-- Row values (right column). -->
  <g font-size="13" font-weight="500" fill="#dce8d7">
    <text x="1092" y="1478">concat[motion_t, visual+audio_t/shifted],</text>
    <text x="1092" y="1495">7,511d</text>
    <text x="1092" y="1520">minimal binary softmax; optional NN MLP</text>
    <text x="1092" y="1537">softmax</text>
    <text x="1092" y="1562">aligned vs shifted by 8 windows</text>
    <text x="1092" y="1587">min F1 0.5052; NN F1 0.7153</text>
  </g>
</g>
<!-- Footer: full-width note box with two lines of interpretation text. -->
<rect x="60" y="1688" width="1380" height="72" rx="8" fill="#071207" stroke="#ccffa0" stroke-opacity="0.22"/>
<g font-family="Space Grotesk, Arial, sans-serif" font-size="15" fill="#dce8d7">
  <text x="84" y="1718">Interpretation: this suite tests whether each input/output contract is wired correctly before scaling to many episodes.</text>
  <text x="84" y="1742">Research-grade conclusions need held-out episode splits and stronger sequence/vision-language/robot-policy models.</text>
</g>
</svg>