<svg xmlns="http://www.w3.org/2000/svg" width="1500" height="1840" viewBox="0 0 1500 1840">
<!-- Shared resources: the dot-grid fill pattern and the arrowhead marker referenced by id further down. -->
<defs>
  <!-- 18x18 user-space tile holding a single small dot near its top-left corner. -->
  <pattern id="dotgrid2" patternUnits="userSpaceOnUse" width="18" height="18">
    <circle cx="2" cy="2" r="1.2" fill="#ccffa0" opacity="0.18"/>
  </pattern>
  <!-- Closed triangular arrowhead drawn in a 10x10 marker viewBox, anchored at (8, 5). -->
  <marker id="arrow2" viewBox="0 0 10 10" refX="8" refY="5" markerWidth="7" markerHeight="7" orient="auto-start-reverse">
    <path d="M0 0 L10 5 L0 10 Z" fill="#ccffa0" fill-opacity="0.72"/>
  </marker>
</defs>
<!-- Backdrop, painted bottom to top: solid fill, dot-grid overlay, translucent disc in the upper right. -->
<rect x="0" y="0" width="100%" height="100%" fill="#020502"/>
<rect x="0" y="0" width="100%" height="100%" opacity="0.58" fill="url(#dotgrid2)"/>
<circle r="210" cx="1190" cy="150" opacity="0.08" fill="#ccffa0"/>
<!-- Figure heading: bold title line followed by a smaller provenance note. -->
<text x="60" y="56"
      fill="#f4f8ef"
      font-family="Inter Tight, Arial, sans-serif" font-size="34" font-weight="800">Minimal Architectures for 12 Ropedia Xperience-10M Tasks</text>
<text x="60" y="88"
      fill="#a5afa2"
      font-family="Space Grotesk, Arial, sans-serif" font-size="16">Generated from scripts/episode_task_suite.py semantics and committed summary metrics. These are minimal baselines, not deep foundation models.</text>
<!-- Three short horizontal connectors at y=177, each ending in the arrow2 marker. Stroke styling is inherited from the group. -->
<g stroke="#ccffa0" stroke-opacity="0.54" stroke-width="3">
  <line x1="382" y1="177" x2="396" y2="177" marker-end="url(#arrow2)"/>
  <line x1="732" y1="177" x2="746" y2="177" marker-end="url(#arrow2)"/>
  <line x1="1092" y1="177" x2="1106" y2="177" marker-end="url(#arrow2)"/>
</g>
<!-- Stage panel "Shared episode windows": frame, left accent bar, title, three body lines. Body text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="60" y="122" width="310" height="110" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.26" stroke-width="2"/>
  <rect x="60" y="122" width="8" height="110" rx="4" fill="#9bdfff"/>
  <text x="84" y="153" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#f4f8ef">Shared episode windows</text>
  <text x="84" y="180">5,821 frames -> 1,161 windows</text>
  <text x="84" y="198">20-frame window, 5-frame stride</text>
  <text x="84" y="216">chronological 70/30 split</text>
</g>
<!-- Stage panel "Feature vector": frame, left accent bar, title, three body lines. Body text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="410" y="122" width="310" height="110" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.26" stroke-width="2"/>
  <rect x="410" y="122" width="8" height="110" rx="4" fill="#7ae5c3"/>
  <text x="434" y="153" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#f4f8ef">Feature vector</text>
  <text x="434" y="180">X_all = 8,546 dimensions</text>
  <text x="434" y="198">18 named blocks incl. audio</text>
  <text x="434" y="216">mean/std fit on train only</text>
</g>
<!-- Stage panel "Reusable heads": frame, left accent bar, title, three body lines. Body text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="760" y="122" width="320" height="110" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.26" stroke-width="2"/>
  <rect x="760" y="122" width="8" height="110" rx="4" fill="#ccffa0"/>
  <text x="784" y="153" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#f4f8ef">Reusable heads</text>
  <text x="784" y="180">linear softmax classifier</text>
  <text x="784" y="198">dual ridge regression/projection</text>
  <text x="784" y="216">multi-label logistic + cosine rank</text>
</g>
<!-- Stage panel "Artifacts": frame, left accent bar, title, and a description that wraps to four body lines.
     The frame spans y=122 to y=232 (height 110). Four lines on an 18px leading starting at y=180 would put
     the last baseline at y=234, below the frame, so the body uses a 17px leading (174/191/208/225) to keep
     every line inside the panel. Body text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="1120" y="122" width="320" height="110" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.26" stroke-width="2"/>
  <rect x="1120" y="122" width="8" height="110" rx="4" fill="#d8f4a5"/>
  <text x="1144" y="153" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#f4f8ef">Artifacts</text>
  <text x="1144" y="174">metrics.json, predictions.csv/npz</text>
  <text x="1144" y="191">model.npz with scaler and weights</text>
  <text x="1144" y="208">summary_report.json source of</text>
  <text x="1144" y="225">numbers</text>
</g>
<!-- Head description panel "Softmax classifier": frame, coloured title, one body line. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="60" y="270" width="660" height="100" rx="8" fill="#071207" stroke="#ccffa0" stroke-opacity="0.22"/>
  <text x="78" y="303" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#9bdfff">Softmax classifier</text>
  <text x="78" y="330">logits = z(X)W + b; CE + L2; class weights for classifiers</text>
</g>
<!-- Head description panel "Ridge regression/projection": frame, coloured title, one body line. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="780" y="270" width="660" height="100" rx="8" fill="#071207" stroke="#ccffa0" stroke-opacity="0.22"/>
  <text x="798" y="303" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#ccffa0">Ridge regression/projection</text>
  <text x="798" y="330">closed-form dual ridge on z(X), z(Y); used for forecast and reconstruction</text>
</g>
<!-- Head description panel "Ridge + cosine ranking": frame, coloured title, body text wrapped over two lines. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="60" y="394" width="660" height="100" rx="8" fill="#071207" stroke="#ccffa0" stroke-opacity="0.22"/>
  <text x="78" y="427" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#7ae5c3">Ridge + cosine ranking</text>
  <text x="78" y="454">project one modality into another feature space, then rank candidates by</text>
  <text x="78" y="472">cosine</text>
</g>
<!-- Head description panel "Multi-label logistic": frame, coloured title, one body line. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="780" y="394" width="660" height="100" rx="8" fill="#071207" stroke="#ccffa0" stroke-opacity="0.22"/>
  <text x="798" y="427" font-family="Inter Tight, Arial, sans-serif" font-size="18" font-weight="800" fill="#d8f4a5">Multi-label logistic</text>
  <text x="798" y="454">sigmoid heads for object vocabulary; threshold 0.5 with top-1 fallback</text>
</g>
<!-- Task card "timeline_action" (badge: softmax). Value text styling is inherited from the group; labels and title override it. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="60" y="540" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="60" y="540" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="80" y="558" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="128" y="575" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="80" y="612" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">timeline_action</text>
  <text x="80" y="644" font-size="12" font-weight="800" fill="#9bdfff">INPUT</text>
  <text x="152" y="644">X_all window, 8,546d</text>
  <text x="80" y="669" font-size="12" font-weight="800" fill="#9bdfff">HEAD</text>
  <text x="152" y="669">minimal linear softmax; optional NN MLP</text>
  <text x="152" y="686">softmax</text>
  <text x="80" y="711" font-size="12" font-weight="800" fill="#9bdfff">OUTPUT</text>
  <text x="152" y="711">current action class, 18 classes</text>
  <text x="80" y="736" font-size="12" font-weight="800" fill="#9bdfff">METRIC</text>
  <text x="152" y="736">min macro-F1 0.0500; NN macro-F1 0.0148</text>
</g>
<!-- Task card "timeline_subtask" (badge: softmax). Value text styling is inherited from the group; labels and title override it. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="530" y="540" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="530" y="540" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="550" y="558" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="598" y="575" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="550" y="612" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">timeline_subtask</text>
  <text x="550" y="644" font-size="12" font-weight="800" fill="#9bdfff">INPUT</text>
  <text x="622" y="644">X_all window, 8,546d</text>
  <text x="550" y="669" font-size="12" font-weight="800" fill="#9bdfff">HEAD</text>
  <text x="622" y="669">minimal linear softmax; optional NN MLP</text>
  <text x="622" y="686">softmax</text>
  <text x="550" y="711" font-size="12" font-weight="800" fill="#9bdfff">OUTPUT</text>
  <text x="622" y="711">current subtask class, 14 classes</text>
  <text x="550" y="736" font-size="12" font-weight="800" fill="#9bdfff">METRIC</text>
  <text x="622" y="736">min macro-F1 0.0506; NN macro-F1 0.0281</text>
</g>
<!-- Task card "transition_detection" (badge: softmax); the metric wraps to a second line. Value text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="1000" y="540" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="1000" y="540" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="1020" y="558" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="1068" y="575" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="1020" y="612" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">transition_detection</text>
  <text x="1020" y="644" font-size="12" font-weight="800" fill="#9bdfff">INPUT</text>
  <text x="1092" y="644">X_all window, 8,546d</text>
  <text x="1020" y="669" font-size="12" font-weight="800" fill="#9bdfff">HEAD</text>
  <text x="1092" y="669">minimal linear softmax; optional NN MLP</text>
  <text x="1092" y="686">softmax</text>
  <text x="1020" y="711" font-size="12" font-weight="800" fill="#9bdfff">OUTPUT</text>
  <text x="1092" y="711">steady vs transition near action boundary</text>
  <text x="1020" y="736" font-size="12" font-weight="800" fill="#9bdfff">METRIC</text>
  <text x="1092" y="736">min macro-F1 0.6118; NN macro-F1 0.5862;</text>
  <text x="1092" y="753">boundary-F1 0.1250</text>
</g>
<!-- Task card "next_action" (badge: softmax). Value text styling is inherited from the group; labels and title override it. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="60" y="818" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="60" y="818" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="80" y="836" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="128" y="853" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="80" y="890" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">next_action</text>
  <text x="80" y="922" font-size="12" font-weight="800" fill="#9bdfff">INPUT</text>
  <text x="152" y="922">X_all at time t, 8,546d</text>
  <text x="80" y="947" font-size="12" font-weight="800" fill="#9bdfff">HEAD</text>
  <text x="152" y="947">minimal linear softmax; optional NN MLP</text>
  <text x="152" y="964">softmax</text>
  <text x="80" y="989" font-size="12" font-weight="800" fill="#9bdfff">OUTPUT</text>
  <text x="152" y="989">action at t+20 frames</text>
  <text x="80" y="1014" font-size="12" font-weight="800" fill="#9bdfff">METRIC</text>
  <text x="152" y="1014">min macro-F1 0.0593; NN macro-F1 0.0419</text>
</g>
<!-- Task card "hand_trajectory_forecast" (badge: ridge). Value text styling is inherited from the group; labels and title override it. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="530" y="818" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="530" y="818" width="8" height="248" rx="4" fill="#ccffa0"/>
  <rect x="550" y="836" width="96" height="24" rx="6" fill="#071207" stroke="#ccffa0" stroke-opacity="0.72"/>
  <text x="598" y="853" text-anchor="middle" font-size="11" font-weight="800" fill="#ccffa0">ridge</text>
  <text x="550" y="890" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">hand_trajectory_forecast</text>
  <text x="550" y="922" font-size="12" font-weight="800" fill="#ccffa0">INPUT</text>
  <text x="622" y="922">X_all at time t, 8,546d</text>
  <text x="550" y="947" font-size="12" font-weight="800" fill="#ccffa0">HEAD</text>
  <text x="622" y="947">minimal dual ridge; optional NN MLP</text>
  <text x="622" y="964">regression</text>
  <text x="550" y="989" font-size="12" font-weight="800" fill="#ccffa0">OUTPUT</text>
  <text x="622" y="989">future hand joints, 1260d</text>
  <text x="550" y="1014" font-size="12" font-weight="800" fill="#ccffa0">METRIC</text>
  <text x="622" y="1014">min MPJPE 0.8647; NN MPJPE 0.1079</text>
</g>
<!-- Task card "contact_prediction" (badge: softmax); the output description wraps to a second line. Value text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="1000" y="818" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="1000" y="818" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="1020" y="836" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="1068" y="853" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="1020" y="890" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">contact_prediction</text>
  <text x="1020" y="922" font-size="12" font-weight="800" fill="#9bdfff">INPUT</text>
  <text x="1092" y="922">X without contact/text leakage, 7,503d</text>
  <text x="1020" y="947" font-size="12" font-weight="800" fill="#9bdfff">HEAD</text>
  <text x="1092" y="947">minimal linear softmax; optional NN MLP</text>
  <text x="1092" y="964">softmax</text>
  <text x="1020" y="989" font-size="12" font-weight="800" fill="#9bdfff">OUTPUT</text>
  <text x="1092" y="989">any body contact in window; degenerate</text>
  <text x="1092" y="1006">one-class sample</text>
  <text x="1020" y="1031" font-size="12" font-weight="800" fill="#9bdfff">METRIC</text>
  <text x="1092" y="1031">min macro-F1 1.0000; NN macro-F1 1.0000</text>
</g>
<!-- Task card "object_relevance" (badge: multilabel). Value text styling is inherited from the group; labels and title override it. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="60" y="1096" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="60" y="1096" width="8" height="248" rx="4" fill="#d8f4a5"/>
  <rect x="80" y="1114" width="96" height="24" rx="6" fill="#071207" stroke="#d8f4a5" stroke-opacity="0.72"/>
  <text x="128" y="1131" text-anchor="middle" font-size="11" font-weight="800" fill="#d8f4a5">multilabel</text>
  <text x="80" y="1168" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">object_relevance</text>
  <text x="80" y="1200" font-size="12" font-weight="800" fill="#d8f4a5">INPUT</text>
  <text x="152" y="1200">X without caption text, 7,650d</text>
  <text x="80" y="1225" font-size="12" font-weight="800" fill="#d8f4a5">HEAD</text>
  <text x="152" y="1225">minimal sigmoid logistic; optional NN MLP</text>
  <text x="152" y="1242">multilabel</text>
  <text x="80" y="1267" font-size="12" font-weight="800" fill="#d8f4a5">OUTPUT</text>
  <text x="152" y="1267">multi-hot object set, 34 objects</text>
  <text x="80" y="1292" font-size="12" font-weight="800" fill="#d8f4a5">METRIC</text>
  <text x="152" y="1292">min micro-F1 0.1803; NN micro-F1 0.1679</text>
</g>
<!-- Task card "caption_grounding" (badge: ridge+rank). Value text styling is inherited from the group; labels and title override it. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="530" y="1096" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="530" y="1096" width="8" height="248" rx="4" fill="#7ae5c3"/>
  <rect x="550" y="1114" width="96" height="24" rx="6" fill="#071207" stroke="#7ae5c3" stroke-opacity="0.72"/>
  <text x="598" y="1131" text-anchor="middle" font-size="11" font-weight="800" fill="#7ae5c3">ridge+rank</text>
  <text x="550" y="1168" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">caption_grounding</text>
  <text x="550" y="1200" font-size="12" font-weight="800" fill="#7ae5c3">INPUT</text>
  <text x="622" y="1200">sensor 7,650d -> text space 896d</text>
  <text x="550" y="1225" font-size="12" font-weight="800" fill="#7ae5c3">HEAD</text>
  <text x="622" y="1225">minimal ridge or NN MLP projection, then</text>
  <text x="622" y="1242">cosine rank</text>
  <text x="550" y="1267" font-size="12" font-weight="800" fill="#7ae5c3">OUTPUT</text>
  <text x="622" y="1267">text query retrieves matching time window</text>
  <text x="550" y="1292" font-size="12" font-weight="800" fill="#7ae5c3">METRIC</text>
  <text x="622" y="1292">min MRR 0.0160; NN MRR 0.0168</text>
</g>
<!-- Task card "cross_modal_retrieval" (badge: ridge+rank); the input description wraps to a second line. Value text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="1000" y="1096" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="1000" y="1096" width="8" height="248" rx="4" fill="#7ae5c3"/>
  <rect x="1020" y="1114" width="96" height="24" rx="6" fill="#071207" stroke="#7ae5c3" stroke-opacity="0.72"/>
  <text x="1068" y="1131" text-anchor="middle" font-size="11" font-weight="800" fill="#7ae5c3">ridge+rank</text>
  <text x="1020" y="1168" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">cross_modal_retrieval</text>
  <text x="1020" y="1200" font-size="12" font-weight="800" fill="#7ae5c3">INPUT</text>
  <text x="1092" y="1200">motion/IMU/camera/audio 2,415d -> visual</text>
  <text x="1092" y="1217">5,096d</text>
  <text x="1020" y="1242" font-size="12" font-weight="800" fill="#7ae5c3">HEAD</text>
  <text x="1092" y="1242">minimal ridge or NN MLP projection, then</text>
  <text x="1092" y="1259">cosine rank</text>
  <text x="1020" y="1284" font-size="12" font-weight="800" fill="#7ae5c3">OUTPUT</text>
  <text x="1092" y="1284">retrieve matching depth/video window</text>
  <text x="1020" y="1309" font-size="12" font-weight="800" fill="#7ae5c3">METRIC</text>
  <text x="1092" y="1309">min top-5 0.3678; NN top-5 0.1983</text>
</g>
<!-- Task card "modality_reconstruction" (badge: ridge). Value text styling is inherited from the group; labels and title override it. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="60" y="1374" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="60" y="1374" width="8" height="248" rx="4" fill="#ccffa0"/>
  <rect x="80" y="1392" width="96" height="24" rx="6" fill="#071207" stroke="#ccffa0" stroke-opacity="0.72"/>
  <text x="128" y="1409" text-anchor="middle" font-size="11" font-weight="800" fill="#ccffa0">ridge</text>
  <text x="80" y="1446" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">modality_reconstruction</text>
  <text x="80" y="1478" font-size="12" font-weight="800" fill="#ccffa0">INPUT</text>
  <text x="152" y="1478">motion/IMU/camera/audio 2,415d</text>
  <text x="80" y="1503" font-size="12" font-weight="800" fill="#ccffa0">HEAD</text>
  <text x="152" y="1503">minimal dual ridge; optional NN MLP</text>
  <text x="152" y="1520">regression</text>
  <text x="80" y="1545" font-size="12" font-weight="800" fill="#ccffa0">OUTPUT</text>
  <text x="152" y="1545">depth/video feature vector, 5,096d</text>
  <text x="80" y="1570" font-size="12" font-weight="800" fill="#ccffa0">METRIC</text>
  <text x="152" y="1570">min R2 -0.0153; NN R2 -0.0102</text>
</g>
<!-- Task card "temporal_order" (badge: softmax). Value text styling is inherited from the group; labels and title override it. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="530" y="1374" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="530" y="1374" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="550" y="1392" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="598" y="1409" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="550" y="1446" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">temporal_order</text>
  <text x="550" y="1478" font-size="12" font-weight="800" fill="#9bdfff">INPUT</text>
  <text x="622" y="1478">concat[x_t, x_t+1, diff], 25,638d</text>
  <text x="550" y="1503" font-size="12" font-weight="800" fill="#9bdfff">HEAD</text>
  <text x="622" y="1503">minimal binary softmax; optional NN MLP</text>
  <text x="622" y="1520">softmax</text>
  <text x="550" y="1545" font-size="12" font-weight="800" fill="#9bdfff">OUTPUT</text>
  <text x="622" y="1545">correct vs reversed adjacent windows</text>
  <text x="550" y="1570" font-size="12" font-weight="800" fill="#9bdfff">METRIC</text>
  <text x="622" y="1570">min F1 0.5400; NN F1 0.8520</text>
</g>
<!-- Task card "misalignment_detection" (badge: softmax); the input description wraps to a second line. Value text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="13" font-weight="500" fill="#dce8d7">
  <rect x="1000" y="1374" width="440" height="248" rx="8" fill="#061006" stroke="#ccffa0" stroke-opacity="0.24" stroke-width="2"/>
  <rect x="1000" y="1374" width="8" height="248" rx="4" fill="#9bdfff"/>
  <rect x="1020" y="1392" width="96" height="24" rx="6" fill="#071207" stroke="#9bdfff" stroke-opacity="0.72"/>
  <text x="1068" y="1409" text-anchor="middle" font-size="11" font-weight="800" fill="#9bdfff">softmax</text>
  <text x="1020" y="1446" font-family="Inter Tight, Arial, sans-serif" font-size="20" font-weight="800" fill="#f4f8ef">misalignment_detection</text>
  <text x="1020" y="1478" font-size="12" font-weight="800" fill="#9bdfff">INPUT</text>
  <text x="1092" y="1478">concat[motion_t, visual+audio_t/shifted],</text>
  <text x="1092" y="1495">7,511d</text>
  <text x="1020" y="1520" font-size="12" font-weight="800" fill="#9bdfff">HEAD</text>
  <text x="1092" y="1520">minimal binary softmax; optional NN MLP</text>
  <text x="1092" y="1537">softmax</text>
  <text x="1020" y="1562" font-size="12" font-weight="800" fill="#9bdfff">OUTPUT</text>
  <text x="1092" y="1562">aligned vs shifted by 8 windows</text>
  <text x="1020" y="1587" font-size="12" font-weight="800" fill="#9bdfff">METRIC</text>
  <text x="1092" y="1587">min F1 0.5052; NN F1 0.7153</text>
</g>
<!-- Footer note: full-width frame with two lines of interpretation text. Text styling is inherited from the group. -->
<g font-family="Space Grotesk, Arial, sans-serif" font-size="15" fill="#dce8d7">
  <rect x="60" y="1688" width="1380" height="72" rx="8" fill="#071207" stroke="#ccffa0" stroke-opacity="0.22"/>
  <text x="84" y="1718">Interpretation: this suite tests whether each input/output contract is wired correctly before scaling to many episodes.</text>
  <text x="84" y="1742">Research-grade conclusions need held-out episode splits and stronger sequence/vision-language/robot-policy models.</text>
</g>
</svg>