File size: 5,437 Bytes
3cff18b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
<svg xmlns="http://www.w3.org/2000/svg" width="1440" height="832" viewBox="0 0 1440 832" role="img" aria-labelledby="xp10m-t13-20-title xp10m-t13-20-desc">
  <!--
    Static results card: one heading, one subtitle, and eight result rows.
    Each row is a 1296x58 panel at x=72; rows start at y=128 and repeat every 76px.
    Inside a row: accent bar (8px wide), task name (baseline y+24),
    task description (baseline y+47), then the "minimal" score at x=840 and
    the "neural" score at x=1110.
    Accent colours cycle #ccffa0, #7ae5c3, #9bdfff, #d8f4a5.
    The separator in the description lines is written as the numeric reference
    &#183; (MIDDLE DOT) so the file stays ASCII-only and cannot be mis-decoded.
  -->
  <title id="xp10m-t13-20-title">Ropedia Xperience-10M tasks 13-20 baselines</title>
  <!-- role="img" hides the child text from assistive technology, so the scores are repeated here. Keep in sync with the rows below. -->
  <desc id="xp10m-t13-20-desc">Baseline scores for eight tasks, each listed as minimal versus neural. Long-Horizon Next-Action Forecasting: 0.0750 vs 0.0655 macro-F1. Long-Horizon Next-Subtask Forecasting: 0.0455 vs 0.0507 macro-F1. Interaction Text Prediction: 0.0444 vs 0.0381 macro-F1. Action-Object Relation Prediction: 0.0000 vs 0.0000 macro-F1. Future Object-Set Forecasting: 0.1694 vs 0.1972 micro-F1. IMU-to-Hand Pose Reconstruction: 0.0420 vs 0.0426 MAE. Camera-View Synchronization Retrieval: 0.4943 vs 0.2409 MRR. Time-to-Next-Transition Regression: 10.5374 vs 10.5545 MAE frames.</desc>

  <!-- Page background and outer card frame -->
  <rect width="100%" height="100%" fill="#020502"/>
  <rect x="32" y="32" width="1376" height="768" rx="12" fill="#071207" stroke="#ccffa0" stroke-opacity="0.22"/>

  <!-- Heading and subtitle -->
  <text x="72" y="82" fill="#f4f8ef" font-size="32" font-weight="760">Ropedia Xperience-10M tasks 13-20 baselines</text>
  <text x="72" y="112" fill="#a5afa2" font-size="16">Eight additional task contracts in the same unified 20-task suite and aligned with the same 20-frame window, 5-frame stride, and chronological split.</text>

  <!-- Row 1 (y=128): Long-Horizon Next-Action Forecasting -->
  <g>
    <rect x="72" y="128" width="1296" height="58" rx="8" fill="#020902" stroke="#ccffa0" stroke-opacity="0.14"/>
    <rect x="72" y="128" width="8" height="58" rx="4" fill="#ccffa0"/>
    <text x="98" y="152" fill="#f4f8ef" font-size="18" font-weight="720">Long-Horizon Next-Action Forecasting</text>
    <text x="98" y="175" fill="#a5afa2" font-size="13">classification &#183; Action label five seconds later.</text>
    <text x="840" y="152" fill="#f4f8ef" font-size="16" font-weight="700">minimal 0.0750 macro-F1</text>
    <text x="1110" y="152" fill="#f4f8ef" font-size="16" font-weight="700">neural 0.0655 macro-F1</text>
  </g>

  <!-- Row 2 (y=204): Long-Horizon Next-Subtask Forecasting -->
  <g>
    <rect x="72" y="204" width="1296" height="58" rx="8" fill="#020902" stroke="#ccffa0" stroke-opacity="0.14"/>
    <rect x="72" y="204" width="8" height="58" rx="4" fill="#7ae5c3"/>
    <text x="98" y="228" fill="#f4f8ef" font-size="18" font-weight="720">Long-Horizon Next-Subtask Forecasting</text>
    <text x="98" y="251" fill="#a5afa2" font-size="13">classification &#183; Procedure subtask label five seconds later.</text>
    <text x="840" y="228" fill="#f4f8ef" font-size="16" font-weight="700">minimal 0.0455 macro-F1</text>
    <text x="1110" y="228" fill="#f4f8ef" font-size="16" font-weight="700">neural 0.0507 macro-F1</text>
  </g>

  <!-- Row 3 (y=280): Interaction Text Prediction -->
  <g>
    <rect x="72" y="280" width="1296" height="58" rx="8" fill="#020902" stroke="#ccffa0" stroke-opacity="0.14"/>
    <rect x="72" y="280" width="8" height="58" rx="4" fill="#9bdfff"/>
    <text x="98" y="304" fill="#f4f8ef" font-size="18" font-weight="720">Interaction Text Prediction</text>
    <text x="98" y="327" fill="#a5afa2" font-size="13">classification &#183; Raw annotation interaction phrase for the same window.</text>
    <text x="840" y="304" fill="#f4f8ef" font-size="16" font-weight="700">minimal 0.0444 macro-F1</text>
    <text x="1110" y="304" fill="#f4f8ef" font-size="16" font-weight="700">neural 0.0381 macro-F1</text>
  </g>

  <!-- Row 4 (y=356): Action-Object Relation Prediction -->
  <!-- NOTE(review): both baselines report 0.0000 here; confirm this is the real score and not a missing-value placeholder. -->
  <g>
    <rect x="72" y="356" width="1296" height="58" rx="8" fill="#020902" stroke="#ccffa0" stroke-opacity="0.14"/>
    <rect x="72" y="356" width="8" height="58" rx="4" fill="#d8f4a5"/>
    <text x="98" y="380" fill="#f4f8ef" font-size="18" font-weight="720">Action-Object Relation Prediction</text>
    <text x="98" y="403" fill="#a5afa2" font-size="13">classification &#183; Joint action plus active object-set relation.</text>
    <text x="840" y="380" fill="#f4f8ef" font-size="16" font-weight="700">minimal 0.0000 macro-F1</text>
    <text x="1110" y="380" fill="#f4f8ef" font-size="16" font-weight="700">neural 0.0000 macro-F1</text>
  </g>

  <!-- Row 5 (y=432): Future Object-Set Forecasting -->
  <g>
    <rect x="72" y="432" width="1296" height="58" rx="8" fill="#020902" stroke="#ccffa0" stroke-opacity="0.14"/>
    <rect x="72" y="432" width="8" height="58" rx="4" fill="#ccffa0"/>
    <text x="98" y="456" fill="#f4f8ef" font-size="18" font-weight="720">Future Object-Set Forecasting</text>
    <text x="98" y="479" fill="#a5afa2" font-size="13">multi_label &#183; Object set active five seconds later.</text>
    <text x="840" y="456" fill="#f4f8ef" font-size="16" font-weight="700">minimal 0.1694 micro-F1</text>
    <text x="1110" y="456" fill="#f4f8ef" font-size="16" font-weight="700">neural 0.1972 micro-F1</text>
  </g>

  <!-- Row 6 (y=508): IMU-to-Hand Pose Reconstruction -->
  <g>
    <rect x="72" y="508" width="1296" height="58" rx="8" fill="#020902" stroke="#ccffa0" stroke-opacity="0.14"/>
    <rect x="72" y="508" width="8" height="58" rx="4" fill="#7ae5c3"/>
    <text x="98" y="532" fill="#f4f8ef" font-size="18" font-weight="720">IMU-to-Hand Pose Reconstruction</text>
    <text x="98" y="555" fill="#a5afa2" font-size="13">regression &#183; Current left/right hand joint feature blocks.</text>
    <text x="840" y="532" fill="#f4f8ef" font-size="16" font-weight="700">minimal 0.0420 MAE</text>
    <text x="1110" y="532" fill="#f4f8ef" font-size="16" font-weight="700">neural 0.0426 MAE</text>
  </g>

  <!-- Row 7 (y=584): Camera-View Synchronization Retrieval -->
  <g>
    <rect x="72" y="584" width="1296" height="58" rx="8" fill="#020902" stroke="#ccffa0" stroke-opacity="0.14"/>
    <rect x="72" y="584" width="8" height="58" rx="4" fill="#9bdfff"/>
    <text x="98" y="608" fill="#f4f8ef" font-size="18" font-weight="720">Camera-View Synchronization Retrieval</text>
    <text x="98" y="631" fill="#a5afa2" font-size="13">retrieval &#183; The synchronized held-out camera-3 window.</text>
    <text x="840" y="608" fill="#f4f8ef" font-size="16" font-weight="700">minimal 0.4943 MRR</text>
    <text x="1110" y="608" fill="#f4f8ef" font-size="16" font-weight="700">neural 0.2409 MRR</text>
  </g>

  <!-- Row 8 (y=660): Time-to-Next-Transition Regression -->
  <g>
    <rect x="72" y="660" width="1296" height="58" rx="8" fill="#020902" stroke="#ccffa0" stroke-opacity="0.14"/>
    <rect x="72" y="660" width="8" height="58" rx="4" fill="#d8f4a5"/>
    <text x="98" y="684" fill="#f4f8ef" font-size="18" font-weight="720">Time-to-Next-Transition Regression</text>
    <text x="98" y="707" fill="#a5afa2" font-size="13">regression &#183; Frames until the next action-label boundary, capped at 200 frames.</text>
    <text x="840" y="684" fill="#f4f8ef" font-size="16" font-weight="700">minimal 10.5374 MAE frames</text>
    <text x="1110" y="684" fill="#f4f8ef" font-size="16" font-weight="700">neural 10.5545 MAE frames</text>
  </g>
</svg>