cy0307 committed on
Commit
7de9baa
·
verified ·
1 Parent(s): 6460b80

Add files using upload-large-folder tool

Browse files
Files changed (1) hide show
  1. data/episode128_task_model_radar.json +21 -21
data/episode128_task_model_radar.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-21T10:47:17+00:00",
5
  "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano diagnostics. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
@@ -192,7 +192,7 @@
192
  "label": "Action Recognition",
193
  "axis_label": "01 Action Recognition",
194
  "short_label": "Action",
195
- "origin": "original_public_sample_tasks",
196
  "metric_key": "macro_f1",
197
  "metric_name": "macro-F1",
198
  "metric_direction": "higher",
@@ -283,7 +283,7 @@
283
  "label": "Procedure Step Recognition",
284
  "axis_label": "02 Procedure Step Recognition",
285
  "short_label": "Step",
286
- "origin": "original_public_sample_tasks",
287
  "metric_key": "macro_f1",
288
  "metric_name": "macro-F1",
289
  "metric_direction": "higher",
@@ -374,7 +374,7 @@
374
  "label": "Action Boundary Detection",
375
  "axis_label": "03 Action Boundary Detection",
376
  "short_label": "Boundary",
377
- "origin": "original_public_sample_tasks",
378
  "metric_key": "macro_f1",
379
  "metric_name": "macro-F1",
380
  "metric_direction": "higher",
@@ -465,7 +465,7 @@
465
  "label": "Next-Action Prediction",
466
  "axis_label": "04 Next-Action Prediction",
467
  "short_label": "Next act",
468
- "origin": "original_public_sample_tasks",
469
  "metric_key": "macro_f1",
470
  "metric_name": "macro-F1",
471
  "metric_direction": "higher",
@@ -556,7 +556,7 @@
556
  "label": "Hand Trajectory Forecasting",
557
  "axis_label": "05 Hand Trajectory Forecasting",
558
  "short_label": "Hand traj",
559
- "origin": "original_public_sample_tasks",
560
  "metric_key": "mpjpe",
561
  "metric_name": "MPJPE",
562
  "metric_direction": "lower",
@@ -647,7 +647,7 @@
647
  "label": "Contact State Prediction",
648
  "axis_label": "06 Contact State Prediction",
649
  "short_label": "Contact",
650
- "origin": "original_public_sample_tasks",
651
  "metric_key": "macro_f1",
652
  "metric_name": "macro-F1",
653
  "metric_direction": "higher",
@@ -738,7 +738,7 @@
738
  "label": "Object Relevance Prediction",
739
  "axis_label": "07 Object Relevance Prediction",
740
  "short_label": "Objects",
741
- "origin": "original_public_sample_tasks",
742
  "metric_key": "micro_f1",
743
  "metric_name": "micro-F1",
744
  "metric_direction": "higher",
@@ -829,7 +829,7 @@
829
  "label": "Language Grounding",
830
  "axis_label": "08 Language Grounding",
831
  "short_label": "Language",
832
- "origin": "original_public_sample_tasks",
833
  "metric_key": "mrr",
834
  "metric_name": "MRR",
835
  "metric_direction": "higher",
@@ -920,7 +920,7 @@
920
  "label": "Cross-Modal Retrieval",
921
  "axis_label": "09 Cross-Modal Retrieval",
922
  "short_label": "X-modal",
923
- "origin": "original_public_sample_tasks",
924
  "metric_key": "mrr",
925
  "metric_name": "MRR",
926
  "metric_direction": "higher",
@@ -1011,7 +1011,7 @@
1011
  "label": "Cross-Modal Reconstruction",
1012
  "axis_label": "10 Cross-Modal Reconstruction",
1013
  "short_label": "Recon",
1014
- "origin": "original_public_sample_tasks",
1015
  "metric_key": "r2",
1016
  "metric_name": "R2",
1017
  "metric_direction": "higher",
@@ -1102,7 +1102,7 @@
1102
  "label": "Temporal Order Verification",
1103
  "axis_label": "11 Temporal Order Verification",
1104
  "short_label": "Order",
1105
- "origin": "original_public_sample_tasks",
1106
  "metric_key": "f1",
1107
  "metric_name": "F1",
1108
  "metric_direction": "higher",
@@ -1193,7 +1193,7 @@
1193
  "label": "Multimodal Synchronization Detection",
1194
  "axis_label": "12 Multimodal Synchronization Detection",
1195
  "short_label": "Sync",
1196
- "origin": "original_public_sample_tasks",
1197
  "metric_key": "f1",
1198
  "metric_name": "F1",
1199
  "metric_direction": "higher",
@@ -1284,7 +1284,7 @@
1284
  "label": "Long-Horizon Next-Action Forecasting",
1285
  "axis_label": "13 Long-Horizon Next-Action Forecasting",
1286
  "short_label": "Long act",
1287
- "origin": "additional_public_sample_tasks",
1288
  "metric_key": "macro_f1",
1289
  "metric_name": "macro-F1",
1290
  "metric_direction": "higher",
@@ -1375,7 +1375,7 @@
1375
  "label": "Long-Horizon Next-Subtask Forecasting",
1376
  "axis_label": "14 Long-Horizon Next-Subtask Forecasting",
1377
  "short_label": "Long step",
1378
- "origin": "additional_public_sample_tasks",
1379
  "metric_key": "macro_f1",
1380
  "metric_name": "macro-F1",
1381
  "metric_direction": "higher",
@@ -1466,7 +1466,7 @@
1466
  "label": "Interaction Text Prediction",
1467
  "axis_label": "15 Interaction Text Prediction",
1468
  "short_label": "Interact txt",
1469
- "origin": "additional_public_sample_tasks",
1470
  "metric_key": "macro_f1",
1471
  "metric_name": "macro-F1",
1472
  "metric_direction": "higher",
@@ -1557,7 +1557,7 @@
1557
  "label": "Action-Object Relation Prediction",
1558
  "axis_label": "16 Action-Object Relation Prediction",
1559
  "short_label": "Act+obj",
1560
- "origin": "additional_public_sample_tasks",
1561
  "metric_key": "macro_f1",
1562
  "metric_name": "macro-F1",
1563
  "metric_direction": "higher",
@@ -1648,7 +1648,7 @@
1648
  "label": "Future Object-Set Forecasting",
1649
  "axis_label": "17 Future Object-Set Forecasting",
1650
  "short_label": "Future obj",
1651
- "origin": "additional_public_sample_tasks",
1652
  "metric_key": "micro_f1",
1653
  "metric_name": "micro-F1",
1654
  "metric_direction": "higher",
@@ -1739,7 +1739,7 @@
1739
  "label": "IMU-to-Hand Pose Reconstruction",
1740
  "axis_label": "18 IMU-to-Hand Pose Reconstruction",
1741
  "short_label": "IMU->hand",
1742
- "origin": "additional_public_sample_tasks",
1743
  "metric_key": "mae",
1744
  "metric_name": "MAE",
1745
  "metric_direction": "lower",
@@ -1830,7 +1830,7 @@
1830
  "label": "Camera-View Synchronization Retrieval",
1831
  "axis_label": "19 Camera-View Synchronization Retrieval",
1832
  "short_label": "Cam sync",
1833
- "origin": "additional_public_sample_tasks",
1834
  "metric_key": "mrr",
1835
  "metric_name": "MRR",
1836
  "metric_direction": "higher",
@@ -1921,7 +1921,7 @@
1921
  "label": "Time-to-Next-Transition Regression",
1922
  "axis_label": "20 Time-to-Next-Transition Regression",
1923
  "short_label": "Time2bdry",
1924
- "origin": "additional_public_sample_tasks",
1925
  "metric_key": "mae",
1926
  "metric_name": "MAE frames",
1927
  "metric_direction": "lower",
 
1
  {
2
  "title": "128-Episode 20-Task Radar",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-21T15:20:34+00:00",
5
  "description": "Selected 128-episode metadata/raw baselines plus verified Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano diagnostics. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.",
6
  "task_count": 20,
7
  "method_count": 7,
 
192
  "label": "Action Recognition",
193
  "axis_label": "01 Action Recognition",
194
  "short_label": "Action",
195
+ "provenance_source": "walkthrough_backed_task_contract",
196
  "metric_key": "macro_f1",
197
  "metric_name": "macro-F1",
198
  "metric_direction": "higher",
 
283
  "label": "Procedure Step Recognition",
284
  "axis_label": "02 Procedure Step Recognition",
285
  "short_label": "Step",
286
+ "provenance_source": "walkthrough_backed_task_contract",
287
  "metric_key": "macro_f1",
288
  "metric_name": "macro-F1",
289
  "metric_direction": "higher",
 
374
  "label": "Action Boundary Detection",
375
  "axis_label": "03 Action Boundary Detection",
376
  "short_label": "Boundary",
377
+ "provenance_source": "walkthrough_backed_task_contract",
378
  "metric_key": "macro_f1",
379
  "metric_name": "macro-F1",
380
  "metric_direction": "higher",
 
465
  "label": "Next-Action Prediction",
466
  "axis_label": "04 Next-Action Prediction",
467
  "short_label": "Next act",
468
+ "provenance_source": "walkthrough_backed_task_contract",
469
  "metric_key": "macro_f1",
470
  "metric_name": "macro-F1",
471
  "metric_direction": "higher",
 
556
  "label": "Hand Trajectory Forecasting",
557
  "axis_label": "05 Hand Trajectory Forecasting",
558
  "short_label": "Hand traj",
559
+ "provenance_source": "walkthrough_backed_task_contract",
560
  "metric_key": "mpjpe",
561
  "metric_name": "MPJPE",
562
  "metric_direction": "lower",
 
647
  "label": "Contact State Prediction",
648
  "axis_label": "06 Contact State Prediction",
649
  "short_label": "Contact",
650
+ "provenance_source": "walkthrough_backed_task_contract",
651
  "metric_key": "macro_f1",
652
  "metric_name": "macro-F1",
653
  "metric_direction": "higher",
 
738
  "label": "Object Relevance Prediction",
739
  "axis_label": "07 Object Relevance Prediction",
740
  "short_label": "Objects",
741
+ "provenance_source": "walkthrough_backed_task_contract",
742
  "metric_key": "micro_f1",
743
  "metric_name": "micro-F1",
744
  "metric_direction": "higher",
 
829
  "label": "Language Grounding",
830
  "axis_label": "08 Language Grounding",
831
  "short_label": "Language",
832
+ "provenance_source": "walkthrough_backed_task_contract",
833
  "metric_key": "mrr",
834
  "metric_name": "MRR",
835
  "metric_direction": "higher",
 
920
  "label": "Cross-Modal Retrieval",
921
  "axis_label": "09 Cross-Modal Retrieval",
922
  "short_label": "X-modal",
923
+ "provenance_source": "walkthrough_backed_task_contract",
924
  "metric_key": "mrr",
925
  "metric_name": "MRR",
926
  "metric_direction": "higher",
 
1011
  "label": "Cross-Modal Reconstruction",
1012
  "axis_label": "10 Cross-Modal Reconstruction",
1013
  "short_label": "Recon",
1014
+ "provenance_source": "walkthrough_backed_task_contract",
1015
  "metric_key": "r2",
1016
  "metric_name": "R2",
1017
  "metric_direction": "higher",
 
1102
  "label": "Temporal Order Verification",
1103
  "axis_label": "11 Temporal Order Verification",
1104
  "short_label": "Order",
1105
+ "provenance_source": "walkthrough_backed_task_contract",
1106
  "metric_key": "f1",
1107
  "metric_name": "F1",
1108
  "metric_direction": "higher",
 
1193
  "label": "Multimodal Synchronization Detection",
1194
  "axis_label": "12 Multimodal Synchronization Detection",
1195
  "short_label": "Sync",
1196
+ "provenance_source": "walkthrough_backed_task_contract",
1197
  "metric_key": "f1",
1198
  "metric_name": "F1",
1199
  "metric_direction": "higher",
 
1284
  "label": "Long-Horizon Next-Action Forecasting",
1285
  "axis_label": "13 Long-Horizon Next-Action Forecasting",
1286
  "short_label": "Long act",
1287
+ "provenance_source": "historical_result_bundle",
1288
  "metric_key": "macro_f1",
1289
  "metric_name": "macro-F1",
1290
  "metric_direction": "higher",
 
1375
  "label": "Long-Horizon Next-Subtask Forecasting",
1376
  "axis_label": "14 Long-Horizon Next-Subtask Forecasting",
1377
  "short_label": "Long step",
1378
+ "provenance_source": "historical_result_bundle",
1379
  "metric_key": "macro_f1",
1380
  "metric_name": "macro-F1",
1381
  "metric_direction": "higher",
 
1466
  "label": "Interaction Text Prediction",
1467
  "axis_label": "15 Interaction Text Prediction",
1468
  "short_label": "Interact txt",
1469
+ "provenance_source": "historical_result_bundle",
1470
  "metric_key": "macro_f1",
1471
  "metric_name": "macro-F1",
1472
  "metric_direction": "higher",
 
1557
  "label": "Action-Object Relation Prediction",
1558
  "axis_label": "16 Action-Object Relation Prediction",
1559
  "short_label": "Act+obj",
1560
+ "provenance_source": "historical_result_bundle",
1561
  "metric_key": "macro_f1",
1562
  "metric_name": "macro-F1",
1563
  "metric_direction": "higher",
 
1648
  "label": "Future Object-Set Forecasting",
1649
  "axis_label": "17 Future Object-Set Forecasting",
1650
  "short_label": "Future obj",
1651
+ "provenance_source": "historical_result_bundle",
1652
  "metric_key": "micro_f1",
1653
  "metric_name": "micro-F1",
1654
  "metric_direction": "higher",
 
1739
  "label": "IMU-to-Hand Pose Reconstruction",
1740
  "axis_label": "18 IMU-to-Hand Pose Reconstruction",
1741
  "short_label": "IMU->hand",
1742
+ "provenance_source": "historical_result_bundle",
1743
  "metric_key": "mae",
1744
  "metric_name": "MAE",
1745
  "metric_direction": "lower",
 
1830
  "label": "Camera-View Synchronization Retrieval",
1831
  "axis_label": "19 Camera-View Synchronization Retrieval",
1832
  "short_label": "Cam sync",
1833
+ "provenance_source": "historical_result_bundle",
1834
  "metric_key": "mrr",
1835
  "metric_name": "MRR",
1836
  "metric_direction": "higher",
 
1921
  "label": "Time-to-Next-Transition Regression",
1922
  "axis_label": "20 Time-to-Next-Transition Regression",
1923
  "short_label": "Time2bdry",
1924
+ "provenance_source": "historical_result_bundle",
1925
  "metric_key": "mae",
1926
  "metric_name": "MAE frames",
1927
  "metric_direction": "lower",