Add 128-episode raw-feature baseline results
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/input_report.json +89 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu0_tasks01_05.log +9 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu0_tasks01_05_rerun.log +9 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu1_tasks06_10.log +9 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu2_tasks11_15.log +9 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu3_task16_rerun_4096cap.log +5 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu3_tasks16_20.log +9 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/metrics_summary.csv +3 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/metrics_summary_all.csv +41 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/action_object_relation/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/action_object_relation/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json +13 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/caption_grounding/metrics.json +52 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/contact_prediction/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/contact_prediction/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/cross_modal_retrieval/metrics.json +52 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/hand_trajectory_forecast/metrics.json +52 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/imu_to_hand_pose/metrics.json +52 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/interaction_text_prediction/metrics.json +13 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/long_horizon_next_action/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/long_horizon_next_action/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/misalignment_detection/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/misalignment_detection/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/modality_reconstruction/metrics.json +52 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/next_action/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/next_action/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/next_subtask_forecast/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/next_subtask_forecast/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/object_relevance/metrics.json +51 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/object_set_forecast/metrics.json +51 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/temporal_order/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/temporal_order/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/time_to_transition/metrics.json +52 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/timeline_action/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/timeline_action/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/timeline_subtask/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/timeline_subtask/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/transition_detection/metrics.json +62 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/transition_detection/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/run_summary.json +43 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/run_summary_all.json +409 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/action_object_relation/metrics.json +38 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/action_object_relation/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/caption_grounding/metrics.json +25 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/contact_prediction/metrics.json +38 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/contact_prediction/predictions.csv +0 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/cross_modal_retrieval/metrics.json +25 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/hand_trajectory_forecast/metrics.json +25 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/imu_to_hand_pose/metrics.json +25 -0
- results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/long_horizon_next_action/metrics.json +38 -0
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/input_report.json
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"load_report": {
|
| 3 |
+
"resolved_npz_files": 357,
|
| 4 |
+
"loaded_feature_rows": 34269,
|
| 5 |
+
"input_rows": 34269,
|
| 6 |
+
"dropped_rows": 0,
|
| 7 |
+
"missing_path_examples": [],
|
| 8 |
+
"feature_dim": 4430
|
| 9 |
+
},
|
| 10 |
+
"split_counts": {
|
| 11 |
+
"train": 25629,
|
| 12 |
+
"val": 4608,
|
| 13 |
+
"test": 4032
|
| 14 |
+
},
|
| 15 |
+
"feature_manifest": [
|
| 16 |
+
{
|
| 17 |
+
"name": "hand_left_joints",
|
| 18 |
+
"start": 0,
|
| 19 |
+
"end": 441,
|
| 20 |
+
"dim": 441
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"name": "hand_right_joints",
|
| 24 |
+
"start": 441,
|
| 25 |
+
"end": 882,
|
| 26 |
+
"dim": 441
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"name": "body_joints",
|
| 30 |
+
"start": 882,
|
| 31 |
+
"end": 1974,
|
| 32 |
+
"dim": 1092
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"name": "body_contacts",
|
| 36 |
+
"start": 1974,
|
| 37 |
+
"end": 2121,
|
| 38 |
+
"dim": 147
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"name": "camera_translation",
|
| 42 |
+
"start": 2121,
|
| 43 |
+
"end": 2142,
|
| 44 |
+
"dim": 21
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"name": "camera_rotation_matrix",
|
| 48 |
+
"start": 2142,
|
| 49 |
+
"end": 2205,
|
| 50 |
+
"dim": 63
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"name": "imu_accel_gyro",
|
| 54 |
+
"start": 2205,
|
| 55 |
+
"end": 2247,
|
| 56 |
+
"dim": 42
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"name": "depth_confidence",
|
| 60 |
+
"start": 2247,
|
| 61 |
+
"end": 3227,
|
| 62 |
+
"dim": 980
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"name": "audio_fisheye_cam0_aac",
|
| 66 |
+
"start": 3227,
|
| 67 |
+
"end": 3395,
|
| 68 |
+
"dim": 168
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"name": "caption_objects_interaction_text",
|
| 72 |
+
"start": 3395,
|
| 73 |
+
"end": 4291,
|
| 74 |
+
"dim": 896
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "slam_point_cloud",
|
| 78 |
+
"start": 4291,
|
| 79 |
+
"end": 4313,
|
| 80 |
+
"dim": 22
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"name": "calibration",
|
| 84 |
+
"start": 4313,
|
| 85 |
+
"end": 4430,
|
| 86 |
+
"dim": 117
|
| 87 |
+
}
|
| 88 |
+
]
|
| 89 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu0_tasks01_05.log
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[raw20] loading rows from results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset.jsonl
|
| 2 |
+
[raw20] loading feature matrix for 34269 rows
|
| 3 |
+
[raw20] loaded 34269 x 4430 features from 357 NPZ files
|
| 4 |
+
[raw20] running timeline_action
|
| 5 |
+
[raw20] running timeline_subtask
|
| 6 |
+
[raw20] running transition_detection
|
| 7 |
+
[raw20] running next_action
|
| 8 |
+
[raw20] running hand_trajectory_forecast
|
| 9 |
+
[raw20] done; wrote 10 result records to results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu0_tasks01_05_rerun.log
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[raw20] loading rows from results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset.jsonl
|
| 2 |
+
[raw20] loading feature matrix for 34269 rows
|
| 3 |
+
[raw20] loaded 34269 x 4430 features from 357 NPZ files
|
| 4 |
+
[raw20] running timeline_action
|
| 5 |
+
[raw20] running timeline_subtask
|
| 6 |
+
[raw20] running transition_detection
|
| 7 |
+
[raw20] running next_action
|
| 8 |
+
[raw20] running hand_trajectory_forecast
|
| 9 |
+
[raw20] done; wrote 10 result records to results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu1_tasks06_10.log
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[raw20] loading rows from results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset.jsonl
|
| 2 |
+
[raw20] loading feature matrix for 34269 rows
|
| 3 |
+
[raw20] loaded 34269 x 4430 features from 357 NPZ files
|
| 4 |
+
[raw20] running contact_prediction
|
| 5 |
+
[raw20] running object_relevance
|
| 6 |
+
[raw20] running caption_grounding
|
| 7 |
+
[raw20] running cross_modal_retrieval
|
| 8 |
+
[raw20] running modality_reconstruction
|
| 9 |
+
[raw20] done; wrote 10 result records to results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu2_tasks11_15.log
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[raw20] loading rows from results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset.jsonl
|
| 2 |
+
[raw20] loading feature matrix for 34269 rows
|
| 3 |
+
[raw20] loaded 34269 x 4430 features from 357 NPZ files
|
| 4 |
+
[raw20] running temporal_order
|
| 5 |
+
[raw20] running misalignment_detection
|
| 6 |
+
[raw20] running long_horizon_next_action
|
| 7 |
+
[raw20] running next_subtask_forecast
|
| 8 |
+
[raw20] running interaction_text_prediction
|
| 9 |
+
[raw20] done; wrote 10 result records to results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu3_task16_rerun_4096cap.log
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[raw20] loading rows from results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset.jsonl
|
| 2 |
+
[raw20] loading feature matrix for 34269 rows
|
| 3 |
+
[raw20] loaded 34269 x 4430 features from 357 NPZ files
|
| 4 |
+
[raw20] running action_object_relation
|
| 5 |
+
[raw20] done; wrote 2 result records to results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/logs/gpu3_tasks16_20.log
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[raw20] loading rows from results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset.jsonl
|
| 2 |
+
[raw20] loading feature matrix for 34269 rows
|
| 3 |
+
[raw20] loaded 34269 x 4430 features from 357 NPZ files
|
| 4 |
+
[raw20] running action_object_relation
|
| 5 |
+
[raw20] running object_set_forecast
|
| 6 |
+
[raw20] running imu_to_hand_pose
|
| 7 |
+
[raw20] running camera_view_sync_retrieval
|
| 8 |
+
[raw20] running time_to_transition
|
| 9 |
+
[raw20] done; wrote 10 result records to results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/metrics_summary.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
task,task_display_name,model_family,status,primary_metric,primary_score,metric_direction,reason,error
|
| 2 |
+
action_object_relation,Action Object Relation,simple_raw128_centroid,pass,macro_f1,0.0,higher,,
|
| 3 |
+
action_object_relation,Action Object Relation,neural_mlp_raw128,pass,macro_f1,0.0,higher,,
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/metrics_summary_all.csv
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
task,model_family,status,primary_metric,primary_score,metric_direction,reason,error
|
| 2 |
+
action_object_relation,neural_mlp_raw128,pass,macro_f1,0.0,higher,,
|
| 3 |
+
action_object_relation,simple_raw128_centroid,pass,macro_f1,0.0,higher,,
|
| 4 |
+
camera_view_sync_retrieval,neural_mlp_raw128,unsupported,mrr,,higher,"128-episode NPZ manifest has camera pose plus audio/depth/caption features, but no two explicit video-view feature blocks for camera-view synchronization",
|
| 5 |
+
camera_view_sync_retrieval,simple_raw128_ridge,unsupported,mrr,,higher,"128-episode NPZ manifest has camera pose plus audio/depth/caption features, but no two explicit video-view feature blocks for camera-view synchronization",
|
| 6 |
+
caption_grounding,neural_mlp_raw128,pass,mrr,0.0063402121886610985,higher,,
|
| 7 |
+
caption_grounding,simple_raw128_ridge,pass,mrr,0.011150892823934555,higher,,
|
| 8 |
+
contact_prediction,neural_mlp_raw128,pass,macro_f1,1.0,higher,,
|
| 9 |
+
contact_prediction,simple_raw128_centroid,pass,macro_f1,0.886990707397193,higher,,
|
| 10 |
+
cross_modal_retrieval,neural_mlp_raw128,pass,mrr,0.002535284962505102,higher,,
|
| 11 |
+
cross_modal_retrieval,simple_raw128_ridge,pass,mrr,0.003459817497059703,higher,,
|
| 12 |
+
hand_trajectory_forecast,neural_mlp_raw128,pass,mae,0.18475216627120972,lower,,
|
| 13 |
+
hand_trajectory_forecast,simple_raw128_ridge,pass,mae,0.2729249894618988,lower,,
|
| 14 |
+
imu_to_hand_pose,neural_mlp_raw128,pass,mae,0.252998411655426,lower,,
|
| 15 |
+
imu_to_hand_pose,simple_raw128_ridge,pass,mae,0.22941437363624573,lower,,
|
| 16 |
+
interaction_text_prediction,neural_mlp_raw128,unsupported,macro_f1,,higher,raw 128-episode annotation.hdf5 interaction text is not present in the JSONL export; only hashed caption_objects_interaction_text features are available,
|
| 17 |
+
interaction_text_prediction,simple_raw128_centroid,unsupported,macro_f1,,higher,raw 128-episode annotation.hdf5 interaction text is not present in the JSONL export; only hashed caption_objects_interaction_text features are available,
|
| 18 |
+
long_horizon_next_action,neural_mlp_raw128,pass,macro_f1,0.001063859887389299,higher,,
|
| 19 |
+
long_horizon_next_action,simple_raw128_centroid,pass,macro_f1,0.0024280172369056294,higher,,
|
| 20 |
+
misalignment_detection,neural_mlp_raw128,pass,macro_f1,0.8272709077974252,higher,,
|
| 21 |
+
misalignment_detection,simple_raw128_centroid,pass,macro_f1,0.4958867673901769,higher,,
|
| 22 |
+
modality_reconstruction,neural_mlp_raw128,pass,r2,-1.3974418160502369,higher,,
|
| 23 |
+
modality_reconstruction,simple_raw128_ridge,pass,r2,-1.3450960391924882,higher,,
|
| 24 |
+
next_action,neural_mlp_raw128,pass,macro_f1,0.0018477984371755407,higher,,
|
| 25 |
+
next_action,simple_raw128_centroid,pass,macro_f1,0.003285273363482094,higher,,
|
| 26 |
+
next_subtask_forecast,neural_mlp_raw128,pass,macro_f1,0.0,higher,,
|
| 27 |
+
next_subtask_forecast,simple_raw128_centroid,pass,macro_f1,0.0,higher,,
|
| 28 |
+
object_relevance,neural_mlp_raw128_multilabel,pass,micro_f1,0.1765890386972509,higher,,
|
| 29 |
+
object_relevance,simple_raw128_ridge_multilabel,pass,micro_f1,0.0655376369662084,higher,,
|
| 30 |
+
object_set_forecast,neural_mlp_raw128_multilabel,pass,micro_f1,0.17523098630012288,higher,,
|
| 31 |
+
object_set_forecast,simple_raw128_ridge_multilabel,pass,micro_f1,0.06469493412657774,higher,,
|
| 32 |
+
temporal_order,neural_mlp_raw128,pass,macro_f1,0.8030047098504103,higher,,
|
| 33 |
+
temporal_order,simple_raw128_centroid,pass,macro_f1,0.49824413370686593,higher,,
|
| 34 |
+
time_to_transition,neural_mlp_raw128,pass,mae,42.374061584472656,lower,,
|
| 35 |
+
time_to_transition,simple_raw128_ridge,pass,mae,52.32759094238281,lower,,
|
| 36 |
+
timeline_action,neural_mlp_raw128,pass,macro_f1,0.0014955083181204041,higher,,
|
| 37 |
+
timeline_action,simple_raw128_centroid,pass,macro_f1,0.002915061325704321,higher,,
|
| 38 |
+
timeline_subtask,neural_mlp_raw128,pass,macro_f1,7.35632183908046e-05,higher,,
|
| 39 |
+
timeline_subtask,simple_raw128_centroid,pass,macro_f1,0.0,higher,,
|
| 40 |
+
transition_detection,neural_mlp_raw128,pass,macro_f1,0.4902206914147213,higher,,
|
| 41 |
+
transition_detection,simple_raw128_centroid,pass,macro_f1,0.4203613574238283,higher,,
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/action_object_relation/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "action_object_relation",
|
| 3 |
+
"task_display_name": "Action Object Relation",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 7.840090205664913,
|
| 16 |
+
"train_accuracy": 0.02232423101067169
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 1.9252043012845315,
|
| 21 |
+
"train_accuracy": 0.4901129943502825
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.8283010613446855,
|
| 26 |
+
"train_accuracy": 0.6963669177652229
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.5116730567724991,
|
| 31 |
+
"train_accuracy": 0.7887633396107973
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.37476183882840236,
|
| 36 |
+
"train_accuracy": 0.8343926553672316
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.2903947299819882,
|
| 41 |
+
"train_accuracy": 0.8655053358443189
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 25488,
|
| 45 |
+
"num_test_windows": 4014,
|
| 46 |
+
"num_classes": 4149,
|
| 47 |
+
"num_train_classes": 3058,
|
| 48 |
+
"input_dim": 3534,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.0,
|
| 54 |
+
"balanced_accuracy": 0.0,
|
| 55 |
+
"macro_f1": 0.0,
|
| 56 |
+
"weighted_f1": 0.0,
|
| 57 |
+
"num_eval_windows": 4014,
|
| 58 |
+
"num_classes": 4149
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 0.0
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/action_object_relation/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "camera_view_sync_retrieval",
|
| 3 |
+
"task_display_name": "Camera View Sync Retrieval",
|
| 4 |
+
"task_family": "retrieval",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "not run",
|
| 8 |
+
"primary_metric": "mrr",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "unsupported",
|
| 11 |
+
"reason": "128-episode NPZ manifest has camera pose plus audio/depth/caption features, but no two explicit video-view feature blocks for camera-view synchronization",
|
| 12 |
+
"primary_score": null
|
| 13 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/caption_grounding/metrics.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "caption_grounding",
|
| 3 |
+
"task_display_name": "Language Grounding",
|
| 4 |
+
"task_family": "retrieval",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "non-caption sensor blocks projected to hashed caption/object/interaction block",
|
| 8 |
+
"primary_metric": "mrr",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.9730807784066104
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"epoch": 5,
|
| 19 |
+
"loss": 0.8797651895419402
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 10,
|
| 23 |
+
"loss": 0.8487889279395084
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 15,
|
| 27 |
+
"loss": 0.8318103914064764
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 20,
|
| 31 |
+
"loss": 0.821267495579444
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 25,
|
| 35 |
+
"loss": 0.8125992868655396
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"num_train_windows": 25629,
|
| 39 |
+
"num_test_windows": 4032,
|
| 40 |
+
"input_dim": 3534,
|
| 41 |
+
"fit_input_dim": 2048,
|
| 42 |
+
"target_dim": 896,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"mrr": 0.0063402121886610985,
|
| 46 |
+
"top1": 0.002232142857142857,
|
| 47 |
+
"median_rank": 1392.0,
|
| 48 |
+
"num_queries": 4032
|
| 49 |
+
}
|
| 50 |
+
},
|
| 51 |
+
"primary_score": 0.0063402121886610985
|
| 52 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/contact_prediction/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "contact_prediction",
|
| 3 |
+
"task_display_name": "Contact State Prediction",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.02249114251404696,
|
| 16 |
+
"train_accuracy": 0.9850950095594834
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 3.892005114592888e-06,
|
| 21 |
+
"train_accuracy": 1.0
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 1.6485579969316457e-06,
|
| 26 |
+
"train_accuracy": 1.0
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 7.494956065371638e-07,
|
| 31 |
+
"train_accuracy": 1.0
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 3.961833819914944e-07,
|
| 36 |
+
"train_accuracy": 1.0
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 3.9328009280915035e-07,
|
| 41 |
+
"train_accuracy": 1.0
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 25629,
|
| 45 |
+
"num_test_windows": 4032,
|
| 46 |
+
"num_classes": 2,
|
| 47 |
+
"num_train_classes": 2,
|
| 48 |
+
"input_dim": 3534,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 1.0,
|
| 54 |
+
"balanced_accuracy": 1.0,
|
| 55 |
+
"macro_f1": 1.0,
|
| 56 |
+
"weighted_f1": 1.0,
|
| 57 |
+
"num_eval_windows": 4032,
|
| 58 |
+
"num_classes": 2
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 1.0
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/contact_prediction/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/cross_modal_retrieval/metrics.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "cross_modal_retrieval",
|
| 3 |
+
"task_display_name": "Cross-Modal Retrieval",
|
| 4 |
+
"task_family": "retrieval",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "all non-depth sensor blocks projected to depth-confidence block",
|
| 8 |
+
"primary_metric": "mrr",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.7975420301166781
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"epoch": 5,
|
| 19 |
+
"loss": 0.5641444217256827
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 10,
|
| 23 |
+
"loss": 0.5163868686951831
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 15,
|
| 27 |
+
"loss": 0.4953940257414378
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 20,
|
| 31 |
+
"loss": 0.4823577042322097
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 25,
|
| 35 |
+
"loss": 0.4730072832515932
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"num_train_windows": 25629,
|
| 39 |
+
"num_test_windows": 4032,
|
| 40 |
+
"input_dim": 3450,
|
| 41 |
+
"fit_input_dim": 2048,
|
| 42 |
+
"target_dim": 980,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"mrr": 0.002535284962505102,
|
| 46 |
+
"top1": 0.0,
|
| 47 |
+
"median_rank": 1893.5,
|
| 48 |
+
"num_queries": 4032
|
| 49 |
+
}
|
| 50 |
+
},
|
| 51 |
+
"primary_score": 0.002535284962505102
|
| 52 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/hand_trajectory_forecast/metrics.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "hand_trajectory_forecast",
|
| 3 |
+
"task_display_name": "Hand Trajectory Forecasting",
|
| 4 |
+
"task_family": "regression",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "current non-hand/non-caption features; target hand joint feature block +20 frames",
|
| 8 |
+
"primary_metric": "mae",
|
| 9 |
+
"metric_direction": "lower",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.8031348615485111
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"epoch": 5,
|
| 19 |
+
"loss": 0.5183512075500258
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 10,
|
| 23 |
+
"loss": 0.3657062302656374
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 15,
|
| 27 |
+
"loss": 0.28521906561420884
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 20,
|
| 31 |
+
"loss": 0.24974514583392887
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 25,
|
| 35 |
+
"loss": 0.22191785270443581
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"num_train_windows": 25502,
|
| 39 |
+
"num_test_windows": 4015,
|
| 40 |
+
"input_dim": 2652,
|
| 41 |
+
"fit_input_dim": 2048,
|
| 42 |
+
"target_dim": 882,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"mae": 0.18475216627120972,
|
| 46 |
+
"rmse": 0.43915748596191406,
|
| 47 |
+
"r2": 0.11917128475110383,
|
| 48 |
+
"mean_l2": 9.090903282165527
|
| 49 |
+
}
|
| 50 |
+
},
|
| 51 |
+
"primary_score": 0.18475216627120972
|
| 52 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/imu_to_hand_pose/metrics.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "imu_to_hand_pose",
|
| 3 |
+
"task_display_name": "Imu To Hand Pose",
|
| 4 |
+
"task_family": "regression",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "IMU acceleration/gyroscope block reconstructs hand-joint blocks",
|
| 8 |
+
"primary_metric": "mae",
|
| 9 |
+
"metric_direction": "lower",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.9585941261004479
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"epoch": 5,
|
| 19 |
+
"loss": 0.9127171490970956
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 10,
|
| 23 |
+
"loss": 0.8793233014191721
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 15,
|
| 27 |
+
"loss": 0.844667680290311
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 20,
|
| 31 |
+
"loss": 0.8032877514339916
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 25,
|
| 35 |
+
"loss": 0.7622054215548092
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"num_train_windows": 25629,
|
| 39 |
+
"num_test_windows": 4032,
|
| 40 |
+
"input_dim": 42,
|
| 41 |
+
"fit_input_dim": 42,
|
| 42 |
+
"target_dim": 882,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"mae": 0.252998411655426,
|
| 46 |
+
"rmse": 0.5090259909629822,
|
| 47 |
+
"r2": -0.1798296121579115,
|
| 48 |
+
"mean_l2": 12.296762466430664
|
| 49 |
+
}
|
| 50 |
+
},
|
| 51 |
+
"primary_score": 0.252998411655426
|
| 52 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/interaction_text_prediction/metrics.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "interaction_text_prediction",
|
| 3 |
+
"task_display_name": "Interaction Text Prediction",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "not run",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "unsupported",
|
| 11 |
+
"reason": "raw 128-episode annotation.hdf5 interaction text is not present in the JSONL export; only hashed caption_objects_interaction_text features are available",
|
| 12 |
+
"primary_score": null
|
| 13 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/long_horizon_next_action/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "long_horizon_next_action",
|
| 3 |
+
"task_display_name": "Long Horizon Next Action",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "current non-caption features; target action +100 frames",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 5.801561136460293,
|
| 16 |
+
"train_accuracy": 0.07798787298547949
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 1.1626708901668443,
|
| 21 |
+
"train_accuracy": 0.513602999840434
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.6149468233715847,
|
| 26 |
+
"train_accuracy": 0.6583692356789532
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.4115334245525937,
|
| 31 |
+
"train_accuracy": 0.7293362055209829
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.339562276861248,
|
| 36 |
+
"train_accuracy": 0.7671134514121589
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.25582631674416995,
|
| 41 |
+
"train_accuracy": 0.8034546034785384
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 25068,
|
| 45 |
+
"num_test_windows": 3951,
|
| 46 |
+
"num_classes": 1211,
|
| 47 |
+
"num_train_classes": 887,
|
| 48 |
+
"input_dim": 3534,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.0020248038471273096,
|
| 54 |
+
"balanced_accuracy": 0.0019148400100781054,
|
| 55 |
+
"macro_f1": 0.001063859887389299,
|
| 56 |
+
"weighted_f1": 0.0011975577833811789,
|
| 57 |
+
"num_eval_windows": 3951,
|
| 58 |
+
"num_classes": 1211
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 0.001063859887389299
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/long_horizon_next_action/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/misalignment_detection/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "misalignment_detection",
|
| 3 |
+
"task_display_name": "Multimodal Synchronization Detection",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "motion/camera/IMU query paired with aligned or shifted depth/audio target",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.584179866367518,
|
| 16 |
+
"train_accuracy": 0.6658104908295541
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 0.37274408434943146,
|
| 21 |
+
"train_accuracy": 0.8252598627443111
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.2720396854143376,
|
| 26 |
+
"train_accuracy": 0.8785969418469318
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.21012097329801926,
|
| 31 |
+
"train_accuracy": 0.9071116105470161
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.1713544537477405,
|
| 36 |
+
"train_accuracy": 0.925793634867761
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.14908742030340427,
|
| 41 |
+
"train_accuracy": 0.9357466789741943
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 49834,
|
| 45 |
+
"num_test_windows": 7840,
|
| 46 |
+
"num_classes": 2,
|
| 47 |
+
"num_train_classes": 2,
|
| 48 |
+
"input_dim": 3395,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.8274234693877551,
|
| 54 |
+
"balanced_accuracy": 0.8274234693877551,
|
| 55 |
+
"macro_f1": 0.8272709077974252,
|
| 56 |
+
"weighted_f1": 0.8272709077974253,
|
| 57 |
+
"num_eval_windows": 7840,
|
| 58 |
+
"num_classes": 2
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 0.8272709077974252
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/misalignment_detection/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/modality_reconstruction/metrics.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "modality_reconstruction",
|
| 3 |
+
"task_display_name": "Cross-Modal Reconstruction",
|
| 4 |
+
"task_family": "regression",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "all non-depth sensor blocks reconstruct depth-confidence block",
|
| 8 |
+
"primary_metric": "r2",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.795406650627551
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"epoch": 5,
|
| 19 |
+
"loss": 0.5652745503729759
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 10,
|
| 23 |
+
"loss": 0.5144153572181445
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 15,
|
| 27 |
+
"loss": 0.4929477720702684
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 20,
|
| 31 |
+
"loss": 0.4814860376392508
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 25,
|
| 35 |
+
"loss": 0.4724465353307799
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"num_train_windows": 25629,
|
| 39 |
+
"num_test_windows": 4032,
|
| 40 |
+
"input_dim": 3450,
|
| 41 |
+
"fit_input_dim": 2048,
|
| 42 |
+
"target_dim": 980,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"mae": 4963.66650390625,
|
| 46 |
+
"rmse": 381740.15625,
|
| 47 |
+
"r2": -1.3974418160502369,
|
| 48 |
+
"mean_l2": 3501567.0
|
| 49 |
+
}
|
| 50 |
+
},
|
| 51 |
+
"primary_score": -1.3974418160502369
|
| 52 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/next_action/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "next_action",
|
| 3 |
+
"task_display_name": "Next-Action Prediction",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 5.596322411365963,
|
| 16 |
+
"train_accuracy": 0.08123609973077374
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 1.0855037269383347,
|
| 21 |
+
"train_accuracy": 0.5211674275235085
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.5761551915229453,
|
| 26 |
+
"train_accuracy": 0.655780561083148
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.37495767347169556,
|
| 31 |
+
"train_accuracy": 0.7235943657575403
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.28612062079771855,
|
| 36 |
+
"train_accuracy": 0.758008505989309
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.2286090604913533,
|
| 41 |
+
"train_accuracy": 0.7849311327012369
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 25629,
|
| 45 |
+
"num_test_windows": 4032,
|
| 46 |
+
"num_classes": 1217,
|
| 47 |
+
"num_train_classes": 891,
|
| 48 |
+
"input_dim": 3534,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.004464285714285714,
|
| 54 |
+
"balanced_accuracy": 0.0034805255007437285,
|
| 55 |
+
"macro_f1": 0.0018477984371755407,
|
| 56 |
+
"weighted_f1": 0.003505490015635165,
|
| 57 |
+
"num_eval_windows": 4032,
|
| 58 |
+
"num_classes": 1217
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 0.0018477984371755407
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/next_action/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/next_subtask_forecast/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "next_subtask_forecast",
|
| 3 |
+
"task_display_name": "Next Subtask Forecast",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "current non-caption features; target subtask +100 frames",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 4.954612842526032,
|
| 16 |
+
"train_accuracy": 0.13140258496888463
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 0.9000010563651207,
|
| 21 |
+
"train_accuracy": 0.6606430509015477
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.46146437387365996,
|
| 26 |
+
"train_accuracy": 0.7953566299664911
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.28807071359682185,
|
| 31 |
+
"train_accuracy": 0.8596218286261369
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.21170043317832615,
|
| 36 |
+
"train_accuracy": 0.8893011010052657
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.17162281766032153,
|
| 41 |
+
"train_accuracy": 0.9070927078346896
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 25068,
|
| 45 |
+
"num_test_windows": 3951,
|
| 46 |
+
"num_classes": 891,
|
| 47 |
+
"num_train_classes": 651,
|
| 48 |
+
"input_dim": 3534,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.0,
|
| 54 |
+
"balanced_accuracy": 0.0,
|
| 55 |
+
"macro_f1": 0.0,
|
| 56 |
+
"weighted_f1": 0.0,
|
| 57 |
+
"num_eval_windows": 3951,
|
| 58 |
+
"num_classes": 891
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 0.0
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/next_subtask_forecast/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/object_relevance/metrics.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "object_relevance",
|
| 3 |
+
"task_display_name": "Object Relevance Prediction",
|
| 4 |
+
"task_family": "multi_label",
|
| 5 |
+
"model_family": "neural_mlp_raw128_multilabel",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "micro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.3179367709060545
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"epoch": 5,
|
| 19 |
+
"loss": 0.08152506840292292
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 10,
|
| 23 |
+
"loss": 0.05606942784121547
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 15,
|
| 27 |
+
"loss": 0.04551570554540027
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 20,
|
| 31 |
+
"loss": 0.03935748256850226
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 25,
|
| 35 |
+
"loss": 0.034496653577433256
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"num_train_windows": 25629,
|
| 39 |
+
"num_test_windows": 4032,
|
| 40 |
+
"num_labels": 256,
|
| 41 |
+
"input_dim": 3534,
|
| 42 |
+
"fit_input_dim": 2048,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"micro_f1": 0.1765890386972509,
|
| 46 |
+
"macro_f1": 0.026473024044082846,
|
| 47 |
+
"exact_match": 0.010168650793650794
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
"primary_score": 0.1765890386972509
|
| 51 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/object_set_forecast/metrics.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "object_set_forecast",
|
| 3 |
+
"task_display_name": "Object Set Forecast",
|
| 4 |
+
"task_family": "multi_label",
|
| 5 |
+
"model_family": "neural_mlp_raw128_multilabel",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "current non-caption features; target object set +100 frames",
|
| 8 |
+
"primary_metric": "micro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.3173181395106896
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"epoch": 5,
|
| 19 |
+
"loss": 0.08031858284117332
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 10,
|
| 23 |
+
"loss": 0.056561457963628554
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 15,
|
| 27 |
+
"loss": 0.04601203178259028
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 20,
|
| 31 |
+
"loss": 0.03955884521596166
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 25,
|
| 35 |
+
"loss": 0.03473486830971544
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"num_train_windows": 25068,
|
| 39 |
+
"num_test_windows": 3951,
|
| 40 |
+
"num_labels": 256,
|
| 41 |
+
"input_dim": 3534,
|
| 42 |
+
"fit_input_dim": 2048,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"micro_f1": 0.17523098630012288,
|
| 46 |
+
"macro_f1": 0.021405026097435987,
|
| 47 |
+
"exact_match": 0.0030372057706909645
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
"primary_score": 0.17523098630012288
|
| 51 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/temporal_order/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "temporal_order",
|
| 3 |
+
"task_display_name": "Temporal Order Verification",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "concatenated adjacent sensor-window pairs",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.6823423368630226,
|
| 16 |
+
"train_accuracy": 0.534788566953798
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 0.47532859007268563,
|
| 21 |
+
"train_accuracy": 0.7526820673453406
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.3663804764708412,
|
| 26 |
+
"train_accuracy": 0.8226507439310885
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.30436070449387737,
|
| 31 |
+
"train_accuracy": 0.85561863743148
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.25795707907075516,
|
| 36 |
+
"train_accuracy": 0.8806382145653876
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.2276858064469268,
|
| 41 |
+
"train_accuracy": 0.8944988253719656
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 51080,
|
| 45 |
+
"num_test_windows": 8036,
|
| 46 |
+
"num_classes": 2,
|
| 47 |
+
"num_train_classes": 2,
|
| 48 |
+
"input_dim": 7068,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.8030114484818317,
|
| 54 |
+
"balanced_accuracy": 0.8030114484818318,
|
| 55 |
+
"macro_f1": 0.8030047098504103,
|
| 56 |
+
"weighted_f1": 0.8030047098504102,
|
| 57 |
+
"num_eval_windows": 8036,
|
| 58 |
+
"num_classes": 2
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 0.8030047098504103
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/temporal_order/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/time_to_transition/metrics.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "time_to_transition",
|
| 3 |
+
"task_display_name": "Time To Transition",
|
| 4 |
+
"task_family": "regression",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "non-caption sensor features regress frames to next action boundary capped at 200",
|
| 8 |
+
"primary_metric": "mae",
|
| 9 |
+
"metric_direction": "lower",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.8273973769419527
|
| 16 |
+
},
|
| 17 |
+
{
|
| 18 |
+
"epoch": 5,
|
| 19 |
+
"loss": 0.4202246279718631
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 10,
|
| 23 |
+
"loss": 0.26081196071136065
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 15,
|
| 27 |
+
"loss": 0.1965682344275895
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"epoch": 20,
|
| 31 |
+
"loss": 0.1588804939971577
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 25,
|
| 35 |
+
"loss": 0.13691731317311034
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"num_train_windows": 25629,
|
| 39 |
+
"num_test_windows": 4032,
|
| 40 |
+
"input_dim": 3534,
|
| 41 |
+
"fit_input_dim": 2048,
|
| 42 |
+
"target_dim": 1,
|
| 43 |
+
"splits": {
|
| 44 |
+
"test": {
|
| 45 |
+
"mae": 42.374061584472656,
|
| 46 |
+
"rmse": 55.66938400268555,
|
| 47 |
+
"r2": -0.23432442537520948,
|
| 48 |
+
"mean_l2": 42.374061584472656
|
| 49 |
+
}
|
| 50 |
+
},
|
| 51 |
+
"primary_score": 42.374061584472656
|
| 52 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/timeline_action/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "timeline_action",
|
| 3 |
+
"task_display_name": "Action Recognition",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 5.34494523035712,
|
| 16 |
+
"train_accuracy": 0.08946896094268211
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 0.9199241166383841,
|
| 21 |
+
"train_accuracy": 0.5943267392407039
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.44217930797357696,
|
| 26 |
+
"train_accuracy": 0.7336610870498264
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.28310169599762225,
|
| 31 |
+
"train_accuracy": 0.8029966054079363
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.19902630149213452,
|
| 36 |
+
"train_accuracy": 0.842209996488353
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.18476505181807562,
|
| 41 |
+
"train_accuracy": 0.855359163447657
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 25629,
|
| 45 |
+
"num_test_windows": 4032,
|
| 46 |
+
"num_classes": 1222,
|
| 47 |
+
"num_train_classes": 896,
|
| 48 |
+
"input_dim": 3534,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.001984126984126984,
|
| 54 |
+
"balanced_accuracy": 0.0015447515447515447,
|
| 55 |
+
"macro_f1": 0.0014955083181204041,
|
| 56 |
+
"weighted_f1": 0.0019879946780531578,
|
| 57 |
+
"num_eval_windows": 4032,
|
| 58 |
+
"num_classes": 1222
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 0.0014955083181204041
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/timeline_action/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/timeline_subtask/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "timeline_subtask",
|
| 3 |
+
"task_display_name": "Procedure Step Recognition",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 4.781946432307218,
|
| 16 |
+
"train_accuracy": 0.14272893987280033
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 0.8307986326160719,
|
| 21 |
+
"train_accuracy": 0.6892972804245191
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.3857223062508468,
|
| 26 |
+
"train_accuracy": 0.8305435249131843
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.25177908692243084,
|
| 31 |
+
"train_accuracy": 0.8828280463537399
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.1772773926213155,
|
| 36 |
+
"train_accuracy": 0.9092044168715128
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.1421989650903465,
|
| 41 |
+
"train_accuracy": 0.9217683093370791
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 25629,
|
| 45 |
+
"num_test_windows": 4032,
|
| 46 |
+
"num_classes": 892,
|
| 47 |
+
"num_train_classes": 652,
|
| 48 |
+
"input_dim": 3534,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.000496031746031746,
|
| 54 |
+
"balanced_accuracy": 0.0013333333333333333,
|
| 55 |
+
"macro_f1": 7.35632183908046e-05,
|
| 56 |
+
"weighted_f1": 2.7367268746579092e-05,
|
| 57 |
+
"num_eval_windows": 4032,
|
| 58 |
+
"num_classes": 892
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 7.35632183908046e-05
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/timeline_subtask/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/transition_detection/metrics.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "transition_detection",
|
| 3 |
+
"task_display_name": "Action Boundary Detection",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "neural_mlp_raw128",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"device": "cuda",
|
| 12 |
+
"history": [
|
| 13 |
+
{
|
| 14 |
+
"epoch": 1,
|
| 15 |
+
"loss": 0.5537091893220953,
|
| 16 |
+
"train_accuracy": 0.7428303874517148
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 5,
|
| 20 |
+
"loss": 0.19651318897678038,
|
| 21 |
+
"train_accuracy": 0.9144718873151508
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 10,
|
| 25 |
+
"loss": 0.09844583694868982,
|
| 26 |
+
"train_accuracy": 0.9596940965312731
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"epoch": 15,
|
| 30 |
+
"loss": 0.10910748333434252,
|
| 31 |
+
"train_accuracy": 0.9566506691638378
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"epoch": 20,
|
| 35 |
+
"loss": 0.07368280102906076,
|
| 36 |
+
"train_accuracy": 0.9692925982285692
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 25,
|
| 40 |
+
"loss": 0.030949957263201364,
|
| 41 |
+
"train_accuracy": 0.9879043271294237
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"num_train_windows": 25629,
|
| 45 |
+
"num_test_windows": 4032,
|
| 46 |
+
"num_classes": 2,
|
| 47 |
+
"num_train_classes": 2,
|
| 48 |
+
"input_dim": 3534,
|
| 49 |
+
"fit_input_dim": 2048,
|
| 50 |
+
"selected_column_count": 2048,
|
| 51 |
+
"splits": {
|
| 52 |
+
"test": {
|
| 53 |
+
"accuracy": 0.9446924603174603,
|
| 54 |
+
"balanced_accuracy": 0.49010925819436457,
|
| 55 |
+
"macro_f1": 0.4902206914147213,
|
| 56 |
+
"weighted_f1": 0.9627185273267364,
|
| 57 |
+
"num_eval_windows": 4032,
|
| 58 |
+
"num_classes": 2
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"primary_score": 0.4902206914147213
|
| 62 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/neural_mlp_raw128/transition_detection/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/run_summary.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_jsonl": "results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset/dataset.jsonl",
|
| 3 |
+
"feature_manifest_json": "results/omni_finetune/xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_dataset_dense_20f_stride10/dataset_manifest.json",
|
| 4 |
+
"tasks_requested": [
|
| 5 |
+
"action_object_relation"
|
| 6 |
+
],
|
| 7 |
+
"load_report": {
|
| 8 |
+
"resolved_npz_files": 357,
|
| 9 |
+
"loaded_feature_rows": 34269,
|
| 10 |
+
"input_rows": 34269,
|
| 11 |
+
"dropped_rows": 0,
|
| 12 |
+
"missing_path_examples": [],
|
| 13 |
+
"feature_dim": 4430
|
| 14 |
+
},
|
| 15 |
+
"num_result_records": 2,
|
| 16 |
+
"status_counts": {
|
| 17 |
+
"pass": 2
|
| 18 |
+
},
|
| 19 |
+
"results": [
|
| 20 |
+
{
|
| 21 |
+
"task": "action_object_relation",
|
| 22 |
+
"task_display_name": "Action Object Relation",
|
| 23 |
+
"model_family": "simple_raw128_centroid",
|
| 24 |
+
"status": "pass",
|
| 25 |
+
"primary_metric": "macro_f1",
|
| 26 |
+
"primary_score": 0.0,
|
| 27 |
+
"metric_direction": "higher",
|
| 28 |
+
"reason": null,
|
| 29 |
+
"error": null
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"task": "action_object_relation",
|
| 33 |
+
"task_display_name": "Action Object Relation",
|
| 34 |
+
"model_family": "neural_mlp_raw128",
|
| 35 |
+
"status": "pass",
|
| 36 |
+
"primary_metric": "macro_f1",
|
| 37 |
+
"primary_score": 0.0,
|
| 38 |
+
"metric_direction": "higher",
|
| 39 |
+
"reason": null,
|
| 40 |
+
"error": null
|
| 41 |
+
}
|
| 42 |
+
]
|
| 43 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/run_summary_all.json
ADDED
|
@@ -0,0 +1,409 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_records": 40,
|
| 3 |
+
"status_counts": {
|
| 4 |
+
"pass": 36,
|
| 5 |
+
"unsupported": 4
|
| 6 |
+
},
|
| 7 |
+
"records": [
|
| 8 |
+
{
|
| 9 |
+
"task": "action_object_relation",
|
| 10 |
+
"model_family": "neural_mlp_raw128",
|
| 11 |
+
"status": "pass",
|
| 12 |
+
"primary_metric": "macro_f1",
|
| 13 |
+
"primary_score": 0.0,
|
| 14 |
+
"metric_direction": "higher",
|
| 15 |
+
"reason": null,
|
| 16 |
+
"error": null
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"task": "action_object_relation",
|
| 20 |
+
"model_family": "simple_raw128_centroid",
|
| 21 |
+
"status": "pass",
|
| 22 |
+
"primary_metric": "macro_f1",
|
| 23 |
+
"primary_score": 0.0,
|
| 24 |
+
"metric_direction": "higher",
|
| 25 |
+
"reason": null,
|
| 26 |
+
"error": null
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"task": "camera_view_sync_retrieval",
|
| 30 |
+
"model_family": "neural_mlp_raw128",
|
| 31 |
+
"status": "unsupported",
|
| 32 |
+
"primary_metric": "mrr",
|
| 33 |
+
"primary_score": null,
|
| 34 |
+
"metric_direction": "higher",
|
| 35 |
+
"reason": "128-episode NPZ manifest has camera pose plus audio/depth/caption features, but no two explicit video-view feature blocks for camera-view synchronization",
|
| 36 |
+
"error": null
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"task": "camera_view_sync_retrieval",
|
| 40 |
+
"model_family": "simple_raw128_ridge",
|
| 41 |
+
"status": "unsupported",
|
| 42 |
+
"primary_metric": "mrr",
|
| 43 |
+
"primary_score": null,
|
| 44 |
+
"metric_direction": "higher",
|
| 45 |
+
"reason": "128-episode NPZ manifest has camera pose plus audio/depth/caption features, but no two explicit video-view feature blocks for camera-view synchronization",
|
| 46 |
+
"error": null
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"task": "caption_grounding",
|
| 50 |
+
"model_family": "neural_mlp_raw128",
|
| 51 |
+
"status": "pass",
|
| 52 |
+
"primary_metric": "mrr",
|
| 53 |
+
"primary_score": 0.0063402121886610985,
|
| 54 |
+
"metric_direction": "higher",
|
| 55 |
+
"reason": null,
|
| 56 |
+
"error": null
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"task": "caption_grounding",
|
| 60 |
+
"model_family": "simple_raw128_ridge",
|
| 61 |
+
"status": "pass",
|
| 62 |
+
"primary_metric": "mrr",
|
| 63 |
+
"primary_score": 0.011150892823934555,
|
| 64 |
+
"metric_direction": "higher",
|
| 65 |
+
"reason": null,
|
| 66 |
+
"error": null
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"task": "contact_prediction",
|
| 70 |
+
"model_family": "neural_mlp_raw128",
|
| 71 |
+
"status": "pass",
|
| 72 |
+
"primary_metric": "macro_f1",
|
| 73 |
+
"primary_score": 1.0,
|
| 74 |
+
"metric_direction": "higher",
|
| 75 |
+
"reason": null,
|
| 76 |
+
"error": null
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"task": "contact_prediction",
|
| 80 |
+
"model_family": "simple_raw128_centroid",
|
| 81 |
+
"status": "pass",
|
| 82 |
+
"primary_metric": "macro_f1",
|
| 83 |
+
"primary_score": 0.886990707397193,
|
| 84 |
+
"metric_direction": "higher",
|
| 85 |
+
"reason": null,
|
| 86 |
+
"error": null
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"task": "cross_modal_retrieval",
|
| 90 |
+
"model_family": "neural_mlp_raw128",
|
| 91 |
+
"status": "pass",
|
| 92 |
+
"primary_metric": "mrr",
|
| 93 |
+
"primary_score": 0.002535284962505102,
|
| 94 |
+
"metric_direction": "higher",
|
| 95 |
+
"reason": null,
|
| 96 |
+
"error": null
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"task": "cross_modal_retrieval",
|
| 100 |
+
"model_family": "simple_raw128_ridge",
|
| 101 |
+
"status": "pass",
|
| 102 |
+
"primary_metric": "mrr",
|
| 103 |
+
"primary_score": 0.003459817497059703,
|
| 104 |
+
"metric_direction": "higher",
|
| 105 |
+
"reason": null,
|
| 106 |
+
"error": null
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"task": "hand_trajectory_forecast",
|
| 110 |
+
"model_family": "neural_mlp_raw128",
|
| 111 |
+
"status": "pass",
|
| 112 |
+
"primary_metric": "mae",
|
| 113 |
+
"primary_score": 0.18475216627120972,
|
| 114 |
+
"metric_direction": "lower",
|
| 115 |
+
"reason": null,
|
| 116 |
+
"error": null
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"task": "hand_trajectory_forecast",
|
| 120 |
+
"model_family": "simple_raw128_ridge",
|
| 121 |
+
"status": "pass",
|
| 122 |
+
"primary_metric": "mae",
|
| 123 |
+
"primary_score": 0.2729249894618988,
|
| 124 |
+
"metric_direction": "lower",
|
| 125 |
+
"reason": null,
|
| 126 |
+
"error": null
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"task": "imu_to_hand_pose",
|
| 130 |
+
"model_family": "neural_mlp_raw128",
|
| 131 |
+
"status": "pass",
|
| 132 |
+
"primary_metric": "mae",
|
| 133 |
+
"primary_score": 0.252998411655426,
|
| 134 |
+
"metric_direction": "lower",
|
| 135 |
+
"reason": null,
|
| 136 |
+
"error": null
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"task": "imu_to_hand_pose",
|
| 140 |
+
"model_family": "simple_raw128_ridge",
|
| 141 |
+
"status": "pass",
|
| 142 |
+
"primary_metric": "mae",
|
| 143 |
+
"primary_score": 0.22941437363624573,
|
| 144 |
+
"metric_direction": "lower",
|
| 145 |
+
"reason": null,
|
| 146 |
+
"error": null
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"task": "interaction_text_prediction",
|
| 150 |
+
"model_family": "neural_mlp_raw128",
|
| 151 |
+
"status": "unsupported",
|
| 152 |
+
"primary_metric": "macro_f1",
|
| 153 |
+
"primary_score": null,
|
| 154 |
+
"metric_direction": "higher",
|
| 155 |
+
"reason": "raw 128-episode annotation.hdf5 interaction text is not present in the JSONL export; only hashed caption_objects_interaction_text features are available",
|
| 156 |
+
"error": null
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"task": "interaction_text_prediction",
|
| 160 |
+
"model_family": "simple_raw128_centroid",
|
| 161 |
+
"status": "unsupported",
|
| 162 |
+
"primary_metric": "macro_f1",
|
| 163 |
+
"primary_score": null,
|
| 164 |
+
"metric_direction": "higher",
|
| 165 |
+
"reason": "raw 128-episode annotation.hdf5 interaction text is not present in the JSONL export; only hashed caption_objects_interaction_text features are available",
|
| 166 |
+
"error": null
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"task": "long_horizon_next_action",
|
| 170 |
+
"model_family": "neural_mlp_raw128",
|
| 171 |
+
"status": "pass",
|
| 172 |
+
"primary_metric": "macro_f1",
|
| 173 |
+
"primary_score": 0.001063859887389299,
|
| 174 |
+
"metric_direction": "higher",
|
| 175 |
+
"reason": null,
|
| 176 |
+
"error": null
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"task": "long_horizon_next_action",
|
| 180 |
+
"model_family": "simple_raw128_centroid",
|
| 181 |
+
"status": "pass",
|
| 182 |
+
"primary_metric": "macro_f1",
|
| 183 |
+
"primary_score": 0.0024280172369056294,
|
| 184 |
+
"metric_direction": "higher",
|
| 185 |
+
"reason": null,
|
| 186 |
+
"error": null
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"task": "misalignment_detection",
|
| 190 |
+
"model_family": "neural_mlp_raw128",
|
| 191 |
+
"status": "pass",
|
| 192 |
+
"primary_metric": "macro_f1",
|
| 193 |
+
"primary_score": 0.8272709077974252,
|
| 194 |
+
"metric_direction": "higher",
|
| 195 |
+
"reason": null,
|
| 196 |
+
"error": null
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"task": "misalignment_detection",
|
| 200 |
+
"model_family": "simple_raw128_centroid",
|
| 201 |
+
"status": "pass",
|
| 202 |
+
"primary_metric": "macro_f1",
|
| 203 |
+
"primary_score": 0.4958867673901769,
|
| 204 |
+
"metric_direction": "higher",
|
| 205 |
+
"reason": null,
|
| 206 |
+
"error": null
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"task": "modality_reconstruction",
|
| 210 |
+
"model_family": "neural_mlp_raw128",
|
| 211 |
+
"status": "pass",
|
| 212 |
+
"primary_metric": "r2",
|
| 213 |
+
"primary_score": -1.3974418160502369,
|
| 214 |
+
"metric_direction": "higher",
|
| 215 |
+
"reason": null,
|
| 216 |
+
"error": null
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"task": "modality_reconstruction",
|
| 220 |
+
"model_family": "simple_raw128_ridge",
|
| 221 |
+
"status": "pass",
|
| 222 |
+
"primary_metric": "r2",
|
| 223 |
+
"primary_score": -1.3450960391924882,
|
| 224 |
+
"metric_direction": "higher",
|
| 225 |
+
"reason": null,
|
| 226 |
+
"error": null
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"task": "next_action",
|
| 230 |
+
"model_family": "neural_mlp_raw128",
|
| 231 |
+
"status": "pass",
|
| 232 |
+
"primary_metric": "macro_f1",
|
| 233 |
+
"primary_score": 0.0018477984371755407,
|
| 234 |
+
"metric_direction": "higher",
|
| 235 |
+
"reason": null,
|
| 236 |
+
"error": null
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"task": "next_action",
|
| 240 |
+
"model_family": "simple_raw128_centroid",
|
| 241 |
+
"status": "pass",
|
| 242 |
+
"primary_metric": "macro_f1",
|
| 243 |
+
"primary_score": 0.003285273363482094,
|
| 244 |
+
"metric_direction": "higher",
|
| 245 |
+
"reason": null,
|
| 246 |
+
"error": null
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"task": "next_subtask_forecast",
|
| 250 |
+
"model_family": "neural_mlp_raw128",
|
| 251 |
+
"status": "pass",
|
| 252 |
+
"primary_metric": "macro_f1",
|
| 253 |
+
"primary_score": 0.0,
|
| 254 |
+
"metric_direction": "higher",
|
| 255 |
+
"reason": null,
|
| 256 |
+
"error": null
|
| 257 |
+
},
|
| 258 |
+
{
|
| 259 |
+
"task": "next_subtask_forecast",
|
| 260 |
+
"model_family": "simple_raw128_centroid",
|
| 261 |
+
"status": "pass",
|
| 262 |
+
"primary_metric": "macro_f1",
|
| 263 |
+
"primary_score": 0.0,
|
| 264 |
+
"metric_direction": "higher",
|
| 265 |
+
"reason": null,
|
| 266 |
+
"error": null
|
| 267 |
+
},
|
| 268 |
+
{
|
| 269 |
+
"task": "object_relevance",
|
| 270 |
+
"model_family": "neural_mlp_raw128_multilabel",
|
| 271 |
+
"status": "pass",
|
| 272 |
+
"primary_metric": "micro_f1",
|
| 273 |
+
"primary_score": 0.1765890386972509,
|
| 274 |
+
"metric_direction": "higher",
|
| 275 |
+
"reason": null,
|
| 276 |
+
"error": null
|
| 277 |
+
},
|
| 278 |
+
{
|
| 279 |
+
"task": "object_relevance",
|
| 280 |
+
"model_family": "simple_raw128_ridge_multilabel",
|
| 281 |
+
"status": "pass",
|
| 282 |
+
"primary_metric": "micro_f1",
|
| 283 |
+
"primary_score": 0.0655376369662084,
|
| 284 |
+
"metric_direction": "higher",
|
| 285 |
+
"reason": null,
|
| 286 |
+
"error": null
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"task": "object_set_forecast",
|
| 290 |
+
"model_family": "neural_mlp_raw128_multilabel",
|
| 291 |
+
"status": "pass",
|
| 292 |
+
"primary_metric": "micro_f1",
|
| 293 |
+
"primary_score": 0.17523098630012288,
|
| 294 |
+
"metric_direction": "higher",
|
| 295 |
+
"reason": null,
|
| 296 |
+
"error": null
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"task": "object_set_forecast",
|
| 300 |
+
"model_family": "simple_raw128_ridge_multilabel",
|
| 301 |
+
"status": "pass",
|
| 302 |
+
"primary_metric": "micro_f1",
|
| 303 |
+
"primary_score": 0.06469493412657774,
|
| 304 |
+
"metric_direction": "higher",
|
| 305 |
+
"reason": null,
|
| 306 |
+
"error": null
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"task": "temporal_order",
|
| 310 |
+
"model_family": "neural_mlp_raw128",
|
| 311 |
+
"status": "pass",
|
| 312 |
+
"primary_metric": "macro_f1",
|
| 313 |
+
"primary_score": 0.8030047098504103,
|
| 314 |
+
"metric_direction": "higher",
|
| 315 |
+
"reason": null,
|
| 316 |
+
"error": null
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"task": "temporal_order",
|
| 320 |
+
"model_family": "simple_raw128_centroid",
|
| 321 |
+
"status": "pass",
|
| 322 |
+
"primary_metric": "macro_f1",
|
| 323 |
+
"primary_score": 0.49824413370686593,
|
| 324 |
+
"metric_direction": "higher",
|
| 325 |
+
"reason": null,
|
| 326 |
+
"error": null
|
| 327 |
+
},
|
| 328 |
+
{
|
| 329 |
+
"task": "time_to_transition",
|
| 330 |
+
"model_family": "neural_mlp_raw128",
|
| 331 |
+
"status": "pass",
|
| 332 |
+
"primary_metric": "mae",
|
| 333 |
+
"primary_score": 42.374061584472656,
|
| 334 |
+
"metric_direction": "lower",
|
| 335 |
+
"reason": null,
|
| 336 |
+
"error": null
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"task": "time_to_transition",
|
| 340 |
+
"model_family": "simple_raw128_ridge",
|
| 341 |
+
"status": "pass",
|
| 342 |
+
"primary_metric": "mae",
|
| 343 |
+
"primary_score": 52.32759094238281,
|
| 344 |
+
"metric_direction": "lower",
|
| 345 |
+
"reason": null,
|
| 346 |
+
"error": null
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"task": "timeline_action",
|
| 350 |
+
"model_family": "neural_mlp_raw128",
|
| 351 |
+
"status": "pass",
|
| 352 |
+
"primary_metric": "macro_f1",
|
| 353 |
+
"primary_score": 0.0014955083181204041,
|
| 354 |
+
"metric_direction": "higher",
|
| 355 |
+
"reason": null,
|
| 356 |
+
"error": null
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"task": "timeline_action",
|
| 360 |
+
"model_family": "simple_raw128_centroid",
|
| 361 |
+
"status": "pass",
|
| 362 |
+
"primary_metric": "macro_f1",
|
| 363 |
+
"primary_score": 0.002915061325704321,
|
| 364 |
+
"metric_direction": "higher",
|
| 365 |
+
"reason": null,
|
| 366 |
+
"error": null
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"task": "timeline_subtask",
|
| 370 |
+
"model_family": "neural_mlp_raw128",
|
| 371 |
+
"status": "pass",
|
| 372 |
+
"primary_metric": "macro_f1",
|
| 373 |
+
"primary_score": 7.35632183908046e-05,
|
| 374 |
+
"metric_direction": "higher",
|
| 375 |
+
"reason": null,
|
| 376 |
+
"error": null
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"task": "timeline_subtask",
|
| 380 |
+
"model_family": "simple_raw128_centroid",
|
| 381 |
+
"status": "pass",
|
| 382 |
+
"primary_metric": "macro_f1",
|
| 383 |
+
"primary_score": 0.0,
|
| 384 |
+
"metric_direction": "higher",
|
| 385 |
+
"reason": null,
|
| 386 |
+
"error": null
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"task": "transition_detection",
|
| 390 |
+
"model_family": "neural_mlp_raw128",
|
| 391 |
+
"status": "pass",
|
| 392 |
+
"primary_metric": "macro_f1",
|
| 393 |
+
"primary_score": 0.4902206914147213,
|
| 394 |
+
"metric_direction": "higher",
|
| 395 |
+
"reason": null,
|
| 396 |
+
"error": null
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"task": "transition_detection",
|
| 400 |
+
"model_family": "simple_raw128_centroid",
|
| 401 |
+
"status": "pass",
|
| 402 |
+
"primary_metric": "macro_f1",
|
| 403 |
+
"primary_score": 0.4203613574238283,
|
| 404 |
+
"metric_direction": "higher",
|
| 405 |
+
"reason": null,
|
| 406 |
+
"error": null
|
| 407 |
+
}
|
| 408 |
+
]
|
| 409 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/action_object_relation/metrics.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "action_object_relation",
|
| 3 |
+
"task_display_name": "Action Object Relation",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "simple_raw128_centroid",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"num_train_windows": 25488,
|
| 12 |
+
"num_val_windows": 4569,
|
| 13 |
+
"num_test_windows": 4014,
|
| 14 |
+
"num_classes": 4149,
|
| 15 |
+
"num_train_classes": 3058,
|
| 16 |
+
"input_dim": 3534,
|
| 17 |
+
"fit_input_dim": 2048,
|
| 18 |
+
"selected_column_count": 2048,
|
| 19 |
+
"splits": {
|
| 20 |
+
"val": {
|
| 21 |
+
"accuracy": 0.0,
|
| 22 |
+
"balanced_accuracy": 0.0,
|
| 23 |
+
"macro_f1": 0.0,
|
| 24 |
+
"weighted_f1": 0.0,
|
| 25 |
+
"num_eval_windows": 4569,
|
| 26 |
+
"num_classes": 4149
|
| 27 |
+
},
|
| 28 |
+
"test": {
|
| 29 |
+
"accuracy": 0.0,
|
| 30 |
+
"balanced_accuracy": 0.0,
|
| 31 |
+
"macro_f1": 0.0,
|
| 32 |
+
"weighted_f1": 0.0,
|
| 33 |
+
"num_eval_windows": 4014,
|
| 34 |
+
"num_classes": 4149
|
| 35 |
+
}
|
| 36 |
+
},
|
| 37 |
+
"primary_score": 0.0
|
| 38 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/action_object_relation/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/caption_grounding/metrics.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "caption_grounding",
|
| 3 |
+
"task_display_name": "Language Grounding",
|
| 4 |
+
"task_family": "retrieval",
|
| 5 |
+
"model_family": "simple_raw128_ridge",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "non-caption sensor blocks projected to hashed caption/object/interaction block",
|
| 8 |
+
"primary_metric": "mrr",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"num_train_windows": 25629,
|
| 12 |
+
"num_test_windows": 4032,
|
| 13 |
+
"input_dim": 3534,
|
| 14 |
+
"fit_input_dim": 2048,
|
| 15 |
+
"target_dim": 896,
|
| 16 |
+
"splits": {
|
| 17 |
+
"test": {
|
| 18 |
+
"mrr": 0.011150892823934555,
|
| 19 |
+
"top1": 0.003720238095238095,
|
| 20 |
+
"median_rank": 786.0,
|
| 21 |
+
"num_queries": 4032
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"primary_score": 0.011150892823934555
|
| 25 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/contact_prediction/metrics.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "contact_prediction",
|
| 3 |
+
"task_display_name": "Contact State Prediction",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "simple_raw128_centroid",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "sensor features excluding hashed caption text",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"num_train_windows": 25629,
|
| 12 |
+
"num_val_windows": 4608,
|
| 13 |
+
"num_test_windows": 4032,
|
| 14 |
+
"num_classes": 2,
|
| 15 |
+
"num_train_classes": 2,
|
| 16 |
+
"input_dim": 3534,
|
| 17 |
+
"fit_input_dim": 2048,
|
| 18 |
+
"selected_column_count": 2048,
|
| 19 |
+
"splits": {
|
| 20 |
+
"val": {
|
| 21 |
+
"accuracy": 0.8932291666666666,
|
| 22 |
+
"balanced_accuracy": 0.9390938351077,
|
| 23 |
+
"macro_f1": 0.8166515974696689,
|
| 24 |
+
"weighted_f1": 0.9058806656743642,
|
| 25 |
+
"num_eval_windows": 4608,
|
| 26 |
+
"num_classes": 2
|
| 27 |
+
},
|
| 28 |
+
"test": {
|
| 29 |
+
"accuracy": 0.9109623015873016,
|
| 30 |
+
"balanced_accuracy": 0.9425416133162612,
|
| 31 |
+
"macro_f1": 0.886990707397193,
|
| 32 |
+
"weighted_f1": 0.9155965516219,
|
| 33 |
+
"num_eval_windows": 4032,
|
| 34 |
+
"num_classes": 2
|
| 35 |
+
}
|
| 36 |
+
},
|
| 37 |
+
"primary_score": 0.886990707397193
|
| 38 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/contact_prediction/predictions.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/cross_modal_retrieval/metrics.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "cross_modal_retrieval",
|
| 3 |
+
"task_display_name": "Cross-Modal Retrieval",
|
| 4 |
+
"task_family": "retrieval",
|
| 5 |
+
"model_family": "simple_raw128_ridge",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "all non-depth sensor blocks projected to depth-confidence block",
|
| 8 |
+
"primary_metric": "mrr",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"num_train_windows": 25629,
|
| 12 |
+
"num_test_windows": 4032,
|
| 13 |
+
"input_dim": 3450,
|
| 14 |
+
"fit_input_dim": 2048,
|
| 15 |
+
"target_dim": 980,
|
| 16 |
+
"splits": {
|
| 17 |
+
"test": {
|
| 18 |
+
"mrr": 0.003459817497059703,
|
| 19 |
+
"top1": 0.000744047619047619,
|
| 20 |
+
"median_rank": 1797.5,
|
| 21 |
+
"num_queries": 4032
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"primary_score": 0.003459817497059703
|
| 25 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/hand_trajectory_forecast/metrics.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "hand_trajectory_forecast",
|
| 3 |
+
"task_display_name": "Hand Trajectory Forecasting",
|
| 4 |
+
"task_family": "regression",
|
| 5 |
+
"model_family": "simple_raw128_ridge",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "current non-hand/non-caption features; target hand joint feature block +20 frames",
|
| 8 |
+
"primary_metric": "mae",
|
| 9 |
+
"metric_direction": "lower",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"num_train_windows": 25502,
|
| 12 |
+
"num_test_windows": 4015,
|
| 13 |
+
"input_dim": 2652,
|
| 14 |
+
"fit_input_dim": 2048,
|
| 15 |
+
"target_dim": 882,
|
| 16 |
+
"splits": {
|
| 17 |
+
"test": {
|
| 18 |
+
"mae": 0.2729249894618988,
|
| 19 |
+
"rmse": 0.5156853199005127,
|
| 20 |
+
"r2": -0.21456409310612812,
|
| 21 |
+
"mean_l2": 13.391268730163574
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"primary_score": 0.2729249894618988
|
| 25 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/imu_to_hand_pose/metrics.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "imu_to_hand_pose",
|
| 3 |
+
"task_display_name": "Imu To Hand Pose",
|
| 4 |
+
"task_family": "regression",
|
| 5 |
+
"model_family": "simple_raw128_ridge",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "IMU acceleration/gyroscope block reconstructs hand-joint blocks",
|
| 8 |
+
"primary_metric": "mae",
|
| 9 |
+
"metric_direction": "lower",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"num_train_windows": 25629,
|
| 12 |
+
"num_test_windows": 4032,
|
| 13 |
+
"input_dim": 42,
|
| 14 |
+
"fit_input_dim": 42,
|
| 15 |
+
"target_dim": 882,
|
| 16 |
+
"splits": {
|
| 17 |
+
"test": {
|
| 18 |
+
"mae": 0.22941437363624573,
|
| 19 |
+
"rmse": 0.4729202091693878,
|
| 20 |
+
"r2": -0.018392341461985984,
|
| 21 |
+
"mean_l2": 11.224305152893066
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"primary_score": 0.22941437363624573
|
| 25 |
+
}
|
results/omni_finetune/a100_128_raw20_task_baselines_20260616T073954Z/simple_raw128/long_horizon_next_action/metrics.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"task": "long_horizon_next_action",
|
| 3 |
+
"task_display_name": "Long Horizon Next Action",
|
| 4 |
+
"task_family": "classification",
|
| 5 |
+
"model_family": "simple_raw128_centroid",
|
| 6 |
+
"source": "128_episode_raw_sensor_features",
|
| 7 |
+
"input_features": "current non-caption features; target action +100 frames",
|
| 8 |
+
"primary_metric": "macro_f1",
|
| 9 |
+
"metric_direction": "higher",
|
| 10 |
+
"status": "pass",
|
| 11 |
+
"num_train_windows": 25068,
|
| 12 |
+
"num_val_windows": 4496,
|
| 13 |
+
"num_test_windows": 3951,
|
| 14 |
+
"num_classes": 1211,
|
| 15 |
+
"num_train_classes": 887,
|
| 16 |
+
"input_dim": 3534,
|
| 17 |
+
"fit_input_dim": 2048,
|
| 18 |
+
"selected_column_count": 2048,
|
| 19 |
+
"splits": {
|
| 20 |
+
"val": {
|
| 21 |
+
"accuracy": 0.002224199288256228,
|
| 22 |
+
"balanced_accuracy": 0.003450987577971705,
|
| 23 |
+
"macro_f1": 0.0038119991336365605,
|
| 24 |
+
"weighted_f1": 0.0024887619404382845,
|
| 25 |
+
"num_eval_windows": 4496,
|
| 26 |
+
"num_classes": 1211
|
| 27 |
+
},
|
| 28 |
+
"test": {
|
| 29 |
+
"accuracy": 0.0030372057706909645,
|
| 30 |
+
"balanced_accuracy": 0.003899407470836042,
|
| 31 |
+
"macro_f1": 0.0024280172369056294,
|
| 32 |
+
"weighted_f1": 0.0025638705344299727,
|
| 33 |
+
"num_eval_windows": 3951,
|
| 34 |
+
"num_classes": 1211
|
| 35 |
+
}
|
| 36 |
+
},
|
| 37 |
+
"primary_score": 0.0024280172369056294
|
| 38 |
+
}
|