Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "title": "Single-Episode 20-Task Radar", | |
| "status": "pass", | |
| "generated_at_utc": "2026-06-19T11:30:03+00:00", | |
| "description": "Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.", | |
| "task_count": 20, | |
| "method_count": 2, | |
| "method_task_record_count": 40, | |
| "scored_method_task_count": 40, | |
| "normalization_policy": { | |
| "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]", | |
| "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task", | |
| "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table", | |
| "result_record_policy": "every method has 20 task records; records without a numeric score carry explicit unsupported/not-evaluated status and reason fields", | |
| "foundation_model_overlay": "Qwen3/Cosmos points are plotted only on task-aligned axes. Scoreless records mean the public result does not evaluate that task contract.", | |
| "metadata_128_overlay": "128-episode aligned baselines have 20 records. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists; raw interaction text and paired camera-view embeddings remain explicit gaps.", | |
| "raw_128_overlay": "128-episode raw-feature baselines use staged sensor NPZ features. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export." | |
| }, | |
| "source_unified_radar": "docs/data/unified_task_model_radar.json", | |
| "source_result_matrix": "docs/data/task_method_20_result_matrix.json", | |
| "series": [ | |
| { | |
| "id": "minimal", | |
| "label": "Minimal", | |
| "short_label": "Min", | |
| "color": "#ccffa0", | |
| "kind": "full_20_task_baseline", | |
| "scope": "1 public sample episode", | |
| "stroke_dasharray": null, | |
| "method_detail": "Single-episode simple heads over the public sample split.", | |
| "plotted_as": "filled polygon", | |
| "result_record_count": 20, | |
| "scored_task_count": 20, | |
| "covered_task_count": 20, | |
| "proxy_scored_task_count": 0, | |
| "scoreless_task_count": 0, | |
| "unsupported_task_count": 0, | |
| "not_evaluated_task_count": 0, | |
| "status_counts": { | |
| "scored": 20 | |
| }, | |
| "coverage_fraction": 1.0, | |
| "result_record_fraction": 1.0 | |
| }, | |
| { | |
| "id": "neural_mlp", | |
| "label": "Neural MLP", | |
| "short_label": "NN", | |
| "color": "#67e8d1", | |
| "kind": "full_20_task_baseline", | |
| "scope": "1 public sample episode", | |
| "stroke_dasharray": null, | |
| "method_detail": "Single-episode compact PyTorch MLP heads on the same 20 task contracts.", | |
| "plotted_as": "filled polygon", | |
| "result_record_count": 20, | |
| "scored_task_count": 20, | |
| "covered_task_count": 20, | |
| "proxy_scored_task_count": 0, | |
| "scoreless_task_count": 0, | |
| "unsupported_task_count": 0, | |
| "not_evaluated_task_count": 0, | |
| "status_counts": { | |
| "scored": 20 | |
| }, | |
| "coverage_fraction": 1.0, | |
| "result_record_fraction": 1.0 | |
| } | |
| ], | |
| "tasks": [ | |
| { | |
| "task_number": 1, | |
| "task_id": "timeline_action", | |
| "label": "Action Recognition", | |
| "axis_label": "01 Action Recognition", | |
| "short_label": "Action", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.05, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/timeline_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.05, | |
| "raw_text": "0.0500", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.014814814814814814, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.014814814814814814, | |
| "raw_text": "0.0148", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 2, | |
| "task_id": "timeline_subtask", | |
| "label": "Procedure Step Recognition", | |
| "axis_label": "02 Procedure Step Recognition", | |
| "short_label": "Step", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.05056355513846935, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/timeline_subtask/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.05056355513846935, | |
| "raw_text": "0.0506", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.02810810810810811, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.02810810810810811, | |
| "raw_text": "0.0281", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 3, | |
| "task_id": "transition_detection", | |
| "label": "Action Boundary Detection", | |
| "axis_label": "03 Action Boundary Detection", | |
| "short_label": "Boundary", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.6118237590630229, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/transition_detection/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.6118237590630229, | |
| "raw_text": "0.6118", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.5862068965517241, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.5862068965517241, | |
| "raw_text": "0.5862", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 4, | |
| "task_id": "next_action", | |
| "label": "Next-Action Prediction", | |
| "axis_label": "04 Next-Action Prediction", | |
| "short_label": "Next act", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.05925925925925927, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/next_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.05925925925925927, | |
| "raw_text": "0.0593", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.04186046511627907, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/next_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.04186046511627907, | |
| "raw_text": "0.0419", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 5, | |
| "task_id": "hand_trajectory_forecast", | |
| "label": "Hand Trajectory Forecasting", | |
| "axis_label": "05 Hand Trajectory Forecasting", | |
| "short_label": "Hand traj", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "mpjpe", | |
| "metric_name": "MPJPE", | |
| "metric_direction": "lower", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.8646570444107056, | |
| "metric_key": "mpjpe", | |
| "source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.12473175026322614, | |
| "raw_text": "0.8647", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.10785018652677536, | |
| "metric_key": "mpjpe", | |
| "source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 1.0, | |
| "raw_text": "0.1079", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 6, | |
| "task_id": "contact_prediction", | |
| "label": "Contact State Prediction", | |
| "axis_label": "06 Contact State Prediction", | |
| "short_label": "Contact", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 1.0, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/contact_prediction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 1.0, | |
| "raw_text": "1.000", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 1.0, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 1.0, | |
| "raw_text": "1.000", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 7, | |
| "task_id": "object_relevance", | |
| "label": "Object Relevance Prediction", | |
| "axis_label": "07 Object Relevance Prediction", | |
| "short_label": "Objects", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "micro_f1", | |
| "metric_name": "micro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.18034382095361662, | |
| "metric_key": "micro_f1", | |
| "source": "results/episode_task_suite/object_relevance/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.18034382095361662, | |
| "raw_text": "0.1803", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.1679279279279279, | |
| "metric_key": "micro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.1679279279279279, | |
| "raw_text": "0.1679", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 8, | |
| "task_id": "caption_grounding", | |
| "label": "Language Grounding", | |
| "axis_label": "08 Language Grounding", | |
| "short_label": "Language", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "mrr", | |
| "metric_name": "MRR", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.016023479050338015, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/caption_grounding/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.016023479050338015, | |
| "raw_text": "0.0160", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.01684125567132316, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.01684125567132316, | |
| "raw_text": "0.0168", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 9, | |
| "task_id": "cross_modal_retrieval", | |
| "label": "Cross-Modal Retrieval", | |
| "axis_label": "09 Cross-Modal Retrieval", | |
| "short_label": "X-modal", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "mrr", | |
| "metric_name": "MRR", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.26925966892956127, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.26925966892956127, | |
| "raw_text": "0.2693", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.1299971898648288, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.1299971898648288, | |
| "raw_text": "0.1300", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 10, | |
| "task_id": "modality_reconstruction", | |
| "label": "Cross-Modal Reconstruction", | |
| "axis_label": "10 Cross-Modal Reconstruction", | |
| "short_label": "Recon", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "r2", | |
| "metric_name": "R2", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": -0.015271898913936655, | |
| "metric_key": "r2", | |
| "source": "results/episode_task_suite/modality_reconstruction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.0, | |
| "raw_text": "-0.0153", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": -0.010171410134180991, | |
| "metric_key": "r2", | |
| "source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.0, | |
| "raw_text": "-0.0102", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 11, | |
| "task_id": "temporal_order", | |
| "label": "Temporal Order Verification", | |
| "axis_label": "11 Temporal Order Verification", | |
| "short_label": "Order", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "f1", | |
| "metric_name": "F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.5399515738498789, | |
| "metric_key": "f1", | |
| "source": "results/episode_task_suite/temporal_order/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.5399515738498789, | |
| "raw_text": "0.5400", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.8520179372197308, | |
| "metric_key": "f1", | |
| "source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.8520179372197308, | |
| "raw_text": "0.8520", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 12, | |
| "task_id": "misalignment_detection", | |
| "label": "Multimodal Synchronization Detection", | |
| "axis_label": "12 Multimodal Synchronization Detection", | |
| "short_label": "Sync", | |
| "origin": "original_public_sample_tasks", | |
| "metric_key": "f1", | |
| "metric_name": "F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.5051698670605613, | |
| "metric_key": "f1", | |
| "source": "results/episode_task_suite/misalignment_detection/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.5051698670605613, | |
| "raw_text": "0.5052", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.7152682255845944, | |
| "metric_key": "f1", | |
| "source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.7152682255845944, | |
| "raw_text": "0.7153", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 13, | |
| "task_id": "long_horizon_next_action", | |
| "label": "Long-Horizon Next-Action Forecasting", | |
| "axis_label": "13 Long-Horizon Next-Action Forecasting", | |
| "short_label": "Long act", | |
| "origin": "additional_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.07499999999999998, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.07499999999999998, | |
| "raw_text": "0.0750", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.06545454545454546, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.06545454545454546, | |
| "raw_text": "0.0655", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 14, | |
| "task_id": "next_subtask_forecast", | |
| "label": "Long-Horizon Next-Subtask Forecasting", | |
| "axis_label": "14 Long-Horizon Next-Subtask Forecasting", | |
| "short_label": "Long step", | |
| "origin": "additional_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.04545454545454545, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.04545454545454545, | |
| "raw_text": "0.0455", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.050724637681159424, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.050724637681159424, | |
| "raw_text": "0.0507", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 15, | |
| "task_id": "interaction_text_prediction", | |
| "label": "Interaction Text Prediction", | |
| "axis_label": "15 Interaction Text Prediction", | |
| "short_label": "Interact txt", | |
| "origin": "additional_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": true, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.04444444444444444, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.04444444444444444, | |
| "raw_text": "0.0444", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.0380952380952381, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.0380952380952381, | |
| "raw_text": "0.0381", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 16, | |
| "task_id": "action_object_relation", | |
| "label": "Action-Object Relation Prediction", | |
| "axis_label": "16 Action-Object Relation Prediction", | |
| "short_label": "Act+obj", | |
| "origin": "additional_public_sample_tasks", | |
| "metric_key": "macro_f1", | |
| "metric_name": "macro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.0, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.0, | |
| "raw_text": "0.0000", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.0, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.0, | |
| "raw_text": "0.0000", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 17, | |
| "task_id": "object_set_forecast", | |
| "label": "Future Object-Set Forecasting", | |
| "axis_label": "17 Future Object-Set Forecasting", | |
| "short_label": "Future obj", | |
| "origin": "additional_public_sample_tasks", | |
| "metric_key": "micro_f1", | |
| "metric_name": "micro-F1", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.16939890710382516, | |
| "metric_key": "micro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.16939890710382516, | |
| "raw_text": "0.1694", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.19718309859154928, | |
| "metric_key": "micro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.19718309859154928, | |
| "raw_text": "0.1972", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 18, | |
| "task_id": "imu_to_hand_pose", | |
| "label": "IMU-to-Hand Pose Reconstruction", | |
| "axis_label": "18 IMU-to-Hand Pose Reconstruction", | |
| "short_label": "IMU->hand", | |
| "origin": "additional_public_sample_tasks", | |
| "metric_key": "mae", | |
| "metric_name": "MAE", | |
| "metric_direction": "lower", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.042049407958984375, | |
| "metric_key": "mae", | |
| "source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 1.0, | |
| "raw_text": "0.0420", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.042562149465084076, | |
| "metric_key": "mae", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.9879531106266066, | |
| "raw_text": "0.0426", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 19, | |
| "task_id": "camera_view_sync_retrieval", | |
| "label": "Camera-View Synchronization Retrieval", | |
| "axis_label": "19 Camera-View Synchronization Retrieval", | |
| "short_label": "Cam sync", | |
| "origin": "additional_public_sample_tasks", | |
| "metric_key": "mrr", | |
| "metric_name": "MRR", | |
| "metric_direction": "higher", | |
| "raw128_proxy_axis": true, | |
| "values": { | |
| "minimal": { | |
| "raw": 0.4943004846572876, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.4943004846572876, | |
| "raw_text": "0.4943", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 0.24086658656597137, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.24086658656597137, | |
| "raw_text": "0.2409", | |
| "status_label": "scored" | |
| } | |
| } | |
| }, | |
| { | |
| "task_number": 20, | |
| "task_id": "time_to_transition", | |
| "label": "Time-to-Next-Transition Regression", | |
| "axis_label": "20 Time-to-Next-Transition Regression", | |
| "short_label": "Time2bdry", | |
| "origin": "additional_public_sample_tasks", | |
| "metric_key": "mae", | |
| "metric_name": "MAE frames", | |
| "metric_direction": "lower", | |
| "raw128_proxy_axis": false, | |
| "values": { | |
| "minimal": { | |
| "raw": 10.53735637664795, | |
| "metric_key": "mae", | |
| "source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 1.0, | |
| "raw_text": "10.54", | |
| "status_label": "scored" | |
| }, | |
| "neural_mlp": { | |
| "raw": 10.55449390411377, | |
| "metric_key": "mae", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "status": "scored", | |
| "normalized_score": 0.9983762814568361, | |
| "raw_text": "10.55", | |
| "status_label": "scored" | |
| } | |
| } | |
| } | |
| ], | |
| "task_method_result_matrix": [ | |
| { | |
| "task_number": 1, | |
| "task_id": "timeline_action", | |
| "task_label": "Action Recognition", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.05, | |
| "raw_text": "0.0500", | |
| "normalized_score": 0.05, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/timeline_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 1, | |
| "task_id": "timeline_action", | |
| "task_label": "Action Recognition", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.014814814814814814, | |
| "raw_text": "0.0148", | |
| "normalized_score": 0.014814814814814814, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/timeline_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 2, | |
| "task_id": "timeline_subtask", | |
| "task_label": "Procedure Step Recognition", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.05056355513846935, | |
| "raw_text": "0.0506", | |
| "normalized_score": 0.05056355513846935, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/timeline_subtask/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 2, | |
| "task_id": "timeline_subtask", | |
| "task_label": "Procedure Step Recognition", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.02810810810810811, | |
| "raw_text": "0.0281", | |
| "normalized_score": 0.02810810810810811, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/timeline_subtask/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 3, | |
| "task_id": "transition_detection", | |
| "task_label": "Action Boundary Detection", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.6118237590630229, | |
| "raw_text": "0.6118", | |
| "normalized_score": 0.6118237590630229, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/transition_detection/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 3, | |
| "task_id": "transition_detection", | |
| "task_label": "Action Boundary Detection", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.5862068965517241, | |
| "raw_text": "0.5862", | |
| "normalized_score": 0.5862068965517241, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/transition_detection/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 4, | |
| "task_id": "next_action", | |
| "task_label": "Next-Action Prediction", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.05925925925925927, | |
| "raw_text": "0.0593", | |
| "normalized_score": 0.05925925925925927, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/next_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 4, | |
| "task_id": "next_action", | |
| "task_label": "Next-Action Prediction", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.04186046511627907, | |
| "raw_text": "0.0419", | |
| "normalized_score": 0.04186046511627907, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/next_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 5, | |
| "task_id": "hand_trajectory_forecast", | |
| "task_label": "Hand Trajectory Forecasting", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.8646570444107056, | |
| "raw_text": "0.8647", | |
| "normalized_score": 0.12473175026322614, | |
| "metric_key": "mpjpe", | |
| "source": "results/episode_task_suite/hand_trajectory_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 5, | |
| "task_id": "hand_trajectory_forecast", | |
| "task_label": "Hand Trajectory Forecasting", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.10785018652677536, | |
| "raw_text": "0.1079", | |
| "normalized_score": 1.0, | |
| "metric_key": "mpjpe", | |
| "source": "results/episode_task_suite/neural_mlp/hand_trajectory_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 6, | |
| "task_id": "contact_prediction", | |
| "task_label": "Contact State Prediction", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 1.0, | |
| "raw_text": "1.000", | |
| "normalized_score": 1.0, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/contact_prediction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 6, | |
| "task_id": "contact_prediction", | |
| "task_label": "Contact State Prediction", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 1.0, | |
| "raw_text": "1.000", | |
| "normalized_score": 1.0, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/contact_prediction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 7, | |
| "task_id": "object_relevance", | |
| "task_label": "Object Relevance Prediction", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.18034382095361662, | |
| "raw_text": "0.1803", | |
| "normalized_score": 0.18034382095361662, | |
| "metric_key": "micro_f1", | |
| "source": "results/episode_task_suite/object_relevance/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 7, | |
| "task_id": "object_relevance", | |
| "task_label": "Object Relevance Prediction", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.1679279279279279, | |
| "raw_text": "0.1679", | |
| "normalized_score": 0.1679279279279279, | |
| "metric_key": "micro_f1", | |
| "source": "results/episode_task_suite/neural_mlp/object_relevance/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 8, | |
| "task_id": "caption_grounding", | |
| "task_label": "Language Grounding", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.016023479050338015, | |
| "raw_text": "0.0160", | |
| "normalized_score": 0.016023479050338015, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/caption_grounding/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 8, | |
| "task_id": "caption_grounding", | |
| "task_label": "Language Grounding", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.01684125567132316, | |
| "raw_text": "0.0168", | |
| "normalized_score": 0.01684125567132316, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/neural_mlp/caption_grounding/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 9, | |
| "task_id": "cross_modal_retrieval", | |
| "task_label": "Cross-Modal Retrieval", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.26925966892956127, | |
| "raw_text": "0.2693", | |
| "normalized_score": 0.26925966892956127, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 9, | |
| "task_id": "cross_modal_retrieval", | |
| "task_label": "Cross-Modal Retrieval", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.1299971898648288, | |
| "raw_text": "0.1300", | |
| "normalized_score": 0.1299971898648288, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/neural_mlp/cross_modal_retrieval/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 10, | |
| "task_id": "modality_reconstruction", | |
| "task_label": "Cross-Modal Reconstruction", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": -0.015271898913936655, | |
| "raw_text": "-0.0153", | |
| "normalized_score": 0.0, | |
| "metric_key": "r2", | |
| "source": "results/episode_task_suite/modality_reconstruction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 10, | |
| "task_id": "modality_reconstruction", | |
| "task_label": "Cross-Modal Reconstruction", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": -0.010171410134180991, | |
| "raw_text": "-0.0102", | |
| "normalized_score": 0.0, | |
| "metric_key": "r2", | |
| "source": "results/episode_task_suite/neural_mlp/modality_reconstruction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 11, | |
| "task_id": "temporal_order", | |
| "task_label": "Temporal Order Verification", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.5399515738498789, | |
| "raw_text": "0.5400", | |
| "normalized_score": 0.5399515738498789, | |
| "metric_key": "f1", | |
| "source": "results/episode_task_suite/temporal_order/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 11, | |
| "task_id": "temporal_order", | |
| "task_label": "Temporal Order Verification", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.8520179372197308, | |
| "raw_text": "0.8520", | |
| "normalized_score": 0.8520179372197308, | |
| "metric_key": "f1", | |
| "source": "results/episode_task_suite/neural_mlp/temporal_order/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 12, | |
| "task_id": "misalignment_detection", | |
| "task_label": "Multimodal Synchronization Detection", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.5051698670605613, | |
| "raw_text": "0.5052", | |
| "normalized_score": 0.5051698670605613, | |
| "metric_key": "f1", | |
| "source": "results/episode_task_suite/misalignment_detection/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 12, | |
| "task_id": "misalignment_detection", | |
| "task_label": "Multimodal Synchronization Detection", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.7152682255845944, | |
| "raw_text": "0.7153", | |
| "normalized_score": 0.7152682255845944, | |
| "metric_key": "f1", | |
| "source": "results/episode_task_suite/neural_mlp/misalignment_detection/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 13, | |
| "task_id": "long_horizon_next_action", | |
| "task_label": "Long-Horizon Next-Action Forecasting", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.07499999999999998, | |
| "raw_text": "0.0750", | |
| "normalized_score": 0.07499999999999998, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/long_horizon_next_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 13, | |
| "task_id": "long_horizon_next_action", | |
| "task_label": "Long-Horizon Next-Action Forecasting", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.06545454545454546, | |
| "raw_text": "0.0655", | |
| "normalized_score": 0.06545454545454546, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/long_horizon_next_action/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 14, | |
| "task_id": "next_subtask_forecast", | |
| "task_label": "Long-Horizon Next-Subtask Forecasting", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.04545454545454545, | |
| "raw_text": "0.0455", | |
| "normalized_score": 0.04545454545454545, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/next_subtask_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 14, | |
| "task_id": "next_subtask_forecast", | |
| "task_label": "Long-Horizon Next-Subtask Forecasting", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.050724637681159424, | |
| "raw_text": "0.0507", | |
| "normalized_score": 0.050724637681159424, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/next_subtask_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 15, | |
| "task_id": "interaction_text_prediction", | |
| "task_label": "Interaction Text Prediction", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.04444444444444444, | |
| "raw_text": "0.0444", | |
| "normalized_score": 0.04444444444444444, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/interaction_text_prediction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 15, | |
| "task_id": "interaction_text_prediction", | |
| "task_label": "Interaction Text Prediction", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.0380952380952381, | |
| "raw_text": "0.0381", | |
| "normalized_score": 0.0380952380952381, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/interaction_text_prediction/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 16, | |
| "task_id": "action_object_relation", | |
| "task_label": "Action-Object Relation Prediction", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.0, | |
| "raw_text": "0.0000", | |
| "normalized_score": 0.0, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/action_object_relation/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 16, | |
| "task_id": "action_object_relation", | |
| "task_label": "Action-Object Relation Prediction", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.0, | |
| "raw_text": "0.0000", | |
| "normalized_score": 0.0, | |
| "metric_key": "macro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/action_object_relation/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 17, | |
| "task_id": "object_set_forecast", | |
| "task_label": "Future Object-Set Forecasting", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.16939890710382516, | |
| "raw_text": "0.1694", | |
| "normalized_score": 0.16939890710382516, | |
| "metric_key": "micro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/object_set_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 17, | |
| "task_id": "object_set_forecast", | |
| "task_label": "Future Object-Set Forecasting", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.19718309859154928, | |
| "raw_text": "0.1972", | |
| "normalized_score": 0.19718309859154928, | |
| "metric_key": "micro_f1", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/object_set_forecast/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 18, | |
| "task_id": "imu_to_hand_pose", | |
| "task_label": "IMU-to-Hand Pose Reconstruction", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.042049407958984375, | |
| "raw_text": "0.0420", | |
| "normalized_score": 1.0, | |
| "metric_key": "mae", | |
| "source": "results/episode_task_suite/tier2_task_suite/imu_to_hand_pose/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 18, | |
| "task_id": "imu_to_hand_pose", | |
| "task_label": "IMU-to-Hand Pose Reconstruction", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.042562149465084076, | |
| "raw_text": "0.0426", | |
| "normalized_score": 0.9879531106266066, | |
| "metric_key": "mae", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/imu_to_hand_pose/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 19, | |
| "task_id": "camera_view_sync_retrieval", | |
| "task_label": "Camera-View Synchronization Retrieval", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.4943004846572876, | |
| "raw_text": "0.4943", | |
| "normalized_score": 0.4943004846572876, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/tier2_task_suite/camera_view_sync_retrieval/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 19, | |
| "task_id": "camera_view_sync_retrieval", | |
| "task_label": "Camera-View Synchronization Retrieval", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 0.24086658656597137, | |
| "raw_text": "0.2409", | |
| "normalized_score": 0.24086658656597137, | |
| "metric_key": "mrr", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/camera_view_sync_retrieval/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 20, | |
| "task_id": "time_to_transition", | |
| "task_label": "Time-to-Next-Transition Regression", | |
| "series_id": "minimal", | |
| "method": "Minimal", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 10.53735637664795, | |
| "raw_text": "10.54", | |
| "normalized_score": 1.0, | |
| "metric_key": "mae", | |
| "source": "results/episode_task_suite/tier2_task_suite/time_to_transition/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| }, | |
| { | |
| "task_number": 20, | |
| "task_id": "time_to_transition", | |
| "task_label": "Time-to-Next-Transition Regression", | |
| "series_id": "neural_mlp", | |
| "method": "Neural MLP", | |
| "status": "scored", | |
| "status_label": "scored", | |
| "scored": true, | |
| "proxy_scored": false, | |
| "raw": 10.55449390411377, | |
| "raw_text": "10.55", | |
| "normalized_score": 0.9983762814568361, | |
| "metric_key": "mae", | |
| "source": "results/episode_task_suite/tier2_task_suite/neural_mlp/time_to_transition/metrics.json", | |
| "scope": "single_episode_public_sample", | |
| "reason": null | |
| } | |
| ] | |
| } | |