| { |
| "title": "Ropedia Xperience-10M Research Takeaways", |
| "status": "pass", |
| "generated_at_utc": "2026-06-03T12:56:49+00:00", |
| "source_files": [ |
| "docs/data/summary_metrics.json", |
| "results/episode_task_suite/summary_report.json", |
| "results/episode_task_suite/neural_mlp/*/metrics.json", |
| "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md" |
| ], |
| "scope": { |
| "validated_episode_count": 1, |
| "num_frames": 5821, |
| "num_windows": 1161, |
| "feature_dim": 8546, |
| "audio_featurized": true, |
| "raw_data_redistributed": false |
| }, |
| "takeaways": [ |
| { |
| "id": "episode_to_benchmark", |
| "title": "One episode can become a real benchmark contract", |
| "readout": "The public sample is converted into 5,821 frames, 1,161 aligned 20-frame windows, and an 8,546-dimensional feature contract.", |
| "evidence": [ |
| { |
| "label": "frames", |
| "value": 5821 |
| }, |
| { |
| "label": "windows", |
| "value": 1161 |
| }, |
| { |
| "label": "feature_dim", |
| "value": 8546 |
| } |
| ], |
| "source": "docs/data/summary_metrics.json", |
| "current_scope": "This benchmark defines the task contract; cross-episode generalization is evaluated in the multi-episode stage." |
| }, |
| { |
| "id": "chronological_split_exposes_class_shift", |
| "title": "Chronological splits expose action-class shift", |
| "readout": "Earlier all-feature action classifiers reach high macro-F1 on their local split, but the chronological action/subtask heads in the 12-task suite score far lower because later held-out windows include unseen labels.", |
| "evidence": [ |
| { |
| "label": "all_feature_action_macro_f1", |
| "value": 0.9828810433408773 |
| }, |
| { |
| "label": "suite_action_macro_f1", |
| "value": 0.05 |
| }, |
| { |
| "label": "suite_subtask_macro_f1", |
| "value": 0.05056355513846935 |
| }, |
| { |
| "label": "unseen_action_test_classes", |
| "value": 4 |
| } |
| ], |
| "source": "results/episode_task_suite/summary_report.json", |
| "current_scope": "This split is useful for studying label shift; broad action-recognition conclusions need held-out episodes." |
| }, |
| { |
| "id": "neural_heads_help_dynamics", |
| "title": "Small neural heads help dynamic and temporal probes", |
| "readout": "The MLP heads substantially improve hand trajectory forecasting, temporal-order verification, and motion/visual synchronization.", |
| "evidence": [ |
| { |
| "label": "hand_mpjpe_minimal", |
| "value": 0.8646570444107056 |
| }, |
| { |
| "label": "hand_mpjpe_neural", |
| "value": 0.10785018652677536 |
| }, |
| { |
| "label": "hand_mpjpe_relative_improvement", |
| "value": 0.8752682497367739 |
| }, |
| { |
| "label": "temporal_order_f1_minimal", |
| "value": 0.5399515738498789 |
| }, |
| { |
| "label": "temporal_order_f1_neural", |
| "value": 0.8520179372197308 |
| }, |
| { |
| "label": "misalignment_f1_minimal", |
| "value": 0.5051698670605613 |
| }, |
| { |
| "label": "misalignment_f1_neural", |
| "value": 0.7152682255845944 |
| } |
| ], |
| "source": "results/episode_task_suite/neural_mlp/*/metrics.json", |
| "current_scope": "These gains are measured within one episode and are candidates for held-out-episode testing." |
| }, |
| { |
| "id": "retrieval_and_reconstruction_remain_open", |
| "title": "Retrieval and reconstruction remain the harder multimodal problems", |
| "readout": "The minimal ridge/cosine retrieval baseline remains stronger than the neural projection on this sample, and cross-modal reconstruction still has a negative R² for both the minimal and neural variants.", |
| "evidence": [ |
| { |
| "label": "retrieval_mrr_minimal", |
| "value": 0.26925966892956127 |
| }, |
| { |
| "label": "retrieval_mrr_neural", |
| "value": 0.1299971898648288 |
| }, |
| { |
| "label": "retrieval_top5_minimal", |
| "value": 0.367816091954023 |
| }, |
| { |
| "label": "reconstruction_r2_minimal", |
| "value": -0.015271898913936655 |
| }, |
| { |
| "label": "reconstruction_r2_neural", |
| "value": -0.010171410134180991 |
| } |
| ], |
| "source": "results/episode_task_suite/cross_modal_retrieval/metrics.json", |
| "current_scope": "The current reconstruction task predicts feature vectors; depth, mesh, NeRF, and Gaussian-splatting outputs are future task variants." |
| }, |
| { |
| "id": "scale_requires_episodes", |
| "title": "The next scientific unit is held-out episodes, not more adjacent windows", |
| "readout": "The prepared Qwen3-Omni path targets 32 episodes from 32 sessions, but it remains data-gated until data access is granted and held-out evaluation is complete.", |
| "evidence": [ |
| { |
| "label": "target_episodes", |
| "value": 32 |
| }, |
| { |
| "label": "selected_sessions", |
| "value": 32 |
| }, |
| { |
| "label": "valid_candidates", |
| "value": 680 |
| } |
| ], |
| "source": "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md", |
| "current_scope": "The 32-episode Qwen3-Omni fine-tune requires gated data staging and held-out evaluation." |
| } |
| ] |
| } |
|
|