ropedia-xperience-10m-task-baselines / docs /data /task_method_20_gap_audit.json
cy0307's picture
Refine reader-facing public wording (3/6)
0f657b5 verified
Raw
History Blame Contribute Delete
8.5 kB
{
"generated_at_utc": "2026-06-22T11:00:00+00:00",
"immediate_actions": [
{
"artifact": "docs/data/task_method_20_gap_audit.json",
"id": "gap_audit",
"purpose": "Verify the 180/180 scored result records and keep proxy flags reproducible."
},
{
"artifact": "scripts/omni/score_model_output_probes.py",
"id": "model_output_probe",
"purpose": "When new held-out artifacts arrive, rescore verified model-output probes without fabricating unsupported cells."
},
{
"artifact": "scripts/omni/launch_all_task_model_scoring_when_free.sh",
"id": "guarded_gpu_launcher",
"purpose": "Launch future replacement scoring runs only after enough private GPU capacity is idle."
}
],
"methods": {
"cosmos3_nano_future_window": {
"kind": "partial_128_episode_world_model_overlay",
"label": "Cosmos3-Nano Future Window",
"proxy_scored_task_count": 0,
"result_record_count": 20,
"scope": "128 selected episodes, held-out test",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"scored": 20
}
},
"cosmos3_super_reasoner": {
"kind": "partial_128_episode_foundation_model_overlay",
"label": "Cosmos3-Super Reasoner",
"proxy_scored_task_count": 0,
"result_record_count": 20,
"scope": "128 selected episodes, held-out test",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"scored": 20
}
},
"metadata128_neural_mlp": {
"kind": "partial_128_episode_aligned_baseline",
"label": "128ep Aligned NN",
"proxy_scored_task_count": 1,
"result_record_count": 20,
"scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"proxy_scored": 1,
"scored": 19
}
},
"metadata128_simple": {
"kind": "partial_128_episode_aligned_baseline",
"label": "128ep Aligned Simple",
"proxy_scored_task_count": 1,
"result_record_count": 20,
"scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"proxy_scored": 1,
"scored": 19
}
},
"minimal": {
"kind": "full_20_task_baseline",
"label": "Minimal",
"proxy_scored_task_count": 0,
"result_record_count": 20,
"scope": "1 public sample episode",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"scored": 20
}
},
"neural_mlp": {
"kind": "full_20_task_baseline",
"label": "Neural MLP",
"proxy_scored_task_count": 0,
"result_record_count": 20,
"scope": "1 public sample episode",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"scored": 20
}
},
"qwen3_omni_v6_lora": {
"kind": "partial_128_episode_foundation_model_overlay",
"label": "Qwen3-Omni v6 LoRA",
"proxy_scored_task_count": 0,
"result_record_count": 20,
"scope": "128 selected episodes, held-out test",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"scored": 20
}
},
"raw128_neural_mlp": {
"kind": "complete_128_episode_raw_feature_baseline",
"label": "128ep Raw NN",
"proxy_scored_task_count": 2,
"result_record_count": 20,
"scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"proxy_scored": 2,
"scored": 18
}
},
"raw128_simple": {
"kind": "complete_128_episode_raw_feature_baseline",
"label": "128ep Raw Simple",
"proxy_scored_task_count": 2,
"result_record_count": 20,
"scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes",
"scored_task_count": 20,
"scoreless_task_count": 0,
"status_counts": {
"proxy_scored": 2,
"scored": 18
}
}
},
"missing_by_method": {},
"missing_by_status": {},
"missing_by_task": {},
"missing_records": [],
"proxy_records": [
{
"method": "128ep Raw Simple",
"metric_key": "macro_f1",
"reason": "documented compact proxy completion for this raw128 task axis",
"series_id": "raw128_simple",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/interaction_text_prediction/metrics.json",
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"task_number": 15
},
{
"method": "128ep Raw NN",
"metric_key": "macro_f1",
"reason": "documented compact proxy completion for this raw128 task axis",
"series_id": "raw128_neural_mlp",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/interaction_text_prediction/metrics.json",
"task_id": "interaction_text_prediction",
"task_label": "Interaction Text Prediction",
"task_number": 15
},
{
"method": "128ep Aligned Simple",
"metric_key": "mrr",
"reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy",
"series_id": "metadata128_simple",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/camera_view_sync_retrieval/metrics.json",
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"task_number": 19
},
{
"method": "128ep Aligned NN",
"metric_key": "mrr",
"reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy",
"series_id": "metadata128_neural_mlp",
"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/camera_view_sync_retrieval/metrics.json",
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"task_number": 19
},
{
"method": "128ep Raw Simple",
"metric_key": "mrr",
"reason": "documented compact proxy completion for this raw128 task axis",
"series_id": "raw128_simple",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/camera_view_sync_retrieval/metrics.json",
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"task_number": 19
},
{
"method": "128ep Raw NN",
"metric_key": "mrr",
"reason": "documented compact proxy completion for this raw128 task axis",
"series_id": "raw128_neural_mlp",
"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json",
"task_id": "camera_view_sync_retrieval",
"task_label": "Camera-View Synchronization Retrieval",
"task_number": 19
}
],
"score_summary": {
"method_count": 9,
"method_task_record_count": 180,
"proxy_scored_method_task_count": 6,
"scored_method_task_count": 180,
"scoreless_method_task_count": 0,
"task_count": 20
},
"source_matrix": "docs/data/task_method_20_result_matrix.json",
"status": "pass",
"target_policy": {
"numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
"proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
"scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model numbers. The current release has zero scoreless cells."
},
"title": "Task Method 20-Result Completion Audit"
}