ropedia-xperience-10m-task-baselines / docs /data /task_method_20_gap_audit.json

Refine reader-facing public wording (3/6)

0f657b5 verified 6 days ago

8.5 kB

	{
	"generated_at_utc": "2026-06-22T11:00:00+00:00",
	"immediate_actions": [
	{
	"artifact": "docs/data/task_method_20_gap_audit.json",
	"id": "gap_audit",
	"purpose": "Verify the 180/180 scored result records and keep proxy flags reproducible."
	},
	{
	"artifact": "scripts/omni/score_model_output_probes.py",
	"id": "model_output_probe",
	"purpose": "Rescore verified model-output probes when new held-out artifacts arrive without fabricating unsupported cells."
	},
	{
	"artifact": "scripts/omni/launch_all_task_model_scoring_when_free.sh",
	"id": "guarded_gpu_launcher",
	"purpose": "Launch future replacement scoring runs only after enough private GPU capacity is idle."
	}
	],
	"methods": {
	"cosmos3_nano_future_window": {
	"kind": "partial_128_episode_world_model_overlay",
	"label": "Cosmos3-Nano Future Window",
	"proxy_scored_task_count": 0,
	"result_record_count": 20,
	"scope": "128 selected episodes, held-out test",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"scored": 20
	}
	},
	"cosmos3_super_reasoner": {
	"kind": "partial_128_episode_foundation_model_overlay",
	"label": "Cosmos3-Super Reasoner",
	"proxy_scored_task_count": 0,
	"result_record_count": 20,
	"scope": "128 selected episodes, held-out test",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"scored": 20
	}
	},
	"metadata128_neural_mlp": {
	"kind": "partial_128_episode_aligned_baseline",
	"label": "128ep Aligned NN",
	"proxy_scored_task_count": 1,
	"result_record_count": 20,
	"scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"proxy_scored": 1,
	"scored": 19
	}
	},
	"metadata128_simple": {
	"kind": "partial_128_episode_aligned_baseline",
	"label": "128ep Aligned Simple",
	"proxy_scored_task_count": 1,
	"result_record_count": 20,
	"scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"proxy_scored": 1,
	"scored": 19
	}
	},
	"minimal": {
	"kind": "full_20_task_baseline",
	"label": "Minimal",
	"proxy_scored_task_count": 0,
	"result_record_count": 20,
	"scope": "1 public sample episode",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"scored": 20
	}
	},
	"neural_mlp": {
	"kind": "full_20_task_baseline",
	"label": "Neural MLP",
	"proxy_scored_task_count": 0,
	"result_record_count": 20,
	"scope": "1 public sample episode",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"scored": 20
	}
	},
	"qwen3_omni_v6_lora": {
	"kind": "partial_128_episode_foundation_model_overlay",
	"label": "Qwen3-Omni v6 LoRA",
	"proxy_scored_task_count": 0,
	"result_record_count": 20,
	"scope": "128 selected episodes, held-out test",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"scored": 20
	}
	},
	"raw128_neural_mlp": {
	"kind": "complete_128_episode_raw_feature_baseline",
	"label": "128ep Raw NN",
	"proxy_scored_task_count": 2,
	"result_record_count": 20,
	"scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"proxy_scored": 2,
	"scored": 18
	}
	},
	"raw128_simple": {
	"kind": "complete_128_episode_raw_feature_baseline",
	"label": "128ep Raw Simple",
	"proxy_scored_task_count": 2,
	"result_record_count": 20,
	"scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes",
	"scored_task_count": 20,
	"scoreless_task_count": 0,
	"status_counts": {
	"proxy_scored": 2,
	"scored": 18
	}
	}
	},
	"missing_by_method": {},
	"missing_by_status": {},
	"missing_by_task": {},
	"missing_records": [],
	"proxy_records": [
	{
	"method": "128ep Raw Simple",
	"metric_key": "macro_f1",
	"reason": "documented compact proxy completion for this raw128 task axis",
	"series_id": "raw128_simple",
	"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/interaction_text_prediction/metrics.json",
	"task_id": "interaction_text_prediction",
	"task_label": "Interaction Text Prediction",
	"task_number": 15
	},
	{
	"method": "128ep Raw NN",
	"metric_key": "macro_f1",
	"reason": "documented compact proxy completion for this raw128 task axis",
	"series_id": "raw128_neural_mlp",
	"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/interaction_text_prediction/metrics.json",
	"task_id": "interaction_text_prediction",
	"task_label": "Interaction Text Prediction",
	"task_number": 15
	},
	{
	"method": "128ep Aligned Simple",
	"metric_key": "mrr",
	"reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy",
	"series_id": "metadata128_simple",
	"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/camera_view_sync_retrieval/metrics.json",
	"task_id": "camera_view_sync_retrieval",
	"task_label": "Camera-View Synchronization Retrieval",
	"task_number": 19
	},
	{
	"method": "128ep Aligned NN",
	"metric_key": "mrr",
	"reason": "paired camera-view embeddings are absent from the 128 JSONL/feature export; metadata features retrieve the synchronized same-window depth/audio block as a documented compact synchronization proxy",
	"series_id": "metadata128_neural_mlp",
	"source": "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/neural_mlp/camera_view_sync_retrieval/metrics.json",
	"task_id": "camera_view_sync_retrieval",
	"task_label": "Camera-View Synchronization Retrieval",
	"task_number": 19
	},
	{
	"method": "128ep Raw Simple",
	"metric_key": "mrr",
	"reason": "documented compact proxy completion for this raw128 task axis",
	"series_id": "raw128_simple",
	"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/simple_raw128/camera_view_sync_retrieval/metrics.json",
	"task_id": "camera_view_sync_retrieval",
	"task_label": "Camera-View Synchronization Retrieval",
	"task_number": 19
	},
	{
	"method": "128ep Raw NN",
	"metric_key": "mrr",
	"reason": "documented compact proxy completion for this raw128 task axis",
	"series_id": "raw128_neural_mlp",
	"source": "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z/neural_mlp_raw128/camera_view_sync_retrieval/metrics.json",
	"task_id": "camera_view_sync_retrieval",
	"task_label": "Camera-View Synchronization Retrieval",
	"task_number": 19
	}
	],
	"score_summary": {
	"method_count": 9,
	"method_task_record_count": 180,
	"proxy_scored_method_task_count": 6,
	"scored_method_task_count": 180,
	"scoreless_method_task_count": 0,
	"task_count": 20
	},
	"source_matrix": "docs/data/task_method_20_result_matrix.json",
	"status": "pass",
	"target_policy": {
	"numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
	"proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
	"scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model numbers. The current release has zero scoreless cells."
	},
	"title": "Task Method 20-Result Completion Audit"
	}