Refine reader-facing scope wording (2/4)

2600a90 verified 3 days ago

7.39 kB

	{
	"status": "current",
	"updated_utc": "2026-06-21T00:00:00Z",
	"interpretation_rule": "Read the 1-episode line as the inspectable task lab. Read the 128-episode line as the selected comparison surface for metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super, and Cosmos3-Nano.",
	"reader_summary": "The suite has two public reading lanes. Line 1 is the fully inspectable one-episode task lab. Line 2 is the 128-episode comparison surface for aligned baselines, the Qwen3-Omni series, and the Cosmos3 series. Compare scores within the same lane first.",
	"score_formula": "2 single-episode methods x 20 tasks = 40 records; 7 selected-128 methods x 20 tasks = 140 records; total public matrix = 180/180 scored records.",
	"lines": [
	{
	"id": "single_public_sample_episode",
	"label": "1 sample episode",
	"short_label": "Line 1",
	"data_unit": "One public Xperience-10M sample episode",
	"result_statement": "40/40 direct scores from Minimal and Neural MLP heads on the same 20 task contracts.",
	"best_read_as": "Inspect the raw sample, understand file organization, reproduce the 20 task targets, and compare Minimal vs Neural MLP behavior inside one episode.",
	"read_separately_from": "The selected-128 comparison rows and broader held-out model behavior.",
	"frames": 5821,
	"windows": 1161,
	"window_definition": "20-frame aligned windows with 5-frame stride",
	"feature_dimensions": 8546,
	"methods": [
	"Minimal heads",
	"Neural MLP heads"
	],
	"task_axes": 20,
	"method_task_records": 40,
	"scored_records": 40,
	"direct_scored_records": 40,
	"proxy_scored_records": 0,
	"best_use": "Inspect raw files, understand each task, rerun local baselines, and debug task quality.",
	"primary_visuals": [
	"docs/assets/charts/two_evidence_line_map.svg",
	"docs/assets/charts/single_episode_task_model_radar.svg"
	],
	"primary_artifacts": [
	"docs/data/single_episode_task_model_radar.json",
	"docs/data/two_evidence_line_result_summary.json",
	"results/episode_task_suite/summary_report.json",
	"results/episode_task_suite/feature_manifest.json",
	"docs/single_episode_explorer.html"
	]
	},
	{
	"id": "selected_128_episode_surface",
	"label": "128 selected episodes",
	"short_label": "Line 2",
	"data_unit": "Selected held-out 96/16/16 split with public-safe processed features linked to official gated episode paths",
	"result_statement": "140/140 selected-128 scores across seven methods: 134 direct scores plus 6 documented compact-proxy scores.",
	"best_read_as": "Compare same-split metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano while keeping the 6 compact-proxy cells visible.",
	"read_separately_from": "Direct raw-target interpretation for the proxy-marked cells.",
	"episodes": 128,
	"split": {
	"train": 96,
	"validation": 16,
	"test": 16
	},
	"exported_windows": 34269,
	"methods": [
	"Metadata simple",
	"Metadata NN",
	"Raw-feature simple",
	"Raw-feature NN",
	"Qwen3-Omni",
	"Cosmos3-Super",
	"Cosmos3-Nano"
	],
	"task_axes": 20,
	"method_task_records": 140,
	"scored_records": 140,
	"direct_scored_records": 134,
	"proxy_scored_records": 6,
	"proxy_policy": "Proxy flags remain visible where the public export lacks a direct raw target.",
	"best_use": "Compare same-split metadata/raw baselines, Qwen3-Omni v6 LoRA, Cosmos3-Super Reasoner, and Cosmos3-Nano Future Window while keeping evidence type explicit.",
	"primary_visuals": [
	"docs/assets/charts/two_evidence_line_map.svg",
	"docs/assets/charts/episode128_task_model_radar.svg",
	"docs/assets/charts/unified_task_model_radar.svg"
	],
	"primary_artifacts": [
	"docs/data/episode128_task_model_radar.json",
	"docs/data/two_evidence_line_result_summary.json",
	"docs/data/xperience10m_128_episode_feature_index.json",
	"docs/data/omni_model_comparison.json",
	"docs/data/qwen3_omni_run_lineage.json",
	"docs/data/task_method_20_gap_audit.json"
	]
	}
	],
	"method_blocks": [
	{
	"line_id": "single_public_sample_episode",
	"line_label": "1 sample episode",
	"block": "Task-head baselines",
	"methods": [
	"Minimal",
	"Neural MLP"
	],
	"scored_records": 40,
	"direct_scored_records": 40,
	"proxy_scored_records": 0,
	"evidence_type": "Direct target metrics on the public sample windows.",
	"read_as": "Task-lab reproducibility and simple-vs-neural behavior."
	},
	{
	"line_id": "selected_128_episode_surface",
	"line_label": "128 selected episodes",
	"block": "Aligned baseline heads",
	"methods": [
	"Metadata simple",
	"Metadata NN",
	"Raw-feature simple",
	"Raw-feature NN"
	],
	"scored_records": 80,
	"direct_scored_records": 74,
	"proxy_scored_records": 6,
	"evidence_type": "Direct processed-target metrics where available; compact proxies for documented raw-target gaps.",
	"read_as": "Same-split metadata/raw-feature baseline comparison."
	},
	{
	"line_id": "selected_128_episode_surface",
	"line_label": "128 selected episodes",
	"block": "Qwen3-Omni series",
	"methods": [
	"Qwen3-Omni v6 LoRA"
	],
	"scored_records": 20,
	"direct_scored_records": 20,
	"proxy_scored_records": 0,
	"evidence_type": "Verified selected-128 Qwen3-Omni v6 LoRA plus source-linked task-specific probes.",
	"read_as": "Trainable Qwen3-Omni diagnostic baseline on the selected-128 surface."
	},
	{
	"line_id": "selected_128_episode_surface",
	"line_label": "128 selected episodes",
	"block": "Cosmos3 series",
	"methods": [
	"Cosmos3-Super Reasoner",
	"Cosmos3-Nano Future Window"
	],
	"scored_records": 40,
	"direct_scored_records": 40,
	"proxy_scored_records": 0,
	"evidence_type": "Verified Cosmos3-Super Reasoner and Cosmos3-Nano Future Window public-safe artifacts.",
	"read_as": "Cosmos3 reasoner and future-window diagnostics on the selected-128 surface."
	}
	],
	"related_model_artifacts": [
	{
	"name": "Qwen3-Omni v1-v6 run lineage",
	"role": "Explains the LoRA/evaluation version ladder; v6 is the current 20-task matrix row, v5 remains the pinned prior release, and v1-v4 are lineage/ablation evidence.",
	"repo": "docs/data/qwen3_omni_run_lineage.json"
	},
	{
	"name": "Cosmos3-Super Forward-Dynamics LoRA",
	"role": "Separate fine-tuned adapter artifact for forward-dynamics loss metrics; published with weights/results but not counted as a 20-task matrix method row.",
	"repo": "https://huggingface.co/cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep"
	}
	],
	"combined_public_matrix": {
	"task_axes": 20,
	"methods": 9,
	"method_task_records": 180,
	"scored_records": 180,
	"direct_scored_records": 174,
	"proxy_scored_records": 6,
	"summary_artifact": "docs/data/two_evidence_line_result_summary.json",
	"artifact": "docs/data/task_method_20_result_matrix.json"
	}
	}