Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
Add files using upload-large-folder tool
Browse files
data/artifact_index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
-
"generated_at_utc": "2026-06-22T11:
|
| 4 |
"status": "pass",
|
| 5 |
"artifact_count": 228,
|
| 6 |
"missing": [],
|
|
@@ -92,8 +92,8 @@
|
|
| 92 |
"surface": "repo_hf",
|
| 93 |
"shows": "Defines terminology that can be confused across data scope, task metrics, model branches, and public mirrors.",
|
| 94 |
"exists": true,
|
| 95 |
-
"bytes":
|
| 96 |
-
"sha256": "
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"id": "glossary_json",
|
|
@@ -103,8 +103,8 @@
|
|
| 103 |
"surface": "website_hf",
|
| 104 |
"shows": "Machine-readable terminology layer for the website, artifact dataset, model mirror, and public QA checks.",
|
| 105 |
"exists": true,
|
| 106 |
-
"bytes":
|
| 107 |
-
"sha256": "
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"id": "research_roadmap",
|
|
@@ -1159,8 +1159,8 @@
|
|
| 1159 |
"surface": "website_hf",
|
| 1160 |
"shows": "Lists the official public sample HDF5, MP4, and RRD files, derived browser-preview clips, playback/download URLs, file sizes, browser behavior, and HDF5 group organization.",
|
| 1161 |
"exists": true,
|
| 1162 |
-
"bytes":
|
| 1163 |
-
"sha256": "
|
| 1164 |
},
|
| 1165 |
{
|
| 1166 |
"id": "quality_gates",
|
|
@@ -1182,7 +1182,7 @@
|
|
| 1182 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 1183 |
"exists": true,
|
| 1184 |
"bytes": 8640,
|
| 1185 |
-
"sha256": "
|
| 1186 |
},
|
| 1187 |
{
|
| 1188 |
"id": "public_surface_qa",
|
|
@@ -1399,7 +1399,7 @@
|
|
| 1399 |
"volatile": true,
|
| 1400 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 1401 |
"exists": true,
|
| 1402 |
-
"bytes":
|
| 1403 |
"hash_policy": "existence_and_size_only"
|
| 1404 |
},
|
| 1405 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Task Suite Artifact Index",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:43:01+00:00",
|
| 4 |
"status": "pass",
|
| 5 |
"artifact_count": 228,
|
| 6 |
"missing": [],
|
|
|
|
| 92 |
"surface": "repo_hf",
|
| 93 |
"shows": "Defines terminology that can be confused across data scope, task metrics, model branches, and public mirrors.",
|
| 94 |
"exists": true,
|
| 95 |
+
"bytes": 11122,
|
| 96 |
+
"sha256": "fe781a4eb5dd56454b5e0cb3383c88a2106c7bbf269888a0a7613b1618c8d196"
|
| 97 |
},
|
| 98 |
{
|
| 99 |
"id": "glossary_json",
|
|
|
|
| 103 |
"surface": "website_hf",
|
| 104 |
"shows": "Machine-readable terminology layer for the website, artifact dataset, model mirror, and public QA checks.",
|
| 105 |
"exists": true,
|
| 106 |
+
"bytes": 19260,
|
| 107 |
+
"sha256": "525de375608793cd34ab386819eac5291177b53ca5839d54a9046707206e844a"
|
| 108 |
},
|
| 109 |
{
|
| 110 |
"id": "research_roadmap",
|
|
|
|
| 1159 |
"surface": "website_hf",
|
| 1160 |
"shows": "Lists the official public sample HDF5, MP4, and RRD files, derived browser-preview clips, playback/download URLs, file sizes, browser behavior, and HDF5 group organization.",
|
| 1161 |
"exists": true,
|
| 1162 |
+
"bytes": 11210,
|
| 1163 |
+
"sha256": "e52ed2da6077c0f67fa37e0106cc59ab06b0e6fe62237837f0a7bb2dabdd9a03"
|
| 1164 |
},
|
| 1165 |
{
|
| 1166 |
"id": "quality_gates",
|
|
|
|
| 1182 |
"shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
|
| 1183 |
"exists": true,
|
| 1184 |
"bytes": 8640,
|
| 1185 |
+
"sha256": "b3d609ef68cafdd53e789b3c56edc3e7f984312bdabfa6388edb8c15cea78af3"
|
| 1186 |
},
|
| 1187 |
{
|
| 1188 |
"id": "public_surface_qa",
|
|
|
|
| 1399 |
"volatile": true,
|
| 1400 |
"shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
|
| 1401 |
"exists": true,
|
| 1402 |
+
"bytes": 24947,
|
| 1403 |
"hash_policy": "existence_and_size_only"
|
| 1404 |
},
|
| 1405 |
{
|
data/glossary.json
CHANGED
|
@@ -110,6 +110,14 @@
|
|
| 110 |
"do_not_confuse_with": "Task result summaries.",
|
| 111 |
"primary_files": ["docs/data/raw_sample_files.json"]
|
| 112 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
{
|
| 114 |
"term": "Interaction text",
|
| 115 |
"category": "files_features",
|
|
|
|
| 110 |
"do_not_confuse_with": "Task result summaries.",
|
| 111 |
"primary_files": ["docs/data/raw_sample_files.json"]
|
| 112 |
},
|
| 113 |
+
{
|
| 114 |
+
"term": "visualization.rrd",
|
| 115 |
+
"category": "files_features",
|
| 116 |
+
"plain_meaning": "Rerun viewer recording for visual inspection.",
|
| 117 |
+
"project_usage": "Can be downloaded from the official sample dataset and opened in Rerun 0.29.0 to inspect the sample episode. It is not used for published training or metric rows.",
|
| 118 |
+
"do_not_confuse_with": "MP4 video streams or model inputs.",
|
| 119 |
+
"primary_files": ["docs/data/raw_sample_files.json", "REPRODUCIBILITY.md"]
|
| 120 |
+
},
|
| 121 |
{
|
| 122 |
"term": "Interaction text",
|
| 123 |
"category": "files_features",
|
data/public_surface_qa.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-22T11:
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
@@ -18,7 +18,7 @@
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
-
"generated_at_utc": "2026-06-22T11:
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
@@ -28,7 +28,7 @@
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
-
"generated_at_utc": "2026-06-22T11:
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
|
@@ -38,7 +38,7 @@
|
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
-
"generated_at_utc": "2026-06-22T11:
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Public Project Surface",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:43:00+00:00",
|
| 5 |
"scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
|
| 6 |
"checks": [
|
| 7 |
{
|
|
|
|
| 18 |
"website_integrity": {
|
| 19 |
"exists": true,
|
| 20 |
"status": "pass",
|
| 21 |
+
"generated_at_utc": "2026-06-22T11:42:36+00:00"
|
| 22 |
},
|
| 23 |
"rendered_site_check": {
|
| 24 |
"exists": true,
|
|
|
|
| 28 |
"task_surface_integrity": {
|
| 29 |
"exists": true,
|
| 30 |
"status": "pass",
|
| 31 |
+
"generated_at_utc": "2026-06-22T11:42:36+00:00"
|
| 32 |
},
|
| 33 |
"source_alignment": {
|
| 34 |
"exists": true,
|
|
|
|
| 38 |
"scale_up_status": {
|
| 39 |
"exists": true,
|
| 40 |
"status": "pass",
|
| 41 |
+
"generated_at_utc": "2026-06-22T11:42:38+00:00"
|
| 42 |
},
|
| 43 |
"publication_package": {
|
| 44 |
"exists": true,
|
data/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-22T11:
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:43:47+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
data/source_alignment_audit.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-22T11:
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:43:30+00:00",
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
data/task_surface_integrity.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-22T11:
|
| 4 |
"summary": {
|
| 5 |
"original_walkthrough_task_count": 12,
|
| 6 |
"expected_original_walkthrough_task_count": 12,
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:42:36+00:00",
|
| 4 |
"summary": {
|
| 5 |
"original_walkthrough_task_count": 12,
|
| 6 |
"expected_original_walkthrough_task_count": 12,
|
data/website_integrity.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-22T11:
|
| 4 |
"docs_root": "docs",
|
| 5 |
"site_base": "/ropedia-xperience-10m-task-suite/",
|
| 6 |
"summary": {
|
|
@@ -80,8 +80,8 @@
|
|
| 80 |
"name": "project_overview_precedes_progress_ledger",
|
| 81 |
"status": "pass",
|
| 82 |
"reason": "The project overview should appear before the deeper progress ledger.",
|
| 83 |
-
"overview_index":
|
| 84 |
-
"evidence_index":
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"name": "project_status_links_json",
|
|
@@ -159,9 +159,9 @@
|
|
| 159 |
"name": "evaluation_protocol_between_overview_and_progress",
|
| 160 |
"status": "pass",
|
| 161 |
"reason": "The evaluation protocol should appear before the deeper evidence ledger.",
|
| 162 |
-
"overview_index":
|
| 163 |
-
"protocol_index":
|
| 164 |
-
"evidence_index":
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"name": "evaluation_protocol_links_json",
|
|
@@ -290,7 +290,7 @@
|
|
| 290 |
},
|
| 291 |
{
|
| 292 |
"path": "index.html",
|
| 293 |
-
"id_count":
|
| 294 |
"reference_count": 267,
|
| 295 |
"image_count": 56
|
| 296 |
},
|
|
@@ -355,7 +355,7 @@
|
|
| 355 |
},
|
| 356 |
{
|
| 357 |
"path": "data/glossary.json",
|
| 358 |
-
"bytes":
|
| 359 |
"top_level_type": "dict"
|
| 360 |
},
|
| 361 |
{
|
|
@@ -445,7 +445,7 @@
|
|
| 445 |
},
|
| 446 |
{
|
| 447 |
"path": "data/raw_sample_files.json",
|
| 448 |
-
"bytes":
|
| 449 |
"top_level_type": "dict"
|
| 450 |
},
|
| 451 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:42:36+00:00",
|
| 4 |
"docs_root": "docs",
|
| 5 |
"site_base": "/ropedia-xperience-10m-task-suite/",
|
| 6 |
"summary": {
|
|
|
|
| 80 |
"name": "project_overview_precedes_progress_ledger",
|
| 81 |
"status": "pass",
|
| 82 |
"reason": "The project overview should appear before the deeper progress ledger.",
|
| 83 |
+
"overview_index": 151276,
|
| 84 |
+
"evidence_index": 202699
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"name": "project_status_links_json",
|
|
|
|
| 159 |
"name": "evaluation_protocol_between_overview_and_progress",
|
| 160 |
"status": "pass",
|
| 161 |
"reason": "The evaluation protocol should appear before the deeper evidence ledger.",
|
| 162 |
+
"overview_index": 151276,
|
| 163 |
+
"protocol_index": 198889,
|
| 164 |
+
"evidence_index": 202699
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"name": "evaluation_protocol_links_json",
|
|
|
|
| 290 |
},
|
| 291 |
{
|
| 292 |
"path": "index.html",
|
| 293 |
+
"id_count": 101,
|
| 294 |
"reference_count": 267,
|
| 295 |
"image_count": 56
|
| 296 |
},
|
|
|
|
| 355 |
},
|
| 356 |
{
|
| 357 |
"path": "data/glossary.json",
|
| 358 |
+
"bytes": 19260,
|
| 359 |
"top_level_type": "dict"
|
| 360 |
},
|
| 361 |
{
|
|
|
|
| 445 |
},
|
| 446 |
{
|
| 447 |
"path": "data/raw_sample_files.json",
|
| 448 |
+
"bytes": 11210,
|
| 449 |
"top_level_type": "dict"
|
| 450 |
},
|
| 451 |
{
|
scripts/build_two_evidence_line_result_summary.py
ADDED
|
@@ -0,0 +1,383 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Build a concise result summary for the two public evidence lines."""
|
| 3 |
+
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import json
|
| 7 |
+
from datetime import datetime, timezone
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Repository root: this script sits one directory below it.
ROOT = Path(__file__).resolve().parents[1]

# Input documents read by the builder.
MATRIX_JSON = ROOT.joinpath("docs/data/task_method_20_result_matrix.json")
LINES_JSON = ROOT.joinpath("docs/data/two_evidence_lines.json")

# Generated outputs: a machine-readable summary and its Markdown rendering.
OUTPUT_JSON = ROOT.joinpath("docs/data/two_evidence_line_result_summary.json")
OUTPUT_MD = ROOT.joinpath("TWO_EVIDENCE_LINE_RESULT_SUMMARY.md")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def read_json(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON file at *path* and return the decoded value."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def write_json(path: Path, payload: dict) -> None:
    """Write *payload* to *path* as 2-space indented, key-sorted JSON.

    Missing parent directories are created first, and the file always ends
    with a single trailing newline.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    path.write_text(f"{serialized}\n", encoding="utf-8")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def markdown_table(headers: list[str], rows: list[list[str]]) -> str:
    """Render *headers* and *rows* as a pipe-delimited Markdown table.

    Every body cell is converted with ``str``. Line breaks inside a cell are
    flattened to a single space and literal ``|`` characters are
    backslash-escaped, so cell text can neither end the row early nor open
    an extra column. Header cells are emitted verbatim.

    Returns the table without a trailing newline. With no rows, only the
    header and separator lines are produced.
    """

    def render_cell(cell: object) -> str:
        text = str(cell)
        # Markdown treats CR and CRLF as line endings too, so flatten all
        # three forms; CRLF goes first so it collapses to one space, not two.
        for line_break in ("\r\n", "\r", "\n"):
            text = text.replace(line_break, " ")
        return text.replace("|", "\\|")

    lines = [
        "| " + " | ".join(headers) + " |",
        "| " + " | ".join("---" for _ in headers) + " |",
    ]
    lines.extend(
        "| " + " | ".join(render_cell(cell) for cell in row) + " |" for row in rows
    )
    return "\n".join(lines)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def line_for_series(scope: str) -> str:
    """Return the evidence-line id whose scope prefix matches *scope*.

    Raises:
        ValueError: if *scope* starts with neither known prefix.
    """
    prefix_to_line = (
        ("1 public sample episode", "single_public_sample_episode"),
        ("128 selected episodes", "selected_128_episode_surface"),
    )
    for prefix, line_id in prefix_to_line:
        if scope.startswith(prefix):
            return line_id
    raise ValueError(f"Cannot map series scope to evidence line: {scope}")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def build_method_blocks(lines_out: list[dict]) -> list[dict]:
    """Group the methods found in *lines_out* into four fixed method blocks.

    Each entry of *lines_out* must carry ``id``, ``label`` and a ``methods``
    list. The methods are indexed by their ``id``; every block then lists the
    labels of its member methods and the sums of their scored, record,
    direct-scored and proxy-scored counts.

    Raises:
        KeyError: if a block references a method id that no line provides,
            or if a required key is missing from a line or method.
    """
    # Index every method by id, tagged with the line it came from.
    methods_by_id: dict[str, dict] = {}
    for line in lines_out:
        for method in line["methods"]:
            methods_by_id[method["id"]] = {
                **method,
                "line_label": line["label"],
                "line_id": line["id"],
            }

    # (line_id, line_label, block, method_ids, evidence_type, read_as)
    block_specs = (
        (
            "single_public_sample_episode",
            "1 sample episode",
            "Task-head baselines",
            ["minimal", "neural_mlp"],
            "Direct target metrics on the public sample windows.",
            "Task construction, local reproducibility, and Minimal-vs-Neural behavior.",
        ),
        (
            "selected_128_episode_surface",
            "128 selected episodes",
            "Aligned baseline heads",
            [
                "metadata128_simple",
                "metadata128_neural_mlp",
                "raw128_simple",
                "raw128_neural_mlp",
            ],
            "Direct processed-target metrics where available; compact proxies for documented raw-target gaps.",
            "Same-split metadata/raw-feature baseline comparison.",
        ),
        (
            "selected_128_episode_surface",
            "128 selected episodes",
            "Qwen3-Omni series",
            ["qwen3_omni_v6_lora"],
            "Verified selected-128 Qwen3-Omni v6 LoRA plus source-linked task-specific probes.",
            "Trainable Qwen3-Omni diagnostic baseline on the selected-128 surface.",
        ),
        (
            "selected_128_episode_surface",
            "128 selected episodes",
            "Cosmos3 series",
            [
                "cosmos3_super_reasoner",
                "cosmos3_nano_future_window",
            ],
            "Verified Cosmos3-Super Reasoner and Cosmos3-Nano Future Window public-safe artifacts.",
            "Cosmos3 reasoner and future-window diagnostics on the selected-128 surface.",
        ),
    )

    blocks = []
    for line_id, line_label, block_name, method_ids, evidence_type, read_as in block_specs:
        members = [methods_by_id[method_id] for method_id in method_ids]
        blocks.append(
            {
                "line_id": line_id,
                "line_label": line_label,
                "block": block_name,
                "method_ids": method_ids,
                "evidence_type": evidence_type,
                "read_as": read_as,
                "methods": [member["label"] for member in members],
                "scored_method_task_count": sum(
                    member["scored_task_count"] for member in members
                ),
                "method_task_record_count": sum(
                    member["result_record_count"] for member in members
                ),
                "direct_scored_method_task_count": sum(
                    member["direct_scored_task_count"] for member in members
                ),
                "proxy_scored_method_task_count": sum(
                    member["proxy_scored_task_count"] for member in members
                ),
            }
        )
    return blocks
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def build_payload(matrix: dict, lines: dict) -> dict:
    """Assemble the two-evidence-line summary payload.

    *matrix* supplies ``task_count``, ``method_count``, ``series`` and
    ``records``. *lines* supplies the per-line metadata under ``lines``,
    ``interpretation_rule``, and several optional reader-facing fields.
    Each series is assigned to an evidence line by its ``scope`` and its
    counts are accumulated onto that line. Records flagged ``proxy_scored``
    are listed separately.

    Raises:
        KeyError: if a required key is missing, or a series or record refers
            to a line or series id that is not present.
        ValueError: propagated from ``line_for_series`` for an unknown scope.
    """
    line_meta = {line["id"]: line for line in lines["lines"]}

    # One accumulator row per evidence line; counters start at zero and are
    # filled in while walking the series below.
    line_rows: dict[str, dict] = {}
    for line_id, meta in line_meta.items():
        line_rows[line_id] = {
            "id": line_id,
            "label": meta["label"],
            "short_label": meta.get("short_label"),
            "data_unit": meta["data_unit"],
            "result_statement": meta.get("result_statement"),
            "best_read_as": meta.get("best_read_as"),
            "read_separately_from": meta.get("read_separately_from"),
            "primary_use": meta["best_use"],
            "task_count": matrix["task_count"],
            "method_count": 0,
            "method_task_record_count": 0,
            "scored_method_task_count": 0,
            "direct_scored_method_task_count": 0,
            "proxy_scored_method_task_count": 0,
            "methods": [],
            "primary_visuals": meta.get("primary_visuals", []),
            "artifact_entry_points": meta["primary_artifacts"],
        }

    series_to_line: dict[str, str] = {}
    for series in matrix["series"]:
        target_id = line_for_series(series["scope"])
        series_to_line[series["id"]] = target_id
        bucket = line_rows[target_id]

        record_count = series["result_record_count"]
        scored_count = series["scored_task_count"]
        proxy_count = series.get("proxy_scored_task_count", 0)
        # Direct scores are whatever was scored without a proxy.
        direct_count = scored_count - proxy_count

        bucket["method_count"] += 1
        bucket["method_task_record_count"] += record_count
        bucket["scored_method_task_count"] += scored_count
        bucket["proxy_scored_method_task_count"] += proxy_count
        bucket["direct_scored_method_task_count"] += direct_count
        bucket["methods"].append(
            {
                "id": series["id"],
                "label": series["label"],
                "scope": series["scope"],
                "method_detail": series.get("method_detail"),
                "scored_task_count": scored_count,
                "result_record_count": record_count,
                "direct_scored_task_count": direct_count,
                "proxy_scored_task_count": proxy_count,
                "status_counts": series.get("status_counts", {}),
            }
        )

    proxy_records = [
        {
            "line_id": series_to_line[record["series_id"]],
            "task_number": record["task_number"],
            "task_id": record["task_id"],
            "task_label": record["task_label"],
            "series_id": record["series_id"],
            "method": record["method"],
            "metric_key": record.get("metric_key"),
            "source": record.get("source"),
            "reason": record.get("reason"),
        }
        for record in matrix["records"]
        if record.get("proxy_scored")
    ]

    lines_out = list(line_rows.values())

    def total(counter_key: str) -> int:
        # Sum one per-line counter across every evidence line.
        return sum(row[counter_key] for row in lines_out)

    summary = {
        "line_count": len(lines_out),
        "task_count": matrix["task_count"],
        "method_count": matrix["method_count"],
        "method_task_record_count": total("method_task_record_count"),
        "scored_method_task_count": total("scored_method_task_count"),
        "direct_scored_method_task_count": total("direct_scored_method_task_count"),
        "proxy_scored_method_task_count": total("proxy_scored_method_task_count"),
    }

    # NOTE(review): the "six" in the last reading-order reason is a literal
    # and is not derived from the computed proxy total; confirm it still
    # matches the matrix.
    reading_order = [
        {
            "step": "Choose the evidence line",
            "reason": "Line 1 answers task-lab and reproducibility questions; line 2 answers selected-128 comparison questions.",
        },
        {
            "step": "Open the matching radar",
            "reason": "Use the 1-episode radar for Minimal-vs-Neural behavior and the 128-episode radar for metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano.",
        },
        {
            "step": "Inspect the matrix row",
            "reason": "Every numeric score is tied to a method, task, metric key, source artifact, and proxy flag.",
        },
        {
            "step": "Check proxy cells before interpreting totals",
            "reason": "The six compact-proxy cells are numeric but are not direct raw-target measurements.",
        },
    ]

    reader_policy = {
        "single_public_sample_episode": (
            "Use for task construction, raw-file inspection, local reproducibility, "
            "and controlled Minimal-vs-Neural baseline behavior."
        ),
        "selected_128_episode_surface": (
            "Use for held-out comparison, metadata/raw-feature baselines, Qwen3-Omni v6 LoRA, "
            "Cosmos3-Super Reasoner, Cosmos3-Nano Future Window, and scale-up decisions."
        ),
        "proxy_policy": (
            "Proxy-scored cells stay numeric only when the source artifact and reason "
            "are attached; they should not be read as direct raw-target measurements."
        ),
    }

    return {
        "title": "Two Evidence-Line Result Summary",
        "status": "pass",
        "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "source_matrix": "docs/data/task_method_20_result_matrix.json",
        "source_lines": "docs/data/two_evidence_lines.json",
        "interpretation_rule": lines["interpretation_rule"],
        "reader_summary": lines.get("reader_summary"),
        "score_formula": lines.get("score_formula"),
        "summary": summary,
        "lines": lines_out,
        "method_blocks": build_method_blocks(lines_out),
        "related_model_artifacts": lines.get("related_model_artifacts", []),
        "proxy_records": proxy_records,
        "reading_order": reading_order,
        "reader_policy": reader_policy,
    }
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def write_markdown(payload: dict) -> None:
    """Render *payload* as a Markdown report and write it to ``OUTPUT_MD``.

    *payload* is expected to have the shape returned by ``build_payload``:
    it is read for ``summary``, ``lines``, ``method_blocks``,
    ``proxy_records``, ``reading_order``, ``reader_policy`` and the
    top-level descriptive fields. Every table, including the first
    "Read This First" table, is produced by ``markdown_table`` so that all
    cells get the same escaping. The target file is overwritten.

    Raises:
        KeyError: if a required key is missing from *payload*.
    """
    summary = payload["summary"]

    line_rows = []
    entry_rows = []
    method_rows = []
    for line in payload["lines"]:
        line_rows.append(
            [
                line["label"],
                line.get("result_statement") or "",
                line.get("best_read_as") or line["primary_use"],
                line.get("read_separately_from") or "",
            ]
        )
        entry_rows.append(
            [
                line["label"],
                str(line["method_count"]),
                str(line["task_count"]),
                f"{line['scored_method_task_count']}/{line['method_task_record_count']}",
                str(line["direct_scored_method_task_count"]),
                str(line["proxy_scored_method_task_count"]),
                "<br>".join(line.get("primary_visuals", [])),
                "<br>".join(line["artifact_entry_points"]),
            ]
        )
        method_rows.extend(
            [
                line["label"],
                method["label"],
                method.get("method_detail") or "",
                f"{method['scored_task_count']}/{method['result_record_count']}",
                str(method["direct_scored_task_count"]),
                str(method["proxy_scored_task_count"]),
            ]
            for method in line["methods"]
        )

    proxy_rows = [
        [
            row["task_number"],
            row["task_label"],
            row["method"],
            row.get("metric_key") or "",
            row.get("reason") or "",
        ]
        for row in payload["proxy_records"]
    ]
    method_block_rows = [
        [
            block["line_label"],
            block["block"],
            ", ".join(block["methods"]),
            f"{block['scored_method_task_count']}/{block['method_task_record_count']}",
            str(block["direct_scored_method_task_count"]),
            str(block["proxy_scored_method_task_count"]),
            block["evidence_type"],
            block["read_as"],
        ]
        for block in payload["method_blocks"]
    ]
    related_artifact_rows = [
        [row.get("name", ""), row.get("role", ""), row.get("repo", "")]
        for row in payload.get("related_model_artifacts", [])
    ]
    reading_rows = [[row["step"], row["reason"]] for row in payload["reading_order"]]

    # Render each table up front so the template below stays readable.
    line_table = markdown_table(
        ["Line", "What the scores mean", "Best use", "Read separately from"],
        line_rows,
    )
    entry_table = markdown_table(
        ["Line", "Methods", "Tasks", "Scored records", "Direct scores", "Proxy scores", "Primary visuals", "Source artifacts"],
        entry_rows,
    )
    method_block_table = markdown_table(
        ["Line", "Method block", "Methods", "Scored records", "Direct scores", "Proxy scores", "Evidence type", "Read as"],
        method_block_rows,
    )
    method_table = markdown_table(
        ["Line", "Method", "Method detail", "Scored records", "Direct scores", "Proxy scores"],
        method_rows,
    )
    related_table = markdown_table(["Artifact", "Role", "Link or path"], related_artifact_rows)
    proxy_table = markdown_table(["Task", "Task label", "Method", "Metric", "Reason"], proxy_rows)
    reading_table = markdown_table(["Step", "Reason"], reading_rows)

    text = f"""# Two Evidence-Line Result Summary

Generated: `{payload['generated_at_utc']}`.

Source matrix: [`{payload['source_matrix']}`]({payload['source_matrix']})

Interpretation rule: {payload['interpretation_rule']}

## Read This First

{payload.get('reader_summary') or ''}

Score formula: {payload.get('score_formula') or ''}

{line_table}

## Public Score Totals

- Lines: {summary['line_count']}
- Tasks per method: {summary['task_count']}
- Methods: {summary['method_count']}
- Scored records: {summary['scored_method_task_count']}/{summary['method_task_record_count']}
- Direct scores: {summary['direct_scored_method_task_count']}
- Compact-proxy scores: {summary['proxy_scored_method_task_count']} documented cells

## Line Ledger And Entry Points

{entry_table}

## Method Blocks By Evidence Line

{method_block_table}

## Method Detail By Line

{method_table}

## Related Model Artifacts

{related_table}

## Proxy-Scored Cells

{proxy_table}

## Reading Order

{reading_table}

## Reader Policy

- 1 sample episode: {payload['reader_policy']['single_public_sample_episode']}
- 128 selected episodes: {payload['reader_policy']['selected_128_episode_surface']}
- Proxy scores: {payload['reader_policy']['proxy_policy']}
"""
    OUTPUT_MD.write_text(text, encoding="utf-8")
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
def main() -> int:
    """Build the summary from both input files, write both outputs, and return 0."""
    matrix = read_json(MATRIX_JSON)
    lines = read_json(LINES_JSON)
    payload = build_payload(matrix, lines)

    write_json(OUTPUT_JSON, payload)
    write_markdown(payload)

    # Report the written files relative to the repository root.
    json_rel = OUTPUT_JSON.relative_to(ROOT)
    md_rel = OUTPUT_MD.relative_to(ROOT)
    print(f"Wrote {json_rel} and {md_rel}")
    return 0
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|