cy0307 committed on
Commit
da00455
·
verified ·
1 Parent(s): 45834a6

Add files using upload-large-folder tool

Browse files
data/artifact_index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
- "generated_at_utc": "2026-06-22T11:26:48+00:00",
4
  "status": "pass",
5
  "artifact_count": 228,
6
  "missing": [],
@@ -92,8 +92,8 @@
92
  "surface": "repo_hf",
93
  "shows": "Defines terminology that can be confused across data scope, task metrics, model branches, and public mirrors.",
94
  "exists": true,
95
- "bytes": 10861,
96
- "sha256": "7840db0fabaa4a4d1af706de1b14b2c566ba3a38b53ef68b082cf7ba24b8d9c5"
97
  },
98
  {
99
  "id": "glossary_json",
@@ -103,8 +103,8 @@
103
  "surface": "website_hf",
104
  "shows": "Machine-readable terminology layer for the website, artifact dataset, model mirror, and public QA checks.",
105
  "exists": true,
106
- "bytes": 18767,
107
- "sha256": "d900f71e9e28540efbd258acff686d01e1d6046834872c74d0494393c8525b9a"
108
  },
109
  {
110
  "id": "research_roadmap",
@@ -1159,8 +1159,8 @@
1159
  "surface": "website_hf",
1160
  "shows": "Lists the official public sample HDF5, MP4, and RRD files, derived browser-preview clips, playback/download URLs, file sizes, browser behavior, and HDF5 group organization.",
1161
  "exists": true,
1162
- "bytes": 9777,
1163
- "sha256": "16020e539b6ebe18e00801c2c6a51df3975afb4934d422fd11bc532639b578ec"
1164
  },
1165
  {
1166
  "id": "quality_gates",
@@ -1182,7 +1182,7 @@
1182
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
1183
  "exists": true,
1184
  "bytes": 8640,
1185
- "sha256": "2f01aed08e480cb4b5e0924bbd80518d290ca363336460a2f80197bbb962b3b0"
1186
  },
1187
  {
1188
  "id": "public_surface_qa",
@@ -1399,7 +1399,7 @@
1399
  "volatile": true,
1400
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1401
  "exists": true,
1402
- "bytes": 24946,
1403
  "hash_policy": "existence_and_size_only"
1404
  },
1405
  {
 
1
  {
2
  "title": "Ropedia Xperience-10M Task Suite Artifact Index",
3
+ "generated_at_utc": "2026-06-22T11:43:01+00:00",
4
  "status": "pass",
5
  "artifact_count": 228,
6
  "missing": [],
 
92
  "surface": "repo_hf",
93
  "shows": "Defines terminology that can be confused across data scope, task metrics, model branches, and public mirrors.",
94
  "exists": true,
95
+ "bytes": 11122,
96
+ "sha256": "fe781a4eb5dd56454b5e0cb3383c88a2106c7bbf269888a0a7613b1618c8d196"
97
  },
98
  {
99
  "id": "glossary_json",
 
103
  "surface": "website_hf",
104
  "shows": "Machine-readable terminology layer for the website, artifact dataset, model mirror, and public QA checks.",
105
  "exists": true,
106
+ "bytes": 19260,
107
+ "sha256": "525de375608793cd34ab386819eac5291177b53ca5839d54a9046707206e844a"
108
  },
109
  {
110
  "id": "research_roadmap",
 
1159
  "surface": "website_hf",
1160
  "shows": "Lists the official public sample HDF5, MP4, and RRD files, derived browser-preview clips, playback/download URLs, file sizes, browser behavior, and HDF5 group organization.",
1161
  "exists": true,
1162
+ "bytes": 11210,
1163
+ "sha256": "e52ed2da6077c0f67fa37e0106cc59ab06b0e6fe62237837f0a7bb2dabdd9a03"
1164
  },
1165
  {
1166
  "id": "quality_gates",
 
1182
  "shows": "Machine-readable release-check summary for validators, mirrors, and public project surfaces.",
1183
  "exists": true,
1184
  "bytes": 8640,
1185
+ "sha256": "b3d609ef68cafdd53e789b3c56edc3e7f984312bdabfa6388edb8c15cea78af3"
1186
  },
1187
  {
1188
  "id": "public_surface_qa",
 
1399
  "volatile": true,
1400
  "shows": "Confirms local website links, anchors, JSON data files, and referenced images resolve.",
1401
  "exists": true,
1402
+ "bytes": 24947,
1403
  "hash_policy": "existence_and_size_only"
1404
  },
1405
  {
data/glossary.json CHANGED
@@ -110,6 +110,14 @@
110
  "do_not_confuse_with": "Task result summaries.",
111
  "primary_files": ["docs/data/raw_sample_files.json"]
112
  },
 
 
 
 
 
 
 
 
113
  {
114
  "term": "Interaction text",
115
  "category": "files_features",
 
110
  "do_not_confuse_with": "Task result summaries.",
111
  "primary_files": ["docs/data/raw_sample_files.json"]
112
  },
113
+ {
114
+ "term": "visualization.rrd",
115
+ "category": "files_features",
116
+ "plain_meaning": "Rerun viewer recording for visual inspection.",
117
+ "project_usage": "Can be downloaded from the official sample dataset and opened in Rerun 0.29.0 to inspect the sample episode. It is not used for published training or metric rows.",
118
+ "do_not_confuse_with": "MP4 video streams or model inputs.",
119
+ "primary_files": ["docs/data/raw_sample_files.json", "REPRODUCIBILITY.md"]
120
+ },
121
  {
122
  "term": "Interaction text",
123
  "category": "files_features",
data/public_surface_qa.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-22T11:28:35+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
@@ -18,7 +18,7 @@
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
- "generated_at_utc": "2026-06-22T11:26:47+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
@@ -28,7 +28,7 @@
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
- "generated_at_utc": "2026-06-22T11:26:46+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
@@ -38,7 +38,7 @@
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
- "generated_at_utc": "2026-06-22T11:26:49+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
 
1
  {
2
  "title": "Ropedia Xperience-10M Public Project Surface",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-22T11:43:00+00:00",
5
  "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
6
  "checks": [
7
  {
 
18
  "website_integrity": {
19
  "exists": true,
20
  "status": "pass",
21
+ "generated_at_utc": "2026-06-22T11:42:36+00:00"
22
  },
23
  "rendered_site_check": {
24
  "exists": true,
 
28
  "task_surface_integrity": {
29
  "exists": true,
30
  "status": "pass",
31
+ "generated_at_utc": "2026-06-22T11:42:36+00:00"
32
  },
33
  "source_alignment": {
34
  "exists": true,
 
38
  "scale_up_status": {
39
  "exists": true,
40
  "status": "pass",
41
+ "generated_at_utc": "2026-06-22T11:42:38+00:00"
42
  },
43
  "publication_package": {
44
  "exists": true,
data/publication_audit.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-22T11:27:50+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-22T11:43:47+00:00",
4
  "checks": [
5
  {
6
  "name": "required_publication_assets_present",
data/source_alignment_audit.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
- "generated_at_utc": "2026-06-22T11:26:46+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
 
1
  {
2
  "title": "Ropedia Xperience-10M Source Alignment Note",
3
  "status": "pass",
4
+ "generated_at_utc": "2026-06-22T11:43:30+00:00",
5
  "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
6
  "alignment_summary": {
7
  "full_dataset_repo": "ropedia-ai/xperience-10m",
data/task_surface_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-22T11:26:46+00:00",
4
  "summary": {
5
  "original_walkthrough_task_count": 12,
6
  "expected_original_walkthrough_task_count": 12,
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-22T11:42:36+00:00",
4
  "summary": {
5
  "original_walkthrough_task_count": 12,
6
  "expected_original_walkthrough_task_count": 12,
data/website_integrity.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "status": "pass",
3
- "generated_at_utc": "2026-06-22T11:26:47+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
@@ -80,8 +80,8 @@
80
  "name": "project_overview_precedes_progress_ledger",
81
  "status": "pass",
82
  "reason": "The project overview should appear before the deeper progress ledger.",
83
- "overview_index": 151125,
84
- "evidence_index": 202548
85
  },
86
  {
87
  "name": "project_status_links_json",
@@ -159,9 +159,9 @@
159
  "name": "evaluation_protocol_between_overview_and_progress",
160
  "status": "pass",
161
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
162
- "overview_index": 151125,
163
- "protocol_index": 198738,
164
- "evidence_index": 202548
165
  },
166
  {
167
  "name": "evaluation_protocol_links_json",
@@ -290,7 +290,7 @@
290
  },
291
  {
292
  "path": "index.html",
293
- "id_count": 100,
294
  "reference_count": 267,
295
  "image_count": 56
296
  },
@@ -355,7 +355,7 @@
355
  },
356
  {
357
  "path": "data/glossary.json",
358
- "bytes": 18767,
359
  "top_level_type": "dict"
360
  },
361
  {
@@ -445,7 +445,7 @@
445
  },
446
  {
447
  "path": "data/raw_sample_files.json",
448
- "bytes": 9777,
449
  "top_level_type": "dict"
450
  },
451
  {
 
1
  {
2
  "status": "pass",
3
+ "generated_at_utc": "2026-06-22T11:42:36+00:00",
4
  "docs_root": "docs",
5
  "site_base": "/ropedia-xperience-10m-task-suite/",
6
  "summary": {
 
80
  "name": "project_overview_precedes_progress_ledger",
81
  "status": "pass",
82
  "reason": "The project overview should appear before the deeper progress ledger.",
83
+ "overview_index": 151276,
84
+ "evidence_index": 202699
85
  },
86
  {
87
  "name": "project_status_links_json",
 
159
  "name": "evaluation_protocol_between_overview_and_progress",
160
  "status": "pass",
161
  "reason": "The evaluation protocol should appear before the deeper evidence ledger.",
162
+ "overview_index": 151276,
163
+ "protocol_index": 198889,
164
+ "evidence_index": 202699
165
  },
166
  {
167
  "name": "evaluation_protocol_links_json",
 
290
  },
291
  {
292
  "path": "index.html",
293
+ "id_count": 101,
294
  "reference_count": 267,
295
  "image_count": 56
296
  },
 
355
  },
356
  {
357
  "path": "data/glossary.json",
358
+ "bytes": 19260,
359
  "top_level_type": "dict"
360
  },
361
  {
 
445
  },
446
  {
447
  "path": "data/raw_sample_files.json",
448
+ "bytes": 11210,
449
  "top_level_type": "dict"
450
  },
451
  {
scripts/build_two_evidence_line_result_summary.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Build a concise result summary for the two public evidence lines."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import json
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+
10
+
11
# Repository root: the directory one level above the folder containing this script.
ROOT = Path(__file__).resolve().parents[1]

# Input documents read by this script.
MATRIX_JSON = ROOT.joinpath("docs", "data", "task_method_20_result_matrix.json")
LINES_JSON = ROOT.joinpath("docs", "data", "two_evidence_lines.json")

# Output documents written by this script (JSON payload and Markdown rendering).
OUTPUT_JSON = ROOT.joinpath("docs", "data", "two_evidence_line_result_summary.json")
OUTPUT_MD = ROOT.joinpath("TWO_EVIDENCE_LINE_RESULT_SUMMARY.md")
16
+
17
+
18
def read_json(path: Path) -> dict:
    """Read the UTF-8 text file at ``path`` and decode it as JSON.

    Args:
        path: Location of the JSON document to load.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)
20
+
21
+
22
def write_json(path: Path, payload: dict) -> None:
    """Write ``payload`` to ``path`` as indented, key-sorted JSON.

    Missing parent directories are created first. The file is UTF-8 encoded
    and ends with a single trailing newline.

    Args:
        path: Destination file.
        payload: JSON-serializable mapping to write.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2, sort_keys=True)
    path.write_text(f"{serialized}\n", encoding="utf-8")
25
+
26
+
27
def markdown_table(headers: list[str], rows: list[list[str]]) -> str:
    """Render a pipe-delimited Markdown table.

    Header cells are emitted verbatim. Body cells are converted with ``str``,
    newlines are replaced by spaces, and ``|`` is backslash-escaped so a cell
    cannot break out of its column.

    Args:
        headers: Column titles.
        rows: Body rows; each row is a sequence of cell values.

    Returns:
        The table as one string, lines joined with ``\\n`` and no trailing newline.
    """

    def as_row(cells) -> str:
        # Wrap already-prepared cell strings in the leading/trailing pipes.
        return "| " + " | ".join(cells) + " |"

    def clean(cell) -> str:
        # Flatten newlines first, then escape the column delimiter.
        return str(cell).replace("\n", " ").replace("|", "\\|")

    rendered = [as_row(headers), as_row("---" for _ in headers)]
    rendered.extend(as_row(clean(cell) for cell in row) for row in rows)
    return "\n".join(rendered)
36
+
37
+
38
def line_for_series(scope: str) -> str:
    """Map a series scope string to the id of the evidence line it belongs to.

    The mapping is decided by the prefix of ``scope``.

    Args:
        scope: Scope description of a series from the result matrix.

    Returns:
        ``"single_public_sample_episode"`` or ``"selected_128_episode_surface"``.

    Raises:
        ValueError: If ``scope`` starts with neither known prefix.
    """
    prefix_to_line_id = (
        ("1 public sample episode", "single_public_sample_episode"),
        ("128 selected episodes", "selected_128_episode_surface"),
    )
    for prefix, line_id in prefix_to_line_id:
        if scope.startswith(prefix):
            return line_id
    raise ValueError(f"Cannot map series scope to evidence line: {scope}")
44
+
45
+
46
def build_method_blocks(lines_out: list[dict]) -> list[dict]:
    """Group methods into four fixed reporting blocks and total their counts.

    Args:
        lines_out: Evidence-line rows. Each row needs ``id``, ``label`` and a
            ``methods`` list whose entries carry ``id``, ``label``,
            ``scored_task_count``, ``result_record_count``,
            ``direct_scored_task_count`` and ``proxy_scored_task_count``.

    Returns:
        One dict per block holding the block description, the labels of its
        methods, and the summed counts across those methods.

    Raises:
        KeyError: If a method id listed in a block is absent from ``lines_out``.
    """
    # Index every method by id, tagged with the line it was found under.
    method_index: dict[str, dict] = {}
    for line in lines_out:
        for method in line["methods"]:
            method_id = method["id"]
            method_index[method_id] = {
                **method,
                "line_label": line["label"],
                "line_id": line["id"],
            }

    # (output key, per-method source key) pairs, in output order.
    count_fields = (
        ("scored_method_task_count", "scored_task_count"),
        ("method_task_record_count", "result_record_count"),
        ("direct_scored_method_task_count", "direct_scored_task_count"),
        ("proxy_scored_method_task_count", "proxy_scored_task_count"),
    )

    def totals_for(method_ids: list[str]) -> dict:
        members = [method_index[method_id] for method_id in method_ids]
        totals = {"methods": [member["label"] for member in members]}
        for out_key, source_key in count_fields:
            totals[out_key] = sum(member[source_key] for member in members)
        return totals

    field_names = ("line_id", "line_label", "block", "method_ids", "evidence_type", "read_as")
    block_specs = [
        (
            "single_public_sample_episode",
            "1 sample episode",
            "Task-head baselines",
            ["minimal", "neural_mlp"],
            "Direct target metrics on the public sample windows.",
            "Task construction, local reproducibility, and Minimal-vs-Neural behavior.",
        ),
        (
            "selected_128_episode_surface",
            "128 selected episodes",
            "Aligned baseline heads",
            [
                "metadata128_simple",
                "metadata128_neural_mlp",
                "raw128_simple",
                "raw128_neural_mlp",
            ],
            "Direct processed-target metrics where available; compact proxies for documented raw-target gaps.",
            "Same-split metadata/raw-feature baseline comparison.",
        ),
        (
            "selected_128_episode_surface",
            "128 selected episodes",
            "Qwen3-Omni series",
            ["qwen3_omni_v6_lora"],
            "Verified selected-128 Qwen3-Omni v6 LoRA plus source-linked task-specific probes.",
            "Trainable Qwen3-Omni diagnostic baseline on the selected-128 surface.",
        ),
        (
            "selected_128_episode_surface",
            "128 selected episodes",
            "Cosmos3 series",
            [
                "cosmos3_super_reasoner",
                "cosmos3_nano_future_window",
            ],
            "Verified Cosmos3-Super Reasoner and Cosmos3-Nano Future Window public-safe artifacts.",
            "Cosmos3 reasoner and future-window diagnostics on the selected-128 surface.",
        ),
    ]

    blocks = []
    for spec in block_specs:
        block = dict(zip(field_names, spec))
        # Descriptive fields first, then method labels and summed counts.
        block.update(totals_for(block["method_ids"]))
        blocks.append(block)
    return blocks
108
+
109
+
110
def build_payload(matrix: dict, lines: dict) -> dict:
    """Assemble the two-evidence-line summary payload.

    Args:
        matrix: Result matrix document. Reads ``task_count``, ``method_count``,
            ``series`` and ``records``.
        lines: Evidence-line document. Reads ``lines`` and
            ``interpretation_rule``, plus the optional ``reader_summary``,
            ``score_formula`` and ``related_model_artifacts``.

    Returns:
        A dict with overall totals, one row per evidence line (including its
        methods), the method blocks, the proxy-scored records, and static
        reading guidance. ``generated_at_utc`` is the current UTC time.

    Raises:
        KeyError: If a required key is missing, or a series or record refers
            to an evidence line or series that is not defined.
        ValueError: If a series scope cannot be mapped to an evidence line.
    """
    line_meta = {line["id"]: line for line in lines["lines"]}

    def fresh_line_row(line_id: str, meta: dict) -> dict:
        # Descriptive fields come from the line metadata; counters start at
        # zero and are filled in while walking the matrix series below.
        return {
            "id": line_id,
            "label": meta["label"],
            "short_label": meta.get("short_label"),
            "data_unit": meta["data_unit"],
            "result_statement": meta.get("result_statement"),
            "best_read_as": meta.get("best_read_as"),
            "read_separately_from": meta.get("read_separately_from"),
            "primary_use": meta["best_use"],
            "task_count": matrix["task_count"],
            "method_count": 0,
            "method_task_record_count": 0,
            "scored_method_task_count": 0,
            "direct_scored_method_task_count": 0,
            "proxy_scored_method_task_count": 0,
            "methods": [],
            "primary_visuals": meta.get("primary_visuals", []),
            "artifact_entry_points": meta["primary_artifacts"],
        }

    line_rows: dict[str, dict] = {}
    for line_id, meta in line_meta.items():
        line_rows[line_id] = fresh_line_row(line_id, meta)

    series_to_line: dict[str, str] = {}
    for series in matrix["series"]:
        owner_id = line_for_series(series["scope"])
        series_to_line[series["id"]] = owner_id
        row = line_rows[owner_id]

        record_count = series["result_record_count"]
        scored_count = series["scored_task_count"]
        proxy_count = series.get("proxy_scored_task_count", 0)
        # Direct scores are the scored tasks that are not proxy-scored.
        direct_count = scored_count - proxy_count

        row["method_count"] += 1
        row["method_task_record_count"] += record_count
        row["scored_method_task_count"] += scored_count
        row["proxy_scored_method_task_count"] += proxy_count
        row["direct_scored_method_task_count"] += direct_count
        row["methods"].append(
            {
                "id": series["id"],
                "label": series["label"],
                "scope": series["scope"],
                "method_detail": series.get("method_detail"),
                "scored_task_count": scored_count,
                "result_record_count": record_count,
                "direct_scored_task_count": direct_count,
                "proxy_scored_task_count": proxy_count,
                "status_counts": series.get("status_counts", {}),
            }
        )

    # Keep only the proxy-scored matrix records, tagged with their line.
    proxy_records = [
        {
            "line_id": series_to_line[record["series_id"]],
            "task_number": record["task_number"],
            "task_id": record["task_id"],
            "task_label": record["task_label"],
            "series_id": record["series_id"],
            "method": record["method"],
            "metric_key": record.get("metric_key"),
            "source": record.get("source"),
            "reason": record.get("reason"),
        }
        for record in matrix["records"]
        if record.get("proxy_scored")
    ]

    lines_out = list(line_rows.values())

    def total_of(field: str) -> int:
        return sum(line[field] for line in lines_out)

    summary = {
        "line_count": len(lines_out),
        "task_count": matrix["task_count"],
        "method_count": matrix["method_count"],
        "method_task_record_count": total_of("method_task_record_count"),
        "scored_method_task_count": total_of("scored_method_task_count"),
        "direct_scored_method_task_count": total_of("direct_scored_method_task_count"),
        "proxy_scored_method_task_count": total_of("proxy_scored_method_task_count"),
    }

    reading_order = [
        {
            "step": "Choose the evidence line",
            "reason": "Line 1 answers task-lab and reproducibility questions; line 2 answers selected-128 comparison questions.",
        },
        {
            "step": "Open the matching radar",
            "reason": "Use the 1-episode radar for Minimal-vs-Neural behavior and the 128-episode radar for metadata/raw baselines, Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano.",
        },
        {
            "step": "Inspect the matrix row",
            "reason": "Every numeric score is tied to a method, task, metric key, source artifact, and proxy flag.",
        },
        {
            "step": "Check proxy cells before interpreting totals",
            "reason": "The six compact-proxy cells are numeric but are not direct raw-target measurements.",
        },
    ]

    reader_policy = {
        "single_public_sample_episode": (
            "Use for task construction, raw-file inspection, local reproducibility, "
            "and controlled Minimal-vs-Neural baseline behavior."
        ),
        "selected_128_episode_surface": (
            "Use for held-out comparison, metadata/raw-feature baselines, Qwen3-Omni v6 LoRA, "
            "Cosmos3-Super Reasoner, Cosmos3-Nano Future Window, and scale-up decisions."
        ),
        "proxy_policy": (
            "Proxy-scored cells stay numeric only when the source artifact and reason "
            "are attached; they should not be read as direct raw-target measurements."
        ),
    }

    return {
        "title": "Two Evidence-Line Result Summary",
        "status": "pass",
        "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "source_matrix": "docs/data/task_method_20_result_matrix.json",
        "source_lines": "docs/data/two_evidence_lines.json",
        "interpretation_rule": lines["interpretation_rule"],
        "reader_summary": lines.get("reader_summary"),
        "score_formula": lines.get("score_formula"),
        "summary": summary,
        "lines": lines_out,
        "method_blocks": build_method_blocks(lines_out),
        "related_model_artifacts": lines.get("related_model_artifacts", []),
        "proxy_records": proxy_records,
        "reading_order": reading_order,
        "reader_policy": reader_policy,
    }
242
+
243
+
244
def write_markdown(payload: dict) -> None:
    """Render ``payload`` as a Markdown report and write it to ``OUTPUT_MD``.

    Every table, including the leading "Read This First" overview, is rendered
    through ``markdown_table`` so that all cells get the same escaping: pipes
    are backslash-escaped and embedded newlines are flattened to spaces.

    Args:
        payload: Summary payload. Reads ``summary``, ``lines``,
            ``proxy_records``, ``method_blocks``, ``reading_order``,
            ``reader_policy``, ``generated_at_utc``, ``source_matrix`` and
            ``interpretation_rule``, plus the optional ``reader_summary``,
            ``score_formula`` and ``related_model_artifacts``.

    Raises:
        KeyError: If a required key is missing from ``payload``.
        OSError: If the Markdown file cannot be written.
    """
    summary = payload["summary"]
    line_rows = []
    entry_rows = []
    method_rows = []
    for line in payload["lines"]:
        line_rows.append(
            [
                line["label"],
                line.get("result_statement") or "",
                line.get("best_read_as") or line["primary_use"],
                line.get("read_separately_from") or "",
            ]
        )
        entry_rows.append(
            [
                line["label"],
                str(line["method_count"]),
                str(line["task_count"]),
                f"{line['scored_method_task_count']}/{line['method_task_record_count']}",
                str(line["direct_scored_method_task_count"]),
                str(line["proxy_scored_method_task_count"]),
                "<br>".join(line.get("primary_visuals", [])),
                "<br>".join(line["artifact_entry_points"]),
            ]
        )
        for method in line["methods"]:
            method_rows.append(
                [
                    line["label"],
                    method["label"],
                    method.get("method_detail") or "",
                    f"{method['scored_task_count']}/{method['result_record_count']}",
                    str(method["direct_scored_task_count"]),
                    str(method["proxy_scored_task_count"]),
                ]
            )

    proxy_rows = [
        [
            row["task_number"],
            row["task_label"],
            row["method"],
            row.get("metric_key") or "",
            row.get("reason") or "",
        ]
        for row in payload["proxy_records"]
    ]
    method_block_rows = [
        [
            block["line_label"],
            block["block"],
            ", ".join(block["methods"]),
            f"{block['scored_method_task_count']}/{block['method_task_record_count']}",
            str(block["direct_scored_method_task_count"]),
            str(block["proxy_scored_method_task_count"]),
            block["evidence_type"],
            block["read_as"],
        ]
        for block in payload["method_blocks"]
    ]
    related_artifact_rows = [
        [row.get("name", ""), row.get("role", ""), row.get("repo", "")]
        for row in payload.get("related_model_artifacts", [])
    ]
    reading_rows = [[row["step"], row["reason"]] for row in payload["reading_order"]]

    # Render each table up front so the document template below stays readable.
    overview_table = markdown_table(
        ["Line", "What the scores mean", "Best use", "Read separately from"],
        line_rows,
    )
    entry_table = markdown_table(
        [
            "Line",
            "Methods",
            "Tasks",
            "Scored records",
            "Direct scores",
            "Proxy scores",
            "Primary visuals",
            "Source artifacts",
        ],
        entry_rows,
    )
    block_table = markdown_table(
        [
            "Line",
            "Method block",
            "Methods",
            "Scored records",
            "Direct scores",
            "Proxy scores",
            "Evidence type",
            "Read as",
        ],
        method_block_rows,
    )
    method_table = markdown_table(
        ["Line", "Method", "Method detail", "Scored records", "Direct scores", "Proxy scores"],
        method_rows,
    )
    related_table = markdown_table(["Artifact", "Role", "Link or path"], related_artifact_rows)
    proxy_table = markdown_table(["Task", "Task label", "Method", "Metric", "Reason"], proxy_rows)
    reading_table = markdown_table(["Step", "Reason"], reading_rows)

    text = f"""# Two Evidence-Line Result Summary

Generated: `{payload['generated_at_utc']}`.

Source matrix: [`{payload['source_matrix']}`]({payload['source_matrix']})

Interpretation rule: {payload['interpretation_rule']}

## Read This First

{payload.get('reader_summary') or ''}

Score formula: {payload.get('score_formula') or ''}

{overview_table}

## Public Score Totals

- Lines: {summary['line_count']}
- Tasks per method: {summary['task_count']}
- Methods: {summary['method_count']}
- Scored records: {summary['scored_method_task_count']}/{summary['method_task_record_count']}
- Direct scores: {summary['direct_scored_method_task_count']}
- Compact-proxy scores: {summary['proxy_scored_method_task_count']} documented cells

## Line Ledger And Entry Points

{entry_table}

## Method Blocks By Evidence Line

{block_table}

## Method Detail By Line

{method_table}

## Related Model Artifacts

{related_table}

## Proxy-Scored Cells

{proxy_table}

## Reading Order

{reading_table}

## Reader Policy

- 1 sample episode: {payload['reader_policy']['single_public_sample_episode']}
- 128 selected episodes: {payload['reader_policy']['selected_128_episode_surface']}
- Proxy scores: {payload['reader_policy']['proxy_policy']}
"""
    OUTPUT_MD.write_text(text, encoding="utf-8")
372
+
373
+
374
def main() -> int:
    """Build the summary from the two input documents and write both outputs.

    Reads ``MATRIX_JSON`` and ``LINES_JSON``, writes the payload to
    ``OUTPUT_JSON`` and its Markdown rendering to ``OUTPUT_MD``, then prints
    the two output paths relative to ``ROOT``.

    Returns:
        ``0`` on success.
    """
    matrix = read_json(MATRIX_JSON)
    lines = read_json(LINES_JSON)
    payload = build_payload(matrix, lines)

    write_json(OUTPUT_JSON, payload)
    write_markdown(payload)

    json_rel = OUTPUT_JSON.relative_to(ROOT)
    markdown_rel = OUTPUT_MD.relative_to(ROOT)
    print(f"Wrote {json_rel} and {markdown_rel}")
    return 0


if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)