cy0307 committed 1 day ago

Commit

3d29d20

verified ·

1 Parent(s): 0bc6a41

Refine reader-facing public wording (2/6)

Browse files

Files changed (20) hide show

data/evidence_contract.json +88 -88
data/mirror_parity.json +0 -0
data/omni_model_comparison.json +2 -2
data/project_brief.json +1 -1
data/public_reader_map.json +105 -40
data/public_surface_qa.json +7 -7
data/publication_audit.json +7 -7
docs/data/evidence_contract.json +88 -88
docs/data/mirror_parity.json +0 -0
docs/data/omni_model_comparison.json +2 -2
docs/data/project_brief.json +1 -1
docs/data/public_reader_map.json +105 -40
docs/data/public_surface_qa.json +7 -7
docs/data/publication_audit.json +7 -7
metrics/evidence_contract.json +88 -88
metrics/mirror_parity.json +0 -0
metrics/omni_model_comparison.json +2 -2
metrics/project_brief.json +1 -1
metrics/public_reader_map.json +105 -40
metrics/public_surface_qa.json +7 -7

data/evidence_contract.json CHANGED Viewed

@@ -1,170 +1,169 @@
 {
   "project": "Ropedia Xperience-10M Task Suite",
   "scope": "single public Xperience-10M sample episode",
-  "claims": [
     {
       "id": "project_status",
-      "claim": "A first-pass reader has a compact current-state summary.",
       "status": "verified",
       "evidence": [
         "PROJECT_STATUS.md",
         "docs/data/project_status.json"
       ],
-      "boundary": "summarizes existing evidence and current limitations"
     },
     {
       "id": "research_roadmap",
-      "claim": "The research roadmap is explicit.",
       "status": "current",
       "evidence": [
         "RESEARCH_ROADMAP.md",
         "docs/data/research_roadmap.json"
       ],
-      "boundary": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
     },
-        {
-            "id": "official_dataset_card_alignment",
-            "claim": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
       "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json",
         "https://huggingface.co/datasets/ropedia-ai/xperience-10m"
       ],
-            "boundary": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
-        },
-        {
-            "id": "source_alignment",
-            "claim": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
-            "status": "verified",
-            "evidence": [
-                "SOURCE_ALIGNMENT_AUDIT.md",
-                "docs/data/source_alignment_audit.json",
-                "scripts/validate_source_alignment.py"
-            ],
-            "boundary": "offline committed-fact check; does not fetch private gated data"
-        },
-        {
-            "id": "aligned_windows",
-      "claim": "The public Xperience-10M sample has been converted into aligned model windows.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/windows.csv",
         "results/episode_task_suite/shared_windows.npz",
         "results/episode_task_suite/summary_report.json"
       ],
-      "boundary": "5,821 frames, 1,161 windows, one public sample episode"
     },
-        {
-            "id": "feature_contract",
-            "claim": "The current feature contract is explicit and inspectable.",
-            "status": "verified",
-            "evidence": [
-                "results/episode_task_suite/feature_manifest.json",
-                "results/episode_task_suite/available_modalities.json"
-            ],
-            "boundary": "8,546-dimensional aligned multimodal window representation"
-        },
-        {
-            "id": "evaluation_protocol",
-            "claim": "The task evaluation protocol is explicit and generated from committed metrics.",
-            "status": "verified",
-            "evidence": [
-                "EVALUATION_PROTOCOL.md",
-                "docs/data/evaluation_protocol.json",
-                "scripts/build_evaluation_protocol.py"
-            ],
-            "boundary": "defines windows, split, per-task metrics, leakage controls, and current limitations"
-        },
     {
       "id": "modality_atlas",
-      "claim": "The public sample modalities are inspectable without raw data redistribution.",
       "status": "verified",
       "evidence": [
         "docs/data/modality_atlas.json",
         "docs/assets/modalities/",
         "docs/index.html"
       ],
-      "boundary": "derived thumbnails for presentation; raw data remains excluded"
     },
     {
       "id": "task_surface_integrity",
-      "claim": "Public task cards stay readable for non-expert readers.",
       "status": "verified",
       "evidence": [
         "docs/data/task_surface_integrity.json",
         "scripts/validate_task_surface.py",
         "docs/index.html"
       ],
-      "boundary": "presentation integrity for the public task surface"
     },
     {
       "id": "figure_index",
-      "claim": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
       "status": "verified",
       "evidence": [
         "FIGURE_INDEX.md",
         "docs/data/figure_index.json",
         "scripts/build_figure_index.py"
       ],
-      "boundary": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
     },
     {
       "id": "brand_assets",
-      "claim": "A project logo is consistently applied across public surfaces.",
       "status": "verified",
       "evidence": [
         "docs/assets/brand/",
         "docs/data/brand_assets.json",
         "scripts/build_brand_assets.py"
       ],
-      "boundary": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
     },
     {
       "id": "twelve_tasks",
-      "claim": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
       "status": "verified",
       "evidence": [
         "scripts/episode_task_suite.py",
         "results/episode_task_suite/*/metrics.json",
         "results/episode_task_suite/*/predictions.*"
       ],
-      "boundary": "chronological single-episode split, not cross-episode generalization"
     },
     {
       "id": "minimal_vs_neural",
-      "claim": "Minimal and neural heads use the same task contracts.",
       "status": "verified",
       "evidence": [
         "scripts/neural_task_models.py",
         "results/episode_task_suite/neural_mlp/",
         "docs/assets/task_architectures.png"
       ],
-      "boundary": "small heads only; not a foundation model"
     },
     {
       "id": "research_directions",
-      "claim": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_directions/research_direction_taxonomy.json",
         "docs/data/research_directions.json"
       ],
-      "boundary": "some directions remain proxy-only"
     },
     {
       "id": "direction_extensions",
-      "claim": "Four extra direction probes are coded and evaluated.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
         "docs/data/research_direction_extensions.json"
       ],
-      "boundary": "single-episode probes, not full research-direction solutions"
     },
     {
       "id": "qwen3_omni_diagnostic_pilot",
-      "claim": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
       "status": "verified_diagnostic",
       "evidence": [
         "docs/data/omni_finetune_verified_result.json",
@@ -172,94 +171,94 @@
         "scripts/omni/package_verified_omni_result.py",
         "scripts/omni/audit_verified_omni_package.py"
       ],
-      "boundary": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
     },
     {
       "id": "multi_episode_quality_improvement",
-      "claim": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
       "status": "active_next_step",
       "evidence": [
         "scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
         "docs/data/omni_finetune_verified_result.json",
         "FOUNDATION_MODEL_PLAN.md"
       ],
-      "boundary": "stronger model quality requires output-format improvements and action/subtask error analysis"
     },
     {
       "id": "scale_up_status_check",
-      "claim": "Older pilot path strings are tracked as setup-file provenance.",
       "status": "verified",
       "evidence": [
         "scripts/validate_scope_claims.py",
         "docs/data/scope_claims_audit.json"
       ],
-      "boundary": "run/path identifiers stay separate from completed held-out-episode results"
     },
     {
       "id": "mirror_parity",
-      "claim": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
       "status": "verified",
       "evidence": [
         "scripts/validate_mirror_parity.py",
         "docs/data/mirror_parity.json"
       ],
-      "boundary": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
     },
     {
       "id": "publication_package",
-      "claim": "The public GitHub and Hugging Face bundles contain the intended release files.",
       "status": "verified",
       "evidence": [
         "scripts/validate_publication_package.py",
         "docs/data/publication_audit.json"
       ],
-      "boundary": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
     },
     {
       "id": "website_integrity",
-      "claim": "The public website has checked local references.",
       "status": "verified",
       "evidence": [
         "scripts/validate_website_integrity.py",
         "docs/data/website_integrity.json"
       ],
-      "boundary": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
     },
     {
       "id": "rendered_site_check",
-      "claim": "The rendered website walkthrough has a browser-level interaction check.",
       "status": "verified",
       "evidence": [
         "RENDERED_SITE_CHECK.md",
         "scripts/build_rendered_site_check.py",
         "docs/data/rendered_site_check.json"
       ],
-      "boundary": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
     },
     {
       "id": "quality_gates",
-      "claim": "The release gate is explicit.",
       "status": "verified",
       "evidence": [
         "QUALITY_GATES.md",
         "scripts/build_quality_gates.py",
         "docs/data/quality_gates.json"
       ],
-      "boundary": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
     },
     {
       "id": "live_publication_status",
-      "claim": "The live public mirrors are checked after upload.",
       "status": "verified",
       "evidence": [
         "scripts/verify_live_publication.py",
         "docs/data/live_publication_status.json"
       ],
-      "boundary": "fetches public GitHub/HF URLs; it does not validate private training state"
     },
     {
       "id": "citation_metadata",
-      "claim": "The project is externally citable and machine-readable.",
       "status": "verified",
       "evidence": [
         "CITATION.cff",
@@ -267,11 +266,11 @@
         "docs/data/project_manifest.json",
         "LICENSE"
       ],
-      "boundary": "code license does not override original Xperience-10M dataset terms"
     },
     {
       "id": "project_path",
-      "claim": "A first-time reader has an explicit project path.",
       "status": "verified",
       "evidence": [
         "docs/data/project_packet.json",
@@ -280,29 +279,30 @@
         "README.md",
         "docs/index.html"
       ],
-      "boundary": "guides inspection across data, tasks, results, and scale-up status"
     },
     {
       "id": "artifact_index",
-      "claim": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
       "status": "verified",
       "evidence": [
         "ARTIFACT_GUIDE.md",
         "scripts/build_artifact_index.py",
         "docs/data/artifact_index.json"
       ],
-      "boundary": "selective source-of-truth catalog, not a complete inventory of every output file"
     },
     {
       "id": "reproducibility_contract",
-      "claim": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
       "status": "verified",
       "evidence": [
         "REPRODUCIBILITY.md",
         "docs/data/reproducibility_matrix.json",
         "notes/reproducibility_audit.md"
       ],
-      "boundary": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
     }
   ]
 }

 {
   "project": "Ropedia Xperience-10M Task Suite",
   "scope": "single public Xperience-10M sample episode",
+  "readouts": [
     {
       "id": "project_status",
       "status": "verified",
       "evidence": [
         "PROJECT_STATUS.md",
         "docs/data/project_status.json"
       ],
+      "readout": "A first-pass reader has a compact current-state summary.",
+      "scope_note": "summarizes existing evidence and current limitations"
     },
     {
       "id": "research_roadmap",
       "status": "current",
       "evidence": [
         "RESEARCH_ROADMAP.md",
         "docs/data/research_roadmap.json"
       ],
+      "readout": "The research roadmap is explicit.",
+      "scope_note": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
     },
+    {
+      "id": "official_dataset_card_alignment",
       "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json",
         "https://huggingface.co/datasets/ropedia-ai/xperience-10m"
       ],
+      "readout": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
+      "scope_note": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
+    },
+    {
+      "id": "source_alignment",
+      "status": "verified",
+      "evidence": [
+        "SOURCE_ALIGNMENT_AUDIT.md",
+        "docs/data/source_alignment_audit.json",
+        "scripts/validate_source_alignment.py"
+      ],
+      "readout": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
+      "scope_note": "offline committed-fact check; does not fetch private gated data"
+    },
+    {
+      "id": "aligned_windows",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/windows.csv",
         "results/episode_task_suite/shared_windows.npz",
         "results/episode_task_suite/summary_report.json"
       ],
+      "readout": "The public Xperience-10M sample has been converted into aligned model windows.",
+      "scope_note": "5,821 frames, 1,161 windows, one public sample episode"
+    },
+    {
+      "id": "feature_contract",
+      "status": "verified",
+      "evidence": [
+        "results/episode_task_suite/feature_manifest.json",
+        "results/episode_task_suite/available_modalities.json"
+      ],
+      "readout": "The current feature contract is explicit and inspectable.",
+      "scope_note": "8,546-dimensional aligned multimodal window representation"
+    },
+    {
+      "id": "evaluation_protocol",
+      "status": "verified",
+      "evidence": [
+        "EVALUATION_PROTOCOL.md",
+        "docs/data/evaluation_protocol.json",
+        "scripts/build_evaluation_protocol.py"
+      ],
+      "readout": "The task evaluation protocol is explicit and generated from committed metrics.",
+      "scope_note": "defines windows, split, per-task metrics, leakage controls, and current limitations"
     },
     {
       "id": "modality_atlas",
       "status": "verified",
       "evidence": [
         "docs/data/modality_atlas.json",
         "docs/assets/modalities/",
         "docs/index.html"
       ],
+      "readout": "The public sample modalities are inspectable without raw data redistribution.",
+      "scope_note": "derived thumbnails for presentation; raw data remains excluded"
     },
     {
       "id": "task_surface_integrity",
       "status": "verified",
       "evidence": [
         "docs/data/task_surface_integrity.json",
         "scripts/validate_task_surface.py",
         "docs/index.html"
       ],
+      "readout": "Public task cards stay readable for non-expert readers.",
+      "scope_note": "presentation integrity for the public task surface"
     },
     {
       "id": "figure_index",
       "status": "verified",
       "evidence": [
         "FIGURE_INDEX.md",
         "docs/data/figure_index.json",
         "scripts/build_figure_index.py"
       ],
+      "readout": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
+      "scope_note": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
     },
     {
       "id": "brand_assets",
       "status": "verified",
       "evidence": [
         "docs/assets/brand/",
         "docs/data/brand_assets.json",
         "scripts/build_brand_assets.py"
       ],
+      "readout": "A project logo is consistently applied across public surfaces.",
+      "scope_note": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
     },
     {
       "id": "twelve_tasks",
       "status": "verified",
       "evidence": [
         "scripts/episode_task_suite.py",
         "results/episode_task_suite/*/metrics.json",
         "results/episode_task_suite/*/predictions.*"
       ],
+      "readout": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
+      "scope_note": "chronological single-episode split, not cross-episode generalization"
     },
     {
       "id": "minimal_vs_neural",
       "status": "verified",
       "evidence": [
         "scripts/neural_task_models.py",
         "results/episode_task_suite/neural_mlp/",
         "docs/assets/task_architectures.png"
       ],
+      "readout": "Minimal and neural heads use the same task contracts.",
+      "scope_note": "small heads only; not a foundation model"
     },
     {
       "id": "research_directions",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_directions/research_direction_taxonomy.json",
         "docs/data/research_directions.json"
       ],
+      "readout": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
+      "scope_note": "some directions remain proxy-only"
     },
     {
       "id": "direction_extensions",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
         "docs/data/research_direction_extensions.json"
       ],
+      "readout": "Four extra direction probes are coded and evaluated.",
+      "scope_note": "single-episode probes, not full research-direction solutions"
     },
     {
       "id": "qwen3_omni_diagnostic_pilot",
       "status": "verified_diagnostic",
       "evidence": [
         "docs/data/omni_finetune_verified_result.json",
         "scripts/omni/package_verified_omni_result.py",
         "scripts/omni/audit_verified_omni_package.py"
       ],
+      "readout": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
+      "scope_note": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
     },
     {
       "id": "multi_episode_quality_improvement",
       "status": "active_next_step",
       "evidence": [
         "scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
         "docs/data/omni_finetune_verified_result.json",
         "FOUNDATION_MODEL_PLAN.md"
       ],
+      "readout": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
+      "scope_note": "stronger model quality requires output-format improvements and action/subtask error analysis"
     },
     {
       "id": "scale_up_status_check",
       "status": "verified",
       "evidence": [
         "scripts/validate_scope_claims.py",
         "docs/data/scope_claims_audit.json"
       ],
+      "readout": "Older pilot path strings are tracked as setup-file provenance.",
+      "scope_note": "run/path identifiers stay separate from completed held-out-episode results"
     },
     {
       "id": "mirror_parity",
       "status": "verified",
       "evidence": [
         "scripts/validate_mirror_parity.py",
         "docs/data/mirror_parity.json"
       ],
+      "readout": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
+      "scope_note": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
     },
     {
       "id": "publication_package",
       "status": "verified",
       "evidence": [
         "scripts/validate_publication_package.py",
         "docs/data/publication_audit.json"
       ],
+      "readout": "The public GitHub and Hugging Face bundles contain the intended release files.",
+      "scope_note": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
     },
     {
       "id": "website_integrity",
       "status": "verified",
       "evidence": [
         "scripts/validate_website_integrity.py",
         "docs/data/website_integrity.json"
       ],
+      "readout": "The public website has checked local references.",
+      "scope_note": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
     },
     {
       "id": "rendered_site_check",
       "status": "verified",
       "evidence": [
         "RENDERED_SITE_CHECK.md",
         "scripts/build_rendered_site_check.py",
         "docs/data/rendered_site_check.json"
       ],
+      "readout": "The rendered website walkthrough has a browser-level interaction check.",
+      "scope_note": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
     },
     {
       "id": "quality_gates",
       "status": "verified",
       "evidence": [
         "QUALITY_GATES.md",
         "scripts/build_quality_gates.py",
         "docs/data/quality_gates.json"
       ],
+      "readout": "The release gate is explicit.",
+      "scope_note": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
     },
     {
       "id": "live_publication_status",
       "status": "verified",
       "evidence": [
         "scripts/verify_live_publication.py",
         "docs/data/live_publication_status.json"
       ],
+      "readout": "The live public mirrors are checked after upload.",
+      "scope_note": "fetches public GitHub/HF URLs; it does not validate private training state"
     },
     {
       "id": "citation_metadata",
       "status": "verified",
       "evidence": [
         "CITATION.cff",
         "docs/data/project_manifest.json",
         "LICENSE"
       ],
+      "readout": "The project is externally citable and machine-readable.",
+      "scope_note": "code license does not override original Xperience-10M dataset terms"
     },
     {
       "id": "project_path",
       "status": "verified",
       "evidence": [
         "docs/data/project_packet.json",
         "README.md",
         "docs/index.html"
       ],
+      "readout": "A first-time reader has an explicit project path.",
+      "scope_note": "guides inspection across data, tasks, results, and scale-up status"
     },
     {
       "id": "artifact_index",
       "status": "verified",
       "evidence": [
         "ARTIFACT_GUIDE.md",
         "scripts/build_artifact_index.py",
         "docs/data/artifact_index.json"
       ],
+      "readout": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
+      "scope_note": "selective source-of-truth catalog, not a complete inventory of every output file"
     },
     {
       "id": "reproducibility_contract",
       "status": "verified",
       "evidence": [
         "REPRODUCIBILITY.md",
         "docs/data/reproducibility_matrix.json",
         "notes/reproducibility_audit.md"
       ],
+      "readout": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
+      "scope_note": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
     }
   ]
 }

data/mirror_parity.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

data/omni_model_comparison.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
-  "generated_at_utc": "2026-06-21T15:17:00+00:00",
   "status": "pass",
   "version_count": 3,
   "model_group_count": 5,
@@ -1758,6 +1758,6 @@
   ],
   "pending": [
     "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
-    "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
   ]
 }

 {
   "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
+  "generated_at_utc": "2026-06-22T10:59:59+00:00",
   "status": "pass",
   "version_count": 3,
   "model_group_count": 5,
   ],
   "pending": [
     "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
+    "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before presenting v6 as globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
   ]
 }

data/project_brief.json CHANGED Viewed

@@ -56,7 +56,7 @@
     "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
   ],
   "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
-  "next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone claims.",
   "entry_points": {
     "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
     "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",

     "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
   ],
   "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
+  "next_stage": "Improve action/subtask quality through error analysis before presenting larger robustness or alternative-backbone results.",
   "entry_points": {
     "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
     "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",

data/public_reader_map.json CHANGED Viewed

@@ -6,77 +6,124 @@
     {
       "reader_goal": "Understand the project in one pass",
       "start_here": "PROJECT_BRIEF.md",
-      "then_inspect": ["PROJECT_STATUS.md", "RESEARCH_TAKEAWAYS.md"]
     },
     {
       "reader_goal": "Understand the two evidence lines",
       "start_here": "TWO_EVIDENCE_LINES.md",
-      "then_inspect": ["docs/data/two_evidence_lines.json", "docs/data/two_evidence_line_result_summary.json"]
     },
     {
       "reader_goal": "See the visual public dashboard",
       "start_here": "GitHub Pages dashboard or Hugging Face Space",
-      "then_inspect": ["docs/index.html", "docs/data/project_packet.json"]
     },
     {
       "reader_goal": "Decode project terminology",
       "start_here": "GLOSSARY.md",
-      "then_inspect": ["docs/data/glossary.json", "Homepage Glossary section"]
     },
     {
       "reader_goal": "Understand the data unit",
       "start_here": "results/episode_task_suite/windows.csv",
-      "then_inspect": ["results/episode_task_suite/feature_manifest.json", "docs/data/raw_sample_files.json"]
     },
     {
       "reader_goal": "Trace the 128-episode split",
       "start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
-      "then_inspect": ["docs/data/xperience10m_128_episode_feature_index.json", "results/omni_finetune/xperience10m_128_episode_selection.csv"]
     },
     {
       "reader_goal": "Inspect the 20-task benchmark",
       "start_here": "TASK_SUITE_20.md",
-      "then_inspect": ["docs/data/task_suite_20.json", "EVALUATION_PROTOCOL.md"]
     },
     {
       "reader_goal": "Compare current results",
       "start_here": "RESEARCH_TAKEAWAYS.md",
-      "then_inspect": ["docs/data/task_method_20_result_matrix.json", "docs/data/unified_task_model_radar.json"]
     },
     {
       "reader_goal": "Compare 1-episode and 128-episode methods",
       "start_here": "Homepage radar section",
-      "then_inspect": ["docs/data/single_episode_task_model_radar.json", "docs/data/episode128_task_model_radar.json"]
     },
     {
       "reader_goal": "Read Qwen3-Omni v1-v6 correctly",
       "start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
-      "then_inspect": ["docs/data/qwen3_omni_run_lineage.json", "docs/data/qwen3_v5_v6_comparison.json"]
     },
     {
       "reader_goal": "Find all derived artifacts",
       "start_here": "ARTIFACT_GUIDE.md",
-      "then_inspect": ["Hugging Face artifact dataset", "docs/data/artifact_index.json"]
     },
     {
       "reader_goal": "Download model weights with their matching results",
       "start_here": "Hugging Face weights/results repo",
-      "then_inspect": ["manifest.json", "analysis/docs/data/task_method_20_result_matrix.json", "results/"]
     },
     {
       "reader_goal": "Reproduce or extend the work",
       "start_here": "REPRODUCIBILITY.md",
-      "then_inspect": ["QUALITY_GATES.md", "scripts/", "results/"]
     },
     {
       "reader_goal": "Understand foundation-model directions",
       "start_here": "THREE_FOUNDATION_PIPELINES.md",
-      "then_inspect": ["FOUNDATION_MODEL_PLAN.md", "docs/data/three_foundation_pipelines.json"]
     },
     {
       "reader_goal": "Check public-release health",
       "start_here": "PUBLIC_SURFACE_QA.md",
-      "then_inspect": ["docs/data/live_publication_status.json", "docs/data/mirror_parity.json"]
     }
   ],
   "public_surfaces": [
@@ -125,31 +172,49 @@
     "Foundation directions",
     "Public-release checks"
   ],
-  "claim_boundaries": [
-    {
-      "claim_type": "Single public-sample task behavior",
-      "public_evidence": ["results/episode_task_suite/", "docs/data/task_suite_20.json"],
-      "boundary": "Describes one public sample episode, not the full dataset distribution."
-    },
-    {
-      "claim_type": "128-episode method comparison",
-      "public_evidence": ["XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md", "docs/data/xperience10m_128_episode_feature_index.json", "results/omni_finetune/*128*", "docs/data/omni_model_comparison.json"],
-      "boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
-    },
-    {
-      "claim_type": "Qwen3-Omni v1-v6 lineage",
-      "public_evidence": ["QWEN3_OMNI_RUN_LINEAGE.md", "docs/data/qwen3_omni_run_lineage.json"],
-      "boundary": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
-    },
-    {
-      "claim_type": "Foundation-model track quality",
-      "public_evidence": ["Verified Qwen3-Omni and Cosmos3 result packages", "model cards"],
-      "boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
-    },
-    {
-      "claim_type": "Reproducibility",
-      "public_evidence": ["REPRODUCIBILITY.md", "QUALITY_GATES.md", "release validators"],
-      "boundary": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
     }
   ]
 }

     {
       "reader_goal": "Understand the project in one pass",
       "start_here": "PROJECT_BRIEF.md",
+      "then_inspect": [
+        "PROJECT_STATUS.md",
+        "RESEARCH_TAKEAWAYS.md"
+      ]
     },
     {
       "reader_goal": "Understand the two evidence lines",
       "start_here": "TWO_EVIDENCE_LINES.md",
+      "then_inspect": [
+        "docs/data/two_evidence_lines.json",
+        "docs/data/two_evidence_line_result_summary.json"
+      ]
     },
     {
       "reader_goal": "See the visual public dashboard",
       "start_here": "GitHub Pages dashboard or Hugging Face Space",
+      "then_inspect": [
+        "docs/index.html",
+        "docs/data/project_packet.json"
+      ]
     },
     {
       "reader_goal": "Decode project terminology",
       "start_here": "GLOSSARY.md",
+      "then_inspect": [
+        "docs/data/glossary.json",
+        "Homepage Glossary section"
+      ]
     },
     {
       "reader_goal": "Understand the data unit",
       "start_here": "results/episode_task_suite/windows.csv",
+      "then_inspect": [
+        "results/episode_task_suite/feature_manifest.json",
+        "docs/data/raw_sample_files.json"
+      ]
     },
     {
       "reader_goal": "Trace the 128-episode split",
       "start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
+      "then_inspect": [
+        "docs/data/xperience10m_128_episode_feature_index.json",
+        "results/omni_finetune/xperience10m_128_episode_selection.csv"
+      ]
     },
     {
       "reader_goal": "Inspect the 20-task benchmark",
       "start_here": "TASK_SUITE_20.md",
+      "then_inspect": [
+        "docs/data/task_suite_20.json",
+        "EVALUATION_PROTOCOL.md"
+      ]
     },
     {
       "reader_goal": "Compare current results",
       "start_here": "RESEARCH_TAKEAWAYS.md",
+      "then_inspect": [
+        "docs/data/task_method_20_result_matrix.json",
+        "docs/data/unified_task_model_radar.json"
+      ]
     },
     {
       "reader_goal": "Compare 1-episode and 128-episode methods",
       "start_here": "Homepage radar section",
+      "then_inspect": [
+        "docs/data/single_episode_task_model_radar.json",
+        "docs/data/episode128_task_model_radar.json"
+      ]
     },
     {
       "reader_goal": "Read Qwen3-Omni v1-v6 correctly",
       "start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
+      "then_inspect": [
+        "docs/data/qwen3_omni_run_lineage.json",
+        "docs/data/qwen3_v5_v6_comparison.json"
+      ]
     },
     {
       "reader_goal": "Find all derived artifacts",
       "start_here": "ARTIFACT_GUIDE.md",
+      "then_inspect": [
+        "Hugging Face artifact dataset",
+        "docs/data/artifact_index.json"
+      ]
     },
     {
       "reader_goal": "Download model weights with their matching results",
       "start_here": "Hugging Face weights/results repo",
+      "then_inspect": [
+        "manifest.json",
+        "analysis/docs/data/task_method_20_result_matrix.json",
+        "results/"
+      ]
     },
     {
       "reader_goal": "Reproduce or extend the work",
       "start_here": "REPRODUCIBILITY.md",
+      "then_inspect": [
+        "QUALITY_GATES.md",
+        "scripts/",
+        "results/"
+      ]
     },
     {
       "reader_goal": "Understand foundation-model directions",
       "start_here": "THREE_FOUNDATION_PIPELINES.md",
+      "then_inspect": [
+        "FOUNDATION_MODEL_PLAN.md",
+        "docs/data/three_foundation_pipelines.json"
+      ]
     },
     {
       "reader_goal": "Check public-release health",
       "start_here": "PUBLIC_SURFACE_QA.md",
+      "then_inspect": [
+        "docs/data/live_publication_status.json",
+        "docs/data/mirror_parity.json"
+      ]
     }
   ],
   "public_surfaces": [
     "Foundation directions",
     "Public-release checks"
   ],
+  "reading_scopes": [
+    {
+      "public_evidence": [
+        "results/episode_task_suite/",
+        "docs/data/task_suite_20.json"
+      ],
+      "topic": "Single public-sample task behavior",
+      "scope_note": "Describes one public sample episode, not the full dataset distribution."
+    },
+    {
+      "public_evidence": [
+        "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
+        "docs/data/xperience10m_128_episode_feature_index.json",
+        "results/omni_finetune/*128*",
+        "docs/data/omni_model_comparison.json"
+      ],
+      "topic": "128-episode method comparison",
+      "scope_note": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
+    },
+    {
+      "public_evidence": [
+        "QWEN3_OMNI_RUN_LINEAGE.md",
+        "docs/data/qwen3_omni_run_lineage.json"
+      ],
+      "topic": "Qwen3-Omni v1-v6 lineage",
+      "scope_note": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
+    },
+    {
+      "public_evidence": [
+        "Verified Qwen3-Omni and Cosmos3 result packages",
+        "model cards"
+      ],
+      "topic": "Foundation-model track quality",
+      "scope_note": "Numeric task scores appear only when a task-specific eval or probe exists."
+    },
+    {
+      "public_evidence": [
+        "REPRODUCIBILITY.md",
+        "QUALITY_GATES.md",
+        "release validators"
+      ],
+      "topic": "Reproducibility",
+      "scope_note": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
     }
   ]
 }

data/public_surface_qa.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Public Project Surface",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:26+00:00",
   "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
   "checks": [
     {
@@ -18,7 +18,7 @@
         "website_integrity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:09:34+00:00"
         },
         "rendered_site_check": {
           "exists": true,
@@ -28,27 +28,27 @@
         "task_surface_integrity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:10:38+00:00"
         },
         "source_alignment": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:10:38+00:00"
         },
         "scale_up_status": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-21T20:58:21+00:00"
         },
         "publication_package": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:19:22+00:00"
         },
         "mirror_parity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:19:19+00:00"
         }
       },
       "failures": {}

 {
   "title": "Ropedia Xperience-10M Public Project Surface",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:45+00:00",
   "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
   "checks": [
     {
         "website_integrity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:07+00:00"
         },
         "rendered_site_check": {
           "exists": true,
         "task_surface_integrity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:07+00:00"
         },
         "source_alignment": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:08+00:00"
         },
         "scale_up_status": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:10+00:00"
         },
         "publication_package": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:18:16+00:00"
         },
         "mirror_parity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:18:11+00:00"
         }
       },
       "failures": {}

data/publication_audit.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:19:22+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
@@ -246,8 +246,8 @@
     "hf_space_bundle": {
       "root": "hf_publish/space",
       "exists": true,
-      "file_count": 631,
-      "text_file_count": 470,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
         "bytes": 10221085
@@ -257,8 +257,8 @@
     "hf_artifact_bundle": {
       "root": "hf_publish/artifacts",
       "exists": true,
-      "file_count": 4702,
-      "text_file_count": 1328,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061
@@ -268,8 +268,8 @@
     "hf_model_bundle": {
       "root": "hf_publish/model",
       "exists": true,
-      "file_count": 5464,
-      "text_file_count": 1502,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:16+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
     "hf_space_bundle": {
       "root": "hf_publish/space",
       "exists": true,
+      "file_count": 640,
+      "text_file_count": 479,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
         "bytes": 10221085
     "hf_artifact_bundle": {
       "root": "hf_publish/artifacts",
       "exists": true,
+      "file_count": 4708,
+      "text_file_count": 1334,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061
     "hf_model_bundle": {
       "root": "hf_publish/model",
       "exists": true,
+      "file_count": 5470,
+      "text_file_count": 1508,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061

docs/data/evidence_contract.json CHANGED Viewed

@@ -1,170 +1,169 @@
 {
   "project": "Ropedia Xperience-10M Task Suite",
   "scope": "single public Xperience-10M sample episode",
-  "claims": [
     {
       "id": "project_status",
-      "claim": "A first-pass reader has a compact current-state summary.",
       "status": "verified",
       "evidence": [
         "PROJECT_STATUS.md",
         "docs/data/project_status.json"
       ],
-      "boundary": "summarizes existing evidence and current limitations"
     },
     {
       "id": "research_roadmap",
-      "claim": "The research roadmap is explicit.",
       "status": "current",
       "evidence": [
         "RESEARCH_ROADMAP.md",
         "docs/data/research_roadmap.json"
       ],
-      "boundary": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
     },
-        {
-            "id": "official_dataset_card_alignment",
-            "claim": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
       "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json",
         "https://huggingface.co/datasets/ropedia-ai/xperience-10m"
       ],
-            "boundary": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
-        },
-        {
-            "id": "source_alignment",
-            "claim": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
-            "status": "verified",
-            "evidence": [
-                "SOURCE_ALIGNMENT_AUDIT.md",
-                "docs/data/source_alignment_audit.json",
-                "scripts/validate_source_alignment.py"
-            ],
-            "boundary": "offline committed-fact check; does not fetch private gated data"
-        },
-        {
-            "id": "aligned_windows",
-      "claim": "The public Xperience-10M sample has been converted into aligned model windows.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/windows.csv",
         "results/episode_task_suite/shared_windows.npz",
         "results/episode_task_suite/summary_report.json"
       ],
-      "boundary": "5,821 frames, 1,161 windows, one public sample episode"
     },
-        {
-            "id": "feature_contract",
-            "claim": "The current feature contract is explicit and inspectable.",
-            "status": "verified",
-            "evidence": [
-                "results/episode_task_suite/feature_manifest.json",
-                "results/episode_task_suite/available_modalities.json"
-            ],
-            "boundary": "8,546-dimensional aligned multimodal window representation"
-        },
-        {
-            "id": "evaluation_protocol",
-            "claim": "The task evaluation protocol is explicit and generated from committed metrics.",
-            "status": "verified",
-            "evidence": [
-                "EVALUATION_PROTOCOL.md",
-                "docs/data/evaluation_protocol.json",
-                "scripts/build_evaluation_protocol.py"
-            ],
-            "boundary": "defines windows, split, per-task metrics, leakage controls, and current limitations"
-        },
     {
       "id": "modality_atlas",
-      "claim": "The public sample modalities are inspectable without raw data redistribution.",
       "status": "verified",
       "evidence": [
         "docs/data/modality_atlas.json",
         "docs/assets/modalities/",
         "docs/index.html"
       ],
-      "boundary": "derived thumbnails for presentation; raw data remains excluded"
     },
     {
       "id": "task_surface_integrity",
-      "claim": "Public task cards stay readable for non-expert readers.",
       "status": "verified",
       "evidence": [
         "docs/data/task_surface_integrity.json",
         "scripts/validate_task_surface.py",
         "docs/index.html"
       ],
-      "boundary": "presentation integrity for the public task surface"
     },
     {
       "id": "figure_index",
-      "claim": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
       "status": "verified",
       "evidence": [
         "FIGURE_INDEX.md",
         "docs/data/figure_index.json",
         "scripts/build_figure_index.py"
       ],
-      "boundary": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
     },
     {
       "id": "brand_assets",
-      "claim": "A project logo is consistently applied across public surfaces.",
       "status": "verified",
       "evidence": [
         "docs/assets/brand/",
         "docs/data/brand_assets.json",
         "scripts/build_brand_assets.py"
       ],
-      "boundary": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
     },
     {
       "id": "twelve_tasks",
-      "claim": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
       "status": "verified",
       "evidence": [
         "scripts/episode_task_suite.py",
         "results/episode_task_suite/*/metrics.json",
         "results/episode_task_suite/*/predictions.*"
       ],
-      "boundary": "chronological single-episode split, not cross-episode generalization"
     },
     {
       "id": "minimal_vs_neural",
-      "claim": "Minimal and neural heads use the same task contracts.",
       "status": "verified",
       "evidence": [
         "scripts/neural_task_models.py",
         "results/episode_task_suite/neural_mlp/",
         "docs/assets/task_architectures.png"
       ],
-      "boundary": "small heads only; not a foundation model"
     },
     {
       "id": "research_directions",
-      "claim": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_directions/research_direction_taxonomy.json",
         "docs/data/research_directions.json"
       ],
-      "boundary": "some directions remain proxy-only"
     },
     {
       "id": "direction_extensions",
-      "claim": "Four extra direction probes are coded and evaluated.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
         "docs/data/research_direction_extensions.json"
       ],
-      "boundary": "single-episode probes, not full research-direction solutions"
     },
     {
       "id": "qwen3_omni_diagnostic_pilot",
-      "claim": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
       "status": "verified_diagnostic",
       "evidence": [
         "docs/data/omni_finetune_verified_result.json",
@@ -172,94 +171,94 @@
         "scripts/omni/package_verified_omni_result.py",
         "scripts/omni/audit_verified_omni_package.py"
       ],
-      "boundary": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
     },
     {
       "id": "multi_episode_quality_improvement",
-      "claim": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
       "status": "active_next_step",
       "evidence": [
         "scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
         "docs/data/omni_finetune_verified_result.json",
         "FOUNDATION_MODEL_PLAN.md"
       ],
-      "boundary": "stronger model quality requires output-format improvements and action/subtask error analysis"
     },
     {
       "id": "scale_up_status_check",
-      "claim": "Older pilot path strings are tracked as setup-file provenance.",
       "status": "verified",
       "evidence": [
         "scripts/validate_scope_claims.py",
         "docs/data/scope_claims_audit.json"
       ],
-      "boundary": "run/path identifiers stay separate from completed held-out-episode results"
     },
     {
       "id": "mirror_parity",
-      "claim": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
       "status": "verified",
       "evidence": [
         "scripts/validate_mirror_parity.py",
         "docs/data/mirror_parity.json"
       ],
-      "boundary": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
     },
     {
       "id": "publication_package",
-      "claim": "The public GitHub and Hugging Face bundles contain the intended release files.",
       "status": "verified",
       "evidence": [
         "scripts/validate_publication_package.py",
         "docs/data/publication_audit.json"
       ],
-      "boundary": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
     },
     {
       "id": "website_integrity",
-      "claim": "The public website has checked local references.",
       "status": "verified",
       "evidence": [
         "scripts/validate_website_integrity.py",
         "docs/data/website_integrity.json"
       ],
-      "boundary": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
     },
     {
       "id": "rendered_site_check",
-      "claim": "The rendered website walkthrough has a browser-level interaction check.",
       "status": "verified",
       "evidence": [
         "RENDERED_SITE_CHECK.md",
         "scripts/build_rendered_site_check.py",
         "docs/data/rendered_site_check.json"
       ],
-      "boundary": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
     },
     {
       "id": "quality_gates",
-      "claim": "The release gate is explicit.",
       "status": "verified",
       "evidence": [
         "QUALITY_GATES.md",
         "scripts/build_quality_gates.py",
         "docs/data/quality_gates.json"
       ],
-      "boundary": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
     },
     {
       "id": "live_publication_status",
-      "claim": "The live public mirrors are checked after upload.",
       "status": "verified",
       "evidence": [
         "scripts/verify_live_publication.py",
         "docs/data/live_publication_status.json"
       ],
-      "boundary": "fetches public GitHub/HF URLs; it does not validate private training state"
     },
     {
       "id": "citation_metadata",
-      "claim": "The project is externally citable and machine-readable.",
       "status": "verified",
       "evidence": [
         "CITATION.cff",
@@ -267,11 +266,11 @@
         "docs/data/project_manifest.json",
         "LICENSE"
       ],
-      "boundary": "code license does not override original Xperience-10M dataset terms"
     },
     {
       "id": "project_path",
-      "claim": "A first-time reader has an explicit project path.",
       "status": "verified",
       "evidence": [
         "docs/data/project_packet.json",
@@ -280,29 +279,30 @@
         "README.md",
         "docs/index.html"
       ],
-      "boundary": "guides inspection across data, tasks, results, and scale-up status"
     },
     {
       "id": "artifact_index",
-      "claim": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
       "status": "verified",
       "evidence": [
         "ARTIFACT_GUIDE.md",
         "scripts/build_artifact_index.py",
         "docs/data/artifact_index.json"
       ],
-      "boundary": "selective source-of-truth catalog, not a complete inventory of every output file"
     },
     {
       "id": "reproducibility_contract",
-      "claim": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
       "status": "verified",
       "evidence": [
         "REPRODUCIBILITY.md",
         "docs/data/reproducibility_matrix.json",
         "notes/reproducibility_audit.md"
       ],
-      "boundary": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
     }
   ]
 }

 {
   "project": "Ropedia Xperience-10M Task Suite",
   "scope": "single public Xperience-10M sample episode",
+  "readouts": [
     {
       "id": "project_status",
       "status": "verified",
       "evidence": [
         "PROJECT_STATUS.md",
         "docs/data/project_status.json"
       ],
+      "readout": "A first-pass reader has a compact current-state summary.",
+      "scope_note": "summarizes existing evidence and current limitations"
     },
     {
       "id": "research_roadmap",
       "status": "current",
       "evidence": [
         "RESEARCH_ROADMAP.md",
         "docs/data/research_roadmap.json"
       ],
+      "readout": "The research roadmap is explicit.",
+      "scope_note": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
     },
+    {
+      "id": "official_dataset_card_alignment",
       "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json",
         "https://huggingface.co/datasets/ropedia-ai/xperience-10m"
       ],
+      "readout": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
+      "scope_note": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
+    },
+    {
+      "id": "source_alignment",
+      "status": "verified",
+      "evidence": [
+        "SOURCE_ALIGNMENT_AUDIT.md",
+        "docs/data/source_alignment_audit.json",
+        "scripts/validate_source_alignment.py"
+      ],
+      "readout": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
+      "scope_note": "offline committed-fact check; does not fetch private gated data"
+    },
+    {
+      "id": "aligned_windows",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/windows.csv",
         "results/episode_task_suite/shared_windows.npz",
         "results/episode_task_suite/summary_report.json"
       ],
+      "readout": "The public Xperience-10M sample has been converted into aligned model windows.",
+      "scope_note": "5,821 frames, 1,161 windows, one public sample episode"
+    },
+    {
+      "id": "feature_contract",
+      "status": "verified",
+      "evidence": [
+        "results/episode_task_suite/feature_manifest.json",
+        "results/episode_task_suite/available_modalities.json"
+      ],
+      "readout": "The current feature contract is explicit and inspectable.",
+      "scope_note": "8,546-dimensional aligned multimodal window representation"
+    },
+    {
+      "id": "evaluation_protocol",
+      "status": "verified",
+      "evidence": [
+        "EVALUATION_PROTOCOL.md",
+        "docs/data/evaluation_protocol.json",
+        "scripts/build_evaluation_protocol.py"
+      ],
+      "readout": "The task evaluation protocol is explicit and generated from committed metrics.",
+      "scope_note": "defines windows, split, per-task metrics, leakage controls, and current limitations"
     },
     {
       "id": "modality_atlas",
       "status": "verified",
       "evidence": [
         "docs/data/modality_atlas.json",
         "docs/assets/modalities/",
         "docs/index.html"
       ],
+      "readout": "The public sample modalities are inspectable without raw data redistribution.",
+      "scope_note": "derived thumbnails for presentation; raw data remains excluded"
     },
     {
       "id": "task_surface_integrity",
       "status": "verified",
       "evidence": [
         "docs/data/task_surface_integrity.json",
         "scripts/validate_task_surface.py",
         "docs/index.html"
       ],
+      "readout": "Public task cards stay readable for non-expert readers.",
+      "scope_note": "presentation integrity for the public task surface"
     },
     {
       "id": "figure_index",
       "status": "verified",
       "evidence": [
         "FIGURE_INDEX.md",
         "docs/data/figure_index.json",
         "scripts/build_figure_index.py"
       ],
+      "readout": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
+      "scope_note": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
     },
     {
       "id": "brand_assets",
       "status": "verified",
       "evidence": [
         "docs/assets/brand/",
         "docs/data/brand_assets.json",
         "scripts/build_brand_assets.py"
       ],
+      "readout": "A project logo is consistently applied across public surfaces.",
+      "scope_note": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
     },
     {
       "id": "twelve_tasks",
       "status": "verified",
       "evidence": [
         "scripts/episode_task_suite.py",
         "results/episode_task_suite/*/metrics.json",
         "results/episode_task_suite/*/predictions.*"
       ],
+      "readout": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
+      "scope_note": "chronological single-episode split, not cross-episode generalization"
     },
     {
       "id": "minimal_vs_neural",
       "status": "verified",
       "evidence": [
         "scripts/neural_task_models.py",
         "results/episode_task_suite/neural_mlp/",
         "docs/assets/task_architectures.png"
       ],
+      "readout": "Minimal and neural heads use the same task contracts.",
+      "scope_note": "small heads only; not a foundation model"
     },
     {
       "id": "research_directions",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_directions/research_direction_taxonomy.json",
         "docs/data/research_directions.json"
       ],
+      "readout": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
+      "scope_note": "some directions remain proxy-only"
     },
     {
       "id": "direction_extensions",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
         "docs/data/research_direction_extensions.json"
       ],
+      "readout": "Four extra direction probes are coded and evaluated.",
+      "scope_note": "single-episode probes, not full research-direction solutions"
     },
     {
       "id": "qwen3_omni_diagnostic_pilot",
       "status": "verified_diagnostic",
       "evidence": [
         "docs/data/omni_finetune_verified_result.json",
         "scripts/omni/package_verified_omni_result.py",
         "scripts/omni/audit_verified_omni_package.py"
       ],
+      "readout": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
+      "scope_note": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
     },
     {
       "id": "multi_episode_quality_improvement",
       "status": "active_next_step",
       "evidence": [
         "scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
         "docs/data/omni_finetune_verified_result.json",
         "FOUNDATION_MODEL_PLAN.md"
       ],
+      "readout": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
+      "scope_note": "stronger model quality requires output-format improvements and action/subtask error analysis"
     },
     {
       "id": "scale_up_status_check",
       "status": "verified",
       "evidence": [
         "scripts/validate_scope_claims.py",
         "docs/data/scope_claims_audit.json"
       ],
+      "readout": "Older pilot path strings are tracked as setup-file provenance.",
+      "scope_note": "run/path identifiers stay separate from completed held-out-episode results"
     },
     {
       "id": "mirror_parity",
       "status": "verified",
       "evidence": [
         "scripts/validate_mirror_parity.py",
         "docs/data/mirror_parity.json"
       ],
+      "readout": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
+      "scope_note": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
     },
     {
       "id": "publication_package",
       "status": "verified",
       "evidence": [
         "scripts/validate_publication_package.py",
         "docs/data/publication_audit.json"
       ],
+      "readout": "The public GitHub and Hugging Face bundles contain the intended release files.",
+      "scope_note": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
     },
     {
       "id": "website_integrity",
       "status": "verified",
       "evidence": [
         "scripts/validate_website_integrity.py",
         "docs/data/website_integrity.json"
       ],
+      "readout": "The public website has checked local references.",
+      "scope_note": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
     },
     {
       "id": "rendered_site_check",
       "status": "verified",
       "evidence": [
         "RENDERED_SITE_CHECK.md",
         "scripts/build_rendered_site_check.py",
         "docs/data/rendered_site_check.json"
       ],
+      "readout": "The rendered website walkthrough has a browser-level interaction check.",
+      "scope_note": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
     },
     {
       "id": "quality_gates",
       "status": "verified",
       "evidence": [
         "QUALITY_GATES.md",
         "scripts/build_quality_gates.py",
         "docs/data/quality_gates.json"
       ],
+      "readout": "The release gate is explicit.",
+      "scope_note": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
     },
     {
       "id": "live_publication_status",
       "status": "verified",
       "evidence": [
         "scripts/verify_live_publication.py",
         "docs/data/live_publication_status.json"
       ],
+      "readout": "The live public mirrors are checked after upload.",
+      "scope_note": "fetches public GitHub/HF URLs; it does not validate private training state"
     },
     {
       "id": "citation_metadata",
       "status": "verified",
       "evidence": [
         "CITATION.cff",
         "docs/data/project_manifest.json",
         "LICENSE"
       ],
+      "readout": "The project is externally citable and machine-readable.",
+      "scope_note": "code license does not override original Xperience-10M dataset terms"
     },
     {
       "id": "project_path",
       "status": "verified",
       "evidence": [
         "docs/data/project_packet.json",
         "README.md",
         "docs/index.html"
       ],
+      "readout": "A first-time reader has an explicit project path.",
+      "scope_note": "guides inspection across data, tasks, results, and scale-up status"
     },
     {
       "id": "artifact_index",
       "status": "verified",
       "evidence": [
         "ARTIFACT_GUIDE.md",
         "scripts/build_artifact_index.py",
         "docs/data/artifact_index.json"
       ],
+      "readout": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
+      "scope_note": "selective source-of-truth catalog, not a complete inventory of every output file"
     },
     {
       "id": "reproducibility_contract",
       "status": "verified",
       "evidence": [
         "REPRODUCIBILITY.md",
         "docs/data/reproducibility_matrix.json",
         "notes/reproducibility_audit.md"
       ],
+      "readout": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
+      "scope_note": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
     }
   ]
 }

docs/data/mirror_parity.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

docs/data/omni_model_comparison.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
-  "generated_at_utc": "2026-06-21T15:17:00+00:00",
   "status": "pass",
   "version_count": 3,
   "model_group_count": 5,
@@ -1758,6 +1758,6 @@
   ],
   "pending": [
     "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
-    "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
   ]
 }

 {
   "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
+  "generated_at_utc": "2026-06-22T10:59:59+00:00",
   "status": "pass",
   "version_count": 3,
   "model_group_count": 5,
   ],
   "pending": [
     "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
+    "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before presenting v6 as globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
   ]
 }

docs/data/project_brief.json CHANGED Viewed

@@ -56,7 +56,7 @@
     "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
   ],
   "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
-  "next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone claims.",
   "entry_points": {
     "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
     "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",

     "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
   ],
   "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
+  "next_stage": "Improve action/subtask quality through error analysis before presenting larger robustness or alternative-backbone results.",
   "entry_points": {
     "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
     "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",

docs/data/public_reader_map.json CHANGED Viewed

@@ -6,77 +6,124 @@
     {
       "reader_goal": "Understand the project in one pass",
       "start_here": "PROJECT_BRIEF.md",
-      "then_inspect": ["PROJECT_STATUS.md", "RESEARCH_TAKEAWAYS.md"]
     },
     {
       "reader_goal": "Understand the two evidence lines",
       "start_here": "TWO_EVIDENCE_LINES.md",
-      "then_inspect": ["docs/data/two_evidence_lines.json", "docs/data/two_evidence_line_result_summary.json"]
     },
     {
       "reader_goal": "See the visual public dashboard",
       "start_here": "GitHub Pages dashboard or Hugging Face Space",
-      "then_inspect": ["docs/index.html", "docs/data/project_packet.json"]
     },
     {
       "reader_goal": "Decode project terminology",
       "start_here": "GLOSSARY.md",
-      "then_inspect": ["docs/data/glossary.json", "Homepage Glossary section"]
     },
     {
       "reader_goal": "Understand the data unit",
       "start_here": "results/episode_task_suite/windows.csv",
-      "then_inspect": ["results/episode_task_suite/feature_manifest.json", "docs/data/raw_sample_files.json"]
     },
     {
       "reader_goal": "Trace the 128-episode split",
       "start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
-      "then_inspect": ["docs/data/xperience10m_128_episode_feature_index.json", "results/omni_finetune/xperience10m_128_episode_selection.csv"]
     },
     {
       "reader_goal": "Inspect the 20-task benchmark",
       "start_here": "TASK_SUITE_20.md",
-      "then_inspect": ["docs/data/task_suite_20.json", "EVALUATION_PROTOCOL.md"]
     },
     {
       "reader_goal": "Compare current results",
       "start_here": "RESEARCH_TAKEAWAYS.md",
-      "then_inspect": ["docs/data/task_method_20_result_matrix.json", "docs/data/unified_task_model_radar.json"]
     },
     {
       "reader_goal": "Compare 1-episode and 128-episode methods",
       "start_here": "Homepage radar section",
-      "then_inspect": ["docs/data/single_episode_task_model_radar.json", "docs/data/episode128_task_model_radar.json"]
     },
     {
       "reader_goal": "Read Qwen3-Omni v1-v6 correctly",
       "start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
-      "then_inspect": ["docs/data/qwen3_omni_run_lineage.json", "docs/data/qwen3_v5_v6_comparison.json"]
     },
     {
       "reader_goal": "Find all derived artifacts",
       "start_here": "ARTIFACT_GUIDE.md",
-      "then_inspect": ["Hugging Face artifact dataset", "docs/data/artifact_index.json"]
     },
     {
       "reader_goal": "Download model weights with their matching results",
       "start_here": "Hugging Face weights/results repo",
-      "then_inspect": ["manifest.json", "analysis/docs/data/task_method_20_result_matrix.json", "results/"]
     },
     {
       "reader_goal": "Reproduce or extend the work",
       "start_here": "REPRODUCIBILITY.md",
-      "then_inspect": ["QUALITY_GATES.md", "scripts/", "results/"]
     },
     {
       "reader_goal": "Understand foundation-model directions",
       "start_here": "THREE_FOUNDATION_PIPELINES.md",
-      "then_inspect": ["FOUNDATION_MODEL_PLAN.md", "docs/data/three_foundation_pipelines.json"]
     },
     {
       "reader_goal": "Check public-release health",
       "start_here": "PUBLIC_SURFACE_QA.md",
-      "then_inspect": ["docs/data/live_publication_status.json", "docs/data/mirror_parity.json"]
     }
   ],
   "public_surfaces": [
@@ -125,31 +172,49 @@
     "Foundation directions",
     "Public-release checks"
   ],
-  "claim_boundaries": [
-    {
-      "claim_type": "Single public-sample task behavior",
-      "public_evidence": ["results/episode_task_suite/", "docs/data/task_suite_20.json"],
-      "boundary": "Describes one public sample episode, not the full dataset distribution."
-    },
-    {
-      "claim_type": "128-episode method comparison",
-      "public_evidence": ["XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md", "docs/data/xperience10m_128_episode_feature_index.json", "results/omni_finetune/*128*", "docs/data/omni_model_comparison.json"],
-      "boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
-    },
-    {
-      "claim_type": "Qwen3-Omni v1-v6 lineage",
-      "public_evidence": ["QWEN3_OMNI_RUN_LINEAGE.md", "docs/data/qwen3_omni_run_lineage.json"],
-      "boundary": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
-    },
-    {
-      "claim_type": "Foundation-model track quality",
-      "public_evidence": ["Verified Qwen3-Omni and Cosmos3 result packages", "model cards"],
-      "boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
-    },
-    {
-      "claim_type": "Reproducibility",
-      "public_evidence": ["REPRODUCIBILITY.md", "QUALITY_GATES.md", "release validators"],
-      "boundary": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
     }
   ]
 }

     {
       "reader_goal": "Understand the project in one pass",
       "start_here": "PROJECT_BRIEF.md",
+      "then_inspect": [
+        "PROJECT_STATUS.md",
+        "RESEARCH_TAKEAWAYS.md"
+      ]
     },
     {
       "reader_goal": "Understand the two evidence lines",
       "start_here": "TWO_EVIDENCE_LINES.md",
+      "then_inspect": [
+        "docs/data/two_evidence_lines.json",
+        "docs/data/two_evidence_line_result_summary.json"
+      ]
     },
     {
       "reader_goal": "See the visual public dashboard",
       "start_here": "GitHub Pages dashboard or Hugging Face Space",
+      "then_inspect": [
+        "docs/index.html",
+        "docs/data/project_packet.json"
+      ]
     },
     {
       "reader_goal": "Decode project terminology",
       "start_here": "GLOSSARY.md",
+      "then_inspect": [
+        "docs/data/glossary.json",
+        "Homepage Glossary section"
+      ]
     },
     {
       "reader_goal": "Understand the data unit",
       "start_here": "results/episode_task_suite/windows.csv",
+      "then_inspect": [
+        "results/episode_task_suite/feature_manifest.json",
+        "docs/data/raw_sample_files.json"
+      ]
     },
     {
       "reader_goal": "Trace the 128-episode split",
       "start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
+      "then_inspect": [
+        "docs/data/xperience10m_128_episode_feature_index.json",
+        "results/omni_finetune/xperience10m_128_episode_selection.csv"
+      ]
     },
     {
       "reader_goal": "Inspect the 20-task benchmark",
       "start_here": "TASK_SUITE_20.md",
+      "then_inspect": [
+        "docs/data/task_suite_20.json",
+        "EVALUATION_PROTOCOL.md"
+      ]
     },
     {
       "reader_goal": "Compare current results",
       "start_here": "RESEARCH_TAKEAWAYS.md",
+      "then_inspect": [
+        "docs/data/task_method_20_result_matrix.json",
+        "docs/data/unified_task_model_radar.json"
+      ]
     },
     {
       "reader_goal": "Compare 1-episode and 128-episode methods",
       "start_here": "Homepage radar section",
+      "then_inspect": [
+        "docs/data/single_episode_task_model_radar.json",
+        "docs/data/episode128_task_model_radar.json"
+      ]
     },
     {
       "reader_goal": "Read Qwen3-Omni v1-v6 correctly",
       "start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
+      "then_inspect": [
+        "docs/data/qwen3_omni_run_lineage.json",
+        "docs/data/qwen3_v5_v6_comparison.json"
+      ]
     },
     {
       "reader_goal": "Find all derived artifacts",
       "start_here": "ARTIFACT_GUIDE.md",
+      "then_inspect": [
+        "Hugging Face artifact dataset",
+        "docs/data/artifact_index.json"
+      ]
     },
     {
       "reader_goal": "Download model weights with their matching results",
       "start_here": "Hugging Face weights/results repo",
+      "then_inspect": [
+        "manifest.json",
+        "analysis/docs/data/task_method_20_result_matrix.json",
+        "results/"
+      ]
     },
     {
       "reader_goal": "Reproduce or extend the work",
       "start_here": "REPRODUCIBILITY.md",
+      "then_inspect": [
+        "QUALITY_GATES.md",
+        "scripts/",
+        "results/"
+      ]
     },
     {
       "reader_goal": "Understand foundation-model directions",
       "start_here": "THREE_FOUNDATION_PIPELINES.md",
+      "then_inspect": [
+        "FOUNDATION_MODEL_PLAN.md",
+        "docs/data/three_foundation_pipelines.json"
+      ]
     },
     {
       "reader_goal": "Check public-release health",
       "start_here": "PUBLIC_SURFACE_QA.md",
+      "then_inspect": [
+        "docs/data/live_publication_status.json",
+        "docs/data/mirror_parity.json"
+      ]
     }
   ],
   "public_surfaces": [
     "Foundation directions",
     "Public-release checks"
   ],
+  "reading_scopes": [
+    {
+      "public_evidence": [
+        "results/episode_task_suite/",
+        "docs/data/task_suite_20.json"
+      ],
+      "topic": "Single public-sample task behavior",
+      "scope_note": "Describes one public sample episode, not the full dataset distribution."
+    },
+    {
+      "public_evidence": [
+        "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
+        "docs/data/xperience10m_128_episode_feature_index.json",
+        "results/omni_finetune/*128*",
+        "docs/data/omni_model_comparison.json"
+      ],
+      "topic": "128-episode method comparison",
+      "scope_note": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
+    },
+    {
+      "public_evidence": [
+        "QWEN3_OMNI_RUN_LINEAGE.md",
+        "docs/data/qwen3_omni_run_lineage.json"
+      ],
+      "topic": "Qwen3-Omni v1-v6 lineage",
+      "scope_note": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
+    },
+    {
+      "public_evidence": [
+        "Verified Qwen3-Omni and Cosmos3 result packages",
+        "model cards"
+      ],
+      "topic": "Foundation-model track quality",
+      "scope_note": "Numeric task scores appear only when a task-specific eval or probe exists."
+    },
+    {
+      "public_evidence": [
+        "REPRODUCIBILITY.md",
+        "QUALITY_GATES.md",
+        "release validators"
+      ],
+      "topic": "Reproducibility",
+      "scope_note": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
     }
   ]
 }

docs/data/public_surface_qa.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Public Project Surface",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:26+00:00",
   "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
   "checks": [
     {
@@ -18,7 +18,7 @@
         "website_integrity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:09:34+00:00"
         },
         "rendered_site_check": {
           "exists": true,
@@ -28,27 +28,27 @@
         "task_surface_integrity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:10:38+00:00"
         },
         "source_alignment": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:10:38+00:00"
         },
         "scale_up_status": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-21T20:58:21+00:00"
         },
         "publication_package": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:19:22+00:00"
         },
         "mirror_parity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:19:19+00:00"
         }
       },
       "failures": {}

 {
   "title": "Ropedia Xperience-10M Public Project Surface",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:45+00:00",
   "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
   "checks": [
     {
         "website_integrity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:07+00:00"
         },
         "rendered_site_check": {
           "exists": true,
         "task_surface_integrity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:07+00:00"
         },
         "source_alignment": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:08+00:00"
         },
         "scale_up_status": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:10+00:00"
         },
         "publication_package": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:18:16+00:00"
         },
         "mirror_parity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:18:11+00:00"
         }
       },
       "failures": {}

docs/data/publication_audit.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:19:22+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
@@ -246,8 +246,8 @@
     "hf_space_bundle": {
       "root": "hf_publish/space",
       "exists": true,
-      "file_count": 631,
-      "text_file_count": 470,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
         "bytes": 10221085
@@ -257,8 +257,8 @@
     "hf_artifact_bundle": {
       "root": "hf_publish/artifacts",
       "exists": true,
-      "file_count": 4702,
-      "text_file_count": 1328,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061
@@ -268,8 +268,8 @@
     "hf_model_bundle": {
       "root": "hf_publish/model",
       "exists": true,
-      "file_count": 5464,
-      "text_file_count": 1502,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:16+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
     "hf_space_bundle": {
       "root": "hf_publish/space",
       "exists": true,
+      "file_count": 640,
+      "text_file_count": 479,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
         "bytes": 10221085
     "hf_artifact_bundle": {
       "root": "hf_publish/artifacts",
       "exists": true,
+      "file_count": 4708,
+      "text_file_count": 1334,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061
     "hf_model_bundle": {
       "root": "hf_publish/model",
       "exists": true,
+      "file_count": 5470,
+      "text_file_count": 1508,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061

metrics/evidence_contract.json CHANGED Viewed

@@ -1,170 +1,169 @@
 {
   "project": "Ropedia Xperience-10M Task Suite",
   "scope": "single public Xperience-10M sample episode",
-  "claims": [
     {
       "id": "project_status",
-      "claim": "A first-pass reader has a compact current-state summary.",
       "status": "verified",
       "evidence": [
         "PROJECT_STATUS.md",
         "docs/data/project_status.json"
       ],
-      "boundary": "summarizes existing evidence and current limitations"
     },
     {
       "id": "research_roadmap",
-      "claim": "The research roadmap is explicit.",
       "status": "current",
       "evidence": [
         "RESEARCH_ROADMAP.md",
         "docs/data/research_roadmap.json"
       ],
-      "boundary": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
     },
-        {
-            "id": "official_dataset_card_alignment",
-            "claim": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
       "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json",
         "https://huggingface.co/datasets/ropedia-ai/xperience-10m"
       ],
-            "boundary": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
-        },
-        {
-            "id": "source_alignment",
-            "claim": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
-            "status": "verified",
-            "evidence": [
-                "SOURCE_ALIGNMENT_AUDIT.md",
-                "docs/data/source_alignment_audit.json",
-                "scripts/validate_source_alignment.py"
-            ],
-            "boundary": "offline committed-fact check; does not fetch private gated data"
-        },
-        {
-            "id": "aligned_windows",
-      "claim": "The public Xperience-10M sample has been converted into aligned model windows.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/windows.csv",
         "results/episode_task_suite/shared_windows.npz",
         "results/episode_task_suite/summary_report.json"
       ],
-      "boundary": "5,821 frames, 1,161 windows, one public sample episode"
     },
-        {
-            "id": "feature_contract",
-            "claim": "The current feature contract is explicit and inspectable.",
-            "status": "verified",
-            "evidence": [
-                "results/episode_task_suite/feature_manifest.json",
-                "results/episode_task_suite/available_modalities.json"
-            ],
-            "boundary": "8,546-dimensional aligned multimodal window representation"
-        },
-        {
-            "id": "evaluation_protocol",
-            "claim": "The task evaluation protocol is explicit and generated from committed metrics.",
-            "status": "verified",
-            "evidence": [
-                "EVALUATION_PROTOCOL.md",
-                "docs/data/evaluation_protocol.json",
-                "scripts/build_evaluation_protocol.py"
-            ],
-            "boundary": "defines windows, split, per-task metrics, leakage controls, and current limitations"
-        },
     {
       "id": "modality_atlas",
-      "claim": "The public sample modalities are inspectable without raw data redistribution.",
       "status": "verified",
       "evidence": [
         "docs/data/modality_atlas.json",
         "docs/assets/modalities/",
         "docs/index.html"
       ],
-      "boundary": "derived thumbnails for presentation; raw data remains excluded"
     },
     {
       "id": "task_surface_integrity",
-      "claim": "Public task cards stay readable for non-expert readers.",
       "status": "verified",
       "evidence": [
         "docs/data/task_surface_integrity.json",
         "scripts/validate_task_surface.py",
         "docs/index.html"
       ],
-      "boundary": "presentation integrity for the public task surface"
     },
     {
       "id": "figure_index",
-      "claim": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
       "status": "verified",
       "evidence": [
         "FIGURE_INDEX.md",
         "docs/data/figure_index.json",
         "scripts/build_figure_index.py"
       ],
-      "boundary": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
     },
     {
       "id": "brand_assets",
-      "claim": "A project logo is consistently applied across public surfaces.",
       "status": "verified",
       "evidence": [
         "docs/assets/brand/",
         "docs/data/brand_assets.json",
         "scripts/build_brand_assets.py"
       ],
-      "boundary": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
     },
     {
       "id": "twelve_tasks",
-      "claim": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
       "status": "verified",
       "evidence": [
         "scripts/episode_task_suite.py",
         "results/episode_task_suite/*/metrics.json",
         "results/episode_task_suite/*/predictions.*"
       ],
-      "boundary": "chronological single-episode split, not cross-episode generalization"
     },
     {
       "id": "minimal_vs_neural",
-      "claim": "Minimal and neural heads use the same task contracts.",
       "status": "verified",
       "evidence": [
         "scripts/neural_task_models.py",
         "results/episode_task_suite/neural_mlp/",
         "docs/assets/task_architectures.png"
       ],
-      "boundary": "small heads only; not a foundation model"
     },
     {
       "id": "research_directions",
-      "claim": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_directions/research_direction_taxonomy.json",
         "docs/data/research_directions.json"
       ],
-      "boundary": "some directions remain proxy-only"
     },
     {
       "id": "direction_extensions",
-      "claim": "Four extra direction probes are coded and evaluated.",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
         "docs/data/research_direction_extensions.json"
       ],
-      "boundary": "single-episode probes, not full research-direction solutions"
     },
     {
       "id": "qwen3_omni_diagnostic_pilot",
-      "claim": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
       "status": "verified_diagnostic",
       "evidence": [
         "docs/data/omni_finetune_verified_result.json",
@@ -172,94 +171,94 @@
         "scripts/omni/package_verified_omni_result.py",
         "scripts/omni/audit_verified_omni_package.py"
       ],
-      "boundary": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
     },
     {
       "id": "multi_episode_quality_improvement",
-      "claim": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
       "status": "active_next_step",
       "evidence": [
         "scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
         "docs/data/omni_finetune_verified_result.json",
         "FOUNDATION_MODEL_PLAN.md"
       ],
-      "boundary": "stronger model quality requires output-format improvements and action/subtask error analysis"
     },
     {
       "id": "scale_up_status_check",
-      "claim": "Older pilot path strings are tracked as setup-file provenance.",
       "status": "verified",
       "evidence": [
         "scripts/validate_scope_claims.py",
         "docs/data/scope_claims_audit.json"
       ],
-      "boundary": "run/path identifiers stay separate from completed held-out-episode results"
     },
     {
       "id": "mirror_parity",
-      "claim": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
       "status": "verified",
       "evidence": [
         "scripts/validate_mirror_parity.py",
         "docs/data/mirror_parity.json"
       ],
-      "boundary": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
     },
     {
       "id": "publication_package",
-      "claim": "The public GitHub and Hugging Face bundles contain the intended release files.",
       "status": "verified",
       "evidence": [
         "scripts/validate_publication_package.py",
         "docs/data/publication_audit.json"
       ],
-      "boundary": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
     },
     {
       "id": "website_integrity",
-      "claim": "The public website has checked local references.",
       "status": "verified",
       "evidence": [
         "scripts/validate_website_integrity.py",
         "docs/data/website_integrity.json"
       ],
-      "boundary": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
     },
     {
       "id": "rendered_site_check",
-      "claim": "The rendered website walkthrough has a browser-level interaction check.",
       "status": "verified",
       "evidence": [
         "RENDERED_SITE_CHECK.md",
         "scripts/build_rendered_site_check.py",
         "docs/data/rendered_site_check.json"
       ],
-      "boundary": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
     },
     {
       "id": "quality_gates",
-      "claim": "The release gate is explicit.",
       "status": "verified",
       "evidence": [
         "QUALITY_GATES.md",
         "scripts/build_quality_gates.py",
         "docs/data/quality_gates.json"
       ],
-      "boundary": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
     },
     {
       "id": "live_publication_status",
-      "claim": "The live public mirrors are checked after upload.",
       "status": "verified",
       "evidence": [
         "scripts/verify_live_publication.py",
         "docs/data/live_publication_status.json"
       ],
-      "boundary": "fetches public GitHub/HF URLs; it does not validate private training state"
     },
     {
       "id": "citation_metadata",
-      "claim": "The project is externally citable and machine-readable.",
       "status": "verified",
       "evidence": [
         "CITATION.cff",
@@ -267,11 +266,11 @@
         "docs/data/project_manifest.json",
         "LICENSE"
       ],
-      "boundary": "code license does not override original Xperience-10M dataset terms"
     },
     {
       "id": "project_path",
-      "claim": "A first-time reader has an explicit project path.",
       "status": "verified",
       "evidence": [
         "docs/data/project_packet.json",
@@ -280,29 +279,30 @@
         "README.md",
         "docs/index.html"
       ],
-      "boundary": "guides inspection across data, tasks, results, and scale-up status"
     },
     {
       "id": "artifact_index",
-      "claim": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
       "status": "verified",
       "evidence": [
         "ARTIFACT_GUIDE.md",
         "scripts/build_artifact_index.py",
         "docs/data/artifact_index.json"
       ],
-      "boundary": "selective source-of-truth catalog, not a complete inventory of every output file"
     },
     {
       "id": "reproducibility_contract",
-      "claim": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
       "status": "verified",
       "evidence": [
         "REPRODUCIBILITY.md",
         "docs/data/reproducibility_matrix.json",
         "notes/reproducibility_audit.md"
       ],
-      "boundary": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
     }
   ]
 }

 {
   "project": "Ropedia Xperience-10M Task Suite",
   "scope": "single public Xperience-10M sample episode",
+  "readouts": [
     {
       "id": "project_status",
       "status": "verified",
       "evidence": [
         "PROJECT_STATUS.md",
         "docs/data/project_status.json"
       ],
+      "readout": "A first-pass reader has a compact current-state summary.",
+      "scope_note": "summarizes existing evidence and current limitations"
     },
     {
       "id": "research_roadmap",
       "status": "current",
       "evidence": [
         "RESEARCH_ROADMAP.md",
         "docs/data/research_roadmap.json"
       ],
+      "readout": "The research roadmap is explicit.",
+      "scope_note": "connects public-sample task development to multi-episode data preparation, Qwen3-Omni LoRA, robustness runs, and larger omni-model extensions"
     },
+    {
+      "id": "official_dataset_card_alignment",
       "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json",
         "https://huggingface.co/datasets/ropedia-ai/xperience-10m"
       ],
+      "readout": "The public dataset description is aligned with the official gated Xperience-10M dataset card and public sample card.",
+      "scope_note": "summarizes upstream public metadata, API listing facts, sample license/tooling, and dataset-card facts; does not grant access or mirror raw data"
+    },
+    {
+      "id": "source_alignment",
+      "status": "verified",
+      "evidence": [
+        "SOURCE_ALIGNMENT_AUDIT.md",
+        "docs/data/source_alignment_audit.json",
+        "scripts/validate_source_alignment.py"
+      ],
+      "readout": "Source facts, sample details, API-listing notes, and project coverage are validated across repo, website, and HF cards.",
+      "scope_note": "offline committed-fact check; does not fetch private gated data"
+    },
+    {
+      "id": "aligned_windows",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/windows.csv",
         "results/episode_task_suite/shared_windows.npz",
         "results/episode_task_suite/summary_report.json"
       ],
+      "readout": "The public Xperience-10M sample has been converted into aligned model windows.",
+      "scope_note": "5,821 frames, 1,161 windows, one public sample episode"
+    },
+    {
+      "id": "feature_contract",
+      "status": "verified",
+      "evidence": [
+        "results/episode_task_suite/feature_manifest.json",
+        "results/episode_task_suite/available_modalities.json"
+      ],
+      "readout": "The current feature contract is explicit and inspectable.",
+      "scope_note": "8,546-dimensional aligned multimodal window representation"
+    },
+    {
+      "id": "evaluation_protocol",
+      "status": "verified",
+      "evidence": [
+        "EVALUATION_PROTOCOL.md",
+        "docs/data/evaluation_protocol.json",
+        "scripts/build_evaluation_protocol.py"
+      ],
+      "readout": "The task evaluation protocol is explicit and generated from committed metrics.",
+      "scope_note": "defines windows, split, per-task metrics, leakage controls, and current limitations"
     },
     {
       "id": "modality_atlas",
       "status": "verified",
       "evidence": [
         "docs/data/modality_atlas.json",
         "docs/assets/modalities/",
         "docs/index.html"
       ],
+      "readout": "The public sample modalities are inspectable without raw data redistribution.",
+      "scope_note": "derived thumbnails for presentation; raw data remains excluded"
     },
     {
       "id": "task_surface_integrity",
       "status": "verified",
       "evidence": [
         "docs/data/task_surface_integrity.json",
         "scripts/validate_task_surface.py",
         "docs/index.html"
       ],
+      "readout": "Public task cards stay readable for non-expert readers.",
+      "scope_note": "presentation integrity for the public task surface"
     },
     {
       "id": "figure_index",
       "status": "verified",
       "evidence": [
         "FIGURE_INDEX.md",
         "docs/data/figure_index.json",
         "scripts/build_figure_index.py"
       ],
+      "readout": "Public figures, charts, and modality thumbnails are indexed as project evidence.",
+      "scope_note": "records derived visual assets, dimensions, hashes, roles, and source scripts; raw Xperience-10M data remains excluded"
     },
     {
       "id": "brand_assets",
       "status": "verified",
       "evidence": [
         "docs/assets/brand/",
         "docs/data/brand_assets.json",
         "scripts/build_brand_assets.py"
       ],
+      "readout": "A project logo is consistently applied across public surfaces.",
+      "scope_note": "generated logo and deterministic derivatives only; no raw dataset data or model weights"
     },
     {
       "id": "twelve_tasks",
       "status": "verified",
       "evidence": [
         "scripts/episode_task_suite.py",
         "results/episode_task_suite/*/metrics.json",
         "results/episode_task_suite/*/predictions.*"
       ],
+      "readout": "The 12 task heads are implemented as scripts with saved metrics and predictions.",
+      "scope_note": "chronological single-episode split, not cross-episode generalization"
     },
     {
       "id": "minimal_vs_neural",
       "status": "verified",
       "evidence": [
         "scripts/neural_task_models.py",
         "results/episode_task_suite/neural_mlp/",
         "docs/assets/task_architectures.png"
       ],
+      "readout": "Minimal and neural heads use the same task contracts.",
+      "scope_note": "small heads only; not a foundation model"
     },
     {
       "id": "research_directions",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_directions/research_direction_taxonomy.json",
         "docs/data/research_directions.json"
       ],
+      "readout": "Four Ropedia research directions are mapped honestly as direct, proxy, or diagnostic evidence.",
+      "scope_note": "some directions remain proxy-only"
     },
     {
       "id": "direction_extensions",
       "status": "verified",
       "evidence": [
         "results/episode_task_suite/research_direction_extensions/research_direction_extension_results.json",
         "docs/data/research_direction_extensions.json"
       ],
+      "readout": "Four extra direction probes are coded and evaluated.",
+      "scope_note": "single-episode probes, not full research-direction solutions"
     },
     {
       "id": "qwen3_omni_diagnostic_pilot",
       "status": "verified_diagnostic",
       "evidence": [
         "docs/data/omni_finetune_verified_result.json",
         "scripts/omni/package_verified_omni_result.py",
         "scripts/omni/audit_verified_omni_package.py"
       ],
+      "readout": "Qwen3-Omni has a verified selected-episode held-out diagnostic pilot.",
+      "scope_note": "the pipeline is verified, but model quality is weak: JSON validity is below target and action/subtask metrics are low"
     },
     {
       "id": "multi_episode_quality_improvement",
       "status": "active_next_step",
       "evidence": [
         "scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh",
         "docs/data/omni_finetune_verified_result.json",
         "FOUNDATION_MODEL_PLAN.md"
       ],
+      "readout": "The next Qwen3-Omni step is structured-output and task-quality improvement on the same selected split.",
+      "scope_note": "stronger model quality requires output-format improvements and action/subtask error analysis"
     },
     {
       "id": "scale_up_status_check",
       "status": "verified",
       "evidence": [
         "scripts/validate_scope_claims.py",
         "docs/data/scope_claims_audit.json"
       ],
+      "readout": "Older pilot path strings are tracked as setup-file provenance.",
+      "scope_note": "run/path identifiers stay separate from completed held-out-episode results"
     },
     {
       "id": "mirror_parity",
       "status": "verified",
       "evidence": [
         "scripts/validate_mirror_parity.py",
         "docs/data/mirror_parity.json"
       ],
+      "readout": "Prepared GitHub and Hugging Face mirrors carry matching critical data, visual, HTML, and validator files.",
+      "scope_note": "compares prepared local mirror bundles before upload; live URLs are checked after publishing"
     },
     {
       "id": "publication_package",
       "status": "verified",
       "evidence": [
         "scripts/validate_publication_package.py",
         "docs/data/publication_audit.json"
       ],
+      "readout": "The public GitHub and Hugging Face bundles contain the intended release files.",
+      "scope_note": "checks public files, HF bundles, and current public-card assets; temporary local outputs are excluded"
     },
     {
       "id": "website_integrity",
       "status": "verified",
       "evidence": [
         "scripts/validate_website_integrity.py",
         "docs/data/website_integrity.json"
       ],
+      "readout": "The public website has checked local references.",
+      "scope_note": "checks local links, anchors, JSON data, and referenced images; external URLs are not fetched"
     },
     {
       "id": "rendered_site_check",
       "status": "verified",
       "evidence": [
         "RENDERED_SITE_CHECK.md",
         "scripts/build_rendered_site_check.py",
         "docs/data/rendered_site_check.json"
       ],
+      "readout": "The rendered website walkthrough has a browser-level interaction check.",
+      "scope_note": "checks local page load, tab switch, walkthrough deep link, player controls, and console health"
     },
     {
       "id": "quality_gates",
       "status": "verified",
       "evidence": [
         "QUALITY_GATES.md",
         "scripts/build_quality_gates.py",
         "docs/data/quality_gates.json"
       ],
+      "readout": "The release gate is explicit.",
+      "scope_note": "summarizes packaging and live-mirror checks; cross-episode model quality is measured by later held-out reports"
     },
     {
       "id": "live_publication_status",
       "status": "verified",
       "evidence": [
         "scripts/verify_live_publication.py",
         "docs/data/live_publication_status.json"
       ],
+      "readout": "The live public mirrors are checked after upload.",
+      "scope_note": "fetches public GitHub/HF URLs; it does not validate private training state"
     },
     {
       "id": "citation_metadata",
       "status": "verified",
       "evidence": [
         "CITATION.cff",
         "docs/data/project_manifest.json",
         "LICENSE"
       ],
+      "readout": "The project is externally citable and machine-readable.",
+      "scope_note": "code license does not override original Xperience-10M dataset terms"
     },
     {
       "id": "project_path",
       "status": "verified",
       "evidence": [
         "docs/data/project_packet.json",
         "README.md",
         "docs/index.html"
       ],
+      "readout": "A first-time reader has an explicit project path.",
+      "scope_note": "guides inspection across data, tasks, results, and scale-up status"
     },
     {
       "id": "artifact_index",
       "status": "verified",
       "evidence": [
         "ARTIFACT_GUIDE.md",
         "scripts/build_artifact_index.py",
         "docs/data/artifact_index.json"
       ],
+      "readout": "The core project artifacts are grouped for human reading and indexed with existence, size, and hash metadata where stable.",
+      "scope_note": "selective source-of-truth catalog, not a complete inventory of every output file"
     },
     {
       "id": "reproducibility_contract",
       "status": "verified",
       "evidence": [
         "REPRODUCIBILITY.md",
         "docs/data/reproducibility_matrix.json",
         "notes/reproducibility_audit.md"
       ],
+      "readout": "The public reproduction path is documented with commands, expected outputs, and exact-match reproduction evidence.",
+      "scope_note": "publicly reproduces the single-episode pipeline; multi-episode Qwen3-Omni metrics are added only after data preparation and held-out evaluation"
     }
   ]
 }

metrics/mirror_parity.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

metrics/omni_model_comparison.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
-  "generated_at_utc": "2026-06-21T15:17:00+00:00",
   "status": "pass",
   "version_count": 3,
   "model_group_count": 5,
@@ -1758,6 +1758,6 @@
   ],
   "pending": [
     "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
-    "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
   ]
 }

 {
   "title": "Ropedia Xperience-10M Current Result Versions and Model Groups",
+  "generated_at_utc": "2026-06-22T10:59:59+00:00",
   "status": "pass",
   "version_count": 3,
   "model_group_count": 5,
   ],
   "pending": [
     "Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.",
+    "Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before presenting v6 as globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly."
   ]
 }

metrics/project_brief.json CHANGED Viewed

@@ -56,7 +56,7 @@
     "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
   ],
   "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
-  "next_stage": "Improve action/subtask quality through error analysis before larger robustness or alternative-backbone claims.",
   "entry_points": {
     "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
     "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",

     "Use docs/data/omni_finetune_verified_result.json for the current multi-episode Qwen3-Omni pilot result."
   ],
   "scope_boundary": "The public sample is enough to build and verify task definitions, feature contracts, metrics, visualization, and baseline code. The final multi-episode Qwen3-Omni diagnostic result verifies the training loop and strict-JSON output reliability, but does not yet show strong action/subtask model quality.",
+  "next_stage": "Improve action/subtask quality through error analysis before presenting larger robustness or alternative-backbone results.",
   "entry_points": {
     "visual_dashboard": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
     "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",

metrics/public_reader_map.json CHANGED Viewed

@@ -6,77 +6,124 @@
     {
       "reader_goal": "Understand the project in one pass",
       "start_here": "PROJECT_BRIEF.md",
-      "then_inspect": ["PROJECT_STATUS.md", "RESEARCH_TAKEAWAYS.md"]
     },
     {
       "reader_goal": "Understand the two evidence lines",
       "start_here": "TWO_EVIDENCE_LINES.md",
-      "then_inspect": ["docs/data/two_evidence_lines.json", "docs/data/two_evidence_line_result_summary.json"]
     },
     {
       "reader_goal": "See the visual public dashboard",
       "start_here": "GitHub Pages dashboard or Hugging Face Space",
-      "then_inspect": ["docs/index.html", "docs/data/project_packet.json"]
     },
     {
       "reader_goal": "Decode project terminology",
       "start_here": "GLOSSARY.md",
-      "then_inspect": ["docs/data/glossary.json", "Homepage Glossary section"]
     },
     {
       "reader_goal": "Understand the data unit",
       "start_here": "results/episode_task_suite/windows.csv",
-      "then_inspect": ["results/episode_task_suite/feature_manifest.json", "docs/data/raw_sample_files.json"]
     },
     {
       "reader_goal": "Trace the 128-episode split",
       "start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
-      "then_inspect": ["docs/data/xperience10m_128_episode_feature_index.json", "results/omni_finetune/xperience10m_128_episode_selection.csv"]
     },
     {
       "reader_goal": "Inspect the 20-task benchmark",
       "start_here": "TASK_SUITE_20.md",
-      "then_inspect": ["docs/data/task_suite_20.json", "EVALUATION_PROTOCOL.md"]
     },
     {
       "reader_goal": "Compare current results",
       "start_here": "RESEARCH_TAKEAWAYS.md",
-      "then_inspect": ["docs/data/task_method_20_result_matrix.json", "docs/data/unified_task_model_radar.json"]
     },
     {
       "reader_goal": "Compare 1-episode and 128-episode methods",
       "start_here": "Homepage radar section",
-      "then_inspect": ["docs/data/single_episode_task_model_radar.json", "docs/data/episode128_task_model_radar.json"]
     },
     {
       "reader_goal": "Read Qwen3-Omni v1-v6 correctly",
       "start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
-      "then_inspect": ["docs/data/qwen3_omni_run_lineage.json", "docs/data/qwen3_v5_v6_comparison.json"]
     },
     {
       "reader_goal": "Find all derived artifacts",
       "start_here": "ARTIFACT_GUIDE.md",
-      "then_inspect": ["Hugging Face artifact dataset", "docs/data/artifact_index.json"]
     },
     {
       "reader_goal": "Download model weights with their matching results",
       "start_here": "Hugging Face weights/results repo",
-      "then_inspect": ["manifest.json", "analysis/docs/data/task_method_20_result_matrix.json", "results/"]
     },
     {
       "reader_goal": "Reproduce or extend the work",
       "start_here": "REPRODUCIBILITY.md",
-      "then_inspect": ["QUALITY_GATES.md", "scripts/", "results/"]
     },
     {
       "reader_goal": "Understand foundation-model directions",
       "start_here": "THREE_FOUNDATION_PIPELINES.md",
-      "then_inspect": ["FOUNDATION_MODEL_PLAN.md", "docs/data/three_foundation_pipelines.json"]
     },
     {
       "reader_goal": "Check public-release health",
       "start_here": "PUBLIC_SURFACE_QA.md",
-      "then_inspect": ["docs/data/live_publication_status.json", "docs/data/mirror_parity.json"]
     }
   ],
   "public_surfaces": [
@@ -125,31 +172,49 @@
     "Foundation directions",
     "Public-release checks"
   ],
-  "claim_boundaries": [
-    {
-      "claim_type": "Single public-sample task behavior",
-      "public_evidence": ["results/episode_task_suite/", "docs/data/task_suite_20.json"],
-      "boundary": "Describes one public sample episode, not the full dataset distribution."
-    },
-    {
-      "claim_type": "128-episode method comparison",
-      "public_evidence": ["XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md", "docs/data/xperience10m_128_episode_feature_index.json", "results/omni_finetune/*128*", "docs/data/omni_model_comparison.json"],
-      "boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
-    },
-    {
-      "claim_type": "Qwen3-Omni v1-v6 lineage",
-      "public_evidence": ["QWEN3_OMNI_RUN_LINEAGE.md", "docs/data/qwen3_omni_run_lineage.json"],
-      "boundary": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
-    },
-    {
-      "claim_type": "Foundation-model track quality",
-      "public_evidence": ["Verified Qwen3-Omni and Cosmos3 result packages", "model cards"],
-      "boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
-    },
-    {
-      "claim_type": "Reproducibility",
-      "public_evidence": ["REPRODUCIBILITY.md", "QUALITY_GATES.md", "release validators"],
-      "boundary": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
     }
   ]
 }

     {
       "reader_goal": "Understand the project in one pass",
       "start_here": "PROJECT_BRIEF.md",
+      "then_inspect": [
+        "PROJECT_STATUS.md",
+        "RESEARCH_TAKEAWAYS.md"
+      ]
     },
     {
       "reader_goal": "Understand the two evidence lines",
       "start_here": "TWO_EVIDENCE_LINES.md",
+      "then_inspect": [
+        "docs/data/two_evidence_lines.json",
+        "docs/data/two_evidence_line_result_summary.json"
+      ]
     },
     {
       "reader_goal": "See the visual public dashboard",
       "start_here": "GitHub Pages dashboard or Hugging Face Space",
+      "then_inspect": [
+        "docs/index.html",
+        "docs/data/project_packet.json"
+      ]
     },
     {
       "reader_goal": "Decode project terminology",
       "start_here": "GLOSSARY.md",
+      "then_inspect": [
+        "docs/data/glossary.json",
+        "Homepage Glossary section"
+      ]
     },
     {
       "reader_goal": "Understand the data unit",
       "start_here": "results/episode_task_suite/windows.csv",
+      "then_inspect": [
+        "results/episode_task_suite/feature_manifest.json",
+        "docs/data/raw_sample_files.json"
+      ]
     },
     {
       "reader_goal": "Trace the 128-episode split",
       "start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
+      "then_inspect": [
+        "docs/data/xperience10m_128_episode_feature_index.json",
+        "results/omni_finetune/xperience10m_128_episode_selection.csv"
+      ]
     },
     {
       "reader_goal": "Inspect the 20-task benchmark",
       "start_here": "TASK_SUITE_20.md",
+      "then_inspect": [
+        "docs/data/task_suite_20.json",
+        "EVALUATION_PROTOCOL.md"
+      ]
     },
     {
       "reader_goal": "Compare current results",
       "start_here": "RESEARCH_TAKEAWAYS.md",
+      "then_inspect": [
+        "docs/data/task_method_20_result_matrix.json",
+        "docs/data/unified_task_model_radar.json"
+      ]
     },
     {
       "reader_goal": "Compare 1-episode and 128-episode methods",
       "start_here": "Homepage radar section",
+      "then_inspect": [
+        "docs/data/single_episode_task_model_radar.json",
+        "docs/data/episode128_task_model_radar.json"
+      ]
     },
     {
       "reader_goal": "Read Qwen3-Omni v1-v6 correctly",
       "start_here": "QWEN3_OMNI_RUN_LINEAGE.md",
+      "then_inspect": [
+        "docs/data/qwen3_omni_run_lineage.json",
+        "docs/data/qwen3_v5_v6_comparison.json"
+      ]
     },
     {
       "reader_goal": "Find all derived artifacts",
       "start_here": "ARTIFACT_GUIDE.md",
+      "then_inspect": [
+        "Hugging Face artifact dataset",
+        "docs/data/artifact_index.json"
+      ]
     },
     {
       "reader_goal": "Download model weights with their matching results",
       "start_here": "Hugging Face weights/results repo",
+      "then_inspect": [
+        "manifest.json",
+        "analysis/docs/data/task_method_20_result_matrix.json",
+        "results/"
+      ]
     },
     {
       "reader_goal": "Reproduce or extend the work",
       "start_here": "REPRODUCIBILITY.md",
+      "then_inspect": [
+        "QUALITY_GATES.md",
+        "scripts/",
+        "results/"
+      ]
     },
     {
       "reader_goal": "Understand foundation-model directions",
       "start_here": "THREE_FOUNDATION_PIPELINES.md",
+      "then_inspect": [
+        "FOUNDATION_MODEL_PLAN.md",
+        "docs/data/three_foundation_pipelines.json"
+      ]
     },
     {
       "reader_goal": "Check public-release health",
       "start_here": "PUBLIC_SURFACE_QA.md",
+      "then_inspect": [
+        "docs/data/live_publication_status.json",
+        "docs/data/mirror_parity.json"
+      ]
     }
   ],
   "public_surfaces": [
     "Foundation directions",
     "Public-release checks"
   ],
+  "reading_scopes": [
+    {
+      "public_evidence": [
+        "results/episode_task_suite/",
+        "docs/data/task_suite_20.json"
+      ],
+      "topic": "Single public-sample task behavior",
+      "scope_note": "Describes one public sample episode, not the full dataset distribution."
+    },
+    {
+      "public_evidence": [
+        "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
+        "docs/data/xperience10m_128_episode_feature_index.json",
+        "results/omni_finetune/*128*",
+        "docs/data/omni_model_comparison.json"
+      ],
+      "topic": "128-episode method comparison",
+      "scope_note": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
+    },
+    {
+      "public_evidence": [
+        "QWEN3_OMNI_RUN_LINEAGE.md",
+        "docs/data/qwen3_omni_run_lineage.json"
+      ],
+      "topic": "Qwen3-Omni v1-v6 lineage",
+      "scope_note": "v1-v4 are pipeline and ablation evidence, v5 is the pinned prior release, and v6 is the current public 20-task Qwen row."
+    },
+    {
+      "public_evidence": [
+        "Verified Qwen3-Omni and Cosmos3 result packages",
+        "model cards"
+      ],
+      "topic": "Foundation-model track quality",
+      "scope_note": "Numeric task scores appear only when a task-specific eval or probe exists."
+    },
+    {
+      "public_evidence": [
+        "REPRODUCIBILITY.md",
+        "QUALITY_GATES.md",
+        "release validators"
+      ],
+      "topic": "Reproducibility",
+      "scope_note": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
     }
   ]
 }

metrics/public_surface_qa.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Public Project Surface",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:26+00:00",
   "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
   "checks": [
     {
@@ -18,7 +18,7 @@
         "website_integrity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:09:34+00:00"
         },
         "rendered_site_check": {
           "exists": true,
@@ -28,27 +28,27 @@
         "task_surface_integrity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:10:38+00:00"
         },
         "source_alignment": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:10:38+00:00"
         },
         "scale_up_status": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-21T20:58:21+00:00"
         },
         "publication_package": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:19:22+00:00"
         },
         "mirror_parity": {
           "exists": true,
           "status": "pass",
-          "generated_at_utc": "2026-06-22T10:19:19+00:00"
         }
       },
       "failures": {}

 {
   "title": "Ropedia Xperience-10M Public Project Surface",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:45+00:00",
   "scope": "Repo README, GitHub Pages HTML, Hugging Face Space card, artifact dataset card, and model card.",
   "checks": [
     {
         "website_integrity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:07+00:00"
         },
         "rendered_site_check": {
           "exists": true,
         "task_surface_integrity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:07+00:00"
         },
         "source_alignment": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:08+00:00"
         },
         "scale_up_status": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:17:10+00:00"
         },
         "publication_package": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:18:16+00:00"
         },
         "mirror_parity": {
           "exists": true,
           "status": "pass",
+          "generated_at_utc": "2026-06-22T11:18:11+00:00"
         }
       },
       "failures": {}