cy0307 committed 1 day ago

Commit

0f657b5

verified ·

1 Parent(s): 3d29d20

Refine reader-facing public wording (3/6)

Browse files

Files changed (20) hide show

data/quality_gates.json +1 -1
data/research_takeaways.json +2 -2
data/scope_claims_audit.json +2 -2
data/source_alignment_audit.json +1 -1
data/summary_metrics.json +1 -1
data/task_method_20_gap_audit.json +2 -2
docs/data/quality_gates.json +1 -1
docs/data/research_takeaways.json +2 -2
docs/data/scope_claims_audit.json +2 -2
docs/data/source_alignment_audit.json +1 -1
docs/data/summary_metrics.json +1 -1
docs/data/task_method_20_gap_audit.json +2 -2
docs/data/task_surface_integrity.json +1 -1
metrics/publication_audit.json +7 -7
metrics/quality_gates.json +1 -1
metrics/research_takeaways.json +2 -2
metrics/scope_claims_audit.json +2 -2
metrics/source_alignment_audit.json +1 -1
metrics/summary_metrics.json +1 -1
metrics/task_method_20_gap_audit.json +2 -2

data/quality_gates.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Release Checks",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:26+00:00",
   "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
   "automated_gates": [
     {

 {
   "title": "Ropedia Xperience-10M Release Checks",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:45+00:00",
   "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
   "automated_gates": [
     {

data/research_takeaways.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
-  "generated_at_utc": "2026-06-21T15:18:59+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -186,7 +186,7 @@
         }
       ],
       "source": "docs/data/omni_finetune_verified_result.json",
-      "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
     }
   ]
 }

 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T10:59:59+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
         }
       ],
       "source": "docs/data/omni_finetune_verified_result.json",
+      "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
     }
   ]
 }

data/scope_claims_audit.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:29+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
@@ -25,7 +25,7 @@
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
-      "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:10+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
+      "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]

data/source_alignment_audit.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Source Alignment Note",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:10:38+00:00",
   "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
   "alignment_summary": {
     "full_dataset_repo": "ropedia-ai/xperience-10m",

 {
   "title": "Ropedia Xperience-10M Source Alignment Note",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:08+00:00",
   "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
   "alignment_summary": {
     "full_dataset_repo": "ropedia-ai/xperience-10m",

data/summary_metrics.json CHANGED Viewed

@@ -14,7 +14,7 @@
       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
-    "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
   },
   "models": {
     "motion_action": {

       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
+    "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
   },
   "models": {
     "motion_action": {

data/task_method_20_gap_audit.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "generated_at_utc": "2026-06-21T15:21:42+00:00",
   "immediate_actions": [
     {
       "artifact": "docs/data/task_method_20_gap_audit.json",
@@ -210,7 +210,7 @@
   "target_policy": {
     "numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
     "proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
-    "scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model claims. The current release has zero scoreless cells."
   },
   "title": "Task Method 20-Result Completion Audit"
 }

 {
+  "generated_at_utc": "2026-06-22T11:00:00+00:00",
   "immediate_actions": [
     {
       "artifact": "docs/data/task_method_20_gap_audit.json",
   "target_policy": {
     "numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
     "proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
+    "scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model numbers. The current release has zero scoreless cells."
   },
   "title": "Task Method 20-Result Completion Audit"
 }

docs/data/quality_gates.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Release Checks",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:26+00:00",
   "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
   "automated_gates": [
     {

 {
   "title": "Ropedia Xperience-10M Release Checks",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:45+00:00",
   "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
   "automated_gates": [
     {

docs/data/research_takeaways.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
-  "generated_at_utc": "2026-06-21T15:18:59+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -186,7 +186,7 @@
         }
       ],
       "source": "docs/data/omni_finetune_verified_result.json",
-      "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
     }
   ]
 }

 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T10:59:59+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
         }
       ],
       "source": "docs/data/omni_finetune_verified_result.json",
+      "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
     }
   ]
 }

docs/data/scope_claims_audit.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:29+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
@@ -25,7 +25,7 @@
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
-      "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:10+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
+      "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]

docs/data/source_alignment_audit.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Source Alignment Note",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:10:38+00:00",
   "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
   "alignment_summary": {
     "full_dataset_repo": "ropedia-ai/xperience-10m",

 {
   "title": "Ropedia Xperience-10M Source Alignment Note",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:08+00:00",
   "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
   "alignment_summary": {
     "full_dataset_repo": "ropedia-ai/xperience-10m",

docs/data/summary_metrics.json CHANGED Viewed

@@ -14,7 +14,7 @@
       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
-    "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
   },
   "models": {
     "motion_action": {

       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
+    "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
   },
   "models": {
     "motion_action": {

docs/data/task_method_20_gap_audit.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "generated_at_utc": "2026-06-21T15:21:42+00:00",
   "immediate_actions": [
     {
       "artifact": "docs/data/task_method_20_gap_audit.json",
@@ -210,7 +210,7 @@
   "target_policy": {
     "numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
     "proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
-    "scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model claims. The current release has zero scoreless cells."
   },
   "title": "Task Method 20-Result Completion Audit"
 }

 {
+  "generated_at_utc": "2026-06-22T11:00:00+00:00",
   "immediate_actions": [
     {
       "artifact": "docs/data/task_method_20_gap_audit.json",
   "target_policy": {
     "numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
     "proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
+    "scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model numbers. The current release has zero scoreless cells."
   },
   "title": "Task Method 20-Result Completion Audit"
 }

docs/data/task_surface_integrity.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:10:38+00:00",
   "summary": {
     "original_walkthrough_task_count": 12,
     "expected_original_walkthrough_task_count": 12,

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:07+00:00",
   "summary": {
     "original_walkthrough_task_count": 12,
     "expected_original_walkthrough_task_count": 12,

metrics/publication_audit.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:19:22+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
@@ -246,8 +246,8 @@
     "hf_space_bundle": {
       "root": "hf_publish/space",
       "exists": true,
-      "file_count": 631,
-      "text_file_count": 470,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
         "bytes": 10221085
@@ -257,8 +257,8 @@
     "hf_artifact_bundle": {
       "root": "hf_publish/artifacts",
       "exists": true,
-      "file_count": 4702,
-      "text_file_count": 1328,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061
@@ -268,8 +268,8 @@
     "hf_model_bundle": {
       "root": "hf_publish/model",
       "exists": true,
-      "file_count": 5464,
-      "text_file_count": 1502,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:16+00:00",
   "checks": [
     {
       "name": "required_publication_assets_present",
     "hf_space_bundle": {
       "root": "hf_publish/space",
       "exists": true,
+      "file_count": 640,
+      "text_file_count": 479,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
         "bytes": 10221085
     "hf_artifact_bundle": {
       "root": "hf_publish/artifacts",
       "exists": true,
+      "file_count": 4708,
+      "text_file_count": 1334,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061
     "hf_model_bundle": {
       "root": "hf_publish/model",
       "exists": true,
+      "file_count": 5470,
+      "text_file_count": 1508,
       "largest_file": {
         "path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
         "bytes": 135591061

metrics/quality_gates.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Release Checks",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:26+00:00",
   "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
   "automated_gates": [
     {

 {
   "title": "Ropedia Xperience-10M Release Checks",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:18:45+00:00",
   "rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
   "automated_gates": [
     {

metrics/research_takeaways.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
-  "generated_at_utc": "2026-06-21T15:18:59+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -186,7 +186,7 @@
         }
       ],
       "source": "docs/data/omni_finetune_verified_result.json",
-      "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
     }
   ]
 }

 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T10:59:59+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
         }
       ],
       "source": "docs/data/omni_finetune_verified_result.json",
+      "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
     }
   ]
 }

metrics/scope_claims_audit.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:20:29+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
@@ -25,7 +25,7 @@
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
-      "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:10+00:00",
   "summary": {
     "qwen3_omni_verified_diagnostic_pilot": true,
     "dataset_manifest_num_episodes": 119,
     {
       "name": "summary_metrics_preserves_verified_diagnostic_status",
       "status": "pass",
+      "detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking.",
       "evidence": [
         "docs/data/summary_metrics.json"
       ]

metrics/source_alignment_audit.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Source Alignment Note",
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:10:38+00:00",
   "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
   "alignment_summary": {
     "full_dataset_repo": "ropedia-ai/xperience-10m",

 {
   "title": "Ropedia Xperience-10M Source Alignment Note",
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:08+00:00",
   "alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
   "alignment_summary": {
     "full_dataset_repo": "ropedia-ai/xperience-10m",

metrics/summary_metrics.json CHANGED Viewed

@@ -14,7 +14,7 @@
       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
-    "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines, not strong model-quality claims."
   },
   "models": {
     "motion_action": {

       "visualization.rrd"
     ],
     "access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
+    "current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
   },
   "models": {
     "motion_action": {

metrics/task_method_20_gap_audit.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "generated_at_utc": "2026-06-21T15:21:42+00:00",
   "immediate_actions": [
     {
       "artifact": "docs/data/task_method_20_gap_audit.json",
@@ -210,7 +210,7 @@
   "target_policy": {
     "numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
     "proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
-    "scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model claims. The current release has zero scoreless cells."
   },
   "title": "Task Method 20-Result Completion Audit"
 }

 {
+  "generated_at_utc": "2026-06-22T11:00:00+00:00",
   "immediate_actions": [
     {
       "artifact": "docs/data/task_method_20_gap_audit.json",
   "target_policy": {
     "numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
     "proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
+    "scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model numbers. The current release has zero scoreless cells."
   },
   "title": "Task Method 20-Result Completion Audit"
 }