cy0307 committed 7 days ago

Commit

8a8926d

verified ·

1 Parent(s): 0f657b5

Refine reader-facing public wording (4/6)

Browse files

Files changed (20) hide show

data/task_surface_integrity.json +1 -1
data/website_integrity.json +10 -10
data/xperience10m_dataset_card_alignment.json +2 -2
docs/data/website_integrity.json +10 -10
docs/data/xperience10m_dataset_card_alignment.json +2 -2
metrics/task_surface_integrity.json +1 -1
metrics/website_integrity.json +10 -10
metrics/xperience10m_dataset_card_alignment.json +2 -2
results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md +1 -1
results/omni_finetune/OMNI_MODEL_COMPARISON.md +2 -2
results/omni_finetune/XPERIENCE10M_128_DATA_PREPARATION_AND_FINETUNE_PLAN.md +2 -2
results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/dataset/target_manifest.json +1 -1
results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/analysis/ERROR_ANALYSIS.md +1 -1
results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/analysis/error_analysis_summary.json +1 -1
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md +1 -1
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/error_analysis_summary.json +1 -1
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/analysis/ERROR_ANALYSIS.md +1 -1
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/analysis/error_analysis_summary.json +1 -1
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/analysis/ERROR_ANALYSIS.md +1 -1
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/analysis/error_analysis_summary.json +1 -1

data/task_surface_integrity.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:10:38+00:00",
   "summary": {
     "original_walkthrough_task_count": 12,
     "expected_original_walkthrough_task_count": 12,

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:07+00:00",
   "summary": {
     "original_walkthrough_task_count": 12,
     "expected_original_walkthrough_task_count": 12,

data/website_integrity.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:09:34+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
@@ -340,7 +340,7 @@
     },
     {
       "path": "data/evidence_contract.json",
-      "bytes": 12219,
       "top_level_type": "dict"
     },
     {
@@ -370,7 +370,7 @@
     },
     {
       "path": "data/mirror_parity.json",
-      "bytes": 1460402,
       "top_level_type": "dict"
     },
     {
@@ -385,12 +385,12 @@
     },
     {
       "path": "data/omni_model_comparison.json",
-      "bytes": 82102,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_brief.json",
-      "bytes": 4032,
       "top_level_type": "dict"
     },
     {
@@ -410,7 +410,7 @@
     },
     {
       "path": "data/public_reader_map.json",
-      "bytes": 6951,
       "top_level_type": "dict"
     },
     {
@@ -480,12 +480,12 @@
     },
     {
       "path": "data/research_takeaways.json",
-      "bytes": 7165,
       "top_level_type": "dict"
     },
     {
       "path": "data/scope_claims_audit.json",
-      "bytes": 21313,
       "top_level_type": "dict"
     },
     {
@@ -505,7 +505,7 @@
     },
     {
       "path": "data/summary_metrics.json",
-      "bytes": 27518,
       "top_level_type": "dict"
     },
     {
@@ -515,7 +515,7 @@
     },
     {
       "path": "data/task_method_20_gap_audit.json",
-      "bytes": 8500,
       "top_level_type": "dict"
     },
     {

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:07+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
     },
     {
       "path": "data/evidence_contract.json",
+      "bytes": 12101,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/mirror_parity.json",
+      "bytes": 1461299,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/omni_model_comparison.json",
+      "bytes": 82104,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_brief.json",
+      "bytes": 4044,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/public_reader_map.json",
+      "bytes": 7454,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/research_takeaways.json",
+      "bytes": 7174,
       "top_level_type": "dict"
     },
     {
       "path": "data/scope_claims_audit.json",
+      "bytes": 21322,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/summary_metrics.json",
+      "bytes": 27527,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/task_method_20_gap_audit.json",
+      "bytes": 8501,
       "top_level_type": "dict"
     },
     {

data/xperience10m_dataset_card_alignment.json CHANGED Viewed

@@ -186,7 +186,7 @@
       "modality reconstruction",
       "misalignment detection"
     ],
-    "not_yet_claimed": [
       "large-scale audio-visual pretraining",
       "caption generation",
       "depth-pixel estimation",
@@ -194,7 +194,7 @@
       "neural rendering",
       "policy learning",
       "cross-episode generalization",
-      "real held-out multi-episode Qwen3-Omni model quality"
     ]
   },
   "responsible_use_boundary": [

       "modality reconstruction",
       "misalignment detection"
     ],
+    "not_yet_demonstrated": [
       "large-scale audio-visual pretraining",
       "caption generation",
       "depth-pixel estimation",
       "neural rendering",
       "policy learning",
       "cross-episode generalization",
+      "held-out multi-episode Qwen3-Omni model quality"
     ]
   },
   "responsible_use_boundary": [

docs/data/website_integrity.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:09:34+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
@@ -340,7 +340,7 @@
     },
     {
       "path": "data/evidence_contract.json",
-      "bytes": 12219,
       "top_level_type": "dict"
     },
     {
@@ -370,7 +370,7 @@
     },
     {
       "path": "data/mirror_parity.json",
-      "bytes": 1460402,
       "top_level_type": "dict"
     },
     {
@@ -385,12 +385,12 @@
     },
     {
       "path": "data/omni_model_comparison.json",
-      "bytes": 82102,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_brief.json",
-      "bytes": 4032,
       "top_level_type": "dict"
     },
     {
@@ -410,7 +410,7 @@
     },
     {
       "path": "data/public_reader_map.json",
-      "bytes": 6951,
       "top_level_type": "dict"
     },
     {
@@ -480,12 +480,12 @@
     },
     {
       "path": "data/research_takeaways.json",
-      "bytes": 7165,
       "top_level_type": "dict"
     },
     {
       "path": "data/scope_claims_audit.json",
-      "bytes": 21313,
       "top_level_type": "dict"
     },
     {
@@ -505,7 +505,7 @@
     },
     {
       "path": "data/summary_metrics.json",
-      "bytes": 27518,
       "top_level_type": "dict"
     },
     {
@@ -515,7 +515,7 @@
     },
     {
       "path": "data/task_method_20_gap_audit.json",
-      "bytes": 8500,
       "top_level_type": "dict"
     },
     {

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:07+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
     },
     {
       "path": "data/evidence_contract.json",
+      "bytes": 12101,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/mirror_parity.json",
+      "bytes": 1461299,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/omni_model_comparison.json",
+      "bytes": 82104,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_brief.json",
+      "bytes": 4044,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/public_reader_map.json",
+      "bytes": 7454,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/research_takeaways.json",
+      "bytes": 7174,
       "top_level_type": "dict"
     },
     {
       "path": "data/scope_claims_audit.json",
+      "bytes": 21322,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/summary_metrics.json",
+      "bytes": 27527,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/task_method_20_gap_audit.json",
+      "bytes": 8501,
       "top_level_type": "dict"
     },
     {

docs/data/xperience10m_dataset_card_alignment.json CHANGED Viewed

@@ -186,7 +186,7 @@
       "modality reconstruction",
       "misalignment detection"
     ],
-    "not_yet_claimed": [
       "large-scale audio-visual pretraining",
       "caption generation",
       "depth-pixel estimation",
@@ -194,7 +194,7 @@
       "neural rendering",
       "policy learning",
       "cross-episode generalization",
-      "real held-out multi-episode Qwen3-Omni model quality"
     ]
   },
   "responsible_use_boundary": [

       "modality reconstruction",
       "misalignment detection"
     ],
+    "not_yet_demonstrated": [
       "large-scale audio-visual pretraining",
       "caption generation",
       "depth-pixel estimation",
       "neural rendering",
       "policy learning",
       "cross-episode generalization",
+      "held-out multi-episode Qwen3-Omni model quality"
     ]
   },
   "responsible_use_boundary": [

metrics/task_surface_integrity.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:10:38+00:00",
   "summary": {
     "original_walkthrough_task_count": 12,
     "expected_original_walkthrough_task_count": 12,

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:07+00:00",
   "summary": {
     "original_walkthrough_task_count": 12,
     "expected_original_walkthrough_task_count": 12,

metrics/website_integrity.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "status": "pass",
-  "generated_at_utc": "2026-06-22T10:09:34+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
@@ -340,7 +340,7 @@
     },
     {
       "path": "data/evidence_contract.json",
-      "bytes": 12219,
       "top_level_type": "dict"
     },
     {
@@ -370,7 +370,7 @@
     },
     {
       "path": "data/mirror_parity.json",
-      "bytes": 1460402,
       "top_level_type": "dict"
     },
     {
@@ -385,12 +385,12 @@
     },
     {
       "path": "data/omni_model_comparison.json",
-      "bytes": 82102,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_brief.json",
-      "bytes": 4032,
       "top_level_type": "dict"
     },
     {
@@ -410,7 +410,7 @@
     },
     {
       "path": "data/public_reader_map.json",
-      "bytes": 6951,
       "top_level_type": "dict"
     },
     {
@@ -480,12 +480,12 @@
     },
     {
       "path": "data/research_takeaways.json",
-      "bytes": 7165,
       "top_level_type": "dict"
     },
     {
       "path": "data/scope_claims_audit.json",
-      "bytes": 21313,
       "top_level_type": "dict"
     },
     {
@@ -505,7 +505,7 @@
     },
     {
       "path": "data/summary_metrics.json",
-      "bytes": 27518,
       "top_level_type": "dict"
     },
     {
@@ -515,7 +515,7 @@
     },
     {
       "path": "data/task_method_20_gap_audit.json",
-      "bytes": 8500,
       "top_level_type": "dict"
     },
     {

 {
   "status": "pass",
+  "generated_at_utc": "2026-06-22T11:17:07+00:00",
   "docs_root": "docs",
   "site_base": "/ropedia-xperience-10m-task-suite/",
   "summary": {
     },
     {
       "path": "data/evidence_contract.json",
+      "bytes": 12101,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/mirror_parity.json",
+      "bytes": 1461299,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/omni_model_comparison.json",
+      "bytes": 82104,
       "top_level_type": "dict"
     },
     {
       "path": "data/project_brief.json",
+      "bytes": 4044,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/public_reader_map.json",
+      "bytes": 7454,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/research_takeaways.json",
+      "bytes": 7174,
       "top_level_type": "dict"
     },
     {
       "path": "data/scope_claims_audit.json",
+      "bytes": 21322,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/summary_metrics.json",
+      "bytes": 27527,
       "top_level_type": "dict"
     },
     {
     },
     {
       "path": "data/task_method_20_gap_audit.json",
+      "bytes": 8501,
       "top_level_type": "dict"
     },
     {

metrics/xperience10m_dataset_card_alignment.json CHANGED Viewed

@@ -186,7 +186,7 @@
       "modality reconstruction",
       "misalignment detection"
     ],
-    "not_yet_claimed": [
       "large-scale audio-visual pretraining",
       "caption generation",
       "depth-pixel estimation",
@@ -194,7 +194,7 @@
       "neural rendering",
       "policy learning",
       "cross-episode generalization",
-      "real held-out multi-episode Qwen3-Omni model quality"
     ]
   },
   "responsible_use_boundary": [

       "modality reconstruction",
       "misalignment detection"
     ],
+    "not_yet_demonstrated": [
       "large-scale audio-visual pretraining",
       "caption generation",
       "depth-pixel estimation",
       "neural rendering",
       "policy learning",
       "cross-episode generalization",
+      "held-out multi-episode Qwen3-Omni model quality"
     ]
   },
   "responsible_use_boundary": [

results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md CHANGED Viewed

@@ -36,7 +36,7 @@ final train loss of 0.4130 plus a validation loss of 0.0331. The result verifies
 the multi-episode pipeline and gives a real error-analysis baseline; it is still
 not a strong final model.
-A stronger model-quality pilot should be claimed only after:
 - selected valid episodes are available locally,
 - the manifest builder confirms complete held-out episode splits,

 the multi-episode pipeline and gives a real error-analysis baseline; it is still
 not a strong final model.
+A stronger model-quality pilot should be presented only after:
 - selected valid episodes are available locally,
 - the manifest builder confirms complete held-out episode splits,

results/omni_finetune/OMNI_MODEL_COMPARISON.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Omni Model Comparison
-Generated: `2026-06-21T15:17:00+00:00`
 Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the two Cosmos3 targets answer different questions: Nano future-window retrieval versus Super structured JSON Reasoner evaluation.
@@ -133,4 +133,4 @@ This is the first verified Cosmos3-Super fine-tuned adapter branch. Its metric i
 ## Pending
 - Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.
-- Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before claiming v6 is globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly.

 # Omni Model Comparison
+Generated: `2026-06-22T10:59:59+00:00`
 Compare only rows with the same scope and target. Single-episode raw-feature metrics, 128-episode metadata baselines, Qwen3 structured JSON metrics, and the two Cosmos3 targets answer different questions: Nano future-window retrieval versus Super structured JSON Reasoner evaluation.
 ## Pending
 - Use the verified Qwen3 v6 rank64/lr5e-5 dense multiscale full-eval package as the latest current Qwen row; the v5 release tag remains pinned as the previous verified release.
+- Read results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before presenting v6 as globally better than v5, because v6 improves action macro-F1 and contact accuracy but regresses subtask, next-action, object micro-F1, and JSON validity slightly.

results/omni_finetune/XPERIENCE10M_128_DATA_PREPARATION_AND_FINETUNE_PLAN.md CHANGED Viewed

@@ -1,7 +1,7 @@
 # Xperience-10M 128-Episode Data Preparation and Fine-Tune Plan
 This is the executable plan for moving from metadata selection to real
-multi-episode training. It does not claim model-quality results until data is
 downloaded, staged, audited, trained, and evaluated on held-out sessions.
 ## Current Preflight
@@ -21,7 +21,7 @@ Current execution status:
 - a 128-episode data-preparation job has been launched on an HF-reachable host,
 - staged-file transfer is active,
 - later batches are scheduled after storage checks,
-- no multi-episode model-quality training result is claimed yet.
 ## Selected Data

 # Xperience-10M 128-Episode Data Preparation and Fine-Tune Plan
 This is the executable plan for moving from metadata selection to real
+multi-episode training. It does not present model-quality results until data is
 downloaded, staged, audited, trained, and evaluated on held-out sessions.
 ## Current Preflight
 - a 128-episode data-preparation job has been launched on an HF-reachable host,
 - staged-file transfer is active,
 - later batches are scheduled after storage checks,
+- no multi-episode model-quality training result is reported yet.
 ## Selected Data

results/omni_finetune/verified_public/xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp/dataset/target_manifest.json CHANGED Viewed

@@ -16,7 +16,7 @@
   "issues": [],
   "limitations": [
     "This is an egocentric camera-motion proxy, not a robot gripper or human hand-control action.",
-    "Use it for Cosmos3 action-packer and one-episode overfit smoke tests before claiming model-quality improvement.",
     "Fit any normalization on train episodes only before a full publishable Cosmos adapter run."
   ]
 }

   "issues": [],
   "limitations": [
     "This is an egocentric camera-motion proxy, not a robot gripper or human hand-control action.",
+    "Use it for Cosmos3 action-packer and one-episode overfit smoke tests before reporting model-quality improvement.",
     "Fit any normalization on train episodes only before a full publishable Cosmos adapter run."
   ]
 }

results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/analysis/ERROR_ANALYSIS.md CHANGED Viewed

@@ -66,7 +66,7 @@ This report is computed from the verified public package predictions. It contain
 ## Interpretation
-The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before claiming stronger model quality.
 Generated files:

 ## Interpretation
+The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before presenting stronger model quality.
 Generated files:

results/omni_finetune/verified_public/xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607/analysis/error_analysis_summary.json CHANGED Viewed

@@ -523,5 +523,5 @@
       "visualization.rrd"
     ]
   },
-  "interpretation": "The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before claiming stronger model quality."
 }

       "visualization.rrd"
     ]
   },
+  "interpretation": "The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before presenting stronger model quality."
 }

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/ERROR_ANALYSIS.md CHANGED Viewed

@@ -66,7 +66,7 @@ This report is computed from the verified public package predictions. It contain
 ## Interpretation
-The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before claiming stronger model quality.
 Generated files:

 ## Interpretation
+The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before presenting stronger model quality.
 Generated files:

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/analysis/error_analysis_summary.json CHANGED Viewed

@@ -663,5 +663,5 @@
       "visualization.rrd"
     ]
   },
-  "interpretation": "The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before claiming stronger model quality."
 }

       "visualization.rrd"
     ]
   },
+  "interpretation": "The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before presenting stronger model quality."
 }

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/analysis/ERROR_ANALYSIS.md CHANGED Viewed

@@ -66,7 +66,7 @@ This report is computed from the verified public package predictions. It contain
 ## Interpretation
-The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before claiming stronger model quality.
 Generated files:

 ## Interpretation
+The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before presenting stronger model quality.
 Generated files:

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v2_reuse_full8gpu_lora_eval_test_full/analysis/error_analysis_summary.json CHANGED Viewed

@@ -457,5 +457,5 @@
       "visualization.rrd"
     ]
   },
-  "interpretation": "The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before claiming stronger model quality."
 }

       "visualization.rrd"
     ]
   },
+  "interpretation": "The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before presenting stronger model quality."
 }

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/analysis/ERROR_ANALYSIS.md CHANGED Viewed

@@ -66,7 +66,7 @@ This report is computed from public-safe predictions and an episode manifest. It
 ## Interpretation
-The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before claiming stronger model quality.
 Generated files:

 ## Interpretation
+The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before presenting stronger model quality.
 Generated files:

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/analysis/error_analysis_summary.json CHANGED Viewed

@@ -452,5 +452,5 @@
       "visualization.rrd"
     ]
   },
-  "interpretation": "The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before claiming stronger model quality."
 }

       "visualization.rrd"
     ]
   },
+  "interpretation": "The diagnostic pilot is dominated by invalid or weak structured outputs and exact-label failures. These tables identify where to tighten JSON constraints, action/subtask target formatting, object vocabularies, and missing-modality robustness before presenting stronger model quality."
 }