Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
Refine reader-facing public wording (3/6)
Browse files
- data/quality_gates.json +1 -1
- data/research_takeaways.json +2 -2
- data/scope_claims_audit.json +2 -2
- data/source_alignment_audit.json +1 -1
- data/summary_metrics.json +1 -1
- data/task_method_20_gap_audit.json +2 -2
- docs/data/quality_gates.json +1 -1
- docs/data/research_takeaways.json +2 -2
- docs/data/scope_claims_audit.json +2 -2
- docs/data/source_alignment_audit.json +1 -1
- docs/data/summary_metrics.json +1 -1
- docs/data/task_method_20_gap_audit.json +2 -2
- docs/data/task_surface_integrity.json +1 -1
- metrics/publication_audit.json +7 -7
- metrics/quality_gates.json +1 -1
- metrics/research_takeaways.json +2 -2
- metrics/scope_claims_audit.json +2 -2
- metrics/source_alignment_audit.json +1 -1
- metrics/summary_metrics.json +1 -1
- metrics/task_method_20_gap_audit.json +2 -2
data/quality_gates.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:18:45+00:00",
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
data/research_takeaways.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Takeaways",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"source_files": [
|
| 6 |
"docs/data/summary_metrics.json",
|
| 7 |
"results/episode_task_suite/summary_report.json",
|
|
@@ -186,7 +186,7 @@
|
|
| 186 |
}
|
| 187 |
],
|
| 188 |
"source": "docs/data/omni_finetune_verified_result.json",
|
| 189 |
-
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Takeaways",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T10:59:59+00:00",
|
| 5 |
"source_files": [
|
| 6 |
"docs/data/summary_metrics.json",
|
| 7 |
"results/episode_task_suite/summary_report.json",
|
|
|
|
| 186 |
}
|
| 187 |
],
|
| 188 |
"source": "docs/data/omni_finetune_verified_result.json",
|
| 189 |
+
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
data/scope_claims_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
|
@@ -25,7 +25,7 @@
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
-
"detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:17:10+00:00",
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
|
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
+
"detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking.",
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
data/source_alignment_audit.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:17:08+00:00",
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
data/summary_metrics.json
CHANGED
|
@@ -14,7 +14,7 @@
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
-
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
|
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
+
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
data/task_method_20_gap_audit.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"generated_at_utc": "2026-06-
|
| 3 |
"immediate_actions": [
|
| 4 |
{
|
| 5 |
"artifact": "docs/data/task_method_20_gap_audit.json",
|
|
@@ -210,7 +210,7 @@
|
|
| 210 |
"target_policy": {
|
| 211 |
"numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
|
| 212 |
"proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
|
| 213 |
-
"scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model
|
| 214 |
},
|
| 215 |
"title": "Task Method 20-Result Completion Audit"
|
| 216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"generated_at_utc": "2026-06-22T11:00:00+00:00",
|
| 3 |
"immediate_actions": [
|
| 4 |
{
|
| 5 |
"artifact": "docs/data/task_method_20_gap_audit.json",
|
|
|
|
| 210 |
"target_policy": {
|
| 211 |
"numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
|
| 212 |
"proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
|
| 213 |
+
"scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model numbers. The current release has zero scoreless cells."
|
| 214 |
},
|
| 215 |
"title": "Task Method 20-Result Completion Audit"
|
| 216 |
}
|
docs/data/quality_gates.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:18:45+00:00",
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
docs/data/research_takeaways.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Takeaways",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"source_files": [
|
| 6 |
"docs/data/summary_metrics.json",
|
| 7 |
"results/episode_task_suite/summary_report.json",
|
|
@@ -186,7 +186,7 @@
|
|
| 186 |
}
|
| 187 |
],
|
| 188 |
"source": "docs/data/omni_finetune_verified_result.json",
|
| 189 |
-
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Takeaways",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T10:59:59+00:00",
|
| 5 |
"source_files": [
|
| 6 |
"docs/data/summary_metrics.json",
|
| 7 |
"results/episode_task_suite/summary_report.json",
|
|
|
|
| 186 |
}
|
| 187 |
],
|
| 188 |
"source": "docs/data/omni_finetune_verified_result.json",
|
| 189 |
+
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
docs/data/scope_claims_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
|
@@ -25,7 +25,7 @@
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
-
"detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:17:10+00:00",
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
|
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
+
"detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking.",
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
docs/data/source_alignment_audit.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:17:08+00:00",
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
docs/data/summary_metrics.json
CHANGED
|
@@ -14,7 +14,7 @@
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
-
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
|
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
+
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
docs/data/task_method_20_gap_audit.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"generated_at_utc": "2026-06-
|
| 3 |
"immediate_actions": [
|
| 4 |
{
|
| 5 |
"artifact": "docs/data/task_method_20_gap_audit.json",
|
|
@@ -210,7 +210,7 @@
|
|
| 210 |
"target_policy": {
|
| 211 |
"numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
|
| 212 |
"proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
|
| 213 |
-
"scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model
|
| 214 |
},
|
| 215 |
"title": "Task Method 20-Result Completion Audit"
|
| 216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"generated_at_utc": "2026-06-22T11:00:00+00:00",
|
| 3 |
"immediate_actions": [
|
| 4 |
{
|
| 5 |
"artifact": "docs/data/task_method_20_gap_audit.json",
|
|
|
|
| 210 |
"target_policy": {
|
| 211 |
"numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
|
| 212 |
"proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
|
| 213 |
+
"scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model numbers. The current release has zero scoreless cells."
|
| 214 |
},
|
| 215 |
"title": "Task Method 20-Result Completion Audit"
|
| 216 |
}
|
docs/data/task_surface_integrity.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"original_walkthrough_task_count": 12,
|
| 6 |
"expected_original_walkthrough_task_count": 12,
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:17:07+00:00",
|
| 4 |
"summary": {
|
| 5 |
"original_walkthrough_task_count": 12,
|
| 6 |
"expected_original_walkthrough_task_count": 12,
|
metrics/publication_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
@@ -246,8 +246,8 @@
|
|
| 246 |
"hf_space_bundle": {
|
| 247 |
"root": "hf_publish/space",
|
| 248 |
"exists": true,
|
| 249 |
-
"file_count":
|
| 250 |
-
"text_file_count":
|
| 251 |
"largest_file": {
|
| 252 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 253 |
"bytes": 10221085
|
|
@@ -257,8 +257,8 @@
|
|
| 257 |
"hf_artifact_bundle": {
|
| 258 |
"root": "hf_publish/artifacts",
|
| 259 |
"exists": true,
|
| 260 |
-
"file_count":
|
| 261 |
-
"text_file_count":
|
| 262 |
"largest_file": {
|
| 263 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 264 |
"bytes": 135591061
|
|
@@ -268,8 +268,8 @@
|
|
| 268 |
"hf_model_bundle": {
|
| 269 |
"root": "hf_publish/model",
|
| 270 |
"exists": true,
|
| 271 |
-
"file_count":
|
| 272 |
-
"text_file_count":
|
| 273 |
"largest_file": {
|
| 274 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 275 |
"bytes": 135591061
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:18:16+00:00",
|
| 4 |
"checks": [
|
| 5 |
{
|
| 6 |
"name": "required_publication_assets_present",
|
|
|
|
| 246 |
"hf_space_bundle": {
|
| 247 |
"root": "hf_publish/space",
|
| 248 |
"exists": true,
|
| 249 |
+
"file_count": 640,
|
| 250 |
+
"text_file_count": 479,
|
| 251 |
"largest_file": {
|
| 252 |
"path": "results/omni_finetune/xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z/modality_reconstruction/predictions.jsonl",
|
| 253 |
"bytes": 10221085
|
|
|
|
| 257 |
"hf_artifact_bundle": {
|
| 258 |
"root": "hf_publish/artifacts",
|
| 259 |
"exists": true,
|
| 260 |
+
"file_count": 4708,
|
| 261 |
+
"text_file_count": 1334,
|
| 262 |
"largest_file": {
|
| 263 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 264 |
"bytes": 135591061
|
|
|
|
| 268 |
"hf_model_bundle": {
|
| 269 |
"root": "hf_publish/model",
|
| 270 |
"exists": true,
|
| 271 |
+
"file_count": 5470,
|
| 272 |
+
"text_file_count": 1508,
|
| 273 |
"largest_file": {
|
| 274 |
"path": "results/omni_finetune/xperience10m_128ep_dense_multiscale_hierarchical_v1_20260608/dense_multiscale_windows.jsonl",
|
| 275 |
"bytes": 135591061
|
metrics/quality_gates.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Release Checks",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:18:45+00:00",
|
| 5 |
"rule": "A release is current when the automated reports pass and the live GitHub/Hugging Face mirrors are verified after publishing.",
|
| 6 |
"automated_gates": [
|
| 7 |
{
|
metrics/research_takeaways.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Takeaways",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"source_files": [
|
| 6 |
"docs/data/summary_metrics.json",
|
| 7 |
"results/episode_task_suite/summary_report.json",
|
|
@@ -186,7 +186,7 @@
|
|
| 186 |
}
|
| 187 |
],
|
| 188 |
"source": "docs/data/omni_finetune_verified_result.json",
|
| 189 |
-
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Research Takeaways",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T10:59:59+00:00",
|
| 5 |
"source_files": [
|
| 6 |
"docs/data/summary_metrics.json",
|
| 7 |
"results/episode_task_suite/summary_report.json",
|
|
|
|
| 186 |
}
|
| 187 |
],
|
| 188 |
"source": "docs/data/omni_finetune_verified_result.json",
|
| 189 |
+
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
metrics/scope_claims_audit.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
-
"generated_at_utc": "2026-06-
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
|
@@ -25,7 +25,7 @@
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
-
"detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
|
|
|
| 1 |
{
|
| 2 |
"status": "pass",
|
| 3 |
+
"generated_at_utc": "2026-06-22T11:17:10+00:00",
|
| 4 |
"summary": {
|
| 5 |
"qwen3_omni_verified_diagnostic_pilot": true,
|
| 6 |
"dataset_manifest_num_episodes": 119,
|
|
|
|
| 25 |
{
|
| 26 |
"name": "summary_metrics_preserves_verified_diagnostic_status",
|
| 27 |
"status": "pass",
|
| 28 |
+
"detail": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking.",
|
| 29 |
"evidence": [
|
| 30 |
"docs/data/summary_metrics.json"
|
| 31 |
]
|
metrics/source_alignment_audit.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
-
"generated_at_utc": "2026-06-
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
|
|
|
| 1 |
{
|
| 2 |
"title": "Ropedia Xperience-10M Source Alignment Note",
|
| 3 |
"status": "pass",
|
| 4 |
+
"generated_at_utc": "2026-06-22T11:17:08+00:00",
|
| 5 |
"alignment_json": "docs/data/xperience10m_dataset_card_alignment.json",
|
| 6 |
"alignment_summary": {
|
| 7 |
"full_dataset_repo": "ropedia-ai/xperience-10m",
|
metrics/summary_metrics.json
CHANGED
|
@@ -14,7 +14,7 @@
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
-
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
|
|
|
| 14 |
"visualization.rrd"
|
| 15 |
],
|
| 16 |
"access_status": "The gated Xperience-10M dataset is available for selected multi-episode pilot preparation.",
|
| 17 |
+
"current_scope": "The selected-episode Qwen3-Omni diagnostic pilot is verified on the 96/16/16 split and now meets the 98% target for JSON validity; action/subtask quality remains weak, so current results are diagnostic baselines rather than a final model-quality ranking."
|
| 18 |
},
|
| 19 |
"models": {
|
| 20 |
"motion_action": {
|
metrics/task_method_20_gap_audit.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"generated_at_utc": "2026-06-
|
| 3 |
"immediate_actions": [
|
| 4 |
{
|
| 5 |
"artifact": "docs/data/task_method_20_gap_audit.json",
|
|
@@ -210,7 +210,7 @@
|
|
| 210 |
"target_policy": {
|
| 211 |
"numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
|
| 212 |
"proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
|
| 213 |
-
"scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model
|
| 214 |
},
|
| 215 |
"title": "Task Method 20-Result Completion Audit"
|
| 216 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"generated_at_utc": "2026-06-22T11:00:00+00:00",
|
| 3 |
"immediate_actions": [
|
| 4 |
{
|
| 5 |
"artifact": "docs/data/task_method_20_gap_audit.json",
|
|
|
|
| 210 |
"target_policy": {
|
| 211 |
"numeric_score_gate": "A method-task cell is numeric only when a runner or verified package emits that exact task target and metric.",
|
| 212 |
"proxy_policy": "Proxy scores are allowed only when the matrix marks them as proxy_scored and keeps the reason/source attached.",
|
| 213 |
+
"scoreless_cell_policy": "If future unsupported or not-evaluated cells appear, they must stay explicit in the public matrix instead of being hidden or backfilled with proxy model numbers. The current release has zero scoreless cells."
|
| 214 |
},
|
| 215 |
"title": "Task Method 20-Result Completion Audit"
|
| 216 |
}
|