ropedia-xperience-10m-task-baselines / docs /data /public_reader_map.json
cy0307's picture
Refresh weights-results links and publication QA mirrors
7d21adc verified
Raw
History Blame
5.91 kB
{
"title": "Ropedia Xperience-10M Public Reader Map",
"status": "published",
"purpose": "Organize the GitHub repo, GitHub Pages dashboard, Hugging Face Space, artifact dataset, baseline model repo, consolidated weights/results repo, and model-branch repos without removing evidence.",
"fast_paths": [
{
"reader_goal": "Understand the project in one pass",
"start_here": "PROJECT_BRIEF.md",
"then_inspect": ["PROJECT_STATUS.md", "RESEARCH_TAKEAWAYS.md"]
},
{
"reader_goal": "See the visual public dashboard",
"start_here": "GitHub Pages dashboard or Hugging Face Space",
"then_inspect": ["docs/index.html", "docs/data/project_packet.json"]
},
{
"reader_goal": "Understand the data unit",
"start_here": "results/episode_task_suite/windows.csv",
"then_inspect": ["results/episode_task_suite/feature_manifest.json", "docs/data/raw_sample_files.json"]
},
{
"reader_goal": "Trace the 128-episode split",
"start_here": "XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md",
"then_inspect": ["docs/data/xperience10m_128_episode_feature_index.json", "results/omni_finetune/xperience10m_128_episode_selection.csv"]
},
{
"reader_goal": "Inspect the 20-task benchmark",
"start_here": "TASK_SUITE_20.md",
"then_inspect": ["docs/data/task_suite_20.json", "EVALUATION_PROTOCOL.md"]
},
{
"reader_goal": "Compare current results",
"start_here": "RESEARCH_TAKEAWAYS.md",
"then_inspect": ["docs/data/task_method_20_result_matrix.json", "docs/data/unified_task_model_radar.json"]
},
{
"reader_goal": "Compare 1-episode and 128-episode methods",
"start_here": "Radar section of the dashboard homepage",
"then_inspect": ["docs/data/single_episode_task_model_radar.json", "docs/data/episode128_task_model_radar.json"]
},
{
"reader_goal": "Find all derived artifacts",
"start_here": "ARTIFACT_GUIDE.md",
"then_inspect": ["Hugging Face artifact dataset", "docs/data/artifact_index.json"]
},
{
"reader_goal": "Download model weights with their matching results",
"start_here": "Hugging Face weights/results repo",
"then_inspect": ["manifest.json", "analysis/docs/data/task_method_20_result_matrix.json", "results/"]
},
{
"reader_goal": "Reproduce or extend the work",
"start_here": "REPRODUCIBILITY.md",
"then_inspect": ["QUALITY_GATES.md", "scripts/", "results/"]
},
{
"reader_goal": "Understand foundation-model directions",
"start_here": "THREE_FOUNDATION_PIPELINES.md",
"then_inspect": ["FOUNDATION_MODEL_PLAN.md", "docs/data/three_foundation_pipelines.json"]
},
{
"reader_goal": "Check public-release health",
"start_here": "PUBLIC_SURFACE_QA.md",
"then_inspect": ["docs/data/live_publication_status.json", "docs/data/mirror_parity.json"]
}
],
"public_surfaces": [
{
"surface": "GitHub repo",
"responsibility": "Source of truth for docs, scripts, generated data, validators, and commit history.",
"best_use": "Auditing implementation and citing exact files."
},
{
"surface": "GitHub Pages dashboard",
"responsibility": "Reader-facing visual overview of the dataset sample, tasks, methods, results, directions, and resources.",
"best_use": "Understanding the project quickly."
},
{
"surface": "Hugging Face Space",
"responsibility": "Hub-hosted copy of the dashboard and static app assets.",
"best_use": "Sharing the visual dashboard from Hugging Face."
},
{
"surface": "Hugging Face artifact dataset",
"responsibility": "Public-safe derived artifacts, reports, metrics, website JSON, and sanitized model result packages.",
"best_use": "Downloading evidence bundles."
},
{
"surface": "Hugging Face baseline model repo",
"responsibility": "Baseline weights, metrics, figures, and mirrored task artifacts.",
"best_use": "Reusing compact baseline outputs."
},
{
"surface": "Hugging Face weights/results repo",
"responsibility": "Consolidated baseline weights, Qwen3/Cosmos adapter weights, verified results, analysis files, and file-level manifest.",
"best_use": "Auditing all public-safe weight-bearing artifacts from one repo."
},
{
"surface": "Qwen3/Cosmos model repos",
"responsibility": "Adapter-specific public weights or package cards when a branch is verified and publishable.",
"best_use": "Inspecting model-branch artifacts."
}
],
"evidence_layers": [
"Dataset/source boundary",
"Data contract",
"Task suite",
"Results",
"Foundation directions",
"Public-release checks"
],
"claim_boundaries": [
{
"claim_type": "Single public-sample task behavior",
"public_evidence": ["results/episode_task_suite/", "docs/data/task_suite_20.json"],
"boundary": "Describes one public sample episode, not the full dataset distribution."
},
{
"claim_type": "128-episode baseline comparison",
"public_evidence": ["XPERIENCE10M_128_EPISODE_FEATURE_INDEX.md", "docs/data/xperience10m_128_episode_feature_index.json", "results/omni_finetune/*128*", "docs/data/omni_model_comparison.json"],
"boundary": "Uses selected held-out episodes and derived public-safe summaries; official raw files remain gated upstream."
},
{
"claim_type": "Foundation-model branch quality",
"public_evidence": ["Verified Qwen3/Cosmos result packages", "model cards"],
"boundary": "Numeric task scores appear only when a task-specific eval or probe exists."
},
{
"claim_type": "Reproducibility",
"public_evidence": ["REPRODUCIBILITY.md", "QUALITY_GATES.md", "release validators"],
"boundary": "Raw gated Xperience-10M files and full foundation weights are not redistributed."
}
]
}