Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| #!/usr/bin/env python3 | |
| """Validate public package contents for the repo and HF bundles. | |
| This check scans the GitHub repo plus the prepared Hugging Face | |
| Space/artifact/model folders for generated Python caches, raw Xperience-10M | |
| data, heavyweight checkpoint formats that do not belong in this public package, | |
| and accidental Hugging Face token strings. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import re | |
| import subprocess | |
| import sys | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Default location of the prepared Hugging Face bundles (a sibling of the repo).
DEFAULT_HF_ROOT = ROOT.parent / "hf_publish"

# Names and suffixes that scan() reports as generated cache artifacts.
BANNED_DIR_NAMES = {"__pycache__"}
BANNED_FILE_NAMES = {".DS_Store"}
BANNED_SUFFIXES = {".pyc", ".pyo"}
# Suffixes reported as raw dataset files.
RAW_DATA_SUFFIXES = {".mp4", ".hdf5", ".h5", ".rrd"}
# Suffixes reported as heavyweight checkpoints or archives.
HEAVY_MODEL_SUFFIXES = {".safetensors", ".bin", ".tar"}
# Suffixes whose contents are read and searched as text. The empty string
# matches files that have no extension at all.
TEXT_SUFFIXES = {
    "",
    ".cff",
    ".csv",
    ".html",
    ".json",
    ".md",
    ".py",
    ".sh",
    ".svg",
    ".txt",
    ".webmanifest",
    ".xml",
    ".yaml",
    ".yml",
}
# Matches the "hf_" prefix followed by at least 20 ASCII letters or digits.
TOKEN_PATTERN = re.compile(r"hf_[A-Za-z0-9]{20,}")

# Substring -> reason. Any text file containing a key is reported as stale
# presentation copy. NOTE(review): each key is assembled with "+" presumably so
# that this script, which is itself a scanned ".py" file, never contains the
# full needle in its own source. Keep the literals split when editing, and do
# not spell the needles out in comments.
STALE_PRESENTATION_STRINGS = {
    "xperience10m-" + "modalities-v9-large-atlas": "old task-suite infographic cache key",
    "xperience10m-" + "taskfirst-v10": "older task-suite infographic cache key",
    "Start with the large native " + "modality atlas": "old suite-section hierarchy copy",
    "ChatGPT" + "-image": "internal image-generation tool wording in public copy",
    "H" + "20": "private compute infrastructure wording in public copy",
    "A" + "100": "private compute infrastructure wording in public copy",
    "Cur" + "sor": "editor/work-session wording in public copy",
    "public " + "dashboard and generated figures " + "deliberately " + "follow": "meta design-process wording in public copy",
}
# Substring -> reason for machine-local filesystem paths. The keys are split
# the same way as the stale-copy needles above.
LOCAL_PATH_PATTERNS = {
    "/" + "Users/": "local macOS user path in public text",
    "/" + "private/": "local scratch path in public text",
}
# Markers expected in the repo README and in the artifact bundle's copy of it
# (PROJECT_README.md); both cards reference the Markdown documents.
_REPO_README_MARKERS = (
    "xperience10m-taskfirst-v13-modality-xl",
    "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
    "SOURCE_ALIGNMENT_AUDIT.md",
    "EVALUATION_PROTOCOL.md",
    "FIGURE_INDEX.md",
    "brand_assets.json",
    "PROJECT_STATUS.md",
    "RESEARCH_TAKEAWAYS.md",
    "xperience10m-logo-social-card.png",
    "build_brand_assets.py",
    "build_research_takeaways.py",
    "research_takeaways.json",
    "cc-by-nc-4.0",
    "12,103 episode folders",
    "all 12 task families before the",
    "Public-sample modality thumbnails remain enlarged below",
    "interactive scrub/play walkthrough storyboard",
    "task_surface_integrity.json",
    "rendered_site_check.json",
    "public_surface_qa.json",
)

# Markers expected in the Space and model bundle cards, which reference the
# JSON data files instead of the Markdown documents.
_HF_SHOWCASE_MARKERS = (
    "xperience10m-taskfirst-v13-modality-xl",
    "xperience10m_dataset_card_alignment.json",
    "source_alignment_audit.json",
    "evaluation_protocol.json",
    "figure_index.json",
    "brand_assets.json",
    "project_status.json",
    "research_takeaways.json",
    "xperience10m-logo-social-card.png",
    "build_brand_assets.py",
    "build_research_takeaways.py",
    "cc-by-nc-4.0",
    "12,103 episode folders",
    "Ropedia Xperience-10M 12-task infographic",
    "responsive native modality atlas",
    "interactive scrub/play walkthrough storyboard",
    "website HTML",
    "task_surface_integrity.json",
    "rendered_site_check.json",
    "public_surface_qa.json",
)

# Markers expected in the artifact bundle README.
_HF_ARTIFACT_MARKERS = (
    "xperience10m-taskfirst-v13-modality-xl",
    "xperience10m_dataset_card_alignment.json",
    "source_alignment_audit.json",
    "evaluation_protocol.json",
    "figure_index.json",
    "brand_assets.json",
    "project_status.json",
    "research_takeaways.json",
    "xperience10m-logo-social-card.png",
    "build_brand_assets.py",
    "build_research_takeaways.py",
    "cc-by-nc-4.0",
    "12,103 episode folders",
    "task-first 12-task map",
    "interactive scrub/play walkthrough storyboard",
    "website HTML",
    "task_surface_integrity.json",
    "rendered_site_check.json",
    "public_surface_qa.json",
)

# One entry per public card: the surface it belongs to, its path inside that
# surface, and the substrings the card text must contain. Each entry receives
# its own list so that entries never share a mutable object.
CARD_FRESHNESS_EXPECTATIONS = [
    {"surface": surface, "relative_path": relative_path, "required": list(markers)}
    for surface, relative_path, markers in (
        ("github_repo", "README.md", _REPO_README_MARKERS),
        ("hf_space_bundle", "README.md", _HF_SHOWCASE_MARKERS),
        ("hf_artifact_bundle", "README.md", _HF_ARTIFACT_MARKERS),
        ("hf_artifact_bundle", "PROJECT_README.md", _REPO_README_MARKERS),
        ("hf_model_bundle", "README.md", _HF_SHOWCASE_MARKERS),
    )
]
def rel(path: Path, base: Path) -> str:
    """Return *path* as a POSIX-style string, relative to *base* when possible.

    If *path* is not located under *base*, the path is returned unchanged
    (still rendered with forward slashes).
    """
    try:
        shown = path.relative_to(base)
    except ValueError:
        # relative_to() raises ValueError when base is not a prefix of path.
        shown = path
    return shown.as_posix()
def git_public_paths(root: Path) -> list[Path] | None:
    """List the files git considers part of the working tree at *root*.

    Runs ``git ls-files`` for tracked files (``--cached``) plus untracked files
    that are not ignored (``--others --exclude-standard``).

    Args:
        root: Directory passed to ``git -C``.

    Returns:
        One path per listed file, each joined onto *root*, or ``None`` when git
        cannot be executed or exits with a non-zero status (for example when
        *root* is not inside a repository).
    """
    import os  # local import: only needed to decode git's byte output

    try:
        result = subprocess.run(
            # -z makes git emit NUL-terminated, unquoted names. Without it,
            # names containing quotes, control characters or non-ASCII bytes
            # are printed C-quoted, and the resulting paths would not exist on
            # disk and would silently drop out of the scan.
            ["git", "-C", str(root), "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.CalledProcessError):
        return None
    # Output is handled as bytes so that file names are decoded with the
    # filesystem encoding rather than the locale's text encoding.
    return [root / os.fsdecode(entry) for entry in result.stdout.split(b"\0") if entry]
def iter_public_files(root: Path, paths: list[Path] | None = None):
    """Yield the paths that should be inspected under *root*.

    Args:
        root: Directory to walk when no explicit listing is supplied.
        paths: Optional explicit listing (e.g. the output of
            ``git_public_paths``). When given, *root* is not walked and only
            the listed paths that currently exist are yielded, in the order
            given.

    Yields:
        ``Path`` objects. In walk mode these include directories as well as
        files, in sorted order so that reports are reproducible, and anything
        located inside a ``.git``, ``.venv`` or ``venv`` directory below
        *root* is skipped. Nothing is yielded when *root* does not exist.
    """
    if paths is not None:
        yield from (path for path in paths if path.exists())
        return
    if not root.exists():
        return
    skipped_names = {".git", ".venv", "venv"}
    for path in sorted(root.rglob("*")):
        # Only components below root decide whether a path is skipped;
        # otherwise a bundle stored underneath some ancestor directory that
        # happens to be called "venv" would be treated as entirely empty.
        if skipped_names.isdisjoint(path.relative_to(root).parts):
            yield path
def _text_violations(text: str, path_rel: str) -> list[dict]:
    """Return the content-based violations found in one text file's contents."""
    found: list[dict] = []
    if TOKEN_PATTERN.search(text):
        found.append({"kind": "possible_hf_token", "path": path_rel})
    found.extend(
        {"kind": "local_filesystem_path", "path": path_rel, "detail": reason}
        for needle, reason in LOCAL_PATH_PATTERNS.items()
        if needle in text
    )
    found.extend(
        {"kind": "stale_presentation_copy", "path": path_rel, "detail": reason}
        for needle, reason in STALE_PRESENTATION_STRINGS.items()
        if needle in text
    )
    return found


def scan(root: Path, *, paths: list[Path] | None = None, display_root: str | None = None) -> dict:
    """Inspect one publication surface and collect policy violations.

    Args:
        root: Directory of the surface being scanned.
        paths: Optional explicit file listing forwarded to
            ``iter_public_files``; when ``None`` the directory is walked.
        display_root: Label stored in the result's ``"root"`` field. Defaults
            to *root* rendered relative to the parent of the repository root.

    Returns:
        A dict with the keys ``root``, ``exists``, ``file_count``,
        ``text_file_count``, ``largest_file`` (``{"path", "bytes"}``) and
        ``violations`` (a list of dicts carrying ``kind``, ``path`` and, for
        content checks, ``detail``).
    """
    violations: list[dict] = []
    text_files = 0
    total_files = 0
    largest_file = {"path": None, "bytes": 0}
    for path in iter_public_files(root, paths):
        path_rel = rel(path, root)
        if path.is_dir():
            if path.name in BANNED_DIR_NAMES:
                violations.append({"kind": "generated_cache_dir", "path": path_rel})
            continue
        total_files += 1
        try:
            size = path.stat().st_size
        except OSError:
            # The directory walk can yield entries that cannot be stat'ed
            # (e.g. a dangling symlink). Keep auditing by name and suffix
            # instead of aborting the whole report.
            size = 0
        if size > largest_file["bytes"]:
            largest_file = {"path": path_rel, "bytes": size}
        suffix = path.suffix.lower()
        if path.name in BANNED_FILE_NAMES or suffix in BANNED_SUFFIXES:
            violations.append({"kind": "generated_cache_file", "path": path_rel})
        if suffix in RAW_DATA_SUFFIXES:
            violations.append({"kind": "raw_xperience10m_data", "path": path_rel})
        if suffix in HEAVY_MODEL_SUFFIXES:
            violations.append({"kind": "heavy_model_or_archive", "path": path_rel})
        if suffix not in TEXT_SUFFIXES:
            continue
        # Counted before the read attempt, so unreadable text files still
        # contribute to text_file_count.
        text_files += 1
        try:
            text = path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        violations.extend(_text_violations(text, path_rel))
    return {
        "root": display_root or rel(root, ROOT.parent),
        "exists": root.exists(),
        "file_count": total_files,
        "text_file_count": text_files,
        "largest_file": largest_file,
        "violations": violations,
    }
def required_assets(root: Path) -> dict[str, bool]:
    """Report which required publication assets exist under *root*.

    Returns:
        A dict keyed by repo-relative path (in the order listed below) whose
        value is ``True`` when that path exists beneath *root*.
    """
    expected_paths = (
        # Repository-level documents and metadata.
        "README.md",
        "CITATION.cff",
        "LICENSE",
        "codemeta.json",
        "ARTIFACT_GUIDE.md",
        "PROJECT_STATUS.md",
        "RESEARCH_ROADMAP.md",
        "RESEARCH_TAKEAWAYS.md",
        "QUALITY_GATES.md",
        "PUBLIC_SURFACE_QA.md",
        "RENDERED_SITE_CHECK.md",
        "EVALUATION_PROTOCOL.md",
        "FIGURE_INDEX.md",
        "SOURCE_ALIGNMENT_AUDIT.md",
        "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
        "REPRODUCIBILITY.md",
        "EVIDENCE_CONTRACT.md",
        "DATA_NOTICE.md",
        # Static site files.
        "docs/404.html",
        "docs/apple-touch-icon.png",
        "docs/favicon.svg",
        "docs/favicon.png",
        "docs/index.html",
        "docs/research_roadmap.html",
        "docs/robots.txt",
        "docs/site.webmanifest",
        "docs/sitemap.xml",
        # Site data files.
        "docs/data/brand_assets.json",
        "docs/data/evidence_contract.json",
        "docs/data/evaluation_protocol.json",
        "docs/data/figure_index.json",
        "docs/data/source_alignment_audit.json",
        "docs/data/artifact_index.json",
        "docs/data/live_publication_status.json",
        "docs/data/quality_gates.json",
        "docs/data/project_manifest.json",
        "docs/data/project_packet.json",
        "docs/data/project_status.json",
        "docs/data/research_roadmap.json",
        "docs/data/research_roadmap_interactive.json",
        "docs/data/research_takeaways.json",
        "docs/data/xperience10m_dataset_card_alignment.json",
        "docs/data/reproducibility_matrix.json",
        "docs/data/modality_atlas.json",
        "docs/data/mirror_parity.json",
        "docs/data/public_surface_qa.json",
        "docs/data/rendered_site_check.json",
        "docs/data/scope_claims_audit.json",
        "docs/data/task_surface_integrity.json",
        "docs/data/website_integrity.json",
        "docs/data/summary_metrics.json",
        # Site images.
        "docs/assets/modalities/video.jpg",
        "docs/assets/modalities/audio.png",
        "docs/assets/modalities/depth.jpg",
        "docs/assets/modalities/pose_slam.png",
        "docs/assets/modalities/motion_capture.png",
        "docs/assets/modalities/inertial.png",
        "docs/assets/modalities/language.png",
        "docs/assets/brand/xperience10m-logo-apple-touch.png",
        "docs/assets/brand/xperience10m-logo-favicon-32.png",
        "docs/assets/brand/xperience10m-logo-favicon-64.png",
        "docs/assets/brand/xperience10m-logo-mark.png",
        "docs/assets/brand/xperience10m-logo-mark-192.png",
        "docs/assets/brand/xperience10m-logo-mark-512.png",
        "docs/assets/brand/xperience10m-logo-social-card.png",
        "docs/assets/task_suite_infographic.png",
        "docs/assets/pipeline_diagram.png",
        "docs/assets/task_architectures.png",
        # Result files.
        "results/episode_task_suite/summary_report.json",
        "results/episode_task_suite/feature_manifest.json",
        "results/episode_task_suite/neural_mlp/timeline_action/metrics.json",
        "results/omni_finetune/DATA_ACCESS_STATUS.md",
        "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
        # Scripts.
        "scripts/episode_task_suite.py",
        "scripts/neural_task_models.py",
        "scripts/build_artifact_index.py",
        "scripts/build_brand_assets.py",
        "scripts/build_evaluation_protocol.py",
        "scripts/build_figure_index.py",
        "scripts/build_quality_gates.py",
        "scripts/build_public_surface_qa.py",
        "scripts/build_rendered_site_check.py",
        "scripts/build_interactive_research_roadmap.py",
        "scripts/verify_live_publication.py",
        "scripts/validate_mirror_parity.py",
        "scripts/validate_scope_claims.py",
        "scripts/validate_source_alignment.py",
        "scripts/validate_task_surface.py",
        "scripts/validate_website_integrity.py",
        "scripts/publish_hf_bundles.py",
        "scripts/omni/train_qwen3_omni_lora.py",
    )
    presence: dict[str, bool] = {}
    for relative in expected_paths:
        presence[relative] = (root / relative).exists()
    return presence
def public_card_freshness(roots: dict[str, Path], expectations: list[dict] | None = None) -> list[dict]:
    """Check that each public card contains its required marker strings.

    Args:
        roots: Mapping from surface name to that surface's directory. Every
            surface named in the expectations must be present (``KeyError``
            otherwise).
        expectations: Entries with the keys ``surface``, ``relative_path`` and
            ``required`` (a list of substrings). Defaults to the module-level
            ``CARD_FRESHNESS_EXPECTATIONS``.

    Returns:
        One record per expectation with the keys ``surface``, ``path``,
        ``exists``, ``required_marker_count``, ``missing_markers`` and
        ``status``. ``status`` is ``"pass"`` only when the card could be read
        and no marker is missing.
    """
    if expectations is None:
        expectations = CARD_FRESHNESS_EXPECTATIONS
    records = []
    for item in expectations:
        surface = item["surface"]
        markers = item["required"]
        path = roots[surface] / item["relative_path"]
        # Read first and handle failure, instead of checking existence and then
        # reading: a card path that is a directory or is unreadable is reported
        # as a failed record rather than aborting the whole audit.
        try:
            text = path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            text = None
        missing = [marker for marker in markers if marker not in (text or "")]
        records.append({
            "surface": surface,
            "path": item["relative_path"],
            "exists": text is not None or path.exists(),
            "required_marker_count": len(markers),
            "missing_markers": missing,
            "status": "pass" if text is not None and not missing else "fail",
        })
    return records
def build_report(hf_root: Path) -> dict:
    """Assemble the full publication audit for the repo and the HF bundles.

    Args:
        hf_root: Directory holding the ``space``, ``artifacts`` and ``model``
            bundle folders.

    Returns:
        A JSON-serialisable dict with the overall ``status``, a UTC timestamp,
        the individual ``checks``, the required-asset map, the card freshness
        records, the per-surface scan results and the flattened violations
        (each tagged with the surface it came from under ``"root"``).
    """
    # (surface name, directory, label shown in the report)
    surfaces = (
        ("github_repo", ROOT, "repo"),
        ("hf_space_bundle", hf_root / "space", "hf_publish/space"),
        ("hf_artifact_bundle", hf_root / "artifacts", "hf_publish/artifacts"),
        ("hf_model_bundle", hf_root / "model", "hf_publish/model"),
    )
    roots = {name: location for name, location, _ in surfaces}

    scans = {}
    for name, location, label in surfaces:
        # Only the repo is filtered through git; bundle folders are walked.
        listed = git_public_paths(location) if name == "github_repo" else None
        scans[name] = scan(location, paths=listed, display_root=label)

    assets = required_assets(ROOT)
    card_freshness = public_card_freshness(roots)
    missing_assets = [asset for asset, present in assets.items() if not present]

    violations = []
    for name, result in scans.items():
        for violation in result["violations"]:
            violations.append({"root": name, **violation})

    def violation_check(check_name, matches):
        # A check passes exactly when no violation kind satisfies the predicate.
        hits = sum(1 for entry in violations if matches(entry["kind"]))
        return {
            "name": check_name,
            "status": "fail" if hits else "pass",
            "count": hits,
        }

    def kind_is(expected):
        return lambda kind: kind == expected

    stale_cards = [item for item in card_freshness if item["status"] != "pass"]

    checks = [
        {
            "name": "required_publication_assets_present",
            "status": "fail" if missing_assets else "pass",
            "missing": missing_assets,
        },
        violation_check(
            "no_generated_python_caches",
            lambda kind: kind.startswith("generated_cache"),
        ),
        violation_check("no_raw_xperience10m_data", kind_is("raw_xperience10m_data")),
        violation_check("no_heavy_model_archives", kind_is("heavy_model_or_archive")),
        violation_check("no_hf_tokens_in_public_text", kind_is("possible_hf_token")),
        violation_check(
            "no_local_filesystem_paths_in_public_text",
            kind_is("local_filesystem_path"),
        ),
        violation_check(
            "no_stale_task_suite_presentation_copy",
            kind_is("stale_presentation_copy"),
        ),
        {
            "name": "public_cards_reference_taskfirst_figure",
            "status": "fail" if stale_cards else "pass",
            "failures": stale_cards,
        },
    ]

    any_failed = any(check["status"] != "pass" for check in checks)
    return {
        "status": "fail" if any_failed else "pass",
        "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "checks": checks,
        "required_assets": assets,
        "public_card_freshness": card_freshness,
        "scans": scans,
        "violations": violations,
    }
def main(argv: list[str] | None = None) -> int:
    """Run the audit, write the JSON report and summarise failures on stdout.

    Args:
        argv: Command-line arguments to parse; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` when every check passes, ``1`` otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--hf-root", type=Path, default=DEFAULT_HF_ROOT)
    parser.add_argument("--output", type=Path, default=ROOT / "docs/data/publication_audit.json")
    args = parser.parse_args(argv)

    report = build_report(args.hf_root.resolve())
    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
    print(f"{report['status'].upper()}: wrote {args.output}")
    if report["status"] == "pass":
        return 0

    # Name every failed check: failures such as missing required assets or
    # outdated cards produce no entry in report["violations"], so without this
    # the console would show a bare FAIL with no hint of the cause.
    for check in report["checks"]:
        if check["status"] != "pass":
            print(f"- failed check: {check['name']}")

    # Cap the per-file listing; the complete list is in the written report.
    limit = 40
    violations = report["violations"]
    for violation in violations[:limit]:
        print(f"- {violation['root']}: {violation['kind']} {violation['path']}")
    if len(violations) > limit:
        print(f"- ... {len(violations) - limit} more violations")
    return 1


if __name__ == "__main__":
    raise SystemExit(main())