Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "title": "Ropedia Xperience-10M Figure Index", | |
| "status": "pass", | |
| "generated_at_utc": "2026-06-04T06:20:38+00:00", | |
| "scope": "Public figures, diagrams, charts, and derived modality thumbnails. Raw Xperience-10M videos, annotations, RRD files, and Qwen weights are excluded.", | |
| "figure_count": 21, | |
| "figures": [ | |
| { | |
| "id": "brand_logo_mark", | |
| "title": "Project logo mark", | |
| "path": "docs/assets/brand/xperience10m-logo-mark-512.png", | |
| "role": "Primary X-shaped multimodal camera mark used for the website header, README, HF cards, and brand identity.", | |
| "source_script": "scripts/build_brand_assets.py", | |
| "surface": "README, website, HF Space, artifact dataset, model card, favicon variants", | |
| "exists": true, | |
| "bytes": 240908, | |
| "sha256": "c67ff852d6ef618df14cf63958c2e196f9ceb0cea4222393f948efc136d98d8e", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 512, | |
| "height": 512 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "brand_social_card", | |
| "title": "Project logo social card", | |
| "path": "docs/assets/brand/xperience10m-logo-social-card.png", | |
| "role": "Large preview image for README, Hugging Face cards, and Open Graph/Twitter social sharing.", | |
| "source_script": "scripts/build_brand_assets.py", | |
| "surface": "README, website metadata, HF Space, artifact dataset, model card", | |
| "exists": true, | |
| "bytes": 201330, | |
| "sha256": "16e99ea98eb890e02aeb681e16b5163e1d5a9702d784dbd34ce3691e423a766a", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 1200, | |
| "height": 630 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "brand_favicon", | |
| "title": "Project favicon", | |
| "path": "docs/assets/brand/xperience10m-logo-favicon-64.png", | |
| "role": "Small dark-tile logo for browser tabs and compact navigation.", | |
| "source_script": "scripts/build_brand_assets.py", | |
| "surface": "website favicon and header", | |
| "exists": true, | |
| "bytes": 7206, | |
| "sha256": "c6ec4a7d71bedd2299f01dbc7c7b1c09bbd6b67a0a316922a290d7ddd2469600", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 64, | |
| "height": 64 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "task_suite_infographic", | |
| "title": "12-task suite infographic", | |
| "path": "docs/assets/task_suite_infographic.png", | |
| "role": "Primary visual map of the task suite, verified metrics, and sample modalities.", | |
| "source_script": "scripts/render_task_suite_infographic.py", | |
| "surface": "README, website, HF Space, artifact dataset, model card", | |
| "exists": true, | |
| "bytes": 2612510, | |
| "sha256": "213d81f49d27e3f2560c79e29a017c017cbe38d8d605815bf3bc87834a1424ae", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 1800, | |
| "height": 6600 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "pipeline_diagram", | |
| "title": "Episode-to-task pipeline diagram", | |
| "path": "docs/assets/pipeline_diagram.png", | |
| "role": "End-to-end data processing and evaluation pipeline overview.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "README, website, HF artifact dataset", | |
| "exists": true, | |
| "bytes": 707296, | |
| "sha256": "3eb7767a6ec78ef50847b79fad16e2438e84f449782f3fe8367417bece1e289c", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 1800, | |
| "height": 1120 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "task_architectures", | |
| "title": "Minimal and neural task architecture map", | |
| "path": "docs/assets/task_architectures.png", | |
| "role": "All 12 task heads and shared feature contracts.", | |
| "source_script": "scripts/render_overview_figures.py", | |
| "surface": "README, website, HF artifact dataset, model card", | |
| "exists": true, | |
| "bytes": 767403, | |
| "sha256": "9e3b86acf7f7296eb7c4b1e5e74c7008f264ddce40485e0e923b1e2a2f2b8efe", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 1800, | |
| "height": 2450 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "video_modality", | |
| "title": "Video modality thumbnail", | |
| "path": "docs/assets/modalities/video.jpg", | |
| "role": "Derived thumbnail for synchronized camera streams.", | |
| "source_script": "scripts/export_modality_atlas_assets.py", | |
| "surface": "website modality atlas, HF mirrors", | |
| "exists": true, | |
| "bytes": 82323, | |
| "sha256": "411a125e7b364a3f50d9f34ff5372b7a5f980239f0b393085a405ba839d39631", | |
| "dimensions": { | |
| "format": "JPEG", | |
| "width": 880, | |
| "height": 520 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "audio_modality", | |
| "title": "Audio modality thumbnail", | |
| "path": "docs/assets/modalities/audio.png", | |
| "role": "Derived waveform thumbnail for the MP4 AAC stream.", | |
| "source_script": "scripts/export_modality_atlas_assets.py", | |
| "surface": "website modality atlas, HF mirrors", | |
| "exists": true, | |
| "bytes": 7075, | |
| "sha256": "50dfc7611bb3589ebe2965d44eb9dc641a3666e638968200ec63a0edc447e11a", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 880, | |
| "height": 520 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "depth_modality", | |
| "title": "Depth modality thumbnail", | |
| "path": "docs/assets/modalities/depth.jpg", | |
| "role": "Derived depth and confidence thumbnail.", | |
| "source_script": "scripts/export_modality_atlas_assets.py", | |
| "surface": "website modality atlas, HF mirrors", | |
| "exists": true, | |
| "bytes": 45018, | |
| "sha256": "155ba0181a1131864ebfd3492d3db38086645541d1892ba6a430d7f1387c0cf4", | |
| "dimensions": { | |
| "format": "JPEG", | |
| "width": 880, | |
| "height": 520 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "pose_slam_modality", | |
| "title": "Pose / SLAM modality thumbnail", | |
| "path": "docs/assets/modalities/pose_slam.png", | |
| "role": "Derived camera trajectory and sparse map thumbnail.", | |
| "source_script": "scripts/export_modality_atlas_assets.py", | |
| "surface": "website modality atlas, HF mirrors", | |
| "exists": true, | |
| "bytes": 68057, | |
| "sha256": "5c3f59b6aa809c41c74be3cd8765a6f01f4c93306ce8fcb7fa9b79914666efd2", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 880, | |
| "height": 520 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "motion_capture_modality", | |
| "title": "Motion capture modality thumbnail", | |
| "path": "docs/assets/modalities/motion_capture.png", | |
| "role": "Derived body and hand motion-capture thumbnail.", | |
| "source_script": "scripts/export_modality_atlas_assets.py", | |
| "surface": "website modality atlas, HF mirrors", | |
| "exists": true, | |
| "bytes": 15078, | |
| "sha256": "fcd5564cd1dd49e01125c24e368fb64d9cc4631db65d6daa93885dbd065b344c", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 880, | |
| "height": 520 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "inertial_modality", | |
| "title": "Inertial modality thumbnail", | |
| "path": "docs/assets/modalities/inertial.png", | |
| "role": "Derived accelerometer and gyroscope trace thumbnail.", | |
| "source_script": "scripts/export_modality_atlas_assets.py", | |
| "surface": "website modality atlas, HF mirrors", | |
| "exists": true, | |
| "bytes": 46655, | |
| "sha256": "10eff24ca245b5d06a6f533ff53b584962baa00b83638b3ee4ac871fef4c7569", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 880, | |
| "height": 520 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "language_modality", | |
| "title": "Language modality thumbnail", | |
| "path": "docs/assets/modalities/language.png", | |
| "role": "Derived object-tag and caption thumbnail.", | |
| "source_script": "scripts/export_modality_atlas_assets.py", | |
| "surface": "website modality atlas, HF mirrors", | |
| "exists": true, | |
| "bytes": 15428, | |
| "sha256": "932895f67d58e98128b707c6c9e6ee320065e4a5945145d0fdec754ee8763392", | |
| "dimensions": { | |
| "format": "PNG", | |
| "width": 880, | |
| "height": 520 | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "model_macro_f1_chart", | |
| "title": "Model macro-F1 comparison chart", | |
| "path": "docs/assets/charts/model_macro_f1.svg", | |
| "role": "Minimal-vs-neural classification score comparison.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "website diagnostics", | |
| "exists": true, | |
| "bytes": 3321, | |
| "sha256": "2984c0cde631cf891fe86c1b24248ce07595fa55efbf44391858d67f07a86386", | |
| "dimensions": { | |
| "format": "SVG", | |
| "width": 1100, | |
| "height": 284, | |
| "view_box": "0 0 1100 284" | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "neural_score_chart", | |
| "title": "Neural MLP task score chart", | |
| "path": "docs/assets/charts/episode_task_scores_neural_mlp.svg", | |
| "role": "Neural MLP metric snapshot across the task suite.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "website diagnostics", | |
| "exists": true, | |
| "bytes": 5917, | |
| "sha256": "01cbd4223029ea16624153419908384674fe00576928e59843a45a5dd692b5f1", | |
| "dimensions": { | |
| "format": "SVG", | |
| "width": 1100, | |
| "height": 556, | |
| "view_box": "0 0 1100 556" | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "minimal_vs_neural_score_chart", | |
| "title": "Minimal-vs-neural task score chart", | |
| "path": "docs/assets/charts/episode_task_scores_minimal_vs_neural.svg", | |
| "role": "Side-by-side baseline comparison over the same window contracts.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "website diagnostics", | |
| "exists": true, | |
| "bytes": 10040, | |
| "sha256": "0631b5c00f6ea9271a917605b3161b3c3766028296f40c181bfffaffba20770e", | |
| "dimensions": { | |
| "format": "SVG", | |
| "width": 1100, | |
| "height": 964, | |
| "view_box": "0 0 1100 964" | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "research_direction_coverage_chart", | |
| "title": "Research direction coverage chart", | |
| "path": "docs/assets/charts/research_direction_coverage.svg", | |
| "role": "Four-track coverage map for Ropedia research directions.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "website directions", | |
| "exists": true, | |
| "bytes": 4968, | |
| "sha256": "6e9dbdc93218ee11049bd7003fd7a68b5e5038810d47c3591025800f7b345ef9", | |
| "dimensions": { | |
| "format": "SVG", | |
| "width": 1180, | |
| "height": 700, | |
| "view_box": "0 0 1180 700" | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "research_direction_extension_chart", | |
| "title": "Research direction extension chart", | |
| "path": "docs/assets/charts/research_direction_extension_tasks.svg", | |
| "role": "Four coded extension probes, one per Ropedia research direction.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "website directions", | |
| "exists": true, | |
| "bytes": 6685, | |
| "sha256": "ab11b70fd8aac5f81779eb0f4b183fcee7f131eadea0d10a91ff45ccb09ca095", | |
| "dimensions": { | |
| "format": "SVG", | |
| "width": 1420, | |
| "height": 920, | |
| "view_box": "0 0 1420 920" | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "feature_blocks_chart", | |
| "title": "Feature block chart", | |
| "path": "docs/assets/charts/feature_blocks.svg", | |
| "role": "Feature allocation by modality block.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "website features", | |
| "exists": true, | |
| "bytes": 7889, | |
| "sha256": "c6589dff757ef4ca6e949cd346b0c54ab03e3120e20317ba0ad9d02f47377df8", | |
| "dimensions": { | |
| "format": "SVG", | |
| "width": 1100, | |
| "height": 760, | |
| "view_box": "0 0 1100 760" | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "episode_task_scores_chart", | |
| "title": "Minimal task score chart", | |
| "path": "docs/assets/charts/episode_task_scores.svg", | |
| "role": "Minimal baseline metric snapshot across the task suite.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "website diagnostics", | |
| "exists": true, | |
| "bytes": 5903, | |
| "sha256": "3fb5fcdff8951e9f58e7990730085c0b591fd4694824a18034e2f7957eb13282", | |
| "dimensions": { | |
| "format": "SVG", | |
| "width": 1100, | |
| "height": 556, | |
| "view_box": "0 0 1100 556" | |
| }, | |
| "source_script_exists": true | |
| }, | |
| { | |
| "id": "cross_modal_retrieval_chart", | |
| "title": "Cross-modal retrieval chart", | |
| "path": "docs/assets/charts/cross_modal_retrieval.svg", | |
| "role": "Retrieval behavior chart for the cross-modal task.", | |
| "source_script": "scripts/generate_visualizations.py", | |
| "surface": "website diagnostics", | |
| "exists": true, | |
| "bytes": 3196, | |
| "sha256": "869f4453b12efd99d13ad60d407cbff75666e41051cb7457d8ac662fcb4d8553", | |
| "dimensions": { | |
| "format": "SVG", | |
| "width": 1100, | |
| "height": 284, | |
| "view_box": "0 0 1100 284" | |
| }, | |
| "source_script_exists": true | |
| } | |
| ], | |
| "failures": [] | |
| } | |