Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| #!/usr/bin/env python3 | |
| """Validate static website links, anchors, image assets, and JSON data. | |
| This is a local integrity check for the GitHub Pages / Hugging Face static | |
| website. It intentionally does not fetch external URLs; it verifies that the | |
| published local surface is self-consistent. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import re | |
| from dataclasses import dataclass | |
| from datetime import datetime, timezone | |
| from html.parser import HTMLParser | |
| from pathlib import Path | |
| from urllib.parse import unquote, urlsplit | |
| from PIL import Image | |
# Parent of the directory that contains this script (presumably the repository root).
ROOT = Path(__file__).resolve().parents[1]
# Default directory holding the static site to validate.
DEFAULT_DOCS = ROOT / "docs"
# Default path of the JSON report written by main().
DEFAULT_OUTPUT = DEFAULT_DOCS / "data/website_integrity.json"
# URL prefix stripped from absolute local links before they are resolved on disk.
DEFAULT_SITE_BASE = "/ropedia-xperience-10m-task-suite/"
# HTML attributes whose values are collected as references.
LOCAL_ATTRS = {"href", "src"}
# URL schemes that are skipped rather than resolved against the local tree.
SKIP_SCHEMES = {"http", "https", "mailto", "tel", "data", "javascript"}
# Image suffixes that are opened with Pillow to read width, height, and format.
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".webp"}
@dataclass(frozen=True)
class Reference:
    """One local ``href``/``src`` reference found in an HTML page.

    The class is built positionally by ``SiteParser.handle_starttag``, so it
    needs the generated ``__init__`` that ``@dataclass`` provides; without the
    decorator the annotations below do not create a constructor.
    """

    # Page in which the reference was found.
    source: Path
    # Name of the HTML tag that carried the attribute (e.g. "a", "img").
    tag: str
    # Attribute the value came from ("href" or "src").
    attr: str
    # Attribute value exactly as written in the page.
    raw: str
    # URL-decoded path component of the value (may be empty).
    path_part: str
    # URL-decoded fragment component of the value (may be empty).
    fragment: str
class SiteParser(HTMLParser):
    """Collect fragment targets, local references, and image sources of a page.

    After ``feed()`` the instance exposes:

    * ``ids`` - every ``id`` value in document order, plus the ``name`` of
      ``<a>`` elements (legacy named anchors).
    * ``references`` - one ``Reference`` per local ``href``/``src`` value.
    * ``images`` - the decoded path of every local ``<img src>``.
    """

    def __init__(self, source: Path):
        """Create a parser for the page located at ``source``."""
        super().__init__(convert_charrefs=True)
        self.source = source
        self.ids: list[str] = []
        self.references: list[Reference] = []
        self.images: list[str] = []

    def handle_starttag(self, tag: str, attrs):
        """Record the fragment targets and local references of one start tag."""
        attr_map = dict(attrs)
        element_id = attr_map.get("id")
        if element_id:
            self.ids.append(element_id)
        # Only <a name="..."> is a fragment target; the name attribute of
        # other elements (<meta>, <input>, ...) must not be counted as an id,
        # otherwise it produces false duplicates and falsely valid anchors.
        if tag == "a":
            anchor_name = attr_map.get("name")
            # Skip a name equal to the id so one element is not counted twice.
            if anchor_name and anchor_name != element_id:
                self.ids.append(anchor_name)
        # Sorted so the order of collected references does not depend on the
        # hash-based iteration order of the LOCAL_ATTRS set.
        for attr in sorted(LOCAL_ATTRS):
            raw = attr_map.get(attr)
            if not raw:
                continue
            parsed = urlsplit(raw)
            # External or non-file references are outside this local check.
            if parsed.scheme in SKIP_SCHEMES or parsed.netloc:
                continue
            path_part = unquote(parsed.path)
            fragment = unquote(parsed.fragment)
            if not path_part and not fragment:
                continue
            self.references.append(Reference(self.source, tag, attr, raw, path_part, fragment))
            if tag == "img" and attr == "src":
                self.images.append(path_part)
def parse_html(path: Path) -> SiteParser:
    """Parse the HTML file at ``path`` and return the populated ``SiteParser``.

    The file is decoded as UTF-8; undecodable bytes are ignored.
    """
    collector = SiteParser(path)
    markup = path.read_text(encoding="utf-8", errors="ignore")
    collector.feed(markup)
    return collector
def normalize_path_part(path_part: str, site_base: str) -> str:
    """Strip the deployed site prefix from an absolute local link.

    Args:
        path_part: Decoded path component of a reference.
        site_base: URL prefix the site is served under; leading and trailing
            slashes are optional.

    Returns:
        ``path_part`` unchanged when it is empty, ``"/"``, or not under the
        site base; ``"/index.html"`` when it is the site base itself (with or
        without a trailing slash); otherwise the path re-rooted at ``"/"``.
    """
    if path_part in {"", "/"}:
        return path_part
    base_name = site_base.strip("/")
    # An empty base means the site is served from "/": nothing to strip, and
    # building the prefix would otherwise yield the bogus value "//".
    if not base_name:
        return path_part
    site_root = "/" + base_name
    # A link to the site root is valid both with and without the trailing slash.
    if path_part in {site_root, site_root + "/"}:
        return "/index.html"
    prefix = site_root + "/"
    if path_part.startswith(prefix):
        return "/" + path_part[len(prefix):]
    return path_part
def resolve_reference(docs_root: Path, source: Path, path_part: str, site_base: str) -> Path:
    """Map a reference path to the file it points at on disk.

    An empty path (fragment-only link) resolves to ``source`` itself. Paths
    starting with ``/`` are taken relative to ``docs_root``; all others are
    taken relative to the directory of ``source``. A directory target
    resolves to its ``index.html``.
    """
    local_path = normalize_path_part(path_part, site_base)
    if local_path == "":
        return source
    if local_path.startswith("/"):
        anchor_dir = docs_root
    else:
        anchor_dir = source.parent
    candidate = (anchor_dir / local_path.lstrip("/")).resolve()
    return candidate / "index.html" if candidate.is_dir() else candidate
def relative(path: Path, root: Path) -> str:
    """Return ``path`` as a POSIX string, relative to ``root`` when possible."""
    try:
        shown = path.relative_to(root)
    except ValueError:
        # ``path`` is not located under ``root``; report it unchanged.
        shown = path
    return shown.as_posix()
def duplicate_ids(ids: list[str]) -> list[dict]:
    """List the ids that occur more than once, sorted by id.

    Each entry has the form ``{"id": <value>, "count": <occurrences>}``.
    """
    tally: dict[str, int] = {}
    for identifier in ids:
        tally.setdefault(identifier, 0)
        tally[identifier] += 1
    repeated = []
    for identifier in sorted(tally):
        occurrences = tally[identifier]
        if occurrences > 1:
            repeated.append({"id": identifier, "count": occurrences})
    return repeated
def image_record(path: Path, docs_root: Path) -> dict:
    """Describe one image asset for the integrity report.

    The record always carries ``path``, ``exists``, and ``bytes``. For an
    existing file with a suffix in ``IMAGE_SUFFIXES`` it adds ``width``,
    ``height``, and ``format`` as read by Pillow; for an existing ``.svg`` it
    adds ``format`` and ``has_viewbox``.
    """
    present = path.exists()
    record = {
        "path": relative(path, docs_root),
        "exists": present,
        "bytes": path.stat().st_size if present else 0,
    }
    if not present:
        return record

    extension = path.suffix.lower()
    if extension in IMAGE_SUFFIXES:
        with Image.open(path) as picture:
            record["width"] = int(picture.width)
            record["height"] = int(picture.height)
            record["format"] = picture.format
    elif extension == ".svg":
        svg_source = path.read_text(encoding="utf-8", errors="ignore")
        viewbox_match = re.search(r'viewBox=["\']([^"\']+)["\']', svg_source)
        record["format"] = "SVG"
        record["has_viewbox"] = viewbox_match is not None
    return record
def validate(docs_root: Path, site_base: str) -> dict:
    """Run every local integrity check and build the report dictionary.

    Checks performed:

    * every local ``href``/``src`` in the top-level ``*.html`` pages resolves
      to an existing file, and every fragment resolves to a known anchor;
    * no page contains duplicate ids;
    * every ``data/*.json`` file parses;
    * every referenced image is non-empty and readable;
    * the marker-based semantic layout rules hold for ``index.html`` and
      ``research_roadmap.html``.

    Args:
        docs_root: Root directory of the static site.
        site_base: URL prefix the site is served under.

    Returns:
        A JSON-serialisable report with ``status`` (``"pass"``/``"fail"``),
        a ``summary``, the grouped ``failures``, and per-page, per-JSON, and
        per-image records.
    """
    html_files = sorted(docs_root.glob("*.html"))
    parsers = {path: parse_html(path) for path in html_files}
    anchors_by_file = {path: set(parser.ids) for path, parser in parsers.items()}
    missing_targets = []
    missing_anchors = []
    local_references = []
    external_reference_count = 0
    image_paths: set[Path] = set()
    for path, parser in parsers.items():
        text = path.read_text(encoding="utf-8", errors="ignore")
        external_reference_count += len(re.findall(r'https?://', text))
        for ref in parser.references:
            target = resolve_reference(docs_root, path, ref.path_part, site_base)
            target_rel = relative(target, docs_root)
            source_rel = relative(ref.source, docs_root)
            local_references.append({
                "source": source_rel,
                "tag": ref.tag,
                "attr": ref.attr,
                "raw": ref.raw,
                "target": target_rel,
                "fragment": ref.fragment,
            })
            if not target.exists():
                missing_targets.append({
                    "source": source_rel,
                    "raw": ref.raw,
                    "target": target_rel,
                })
                continue
            if ref.tag == "img" and ref.attr == "src":
                image_paths.add(target)
            if ref.fragment:
                anchor_target = target if target.suffix.lower() == ".html" else path
                # Fragments on non-HTML targets (e.g. "file.pdf#page=2") cannot be checked.
                if ref.path_part and target.suffix.lower() != ".html":
                    continue
                anchors = anchors_by_file.get(anchor_target)
                # HTML pages outside the top-level glob are parsed lazily and cached.
                if anchors is None and anchor_target.exists() and anchor_target.suffix.lower() == ".html":
                    anchors = set(parse_html(anchor_target).ids)
                    anchors_by_file[anchor_target] = anchors
                if anchors is not None and ref.fragment not in anchors:
                    missing_anchors.append({
                        "source": source_rel,
                        "raw": ref.raw,
                        "target": relative(anchor_target, docs_root),
                        "fragment": ref.fragment,
                    })
    json_records = []
    invalid_json = []
    for path in sorted((docs_root / "data").glob("*.json")):
        rel_path = relative(path, docs_root)
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            # A file that is not valid UTF-8 is reported like malformed JSON
            # instead of aborting the whole validation run.
            invalid_json.append({"path": rel_path, "error": str(exc)})
            continue
        json_records.append({
            "path": rel_path,
            "bytes": path.stat().st_size,
            "top_level_type": type(payload).__name__,
        })
    images = []
    invalid_images = []
    for path in sorted(image_paths):
        try:
            record = image_record(path, docs_root)
        except Exception as exc:  # noqa: BLE001 - report image validation failures.
            invalid_images.append({"path": relative(path, docs_root), "error": str(exc)})
            continue
        images.append(record)
        suffix = path.suffix.lower()
        # Combine the checks so an image that violates several of them is
        # reported (and counted as a failure) only once.
        is_invalid = (
            not record.get("exists")
            or record.get("bytes", 0) <= 0
            or (suffix in IMAGE_SUFFIXES and (record.get("width", 0) <= 0 or record.get("height", 0) <= 0))
            or (suffix == ".svg" and not record.get("has_viewbox"))
        )
        if is_invalid:
            invalid_images.append(record)
    duplicate_id_records = [
        {"path": relative(path, docs_root), "duplicates": duplicate_ids(parser.ids)}
        for path, parser in parsers.items()
    ]
    duplicate_id_records = [item for item in duplicate_id_records if item["duplicates"]]
    semantic_checks = []
    semantic_layout_failures = []
    index_path = docs_root / "index.html"
    index_text = index_path.read_text(encoding="utf-8", errors="ignore") if index_path.exists() else ""
    roadmap_payload = None
    roadmap_json_error = None
    roadmap_path = docs_root / "data/research_roadmap.json"
    try:
        roadmap_payload = json.loads(roadmap_path.read_text(encoding="utf-8"))
    except Exception as exc:  # noqa: BLE001 - report malformed/missing roadmap data.
        roadmap_json_error = str(exc)
    # Tolerate a roadmap file whose top level is not an object, or whose
    # "phases" value is not a list of objects: the roadmap rules then fail
    # with an empty phase list instead of raising.
    raw_phases = roadmap_payload.get("phases", []) if isinstance(roadmap_payload, dict) else []
    roadmap_phases = [phase for phase in raw_phases if isinstance(phase, dict)] if isinstance(raw_phases, list) else []

    def section_pos(section_id: str) -> int:
        """Return the offset of ``<section ... id=section_id>`` in index.html, or -1."""
        # Accept both quote styles, like the raw-sample section check below.
        match = re.search(rf'<section\b[^>]*\bid=["\']{re.escape(section_id)}["\']', index_text)
        return match.start() if match else -1

    suite_start = section_pos("suite")
    suite_end = section_pos("pipeline")
    suite_text = index_text[suite_start:suite_end] if suite_start >= 0 and suite_end > suite_start else ""
    overview_pos = section_pos("overview")
    protocol_pos = section_pos("protocol")
    evidence_pos = section_pos("evidence")
    dataset_start = section_pos("dataset-card")
    dataset_end = section_pos("suite")
    dataset_text = index_text[dataset_start:dataset_end] if dataset_start >= 0 and dataset_end > dataset_start else ""
    raw_sample_start = section_pos("raw-sample")
    raw_sample_end = section_pos("suite")
    raw_sample_text = index_text[raw_sample_start:raw_sample_end] if raw_sample_start >= 0 and raw_sample_end > raw_sample_start else ""
    roadmap_page = docs_root / "research_roadmap.html"
    roadmap_page_text = roadmap_page.read_text(encoding="utf-8", errors="ignore") if roadmap_page.exists() else ""
    # Each rule is (name, marker, after_marker, reason). How the markers are
    # interpreted depends on the rule name; see the dispatch loop below.
    semantic_rules = [
        (
            "project_tabs_have_six_groups",
            'data-tab-key=',
            None,
            "The long research page should expose six top-level tabs, including a dedicated Directions tab.",
        ),
        (
            "project_sections_are_assigned_to_tabs",
            'data-project-tab=',
            None,
            "Every major research section should be assigned to a tab group.",
        ),
        (
            "project_hash_router_preserves_deep_links",
            'activateTabForHash',
            None,
            "Deep links should open the correct tab instead of landing on hidden content.",
        ),
        (
            "homepage_hidden_attribute_not_overridden",
            "[hidden] { display: none !important; }",
            None,
            "Elements hidden by JavaScript should stay hidden even when component CSS sets display values.",
        ),
        (
            "project_tabs_use_accessible_roles",
            'role="tab"',
            None,
            "The tabbed research dashboard should expose tablist/tab semantics.",
        ),
        (
            "project_sections_are_labeled_tabpanels",
            'role="tabpanel"',
            None,
            "Every tabbed research section should expose a labeled panel role.",
        ),
        (
            "project_tabs_update_selected_state",
            'aria-selected',
            None,
            "Tab activation should update selected state for assistive technology.",
        ),
        (
            "project_tabs_support_keyboard_navigation",
            'moveProjectTabFocus',
            None,
            "Keyboard users should be able to switch project tabs with arrow, Home, and End keys.",
        ),
        (
            "project_overview_precedes_progress_ledger",
            '<section id="overview">',
            '<section id="evidence">',
            "The project overview should appear before the deeper progress ledger.",
        ),
        (
            "project_status_links_json",
            'data/project_status.json',
            None,
            "The website should expose the machine-readable project status.",
        ),
        (
            "roadmap_links_json",
            'data/research_roadmap.json',
            None,
            "The website should expose the machine-readable research roadmap.",
        ),
        (
            "interactive_roadmap_page_linked",
            'research_roadmap.html',
            None,
            "The project site should link to the dedicated interactive research roadmap page.",
        ),
        (
            "interactive_roadmap_links_json",
            'data/research_roadmap_interactive.json',
            None,
            "The project site should expose the machine-readable interactive roadmap contract.",
        ),
        (
            "interactive_roadmap_loads_generated_json",
            'data/research_roadmap_interactive.json',
            None,
            "The interactive roadmap page should load the generated roadmap JSON contract.",
        ),
        (
            "interactive_roadmap_tracks_four_directions",
            'Research tracks',
            None,
            "The interactive roadmap page should expose the four research-track control surface.",
        ),
        (
            "reader_resources_link_qwen_status",
            'results/omni_finetune/DATA_ACCESS_STATUS.md',
            None,
            "The website should expose the Qwen3-Omni scale-up status.",
        ),
        (
            "roadmap_html_matches_json_phases",
            "",
            None,
            "The roadmap section should show every stage defined in research_roadmap.json.",
        ),
        (
            "roadmap_status_chips_match_json",
            "",
            None,
            "The roadmap status chips should match the phase statuses in research_roadmap.json.",
        ),
        (
            "evaluation_protocol_between_overview_and_progress",
            '<section id="protocol">',
            '<section id="evidence">',
            "The evaluation protocol should appear before the deeper evidence ledger.",
        ),
        (
            "evaluation_protocol_links_json",
            'data/evaluation_protocol.json',
            None,
            "The website should expose the machine-readable evaluation protocol.",
        ),
        (
            "visual_figures_link_task_suite_image",
            'assets/task_suite_infographic.png',
            None,
            "The website should expose the main task-suite figure.",
        ),
        (
            "foundation_direction_cards_explain_one_sample_io",
            'class="foundation-io-panel"',
            None,
            "The three foundation direction cards should explain one-sample training inputs and outputs.",
        ),
        (
            "suite_task_map_precedes_radar_surface",
            '<div class="figure-pan" id="task-suite-map">',
            'class="chart radar-chart unified-radar-chart"',
            "The Suite anchor should show the task-suite map before the radar/results surface.",
        ),
        (
            "raw_sample_stream_ledger_contains_seven_modalities",
            "",
            None,
            "The raw sample browser should expose the seven source streams without a separate repeated atlas component.",
        ),
        (
            "raw_sample_browser_links_modality_metadata",
            'data/modality_atlas.json',
            None,
            "The raw sample browser should keep the machine-readable modality metadata link.",
        ),
        (
            "dataset_card_section_links_official_dataset",
            'https://huggingface.co/datasets/ropedia-ai/xperience-10m',
            None,
            "The dataset-card section should link the official gated dataset.",
        ),
        (
            "dataset_card_section_links_public_sample",
            'https://huggingface.co/datasets/ropedia-ai/xperience-10m-sample',
            None,
            "The dataset-card section should link the public sample dataset used here.",
        ),
        (
            "dataset_card_section_states_raw_data_boundary",
            'Raw MP4, HDF5, RRD files',
            None,
            "The dataset-card section should state that raw Xperience-10M files are not redistributed.",
        ),
        (
            "raw_sample_browser_section_present",
            '<section id="raw-sample"',
            None,
            "The website should expose a direct raw public sample browser under the Data tab.",
        ),
        (
            "raw_sample_browser_links_manifest",
            'data/raw_sample_files.json',
            None,
            "The raw sample browser should expose a machine-readable file manifest.",
        ),
        (
            "raw_sample_browser_has_video_player",
            'id="rawVideo"',
            None,
            "The raw sample browser should include a native video player for MP4 streams.",
        ),
        (
            "raw_sample_browser_has_audio_player",
            'id="rawAudio"',
            None,
            "The raw sample browser should include a native audio player for the embedded MP4 audio stream.",
        ),
        (
            "raw_sample_browser_lists_core_files",
            'visualization.rrd',
            None,
            "The raw sample browser should list the annotation, MP4, and optional RRD files.",
        ),
        (
            "raw_sample_browser_links_preview_assets",
            'assets/raw-sample-preview/fisheye_cam0_preview.mp4',
            None,
            "The raw sample browser should link browser-playable preview clips for MP4 streams.",
        ),
        (
            "raw_sample_browser_links_full_raw_source",
            'open full raw source',
            None,
            "The raw sample browser should keep direct links to the complete raw source files.",
        ),
        (
            "task_player_surface_present",
            'id="taskPlayer"',
            None,
            "The website should expose the interactive task walkthrough/player.",
        ),
        (
            "task_player_uses_walkthrough_json",
            'data/task_walkthroughs.json',
            None,
            "The task player and task cards should read the generated walkthrough JSON.",
        ),
        (
            "task_cards_use_human_research_names",
            'Action Recognition',
            None,
            "The public task surface should use readable research task names.",
        ),
    ]
    for name, marker, after_marker, reason in semantic_rules:
        if name == "project_tabs_have_six_groups":
            tab_count = index_text.count(marker)
            passed = tab_count == 6
            detail = {"tab_count": tab_count}
        elif name == "project_sections_are_assigned_to_tabs":
            section_count = index_text.count(marker)
            passed = section_count >= 19
            detail = {"section_count": section_count}
        elif name == "project_hash_router_preserves_deep_links":
            marker_count = index_text.count(marker)
            passed = marker_count >= 2 and "sectionTabMap" in index_text
            detail = {"marker_count": marker_count, "has_section_tab_map": "sectionTabMap" in index_text}
        elif name == "project_tabs_use_accessible_roles":
            tab_role_count = index_text.count(marker)
            project_tab_count = index_text.count("data-tab-key=")
            nested_tab_count = index_text.count("data-panel-target=")
            passed = (
                'role="tablist"' in index_text
                and project_tab_count == 6
                and nested_tab_count >= 4
                and tab_role_count >= project_tab_count + nested_tab_count
            )
            detail = {
                "tab_role_count": tab_role_count,
                "project_tab_count": project_tab_count,
                "nested_tab_count": nested_tab_count,
                "has_tablist": 'role="tablist"' in index_text,
            }
        elif name == "project_sections_are_labeled_tabpanels":
            panel_count = index_text.count(marker)
            passed = panel_count >= 19 and index_text.count('aria-labelledby="tab-') >= 19
            detail = {
                "panel_count": panel_count,
                "labeled_panel_count": index_text.count('aria-labelledby="tab-'),
            }
        elif name == "project_tabs_update_selected_state":
            selected_count = index_text.count(marker)
            passed = selected_count >= 6 and 'setAttribute("aria-selected"' in index_text
            detail = {
                "selected_count": selected_count,
                "updates_selected_state": 'setAttribute("aria-selected"' in index_text,
            }
        elif name == "project_tabs_support_keyboard_navigation":
            marker_count = index_text.count(marker)
            # NOTE(review): the pass condition requires "ArrowRight" only, while
            # the reported detail also looks for "ArrowLeft" - confirm intent.
            passed = marker_count >= 2 and "ArrowRight" in index_text and "Home" in index_text and "End" in index_text
            detail = {
                "marker_count": marker_count,
                "has_arrow_navigation": "ArrowRight" in index_text and "ArrowLeft" in index_text,
                "has_home_end_navigation": "Home" in index_text and "End" in index_text,
            }
        elif name == "roadmap_html_matches_json_phases":
            phase_names = [str(phase.get("name", "")) for phase in roadmap_phases]
            missing_names = [phase for phase in phase_names if phase and phase not in index_text]
            passed = bool(phase_names) and not missing_names and not roadmap_json_error
            detail = {
                "phase_count": len(phase_names),
                "missing_phase_names": missing_names,
                "roadmap_json_error": roadmap_json_error,
            }
        elif name == "roadmap_status_chips_match_json":
            statuses = [str(phase.get("status", "")).lower() for phase in roadmap_phases]
            missing_statuses = sorted({status for status in statuses if status and f'data-status="{status}"' not in index_text})
            passed = bool(statuses) and not missing_statuses and not roadmap_json_error
            detail = {
                "phase_count": len(statuses),
                "statuses": statuses,
                "missing_statuses": missing_statuses,
                "roadmap_json_error": roadmap_json_error,
            }
        elif name == "raw_sample_stream_ledger_contains_seven_modalities":
            modality_terms = [
                "Video",
                "Audio",
                "Depth",
                "Pose / SLAM",
                "Motion capture",
                "Inertial",
                "Language",
            ]
            present_terms = [term for term in modality_terms if term in raw_sample_text]
            passed = len(present_terms) == len(modality_terms)
            detail = {
                "modality_count": len(present_terms),
                "missing_modalities": [term for term in modality_terms if term not in present_terms],
            }
        elif name == "foundation_direction_cards_explain_one_sample_io":
            panel_count = index_text.count(marker)
            required_terms = [
                "Sample input",
                "Training output",
                "Existing hooks",
                "Spatial intelligence models",
                "Human-video world models",
                "Vision-language-action models",
            ]
            missing_terms = [term for term in required_terms if term not in index_text]
            passed = panel_count == 3 and not missing_terms
            detail = {"panel_count": panel_count, "missing_terms": missing_terms}
        elif name.startswith("dataset_card_section_"):
            # These markers must appear inside the dataset-card section itself.
            marker_count = dataset_text.count(marker)
            passed = marker_count >= 1
            detail = {"marker_count": marker_count}
        elif name.startswith("raw_sample_browser_"):
            if name == "raw_sample_browser_section_present":
                marker_count = len(re.findall(r'<section\b[^>]*\bid=["\']raw-sample["\']', index_text))
            elif name == "raw_sample_browser_links_modality_metadata":
                marker_count = raw_sample_text.count(marker)
            else:
                marker_count = index_text.count(marker)
            passed = marker_count >= 1
            detail = {"marker_count": marker_count}
        elif name == "project_overview_precedes_progress_ledger":
            passed = overview_pos >= 0 and evidence_pos >= 0 and overview_pos < evidence_pos
            detail = {"overview_index": overview_pos, "evidence_index": evidence_pos}
        elif name == "evaluation_protocol_between_overview_and_progress":
            passed = overview_pos >= 0 and protocol_pos >= 0 and evidence_pos >= 0 and overview_pos < protocol_pos < evidence_pos
            detail = {"overview_index": overview_pos, "protocol_index": protocol_pos, "evidence_index": evidence_pos}
        elif name in {
            "project_status_links_json",
            "roadmap_links_json",
            "interactive_roadmap_page_linked",
            "interactive_roadmap_links_json",
            "homepage_hidden_attribute_not_overridden",
            "reader_resources_link_qwen_status",
            "visual_figures_link_task_suite_image",
            "task_player_surface_present",
            "task_player_uses_walkthrough_json",
            "task_cards_use_human_research_names",
        }:
            marker_count = index_text.count(marker)
            passed = marker_count >= 1
            detail = {"marker_count": marker_count}
        elif name in {
            "interactive_roadmap_loads_generated_json",
            "interactive_roadmap_tracks_four_directions",
        }:
            # These markers are looked up in the dedicated roadmap page.
            marker_count = roadmap_page_text.count(marker)
            passed = marker_count >= 1 and roadmap_page.exists()
            detail = {"marker_count": marker_count, "page_exists": roadmap_page.exists()}
        elif name == "evaluation_protocol_links_json":
            marker_count = index_text.count(marker)
            passed = marker_count >= 1
            detail = {"marker_count": marker_count}
        else:
            # Ordering rule: ``marker`` must precede ``after_marker`` inside
            # the suite section.
            marker_pos = suite_text.find(marker)
            after_pos = suite_text.find(after_marker or "")
            passed = marker_pos >= 0 and after_pos >= 0 and marker_pos < after_pos
            detail = {"first_marker_index": marker_pos, "second_marker_index": after_pos}
        check = {"name": name, "status": "pass" if passed else "fail", "reason": reason, **detail}
        semantic_checks.append(check)
        if not passed:
            semantic_layout_failures.append(check)
    failures = {
        "missing_targets": missing_targets,
        "missing_anchors": missing_anchors,
        "duplicate_ids": duplicate_id_records,
        "invalid_json": invalid_json,
        "invalid_images": invalid_images,
        "semantic_layout": semantic_layout_failures,
    }
    failure_count = sum(len(items) for items in failures.values())
    return {
        "status": "pass" if failure_count == 0 else "fail",
        "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "docs_root": relative(docs_root, ROOT),
        "site_base": site_base,
        "summary": {
            "html_pages": len(html_files),
            "local_references": len(local_references),
            "external_reference_count": external_reference_count,
            "json_files": len(json_records),
            "image_assets_referenced": len(images),
            "failure_count": failure_count,
        },
        "failures": failures,
        "semantic_checks": semantic_checks,
        "html_pages": [
            {
                "path": relative(path, docs_root),
                "id_count": len(parser.ids),
                "reference_count": len(parser.references),
                "image_count": len(parser.images),
            }
            for path, parser in parsers.items()
        ],
        "json_files": json_records,
        "images": images,
    }
def main() -> int:
    """Validate the site, write the JSON report, and return the exit status.

    Returns ``0`` when the report status is ``"pass"``; otherwise prints up to
    20 entries per failure kind and returns ``1``.
    """
    cli = argparse.ArgumentParser()
    for flag, settings in (
        ("--docs-root", {"type": Path, "default": DEFAULT_DOCS}),
        ("--output", {"type": Path, "default": DEFAULT_OUTPUT}),
        ("--site-base", {"default": DEFAULT_SITE_BASE}),
    ):
        cli.add_argument(flag, **settings)
    options = cli.parse_args()

    report = validate(options.docs_root.resolve(), options.site_base)

    destination = options.output
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2) + "\n"
    destination.write_text(serialized, encoding="utf-8")

    outcome = report["status"]
    print(f"{outcome.upper()}: wrote {destination}")
    if outcome == "pass":
        return 0
    for kind, items in report["failures"].items():
        # Cap the console output; the full list is in the written report.
        for item in items[:20]:
            print(f"- {kind}: {item}")
    return 1
# When run as a script, exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())