ropedia-xperience-10m-task-baselines / scripts /verify_live_publication.py
cy0307's picture
Publish Ropedia Xperience-10M task baseline cards
3a10443 verified
Raw
History Blame
36.3 kB
#!/usr/bin/env python3
"""Verify the already-published GitHub Pages and Hugging Face mirrors.
This is the post-publish companion to the local publication gates. It fetches
public URLs and compares them with the local release artifacts so a reader can
see that the live surfaces match the repo/HF bundles that were prepared.
"""
from __future__ import annotations
import argparse
import hashlib
import json
import subprocess
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from urllib.error import HTTPError, URLError
from urllib.parse import urlsplit, urlunsplit
from urllib.request import Request, urlopen
# Base directory for every relative local path in this script: parents[1] of the
# resolved script path, i.e. the directory above the one containing this file.
ROOT = Path(__file__).resolve().parents[1]
# Default destination of the JSON report written by main() (overridable via --output).
DEFAULT_OUTPUT = ROOT / "docs/data/live_publication_status.json"
# Per-request time limit in seconds; passed to urlopen() and to curl --max-time.
TIMEOUT_SECONDS = 30
# User-Agent string sent by both the urllib path and the curl fallback.
USER_AGENT = "ropedia-xperience-10m-live-verifier/1.0"
# Substrings that must not appear in fetched public content (used as the
# "forbidden" list of several marker checks).
# NOTE(review): the literals are split with "+" presumably so that this source
# file never contains the marker text verbatim -- confirm before merging them.
LOCAL_PATH_FORBIDDEN_MARKERS = ["/" + "Users/", "/" + "private/"]
# Hugging Face repo id interpolated into the URLs built by qwen3_lora_hash_groups().
QWEN3_LORA_REPO_ID = "cy0307/ropedia-qwen3-omni-lora-128ep"
# Local upload-package directories probed in order by qwen3_lora_upload_dir();
# the first one that exists is used.
QWEN3_LORA_UPLOAD_DIR_CANDIDATES = [
ROOT.parent / "hf_publish/qwen3_lora_128ep",
ROOT / "results/omni_finetune/hf_upload_qwen3_128ep_full",
]
# Byte-for-byte parity table consumed by hash_group_record().
# Each entry has:
#   "id"         - stable identifier copied into the report and into failure rows
#   "title"      - human-readable label copied into the report
#   "local_path" - reference file; joined onto ROOT unless it is absolute
#   "urls"       - mapping of surface name -> public URL; the SHA-256 of each
#                  fetched body must equal the SHA-256 of the local file
HASH_GROUPS = [
{
"id": "task_suite_infographic",
"title": "Task-suite infographic",
"local_path": "docs/assets/task_suite_infographic.png",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/assets/task_suite_infographic.png",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/resolve/main/assets/task_suite_infographic.png",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/assets/task_suite_infographic.png",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/assets/task_suite_infographic.png",
},
},
{
"id": "quality_gates_json",
"title": "Quality-gate JSON",
"local_path": "docs/data/quality_gates.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/quality_gates.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/quality_gates.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/quality_gates.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/quality_gates.json",
},
},
{
"id": "single_episode_explorer_json",
"title": "Single-episode explorer JSON",
"local_path": "docs/data/single_episode_explorer.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/single_episode_explorer.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/single_episode_explorer.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/single_episode_explorer.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/single_episode_explorer.json",
},
},
{
"id": "single_episode_explorer_html",
"title": "Single-episode explorer HTML",
"local_path": "docs/single_episode_explorer.html",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/single_episode_explorer.html",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/single_episode_explorer.html",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/single_episode_explorer.html",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/single_episode_explorer.html",
},
},
{
"id": "research_roadmap_html",
"title": "Interactive research roadmap HTML",
"local_path": "docs/research_roadmap.html",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/research_roadmap.html",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/research_roadmap.html",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/research_roadmap.html",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/research_roadmap.html",
},
},
{
"id": "single_episode_diagnostics_provenance",
"title": "Single-episode diagnostics provenance",
"local_path": "results/single_episode_diagnostics/provenance.json",
"urls": {
"github_raw": "https://raw.githubusercontent.com/ChaoYue0307/ropedia-xperience-10m-task-suite/main/results/single_episode_diagnostics/provenance.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/results/single_episode_diagnostics/provenance.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/results/single_episode_diagnostics/provenance.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/results/single_episode_diagnostics/provenance.json",
},
},
{
"id": "single_episode_object_vocab",
"title": "Single-episode object vocabulary",
"local_path": "results/single_episode_diagnostics/object_labels/object_vocab.json",
"urls": {
"github_raw": "https://raw.githubusercontent.com/ChaoYue0307/ropedia-xperience-10m-task-suite/main/results/single_episode_diagnostics/object_labels/object_vocab.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/results/single_episode_diagnostics/object_labels/object_vocab.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/results/single_episode_diagnostics/object_labels/object_vocab.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/results/single_episode_diagnostics/object_labels/object_vocab.json",
},
},
{
"id": "public_surface_qa_json",
"title": "Public presentation JSON",
"local_path": "docs/data/public_surface_qa.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/public_surface_qa.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/public_surface_qa.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/public_surface_qa.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/public_surface_qa.json",
},
},
{
"id": "rendered_site_check_json",
"title": "Rendered website check JSON",
"local_path": "docs/data/rendered_site_check.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/rendered_site_check.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/rendered_site_check.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/rendered_site_check.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/rendered_site_check.json",
},
},
{
"id": "xperience10m_dataset_card_alignment_json",
"title": "Official Xperience-10M dataset-card alignment JSON",
"local_path": "docs/data/xperience10m_dataset_card_alignment.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/xperience10m_dataset_card_alignment.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/xperience10m_dataset_card_alignment.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/xperience10m_dataset_card_alignment.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/xperience10m_dataset_card_alignment.json",
},
},
{
"id": "source_alignment_json",
"title": "Source-alignment JSON",
"local_path": "docs/data/source_alignment_audit.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/source_alignment_audit.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/source_alignment_audit.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/source_alignment_audit.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/source_alignment_audit.json",
},
},
{
"id": "project_status_json",
"title": "Project status JSON",
"local_path": "docs/data/project_status.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/project_status.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/project_status.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/project_status.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/project_status.json",
},
},
{
"id": "omni_model_comparison_json",
"title": "Omni model comparison JSON",
"local_path": "docs/data/omni_model_comparison.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/omni_model_comparison.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/omni_model_comparison.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/omni_model_comparison.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/omni_model_comparison.json",
},
},
{
"id": "evaluation_protocol_json",
"title": "Evaluation protocol JSON",
"local_path": "docs/data/evaluation_protocol.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/evaluation_protocol.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/evaluation_protocol.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/evaluation_protocol.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/evaluation_protocol.json",
},
},
{
"id": "research_takeaways_json",
"title": "Research takeaways JSON",
"local_path": "docs/data/research_takeaways.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/research_takeaways.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/research_takeaways.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/research_takeaways.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/research_takeaways.json",
},
},
{
"id": "research_roadmap_json",
"title": "Research roadmap JSON",
"local_path": "docs/data/research_roadmap.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/research_roadmap.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/research_roadmap.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/research_roadmap.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/research_roadmap.json",
},
},
{
"id": "research_roadmap_interactive_json",
"title": "Interactive research roadmap JSON",
"local_path": "docs/data/research_roadmap_interactive.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/research_roadmap_interactive.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/research_roadmap_interactive.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/research_roadmap_interactive.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/research_roadmap_interactive.json",
},
},
{
"id": "figure_index_json",
"title": "Figure index JSON",
"local_path": "docs/data/figure_index.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/figure_index.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/figure_index.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/figure_index.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/figure_index.json",
},
},
{
"id": "task_walkthroughs_json",
"title": "Task walkthrough JSON",
"local_path": "docs/data/task_walkthroughs.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/task_walkthroughs.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/task_walkthroughs.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/task_walkthroughs.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/task_walkthroughs.json",
},
},
{
"id": "task_surface_integrity_json",
"title": "Task-surface integrity JSON",
"local_path": "docs/data/task_surface_integrity.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/task_surface_integrity.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/task_surface_integrity.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/task_surface_integrity.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/task_surface_integrity.json",
},
},
{
"id": "brand_assets_json",
"title": "Brand assets JSON",
"local_path": "docs/data/brand_assets.json",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/brand_assets.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/brand_assets.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/brand_assets.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/brand_assets.json",
},
},
{
"id": "brand_logo_social_card",
"title": "Brand logo social card",
"local_path": "docs/assets/brand/xperience10m-logo-social-card.png",
"urls": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/assets/brand/xperience10m-logo-social-card.png",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/resolve/main/assets/brand/xperience10m-logo-social-card.png",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/assets/brand/xperience10m-logo-social-card.png",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/assets/brand/xperience10m-logo-social-card.png",
},
},
{
"id": "quality_gates_markdown",
"title": "Quality-gate Markdown",
"local_path": "QUALITY_GATES.md",
"urls": {
"github_raw": "https://raw.githubusercontent.com/ChaoYue0307/ropedia-xperience-10m-task-suite/main/QUALITY_GATES.md",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/QUALITY_GATES.md",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/raw/main/QUALITY_GATES.md",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/raw/main/QUALITY_GATES.md",
},
},
]
# Substring checks consumed by marker_record().
# Each entry has:
#   "id" / "title" - copied into the report
#   "url"          - page fetched and decoded as UTF-8 (undecodable bytes dropped)
#   "required"     - substrings that must all be present in the decoded text
#   "forbidden"    - substrings that must all be absent from the decoded text
# NOTE(review): some forbidden identifiers are assembled with "+", presumably so
# the stale strings never appear verbatim in this file -- confirm before joining.
MARKER_CHECKS = [
{
"id": "github_pages_index_current",
"title": "GitHub Pages index contains current publication markers",
"url": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
"required": [
"evaluation_protocol.json",
"research_takeaways.json",
"Research Takeaways",
"project_status.json",
"xperience10m-taskfirst-v13-modality-xl",
"Interactive task walkthrough.",
"taskPlayer",
"Action Recognition",
"data/task_walkthroughs.json",
"research_roadmap.html",
"research_roadmap_interactive.json",
"Qwen3-Omni LoRA Final Diagnostic Result",
"Action/Subtask Error-Analysis Pass",
"100.00%",
"omni_model_comparison.json",
"ropedia-qwen3-omni-lora-128ep",
"Cosmos3-Super has a verified base-weight JSON-task evaluation plus a camera-pose forward-dynamics contract audit",
],
"forbidden": [
"xperience10m-" + "taskfirst-v10",
"xperience10m-" + "modalities-v9-large-atlas",
"artifact-id",
],
},
{
"id": "hf_space_index_current",
"title": "HF Space index contains current publication markers",
"url": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/index.html",
"required": [
"evaluation_protocol.json",
"research_takeaways.json",
"Research Takeaways",
"project_status.json",
"xperience10m-taskfirst-v13-modality-xl",
"Interactive task walkthrough.",
"taskPlayer",
"Action Recognition",
"data/task_walkthroughs.json",
"research_roadmap.html",
"research_roadmap_interactive.json",
"Qwen3-Omni LoRA Final Diagnostic Result",
"Action/Subtask Error-Analysis Pass",
"100.00%",
"omni_model_comparison.json",
"ropedia-qwen3-omni-lora-128ep",
"Cosmos3-Super has a verified base-weight JSON-task evaluation plus a camera-pose forward-dynamics contract audit",
],
"forbidden": [
"xperience10m-" + "taskfirst-v10",
"xperience10m-" + "modalities-v9-large-atlas",
"artifact-id",
],
},
{
"id": "hf_artifacts_card_current",
"title": "HF artifact card links current result packages",
"url": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/raw/main/README.md",
"required": [
"docs/data/omni_finetune_verified_result.json",
"docs/data/omni_model_comparison.json",
"100.00% JSON validity",
"Cosmos3-Super",
"ropedia-qwen3-omni-lora-128ep",
],
"forbidden": ["xperience10m-" + "taskfirst-v10"],
},
{
"id": "github_pages_explorer_current",
"title": "GitHub Pages explorer contains current diagnostics markers",
"url": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/single_episode_explorer.html",
"required": [
"Single-Episode Research Explorer",
"data/single_episode_explorer.json",
"window-level exported artifacts only",
"Feature Blocks",
"Diagnostics",
"object labels",
"prediction rows",
"ablation",
"alignment",
],
"forbidden": LOCAL_PATH_FORBIDDEN_MARKERS,
},
{
"id": "hf_space_explorer_current",
"title": "HF Space explorer contains current diagnostics markers",
"url": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/single_episode_explorer.html",
"required": [
"Single-Episode Research Explorer",
"data/single_episode_explorer.json",
"window-level exported artifacts only",
"Feature Blocks",
"Diagnostics",
"object labels",
"prediction rows",
"ablation",
"alignment",
],
"forbidden": LOCAL_PATH_FORBIDDEN_MARKERS,
},
{
"id": "hf_model_card_current",
"title": "HF model card links current result packages",
"url": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/raw/main/README.md",
"required": [
"docs/data/omni_finetune_verified_result.json",
"docs/data/omni_model_comparison.json",
"100.00%",
"Cosmos3-Super",
"ropedia-qwen3-omni-lora-128ep",
],
"forbidden": ["xperience10m-" + "taskfirst-v10"],
},
]
# Published JSON reports that local_path_checks() expands into one marker check
# per (report, surface) pair. Keys are report ids; each value holds a "title"
# and a "paths" mapping of surface name -> public URL of that report.
# The generated checks require the text '"status": "pass"' and forbid every
# entry of LOCAL_PATH_FORBIDDEN_MARKERS.
PATH_HYGIENE_REPORTS = {
"mirror_parity": {
"title": "Mirror parity JSON excludes local paths",
"paths": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/mirror_parity.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/mirror_parity.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/mirror_parity.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/mirror_parity.json",
},
},
"publication_audit": {
"title": "Publication package JSON excludes local paths",
"paths": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/publication_audit.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/publication_audit.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/publication_audit.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/publication_audit.json",
},
},
"website_integrity": {
"title": "Website integrity JSON excludes local paths",
"paths": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/website_integrity.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/website_integrity.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/website_integrity.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/website_integrity.json",
},
},
"public_surface_qa": {
"title": "Public presentation JSON excludes local paths",
"paths": {
"github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/data/public_surface_qa.json",
"hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite/raw/main/data/public_surface_qa.json",
"hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts/resolve/main/docs/data/public_surface_qa.json",
"hf_model": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines/resolve/main/metrics/public_surface_qa.json",
},
},
}
def local_path_checks() -> list[dict]:
    """Expand ``PATH_HYGIENE_REPORTS`` into marker-check definitions.

    One check is produced per (report, surface) pair, in the iteration order
    of the report table and then of each report's ``paths`` mapping.

    Returns:
        A list of dicts with the same keys as the ``MARKER_CHECKS`` entries.
        Every check requires the text ``"status": "pass"`` and forbids each
        entry of ``LOCAL_PATH_FORBIDDEN_MARKERS``.
    """
    return [
        {
            "id": f"{surface}_{report_id}_local_path_check",
            "title": f"{surface}: {report['title']}",
            "url": url,
            "required": ['"status": "pass"'],
            "forbidden": LOCAL_PATH_FORBIDDEN_MARKERS,
        }
        for report_id, report in PATH_HYGIENE_REPORTS.items()
        for surface, url in report["paths"].items()
    ]
def qwen3_lora_upload_dir() -> Path | None:
    """Return the first path in ``QWEN3_LORA_UPLOAD_DIR_CANDIDATES`` that exists.

    Candidates are probed in list order; ``None`` is returned when none of
    them is present on disk.
    """
    present = (
        candidate
        for candidate in QWEN3_LORA_UPLOAD_DIR_CANDIDATES
        if candidate.exists()
    )
    return next(present, None)
def display_local_path(path: Path) -> str:
    """Render *path* for the report without exposing an absolute location.

    The resolved path is expressed relative to ``ROOT`` when possible, then
    relative to ``ROOT.parent`` with a ``../`` prefix. If it lies under
    neither, only the final path component is returned.
    """
    target = path.resolve()
    anchors = (
        (ROOT.resolve(), ""),
        (ROOT.parent.resolve(), "../"),
    )
    for anchor, prefix in anchors:
        try:
            relative = target.relative_to(anchor)
        except ValueError:
            # target is not located under this anchor; try the next one.
            continue
        return prefix + relative.as_posix()
    return path.name
def qwen3_lora_hash_groups() -> list[dict]:
    """Build hash-group entries for the Qwen3 LoRA adapter repo.

    Returns an empty list when no local upload directory exists. Otherwise
    one entry (same shape as the ``HASH_GROUPS`` items) is produced for each
    of the fixed card/config files plus every ``adapter_model*.safetensors``
    file, sorted by name. Files that are absent locally are skipped.
    """
    upload_dir = qwen3_lora_upload_dir()
    if upload_dir is None:
        return []

    fixed_names = ["README.md", "upload_manifest.json", "adapter_config.json"]
    weight_names = sorted(
        weights.name for weights in upload_dir.glob("adapter_model*.safetensors")
    )

    entries = []
    for filename in fixed_names + weight_names:
        local_file = upload_dir / filename
        if local_file.exists():
            # Dots and dashes are mapped to underscores to form the id suffix.
            slug = filename.replace('.', '_').replace('-', '_')
            remote = f"https://huggingface.co/{QWEN3_LORA_REPO_ID}/resolve/main/{filename}"
            entries.append(
                {
                    "id": f"qwen3_lora_{slug}",
                    "title": f"Qwen3-Omni LoRA repo file: {filename}",
                    "local_path": display_local_path(local_file),
                    "urls": {"hf_qwen3_lora_model": remote},
                }
            )
    return entries
def sha256_bytes(data: bytes) -> str:
    """Return the lowercase hexadecimal SHA-256 digest of *data*."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 1 MiB blocks so it is never held in memory whole.
    """
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:
                # An empty read marks end of file.
                break
            hasher.update(block)
    return hasher.hexdigest()
def sanitize_url(url: str) -> str:
    """Return *url* with its query string and fragment removed.

    Scheme, network location and path are kept unchanged.
    """
    stripped = urlsplit(url)._replace(query="", fragment="")
    return stripped.geturl()
def _fetch_failure(url: str, status_code: int | None, error: str) -> dict:
    """Build the failure record shared by every error path of ``fetch``."""
    return {
        "ok": False,
        "status_code": status_code,
        "bytes": 0,
        "sha256": None,
        "error": error,
        "final_url": url,
    }


def _fetch_via_curl_or_fail(url: str, error: str) -> dict:
    """Retry *url* with curl; fall back to a failure record carrying *error*."""
    fallback = fetch_with_curl(url)
    if fallback["ok"]:
        return fallback
    return _fetch_failure(url, None, error)


def fetch(url: str) -> dict:
    """Download *url* and describe the outcome as a plain dict.

    Args:
        url: Public URL to retrieve.

    Returns:
        On success: ``ok`` (True), ``status_code``, ``bytes`` (body length),
        ``sha256`` (hex digest of the body), ``body`` (raw bytes) and
        ``final_url`` (response URL without query or fragment).
        On failure: ``ok`` (False), ``status_code`` (the HTTP code, or None
        for transport errors), ``bytes`` 0, ``sha256`` None, ``error`` and
        ``final_url`` (the requested URL).

    An HTTP error status is reported directly. Transport-level errors are
    retried once through ``fetch_with_curl``; the original urllib error is
    reported only if curl does not succeed either.
    """
    request = Request(url, headers={"User-Agent": USER_AGENT})
    try:
        with urlopen(request, timeout=TIMEOUT_SECONDS) as response:
            body = response.read()
            return {
                "ok": True,
                "status_code": int(getattr(response, "status", 200)),
                "bytes": len(body),
                "sha256": sha256_bytes(body),
                "body": body,
                "final_url": sanitize_url(response.geturl()),
            }
    except HTTPError as exc:
        # The server answered with an error status; curl would get the same.
        return _fetch_failure(url, exc.code, str(exc))
    except URLError as exc:
        # URLError subclasses OSError, so it must be handled before the
        # generic OSError clause or this branch can never run and the
        # underlying reason is lost from the report.
        return _fetch_via_curl_or_fail(url, str(exc.reason))
    except OSError as exc:
        # Covers TimeoutError and other socket-level failures, including
        # those raised while reading the body.
        return _fetch_via_curl_or_fail(url, str(exc))
def fetch_with_curl(url: str) -> dict:
    """Download *url* by running the ``curl`` executable.

    curl follows redirects (``-L``), is limited to ``TIMEOUT_SECONDS``
    (``--max-time``), sends ``USER_AGENT`` (``-A``), writes the body to a
    temporary file (``-o``) and prints the HTTP status code and effective URL
    on stdout (``-w``).

    Args:
        url: Public URL to retrieve.

    Returns:
        A dict shaped like the result of ``fetch``. ``ok`` is True only when
        curl exits with status 0 and the HTTP status is in the range 200-399.
        When curl exits non-zero, or cannot be started at all, a failure
        record with an ``error`` message is returned instead of raising.
    """
    # Only the file name is needed here; curl itself writes the contents.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp_path = Path(tmp.name)
    try:
        try:
            result = subprocess.run(
                [
                    "curl",
                    "-L",
                    "-sS",
                    "--max-time",
                    str(TIMEOUT_SECONDS),
                    "-A",
                    USER_AGENT,
                    "-o",
                    str(tmp_path),
                    "-w",
                    "%{http_code}\n%{url_effective}",
                    url,
                ],
                check=False,
                capture_output=True,
                text=True,
            )
        except OSError as exc:
            # curl is missing or not executable. This function runs as a
            # fallback inside an exception handler, so report the problem as
            # a failed fetch rather than aborting the whole verification run.
            return {
                "ok": False,
                "status_code": None,
                "bytes": 0,
                "sha256": None,
                "error": f"curl could not be started: {exc}",
                "final_url": sanitize_url(url),
            }
        # stdout is "<status>\n<effective url>" as requested through -w.
        status_text, _, final_url = result.stdout.partition("\n")
        status_code = int(status_text.strip() or "0")
        body = tmp_path.read_bytes() if tmp_path.exists() else b""
        if result.returncode != 0:
            return {
                "ok": False,
                "status_code": status_code or None,
                "bytes": 0,
                "sha256": None,
                "error": result.stderr.strip() or f"curl exited {result.returncode}",
                "final_url": sanitize_url(final_url.strip() or url),
            }
        return {
            "ok": 200 <= status_code < 400,
            "status_code": status_code,
            "bytes": len(body),
            "sha256": sha256_bytes(body),
            "body": body,
            "final_url": sanitize_url(final_url.strip() or url),
        }
    finally:
        # Always remove the temporary body file, whatever the outcome.
        tmp_path.unlink(missing_ok=True)
def hash_group_record(group: dict) -> dict:
    """Compare one local artifact with every public mirror listed for it.

    Args:
        group: An entry shaped like the items of ``HASH_GROUPS``
            (``id``, ``title``, ``local_path``, ``urls``).

    Returns:
        A report record with ``id``, ``title``, ``status`` ("pass" or
        "fail"), ``local`` (path, existence, size, SHA-256), ``mirrors``
        (per-surface fetch metadata without the body) and ``failures``.
        A failure is recorded when the local file is missing, when a fetch
        fails, or when a fetched digest differs from the local digest.
    """
    declared = Path(group["local_path"])
    resolved = declared if declared.is_absolute() else ROOT / declared
    present = resolved.exists()
    local = {
        "path": group["local_path"],
        "exists": present,
        "bytes": resolved.stat().st_size if present else 0,
        "sha256": sha256_file(resolved) if present else None,
    }

    failures = []
    if not present:
        failures.append({"surface": "local", "kind": "missing", "path": group["local_path"]})

    mirrors = {}
    for surface, url in group["urls"].items():
        fetched = fetch(url)
        # Keep the metadata only; the raw body does not belong in the report.
        metadata = {key: value for key, value in fetched.items() if key != "body"}
        mirrors[surface] = {**metadata, "url": url}
        if not fetched["ok"]:
            failures.append(
                {"surface": surface, "kind": "fetch_failed", "url": url, "error": fetched.get("error")}
            )
        elif present and fetched["sha256"] != local["sha256"]:
            failures.append(
                {
                    "surface": surface,
                    "kind": "hash_mismatch",
                    "url": url,
                    "expected_sha256": local["sha256"],
                    "actual_sha256": fetched["sha256"],
                }
            )

    return {
        "id": group["id"],
        "title": group["title"],
        "status": "fail" if failures else "pass",
        "local": local,
        "mirrors": mirrors,
        "failures": failures,
    }
def marker_record(check: dict) -> dict:
    """Fetch one page and test it for required and forbidden substrings.

    Args:
        check: An entry shaped like the items of ``MARKER_CHECKS``
            (``id``, ``title``, ``url``, ``required``, ``forbidden``).

    Returns:
        A report record with the fetch metadata (body omitted), the missing
        required markers, the forbidden markers that were found, and a
        ``failures`` list. When the fetch fails, only a ``fetch_failed``
        failure is recorded and both marker lists stay empty.
    """
    fetched = fetch(check["url"])
    missing = []
    forbidden_hits = []
    failures = []

    if fetched["ok"]:
        # Undecodable bytes are dropped so a stray byte cannot abort the check.
        page = fetched["body"].decode("utf-8", errors="ignore")
        missing = [marker for marker in check["required"] if marker not in page]
        forbidden_hits = [marker for marker in check["forbidden"] if marker in page]
        if missing:
            failures.append({"kind": "missing_required_markers", "markers": missing})
        if forbidden_hits:
            failures.append({"kind": "forbidden_markers_present", "markers": forbidden_hits})
    else:
        failures.append({"kind": "fetch_failed", "url": check["url"], "error": fetched.get("error")})

    fetch_metadata = {key: value for key, value in fetched.items() if key != "body"}
    return {
        "id": check["id"],
        "title": check["title"],
        "url": check["url"],
        "status": "fail" if failures else "pass",
        "fetch": fetch_metadata,
        "required_marker_count": len(check["required"]),
        "missing_markers": missing,
        "forbidden_markers_present": forbidden_hits,
        "failures": failures,
    }
def build_report() -> dict:
    """Run every hash and marker check and assemble the full status report.

    Hash groups come from ``HASH_GROUPS`` plus ``qwen3_lora_hash_groups()``;
    marker checks come from ``MARKER_CHECKS`` plus ``local_path_checks()``.

    Returns:
        A dict with the overall ``status`` ("pass" only when no check
        recorded a failure), a UTC timestamp, the per-check records, and a
        flattened ``failures`` list in which each failure is tagged with the
        id of the check that produced it.
    """
    hash_records = []
    for group in HASH_GROUPS + qwen3_lora_hash_groups():
        hash_records.append(hash_group_record(group))

    marker_records = []
    for check in MARKER_CHECKS + local_path_checks():
        marker_records.append(marker_record(check))

    failures = []
    for record in hash_records + marker_records:
        for failure in record["failures"]:
            failures.append({"check": record["id"], **failure})

    overall = "fail" if failures else "pass"
    return {
        "title": "Ropedia Xperience-10M Live Publication Status",
        "status": overall,
        "checked_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "scope": (
            "Live GitHub Pages, GitHub raw, Hugging Face Space, artifact dataset, "
            "baseline model mirrors, and the Qwen3 LoRA adapter repo when the final "
            "upload package exists locally."
        ),
        "hash_groups": hash_records,
        "marker_checks": marker_records,
        "failure_count": len(failures),
        "failures": failures,
    }
def main() -> int:
    """Command-line entry point: verify the live surfaces and report.

    Options:
        --output PATH  Where to write the JSON report (default: ``DEFAULT_OUTPUT``).
        --no-write     Run the checks and print the status without writing.

    Returns:
        0 when every check passed, 1 otherwise. On failure, at most the
        first 20 failures are printed, followed by a count of the rest.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT)
    parser.add_argument("--no-write", action="store_true", help="Verify and print status without updating the report file.")
    args = parser.parse_args()

    report = build_report()
    if args.no_write:
        print(f"{report['status'].upper()}: live publication verification")
    else:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        args.output.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
        print(f"{report['status'].upper()}: wrote {args.output}")

    if report["status"] == "pass":
        return 0

    all_failures = report["failures"]
    for failure in all_failures[:20]:
        print(f"- {failure}")
    if len(all_failures) > 20:
        print(f"- ... {len(all_failures) - 20} more failures")
    return 1


if __name__ == "__main__":
    raise SystemExit(main())