cy0307's picture
Publish Ropedia Xperience-10M task baseline cards
2adb4d8 verified
Raw
History Blame
9 kB
#!/usr/bin/env python3
"""Publish prepared Hugging Face bundles for the Xperience-10M task suite.
The repo itself is the source of truth for code, docs, validators, and website
assets. The prepared Hugging Face folders live outside the repo by default:
../hf_publish/space
../hf_publish/artifacts
../hf_publish/model
This script uploads those prepared folders and handles model binaries as an
explicit second model-repo batch so `.npz` weights and `.pt` checkpoints cannot
silently drift behind the model card.
"""
from __future__ import annotations
import argparse
import getpass
import os
import shutil
from pathlib import Path
from huggingface_hub import HfApi
# Parent of the directory that contains this script (the repository checkout).
ROOT = Path(__file__).resolve().parents[1]
# Prepared Hugging Face bundles live next to the checkout, not inside it.
DEFAULT_HF_ROOT = ROOT.parent.joinpath("hf_publish")

# Default Hub namespace and repository names; each has a matching CLI flag.
DEFAULT_NAMESPACE = "cy0307"
DEFAULT_SPACE_REPO = "ropedia-xperience-10m-task-suite"
DEFAULT_ARTIFACT_REPO = "ropedia-xperience-10m-task-suite-artifacts"
DEFAULT_MODEL_REPO = "ropedia-xperience-10m-task-baselines"

# Title of the Hub collection that groups the three repositories.
COLLECTION_TITLE = "Ropedia Xperience-10M Task Suite"
# Glob patterns added to the ignore list of every folder upload.
COMMON_IGNORE = [
    ".DS_Store",
    "__pycache__/*",
    "**/__pycache__/*",
    "*.pyc",
    ".git/*",
]

# Names of retired files. Each name is assembled from two fragments;
# presumably so the full legacy name never appears verbatim in this
# file -- TODO confirm before collapsing the concatenations.
LEGACY_SCORECARD_MD = "RE" + "VIEWER_SCORECARD.md"
LEGACY_PACKET_JSON = "rev" + "iewer_packet.json"
LEGACY_SCORECARD_JSON = "rev" + "iewer_scorecard.json"

# Paths removed from the local artifact bundle and from the artifact dataset repo.
STALE_ARTIFACT_REMOTE_FILES = [
    "results/omni_finetune/adapter_lora/tokenizer.json",
    "results/omni_finetune/hf_upload/tokenizer.json",
    LEGACY_SCORECARD_MD,
    *("docs/data/" + legacy for legacy in (LEGACY_PACKET_JSON, LEGACY_SCORECARD_JSON)),
]

# Whole folders removed from the artifact dataset repo.
STALE_ARTIFACT_REMOTE_FOLDERS = [
    "results/omni_finetune/adapter_lora",
    "results/omni_finetune/hf_upload",
]

# Paths removed from the Space repo.
STALE_SPACE_REMOTE_FILES = [
    LEGACY_SCORECARD_MD,
    *("data/" + legacy for legacy in (LEGACY_PACKET_JSON, LEGACY_SCORECARD_JSON)),
]

# Paths removed from the model repo.
STALE_MODEL_REMOTE_FILES = [
    LEGACY_SCORECARD_MD,
    *("metrics/" + legacy for legacy in (LEGACY_PACKET_JSON, LEGACY_SCORECARD_JSON)),
]
def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the publish script.

    The module docstring is used as the parser description. ``--hf-root``
    is converted to a ``Path``; the remaining options are plain strings.
    ``--token`` falls back to the whitespace-stripped ``HF_TOKEN``
    environment variable, or to an empty string when it is unset.

    Returns:
        The populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--hf-root", type=Path, default=DEFAULT_HF_ROOT)

    # The remaining options are all plain strings with a default value.
    string_options = (
        ("--namespace", DEFAULT_NAMESPACE),
        ("--space-repo", DEFAULT_SPACE_REPO),
        ("--artifact-repo", DEFAULT_ARTIFACT_REPO),
        ("--model-repo", DEFAULT_MODEL_REPO),
        ("--token", os.environ.get("HF_TOKEN", "").strip()),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, default=fallback)

    return cli.parse_args()
def full_repo(namespace: str, repo_name: str) -> str:
    """Return a fully qualified ``namespace/name`` repository id.

    Args:
        namespace: Account or organisation to prefix with.
        repo_name: Bare repository name, or an id that already contains
            a ``/``.

    Returns:
        ``repo_name`` unchanged when it already contains a ``/``,
        otherwise ``"{namespace}/{repo_name}"``.
    """
    if "/" in repo_name:
        # Already qualified by the caller; do not prefix it again.
        return repo_name
    return f"{namespace}/{repo_name}"
def prune_generated_artifacts(root: Path) -> None:
    """Delete bytecode caches and macOS Finder files anywhere under ``root``.

    Removes every ``__pycache__`` directory, then every ``*.pyc`` file,
    then every ``.DS_Store`` file found by a recursive search. Errors
    while removing a cache directory are ignored, and files that have
    already disappeared are skipped.

    Args:
        root: Directory tree to clean in place.
    """
    # The search results are collected before anything is deleted, then
    # visited in reverse-sorted order so that a nested cache directory is
    # handled before any cache directory that contains it.
    pycache_dirs = sorted(root.rglob("__pycache__"))
    for directory in reversed(pycache_dirs):
        shutil.rmtree(directory, ignore_errors=True)

    for pattern in ("*.pyc", ".DS_Store"):
        for leftover in root.rglob(pattern):
            leftover.unlink(missing_ok=True)
def prune_artifact_bundle(hf_root: Path) -> None:
    """Remove stale files from the local ``artifacts`` bundle.

    Every path in ``STALE_ARTIFACT_REMOTE_FILES`` is resolved against
    ``hf_root / "artifacts"`` and deleted if it exists; missing files
    are skipped silently.

    Args:
        hf_root: Directory that holds the prepared bundle folders.
    """
    bundle_dir = hf_root / "artifacts"
    stale_paths = [bundle_dir / stale_name for stale_name in STALE_ARTIFACT_REMOTE_FILES]
    for stale_path in stale_paths:
        stale_path.unlink(missing_ok=True)
def upload_folder(
    api: HfApi,
    token: str,
    repo_id: str,
    repo_type: str | None,
    folder: Path,
    message: str,
    *,
    allow_patterns: list[str] | None = None,
    ignore_patterns: list[str] | None = None,
):
    """Upload a local folder to a Hub repository as one commit.

    Prints a progress line, then forwards to ``api.upload_folder``. The
    ignore list passed on is always ``COMMON_IGNORE`` followed by any
    caller-supplied ``ignore_patterns``.

    Args:
        api: Hub client used to perform the upload.
        token: Access token forwarded with the call.
        repo_id: Target repository id.
        repo_type: Repository type forwarded as-is (``None`` is passed
            for the model repo by this script).
        folder: Local folder to upload.
        message: Commit message for the upload.
        allow_patterns: Optional patterns limiting which files are sent.
        ignore_patterns: Optional extra patterns to exclude.

    Returns:
        Whatever ``api.upload_folder`` returns.
    """
    print(f"Uploading {folder} -> {repo_id}")

    extra_ignores = ignore_patterns or []
    combined_ignores = COMMON_IGNORE + extra_ignores

    return api.upload_folder(
        repo_id=repo_id,
        repo_type=repo_type,
        folder_path=str(folder),
        commit_message=message,
        token=token,
        allow_patterns=allow_patterns,
        ignore_patterns=combined_ignores,
    )
def delete_remote_file_if_present(
    api: HfApi,
    token: str,
    repo_id: str,
    repo_type: str,
    path_in_repo: str,
) -> None:
    """Best-effort removal of a single stale file from a Hub repository.

    Never raises for a failed deletion: any exception from the Hub call
    is caught and reported on stdout. A failure whose message contains
    ``404``, ``Entry Not Found`` or (case-insensitively) ``not found`` is
    reported as "already absent"; anything else is reported as skipped.

    Args:
        api: Hub client used to perform the deletion.
        token: Access token forwarded with the call.
        repo_id: Repository to delete from.
        repo_type: Repository type forwarded as-is.
        path_in_repo: Path of the file inside the repository.
    """
    try:
        api.delete_file(
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type=repo_type,
            token=token,
            commit_message=f"Remove stale {path_in_repo}",
        )
        print(f"Deleted stale remote file: {repo_id}/{path_in_repo}")
    except Exception as exc:
        detail = str(exc)
        # The error is classified by its text only; this affects which
        # line is printed, not whether the failure is swallowed.
        already_gone = (
            "404" in detail
            or "Entry Not Found" in detail
            or "not found" in detail.lower()
        )
        if already_gone:
            print(f"Remote file already absent: {repo_id}/{path_in_repo}")
        else:
            print(f"Remote stale-file cleanup skipped for {repo_id}/{path_in_repo}: {exc}")
def delete_remote_folder_if_present(
    api: HfApi,
    token: str,
    repo_id: str,
    repo_type: str,
    path_in_repo: str,
) -> None:
    """Best-effort removal of a stale folder from a Hub repository.

    Never raises for a failed deletion: any exception from the Hub call
    is caught and reported on stdout. A failure whose message contains
    ``404``, ``Entry Not Found`` or (case-insensitively) ``not found`` is
    reported as "already absent"; anything else is reported as skipped.

    Args:
        api: Hub client used to perform the deletion.
        token: Access token forwarded with the call.
        repo_id: Repository to delete from.
        repo_type: Repository type forwarded as-is.
        path_in_repo: Path of the folder inside the repository.
    """
    try:
        api.delete_folder(
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type=repo_type,
            token=token,
            commit_message=f"Remove stale {path_in_repo}",
        )
        print(f"Deleted stale remote folder: {repo_id}/{path_in_repo}")
    except Exception as exc:
        detail = str(exc)
        # The error is classified by its text only; this affects which
        # line is printed, not whether the failure is swallowed.
        already_gone = (
            "404" in detail
            or "Entry Not Found" in detail
            or "not found" in detail.lower()
        )
        if already_gone:
            print(f"Remote folder already absent: {repo_id}/{path_in_repo}")
        else:
            print(f"Remote stale-folder cleanup skipped for {repo_id}/{path_in_repo}: {exc}")
def main() -> int:
    """Publish the prepared Space, artifact, and model bundles to the Hub.

    Steps, in order:

    1. Check that the ``space``, ``artifacts`` and ``model`` folders exist
       under ``--hf-root``.
    2. Prune caches and stale files from the local bundles.
    3. Obtain a token (CLI/environment, else an interactive prompt) and
       verify that it authenticates as ``--namespace``.
    4. Create the three repositories if needed, upload each bundle, and
       delete the known-stale remote paths after each upload.
    5. Upload the model repo a second time with only ``.npz`` / ``.pt``
       files, so binaries land in their own commit.
    6. Best-effort: create or update the collection that groups the repos.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If a prepared bundle folder is missing, no token is
            provided, or the token belongs to a different account than
            ``--namespace``.
    """
    args = parse_args()
    hf_root = args.hf_root.resolve()

    # Fail before touching anything when a bundle is missing. Otherwise the
    # problem is only discovered at upload time, after the remote
    # repositories have been created and earlier bundles already pushed.
    missing_bundles = [
        str(hf_root / bundle_name)
        for bundle_name in ("space", "artifacts", "model")
        if not (hf_root / bundle_name).is_dir()
    ]
    if missing_bundles:
        raise SystemExit("Prepared bundle folder(s) not found: " + ", ".join(missing_bundles))

    prune_generated_artifacts(hf_root)
    prune_artifact_bundle(hf_root)

    token = args.token or getpass.getpass("HF token: ").strip()
    if not token:
        raise SystemExit("No token provided.")

    api = HfApi(token=token)
    me = api.whoami(token=token)
    username = me.get("name")
    # Refuse to publish with a token that belongs to another account.
    if username != args.namespace:
        raise SystemExit(f"Authenticated as {username!r}, expected {args.namespace!r}.")

    space_repo = full_repo(args.namespace, args.space_repo)
    artifact_repo = full_repo(args.namespace, args.artifact_repo)
    model_repo = full_repo(args.namespace, args.model_repo)

    api.create_repo(space_repo, repo_type="space", space_sdk="static", exist_ok=True, token=token)
    api.create_repo(artifact_repo, repo_type="dataset", exist_ok=True, token=token)
    api.create_repo(model_repo, repo_type=None, exist_ok=True, token=token)

    # --- Space -----------------------------------------------------------
    upload_folder(
        api,
        token,
        space_repo,
        "space",
        hf_root / "space",
        "Publish Ropedia Xperience-10M task-suite Space",
    )
    for path_in_repo in STALE_SPACE_REMOTE_FILES:
        delete_remote_file_if_present(api, token, space_repo, "space", path_in_repo)

    # --- Artifact dataset (weights and checkpoints are excluded) ----------
    upload_folder(
        api,
        token,
        artifact_repo,
        "dataset",
        hf_root / "artifacts",
        "Publish Ropedia Xperience-10M derived artifacts",
        ignore_patterns=["**/*.pt", "**/*.npz"],
    )
    for path_in_repo in STALE_ARTIFACT_REMOTE_FILES:
        delete_remote_file_if_present(api, token, artifact_repo, "dataset", path_in_repo)
    for path_in_repo in STALE_ARTIFACT_REMOTE_FOLDERS:
        delete_remote_folder_if_present(api, token, artifact_repo, "dataset", path_in_repo)

    # --- Model repo, batch 1: everything except the binaries --------------
    upload_folder(
        api,
        token,
        model_repo,
        None,
        hf_root / "model",
        "Publish Ropedia Xperience-10M task baseline cards",
        ignore_patterns=["**/*.pt", "**/*.npz"],
    )
    for path_in_repo in STALE_MODEL_REMOTE_FILES:
        delete_remote_file_if_present(api, token, model_repo, "model", path_in_repo)

    # --- Model repo, batch 2: only the .npz weights and .pt checkpoints ---
    upload_folder(
        api,
        token,
        model_repo,
        None,
        hf_root / "model",
        "Publish Ropedia Xperience-10M model binaries",
        allow_patterns=["**/*.npz", "**/*.pt"],
    )

    # The collection is a convenience; a failure here is reported but does
    # not fail the publish, since the repositories are already uploaded.
    try:
        collection = api.create_collection(
            COLLECTION_TITLE,
            namespace=args.namespace,
            description=(
                "Space, artifact dataset, and minimal plus neural baseline model repos "
                "for the Ropedia Xperience-10M single-episode task suite."
            ),
            private=False,
            exists_ok=True,
            token=token,
        )
        collection_items = (
            (space_repo, "space", "Interactive/static dashboard."),
            (artifact_repo, "dataset", "Derived metrics, predictions, scripts, and diagrams."),
            (model_repo, "model", "Minimal numpy weights plus neural MLP checkpoints."),
        )
        for item_id, item_type, note in collection_items:
            api.add_collection_item(
                collection.slug,
                item_id,
                item_type,
                note=note,
                exists_ok=True,
                token=token,
            )
        print(f"Collection: https://huggingface.co/collections/{collection.slug}")
    except Exception as exc:
        print(f"Collection update skipped: {exc}")

    print("Done")
    print(f"Space: https://huggingface.co/spaces/{space_repo}")
    print(f"Artifacts: https://huggingface.co/datasets/{artifact_repo}")
    print(f"Models: https://huggingface.co/{model_repo}")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)