ropedia-xperience-10m-task-baselines / scripts /sync_hf_publish_mirrors.py
cy0307's picture
Update publication mirror utilities
349da51 verified
Raw
History Blame
4.48 kB
#!/usr/bin/env python3
"""Sync repo publication files into the prepared Hugging Face bundles.
The upload step publishes ../hf_publish/{space,artifacts,model}; this helper
keeps those staging folders aligned with the same file groups checked by
validate_mirror_parity.py.
"""
from __future__ import annotations
import argparse
import importlib.util
import json
import shutil
from pathlib import Path
# Repository root: this file sits in <root>/scripts/, so go two levels up.
ROOT = Path(__file__).resolve().parents[1]
# Staging bundles are expected in a sibling directory of the repository.
DEFAULT_HF_ROOT = ROOT.parent.joinpath("hf_publish")
# The parity checker is loaded by path to reuse its file-group lists.
PARITY_SCRIPT = ROOT / "scripts" / "validate_mirror_parity.py"
def load_parity_module():
    """Load ``validate_mirror_parity.py`` from ``PARITY_SCRIPT`` and return it.

    The script is imported by file path under the module name
    ``validate_mirror_parity`` and executed, so its module-level names
    become attributes of the returned module object.

    Returns:
        The executed module object.

    Raises:
        SystemExit: If the script file does not exist, or if no import
            spec or loader can be created for it.
    """
    # spec_from_file_location() builds a spec without checking that the file
    # exists, so a missing script would otherwise only fail inside
    # exec_module() with a raw FileNotFoundError traceback.
    if not PARITY_SCRIPT.is_file():
        raise SystemExit(f"Could not load {PARITY_SCRIPT}")
    spec = importlib.util.spec_from_file_location("validate_mirror_parity", PARITY_SCRIPT)
    if spec is None or spec.loader is None:
        raise SystemExit(f"Could not load {PARITY_SCRIPT}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
def copy_file(src: Path, destinations: list[Path], *, dry_run: bool) -> list[dict]:
    """Copy one source file to each destination and describe every copy.

    Args:
        src: File to copy; it must exist as a regular file.
        destinations: Full target paths to write.
        dry_run: When true, only the records are produced and nothing is
            created or copied.

    Returns:
        One ``{"source": ..., "dest": ...}`` record per destination, in
        order. ``source`` is ``src`` relative to ``ROOT``; both values are
        POSIX-style strings.

    Raises:
        SystemExit: If ``src`` is not an existing file.
    """
    if not src.is_file():
        raise SystemExit(f"Missing source file: {src}")

    # Records are built for every destination whether or not we copy.
    plan = [
        {"source": src.relative_to(ROOT).as_posix(), "dest": target.as_posix()}
        for target in destinations
    ]
    if not dry_run:
        for target in destinations:
            # Staging folders may not exist yet; create them on demand.
            target.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, target)
    return plan
def parse_args() -> argparse.Namespace:
    """Parse this script's command-line options.

    Options:
        --hf-root: Path of the staging root (defaults to ``DEFAULT_HF_ROOT``).
        --dry-run: Boolean flag, false unless given.
        --json: Boolean flag, false unless given.

    Returns:
        The namespace produced by ``argparse`` for the process arguments.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--hf-root", default=DEFAULT_HF_ROOT, type=Path)
    # Boolean switches and their help text (None means no help string).
    switches = {
        "--dry-run": None,
        "--json": "print machine-readable copy records",
    }
    for flag, help_text in switches.items():
        cli.add_argument(flag, action="store_true", help=help_text)
    return cli.parse_args()
def main() -> int:
    """Copy every publication file group into the staging bundles.

    The file lists come from the parity module returned by
    ``load_parity_module()``. Each group maps a source directory under
    ``ROOT`` to several sub-directories of the ``--hf-root`` folder. After
    all groups are processed a summary is printed, either as JSON
    (``--json``) or as a single status line.

    Returns:
        ``0`` once all groups have been processed.
    """
    args = parse_args()
    hf_root = args.hf_root.expanduser().resolve()
    parity = load_parity_module()

    def sync_groups():
        """Yield ``(filenames, source_dir, dest_subdirs)`` one group at a time.

        This is a generator on purpose: each ``parity`` attribute is looked
        up only when its group is reached, after the preceding group has
        been fully processed.
        """
        yield (
            parity.DATA_FILES,
            ROOT / "docs/data",
            ("space/data", "artifacts/docs/data", "model/metrics"),
        )
        yield (
            parity.ASSET_FILES,
            ROOT / "docs/assets",
            ("space/assets", "artifacts/docs/assets", "artifacts/assets", "model/assets"),
        )
        yield (
            parity.SCRIPT_FILES,
            ROOT / "scripts",
            ("artifacts/scripts", "model/scripts"),
        )
        yield (
            parity.WEBSITE_FILES,
            ROOT / "docs",
            ("space", "artifacts/docs"),
        )
        # Result files are the union of the static list and whatever the
        # parity module reports via verified_public_result_files(), sorted.
        yield (
            sorted(set(parity.RESULT_FILES) | set(parity.verified_public_result_files())),
            ROOT / "results",
            ("space/results", "artifacts/results", "model/results"),
        )
        yield (
            parity.DOC_FILES,
            ROOT,
            ("space", "artifacts", "model"),
        )

    copied: list[dict] = []
    for filenames, source_dir, dest_subdirs in sync_groups():
        for filename in filenames:
            source = source_dir / filename
            targets = [hf_root / subdir / filename for subdir in dest_subdirs]
            copied.extend(copy_file(source, targets, dry_run=args.dry_run))

    status = "dry_run" if args.dry_run else "synced"
    summary = {
        "status": status,
        "hf_root": hf_root.as_posix(),
        "copy_count": len(copied),
        "records": copied,
    }
    if not args.json:
        print(f"{status.upper()}: copied {len(copied)} files into {hf_root}")
        return 0
    print(json.dumps(summary, indent=2))
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)