Add files using upload-large-folder tool

b33f717 verified 3 days ago

29.9 kB

	#!/usr/bin/env python3
	"""Publish prepared Hugging Face bundles for the Xperience-10M task suite.

	The repo itself is the source of truth for code, docs, validators, and website
	assets. The prepared Hugging Face folders live outside the repo by default:

	../hf_publish/space
	../hf_publish/artifacts
	../hf_publish/model

	This script uploads those prepared folders and handles model binaries as an
	explicit second model-repo batch so `.npz` weights and `.pt` checkpoints cannot
	silently drift behind the model card.
	"""

	from __future__ import annotations

	import argparse
	import csv
	import getpass
	import json
	import os
	import shutil
	from pathlib import Path

	from huggingface_hub import HfApi, get_token


	# Two directory levels above this file: the script sits one folder below ROOT.
	ROOT = Path(__file__).resolve().parents[1]
	# Prepared Hub bundles live beside the repository, not inside it.
	DEFAULT_HF_ROOT = ROOT.parent / "hf_publish"
	# Defaults for the CLI options declared in parse_args().
	DEFAULT_NAMESPACE = "cy0307"
	DEFAULT_SPACE_REPO = "ropedia-xperience-10m-task-suite"
	DEFAULT_ARTIFACT_REPO = "ropedia-xperience-10m-task-suite-artifacts"
	DEFAULT_MODEL_REPO = "ropedia-xperience-10m-task-baselines"
	DEFAULT_WEIGHTS_RESULTS_REPO = "ropedia-xperience-10m-weights-results"
	DEFAULT_QWEN3_LORA_REPO = "ropedia-qwen3-omni-lora-128ep"
	DEFAULT_COSMOS3_SUPER_LORA_REPO = "ropedia-cosmos3-super-forward-dynamics-lora-128ep"
	# Title passed to create_collection() in main().
	COLLECTION_TITLE = "Ropedia Xperience-10M Task Suite"

	# Patterns excluded from every upload (prepended to per-call ignore lists in
	# upload_folder). They are glob patterns matched against repo-relative paths;
	# NOTE(review): the Hub client is understood to use fnmatch-style matching,
	# where `*` also spans "/" -- confirm against the installed huggingface_hub.
	# The nested variants are spelled out so files below sub-directories are
	# covered as well as files at the bundle root.
	COMMON_IGNORE = [
	    ".DS_Store",
	    "__pycache__/*",
	    "*/__pycache__/*",
	    "*.pyc",
	    "*.log",
	    "**/*.log",
	    "*.pid",
	    "**/*.pid",
	    ".git/*",
	]

	# Legacy file names are assembled from two fragments instead of one literal.
	# NOTE(review): presumably so the complete legacy names never appear verbatim
	# in this file (e.g. for repository-wide text checks) -- confirm the intent.
	LEGACY_SCORECARD_MD = "RE" + "VIEWER_SCORECARD.md"
	LEGACY_PACKET_JSON = "rev" + "iewer_packet.json"
	LEGACY_SCORECARD_JSON = "rev" + "iewer_scorecard.json"

	# Repo-relative files that must no longer exist in the artifact dataset.
	# prune_artifact_bundle() removes them from the local bundle and main()
	# requests their deletion from the remote dataset repo.
	STALE_ARTIFACT_REMOTE_FILES = [
	"results/omni_finetune/adapter_lora/tokenizer.json",
	"results/omni_finetune/hf_upload/tokenizer.json",
	"results/omni_finetune/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/eval.log",
	"results/omni_finetune/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/eval.pid",
	"viewer/dataset_viewer_summary.jsonl",
	LEGACY_SCORECARD_MD,
	"docs/data/" + LEGACY_PACKET_JSON,
	"docs/data/" + LEGACY_SCORECARD_JSON,
	]

	# Whole remote folders main() deletes from the artifact dataset repo.
	STALE_ARTIFACT_REMOTE_FOLDERS = [
	"results/omni_finetune/adapter_lora",
	"results/omni_finetune/hf_upload",
	]

	# Files main() deletes from the Space repo after uploading the Space bundle.
	STALE_SPACE_REMOTE_FILES = [
	"README_GRADIO_RUNTIME.md",
	"README_SPACE_RUNTIME.md",
	LEGACY_SCORECARD_MD,
	"data/" + LEGACY_PACKET_JSON,
	"data/" + LEGACY_SCORECARD_JSON,
	"results/omni_finetune/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/eval.log",
	"results/omni_finetune/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/eval.pid",
	]

	# Files main() deletes from the baseline model repo after the card upload.
	STALE_MODEL_REMOTE_FILES = [
	LEGACY_SCORECARD_MD,
	"metrics/" + LEGACY_PACKET_JSON,
	"metrics/" + LEGACY_SCORECARD_JSON,
	"results/omni_finetune/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/eval.log",
	"results/omni_finetune/xperience10m_qwen3_omni_128ep_structured_json_v3_strict_label_prompt_reuse_lora_eval_test_full/eval.pid",
	]

	# Binary files uploaded one by one to the artifact dataset by
	# upload_allowlisted_artifact_binaries().
	ARTIFACT_BINARY_ALLOWLIST = [
	"results/audio_ablation/raw_logmel_fisheye_cam0_sr16000_mels64_fft512_hop160.npz",
	]

	# YAML `configs:` section merged into the artifact dataset card's front
	# matter. Built from explicit per-line literals so the nesting under each
	# `config_name` entry does not depend on how this source file is indented.
	ARTIFACT_VIEWER_CONFIG = (
	    "configs:\n"
	    "- config_name: episode_sample\n"
	    "  data_files:\n"
	    "  - split: public_sample\n"
	    "    path: viewer/episode_windows.parquet\n"
	    "- config_name: selected_128_windows\n"
	    "  data_files:\n"
	    "  - split: selected_128\n"
	    "    path: viewer/selected128_windows.parquet\n"
	)
	# A card that already mentions this path is treated as already linked.
	ENHANCEMENT_MARKER = "docs/data/task_suite_enhancement_128.json"
	# Markdown section inserted into the artifact and model cards; it starts with
	# a blank line so the heading is separated from the preceding text.
	ENHANCEMENT_CARD_BLOCK = (
	    "\n"
	    "## 128-Episode Enhancement Pack\n"
	    "\n"
	    "The no-new-episode suite push is recorded in `TASK_SUITE_ENHANCEMENT_128.md`\n"
	    "and `docs/data/task_suite_enhancement_128.json`. It recommends\n"
	    "`multiscale_20s10_40s20_80s40`, hierarchical action/subtask targets,\n"
	    "label-normalized scoring, and compact raw-feature shards before adding more\n"
	    "episodes.\n"
	)

	# YAML front matter written at the top of the Space README. Assembled from
	# per-line literals so no stray leading whitespace from this source file can
	# leak into the card (every key must start in column 0).
	SPACE_CARD_METADATA = (
	    "---\n"
	    "title: Ropedia Xperience-10M Task Suite\n"
	    "emoji: 🚀\n"
	    "colorFrom: blue\n"
	    "colorTo: green\n"
	    "sdk: gradio\n"
	    "app_file: app.py\n"
	    "pinned: false\n"
	    "license: mit\n"
	    "short_description: Xperience-10M embodied-AI task-suite dashboard.\n"
	    "tags:\n"
	    "- embodied-ai\n"
	    "- robotics\n"
	    "- multimodal\n"
	    "- xperience-10m\n"
	    "- evaluation\n"
	    "- qwen3-omni\n"
	    "- cosmos\n"
	    "datasets:\n"
	    "- ropedia-ai/xperience-10m-sample\n"
	    "- ropedia-ai/xperience-10m\n"
	    "models:\n"
	    "- cy0307/ropedia-xperience-10m-task-baselines\n"
	    "- cy0307/ropedia-xperience-10m-weights-results\n"
	    "- cy0307/ropedia-qwen3-omni-lora-128ep\n"
	    "- cy0307/ropedia-cosmos3-super-forward-dynamics-lora-128ep\n"
	    "---\n"
	)

	# Full content of the Space's requirements.txt.
	SPACE_REQUIREMENTS = "gradio>=4.44.0\n"

	# YAML front matter written at the top of the baseline model README.
	BASELINE_MODEL_CARD_METADATA = (
	    "---\n"
	    "license: mit\n"
	    "library_name: pytorch\n"
	    "tags:\n"
	    "- embodied-ai\n"
	    "- robotics\n"
	    "- multimodal\n"
	    "- xperience-10m\n"
	    "- baseline\n"
	    "- evaluation\n"
	    "- qwen3-omni\n"
	    "- cosmos\n"
	    "datasets:\n"
	    "- ropedia-ai/xperience-10m-sample\n"
	    "- ropedia-ai/xperience-10m\n"
	    "metrics:\n"
	    "- accuracy\n"
	    "- f1\n"
	    "- precision\n"
	    "- recall\n"
	    "---\n"
	)


	def load_json(path: Path) -> dict:
	    """Parse the UTF-8 JSON document at ``path``; a missing file yields ``{}``.

	    The parsed value is returned as-is, so a file whose top level is a JSON
	    array comes back as a list despite the annotation.
	    """
	    if path.exists():
	        return json.loads(path.read_text(encoding="utf-8"))
	    return {}


	def find_status_readout(project_status: dict, area: str, fallback: str) -> str:
	    """Return the ``readout`` of the first status row whose ``area`` matches.

	    ``fallback`` is returned when no row matches or when the matching row has
	    no ``readout`` key.
	    """
	    candidates = (
	        entry for entry in project_status.get("rows", []) if entry.get("area") == area
	    )
	    match = next(candidates, None)
	    if match is None:
	        return fallback
	    return match.get("readout", fallback)


	def read_csv_by_window(path: Path) -> dict[int, dict]:
	    """Index the rows of a CSV file by their integer ``window_index`` column.

	    A missing file yields an empty mapping. When several rows share an index,
	    the last one read is kept.
	    """
	    if not path.exists():
	        return {}
	    by_window = {}
	    with path.open("r", encoding="utf-8", newline="") as handle:
	        for record in csv.DictReader(handle):
	            by_window[int(record["window_index"])] = record
	    return by_window


	def sample_fps(available_modalities: list[dict]) -> float:
	    """Return the ``fps`` of the first modality entry that declares one.

	    Falls back to 20.00137419266181 when no entry carries an ``fps`` key.
	    """
	    missing = object()
	    declared = next(
	        (entry["fps"] for entry in available_modalities if "fps" in entry),
	        missing,
	    )
	    if declared is missing:
	        return 20.00137419266181
	    return float(declared)


	def modality_summary(modality_atlas: dict) -> str:
	    """Return the atlas's modality names joined with ``|``.

	    Each entry contributes its ``id`` (or ``name`` when it has no ``id`` key);
	    entries resolving to an empty value are dropped. ``calibration`` is
	    appended when it is not already listed.
	    """
	    names = [
	        entry.get("id", entry.get("name", ""))
	        for entry in modality_atlas.get("modalities", [])
	    ]
	    names = [name for name in names if name]
	    if "calibration" not in names:
	        names.append("calibration")
	    # Plain pipe separator; an escaped "\|" would leak a backslash into the data.
	    return "|".join(names)


	def parse_multiscale_window_id(window_id: str) -> dict[str, int | str]:
	    """Extract scale, window length and stride from a multiscale window id.

	    Only the text before the first ``:`` is inspected. It is split on ``_``:
	    the first token becomes ``window_scale``, a token of the form ``<digits>f``
	    sets ``window_frames`` and a token of the form ``stride<digits>`` sets
	    ``stride_frames``. Fields that are not found stay at 0; if a form occurs
	    more than once, the last occurrence wins.
	    """
	    prefix = window_id.split(":", 1)[0]
	    parts = prefix.split("_")
	    parsed: dict[str, int | str] = {
	        # str.split always yields at least one element, so parts[0] is safe.
	        "window_scale": parts[0],
	        "window_frames": 0,
	        "stride_frames": 0,
	    }
	    for part in parts:
	        if part.endswith("f") and part[:-1].isdigit():
	            parsed["window_frames"] = int(part[:-1])
	        elif part.startswith("stride") and part.removeprefix("stride").isdigit():
	            parsed["stride_frames"] = int(part.removeprefix("stride"))
	    return parsed


	def selected128_episode_key_to_path(episode_key: str) -> str:
	    """Turn a ``<session>__<episode>`` key into a ``<session>/<episode>`` path.

	    Only the first ``__`` separator is converted; a key without one is
	    returned unchanged.
	    """
	    # One bounded replace covers both cases; no separate "no separator" branch
	    # is needed because replace() is a no-op when "__" is absent.
	    return episode_key.replace("__", "/", 1)


	def write_selected128_viewer_table(artifact_root: Path, viewer_dir: Path) -> None:
	    """Write the selected-128 window table used by the dataset viewer.

	    Reads the exported windows CSV below ``artifact_root``, builds one record
	    per CSV row, checks the row and episode counts against the feature index
	    (with built-in defaults when the index lacks them) and writes
	    ``selected128_windows.jsonl`` into ``viewer_dir``. A Parquet copy is
	    written as well when pandas and a Parquet engine can be imported.

	    Raises:
	        RuntimeError: when the row count or distinct episode count differs
	            from the expected value, or when the selected source episode
	            count is smaller than the exported episode count.
	    """
	    source_csv = artifact_root / "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2/windows.csv"
	    index_doc = load_json(artifact_root / "docs/data/xperience10m_128_episode_feature_index.json")
	    selection_info = index_doc.get("selection_summary", {})
	    processed_info = index_doc.get("processed_summary", {})
	    export_info = processed_info.get("qwen_v6_multiscale_export", {})
	    # `or <default>` also replaces falsy values such as 0 or null.
	    selected_total = int(selection_info.get("selected_episode_count", 128) or 128)
	    wanted_rows = int(export_info.get("num_samples", 34269) or 34269)
	    wanted_episodes = int(export_info.get("num_episodes", 119) or 119)
	    relative_source = source_csv.relative_to(artifact_root).as_posix()

	    table = []
	    with source_csv.open("r", encoding="utf-8", newline="") as handle:
	        for record in csv.DictReader(handle):
	            scale_info = parse_multiscale_window_id(record["id"])
	            first_frame = int(record["start_frame"])
	            last_frame = int(record["end_frame"])
	            window_key = record["id"]
	            episode_key = record["episode_id"]
	            entry = {
	                "evidence_line": "selected_128_episodes",
	                "source_dataset_repo": "ropedia-ai/xperience-10m",
	                "source_access": "gated_upstream_not_redistributed",
	                "source_episode_id": episode_key,
	                "official_episode_path": selected128_episode_key_to_path(episode_key),
	                "window_id": window_key,
	                "window_scale": scale_info["window_scale"],
	                "window_frames": scale_info["window_frames"],
	                "stride_frames": scale_info["stride_frames"],
	                "split": record["split"],
	                "start_frame": first_frame,
	                "end_frame": last_frame,
	                "center_frame": (first_frame + last_frame) // 2,
	                "main_task": record["main_task"],
	                "selected_source_episode_count": selected_total,
	                "exported_window_episode_count": wanted_episodes,
	                "exported_window_count": wanted_rows,
	                "split_policy": "selected 96/16/16 episode split",
	                "feature_index": "docs/data/xperience10m_128_episode_feature_index.json",
	                "source_window_table": relative_source,
	                "raw_data_included": False,
	            }
	            table.append(entry)

	    # Consistency checks run before anything is written to viewer_dir.
	    found_rows = len(table)
	    if found_rows != wanted_rows:
	        raise RuntimeError(f"Expected {wanted_rows} selected-128 rows, found {found_rows} in {source_csv}")
	    found_episodes = len({entry["source_episode_id"] for entry in table})
	    if found_episodes != wanted_episodes:
	        raise RuntimeError(f"Expected {wanted_episodes} exported-window episodes, found {found_episodes}")
	    if selected_total < found_episodes:
	        raise RuntimeError(
	            f"Selected source episode count {selected_total} is smaller than exported episode count {found_episodes}"
	        )

	    serialized = "\n".join(json.dumps(entry, ensure_ascii=True) for entry in table) + "\n"
	    (viewer_dir / "selected128_windows.jsonl").write_text(serialized, encoding="utf-8")
	    try:
	        import pandas as pd

	        pd.DataFrame(table).to_parquet(viewer_dir / "selected128_windows.parquet", index=False)
	    except ImportError:
	        print("pandas/pyarrow unavailable; wrote selected-128 JSONL viewer fallback only")


	def _merge_viewer_config_into_card(readme: str) -> str:
	    """Return ``readme`` with the viewer ``configs:`` section in its front matter.

	    An existing ``configs:`` section (the key line plus the indented or
	    ``-``-prefixed lines that follow it) is replaced. A card without usable
	    front matter gets a new, properly delimited front-matter block; prepending
	    the bare YAML would leave it as visible body text and stack another copy
	    on every run.
	    """
	    if readme.startswith("---"):
	        parts = readme.split("---", 2)
	        if len(parts) == 3:
	            kept_lines = []
	            skip = False
	            for line in parts[1].strip().splitlines():
	                if line.startswith("configs:"):
	                    skip = True
	                    continue
	                # The old section ends at the first line that is neither
	                # indented nor a list item.
	                if skip and not line.startswith((" ", "-")):
	                    skip = False
	                if not skip:
	                    kept_lines.append(line)
	            metadata = "\n".join(kept_lines).rstrip() + "\n" + ARTIFACT_VIEWER_CONFIG
	            return "---\n" + metadata + "---" + parts[2]
	    return "---\n" + ARTIFACT_VIEWER_CONFIG + "---\n\n" + readme.lstrip("\n")


	def ensure_artifact_dataset_viewer_config(hf_root: Path) -> None:
	    """Expose public sample and selected-128 windows as separate HF-viewable tables.

	    Builds ``viewer/episode_windows.jsonl`` (plus a Parquet copy when pandas
	    and a Parquet engine are importable) from the public-sample windows CSV,
	    delegates the selected-128 table to ``write_selected128_viewer_table``,
	    removes the obsolete ``dataset_viewer_summary.jsonl`` and finally rewrites
	    the artifact README front matter so it carries ``ARTIFACT_VIEWER_CONFIG``.
	    The README step is skipped when the file does not exist.
	    """
	    artifact_root = hf_root / "artifacts"
	    readme_path = artifact_root / "README.md"
	    viewer_dir = artifact_root / "viewer"
	    viewer_dir.mkdir(parents=True, exist_ok=True)

	    project_status = load_json(artifact_root / "docs/data/project_status.json")
	    modality_atlas = load_json(artifact_root / "docs/data/modality_atlas.json")
	    available_modalities = load_json(artifact_root / "results/episode_task_suite/available_modalities.json")
	    feature_manifest = load_json(artifact_root / "results/episode_task_suite/feature_manifest.json")
	    # load_json yields {} for a missing file; these two are consumed as lists.
	    if not isinstance(available_modalities, list):
	        available_modalities = []
	    if not isinstance(feature_manifest, list):
	        feature_manifest = []

	    scope = project_status.get("scope_boundary", {})
	    fps = sample_fps(available_modalities)
	    modalities = modality_summary(modality_atlas)
	    # Plain pipe separator; an escaped "\|" would leak a backslash into the data.
	    feature_blocks = "|".join(block.get("name", "") for block in feature_manifest if block.get("name"))
	    objects_by_window = read_csv_by_window(
	        artifact_root / "results/single_episode_diagnostics/object_labels/window_object_labels.csv"
	    )
	    window_frames = int(scope.get("window_frames", 20) or 20)
	    feature_dim = int(scope.get("current_feature_dimensions", 8546) or 8546)

	    rows = []
	    windows_path = artifact_root / "results/episode_task_suite/windows.csv"
	    with windows_path.open("r", encoding="utf-8", newline="") as handle:
	        for row in csv.DictReader(handle):
	            window_index = int(row["window_index"])
	            start_frame = int(row["start_frame"])
	            end_frame = int(row["end_frame"])
	            center_frame = int(row["center_frame"])
	            object_row = objects_by_window.get(window_index, {})
	            rows.append(
	                {
	                    "episode_id": "xperience-10m-sample/public_episode",
	                    "source_sample_repo": "ropedia-ai/xperience-10m-sample",
	                    "window_index": window_index,
	                    "start_frame": start_frame,
	                    "end_frame": end_frame,
	                    "center_frame": center_frame,
	                    "start_time_s": round(start_frame / fps, 3),
	                    "end_time_s": round(end_frame / fps, 3),
	                    "center_time_s": round(center_frame / fps, 3),
	                    "window_frames": window_frames,
	                    "stride_frames": 5,
	                    "action_label": row["action_label"],
	                    "action_fraction": float(row["action_fraction"]),
	                    "subtask_label": row["subtask_label"],
	                    "subtask_fraction": float(row["subtask_fraction"]),
	                    "objects": object_row.get("objects", ""),
	                    "object_count": int(object_row.get("object_count", 0) or 0),
	                    "modalities": modalities,
	                    "feature_dim": feature_dim,
	                    "feature_blocks": feature_blocks,
	                    "derived_features_file": "results/episode_task_suite/shared_windows.npz",
	                    "source_window_table": "results/episode_task_suite/windows.csv",
	                    "raw_data_included": False,
	                }
	            )

	    viewer_path = viewer_dir / "episode_windows.jsonl"
	    viewer_path.write_text(
	        "\n".join(json.dumps(row, ensure_ascii=True) for row in rows) + "\n",
	        encoding="utf-8",
	    )
	    try:
	        import pandas as pd

	        parquet_path = viewer_dir / "episode_windows.parquet"
	        pd.DataFrame(rows).to_parquet(parquet_path, index=False)
	    except ImportError:
	        print("pandas/pyarrow unavailable; wrote JSONL viewer fallback only")
	    write_selected128_viewer_table(artifact_root, viewer_dir)
	    (viewer_dir / "dataset_viewer_summary.jsonl").unlink(missing_ok=True)

	    if not readme_path.exists():
	        return
	    readme = readme_path.read_text(encoding="utf-8")
	    readme = readme.replace(" - n<1K", " - 1K<n<10K")
	    readme_path.write_text(_merge_viewer_config_into_card(readme), encoding="utf-8")


	def ensure_repo_card_metadata(readme_path: Path, metadata: str) -> None:
	    """Make ``metadata`` the front matter of the card at ``readme_path``.

	    Existing front matter (a file starting with ``---`` on its own line and
	    containing a closing ``---``) is replaced; otherwise the metadata is
	    prepended. The file is rewritten only when its content would change, and
	    a missing file is left alone.
	    """
	    if not readme_path.exists():
	        return
	    current = readme_path.read_text(encoding="utf-8")
	    header = metadata.rstrip() + "\n\n"

	    # Default: keep the whole file as body; drop old front matter if present.
	    body = current
	    if current.startswith("---\n"):
	        pieces = current.split("---", 2)
	        if len(pieces) == 3:
	            body = pieces[2]

	    updated = header + body.lstrip("\n")
	    if updated != current:
	        readme_path.write_text(updated, encoding="utf-8")


	def ensure_space_runtime_files(hf_root: Path) -> None:
	    """Overwrite the Space bundle's ``requirements.txt`` with SPACE_REQUIREMENTS."""
	    target = hf_root / "space" / "requirements.txt"
	    target.write_text(SPACE_REQUIREMENTS, encoding="utf-8")


	def ensure_enhancement_card_links(hf_root: Path) -> None:
	    """Insert the enhancement-pack section into the artifact and model cards.

	    A card is skipped when it is missing or already mentions
	    ENHANCEMENT_MARKER. The section goes in front of the card's anchor heading
	    when that heading exists, otherwise at the end of the card.
	    """
	    anchors = {
	        "artifacts/README.md": "\n## Dataset Boundary",
	        "model/README.md": "\n## Start Here",
	    }
	    for relative_path, anchor in anchors.items():
	        card = hf_root / relative_path
	        if not card.exists():
	            continue
	        body = card.read_text(encoding="utf-8")
	        if ENHANCEMENT_MARKER in body:
	            continue
	        if anchor in body:
	            # Only the first occurrence of the anchor heading is used.
	            updated = body.replace(anchor, ENHANCEMENT_CARD_BLOCK + anchor, 1)
	        else:
	            updated = body.rstrip() + "\n" + ENHANCEMENT_CARD_BLOCK
	        card.write_text(updated, encoding="utf-8")


	def parse_args() -> argparse.Namespace:
	    """Parse the command line of the publish script.

	    Options: the bundle root, the namespace and repo names (defaulting to the
	    DEFAULT_* constants), the token (defaulting to the stripped ``HF_TOKEN``
	    environment variable) and one ``--skip-*`` switch per upload stage.
	    """
	    parser = argparse.ArgumentParser(description=__doc__)
	    parser.add_argument("--hf-root", type=Path, default=DEFAULT_HF_ROOT)

	    string_options = (
	        ("--namespace", DEFAULT_NAMESPACE),
	        ("--space-repo", DEFAULT_SPACE_REPO),
	        ("--artifact-repo", DEFAULT_ARTIFACT_REPO),
	        ("--model-repo", DEFAULT_MODEL_REPO),
	        ("--weights-results-repo", DEFAULT_WEIGHTS_RESULTS_REPO),
	        ("--qwen3-lora-repo", DEFAULT_QWEN3_LORA_REPO),
	        ("--cosmos3-super-lora-repo", DEFAULT_COSMOS3_SUPER_LORA_REPO),
	        ("--token", os.environ.get("HF_TOKEN", "").strip()),
	    )
	    for flag, default in string_options:
	        parser.add_argument(flag, default=default)

	    for flag in ("--skip-space", "--skip-artifacts", "--skip-model", "--skip-weights-results"):
	        parser.add_argument(flag, action="store_true")

	    return parser.parse_args()


	def full_repo(namespace: str, repo_name: str) -> str:
	    """Qualify ``repo_name`` with ``namespace`` unless it already contains a ``/``."""
	    if "/" in repo_name:
	        return repo_name
	    return f"{namespace}/{repo_name}"


	def prune_generated_artifacts(root: Path) -> None:
	    """Delete ``__pycache__`` trees, ``*.pyc`` files and ``.DS_Store`` files under ``root``."""
	    cache_dirs = list(root.rglob("__pycache__"))
	    # Reverse-sorted so nested cache directories are handled before parents.
	    cache_dirs.sort(reverse=True)
	    for cache_dir in cache_dirs:
	        shutil.rmtree(cache_dir, ignore_errors=True)

	    for pattern in ("*.pyc", ".DS_Store"):
	        for stray in root.rglob(pattern):
	            stray.unlink(missing_ok=True)


	def prune_artifact_bundle(hf_root: Path) -> None:
	    """Remove every STALE_ARTIFACT_REMOTE_FILES entry from the local artifact bundle."""
	    bundle = hf_root / "artifacts"
	    stale_paths = [bundle / relative for relative in STALE_ARTIFACT_REMOTE_FILES]
	    for stale in stale_paths:
	        stale.unlink(missing_ok=True)


	def upload_folder(
	    api: HfApi,
	    token: str,
	    repo_id: str,
	    repo_type: str | None,
	    folder: Path,
	    message: str,
	    *,
	    allow_patterns: list[str] | None = None,
	    ignore_patterns: list[str] | None = None,
	):
	    """Upload ``folder`` to ``repo_id`` and return the client's result.

	    COMMON_IGNORE is always prepended to ``ignore_patterns``. Non-Space repos
	    go through ``api.upload_large_folder`` when the client provides it;
	    Spaces, and clients without that method, use ``api.upload_folder``.

	    Args:
	        api: Hub client used for the upload.
	        token: Access token; only forwarded on the ``upload_folder`` path.
	        repo_id: Target repository, ``namespace/name``.
	        repo_type: ``"space"``, ``"dataset"``, ``"model"`` or ``None``
	            (treated as ``"model"`` for the large-folder path).
	        folder: Local directory to upload.
	        message: Commit message; only forwarded on the ``upload_folder`` path.
	        allow_patterns: Optional patterns restricting which files are sent.
	        ignore_patterns: Optional extra patterns to exclude.
	    """
	    print(f"Uploading {folder} -> {repo_id}")
	    effective_repo_type = repo_type or "model"
	    effective_ignore_patterns = COMMON_IGNORE + (ignore_patterns or [])
	    if effective_repo_type != "space" and hasattr(api, "upload_large_folder"):
	        # This call is not given `message` or `token`; it relies on the
	        # credentials the `api` object was constructed with.
	        return api.upload_large_folder(
	            repo_id=repo_id,
	            repo_type=effective_repo_type,
	            folder_path=str(folder),
	            allow_patterns=allow_patterns,
	            ignore_patterns=effective_ignore_patterns,
	            num_workers=8,
	            print_report=True,
	            print_report_every=60,
	        )
	    return api.upload_folder(
	        repo_id=repo_id,
	        repo_type=repo_type,
	        folder_path=str(folder),
	        commit_message=message,
	        token=token,
	        allow_patterns=allow_patterns,
	        ignore_patterns=effective_ignore_patterns,
	    )


	def delete_remote_file_if_present(
	    api: HfApi,
	    token: str,
	    repo_id: str,
	    repo_type: str,
	    path_in_repo: str,
	) -> None:
	    """Best-effort deletion of one stale file from a remote repo.

	    Never raises: any failure is reported on stdout. A failure whose message
	    looks like "not found" is reported as "already absent", anything else as
	    a skipped cleanup.
	    """
	    target = f"{repo_id}/{path_in_repo}"
	    try:
	        api.delete_file(
	            path_in_repo=path_in_repo,
	            repo_id=repo_id,
	            repo_type=repo_type,
	            token=token,
	            commit_message=f"Remove stale {path_in_repo}",
	        )
	        print(f"Deleted stale remote file: {target}")
	    except Exception as exc:
	        detail = str(exc)
	        # Absence is detected from the error text only.
	        looks_absent = (
	            "404" in detail
	            or "Entry Not Found" in detail
	            or "not found" in detail.lower()
	        )
	        if not looks_absent:
	            print(f"Remote stale-file cleanup skipped for {target}: {exc}")
	        else:
	            print(f"Remote file already absent: {target}")


	def delete_remote_folder_if_present(
	    api: HfApi,
	    token: str,
	    repo_id: str,
	    repo_type: str,
	    path_in_repo: str,
	) -> None:
	    """Best-effort deletion of one stale folder from a remote repo.

	    Never raises: any failure is reported on stdout. A failure whose message
	    looks like "not found" is reported as "already absent", anything else as
	    a skipped cleanup.
	    """
	    target = f"{repo_id}/{path_in_repo}"
	    try:
	        api.delete_folder(
	            path_in_repo=path_in_repo,
	            repo_id=repo_id,
	            repo_type=repo_type,
	            token=token,
	            commit_message=f"Remove stale {path_in_repo}",
	        )
	        print(f"Deleted stale remote folder: {target}")
	    except Exception as exc:
	        detail = str(exc)
	        # Absence is detected from the error text only.
	        looks_absent = (
	            "404" in detail
	            or "Entry Not Found" in detail
	            or "not found" in detail.lower()
	        )
	        if not looks_absent:
	            print(f"Remote stale-folder cleanup skipped for {target}: {exc}")
	        else:
	            print(f"Remote folder already absent: {target}")


	def upload_allowlisted_artifact_binaries(
	    api: HfApi,
	    token: str,
	    repo_id: str,
	    artifact_root: Path,
	) -> None:
	    """Upload approved derived binary artifacts without exposing model weights.

	    Each ARTIFACT_BINARY_ALLOWLIST entry that exists below ``artifact_root``
	    is uploaded to the dataset repo ``repo_id`` in its own commit; entries
	    that are missing locally are reported and skipped.
	    """
	    for relative_path in ARTIFACT_BINARY_ALLOWLIST:
	        local_file = artifact_root / relative_path
	        if local_file.exists():
	            api.upload_file(
	                path_or_fileobj=str(local_file),
	                path_in_repo=relative_path,
	                repo_id=repo_id,
	                repo_type="dataset",
	                token=token,
	                commit_message=f"Publish derived artifact {relative_path}",
	            )
	            print(f"Uploaded allowlisted artifact binary: {repo_id}/{relative_path}")
	        else:
	            print(f"Allowlisted artifact binary absent: {relative_path}")


	def upsert_collection_item_notes(
	    api: HfApi,
	    token: str,
	    collection_slug: str,
	    notes_by_repo: dict[str, str],
	) -> None:
	    """Bring the notes of existing collection items in line with ``notes_by_repo``.

	    Items without an entry in ``notes_by_repo`` and items whose note already
	    matches are left untouched.
	    """
	    fetched = api.get_collection(collection_slug, token=token)
	    for entry in fetched.items:
	        wanted = notes_by_repo.get(entry.item_id)
	        needs_update = wanted is not None and entry.note != wanted
	        if needs_update:
	            api.update_collection_item(
	                collection_slug,
	                entry.item_object_id,
	                note=wanted,
	                token=token,
	            )


	def main() -> int:
	    """Prepare the local bundles, upload them to the Hub and refresh the collection.

	    Steps, in order: prune generated files, regenerate viewer tables and card
	    metadata, authenticate (the authenticated user must equal ``--namespace``),
	    create the target repos, upload each bundle unless its ``--skip-*`` flag
	    is set, delete known stale remote files, and finally create or update the
	    Hub collection on a best-effort basis.

	    Returns:
	        0 on success.

	    Raises:
	        SystemExit: when no token is available or the authenticated user does
	            not match the requested namespace.
	    """
	    args = parse_args()
	    hf_root = args.hf_root.resolve()
	    prune_generated_artifacts(hf_root)
	    prune_artifact_bundle(hf_root)
	    ensure_artifact_dataset_viewer_config(hf_root)
	    ensure_space_runtime_files(hf_root)
	    ensure_repo_card_metadata(hf_root / "space/README.md", SPACE_CARD_METADATA)
	    ensure_repo_card_metadata(hf_root / "model/README.md", BASELINE_MODEL_CARD_METADATA)
	    ensure_enhancement_card_links(hf_root)

	    token = args.token or get_token() or getpass.getpass("HF token: ").strip()
	    if not token:
	        raise SystemExit("No token provided.")

	    api = HfApi(token=token)
	    me = api.whoami(token=token)
	    username = me.get("name")
	    if username != args.namespace:
	        raise SystemExit(f"Authenticated as {username!r}, expected {args.namespace!r}.")

	    space_repo = full_repo(args.namespace, args.space_repo)
	    artifact_repo = full_repo(args.namespace, args.artifact_repo)
	    model_repo = full_repo(args.namespace, args.model_repo)
	    weights_results_repo = full_repo(args.namespace, args.weights_results_repo)
	    qwen3_lora_repo = full_repo(args.namespace, args.qwen3_lora_repo)
	    cosmos3_super_lora_repo = full_repo(args.namespace, args.cosmos3_super_lora_repo)

	    api.create_repo(space_repo, repo_type="space", space_sdk="gradio", exist_ok=True, token=token)
	    api.create_repo(artifact_repo, repo_type="dataset", exist_ok=True, token=token)
	    api.create_repo(model_repo, repo_type=None, exist_ok=True, token=token)
	    api.create_repo(weights_results_repo, repo_type=None, exist_ok=True, token=token)

	    # Weight/checkpoint files at any depth. A pattern such as "*/.pt" would
	    # only match a file literally named ".pt", so real binaries would slip
	    # into the card uploads and the binary batch would select nothing.
	    binary_patterns = ["*.pt", "*.npz"]

	    if not args.skip_space:
	        upload_folder(
	            api,
	            token,
	            space_repo,
	            "space",
	            hf_root / "space",
	            "Publish Ropedia Xperience-10M task-suite Space",
	        )
	        for path_in_repo in STALE_SPACE_REMOTE_FILES:
	            delete_remote_file_if_present(api, token, space_repo, "space", path_in_repo)
	    if not args.skip_artifacts:
	        upload_folder(
	            api,
	            token,
	            artifact_repo,
	            "dataset",
	            hf_root / "artifacts",
	            "Publish Ropedia Xperience-10M derived artifacts",
	            ignore_patterns=binary_patterns,
	        )
	        # Binaries are excluded above; only the allowlisted ones are published.
	        upload_allowlisted_artifact_binaries(api, token, artifact_repo, hf_root / "artifacts")
	        for path_in_repo in STALE_ARTIFACT_REMOTE_FILES:
	            delete_remote_file_if_present(api, token, artifact_repo, "dataset", path_in_repo)
	        for path_in_repo in STALE_ARTIFACT_REMOTE_FOLDERS:
	            delete_remote_folder_if_present(api, token, artifact_repo, "dataset", path_in_repo)
	    if not args.skip_model:
	        # First batch: cards and text assets only.
	        upload_folder(
	            api,
	            token,
	            model_repo,
	            None,
	            hf_root / "model",
	            "Publish Ropedia Xperience-10M task baseline cards",
	            ignore_patterns=binary_patterns,
	        )
	        for path_in_repo in STALE_MODEL_REMOTE_FILES:
	            delete_remote_file_if_present(api, token, model_repo, "model", path_in_repo)
	        # Second, explicit batch: the model binaries themselves.
	        upload_folder(
	            api,
	            token,
	            model_repo,
	            None,
	            hf_root / "model",
	            "Publish Ropedia Xperience-10M model binaries",
	            allow_patterns=binary_patterns,
	        )
	    if not args.skip_weights_results:
	        upload_folder(
	            api,
	            token,
	            weights_results_repo,
	            None,
	            hf_root / "weights_results",
	            "Publish consolidated Ropedia Xperience-10M weights/results bundle",
	        )

	    # Collection maintenance is deliberately best-effort: a failure here is
	    # reported but must not fail a publish whose uploads already succeeded.
	    try:
	        collection_description = (
	            "Ropedia Xperience-10M dashboard, public artifacts, baselines, "
	            "Qwen3-Omni v6, and Cosmos3-Super/Nano results."
	        )
	        collection_notes = {
	            space_repo: "Interactive/static dashboard with raw public-sample previews and task-suite analysis.",
	            artifact_repo: "Public-safe metrics, predictions, docs, scripts, diagrams, and verified_public result packages.",
	            model_repo: "Minimal numpy weights plus aligned neural MLP checkpoints and task-head metrics.",
	            weights_results_repo: "Consolidated baseline weights, Qwen3-Omni v6 LoRA, Cosmos3-Super forward-dynamics LoRA, verified results, and analysis manifest.",
	            qwen3_lora_repo: "Verified v6 rank64 Qwen3-Omni LoRA adapter for the selected 128-episode diagnostic row.",
	            cosmos3_super_lora_repo: "Verified Cosmos3-Super forward-dynamics LoRA adapter over camera-pose proxy targets.",
	        }
	        collection = api.create_collection(
	            COLLECTION_TITLE,
	            namespace=args.namespace,
	            description=collection_description,
	            private=False,
	            exists_ok=True,
	            token=token,
	        )
	        api.update_collection_metadata(
	            collection.slug,
	            description=collection_description,
	            private=False,
	            token=token,
	        )
	        collection_members = (
	            (space_repo, "space"),
	            (artifact_repo, "dataset"),
	            (model_repo, "model"),
	            (weights_results_repo, "model"),
	            (qwen3_lora_repo, "model"),
	            (cosmos3_super_lora_repo, "model"),
	        )
	        for member_repo, member_type in collection_members:
	            api.add_collection_item(
	                collection.slug,
	                member_repo,
	                member_type,
	                note=collection_notes[member_repo],
	                exists_ok=True,
	                token=token,
	            )
	        # Items that already existed keep their old note unless refreshed here.
	        upsert_collection_item_notes(api, token, collection.slug, collection_notes)
	        print(f"Collection: https://huggingface.co/collections/{collection.slug}")
	    except Exception as exc:
	        print(f"Collection update skipped: {exc}")

	    print("Done")
	    print(f"Space: https://huggingface.co/spaces/{space_repo}")
	    print(f"Artifacts: https://huggingface.co/datasets/{artifact_repo}")
	    print(f"Models: https://huggingface.co/{model_repo}")
	    print(f"Weights/results: https://huggingface.co/{weights_results_repo}")
	    print(f"Qwen3-Omni LoRA: https://huggingface.co/{qwen3_lora_repo}")
	    print(f"Cosmos3-Super LoRA: https://huggingface.co/{cosmos3_super_lora_repo}")
	    return 0


	# Script entry point: main()'s return value becomes the process exit status.
	if __name__ == "__main__":
	    raise SystemExit(main())