Publish Ropedia Xperience-10M task baseline cards

1e688c9 verified 27 days ago

16 kB

	#!/usr/bin/env python3
	"""Validate the public 12-task card and walkthrough surface.

	This gate is deliberately about presentation integrity, not model quality. The
	repo keeps snake_case artifact ids for reproducibility, but the public website
	task cards and interactive player should use research-readable names and clear
	input/process/output wording.
	"""

	from __future__ import annotations

	import json
	import re
	from datetime import datetime, timezone
	from pathlib import Path
	from typing import Any


	# Repository root: the parent of the directory that contains this script.
	ROOT = Path(__file__).resolve().parents[1]
	# Inputs read by build_report().
	TASK_JSON = ROOT / "docs/data/task_walkthroughs.json"
	WEBSITE = ROOT / "docs/index.html"
	WALKTHROUGH_MD = ROOT / "results/episode_task_suite/task_walkthroughs/TASK_WALKTHROUGHS.md"
	# Report file written by main().
	OUTPUT = ROOT / "docs/data/task_surface_integrity.json"

	# Artifact id -> public display name. validate_tasks() requires exactly these
	# keys and names; validate_markdown() requires one heading per entry.
	EXPECTED_TASKS = {
	"timeline_action": "Action Recognition",
	"timeline_subtask": "Procedure Step Recognition",
	"transition_detection": "Action Boundary Detection",
	"next_action": "Next-Action Prediction",
	"hand_trajectory_forecast": "Hand Trajectory Forecasting",
	"contact_prediction": "Contact State Prediction",
	"object_relevance": "Object Relevance Prediction",
	"caption_grounding": "Language Grounding",
	"cross_modal_retrieval": "Cross-Modal Retrieval",
	"modality_reconstruction": "Cross-Modal Reconstruction",
	"temporal_order": "Temporal Order Verification",
	"misalignment_detection": "Multimodal Synchronization Detection",
	}

	# Artifact id -> display name for extension probes. validate_website() checks
	# that the display name appears in the page and that the raw id is not used
	# as an <h3> heading.
	EXPECTED_EXTENSION_NAMES = {
	"body_motion_intensity": "Body and Hand Motion Intensity",
	"multi_view_consistency_retrieval": "Multi-View Consistency Retrieval",
	"action_phase_progress": "Action Phase Progress Estimation",
	"ego_motion_forecast": "Short-Horizon Ego-Motion Forecasting",
	}

	# Keys that every task record must contain.
	REQUIRED_TASK_FIELDS = {
	"display_name",
	"research_name",
	"task_family",
	"architecture_family",
	"primary_direction",
	"card_blurb",
	"input_short",
	"process_short",
	"output_short",
	"modalities",
	"poster_modality",
	"case_study",
	"input",
	"output",
	"middle_modules",
	"metric",
	"failure_mode",
	"artifact_id",
	"plain_goal",
	}

	# Task fields that are scanned for leaked raw artifact/modality ids.
	DISPLAY_FIELDS = {
	"display_name",
	"research_name",
	"card_blurb",
	"input_short",
	"process_short",
	"output_short",
	"plain_goal",
	}

	# Accepted values for a task's "task_family" field.
	ALLOWED_FAMILIES = {"supervised", "forecast", "retrieval", "diagnostic"}
	# Modality id -> asset path relative to ROOT; each referenced asset must exist.
	MODALITY_ASSETS = {
	"video": "docs/assets/modalities/video.jpg",
	"audio": "docs/assets/modalities/audio.png",
	"depth": "docs/assets/modalities/depth.jpg",
	"pose_slam": "docs/assets/modalities/pose_slam.png",
	"motion_capture": "docs/assets/modalities/motion_capture.png",
	"inertial": "docs/assets/modalities/inertial.png",
	"language": "docs/assets/modalities/language.png",
	}

	# Matches lowercase snake_case tokens that contain at least one underscore.
	RAW_ID_PATTERN = re.compile(r"\b[a-z]+(?:_[a-z0-9]+)+\b")


	def load_json(path: Path) -> dict[str, Any]:
	    """Decode the UTF-8 JSON document stored at ``path`` and return it."""
	    with path.open(encoding="utf-8") as handle:
	        return json.load(handle)


	def check(condition: bool, name: str, failures: list[dict[str, Any]], **details: Any) -> dict[str, Any]:
	    """Build one check record and register it as a failure when it did not pass.

	    The record holds ``name``, a ``status`` of ``"pass"`` or ``"fail"`` derived
	    from the truthiness of ``condition``, and every keyword in ``details``.
	    A failing record is appended to ``failures`` (the same object that is
	    returned). The record is returned in both cases.
	    """
	    status = "pass" if condition else "fail"
	    outcome: dict[str, Any] = {"name": name, "status": status}
	    outcome.update(details)
	    if condition:
	        return outcome
	    failures.append(outcome)
	    return outcome


	def function_body(source: str, name: str) -> str:
	    """Return the text of ``function <name>(...) {...}`` found in ``source``.

	    The result starts at the first ``function <name>(`` occurrence and ends at
	    the ``}`` that balances the first ``{`` after it. Braces are counted
	    character by character, with no awareness of strings or comments. An empty
	    string is returned when the declaration or its opening brace is absent;
	    when the braces never balance, everything from the declaration to the end
	    of ``source`` is returned.
	    """
	    begin = source.find(f"function {name}(")
	    if begin < 0:
	        return ""
	    opening = source.find("{", begin)
	    if opening < 0:
	        return ""

	    nesting = 0
	    for position, symbol in enumerate(source[opening:], start=opening):
	        if symbol == "{":
	            nesting += 1
	            continue
	        if symbol != "}":
	            continue
	        nesting -= 1
	        if nesting == 0:
	            return source[begin : position + 1]
	    return source[begin:]


	def _validate_task_record(
	    task_id: str, task: dict[str, Any], failures: list[dict[str, Any]]
	) -> list[dict[str, Any]]:
	    """Run every per-task check for one task record and return the check records."""
	    checks: list[dict[str, Any]] = []

	    missing_fields = sorted(REQUIRED_TASK_FIELDS - set(task))
	    checks.append(
	        check(not missing_fields, f"{task_id}: required_fields", failures, missing=missing_fields)
	    )

	    expected_name = EXPECTED_TASKS.get(task_id)
	    checks.append(
	        check(
	            task.get("display_name") == expected_name,
	            f"{task_id}: human_readable_display_name",
	            failures,
	            expected=expected_name,
	            observed=task.get("display_name"),
	        )
	    )
	    checks.append(
	        check(
	            task.get("artifact_id") == task_id,
	            f"{task_id}: artifact_id_matches_key",
	            failures,
	            observed=task.get("artifact_id"),
	        )
	    )

	    # DISPLAY_FIELDS is a set; sort it so the order of check records in the
	    # written report is the same on every run.
	    for field in sorted(DISPLAY_FIELDS):
	        value = str(task.get(field, ""))
	        raw_hits = [
	            hit
	            for hit in RAW_ID_PATTERN.findall(value)
	            if hit in EXPECTED_TASKS or hit in MODALITY_ASSETS
	        ]
	        checks.append(
	            check(
	                not raw_hits,
	                f"{task_id}: public_field_{field}_is_human_readable",
	                failures,
	                value=value,
	                raw_hits=raw_hits,
	            )
	        )

	    family = task.get("task_family")
	    checks.append(
	        check(
	            # The isinstance guard keeps an unhashable JSON value (list/object)
	            # from raising TypeError in the set membership test.
	            isinstance(family, str) and family in ALLOWED_FAMILIES,
	            f"{task_id}: known_task_family",
	            failures,
	            observed=family,
	            allowed=sorted(ALLOWED_FAMILIES),
	        )
	    )

	    modalities = task.get("modalities", [])
	    modalities_is_list = isinstance(modalities, list)
	    checks.append(
	        check(
	            modalities_is_list and bool(modalities),
	            f"{task_id}: modality_list_present",
	            failures,
	            observed=modalities,
	        )
	    )
	    if modalities_is_list:
	        # Non-string entries can never be known modality ids; testing them
	        # against the dict directly would raise for unhashable values.
	        unknown = [
	            item
	            for item in modalities
	            if not isinstance(item, str) or item not in MODALITY_ASSETS
	        ]
	        missing_assets = [
	            MODALITY_ASSETS[item]
	            for item in modalities
	            if isinstance(item, str)
	            and item in MODALITY_ASSETS
	            and not (ROOT / MODALITY_ASSETS[item]).exists()
	        ]
	        checks.append(
	            check(
	                not unknown,
	                f"{task_id}: known_modalities",
	                failures,
	                unknown=unknown,
	            )
	        )
	        checks.append(
	            check(
	                not missing_assets,
	                f"{task_id}: modality_assets_exist",
	                failures,
	                missing=missing_assets,
	            )
	        )
	    checks.append(
	        check(
	            # Only a list can legitimately contain the poster modality; the
	            # guard also prevents `in` from raising on non-container values.
	            modalities_is_list and task.get("poster_modality") in modalities,
	            f"{task_id}: poster_modality_in_task_modalities",
	            failures,
	            poster_modality=task.get("poster_modality"),
	            modalities=modalities,
	        )
	    )

	    metric = task.get("metric", {})
	    metric_ok = (
	        isinstance(metric, dict)
	        and isinstance(metric.get("name"), str)
	        and isinstance(metric.get("direction"), str)
	        and isinstance(metric.get("minimal"), (int, float))
	        and isinstance(metric.get("neural_mlp"), (int, float))
	    )
	    checks.append(
	        check(
	            metric_ok,
	            f"{task_id}: numeric_minimal_and_neural_metrics",
	            failures,
	            metric=metric,
	        )
	    )

	    middle_modules = task.get("middle_modules")
	    module_count = len(middle_modules) if isinstance(middle_modules, list) else 0
	    checks.append(
	        check(
	            module_count >= 3,
	            f"{task_id}: middle_modules_explain_process",
	            failures,
	            observed_count=module_count,
	        )
	    )
	    return checks


	def validate_tasks(payload: dict[str, Any], failures: list[dict[str, Any]]) -> list[dict[str, Any]]:
	    """Validate the ``tasks`` mapping of the walkthrough payload.

	    Checks that ``payload["tasks"]`` is a dict whose keys are exactly the ids
	    in ``EXPECTED_TASKS``, then validates each task record: required fields,
	    display name, artifact id, absence of raw ids in public fields, task
	    family, modalities and their assets, poster modality, metric shape, and
	    the number of middle modules.

	    Args:
	        payload: Decoded task walkthrough JSON document.
	        failures: List that receives every failing check record.

	    Returns:
	        All check records produced, in evaluation order. Malformed values are
	        reported as failing checks rather than raising.
	    """
	    checks: list[dict[str, Any]] = []
	    tasks = payload.get("tasks", {})
	    checks.append(check(isinstance(tasks, dict), "tasks_object_present", failures))
	    if not isinstance(tasks, dict):
	        return checks

	    expected_ids = set(EXPECTED_TASKS)
	    task_ids = set(tasks)
	    checks.append(
	        check(
	            len(tasks) == len(EXPECTED_TASKS),
	            "exactly_12_tasks",
	            failures,
	            observed=len(tasks),
	            expected=len(EXPECTED_TASKS),
	        )
	    )
	    checks.append(
	        check(
	            task_ids == expected_ids,
	            "expected_task_ids_present",
	            failures,
	            missing=sorted(expected_ids - task_ids),
	            extra=sorted(task_ids - expected_ids),
	        )
	    )

	    for task_id, task in tasks.items():
	        if not isinstance(task, dict):
	            checks.append(check(False, f"{task_id}: task_record_object", failures))
	            continue
	        checks.extend(_validate_task_record(task_id, task, failures))
	    return checks


	def validate_markdown(source: str, tasks: dict[str, Any], failures: list[dict[str, Any]]) -> list[dict[str, Any]]:
	    """Validate the walkthrough markdown against the expected tasks.

	    Checks that each expected ``### <display name> (`<task id>`)`` heading is
	    present, that the document has exactly one level-3 heading per expected
	    task, and that every task's case study text appears in the document.

	    Args:
	        source: Full text of the walkthrough markdown file.
	        tasks: Task id -> task record mapping from the walkthrough JSON.
	        failures: List that receives every failing check record.

	    Returns:
	        All check records produced, in evaluation order.
	    """
	    checks: list[dict[str, Any]] = []
	    for task_id, display_name in EXPECTED_TASKS.items():
	        expected_heading = f"### {display_name} (`{task_id}`)"
	        checks.append(
	            check(
	                expected_heading in source,
	                f"markdown_heading_present:{task_id}",
	                failures,
	                expected=expected_heading,
	            )
	        )

	    # Count only real level-3 ATX headings (line start, at most three spaces
	    # of indent). A plain substring count of "### " would also count every
	    # "#### " heading and any mid-line occurrence.
	    heading_count = len(re.findall(r"^ {0,3}### ", source, flags=re.MULTILINE))
	    checks.append(
	        check(
	            heading_count == len(EXPECTED_TASKS),
	            "markdown_has_12_task_sections",
	            failures,
	            observed=heading_count,
	        )
	    )

	    def case_study_present(task: Any) -> bool:
	        # A non-dict record or a missing/empty case study cannot be "contained"
	        # in the document; an empty string would otherwise match vacuously.
	        if not isinstance(task, dict):
	            return False
	        case_study = task.get("case_study")
	        if case_study is None:
	            return False
	        text = str(case_study)
	        return bool(text) and text in source

	    missing_case_studies = sorted(
	        str(task_id) for task_id, task in tasks.items() if not case_study_present(task)
	    )
	    checks.append(
	        check(
	            not missing_case_studies,
	            "markdown_contains_case_studies",
	            failures,
	            missing=missing_case_studies,
	        )
	    )
	    return checks


	def validate_website(source: str, failures: list[dict[str, Any]]) -> list[dict[str, Any]]:
	    """Validate the website HTML/JS source for the task card and player surface.

	    Three groups of checks are produced, in this order: presence of required
	    markup markers, content checks on the page and on the bodies of the
	    ``renderTaskCards``, ``renderSelector`` and ``renderPlayer`` functions,
	    and one check per extension probe display name. Failing records are also
	    appended to ``failures``.
	    """

	    def has_all(haystack: str, *needles: str) -> bool:
	        return all(needle in haystack for needle in needles)

	    results: list[dict[str, Any]] = []

	    # Group 1: static markers that must appear somewhere in the page.
	    for marker in (
	        'id="taskPlayer"',
	        'id="taskGrid"',
	        'id="walkthroughSelector"',
	        'id="playerStoryboard"',
	        'id="playerFrameChip"',
	        'id="playerFrameCaption"',
	        'id="playerScrub"',
	        'fetch("data/task_walkthroughs.json"',
	        'class="task-card"',
	        'class="task-card-media"',
	        'class="story-button',
	        'class="flow-step',
	        'id="playerPlay"',
	        'id="playerPrev"',
	        'id="playerNext"',
	    ):
	        results.append(
	            check(marker in source, f"website_marker_present:{marker}", failures, marker=marker)
	        )

	    # Group 2: checks on the page and on the extracted renderer bodies.
	    cards_js = function_body(source, "renderTaskCards")
	    selector_js = function_body(source, "renderSelector")
	    player_js = function_body(source, "renderPlayer")

	    player_wired = (
	        has_all(player_js, "playerPoster", "middle_modules")
	        and has_all(source, "playerProgress", "renderStageFrame(task, index)")
	        and has_all(source, 'id="playerPlay"', 'id="playerPrev"', 'id="playerNext"')
	    )
	    named_conditions = [
	        ("website_no_artifact_id_css_or_markup", "artifact-id" not in source),
	        ("task_cards_do_not_render_artifact_ids", "artifact_id" not in cards_js),
	        (
	            "task_cards_render_human_names",
	            has_all(cards_js, "task.display_name", "task.research_name"),
	        ),
	        (
	            "task_cards_render_input_process_output",
	            has_all(cards_js, "task.input_short", "task.process_short", "task.output_short"),
	        ),
	        (
	            "task_cards_use_representative_modality_thumbnail",
	            has_all(cards_js, "task.poster_modality", "task-card-media"),
	        ),
	        ("interactive_player_wired_to_task_metadata", player_wired),
	        (
	            "interactive_video_storyboard_controls_present",
	            has_all(source, "function setActiveStage", "function advancePlayer", "playerScrub"),
	        ),
	        (
	            "selector_uses_human_names",
	            "task.display_name" in selector_js and "artifact_id" not in selector_js,
	        ),
	    ]
	    for label, passed in named_conditions:
	        results.append(check(passed, label, failures))

	    # Group 3: extension probes must show the display name, not the raw id heading.
	    for artifact_id, display_name in EXPECTED_EXTENSION_NAMES.items():
	        raw_heading = f"<h3>{artifact_id}</h3>"
	        results.append(
	            check(
	                raw_heading not in source and display_name in source,
	                f"extension_probe_uses_human_name:{artifact_id}",
	                failures,
	                expected=display_name,
	            )
	        )
	    return results


	def build_report() -> dict[str, Any]:
	    """Run every surface check and assemble the integrity report.

	    When any of the three input files is missing, a failing report containing
	    only the input-presence check is returned. Otherwise the task JSON, the
	    website source and the walkthrough markdown are validated and a summary
	    with per-family and per-modality task counts is added.

	    Returns:
	        A dict with ``status`` (``"pass"`` or ``"fail"``), ``generated_at_utc``,
	        ``summary``, ``checks`` and ``failures``.
	    """
	    failures: list[dict[str, Any]] = []
	    checks: list[dict[str, Any]] = []

	    inputs_present = {
	        "task_walkthroughs_json": TASK_JSON.exists(),
	        "website_index": WEBSITE.exists(),
	        "walkthrough_markdown": WALKTHROUGH_MD.exists(),
	    }
	    all_inputs_present = all(inputs_present.values())
	    checks.append(
	        check(
	            all_inputs_present,
	            "required_task_surface_inputs_present",
	            failures,
	            inputs=inputs_present,
	        )
	    )
	    if not all_inputs_present:
	        return {
	            "status": "fail",
	            "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
	            "summary": {"task_count": 0, "failure_count": len(failures)},
	            "checks": checks,
	            "failures": failures,
	        }

	    task_payload = load_json(TASK_JSON)
	    website_source = WEBSITE.read_text(encoding="utf-8")
	    markdown_source = WALKTHROUGH_MD.read_text(encoding="utf-8")
	    raw_tasks = task_payload.get("tasks", {})
	    tasks = raw_tasks if isinstance(raw_tasks, dict) else {}
	    # validate_tasks() reports non-dict task records as failures; keep them out
	    # of the code paths below that call .get() on each record, so one malformed
	    # record yields a failing report instead of a traceback.
	    task_records = {
	        task_id: task for task_id, task in tasks.items() if isinstance(task, dict)
	    }

	    checks.extend(validate_tasks(task_payload, failures))
	    checks.extend(validate_markdown(markdown_source, task_records, failures))
	    checks.extend(validate_website(website_source, failures))

	    task_families: dict[str, int] = {}
	    task_modalities: dict[str, int] = {}
	    for task in task_records.values():
	        family = task.get("task_family")
	        if isinstance(family, str):
	            task_families[family] = task_families.get(family, 0) + 1
	        modalities = task.get("modalities", [])
	        if not isinstance(modalities, list):
	            continue
	        for modality in modalities:
	            # Count only string ids: other JSON values may be unhashable and
	            # mixed key types cannot be sorted for the summary.
	            if isinstance(modality, str):
	                task_modalities[modality] = task_modalities.get(modality, 0) + 1

	    return {
	        "status": "pass" if not failures else "fail",
	        "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"),
	        "summary": {
	            "task_count": len(tasks),
	            "expected_task_count": len(EXPECTED_TASKS),
	            "task_family_counts": dict(sorted(task_families.items())),
	            "modality_usage_counts": dict(sorted(task_modalities.items())),
	            "interactive_surface": "task cards plus scrub/play/chapter walkthrough storyboard",
	            "failure_count": len(failures),
	        },
	        "checks": checks,
	        "failures": failures,
	    }


	def main() -> int:
	    """Write the integrity report to ``OUTPUT`` and return the process exit code.

	    Prints a one-line status. On a non-passing report, prints the names of up
	    to the first 40 failures plus a count of any remaining ones and returns 1;
	    returns 0 when the report status is ``"pass"``.
	    """
	    report = build_report()
	    OUTPUT.parent.mkdir(parents=True, exist_ok=True)
	    serialized = json.dumps(report, indent=2) + "\n"
	    OUTPUT.write_text(serialized, encoding="utf-8")
	    print(f"{report['status'].upper()}: wrote {OUTPUT}")

	    if report["status"] == "pass":
	        return 0

	    shown_limit = 40
	    all_failures = report["failures"]
	    for failure in all_failures[:shown_limit]:
	        print(f"- {failure['name']}")
	    hidden_count = len(all_failures) - shown_limit
	    if hidden_count > 0:
	        print(f"- ... {hidden_count} more failures")
	    return 1


	if __name__ == "__main__":
	    raise SystemExit(main())