#!/usr/bin/env python3 """Build unified 20-task radar charts for baseline and model diagnostics.""" from __future__ import annotations import html import json import math from datetime import datetime, timezone from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] TASK_SUITE_PATH = ROOT / "docs/data/task_suite_20.json" QWEN_V6_METRICS_PATH = ( ROOT / "results/omni_finetune/verified_public" / "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full" / "eval/metrics.json" ) COSMOS_SUPER_REASONER_METRICS_PATH = ( ROOT / "results/omni_finetune/verified_public" / "xperience10m_cosmos3_super_reasoner_128ep_test_full_20260607" / "eval/metrics.json" ) COSMOS_NANO_METRICS_PATH = ( ROOT / "results/omni_finetune/verified_public" / "xperience10m_cosmos3_nano_128ep_future_window_h5_compat_adapter_eval_test_full" / "eval/metrics.json" ) COSMOS_SUPER_FD_METRICS_PATH = ( ROOT / "results/omni_finetune/verified_public" / "xperience10m_cosmos3_super_forward_dynamics_lora_128ep_train1epoch_256_attn_full8gpu_20260608_eval_test_full_fsdp" / "eval/metrics.json" ) METADATA128_BASELINE_DIR = ROOT / "results/omni_finetune/a100_128_metadata_task_baselines_20260616_v2" RAW128_BASELINE_DIR = ROOT / "results/omni_finetune/a100_128_raw20_task_baselines_complete20_proxy_20260616T091500Z" MODEL_OUTPUT_TASK_PROBE_DIR = ROOT / "results/omni_finetune/model_output_task_probes_20260616" QWEN_FUTURE_TASK_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_qwen3_omni_v6_future_task_probes_a100_20260616T143608Z" ) QWEN_ORDER_SYNC_TIME_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_qwen3_omni_v6_order_sync_time_probes_a100_20260617T132500Z" ) QWEN_RETRIEVAL_TASK_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_qwen3_omni_v6_retrieval_task_probes_a100_20260617T175919Z" ) QWEN_CROSS_MODAL_RETRIEVAL_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_qwen3_omni_v6_cross_modal_retrieval_probe_a100_20260618T000000Z" ) QWEN_CAMERA_VIEW_SYNC_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_qwen3_omni_v6_camera_view_sync_mosaic_tile_a100_20260619T0305Z" ) QWEN_SENSOR_TARGET_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_qwen3_omni_v6_sensor_target_probes_a100_20260619T000000Z" ) QWEN_INTERACTION_TEXT_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_qwen3_omni_v6_interaction_text_task15_a100_20260620T010305Z" ) COSMOS_SUPER_RETRIEVAL_TASK_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_cosmos3_super_retrieval_task_probes_a100_textonly_prompatch_v2_20260620" ) COSMOS_SUPER_FUTURE_TASK_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_cosmos3_super_future_task_probes_a100_textonly_v1_20260620" ) COSMOS_SUPER_INTERACTION_TEXT_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_cosmos3_super_interaction_text_task15_textonly_v1_20260620T1558Z" ) COSMOS_NANO_RETRIEVAL_TASK_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_cosmos3_nano_retrieval_task_probes_a100_patched_textonly_20260621" ) COSMOS_NANO_INTERACTION_TEXT_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_cosmos3_nano_interaction_text_task15_patched_textonly_20260621" ) COSMOS_NANO_FUTURE_ORDER_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_cosmos3_nano_future_order_misalignment_patched_textonly_20260621" ) COSMOS_NANO_CURRENT_TASK_PROBE_DIR = ( ROOT / "results/omni_finetune" / "xperience10m_cosmos3_nano_current_subtask_object_relevance_patched_textonly_20260621" ) QWEN_ACTION_OBJECT_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "action_object_relation/qwen3_omni_v6_lora/metrics.json" ) COSMOS_SUPER_ACTION_OBJECT_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "action_object_relation/cosmos3_super_reasoner/metrics.json" ) COSMOS_SUPER_CAPTION_GROUNDING_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "caption_grounding/cosmos3_super_reasoner/metrics.json" ) COSMOS_SUPER_TIME_TO_TRANSITION_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "time_to_transition/cosmos3_super_reasoner/metrics.json" ) COSMOS_SUPER_LONG_HORIZON_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "long_horizon_next_action/cosmos3_super_reasoner/metrics.json" ) COSMOS_NANO_LONG_HORIZON_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "long_horizon_next_action/cosmos3_nano_future_window/metrics.json" ) COSMOS_NANO_NEXT_SUBTASK_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "next_subtask_forecast/cosmos3_nano_future_window/metrics.json" ) COSMOS_NANO_MODALITY_RECONSTRUCTION_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "modality_reconstruction/cosmos3_nano_future_window/metrics.json" ) COSMOS_NANO_OBJECT_SET_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "object_set_forecast/cosmos3_nano_future_window/metrics.json" ) COSMOS_NANO_ACTION_OBJECT_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "action_object_relation/cosmos3_nano_future_window/metrics.json" ) COSMOS_NANO_TIME_TO_TRANSITION_METRICS_PATH = ( MODEL_OUTPUT_TASK_PROBE_DIR / "time_to_transition/cosmos3_nano_future_window/metrics.json" ) QWEN_FUTURE_TASK_METRIC_PATHS = { "caption_grounding": QWEN_RETRIEVAL_TASK_PROBE_DIR / "caption_grounding/metrics.json", "cross_modal_retrieval": QWEN_CROSS_MODAL_RETRIEVAL_PROBE_DIR / "cross_modal_retrieval/metrics.json", "temporal_order": QWEN_ORDER_SYNC_TIME_PROBE_DIR / "temporal_order/metrics.json", "misalignment_detection": QWEN_ORDER_SYNC_TIME_PROBE_DIR / "misalignment_detection/metrics.json", "long_horizon_next_action": QWEN_FUTURE_TASK_PROBE_DIR / "long_horizon_next_action/metrics.json", "next_subtask_forecast": QWEN_FUTURE_TASK_PROBE_DIR / "next_subtask_forecast/metrics.json", "object_set_forecast": QWEN_FUTURE_TASK_PROBE_DIR / "object_set_forecast/metrics.json", "time_to_transition": QWEN_ORDER_SYNC_TIME_PROBE_DIR / "time_to_transition/metrics.json", "camera_view_sync_retrieval": QWEN_CAMERA_VIEW_SYNC_PROBE_DIR / "camera_view_sync_retrieval/metrics.json", "hand_trajectory_forecast": QWEN_SENSOR_TARGET_PROBE_DIR / "hand_trajectory_forecast/metrics.json", "modality_reconstruction": QWEN_SENSOR_TARGET_PROBE_DIR / "modality_reconstruction/metrics.json", "imu_to_hand_pose": QWEN_SENSOR_TARGET_PROBE_DIR / "imu_to_hand_pose/metrics.json", "interaction_text_prediction": QWEN_INTERACTION_TEXT_PROBE_DIR / "interaction_text_prediction/metrics.json", } QWEN_FUTURE_TASK_METRIC_KEYS = { "caption_grounding": "caption_grounding_mrr", "cross_modal_retrieval": "cross_modal_retrieval_mrr", "temporal_order": "temporal_order_f1", "misalignment_detection": "misalignment_detection_f1", "long_horizon_next_action": "long_horizon_next_action_macro_f1", "next_subtask_forecast": "next_subtask_forecast_macro_f1", "object_set_forecast": "object_set_forecast_micro_f1", "time_to_transition": "time_to_transition_mae", "camera_view_sync_retrieval": "camera_view_sync_retrieval_mrr", "hand_trajectory_forecast": "hand_trajectory_forecast_mrr", "modality_reconstruction": "modality_reconstruction_mrr", "imu_to_hand_pose": "imu_to_hand_pose_mrr", "interaction_text_prediction": "macro_f1", } COSMOS_SUPER_RETRIEVAL_TASK_METRIC_PATHS = { "hand_trajectory_forecast": COSMOS_SUPER_RETRIEVAL_TASK_PROBE_DIR / "hand_trajectory_forecast/metrics.json", "cross_modal_retrieval": COSMOS_SUPER_RETRIEVAL_TASK_PROBE_DIR / "cross_modal_retrieval/metrics.json", "modality_reconstruction": COSMOS_SUPER_RETRIEVAL_TASK_PROBE_DIR / "modality_reconstruction/metrics.json", "imu_to_hand_pose": COSMOS_SUPER_RETRIEVAL_TASK_PROBE_DIR / "imu_to_hand_pose/metrics.json", "camera_view_sync_retrieval": COSMOS_SUPER_RETRIEVAL_TASK_PROBE_DIR / "camera_view_sync_retrieval/metrics.json", } COSMOS_SUPER_RETRIEVAL_TASK_METRIC_KEYS = { "hand_trajectory_forecast": "hand_trajectory_forecast_mrr", "cross_modal_retrieval": "cross_modal_retrieval_mrr", "modality_reconstruction": "modality_reconstruction_mrr", "imu_to_hand_pose": "imu_to_hand_pose_mrr", "camera_view_sync_retrieval": "camera_view_sync_retrieval_mrr", } COSMOS_NANO_RETRIEVAL_TASK_METRIC_PATHS = { "hand_trajectory_forecast": COSMOS_NANO_RETRIEVAL_TASK_PROBE_DIR / "hand_trajectory_forecast/metrics.json", "caption_grounding": COSMOS_NANO_RETRIEVAL_TASK_PROBE_DIR / "caption_grounding/metrics.json", "imu_to_hand_pose": COSMOS_NANO_RETRIEVAL_TASK_PROBE_DIR / "imu_to_hand_pose/metrics.json", "camera_view_sync_retrieval": COSMOS_NANO_RETRIEVAL_TASK_PROBE_DIR / "camera_view_sync_retrieval/metrics.json", } COSMOS_NANO_RETRIEVAL_TASK_METRIC_KEYS = { "hand_trajectory_forecast": "hand_trajectory_forecast_mrr", "caption_grounding": "caption_grounding_mrr", "imu_to_hand_pose": "imu_to_hand_pose_mrr", "camera_view_sync_retrieval": "camera_view_sync_retrieval_mrr", } COSMOS_SUPER_FUTURE_TASK_METRIC_PATHS = { "temporal_order": COSMOS_SUPER_FUTURE_TASK_PROBE_DIR / "temporal_order/metrics.json", "misalignment_detection": COSMOS_SUPER_FUTURE_TASK_PROBE_DIR / "misalignment_detection/metrics.json", "next_subtask_forecast": COSMOS_SUPER_FUTURE_TASK_PROBE_DIR / "next_subtask_forecast/metrics.json", "object_set_forecast": COSMOS_SUPER_FUTURE_TASK_PROBE_DIR / "object_set_forecast/metrics.json", } COSMOS_SUPER_FUTURE_TASK_METRIC_KEYS = { "temporal_order": "temporal_order_f1", "misalignment_detection": "misalignment_detection_f1", "next_subtask_forecast": "next_subtask_forecast_macro_f1", "object_set_forecast": "object_set_forecast_micro_f1", } COSMOS_NANO_FUTURE_ORDER_TASK_METRIC_PATHS = { "temporal_order": COSMOS_NANO_FUTURE_ORDER_PROBE_DIR / "temporal_order/metrics.json", "misalignment_detection": COSMOS_NANO_FUTURE_ORDER_PROBE_DIR / "misalignment_detection/metrics.json", } COSMOS_NANO_FUTURE_ORDER_TASK_METRIC_KEYS = { "temporal_order": "temporal_order_f1", "misalignment_detection": "misalignment_detection_f1", } COSMOS_NANO_CURRENT_TASK_METRIC_PATHS = { "timeline_subtask": COSMOS_NANO_CURRENT_TASK_PROBE_DIR / "timeline_subtask/metrics.json", "object_relevance": COSMOS_NANO_CURRENT_TASK_PROBE_DIR / "object_relevance/metrics.json", } COSMOS_NANO_CURRENT_TASK_METRIC_KEYS = { "timeline_subtask": "timeline_subtask_macro_f1", "object_relevance": "object_relevance_micro_f1", } COSMOS_SUPER_INTERACTION_TEXT_TASK_METRIC_PATHS = { "interaction_text_prediction": COSMOS_SUPER_INTERACTION_TEXT_PROBE_DIR / "interaction_text_prediction/metrics.json", } COSMOS_SUPER_INTERACTION_TEXT_TASK_METRIC_KEYS = { "interaction_text_prediction": "macro_f1", } COSMOS_NANO_INTERACTION_TEXT_TASK_METRIC_PATHS = { "interaction_text_prediction": COSMOS_NANO_INTERACTION_TEXT_PROBE_DIR / "interaction_text_prediction/metrics.json", } COSMOS_NANO_INTERACTION_TEXT_TASK_METRIC_KEYS = { "interaction_text_prediction": "macro_f1", } OUTPUT_JSON = ROOT / "docs/data/unified_task_model_radar.json" OUTPUT_SINGLE_JSON = ROOT / "docs/data/single_episode_task_model_radar.json" OUTPUT_128_JSON = ROOT / "docs/data/episode128_task_model_radar.json" OUTPUT_MATRIX_JSON = ROOT / "docs/data/task_method_20_result_matrix.json" OUTPUT_MATRIX_MD = ROOT / "TASK_METHOD_20_RESULT_MATRIX.md" OUTPUT_SVG = ROOT / "docs/assets/charts/unified_task_model_radar.svg" OUTPUT_SINGLE_SVG = ROOT / "docs/assets/charts/single_episode_task_model_radar.svg" OUTPUT_128_SVG = ROOT / "docs/assets/charts/episode128_task_model_radar.svg" SERIES = { "minimal": { "label": "Minimal", "short_label": "Min", "color": "#ccffa0", "kind": "full_20_task_baseline", "scope": "1 public sample episode", "stroke_dasharray": None, }, "neural_mlp": { "label": "Neural MLP", "short_label": "NN", "color": "#67e8d1", "kind": "full_20_task_baseline", "scope": "1 public sample episode", "stroke_dasharray": None, }, "metadata128_simple": { "label": "128ep Aligned Simple", "short_label": "128-S", "color": "#ffd166", "kind": "partial_128_episode_aligned_baseline", "scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available", "stroke_dasharray": "9 6", }, "metadata128_neural_mlp": { "label": "128ep Aligned NN", "short_label": "128-NN", "color": "#f472b6", "kind": "partial_128_episode_aligned_baseline", "scope": "128 selected episodes, JSONL metadata/text plus staged sensor-block targets where available", "stroke_dasharray": "3 6", }, "raw128_simple": { "label": "128ep Raw Simple", "short_label": "128-RS", "color": "#f59e0b", "kind": "complete_128_episode_raw_feature_baseline", "scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes", "stroke_dasharray": "8 4", }, "raw128_neural_mlp": { "label": "128ep Raw NN", "short_label": "128-RN", "color": "#22d3ee", "kind": "complete_128_episode_raw_feature_baseline", "scope": "128 selected episodes, staged 4430-dim sensor NPZ features; 2 compact proxy axes", "stroke_dasharray": "2 5", }, "qwen3_omni_v6_lora": { "label": "Qwen3-Omni v6 LoRA", "short_label": "Qwen3", "color": "#9bb8ff", "kind": "partial_128_episode_foundation_model_overlay", "scope": "128 selected episodes, held-out test", "stroke_dasharray": "7 7", }, "cosmos3_super_reasoner": { "label": "Cosmos3-Super Reasoner", "short_label": "C3-S", "color": "#ff9c7a", "kind": "partial_128_episode_foundation_model_overlay", "scope": "128 selected episodes, held-out test", "stroke_dasharray": "4 7", }, "cosmos3_nano_future_window": { "label": "Cosmos3-Nano Future Window", "short_label": "C3-N", "color": "#d9c7ff", "kind": "partial_128_episode_world_model_overlay", "scope": "128 selected episodes, held-out test", "stroke_dasharray": "2 7", }, } FOUNDATION_TASK_METRICS = { "timeline_action": { "qwen3_omni_v6_lora": "action_macro_f1", "cosmos3_super_reasoner": "action_macro_f1", "cosmos3_nano_future_window": "action_accuracy_from_retrieved_future", }, "timeline_subtask": { "qwen3_omni_v6_lora": "subtask_accuracy", "cosmos3_super_reasoner": "subtask_accuracy", "cosmos3_nano_future_window": "timeline_subtask_macro_f1", }, "transition_detection": { "qwen3_omni_v6_lora": "transition_accuracy", "cosmos3_super_reasoner": "transition_accuracy", "cosmos3_nano_future_window": "transition_accuracy", }, "next_action": { "qwen3_omni_v6_lora": "next_action_accuracy", "cosmos3_super_reasoner": "next_action_accuracy", "cosmos3_nano_future_window": "action_accuracy_from_retrieved_future", }, "contact_prediction": { "qwen3_omni_v6_lora": "contact_accuracy", "cosmos3_super_reasoner": "contact_accuracy", "cosmos3_nano_future_window": "contact_accuracy", }, "hand_trajectory_forecast": { "cosmos3_nano_future_window": "hand_trajectory_forecast_mrr", }, "object_relevance": { "qwen3_omni_v6_lora": "object_micro_f1", "cosmos3_super_reasoner": "object_micro_f1", "cosmos3_nano_future_window": "object_relevance_micro_f1", }, "action_object_relation": { "qwen3_omni_v6_lora": "action_object_relation_macro_f1", "cosmos3_super_reasoner": "action_object_relation_macro_f1", "cosmos3_nano_future_window": "action_object_relation_macro_f1", }, "caption_grounding": { "cosmos3_super_reasoner": "caption_grounding_iou", "cosmos3_nano_future_window": "caption_grounding_mrr", }, "long_horizon_next_action": { "cosmos3_super_reasoner": "long_horizon_next_action_macro_f1", "cosmos3_nano_future_window": "long_horizon_next_action_macro_f1", }, "next_subtask_forecast": { "cosmos3_nano_future_window": "next_subtask_forecast_macro_f1", }, "modality_reconstruction": { "cosmos3_nano_future_window": "feature_reconstruction_quality", }, "object_set_forecast": { "cosmos3_nano_future_window": "object_set_forecast_micro_f1", }, "cross_modal_retrieval": { "cosmos3_nano_future_window": "future_retrieval_mrr", }, "temporal_order": { "cosmos3_nano_future_window": "temporal_order_f1", }, "misalignment_detection": { "cosmos3_nano_future_window": "misalignment_detection_f1", }, "imu_to_hand_pose": { "cosmos3_nano_future_window": "imu_to_hand_pose_mrr", }, "camera_view_sync_retrieval": { "cosmos3_nano_future_window": "camera_view_sync_retrieval_mrr", }, "interaction_text_prediction": { "cosmos3_nano_future_window": "macro_f1", }, "time_to_transition": { "cosmos3_super_reasoner": "time_to_transition_mae", "cosmos3_nano_future_window": "time_to_transition_mae", }, } FOUNDATION_METRIC_PATHS = { "qwen3_omni_v6_lora": QWEN_V6_METRICS_PATH, "cosmos3_super_reasoner": COSMOS_SUPER_REASONER_METRICS_PATH, "cosmos3_nano_future_window": COSMOS_NANO_METRICS_PATH, } FOUNDATION_METRIC_SOURCE_OVERRIDES = { ("qwen3_omni_v6_lora", "action_object_relation"): QWEN_ACTION_OBJECT_METRICS_PATH, ("cosmos3_super_reasoner", "action_object_relation"): COSMOS_SUPER_ACTION_OBJECT_METRICS_PATH, ("cosmos3_super_reasoner", "caption_grounding"): COSMOS_SUPER_CAPTION_GROUNDING_METRICS_PATH, ("cosmos3_super_reasoner", "temporal_order"): COSMOS_SUPER_FUTURE_TASK_METRIC_PATHS["temporal_order"], ("cosmos3_super_reasoner", "misalignment_detection"): COSMOS_SUPER_FUTURE_TASK_METRIC_PATHS["misalignment_detection"], ("cosmos3_super_reasoner", "next_subtask_forecast"): COSMOS_SUPER_FUTURE_TASK_METRIC_PATHS["next_subtask_forecast"], ("cosmos3_super_reasoner", "object_set_forecast"): COSMOS_SUPER_FUTURE_TASK_METRIC_PATHS["object_set_forecast"], ("qwen3_omni_v6_lora", "caption_grounding"): QWEN_FUTURE_TASK_METRIC_PATHS["caption_grounding"], ("qwen3_omni_v6_lora", "cross_modal_retrieval"): QWEN_FUTURE_TASK_METRIC_PATHS["cross_modal_retrieval"], ("qwen3_omni_v6_lora", "temporal_order"): QWEN_FUTURE_TASK_METRIC_PATHS["temporal_order"], ("qwen3_omni_v6_lora", "misalignment_detection"): QWEN_FUTURE_TASK_METRIC_PATHS["misalignment_detection"], ("qwen3_omni_v6_lora", "long_horizon_next_action"): QWEN_FUTURE_TASK_METRIC_PATHS["long_horizon_next_action"], ("qwen3_omni_v6_lora", "next_subtask_forecast"): QWEN_FUTURE_TASK_METRIC_PATHS["next_subtask_forecast"], ("qwen3_omni_v6_lora", "object_set_forecast"): QWEN_FUTURE_TASK_METRIC_PATHS["object_set_forecast"], ("qwen3_omni_v6_lora", "time_to_transition"): QWEN_FUTURE_TASK_METRIC_PATHS["time_to_transition"], ("qwen3_omni_v6_lora", "camera_view_sync_retrieval"): QWEN_FUTURE_TASK_METRIC_PATHS["camera_view_sync_retrieval"], ("qwen3_omni_v6_lora", "hand_trajectory_forecast"): QWEN_FUTURE_TASK_METRIC_PATHS["hand_trajectory_forecast"], ("qwen3_omni_v6_lora", "modality_reconstruction"): QWEN_FUTURE_TASK_METRIC_PATHS["modality_reconstruction"], ("qwen3_omni_v6_lora", "imu_to_hand_pose"): QWEN_FUTURE_TASK_METRIC_PATHS["imu_to_hand_pose"], ("qwen3_omni_v6_lora", "interaction_text_prediction"): QWEN_FUTURE_TASK_METRIC_PATHS["interaction_text_prediction"], ("cosmos3_nano_future_window", "long_horizon_next_action"): COSMOS_NANO_LONG_HORIZON_METRICS_PATH, ("cosmos3_nano_future_window", "next_subtask_forecast"): COSMOS_NANO_NEXT_SUBTASK_METRICS_PATH, ("cosmos3_nano_future_window", "modality_reconstruction"): COSMOS_NANO_MODALITY_RECONSTRUCTION_METRICS_PATH, ("cosmos3_nano_future_window", "action_object_relation"): COSMOS_NANO_ACTION_OBJECT_METRICS_PATH, ("cosmos3_nano_future_window", "object_set_forecast"): COSMOS_NANO_OBJECT_SET_METRICS_PATH, ("cosmos3_nano_future_window", "time_to_transition"): COSMOS_NANO_TIME_TO_TRANSITION_METRICS_PATH, ("cosmos3_nano_future_window", "hand_trajectory_forecast"): COSMOS_NANO_RETRIEVAL_TASK_METRIC_PATHS["hand_trajectory_forecast"], ("cosmos3_nano_future_window", "caption_grounding"): COSMOS_NANO_RETRIEVAL_TASK_METRIC_PATHS["caption_grounding"], ("cosmos3_nano_future_window", "imu_to_hand_pose"): COSMOS_NANO_RETRIEVAL_TASK_METRIC_PATHS["imu_to_hand_pose"], ("cosmos3_nano_future_window", "camera_view_sync_retrieval"): COSMOS_NANO_RETRIEVAL_TASK_METRIC_PATHS["camera_view_sync_retrieval"], ("cosmos3_nano_future_window", "interaction_text_prediction"): COSMOS_NANO_INTERACTION_TEXT_TASK_METRIC_PATHS["interaction_text_prediction"], ("cosmos3_nano_future_window", "temporal_order"): COSMOS_NANO_FUTURE_ORDER_TASK_METRIC_PATHS["temporal_order"], ("cosmos3_nano_future_window", "misalignment_detection"): COSMOS_NANO_FUTURE_ORDER_TASK_METRIC_PATHS["misalignment_detection"], ("cosmos3_nano_future_window", "timeline_subtask"): COSMOS_NANO_CURRENT_TASK_METRIC_PATHS["timeline_subtask"], ("cosmos3_nano_future_window", "object_relevance"): COSMOS_NANO_CURRENT_TASK_METRIC_PATHS["object_relevance"], ("cosmos3_super_reasoner", "long_horizon_next_action"): COSMOS_SUPER_LONG_HORIZON_METRICS_PATH, ("cosmos3_super_reasoner", "time_to_transition"): COSMOS_SUPER_TIME_TO_TRANSITION_METRICS_PATH, ("cosmos3_super_reasoner", "hand_trajectory_forecast"): COSMOS_SUPER_RETRIEVAL_TASK_METRIC_PATHS["hand_trajectory_forecast"], ("cosmos3_super_reasoner", "cross_modal_retrieval"): COSMOS_SUPER_RETRIEVAL_TASK_METRIC_PATHS["cross_modal_retrieval"], ("cosmos3_super_reasoner", "modality_reconstruction"): COSMOS_SUPER_RETRIEVAL_TASK_METRIC_PATHS["modality_reconstruction"], ("cosmos3_super_reasoner", "imu_to_hand_pose"): COSMOS_SUPER_RETRIEVAL_TASK_METRIC_PATHS["imu_to_hand_pose"], ("cosmos3_super_reasoner", "camera_view_sync_retrieval"): COSMOS_SUPER_RETRIEVAL_TASK_METRIC_PATHS["camera_view_sync_retrieval"], ("cosmos3_super_reasoner", "interaction_text_prediction"): COSMOS_SUPER_INTERACTION_TEXT_TASK_METRIC_PATHS["interaction_text_prediction"], } SHORT_TASK_LABELS = { "timeline_action": "Action", "timeline_subtask": "Step", "transition_detection": "Boundary", "next_action": "Next act", "hand_trajectory_forecast": "Hand traj", "contact_prediction": "Contact", "object_relevance": "Objects", "caption_grounding": "Language", "cross_modal_retrieval": "X-modal", "modality_reconstruction": "Recon", "temporal_order": "Order", "misalignment_detection": "Sync", "long_horizon_next_action": "Long act", "next_subtask_forecast": "Long step", "interaction_text_prediction": "Interact txt", "action_object_relation": "Act+obj", "object_set_forecast": "Future obj", "imu_to_hand_pose": "IMU->hand", "camera_view_sync_retrieval": "Cam sync", "time_to_transition": "Time2bdry", } METHOD_DETAILS = { "minimal": "Single-episode simple heads over the public sample split.", "neural_mlp": "Single-episode compact PyTorch MLP heads on the same 20 task contracts.", "metadata128_simple": "128-episode aligned simple baselines: JSONL metadata/text tasks plus staged sensor-block tasks where the processed target exists.", "metadata128_neural_mlp": "128-episode aligned MLP baselines: JSONL metadata/text tasks plus staged sensor-block tasks where the processed target exists.", "raw128_simple": "128-episode 4430-dim sensor NPZ simple heads; tasks 15/19 use compact proxies.", "raw128_neural_mlp": "128-episode 4430-dim sensor NPZ MLP heads; tasks 15/19 use compact proxies.", "qwen3_omni_v6_lora": "Verified held-out Qwen3-Omni v6 LoRA metrics, plus task 16 and any completed private-GPU future/retrieval/sensor-target probes scored from task-specific JSON.", "cosmos3_super_reasoner": "Verified Cosmos3-Super base-weight Reasoner JSON-task evaluation, plus task 5/8/9/10/11/12/13/14/16/17/18/19/20 probes where public metrics exist.", "cosmos3_nano_future_window": "Verified Cosmos3-Nano future-window compatibility metrics, plus model-output probes for tasks 2/5/7/8/10/11/12/13/14/15/16/17/18/19 and a derived task-20 boundary timing probe scored from held-out future-window artifacts.", } PROXY_TASK_IDS = {"interaction_text_prediction", "camera_view_sync_retrieval"} SINGLE_EPISODE_SERIES = ("minimal", "neural_mlp") EPISODE128_SERIES = ( "metadata128_simple", "metadata128_neural_mlp", "raw128_simple", "raw128_neural_mlp", "qwen3_omni_v6_lora", "cosmos3_super_reasoner", "cosmos3_nano_future_window", ) RADAR_GROUP_SPECS = ( { "id": "single_episode", "title": "Single-episode sample", "subtitle": "Public-sample simple and neural task heads.", "series_ids": ("minimal", "neural_mlp"), }, { "id": "metadata_128", "title": "128-episode metadata/text", "subtitle": "Aligned JSONL metadata/text plus staged target blocks.", "series_ids": ("metadata128_simple", "metadata128_neural_mlp"), }, { "id": "raw_128", "title": "128-episode raw features", "subtitle": "4430-dim sensor-block heads; proxy axes are flagged.", "series_ids": ("raw128_simple", "raw128_neural_mlp"), }, { "id": "foundation_models", "title": "Foundation-model probes", "subtitle": "Verified Qwen3 and Cosmos task-specific outputs.", "series_ids": ("qwen3_omni_v6_lora", "cosmos3_super_reasoner", "cosmos3_nano_future_window"), }, ) STATUS_LABELS = { "scored": "scored", "proxy_scored": "proxy scored", "unsupported_without_required_target": "unsupported", "not_supported_by_metadata_only_package": "not supported", "not_evaluated_in_verified_package": "not evaluated", "missing_public_metric": "missing metric", } STATUS_SHORT = { "scored": "score", "proxy_scored": "proxy", "unsupported_without_required_target": "unsupported", "not_supported_by_metadata_only_package": "not supported", "not_evaluated_in_verified_package": "not evaluated", "missing_public_metric": "missing", } def read_json(path: Path) -> dict[str, Any]: return json.loads(path.read_text(encoding="utf-8")) if path.exists() else {} def foundation_task_metric_mapping( qwen_metrics: dict[str, Any], cosmos_super_metrics: dict[str, Any], ) -> dict[str, dict[str, str]]: mapping = {task_id: dict(series_metrics) for task_id, series_metrics in FOUNDATION_TASK_METRICS.items()} for task_id, path in QWEN_FUTURE_TASK_METRIC_PATHS.items(): payload = read_json(path) metric_key = QWEN_FUTURE_TASK_METRIC_KEYS[task_id] metric_value = payload.get(metric_key) if payload.get("status") != "pass" or not isinstance(metric_value, (int, float)): continue qwen_metrics[metric_key] = metric_value mapping.setdefault(task_id, {})["qwen3_omni_v6_lora"] = metric_key for task_id, path in COSMOS_SUPER_RETRIEVAL_TASK_METRIC_PATHS.items(): payload = read_json(path) metric_key = COSMOS_SUPER_RETRIEVAL_TASK_METRIC_KEYS[task_id] metric_value = payload.get(metric_key) if payload.get("status") != "pass" or not isinstance(metric_value, (int, float)): continue cosmos_super_metrics[metric_key] = metric_value mapping.setdefault(task_id, {})["cosmos3_super_reasoner"] = metric_key for task_id, path in COSMOS_SUPER_FUTURE_TASK_METRIC_PATHS.items(): payload = read_json(path) metric_key = COSMOS_SUPER_FUTURE_TASK_METRIC_KEYS[task_id] metric_value = payload.get(metric_key) if payload.get("status") != "pass" or not isinstance(metric_value, (int, float)): continue cosmos_super_metrics[metric_key] = metric_value mapping.setdefault(task_id, {})["cosmos3_super_reasoner"] = metric_key for task_id, path in COSMOS_SUPER_INTERACTION_TEXT_TASK_METRIC_PATHS.items(): payload = read_json(path) metric_key = COSMOS_SUPER_INTERACTION_TEXT_TASK_METRIC_KEYS[task_id] metric_value = payload.get(metric_key) if payload.get("status") != "pass" or not isinstance(metric_value, (int, float)): continue cosmos_super_metrics[metric_key] = metric_value mapping.setdefault(task_id, {})["cosmos3_super_reasoner"] = metric_key return mapping def read_a100_metadata_record(task_id: str, *, neural: bool = False) -> dict[str, Any] | None: path = METADATA128_BASELINE_DIR / ("neural_mlp" if neural else "") / task_id / "metrics.json" if not path.exists(): return None payload = read_json(path) status = payload.get("status", "missing_public_metric") score = payload.get("primary_score") if status == "pass" else None proxy_completion = bool(payload.get("proxy_completion")) return { "raw": score, "metric_key": payload.get("primary_metric"), "source": str(path.relative_to(ROOT)), "scope": payload.get("scope") or "multi_episode_128_aligned_baseline", "status": ( "proxy_scored" if status == "pass" and score is not None and proxy_completion else "scored" if status == "pass" and score is not None else "unsupported_without_required_target" ), "reason": payload.get("reason") or payload.get("error") or payload.get("proxy_reason") or ( "the 128-episode aligned artifact for this task does not contain a numeric public score" if status != "pass" else None ), } def read_a100_raw_metric(task_id: str, *, neural: bool = False) -> dict[str, Any] | None: candidates = ( [RAW128_BASELINE_DIR / "neural_mlp_raw128" / task_id / "metrics.json"] if neural else [ RAW128_BASELINE_DIR / "simple_raw128" / task_id / "metrics.json", RAW128_BASELINE_DIR / "simple_raw128_centroid" / task_id / "metrics.json", RAW128_BASELINE_DIR / "simple_raw128_ridge" / task_id / "metrics.json", ] ) for path in candidates: if not path.exists(): continue payload = read_json(path) if payload.get("status") != "pass": continue score = payload.get("primary_score") if score is None: continue return { "raw": score, "metric_key": payload.get("primary_metric"), "source": str(path.relative_to(ROOT)), "scope": "multi_episode_128_raw_sensor_feature_baseline", "status": "proxy_scored" if task_id in PROXY_TASK_IDS else "scored", "reason": "documented compact proxy completion for this raw128 task axis" if task_id in PROXY_TASK_IDS else None, } return None def clamp01(value: float) -> float: return max(0.0, min(1.0, value)) def score_from_raw(value: float | None, direction: str, best_lower: float | None = None) -> float | None: if value is None: return None if direction == "lower": if value <= 0: return 1.0 if best_lower is None or best_lower <= 0: return None return clamp01(best_lower / value) return clamp01(value) def format_metric(value: float | None) -> str: if value is None: return "n/a" if abs(value) >= 10: return f"{value:.2f}" if abs(value) >= 1: return f"{value:.3f}" return f"{value:.4f}" def status_label(status: str | None) -> str: return STATUS_LABELS.get(status or "", status or "unknown") def make_missing_record(series_id: str, task_id: str, metric_key: str | None) -> dict[str, Any]: if series_id.startswith("metadata128"): status = "not_supported_by_metadata_only_package" reason = ( "the 128-episode aligned rerun did not produce this task target; " "raw interaction text, paired camera-view embeddings, or a task-specific target builder is required" ) scope = "multi_episode_128_aligned_baseline" elif series_id in {"qwen3_omni_v6_lora", "cosmos3_super_reasoner", "cosmos3_nano_future_window"}: status = "not_evaluated_in_verified_package" reason = ( "the verified public model package did not ask this branch to emit that task target; " "a new task-specific evaluation package is required for a numeric score" ) scope = "multi_episode_128_partial_model_overlay" else: status = "missing_public_metric" reason = "no public metric artifact was found for this method-task pair" scope = SERIES.get(series_id, {}).get("scope") return { "raw": None, "metric_key": metric_key, "source": None, "scope": scope, "status": status, "reason": reason, "normalized_score": None, "raw_text": "n/a", } def finalize_value_record(item: dict[str, Any], direction: str, best_lower: float | None) -> None: raw = item.get("raw") item.setdefault("status", "scored" if isinstance(raw, (int, float)) else "missing_public_metric") item["normalized_score"] = score_from_raw(raw if isinstance(raw, (int, float)) else None, direction, best_lower) if item["normalized_score"] is None and item.get("status") in {"scored", "proxy_scored"}: item["status"] = "missing_public_metric" item.setdefault("reason", "numeric raw score could not be normalized for this task") item["raw_text"] = format_metric(raw if isinstance(raw, (int, float)) else None) item["status_label"] = status_label(item.get("status")) def matrix_rows(payload: dict[str, Any]) -> list[dict[str, Any]]: rows: list[dict[str, Any]] = [] for task in payload["tasks"]: for series_id, series_spec in SERIES.items(): value = task["values"][series_id] rows.append( { "task_number": task["task_number"], "task_id": task["task_id"], "task_label": task["label"], "series_id": series_id, "method": series_spec["label"], "status": value.get("status"), "status_label": value.get("status_label", status_label(value.get("status"))), "scored": value.get("normalized_score") is not None, "proxy_scored": value.get("status") == "proxy_scored", "raw": value.get("raw"), "raw_text": value.get("raw_text", "n/a"), "normalized_score": value.get("normalized_score"), "metric_key": value.get("metric_key"), "source": value.get("source"), "scope": value.get("scope"), "reason": value.get("reason"), } ) return rows def render_matrix_markdown(payload: dict[str, Any]) -> str: def score_cell(value: dict[str, Any]) -> str: if value.get("normalized_score") is None: return status_label(value.get("status")) raw_text = str(value.get("raw_text") or "n/a") norm = value.get("normalized_score") norm_text = f"{float(norm):.3f}" if norm is not None else "n/a" metric_key = str(value.get("metric_key") or "metric") status = "proxy" if value.get("status") == "proxy_scored" else "direct" return f"{raw_text}
{status}; norm {norm_text}; {metric_key}" lines = [ "# Task Method 20-Result Matrix", "", "Every method has one record for each of the 20 unified task contracts. Numeric scores appear only where a committed runner or verified package produced that task target.", "", "Legend: `score` = direct numeric task score and `proxy` = documented compact substitute target. The current public matrix is complete at 180/180 scored records; unsupported/not-evaluated labels are retained only for future regression audits.", "", "| Method | Records | Scored | Proxy scored | Scoreless | Status counts |", "| --- | ---: | ---: | ---: | ---: | --- |", ] for record in payload["series"]: counts = record["status_counts"] count_text = ", ".join(f"{status_label(key)} {value}" for key, value in sorted(counts.items())) lines.append( f"| {record['label']} | {record['result_record_count']} | {record['scored_task_count']} | " f"{record['proxy_scored_task_count']} | {record['scoreless_task_count']} | {count_text} |" ) lines.extend( [ "", "## Compact Score Matrix", "", "Cells show `raw metric value`, then `direct/proxy; normalized radar value; metric key`. The raw metric is the value to cite; the normalized value is the exact linear 0-1 score retained in JSON. The SVG radar uses sqrt(normalized score) only for visual radius, so low but real differences remain visible without changing the table values.", "", "| # | Task | " + " | ".join(spec["short_label"] for spec in SERIES.values()) + " |", "| ---: | --- | " + " | ".join("---" for _ in SERIES) + " |", ] ) for task in payload["tasks"]: cells = [score_cell(task["values"][series_id]) for series_id in SERIES] lines.append(f"| {task['task_number']:02d} | {task['label']} | " + " | ".join(cells) + " |") lines.extend( [ "", "## Status Matrix", "", "| # | Task | " + " | ".join(spec["short_label"] for spec in SERIES.values()) + " |", "| ---: | --- | " + " | ".join("---" for _ in SERIES) + " |", ] ) for task in payload["tasks"]: cells = [STATUS_SHORT.get(task["values"][series_id].get("status"), "unknown") for series_id in SERIES] lines.append(f"| {task['task_number']:02d} | {task['label']} | " + " | ".join(cells) + " |") lines.extend( [ "", "Sources and raw values are in `docs/data/task_method_20_result_matrix.json` and `docs/data/unified_task_model_radar.json`.", "", ] ) return "\n".join(lines) def filtered_radar_payload( payload: dict[str, Any], series_ids: tuple[str, ...], *, title: str, description: str, ) -> dict[str, Any]: selected = set(series_ids) series = [json.loads(json.dumps(record)) for record in payload["series"] if record["id"] in selected] tasks = [] for task in payload["tasks"]: task_copy = {key: json.loads(json.dumps(value)) for key, value in task.items() if key != "values"} task_copy["values"] = { series_id: json.loads(json.dumps(task["values"][series_id])) for series_id in series_ids if series_id in task["values"] } tasks.append(task_copy) rows = [ json.loads(json.dumps(row)) for row in payload["task_method_result_matrix"] if row.get("series_id") in selected ] selected_groups = radar_groups_for_series(series_ids) chart_design = json.loads(json.dumps(payload.get("chart_design", {}))) chart_design["method_count"] = len(series) chart_design["reason"] = ( f"This split view has {len(series)} methods and {sum(record.get('result_record_count', 0) for record in series)} " "method-task records; grouped radar panels keep related methods readable while retaining the unified source matrix." ) chart_design["groups"] = [ { "id": group["id"], "title": group["title"], "series_ids": list(group["series_ids"]), } for group in selected_groups ] return { "title": title, "status": payload["status"], "generated_at_utc": payload["generated_at_utc"], "description": description, "task_count": payload["task_count"], "method_count": len(series), "method_task_record_count": sum(record.get("result_record_count", 0) for record in series), "scored_method_task_count": sum(record.get("scored_task_count", 0) for record in series), "normalization_policy": payload["normalization_policy"], "chart_design": chart_design, "source_unified_radar": "docs/data/unified_task_model_radar.json", "source_result_matrix": "docs/data/task_method_20_result_matrix.json", "series": series, "tasks": tasks, "task_method_result_matrix": rows, } def point(cx: float, cy: float, radius: float, angle: float) -> tuple[float, float]: return cx + math.cos(angle) * radius, cy + math.sin(angle) * radius def svg_text( x: float, y: float, text: str, *, size: int = 16, fill: str = "#f4f8ef", anchor: str = "start", weight: int | str = 600, opacity: float = 1.0, ) -> str: return ( f'{html.escape(text)}' ) def split_text(text: str, max_chars: int) -> list[str]: words = text.split() if not words: return [""] lines: list[str] = [] current = words[0] for word in words[1:]: if len(current) + 1 + len(word) <= max_chars: current += " " + word else: lines.append(current) current = word lines.append(current) return lines def svg_text_lines( x: float, y: float, lines: list[str], *, size: int = 14, fill: str = "#f4f8ef", anchor: str = "start", weight: int | str = 600, line_height: float = 18, opacity: float = 1.0, ) -> list[str]: return [ svg_text(x, y + idx * line_height, line, size=size, fill=fill, anchor=anchor, weight=weight, opacity=opacity) for idx, line in enumerate(lines) ] def polyline(points: list[tuple[float, float]], *, fill: str, stroke: str, opacity: float, stroke_width: float, dash: str | None = None) -> str: coords = " ".join(f"{x:.1f},{y:.1f}" for x, y in points) dash_attr = f' stroke-dasharray="{dash}"' if dash else "" return ( f'' ) def build_payload() -> dict[str, Any]: suite = read_json(TASK_SUITE_PATH) qwen = read_json(QWEN_V6_METRICS_PATH) cosmos_super = read_json(COSMOS_SUPER_REASONER_METRICS_PATH) cosmos_nano = read_json(COSMOS_NANO_METRICS_PATH) cosmos_fd = read_json(COSMOS_SUPER_FD_METRICS_PATH) qwen.update(read_json(QWEN_ACTION_OBJECT_METRICS_PATH)) cosmos_super.update(read_json(COSMOS_SUPER_ACTION_OBJECT_METRICS_PATH)) cosmos_super.update(read_json(COSMOS_SUPER_CAPTION_GROUNDING_METRICS_PATH)) cosmos_super.update(read_json(COSMOS_SUPER_LONG_HORIZON_METRICS_PATH)) cosmos_super.update(read_json(COSMOS_SUPER_TIME_TO_TRANSITION_METRICS_PATH)) cosmos_nano.update(read_json(COSMOS_NANO_LONG_HORIZON_METRICS_PATH)) cosmos_nano.update(read_json(COSMOS_NANO_NEXT_SUBTASK_METRICS_PATH)) cosmos_nano.update(read_json(COSMOS_NANO_MODALITY_RECONSTRUCTION_METRICS_PATH)) cosmos_nano.update(read_json(COSMOS_NANO_ACTION_OBJECT_METRICS_PATH)) cosmos_nano.update(read_json(COSMOS_NANO_OBJECT_SET_METRICS_PATH)) cosmos_nano.update(read_json(COSMOS_NANO_TIME_TO_TRANSITION_METRICS_PATH)) for metrics_path in COSMOS_NANO_RETRIEVAL_TASK_METRIC_PATHS.values(): cosmos_nano.update(read_json(metrics_path)) for metrics_path in COSMOS_NANO_CURRENT_TASK_METRIC_PATHS.values(): cosmos_nano.update(read_json(metrics_path)) foundation_task_metrics = foundation_task_metric_mapping(qwen, cosmos_super) foundation_metrics = { "qwen3_omni_v6_lora": qwen, "cosmos3_super_reasoner": cosmos_super, "cosmos3_nano_future_window": cosmos_nano, } tasks: list[dict[str, Any]] = [] for row in suite.get("tasks", []): values: dict[str, dict[str, Any]] = { "minimal": { "raw": row.get("minimal_primary_metric"), "metric_key": row.get("metric_key"), "source": row.get("artifact_sources", {}).get("minimal_metrics"), "scope": "single_episode_public_sample", "status": "scored", }, "neural_mlp": { "raw": row.get("neural_primary_metric"), "metric_key": row.get("metric_key"), "source": row.get("artifact_sources", {}).get("neural_metrics"), "scope": "single_episode_public_sample", "status": "scored", }, } for series_id, metric_key in foundation_task_metrics.get(row["task_id"], {}).items(): source_path = FOUNDATION_METRIC_SOURCE_OVERRIDES.get( (series_id, row["task_id"]), FOUNDATION_METRIC_PATHS[series_id], ) source_metrics = ( read_json(source_path) if (series_id, row["task_id"]) in FOUNDATION_METRIC_SOURCE_OVERRIDES else foundation_metrics.get(series_id, {}) ) raw = source_metrics.get(metric_key) values[series_id] = { "raw": raw, "metric_key": metric_key, "source": str(source_path.relative_to(ROOT)), "scope": "multi_episode_128_partial_model_overlay", "status": "scored" if isinstance(raw, (int, float)) else "missing_public_metric", "reason": None if isinstance(raw, (int, float)) else f"metric {metric_key} is absent from the verified public package", } metadata_simple = read_a100_metadata_record(row["task_id"], neural=False) if metadata_simple: values["metadata128_simple"] = metadata_simple metadata_neural = read_a100_metadata_record(row["task_id"], neural=True) if metadata_neural: values["metadata128_neural_mlp"] = metadata_neural raw_simple = read_a100_raw_metric(row["task_id"], neural=False) if raw_simple: values["raw128_simple"] = raw_simple raw_neural = read_a100_raw_metric(row["task_id"], neural=True) if raw_neural: values["raw128_neural_mlp"] = raw_neural lower_values = [ item["raw"] for item in values.values() if row.get("metric_direction") == "lower" and isinstance(item.get("raw"), (int, float)) and item["raw"] > 0 ] best_lower = min(lower_values) if lower_values else None for series_id in SERIES: values.setdefault(series_id, make_missing_record(series_id, row["task_id"], row.get("metric_key"))) for item in values.values(): finalize_value_record(item, row.get("metric_direction", "higher"), best_lower) tasks.append( { "task_number": row["task_number"], "task_id": row["task_id"], "label": row.get("task_display_name", row["task_id"]), "axis_label": f"{row['task_number']:02d} {row.get('task_display_name', row['task_id'])}", "short_label": SHORT_TASK_LABELS.get(row["task_id"], row["task_id"].replace("_", " ").title()), "provenance_source": row.get("provenance_source"), "metric_key": row.get("metric_key"), "metric_name": row.get("metric_name"), "metric_direction": row.get("metric_direction"), "raw128_proxy_axis": row["task_id"] in PROXY_TASK_IDS, "values": values, } ) series_records = [] for series_id, spec in SERIES.items(): status_counts: dict[str, int] = {} for task in tasks: status = task["values"][series_id].get("status", "unknown") status_counts[status] = status_counts.get(status, 0) + 1 covered = sum(1 for task in tasks if task["values"].get(series_id, {}).get("normalized_score") is not None) proxy_count = status_counts.get("proxy_scored", 0) scoreless = len(tasks) - covered series_records.append( { "id": series_id, **spec, "method_detail": METHOD_DETAILS.get(series_id, spec["scope"]), "plotted_as": "grouped small-multiple radar panel with direct legend and coverage badges", "result_record_count": len(tasks), "scored_task_count": covered, "covered_task_count": covered, "proxy_scored_task_count": proxy_count, "scoreless_task_count": scoreless, "unsupported_task_count": status_counts.get("unsupported_without_required_target", 0) + status_counts.get("not_supported_by_metadata_only_package", 0), "not_evaluated_task_count": status_counts.get("not_evaluated_in_verified_package", 0), "status_counts": dict(sorted(status_counts.items())), "coverage_fraction": covered / max(len(tasks), 1), "result_record_fraction": len(tasks) / max(len(tasks), 1), } ) fd_loss = (cosmos_fd.get("loss_summary") or {}).get("mean") payload = { "title": "Unified 20-Task Model Radar", "status": "pass", "generated_at_utc": datetime.now(timezone.utc).isoformat(timespec="seconds"), "task_count": len(tasks), "method_count": len(SERIES), "method_task_record_count": len(tasks) * len(SERIES), "scored_method_task_count": sum( 1 for task in tasks for series_id in SERIES if task["values"][series_id].get("normalized_score") is not None ), "normalization_policy": { "higher_is_better": "bounded metrics are plotted directly on 0-1 axes after clipping to [0, 1]", "lower_is_better": "lower-error metrics are converted to best_observed_value / raw_value within the same task", "raw_values": "raw metric values, metric keys, and sources are retained in this JSON; the SVG is an overview, not a replacement for the metric table", "radar_visual_radius": "SVG radar panels use sqrt(normalized_score) for radius so polygon area remains closer to the score and low-valued but real differences stay visible; the JSON and matrix retain exact linear normalized_score values", "result_record_policy": "every method has 20 task records; the current public release has 180/180 scored rows with proxy flags and reasons retained where compact substitute targets are used", "foundation_model_overlay": "Qwen3-Omni and Cosmos3 are grouped in the foundation-model radar panel. All current public model rows have 20 scored task records, with source paths retained for every metric.", "metadata_128_overlay": "128-episode aligned baselines are grouped in the metadata/text radar panel. Numeric scores come from JSONL metadata/text tasks plus staged sensor-block targets when the processed target exists.", "raw_128_overlay": "128-episode raw-feature baselines are grouped in the raw-feature radar panel. Eighteen axes use direct task targets; interaction text and camera-view sync are completed with documented compact proxies because raw interaction strings and paired video-view embeddings are absent from the 128 export.", }, "chart_design": { "mode": "grouped_small_multiples", "method_count": len(SERIES), "reason": "The public release has nine methods and 180 scored records; small-multiple radar panels avoid a nine-polygon overlay while keeping every method visible.", "groups": [ { "id": group["id"], "title": group["title"], "series_ids": list(group["series_ids"]), } for group in RADAR_GROUP_SPECS ], "visual_radius_transform": "sqrt(normalized_score)", "exact_value_source": "docs/data/task_method_20_result_matrix.json", }, "series": series_records, "tasks": tasks, "model_branch_cards": [ { "id": "metadata128_simple", "title": "128ep Aligned Simple", "status": "a100_rerun_pass", "coverage": f"20 records / {next(item for item in series_records if item['id'] == 'metadata128_simple')['scored_task_count']} scored aligned axes", "headline": "34,269 rows; train/val/test 25,629/4,608/4,032", "source": str((METADATA128_BASELINE_DIR / "summary_report.json").relative_to(ROOT)), }, { "id": "metadata128_neural_mlp", "title": "128ep Aligned NN", "status": "a100_rerun_pass", "coverage": f"20 records / {next(item for item in series_records if item['id'] == 'metadata128_neural_mlp')['scored_task_count']} scored aligned axes", "headline": "compact MLP heads over metadata/text and staged block features", "source": str((METADATA128_BASELINE_DIR / "summary_report.json").relative_to(ROOT)), }, { "id": "raw128_simple", "title": "128ep Raw Simple", "status": "a100_raw20_complete_with_documented_proxies", "coverage": f"20 records / {next(item for item in series_records if item['id'] == 'raw128_simple')['scored_task_count']} scored records; 18 direct + 2 proxy", "headline": "34,269 windows; centroid/ridge heads over 4430-dim sensor blocks", "source": str((RAW128_BASELINE_DIR / "run_summary_all.json").relative_to(ROOT)), }, { "id": "raw128_neural_mlp", "title": "128ep Raw NN", "status": "a100_raw20_complete_with_documented_proxies", "coverage": f"20 records / {next(item for item in series_records if item['id'] == 'raw128_neural_mlp')['scored_task_count']} scored records; 18 direct + 2 proxy", "headline": "MLP heads over staged features; tasks 15/19 use compact proxies", "source": str((RAW128_BASELINE_DIR / "run_summary_all.json").relative_to(ROOT)), }, { "id": "qwen3_omni_v6_lora", "title": "Qwen3-Omni v6 LoRA", "status": "verified", "task_aligned_axes": SERIES["qwen3_omni_v6_lora"]["short_label"], "coverage": f"20 records / {next(item for item in series_records if item['id'] == 'qwen3_omni_v6_lora')['scored_task_count']} scored task-aligned axes", "headline": f"JSON validity {format_metric(qwen.get('json_validity_rate'))}; action macro-F1 {format_metric(qwen.get('action_macro_f1'))}", "source": str(QWEN_V6_METRICS_PATH.relative_to(ROOT)), }, { "id": "cosmos3_super_reasoner", "title": "Cosmos3-Super Reasoner", "status": "verified_base_weight_eval", "coverage": f"20 records / {next(item for item in series_records if item['id'] == 'cosmos3_super_reasoner')['scored_task_count']} scored task-aligned axes", "headline": f"JSON validity {format_metric(cosmos_super.get('json_validity_rate'))}; action macro-F1 {format_metric(cosmos_super.get('action_macro_f1'))}", "source": str(COSMOS_SUPER_REASONER_METRICS_PATH.relative_to(ROOT)), }, { "id": "cosmos3_nano_future_window", "title": "Cosmos3-Nano Future Window", "status": "verified_compatibility_eval", "coverage": f"20 records / {next(item for item in series_records if item['id'] == 'cosmos3_nano_future_window')['scored_task_count']} scored task-aligned axes", "headline": f"future retrieval MRR {format_metric(cosmos_nano.get('future_retrieval_mrr'))}; transition accuracy {format_metric(cosmos_nano.get('transition_accuracy'))}", "source": str(COSMOS_NANO_METRICS_PATH.relative_to(ROOT)), }, { "id": "cosmos3_super_forward_dynamics_lora", "title": "Cosmos3-Super Forward-Dynamics LoRA", "status": "verified_finetuned_adapter", "coverage": "separate camera-pose proxy target, not plotted on the 20 task axes", "headline": f"test MSE {format_metric(fd_loss)} over 448 held-out rows", "source": str(COSMOS_SUPER_FD_METRICS_PATH.relative_to(ROOT)), }, ], } payload["task_method_result_matrix"] = matrix_rows(payload) return payload def svg_shape( tag: str, points: list[tuple[float, float]], *, fill: str, fill_opacity: float, stroke: str, stroke_opacity: float = 0.92, stroke_width: float = 2.0, dash: str | None = None, ) -> str: coords = " ".join(f"{x:.1f},{y:.1f}" for x, y in points) dash_attr = f' stroke-dasharray="{dash}"' if dash else "" return ( f'<{tag} points="{coords}" fill="{fill}" fill-opacity="{fill_opacity:.3f}" ' f'stroke="{stroke}" stroke-opacity="{stroke_opacity:.3f}" stroke-width="{stroke_width:.1f}" ' f'stroke-linejoin="round" stroke-linecap="round"{dash_attr}/>' ) def radar_radius(score: float | None, radius: float) -> float | None: if score is None: return None return radius * math.sqrt(clamp01(float(score))) def radar_groups_for_series(series_ids: tuple[str, ...]) -> list[dict[str, Any]]: selected = set(series_ids) groups: list[dict[str, Any]] = [] assigned: set[str] = set() for group in RADAR_GROUP_SPECS: present = tuple(series_id for series_id in group["series_ids"] if series_id in selected) if not present: continue groups.append({**group, "series_ids": present}) assigned.update(present) remaining = tuple(series_id for series_id in series_ids if series_id not in assigned) if remaining: groups.append( { "id": "other_methods", "title": "Other methods", "subtitle": "Additional method rows retained from the matrix.", "series_ids": remaining, } ) return groups def draw_radar_grid( parts: list[str], *, cx: float, cy: float, radius: float, tasks: list[dict[str, Any]], angles: list[float], label_size: int, ) -> None: for value in (0.05, 0.25, 0.50, 0.75, 1.0): ring_radius = radius * math.sqrt(value) ring = [point(cx, cy, ring_radius, angle) for angle in angles] parts.append( svg_shape( "polygon", ring, fill="none", fill_opacity=0, stroke="#ccffa0", stroke_opacity=0.16, stroke_width=1.0, ) ) parts.append(svg_text(cx + 8, cy - ring_radius + 4, f"{value:.2g}", size=max(9, label_size - 2), fill="#a5afa2", weight=620, opacity=0.72)) for task, angle in zip(tasks, angles): x, y = point(cx, cy, radius, angle) parts.append(f'') lx, ly = point(cx, cy, radius + 28, angle) proxy = task["task_id"] in PROXY_TASK_IDS color = "#f472b6" if proxy else "#ccffa0" parts.append(f'') parts.append(svg_text(lx, ly + label_size * 0.33, f"{task['task_number']:02d}", size=label_size, fill=color, anchor="middle", weight=850, opacity=0.98)) def draw_radar_series( parts: list[str], *, cx: float, cy: float, radius: float, tasks: list[dict[str, Any]], angles: list[float], series_id: str, stroke_width: float, fill_opacity: float, ) -> None: spec = SERIES[series_id] valid_points: list[tuple[float, float]] = [] scored_count = 0 for task, angle in zip(tasks, angles): value = task["values"].get(series_id, {}) score = value.get("normalized_score") plotted_radius = radar_radius(score, radius) if plotted_radius is None: continue scored_count += 1 valid_points.append(point(cx, cy, plotted_radius, angle)) if len(valid_points) >= 3 and scored_count == len(tasks): parts.append( svg_shape( "polygon", valid_points, fill=spec["color"], fill_opacity=fill_opacity, stroke=spec["color"], stroke_width=stroke_width, dash=spec.get("stroke_dasharray"), ) ) elif len(valid_points) >= 2: parts.append( svg_shape( "polyline", valid_points, fill="none", fill_opacity=0, stroke=spec["color"], stroke_width=stroke_width, dash=spec.get("stroke_dasharray"), ) ) for task, angle in zip(tasks, angles): value = task["values"].get(series_id, {}) plotted_radius = radar_radius(value.get("normalized_score"), radius) if plotted_radius is None: continue px, py = point(cx, cy, plotted_radius, angle) proxy = value.get("status") == "proxy_scored" parts.append( f'' ) def draw_radar_panel( parts: list[str], *, x: float, y: float, width: float, height: float, group: dict[str, Any], payload: dict[str, Any], series_record_by_id: dict[str, dict[str, Any]], large: bool = False, ) -> None: tasks = payload["tasks"] angles = [-math.pi / 2 + 2 * math.pi * i / len(tasks) for i in range(len(tasks))] panel_bg = "#071007" parts.append(f'') parts.append(svg_text(x + 28, y + 44, str(group["title"]), size=26 if large else 20, weight=850)) parts.append(svg_text(x + 28, y + 74, str(group["subtitle"]), size=14 if large else 12, fill="#a5afa2", weight=600)) if large: cx = x + width * 0.39 cy = y + height * 0.56 radius = min(width * 0.20, height * 0.34) legend_x = x + width * 0.68 legend_y = y + 160 label_size = 12 else: cx = x + width * 0.38 cy = y + height * 0.57 radius = min(width * 0.18, height * 0.30) legend_x = x + width * 0.67 legend_y = y + 122 label_size = 8 draw_radar_grid(parts, cx=cx, cy=cy, radius=radius, tasks=tasks, angles=angles, label_size=label_size) series_ids = tuple(group["series_ids"]) fill_opacity = 0.065 if len(series_ids) <= 2 else 0.040 for idx, series_id in enumerate(series_ids): draw_radar_series( parts, cx=cx, cy=cy, radius=radius, tasks=tasks, angles=angles, series_id=series_id, stroke_width=4.3 if large else 3.2, fill_opacity=max(0.026, fill_opacity - idx * 0.010), ) parts.append(svg_text(legend_x, legend_y - 34, "Methods", size=17 if large else 14, fill="#ccffa0", weight=850)) for idx, series_id in enumerate(series_ids): record = series_record_by_id[series_id] color = record["color"] row_y = legend_y + idx * (92 if large else 74) parts.append(f'') parts.append(f'') parts.append(svg_text(legend_x + 74, row_y + 5, record["label"], size=15 if large else 12, weight=850)) coverage = f"{record['scored_task_count']}/20 scored" proxy = record.get("proxy_scored_task_count", 0) if proxy: coverage += f" ยท {proxy} proxy" parts.append(svg_text(legend_x + 74, row_y + (28 if large else 22), coverage, size=12 if large else 10, fill=color, weight=800)) detail = split_text(METHOD_DETAILS.get(series_id, record["scope"]), 50 if large else 44)[:2] parts.extend(svg_text_lines(legend_x + 74, row_y + (49 if large else 40), detail, size=10 if large else 8, fill="#a5afa2", weight=560, line_height=13 if large else 10)) parts.append(svg_text(x + 28, y + height - 30, "Radius = sqrt(normalized score); exact raw and normalized values are in the matrix.", size=11 if large else 9, fill="#a5afa2", weight=600, opacity=0.88)) def draw_task_key(parts: list[str], *, x: float, y: float, width: float, tasks: list[dict[str, Any]], compact: bool = False) -> None: height = 292 if not compact else 250 parts.append(f'') parts.append(svg_text(x + 28, y + 42, "20-task axis key", size=20, weight=850)) parts.append(svg_text(x + 250, y + 42, "Task numbers stay on the radar; full names and proxy axes stay here.", size=13, fill="#a5afa2", weight=600)) col_count = 4 col_w = (width - 56) / col_count row_h = 42 if not compact else 36 for idx, task in enumerate(tasks): col = idx // 5 row = idx % 5 x0 = x + 28 + col * col_w y0 = y + 84 + row * row_h proxy = task["task_id"] in PROXY_TASK_IDS color = "#f472b6" if proxy else "#ccffa0" parts.append(f'') parts.append(svg_text(x0 + 17.5, y0 + 1, f"{task['task_number']:02d}", size=10, fill=color, anchor="middle", weight=850)) task_name = str(task["label"]) if len(task_name) > 34: task_name = task_name[:31].rstrip() + "..." parts.append(svg_text(x0 + 46, y0 - 2, task_name, size=11 if not compact else 10, fill="#f4f8ef", weight=800)) metric = str(task.get("metric_name") or task.get("metric_key") or "") direction = "lower" if task.get("metric_direction") == "lower" else "higher" metric_text = f"{metric}; {direction} better" if proxy: metric_text += "; proxy axis" if len(metric_text) > 43: metric_text = metric_text[:40].rstrip() + "..." parts.append(svg_text(x0 + 46, y0 + 16, metric_text, size=9, fill="#a5afa2", weight=560)) def draw_reading_rules(parts: list[str], *, y: float, reading_rules: tuple[str, str, str] | None) -> None: if reading_rules is None: reading_rules = ( "Use the panels for shape and coverage; use docs/data/task_method_20_result_matrix.json for exact ranks, raw values, direct/proxy flags, and sources.", "The old nine-method overlay was replaced by grouped small multiples so each radar compares only related methods.", "SVG radius uses sqrt(normalized_score) for readable area; JSON normalized_score remains linear and unchanged.", ) parts.append(f'') parts.append(svg_text(100, y + 33, "Reading rules", size=16, fill="#ccffa0", weight=850)) parts.append(svg_text(230, y + 33, reading_rules[0], size=13, fill="#dce8d7", weight=650)) parts.append(svg_text(230, y + 61, reading_rules[1], size=12, fill="#a5afa2", weight=560)) parts.append(svg_text(230, y + 87, reading_rules[2], size=12, fill="#a5afa2", weight=560)) def render_svg( payload: dict[str, Any], *, series_ids: tuple[str, ...] | None = None, polygon_series_ids: tuple[str, ...] = ("minimal", "neural_mlp"), title: str | None = None, subtitle: str | None = None, context_line: str | None = None, chip_specs: list[tuple[str, str]] | None = None, reading_rules: tuple[str, str, str] | None = None, ) -> str: del polygon_series_ids width, height = 2400, 1900 tasks = payload["tasks"] if series_ids is None: series_ids = tuple(record["id"] for record in payload["series"]) groups = radar_groups_for_series(series_ids) series_record_by_id = {record["id"]: record for record in payload["series"]} parts = [ f'', "", '', '', "", '', '', '', svg_text(70, 86, title or payload.get("title", "20-Task Model Radar"), size=36, weight=850), svg_text( 70, 122, subtitle or "Grouped small-multiple radars for the nine-method, 180-result comparison.", size=18, fill="#dce8d7", weight=650, ), svg_text( 70, 150, context_line or "Related methods are compared in separate panels to avoid the unreadable nine-polygon overlay.", size=15, fill="#a5afa2", weight=560, ), ] if chip_specs is None: chip_specs = [ ("20 task axes", "#ccffa0"), (f"{payload['method_task_record_count']} method-task records", "#67e8d1"), (f"{payload['scored_method_task_count']} scored records", "#22d3ee"), ("grouped small multiples", "#f59e0b"), ("sqrt visual radius", "#f472b6"), ] chip_x = 70 for label, color in chip_specs: chip_w = max(128, min(280, 18 + len(label) * 8.3)) parts.append(f'') parts.append(svg_text(chip_x + 16, 197, label, size=13, fill=color, weight=780)) chip_x += chip_w + 12 if len(groups) == 1: draw_radar_panel( parts, x=70, y=242, width=2260, height=1040, group=groups[0], payload=payload, series_record_by_id=series_record_by_id, large=True, ) key_y = 1322 elif len(groups) == 3: panel_w, panel_h = 1100, 545 start_x, start_y = 70, 248 gap_x, gap_y = 30, 34 for idx, group in enumerate(groups[:2]): draw_radar_panel( parts, x=start_x + idx * (panel_w + gap_x), y=start_y, width=panel_w, height=panel_h, group=group, payload=payload, series_record_by_id=series_record_by_id, ) draw_radar_panel( parts, x=start_x, y=start_y + panel_h + gap_y, width=panel_w * 2 + gap_x, height=panel_h, group=groups[2], payload=payload, series_record_by_id=series_record_by_id, large=True, ) key_y = 1438 else: panel_w, panel_h = 1100, 545 start_x, start_y = 70, 248 gap_x, gap_y = 30, 34 for idx, group in enumerate(groups): col = idx % 2 row = idx // 2 draw_radar_panel( parts, x=start_x + col * (panel_w + gap_x), y=start_y + row * (panel_h + gap_y), width=panel_w, height=panel_h, group=group, payload=payload, series_record_by_id=series_record_by_id, ) key_y = 1438 draw_task_key(parts, x=70, y=key_y, width=2260, tasks=tasks, compact=len(groups) == 1) draw_reading_rules(parts, y=1750 if len(groups) > 1 else 1632, reading_rules=reading_rules) parts.append("") return "\n".join(parts) + "\n" def main() -> int: payload = build_payload() single_payload = filtered_radar_payload( payload, SINGLE_EPISODE_SERIES, title="Single-Episode 20-Task Radar", description="Minimal and Neural MLP baselines on the one public sample episode, both scored on all 20 task contracts.", ) episode128_payload = filtered_radar_payload( payload, EPISODE128_SERIES, title="128-Episode 20-Task Radar", description="Selected 128-episode metadata/raw baselines plus verified Qwen3-Omni v6, Cosmos3-Super, and Cosmos3-Nano diagnostics. Every method has 20 records; numeric scores appear only where the public artifact produced that task target.", ) OUTPUT_JSON.parent.mkdir(parents=True, exist_ok=True) OUTPUT_SINGLE_JSON.parent.mkdir(parents=True, exist_ok=True) OUTPUT_128_JSON.parent.mkdir(parents=True, exist_ok=True) OUTPUT_MATRIX_JSON.parent.mkdir(parents=True, exist_ok=True) OUTPUT_SVG.parent.mkdir(parents=True, exist_ok=True) OUTPUT_SINGLE_SVG.parent.mkdir(parents=True, exist_ok=True) OUTPUT_128_SVG.parent.mkdir(parents=True, exist_ok=True) OUTPUT_JSON.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8") OUTPUT_SINGLE_JSON.write_text(json.dumps(single_payload, indent=2) + "\n", encoding="utf-8") OUTPUT_128_JSON.write_text(json.dumps(episode128_payload, indent=2) + "\n", encoding="utf-8") matrix_payload = { "title": "Task Method 20-Result Matrix", "status": "pass", "generated_at_utc": payload["generated_at_utc"], "task_count": payload["task_count"], "method_count": payload["method_count"], "method_task_record_count": payload["method_task_record_count"], "scored_method_task_count": payload["scored_method_task_count"], "series": payload["series"], "records": payload["task_method_result_matrix"], } OUTPUT_MATRIX_JSON.write_text(json.dumps(matrix_payload, indent=2) + "\n", encoding="utf-8") OUTPUT_MATRIX_MD.write_text(render_matrix_markdown(payload), encoding="utf-8") OUTPUT_SVG.write_text(render_svg(payload), encoding="utf-8") OUTPUT_SINGLE_SVG.write_text( render_svg( single_payload, series_ids=SINGLE_EPISODE_SERIES, polygon_series_ids=SINGLE_EPISODE_SERIES, title="Single-Episode 20-Task Radar", subtitle="One public sample episode; both baseline heads score every task axis.", context_line="This view isolates the 1-episode task-head setup from the selected-128 model diagnostics.", chip_specs=[ ("20 task axes", "#ccffa0"), ("40 method-task records", "#67e8d1"), ("40 scored records", "#22d3ee"), ("2 filled baseline polygons", "#f472b6"), ], reading_rules=( "Both single-episode methods have numeric scores on every one of the 20 task contracts.", "This radar is the cleanest view of public-sample Minimal vs Neural MLP behavior before any 128-episode scale-up.", "Raw metric values and sources remain in docs/data/single_episode_task_model_radar.json and docs/data/task_method_20_result_matrix.json.", ), ), encoding="utf-8", ) OUTPUT_128_SVG.write_text( render_svg( episode128_payload, series_ids=EPISODE128_SERIES, polygon_series_ids=("raw128_simple", "raw128_neural_mlp"), title="128-Episode 20-Task Radar", subtitle="Selected 96/16/16 episode split; all seven 128-episode rows score all 20 axes.", context_line="Metadata, raw-feature, and foundation-model methods are separated into grouped radar panels instead of one crowded overlay.", chip_specs=[ ("20 task axes", "#ccffa0"), ("140 method-task records", "#67e8d1"), (f"{episode128_payload['scored_method_task_count']} scored records", "#22d3ee"), ("40/40 raw128 pass", "#f59e0b"), ("0 scoreless", "#f472b6"), ], reading_rules=( "Every 128-episode method has 20 result records and all 140 rows are scored in this split radar.", "Raw128 Simple and Raw128 NN are complete 20/20 scored multi-episode baselines; tasks 15/19 are documented compact proxies and are marked in the task key.", "Qwen3-Omni and Cosmos3 rows use verified held-out outputs or derived probe artifacts; source paths stay in the matrix JSON.", ), ), encoding="utf-8", ) print(f"PASS: wrote {OUTPUT_JSON}") print(f"PASS: wrote {OUTPUT_SINGLE_JSON}") print(f"PASS: wrote {OUTPUT_128_JSON}") print(f"PASS: wrote {OUTPUT_MATRIX_JSON}") print(f"PASS: wrote {OUTPUT_MATRIX_MD}") print(f"PASS: wrote {OUTPUT_SVG}") print(f"PASS: wrote {OUTPUT_SINGLE_SVG}") print(f"PASS: wrote {OUTPUT_128_SVG}") return 0 if __name__ == "__main__": raise SystemExit(main())