{
    "title": "Verified Qwen3-Omni LoRA 128-Episode Held-Out Result",
    "status": "verified_full_128_episode_diagnostic_result",
    "status_date": "2026-06-08",
    "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
    "adapter": "Qwen3-Omni LoRA",
    "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
    "split_policy": {
        "unit": "episode",
        "selected_episode_counts": {
            "test": 16,
            "train": 96,
            "val": 16
        },
        "exported_window_counts": {
            "train": 2848,
            "val": 512,
            "test": 448
        },
        "exported_episode_counts": {
            "train": 89,
            "val": 16,
            "test": 14
        },
        "skipped_selected_episodes": 9,
        "leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation."
    },
    "training": {
        "num_processes": 8,
        "epochs": 4,
        "lora_rank": 16,
        "lora_alpha": 32,
        "lora_dropout": 0.05,
        "num_train_samples": 2848,
        "num_val_samples": 512,
        "history": [
            {
                "epoch": 1,
                "train_loss": 0.40796751019628613,
                "val_loss": 0.03258896619081497,
                "global_step": 356
            },
            {
                "epoch": 2,
                "train_loss": 0.027628723937453012,
                "val_loss": 0.027754632756114006,
                "global_step": 712
            },
            {
                "epoch": 3,
                "train_loss": 0.02446955946807781,
                "val_loss": 0.026343274861574173,
                "global_step": 1068
            },
            {
                "epoch": 4,
                "train_loss": 0.022728607045444712,
                "val_loss": 0.025629229843616486,
                "global_step": 1424
            }
        ],
        "loss": "answer-token cross entropy over supervised JSON tokens",
        "note": "This current Qwen3-Omni LoRA result is the v4 four-epoch full held-out evaluation on the selected 96/16/16 episode setup."
    },
    "evaluation": {
        "split": "test",
        "num_samples": 448,
        "held_out_episode_count": 14,
        "json_validity_rate": 1.0,
        "action_macro_f1": 0.0018678269676001454,
        "subtask_accuracy": 0.0,
        "transition_accuracy": 0.9732142857142857,
        "next_action_accuracy": 0.033482142857142856,
        "contact_accuracy": 0.7299107142857143,
        "object_micro_f1": 0.31099781500364165,
        "quality_target": {
            "json_validity_rate": 0.98,
            "status": "met"
        },
        "previous_strict_label_v3_action_macro_f1": 0.0021983997167007384,
        "previous_structured_json_v2_json_validity_rate": 0.9977678571428571
    },
    "interpretation": "This is the current verified Qwen3-Omni LoRA diagnostic result for the selected 128-episode setup. The v4 four-epoch package reaches 100% JSON validity and slightly improves next-action, contact, and object metrics versus the prior strict-label v3 package, while action and subtask classification remain weak on held-out episodes. Treat it as a diagnostic baseline and error-analysis source, not as a strong Xperience-10M action recognizer.",
    "public_package": {
        "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full",
        "audit_status": "pass",
        "contains_raw_xperience10m_data": false,
        "contains_qwen_base_weights": false,
        "contains_lora_weights": false,
        "adapter_weights_repo": "cy0307/ropedia-qwen3-omni-lora-128ep"
    },
    "required_next_steps": [
        "Use the v4 predictions for action/subtask error analysis, unseen-label debugging, and hierarchical action-family scoring.",
        "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json for the no-new-episode suite push before requesting more storage.",
        "Keep the existing Qwen LoRA adapter repository as the weight-bearing artifact and publish future Qwen v5 runs as separate verified packages.",
        "Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model branch: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
    ]
}