{ "title": "Verified Qwen3-Omni LoRA 128-Episode Held-Out Result", "status": "verified_full_128_episode_diagnostic_result", "status_date": "2026-06-08", "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct", "adapter": "Qwen3-Omni LoRA", "dataset": "Ropedia Xperience-10M selected 128-episode pilot", "split_policy": { "unit": "episode", "selected_episode_counts": { "test": 16, "train": 96, "val": 16 }, "exported_window_counts": { "train": 2848, "val": 512, "test": 448 }, "exported_episode_counts": { "train": 89, "val": 16, "test": 14 }, "skipped_selected_episodes": 9, "leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation." }, "training": { "num_processes": 8, "epochs": 4, "lora_rank": 16, "lora_alpha": 32, "lora_dropout": 0.05, "num_train_samples": 2848, "num_val_samples": 512, "history": [ { "epoch": 1, "train_loss": 0.40796751019628613, "val_loss": 0.03258896619081497, "global_step": 356 }, { "epoch": 2, "train_loss": 0.027628723937453012, "val_loss": 0.027754632756114006, "global_step": 712 }, { "epoch": 3, "train_loss": 0.02446955946807781, "val_loss": 0.026343274861574173, "global_step": 1068 }, { "epoch": 4, "train_loss": 0.022728607045444712, "val_loss": 0.025629229843616486, "global_step": 1424 } ], "loss": "answer-token cross entropy over supervised JSON tokens", "note": "This current Qwen3-Omni LoRA result is the v4 four-epoch full held-out evaluation on the selected 96/16/16 episode setup." 
}, "evaluation": { "split": "test", "num_samples": 448, "held_out_episode_count": 14, "json_validity_rate": 1.0, "action_macro_f1": 0.0018678269676001454, "subtask_accuracy": 0.0, "transition_accuracy": 0.9732142857142857, "next_action_accuracy": 0.033482142857142856, "contact_accuracy": 0.7299107142857143, "object_micro_f1": 0.31099781500364165, "quality_target": { "json_validity_rate": 0.98, "status": "met" }, "previous_strict_label_v3_action_macro_f1": 0.0021983997167007384, "previous_structured_json_v2_json_validity_rate": 0.9977678571428571 }, "interpretation": "This is the current verified Qwen3-Omni LoRA diagnostic result for the selected 128-episode setup. The v4 four-epoch package reaches 100% JSON validity and slightly improves next-action, contact, and object metrics versus the prior strict-label v3 package, while action and subtask classification remain weak on held-out episodes. Treat it as a diagnostic baseline and error-analysis source, not as a strong Xperience-10M action recognizer.", "public_package": { "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_structured_json_v4_4epoch_full8gpu_lora_eval_test_full", "audit_status": "pass", "contains_raw_xperience10m_data": false, "contains_qwen_base_weights": false, "contains_lora_weights": false, "adapter_weights_repo": "cy0307/ropedia-qwen3-omni-lora-128ep" }, "required_next_steps": [ "Use the v4 predictions for action/subtask error analysis, unseen-label debugging, and hierarchical action-family scoring.", "Use TASK_SUITE_ENHANCEMENT_128.md and docs/data/task_suite_enhancement_128.json for the no-new-episode suite push before requesting more storage.", "Keep the existing Qwen LoRA adapter repository as the weight-bearing artifact and publish future Qwen v5 runs as separate verified packages.", "Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model branch: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels." ] }