{ "title": "Qwen3-Omni v5 versus v6 verified comparison", "status": "pass", "generated_at_utc": "2026-06-14T00:00:00+00:00", "comparison_scope": "Verified Qwen3-Omni LoRA held-out test packages on the same dense multiscale selected 128-episode dataset.", "release_policy": { "latest_verified_qwen_row": "v6", "pinned_release_tag": "ropedia-xperience-10m-v5", "recommendation": "Publish v6 as the latest verified branch and create a separate v6 tag only if the project wants a formal experimental release; do not move the v5 tag." }, "runs": { "v5": { "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full", "train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora", "epochs": 1, "eval_samples": 4032, "held_out_episode_count": 14, "metrics": { "json_validity_rate": 1.0, "action_macro_f1": 0.002289711036077459, "subtask_accuracy": 0.011194029850746268, "transition_accuracy": 0.9908234126984127, "next_action_accuracy": 0.053618594823032224, "contact_accuracy": 0.7864583333333334, "object_micro_f1": 0.31614599936244814 } }, "v6": { "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full", "train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora", "epochs": 2, "lora_rank": 64, "learning_rate": 0.00005, "eval_samples": 4032, "held_out_episode_count": 14, "metrics": { "json_validity_rate": 0.9990079365079365, "action_macro_f1": 0.0028830723979596335, "subtask_accuracy": 0.0037313432835820895, "transition_accuracy": 0.9898313492063492, "next_action_accuracy": 0.04305335446381405, "contact_accuracy": 0.8177083333333334, "object_micro_f1": 0.3064982378331287 } } }, "deltas_v6_minus_v5": { "json_validity_rate": -0.0009920634920634888, "action_macro_f1": 0.0005933613618821745, "subtask_accuracy": -0.007462686567164178, "transition_accuracy": -0.0009920634920634888, "next_action_accuracy": -0.010565240359218173, "contact_accuracy": 
0.03125, "object_micro_f1": -0.009647761529319436 }, "wins_for_v6": [ "action_macro_f1", "contact_accuracy" ], "wins_for_v5": [ "json_validity_rate", "subtask_accuracy", "transition_accuracy", "next_action_accuracy", "object_micro_f1" ], "interpretation": "v6 is the newest verified Qwen LoRA branch and is better for action macro-F1 and contact accuracy, but v5 remains the safer pinned release row for JSON validity, subtask/next-action accuracy, transition accuracy, and object micro-F1." }