File size: 2,814 Bytes
{
"title": "Qwen3-Omni v5 versus v6 verified comparison",
"status": "pass",
"generated_at_utc": "2026-06-14T00:00:00+00:00",
"comparison_scope": "Verified Qwen3-Omni LoRA held-out test packages on the same dense multiscale selected 128-episode dataset.",
"release_policy": {
"latest_verified_qwen_row": "v6",
"pinned_release_tag": "ropedia-xperience-10m-v5",
"recommendation": "Publish v6 as the latest verified branch and create a separate v6 tag only if the project wants a formal experimental release; do not move the v5 tag."
},
"runs": {
"v5": {
"eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
"train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
"epochs": 1,
"eval_samples": 4032,
"held_out_episode_count": 14,
"metrics": {
"json_validity_rate": 1.0,
"action_macro_f1": 0.002289711036077459,
"subtask_accuracy": 0.011194029850746268,
"transition_accuracy": 0.9908234126984127,
"next_action_accuracy": 0.053618594823032224,
"contact_accuracy": 0.7864583333333334,
"object_micro_f1": 0.31614599936244814
}
},
"v6": {
"eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
"train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora",
"epochs": 2,
"lora_rank": 64,
"learning_rate": 0.00005,
"eval_samples": 4032,
"held_out_episode_count": 14,
"metrics": {
"json_validity_rate": 0.9990079365079365,
"action_macro_f1": 0.0028830723979596335,
"subtask_accuracy": 0.0037313432835820895,
"transition_accuracy": 0.9898313492063492,
"next_action_accuracy": 0.04305335446381405,
"contact_accuracy": 0.8177083333333334,
"object_micro_f1": 0.3064982378331287
}
}
},
"deltas_v6_minus_v5": {
"json_validity_rate": -0.0009920634920634888,
"action_macro_f1": 0.0005933613618821745,
"subtask_accuracy": -0.007462686567164178,
"transition_accuracy": -0.0009920634920634888,
"next_action_accuracy": -0.010565240359218173,
"contact_accuracy": 0.03125,
"object_micro_f1": -0.009647761529319436
},
"wins_for_v6": [
"action_macro_f1",
"contact_accuracy"
],
"wins_for_v5": [
"json_validity_rate",
"subtask_accuracy",
"transition_accuracy",
"next_action_accuracy",
"object_micro_f1"
],
"interpretation": "v6 is the newest verified Qwen LoRA branch and improves action macro-F1 and contact accuracy, but v5 remains the safer pinned release row: it keeps a perfect JSON validity rate and leads on subtask accuracy, next-action accuracy, transition accuracy, and object micro-F1."
}