File size: 2,814 Bytes
5205c69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
{
  "title": "Qwen3-Omni v5 versus v6 verified comparison",
  "status": "pass",
  "generated_at_utc": "2026-06-14T00:00:00+00:00",
  "comparison_scope": "Verified Qwen3-Omni LoRA held-out test packages on the same dense multiscale selected 128-episode dataset.",
  "release_policy": {
    "latest_verified_qwen_row": "v6",
    "pinned_release_tag": "ropedia-xperience-10m-v5",
    "recommendation": "Publish v6 as the latest verified branch. Create a separate v6 tag only if the project wants a formal experimental release; do not move the v5 tag."
  },
  "runs": {
    "v5": {
      "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora_eval_test_full",
      "train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v5_full8gpu_lora",
      "epochs": 1,
      "eval_samples": 4032,
      "held_out_episode_count": 14,
      "metrics": {
        "json_validity_rate": 1.0,
        "action_macro_f1": 0.002289711036077459,
        "subtask_accuracy": 0.011194029850746268,
        "transition_accuracy": 0.9908234126984127,
        "next_action_accuracy": 0.053618594823032224,
        "contact_accuracy": 0.7864583333333334,
        "object_micro_f1": 0.31614599936244814
      }
    },
    "v6": {
      "eval_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
      "train_run_id": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora",
      "epochs": 2,
      "lora_rank": 64,
      "learning_rate": 0.00005,
      "eval_samples": 4032,
      "held_out_episode_count": 14,
      "metrics": {
        "json_validity_rate": 0.9990079365079365,
        "action_macro_f1": 0.0028830723979596335,
        "subtask_accuracy": 0.0037313432835820895,
        "transition_accuracy": 0.9898313492063492,
        "next_action_accuracy": 0.04305335446381405,
        "contact_accuracy": 0.8177083333333334,
        "object_micro_f1": 0.3064982378331287
      }
    }
  },
  "deltas_v6_minus_v5": {
    "json_validity_rate": -0.0009920634920634888,
    "action_macro_f1": 0.0005933613618821745,
    "subtask_accuracy": -0.007462686567164178,
    "transition_accuracy": -0.0009920634920634888,
    "next_action_accuracy": -0.010565240359218173,
    "contact_accuracy": 0.03125,
    "object_micro_f1": -0.009647761529319436
  },
  "wins_for_v6": [
    "action_macro_f1",
    "contact_accuracy"
  ],
  "wins_for_v5": [
    "json_validity_rate",
    "subtask_accuracy",
    "transition_accuracy",
    "next_action_accuracy",
    "object_micro_f1"
  ],
  "interpretation": "v6 is the newest verified Qwen LoRA branch and improves action macro-F1 and contact accuracy, but v5 remains the safer pinned release row: it keeps a perfect JSON validity rate (1.0) and leads on subtask accuracy, transition accuracy, next-action accuracy, and object micro-F1."
}