{
  "title": "Verified Qwen3-Omni LoRA 128-Episode Held-Out Result",
  "status": "verified_latest_qwen3_v6_diagnostic_result",
  "status_date": "2026-06-14",
  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
  "adapter": "Qwen3-Omni LoRA",
  "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
  "split_policy": {
    "unit": "episode",
    "selected_episode_counts": {
      "train": 96,
      "val": 16,
      "test": 16
    },
    "exported_window_counts": {
      "train": 25629,
      "val": 4608,
      "test": 4032
    },
    "exported_episode_counts": {
      "train": 89,
      "val": 16,
      "test": 14
    },
    "skipped_selected_episodes": 9,
    "leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation."
  },
  "training": {
    "num_processes": 8,
    "epochs": 2,
    "lora_rank": 64,
    "lora_alpha": 128,
    "lora_dropout": 0.05,
    "learning_rate": 0.00005,
    "num_train_samples": 25629,
    "num_val_samples": 2048,
    "history": [
      {
        "epoch": 1,
        "train_loss": 0.05208605339353295,
        "val_loss": 0.026512427255511284,
        "global_step": 3204
      },
      {
        "epoch": 2,
        "train_loss": 0.013760763933660042,
        "val_loss": 0.032345958054065704,
        "global_step": 6408
      }
    ],
    "loss": "answer-token cross entropy over supervised JSON tokens",
    "note": "This current Qwen3-Omni LoRA result is the v6 rank64/lr5e-5 dense multiscale held-out evaluation on the selected 96/16/16 episode setup."
  },
  "evaluation": {
    "split": "test",
    "num_samples": 4032,
    "held_out_episode_count": 14,
    "json_validity_rate": 0.9990079365079365,
    "action_macro_f1": 0.0028830723979596335,
    "subtask_accuracy": 0.0037313432835820895,
    "transition_accuracy": 0.9898313492063492,
    "next_action_accuracy": 0.04305335446381405,
    "contact_accuracy": 0.8177083333333334,
    "object_micro_f1": 0.3064982378331287,
    "quality_target": {
      "json_validity_rate": 0.98,
      "status": "met"
    },
    "previous_v5_json_validity_rate": 1.0,
    "previous_v5_action_macro_f1": 0.002289711036077459,
    "previous_v5_subtask_accuracy": 0.011194029850746268,
    "previous_v5_next_action_accuracy": 0.053618594823032224,
    "previous_v5_contact_accuracy": 0.7864583333333334,
    "previous_v5_object_micro_f1": 0.31614599936244814
  },
  "interpretation": "This is the latest verified Qwen3-Omni LoRA diagnostic result for the selected 128-episode setup. The v6 rank64/lr5e-5 package keeps JSON validity above the 98% target and improves action macro-F1 and contact accuracy versus the pinned v5 release row, but slightly regresses JSON validity, subtask accuracy, next-action accuracy, transition accuracy, and object micro-F1. Treat it as the latest diagnostic branch, not as a globally stronger replacement for v5.",
  "public_package": {
    "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
    "audit_status": "pass",
    "contains_raw_xperience10m_data": false,
    "contains_qwen_base_weights": false,
    "contains_lora_weights": false,
    "adapter_weights_repo": "cy0307/ropedia-qwen3-omni-lora-128ep"
  },
  "release_policy": {
    "latest_verified_qwen_row": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
    "pinned_release_tag": "ropedia-xperience-10m-v5",
    "pinned_release_reason": "v5 remains the prior stable release tag; v6 is published on main/HF as the latest verified branch and can receive a separate v6 release tag."
  },
  "required_next_steps": [
    "Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
    "Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
    "Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
    "Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model branch: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
  ]
}