{
"title": "Verified Qwen3-Omni LoRA 128-Episode Held-Out Result",
"status": "verified_latest_qwen3_v6_diagnostic_result",
"status_date": "2026-06-14",
"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
"adapter": "Qwen3-Omni LoRA",
"dataset": "Ropedia Xperience-10M selected 128-episode pilot",
"split_policy": {
"unit": "episode",
"selected_episode_counts": {
"train": 96,
"val": 16,
"test": 16
},
"exported_window_counts": {
"train": 25629,
"val": 4608,
"test": 4032
},
"exported_episode_counts": {
"train": 89,
"val": 16,
"test": 14
},
"skipped_selected_episodes": 9,
"leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation."
},
"training": {
"num_processes": 8,
"epochs": 2,
"lora_rank": 64,
"lora_alpha": 128,
"lora_dropout": 0.05,
"learning_rate": 0.00005,
"num_train_samples": 25629,
"num_val_samples": 2048,
"history": [
{
"epoch": 1,
"train_loss": 0.05208605339353295,
"val_loss": 0.026512427255511284,
"global_step": 3204
},
{
"epoch": 2,
"train_loss": 0.013760763933660042,
"val_loss": 0.032345958054065704,
"global_step": 6408
}
],
"loss": "answer-token cross entropy over supervised JSON tokens",
"note": "The current Qwen3-Omni LoRA result is the v6 rank64/lr5e-5 dense multiscale held-out evaluation on the selected 96/16/16 train/val/test episode setup."
},
"evaluation": {
"split": "test",
"num_samples": 4032,
"held_out_episode_count": 14,
"json_validity_rate": 0.9990079365079365,
"action_macro_f1": 0.0028830723979596335,
"subtask_accuracy": 0.0037313432835820895,
"transition_accuracy": 0.9898313492063492,
"next_action_accuracy": 0.04305335446381405,
"contact_accuracy": 0.8177083333333334,
"object_micro_f1": 0.3064982378331287,
"quality_target": {
"json_validity_rate": 0.98,
"status": "met"
},
"previous_v5_json_validity_rate": 1.0,
"previous_v5_action_macro_f1": 0.002289711036077459,
"previous_v5_subtask_accuracy": 0.011194029850746268,
"previous_v5_next_action_accuracy": 0.053618594823032224,
"previous_v5_contact_accuracy": 0.7864583333333334,
"previous_v5_object_micro_f1": 0.31614599936244814
},
"interpretation": "This is the latest verified Qwen3-Omni LoRA diagnostic result for the selected 128-episode setup. The v6 rank64/lr5e-5 package keeps JSON validity above the 98% target and improves action macro-F1 and contact accuracy versus the pinned v5 release row, but regresses slightly on JSON validity, subtask accuracy, next-action accuracy, transition accuracy, and object micro-F1. Treat it as the latest diagnostic branch, not as a globally stronger replacement for v5.",
"public_package": {
"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
"audit_status": "pass",
"contains_raw_xperience10m_data": false,
"contains_qwen_base_weights": false,
"contains_lora_weights": false,
"adapter_weights_repo": "cy0307/ropedia-qwen3-omni-lora-128ep"
},
"release_policy": {
"latest_verified_qwen_row": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
"pinned_release_tag": "ropedia-xperience-10m-v5",
"pinned_release_reason": "v5 remains the pinned stable release tag; v6 is published on main/HF as the latest verified branch and can receive a separate v6 release tag."
},
"required_next_steps": [
"Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
"Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
"Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
"Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model branch: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
]
}