ropedia-xperience-10m-task-baselines / data /omni_finetune_verified_result.json

Add files using upload-large-folder tool

c433b73 verified 15 days ago

4.33 kB

	{
	"title": "Verified Qwen3-Omni LoRA 128-Episode Held-Out Result",
	"status": "verified_latest_qwen3_v6_diagnostic_result",
	"status_date": "2026-06-14",
	"backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
	"adapter": "Qwen3-Omni LoRA",
	"dataset": "Ropedia Xperience-10M selected 128-episode pilot",
	"split_policy": {
	"unit": "episode",
	"selected_episode_counts": {
	"train": 96,
	"val": 16,
	"test": 16
	},
	"exported_window_counts": {
	"train": 25629,
	"val": 4608,
	"test": 4032
	},
	"exported_episode_counts": {
	"train": 89,
	"val": 16,
	"test": 14
	},
	"skipped_selected_episodes": 9,
	"leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation."
	},
	"training": {
	"num_processes": 8,
	"epochs": 2,
	"lora_rank": 64,
	"lora_alpha": 128,
	"lora_dropout": 0.05,
	"learning_rate": 0.00005,
	"num_train_samples": 25629,
	"num_val_samples": 2048,
	"history": [
	{
	"epoch": 1,
	"train_loss": 0.05208605339353295,
	"val_loss": 0.026512427255511284,
	"global_step": 3204
	},
	{
	"epoch": 2,
	"train_loss": 0.013760763933660042,
	"val_loss": 0.032345958054065704,
	"global_step": 6408
	}
	],
	"loss": "answer-token cross entropy over supervised JSON tokens",
	"note": "This current Qwen3-Omni LoRA result is the v6 rank64/lr5e-5 dense multiscale held-out evaluation on the selected 96/16/16 episode setup."
	},
	"evaluation": {
	"split": "test",
	"num_samples": 4032,
	"held_out_episode_count": 14,
	"json_validity_rate": 0.9990079365079365,
	"action_macro_f1": 0.0028830723979596335,
	"subtask_accuracy": 0.0037313432835820895,
	"transition_accuracy": 0.9898313492063492,
	"next_action_accuracy": 0.04305335446381405,
	"contact_accuracy": 0.8177083333333334,
	"object_micro_f1": 0.3064982378331287,
	"quality_target": {
	"json_validity_rate": 0.98,
	"status": "met"
	},
	"previous_v5_json_validity_rate": 1.0,
	"previous_v5_action_macro_f1": 0.002289711036077459,
	"previous_v5_subtask_accuracy": 0.011194029850746268,
	"previous_v5_next_action_accuracy": 0.053618594823032224,
	"previous_v5_contact_accuracy": 0.7864583333333334,
	"previous_v5_object_micro_f1": 0.31614599936244814
	},
	"interpretation": "This is the latest verified Qwen3-Omni LoRA diagnostic result for the selected 128-episode setup. The v6 rank64/lr5e-5 package keeps JSON validity above the 98% target and improves action macro-F1 and contact accuracy versus the pinned v5 release row, but slightly regresses JSON validity, subtask accuracy, next-action accuracy, transition accuracy, and object micro-F1. Treat it as the latest diagnostic branch, not as a globally stronger replacement for v5.",
	"public_package": {
	"path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
	"audit_status": "pass",
	"contains_raw_xperience10m_data": false,
	"contains_qwen_base_weights": false,
	"contains_lora_weights": false,
	"adapter_weights_repo": "cy0307/ropedia-qwen3-omni-lora-128ep"
	},
	"release_policy": {
	"latest_verified_qwen_row": "xperience10m_qwen3_omni_128ep_multiscale_cap96_v6_rank64_lr5e5_full8gpu_lora_eval_test_full",
	"pinned_release_tag": "ropedia-xperience-10m-v5",
	"pinned_release_reason": "v5 remains the prior stable release tag; v6 is published on main/HF as the latest verified branch and can receive a separate v6 release tag."
	},
	"required_next_steps": [
	"Use results/omni_finetune/QWEN3_V5_V6_COMPARISON_20260614.md before deciding whether v6 should become a formal release tag.",
	"Use the v6 predictions for action/contact error analysis, and compare v5 for subtask, next-action, and object regressions.",
	"Keep full-parameter Qwen runs as feasibility gates until there is a storage plan for checkpoints or mergeable full-weight deltas.",
	"Use the verified Cosmos3-Super Forward-Dynamics LoRA package as a separate world-model branch: it updates adapter weights over camera-pose proxy future-vision-velocity targets, not Qwen-style JSON action labels."
	]
	}