#!/usr/bin/env bash set -euo pipefail WORKSPACE="${WORKSPACE:-$(pwd)}" PROJECT_ROOT="${PROJECT_ROOT:-$(dirname "${WORKSPACE}")}" VENV_PY="${VENV_PY:-$WORKSPACE/.venv/bin/python}" RUN_ID="${RUN_ID:-xperience10m_qwen3_omni_32ep}" DATA_ROOT="${DATA_ROOT:-$PROJECT_ROOT/modelscope_data}" MAX_EPISODES="${MAX_EPISODES:-32}" MAX_WINDOWS_PER_EPISODE="${MAX_WINDOWS_PER_EPISODE:-128}" MAX_VIDEO_FRAMES="${MAX_VIDEO_FRAMES:-16}" EPOCHS="${EPOCHS:-1}" TRAIN_SPLIT="${TRAIN_SPLIT:-train}" VAL_SPLIT="${VAL_SPLIT:-val}" EVAL_SPLIT="${EVAL_SPLIT:-test}" MODEL_ID="${MODEL_ID:-Qwen/Qwen3-Omni-30B-A3B-Instruct}" LOCAL_MODEL_DIR="${LOCAL_MODEL_DIR:-$PROJECT_ROOT/modelscope_models/Qwen__Qwen3-Omni-30B-A3B-Instruct}" RESULT_DIR="$WORKSPACE/results/omni_finetune/$RUN_ID" DATASET_RUN_ID="${RUN_ID}_dataset" DATASET_DIR="$WORKSPACE/results/omni_finetune/$DATASET_RUN_ID" MANIFEST="$RESULT_DIR/episode_manifest.json" LOG_DIR="$RESULT_DIR/logs" mkdir -p "$LOG_DIR" "$LOCAL_MODEL_DIR" exec > >(tee -a "$LOG_DIR/pipeline.log") 2>&1 cd "$WORKSPACE" phase() { echo "PHASE: $1" "$VENV_PY" - < /dev/null && ! compgen -G "$LOCAL_MODEL_DIR/*.bin" > /dev/null; then if command -v modelscope >/dev/null 2>&1; then modelscope download --model "$MODEL_ID" --local_dir "$LOCAL_MODEL_DIR" else "$VENV_PY" -m modelscope download --model "$MODEL_ID" --local_dir "$LOCAL_MODEL_DIR" fi else echo "Model weights already present in $LOCAL_MODEL_DIR" fi phase "build_manifest" "$VENV_PY" scripts/omni/build_episode_manifest.py \ --data-root "$DATA_ROOT" \ --max-episodes "$MAX_EPISODES" \ --train-fraction 0.8 \ --val-fraction 0.0 \ --test-fraction 0.2 \ --output "$MANIFEST" EVAL_SPLIT="$("$VENV_PY" - <
# NOTE(review): the whole script is collapsed onto the shebang line above, so
# bash reads every statement as part of one `#` comment and executes nothing.
# The original line breaks must be restored before this file can be used.
#
# Visible intent of the pipeline, in the order the statements appear:
#   1. Read configuration from the environment with defaults: WORKSPACE,
#      PROJECT_ROOT, VENV_PY, RUN_ID, DATA_ROOT, MAX_EPISODES,
#      MAX_WINDOWS_PER_EPISODE, MAX_VIDEO_FRAMES, EPOCHS, TRAIN_SPLIT,
#      VAL_SPLIT, EVAL_SPLIT, MODEL_ID and LOCAL_MODEL_DIR.
#   2. Derive RESULT_DIR and DATASET_DIR under
#      $WORKSPACE/results/omni_finetune, plus MANIFEST and LOG_DIR under
#      RESULT_DIR.
#   3. Create LOG_DIR and LOCAL_MODEL_DIR, send stdout and stderr through
#      `tee -a` into $LOG_DIR/pipeline.log, and cd into WORKSPACE.
#   4. Download MODEL_ID into LOCAL_MODEL_DIR with the `modelscope` CLI when
#      it is on PATH, otherwise with `$VENV_PY -m modelscope`. The else branch
#      only prints that the weights are already present.
#   5. Announce phase "build_manifest" and run
#      scripts/omni/build_episode_manifest.py with train/val/test fractions
#      0.8 / 0.0 / 0.2, writing its output to MANIFEST.
#   6. Reassign EVAL_SPLIT from the output of a Python program fed to
#      $VENV_PY on stdin.
#
# NOTE(review): text is missing in at least three places. Recover it from the
# original script; do not reconstruct it by guesswork.
#   - phase() has no closing brace, and whatever followed `"$VENV_PY" -`
#     (presumably a heredoc body) is gone.
#   - A `then` appears without its opening `if`; only the
#     `! compgen -G "$LOCAL_MODEL_DIR/*.bin"` half of the condition survives.
#   - The final `EVAL_SPLIT="$("$VENV_PY" - <` is cut off mid-redirection, and
#     any later pipeline stages are not visible here.