Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| set -euo pipefail | |
| ROPEDIA_WORKSPACE="${ROPEDIA_WORKSPACE:-$HOME/Ropedia}" | |
| PROJECT_DIR="${PROJECT_DIR:-$ROPEDIA_WORKSPACE/ropedia-episode-task-suite}" | |
| BASE_RUN_ID="${BASE_RUN_ID:-xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu}" | |
| SMOKE_RUN_ID="${SMOKE_RUN_ID:-xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_smoke_v3}" | |
| SMOKE_EXIT_EVENT="${SMOKE_EXIT_EVENT:-train_exit_fsdp_smoke_v3_no_val}" | |
| FULL_RUN_ID="${FULL_RUN_ID:-xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_lora_fsdp_full_train_valmon}" | |
| MODEL_ID="${MODEL_ID:-$ROPEDIA_WORKSPACE/modelscope_models/Qwen__Qwen3-Omni-30B-A3B-Instruct}" | |
| BACKBONE_CONFIG="${BACKBONE_CONFIG:-configs/omni_backbones/qwen3_omni_lora.json}" | |
| FSDP_CPU_RAM_EFFICIENT_LOADING="${FSDP_CPU_RAM_EFFICIENT_LOADING:-true}" | |
| FSDP_SYNC_MODULE_STATES="${FSDP_SYNC_MODULE_STATES:-true}" | |
| FSDP_ACTIVATION_CHECKPOINTING="${FSDP_ACTIVATION_CHECKPOINTING:-true}" | |
| cd "$PROJECT_DIR" | |
| RUN_DIR="results/omni_finetune/${BASE_RUN_ID}" | |
| STATUS="${RUN_DIR}/status.jsonl" | |
| DATASET_JSONL="results/omni_finetune/${BASE_RUN_ID}_dataset/dataset.jsonl" | |
| FULL_LOG="${FULL_LOG:-${RUN_DIR}/train_${FULL_RUN_ID}.log}" | |
| mkdir -p "$RUN_DIR" | |
| is_full_training_active() { | |
| pgrep -af "scripts/omni/train_qwen3_omni_lora.py.*${FULL_RUN_ID}" >/dev/null 2>&1 | |
| } | |
| is_smoke_active() { | |
| pgrep -af "scripts/omni/train_qwen3_omni_lora.py.*${SMOKE_RUN_ID}" >/dev/null 2>&1 | |
| } | |
| if is_full_training_active; then | |
| echo "full run already active" | |
| exit 0 | |
| fi | |
| while true; do | |
| if grep -q "${SMOKE_EXIT_EVENT}.*returncode\":0" "$STATUS"; then | |
| break | |
| fi | |
| if grep -q "${SMOKE_EXIT_EVENT}.*returncode\":[1-9]" "$STATUS"; then | |
| echo "{\"event\":\"train_full_blocked_smoke_failed\",\"time\":$(date +%s),\"smoke_run_id\":\"${SMOKE_RUN_ID}\"}" >> "$STATUS" | |
| exit 1 | |
| fi | |
| if ! is_smoke_active; then | |
| echo "{\"event\":\"train_full_blocked_smoke_missing_exit\",\"time\":$(date +%s),\"smoke_run_id\":\"${SMOKE_RUN_ID}\"}" >> "$STATUS" | |
| exit 2 | |
| fi | |
| sleep 30 | |
| done | |
| if is_full_training_active; then | |
| echo "full run already active after smoke" | |
| exit 0 | |
| fi | |
| echo "{\"event\":\"train_start_fsdp_full_train_valmon\",\"time\":$(date +%s),\"run_id\":\"${FULL_RUN_ID}\",\"train_split\":\"train\",\"val_split\":\"val\",\"test_split_reserved\":\"test\",\"num_processes\":8}" >> "$STATUS" | |
| train_cmd=( | |
| .venv/bin/python -m accelerate.commands.launch | |
| --num_processes 8 | |
| --mixed_precision bf16 | |
| --use_fsdp | |
| --fsdp_sharding_strategy FULL_SHARD | |
| --fsdp_auto_wrap_policy TRANSFORMER_BASED_WRAP | |
| --fsdp_transformer_layer_cls_to_wrap Qwen3OmniMoeThinkerTextDecoderLayer | |
| --fsdp_use_orig_params true | |
| --fsdp_cpu_ram_efficient_loading "$FSDP_CPU_RAM_EFFICIENT_LOADING" | |
| --fsdp_sync_module_states "$FSDP_SYNC_MODULE_STATES" | |
| --fsdp_activation_checkpointing "$FSDP_ACTIVATION_CHECKPOINTING" | |
| scripts/omni/train_qwen3_omni_lora.py | |
| --dataset-jsonl "$DATASET_JSONL" | |
| --model-id "$MODEL_ID" | |
| --backbone-config "$BACKBONE_CONFIG" | |
| --run-id "$FULL_RUN_ID" | |
| --train-split train | |
| --val-split val | |
| --epochs 1 | |
| --batch-size 1 | |
| --gradient-accumulation-steps 8 | |
| --max-train-samples 0 | |
| --max-val-samples 512 | |
| --local-files-only | |
| --gradient-checkpointing | |
| --progress-every 10 | |
| ) | |
| set +e | |
| CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ | |
| PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \ | |
| "${train_cmd[@]}" > "$FULL_LOG" 2>&1 | |
| rc=$? | |
| set -e | |
| echo "{\"event\":\"train_exit_fsdp_full_train_valmon\",\"time\":$(date +%s),\"run_id\":\"${FULL_RUN_ID}\",\"returncode\":${rc}}" >> "$STATUS" | |
| exit "$rc" | |