# Kimi K2.6 DFlash source-patched image for 8x MI300X (gfx942)
#
# Base image: vllm/vllm-openai-rocm:nightly by default.
# The base is exposed as the BASE_IMAGE build argument so that a date-pinned
# tag (e.g. :2026-04-21) or a digest can be selected without editing this file:
#
#   docker build --build-arg BASE_IMAGE=vllm/vllm-openai-rocm:<pinned-tag> .
#
# When pinning, also record the vLLM version shipped in that tag
# (v0.19.2rc1.dev21 at time of writing).
#
# This image bakes the DFlash ROCm patches in at build time, so the launcher
# no longer needs to run patch_dflash_rocm.py at container startup.
# The patches are idempotent — running the script again inside this image
# is a safe no-op.

# Declared before FROM so it is usable in the FROM line; the default keeps the
# previous behavior (floating nightly tag) when no --build-arg is supplied.
ARG BASE_IMAGE=vllm/vllm-openai-rocm:nightly
FROM ${BASE_IMAGE}

# --- ROCm / AITER / vLLM environment defaults for gfx942 ---
# All three architecture variables are set to the same target (gfx942).
# These are defaults only; they can still be overridden at `docker run` time
# with -e / --env.
ENV PYTORCH_ROCM_ARCH=gfx942 \
    AITER_ROCM_ARCH=gfx942 \
    GPU_ARCHS=gfx942 \
    VLLM_ROCM_USE_AITER=1 \
    VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4 \
    VLLM_ROCM_USE_AITER_RMSNORM=0 \
    HSA_ENABLE_SDMA=0 \
    HSA_NO_SCRATCH_RECLAIM=1 \
    OMP_NUM_THREADS=1

# --- Copy and apply DFlash patches ---
# The patch script is only needed during the build. It is run once and then
# removed from /tmp; `&&` ensures a failing patch run fails the build instead
# of being masked by the cleanup step.
COPY payload/patch_dflash_rocm.py /tmp/patch_dflash_rocm.py
RUN python3 /tmp/patch_dflash_rocm.py && rm /tmp/patch_dflash_rocm.py

# Exec-form entrypoint: arguments given to `docker run <image> ...` are
# appended to the vLLM OpenAI-compatible API server command line.
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]