@@ -16,8 +16,36 @@ RUN echo "Available release branches:" && git branch -r -l 'origin/release/*' --
 echo "Checking out: $LATEST_RELEASE" && \
 git checkout --track "$LATEST_RELEASE"
 
-RUN sh INSTALL_MEGATRON.sh
+ENV SETUPTOOLS_USE_DISTUTILS=local
 
+# Install base packages
+RUN pip install --upgrade peft accelerate transformers "modelscope[framework]" --no-cache-dir
+
+# Install vllm
+RUN pip install --upgrade vllm --no-cache-dir
+
+# Install transformer_engine and megatron_core
+RUN SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])" ) && \
+    CUDNN_PATH=$SITE_PACKAGES/nvidia/cudnn \
+    CPLUS_INCLUDE_PATH=$SITE_PACKAGES/nvidia/cudnn/include \
+    pip install --no-build-isolation "transformer_engine[pytorch]" --no-cache-dir
+
+RUN pip install megatron_core mcore_bridge --no-cache-dir
+
+# Install flash-attention (default arch 8.0;9.0, override via build-arg if needed)
+ARG TORCH_CUDA_ARCH_LIST="8.0;9.0"
+RUN TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" \
+    MAX_JOBS=8 \
+    FLASH_ATTENTION_FORCE_BUILD=TRUE \
+    pip install flash-attn --no-build-isolation --no-cache-dir
+
+RUN pip install flash-linear-attention -U --no-cache-dir
+
+# Install numpy
+RUN pip install numpy==2.2 --no-cache-dir
+
+# Install tinker, ray, and other deps
 RUN pip install --no-cache-dir tinker==0.14.0 "ray[serve]" transformers peft accelerate -U
 
+# Install twinkle itself
 RUN pip install -e . --no-build-isolation