Eric X. Liu eric

docker-memo (25.07)

Published 2025-10-05 03:17:49 +00:00 by eric

Installation

docker pull git.ericxliu.me/eric/docker-memo:25.07
sha256:c6be6b72a932c43ee6fda655070217048566f2dfbc1342ac8d5274922a516316

Image Layers

ARG RELEASE
ARG LAUNCHPAD_BUILD_ARCH
LABEL org.opencontainers.image.ref.name=ubuntu
LABEL org.opencontainers.image.version=24.04
ADD file:ad85a9d7b0a74c2140bd51d9c4559cca392991e0c95f84cb139347348e5d1f9a in /
CMD ["/bin/bash"]
ARG JETPACK_HOST_MOUNTS=
ENV NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS=
RUN |1 JETPACK_HOST_MOUNTS= /bin/sh -c if [ -n "${JETPACK_HOST_MOUNTS}" ]; then echo "/usr/lib/aarch64-linux-gnu/tegra" > /etc/ld.so.conf.d/nvidia-tegra.conf && echo "/usr/lib/aarch64-linux-gnu/tegra-egl" >> /etc/ld.so.conf.d/nvidia-tegra.conf; fi # buildkit
RUN |1 JETPACK_HOST_MOUNTS= /bin/sh -c export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends ca-certificates curl patch wget && rm -rf /var/lib/apt/lists/* && echo "hsts=0" > /root/.wgetrc # buildkit
ARG GDRCOPY_VERSION=2.4.4
ARG HPCX_VERSION=2.23
ARG RDMACORE_VERSION=50.0
ARG MOFED_VERSION=5.4-rdmacore50.0
ARG OPENUCX_VERSION=1.19.0
ARG OPENMPI_VERSION=4.1.7
ARG EFA_VERSION=1.38.1
ARG AWS_OFI_NCCL_VERSION=1.14.0
ENV GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 RDMACORE_VERSION=50.0 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0
ARG TARGETARCH=amd64
RUN |10 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 /bin/sh -c cd /nvidia && ( export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends libibverbs1 libibverbs-dev librdmacm1 librdmacm-dev libibumad3 libibumad-dev ibverbs-utils ibverbs-providers && rm -rf /var/lib/apt/lists/* && rm $(dpkg-query -L libibverbs-dev librdmacm-dev libibumad-dev | grep "\(\.so\|\.a\)$") ) && ( cd opt/gdrcopy/ && dpkg -i libgdrapi_*.deb ) && ( cp -r opt/hpcx /opt/ && cp etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/ && ln -sf /opt/hpcx/ompi /usr/local/mpi && ln -sf /opt/hpcx/ucx /usr/local/ucx && sed -i 's/^\(hwloc_base_binding_policy\) = core$/\1 = none/' /opt/hpcx/ompi/etc/openmpi-mca-params.conf && sed -i 's/^\(btl = self\)$/#\1/' /opt/hpcx/ompi/etc/openmpi-mca-params.conf ) && ( if [ ! -f /etc/ld.so.conf.d/nvidia-tegra.conf ]; then cd opt/amazon/efa/ && dpkg -i libfabric*.deb && rm /opt/amazon/efa/lib/libfabric.a && echo "/opt/amazon/efa/lib" > /etc/ld.so.conf.d/efa.conf; fi ) && ldconfig # buildkit
ENV OPAL_PREFIX=/opt/hpcx/ompi PATH=/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin
ENV OMPI_MCA_coll_hcoll_enable=0
ARG CUDA_VERSION=12.9.0.043
ARG CUDA_DRIVER_VERSION=575.51.03
ENV CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03
RUN |12 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 /bin/sh -c BASE=min /nvidia/build-scripts/installCUDA.sh # buildkit
RUN |12 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 /bin/sh -c cp -vprd /nvidia/. / && patch -p0 < /etc/startup_scripts.patch && rm -f /etc/startup_scripts.patch # buildkit
ENV _CUDA_COMPAT_PATH=/usr/local/cuda/compat ENV=/etc/shinit_v2 BASH_ENV=/etc/bash.bashrc SHELL=/bin/bash NVIDIA_REQUIRE_CUDA=cuda>=9.0
LABEL com.nvidia.volumes.needed=nvidia_driver com.nvidia.cuda.version=9.0
ARG NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 CUSPARSELT_VERSION=0.7.1.0 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0
ENV NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSPARSELT_VERSION=0.7.1.0 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0
ADD docs.tgz / # buildkit
RUN |39 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 CUSPARSELT_VERSION=0.7.1.0 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0 /bin/sh -c echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit
ARG _LIBPATH_SUFFIX=
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin LD_LIBRARY_PATH=/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 NVIDIA_VISIBLE_DEVICES=all NVIDIA_DRIVER_CAPABILITIES=compute,utility,video
COPY /opt/amazon/aws-ofi-nccl /opt/amazon/aws-ofi-nccl # buildkit
RUN |40 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 CUSPARSELT_VERSION=0.7.1.0 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0 _LIBPATH_SUFFIX= /bin/sh -c if [ ! -f /etc/ld.so.conf.d/nvidia-tegra.conf ]; then echo "/opt/amazon/aws-ofi-nccl/lib" > /etc/ld.so.conf.d/aws-ofi-nccl.conf && ldconfig; fi # buildkit
COPY entrypoint/ /opt/nvidia/ # buildkit
ENV NVIDIA_PRODUCT_NAME=CUDA
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
RUN |40 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 CUSPARSELT_VERSION=0.7.1.0 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0 _LIBPATH_SUFFIX= /bin/sh -c mkdir -p /workspace && cp -f -p /opt/nvidia/entrypoint.d/30-container-license.txt /workspace/license.txt # buildkit
RUN /bin/sh -c export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends apt-utils build-essential libncurses6 libncursesw6 unzip jq gnupg libtcmalloc-minimal4 && rm -rf /var/lib/apt/lists/* # buildkit
RUN /bin/sh -c /nvidia/build-scripts/installCUDA.sh # buildkit
RUN /bin/sh -c /nvidia/build-scripts/installLIBS.sh && /nvidia/build-scripts/installCUDNN.sh && /nvidia/build-scripts/installTRT.sh && /nvidia/build-scripts/installNSYS.sh && /nvidia/build-scripts/installNCU.sh && /nvidia/build-scripts/installCUSPARSELT.sh && if [ -z "${JETPACK_HOST_MOUNTS}" ]; then /nvidia/build-scripts/installNCCL.sh; fi; # buildkit
LABEL com.nvidia.nccl.version=2.26.5 com.nvidia.cublas.version=12.9.0.13 com.nvidia.cufft.version=11.4.0.6 com.nvidia.curand.version=10.3.10.19 com.nvidia.cusparse.version=12.5.9.5 com.nvidia.cusparselt.version=0.7.1.0 com.nvidia.cusolver.version=11.7.4.40 com.nvidia.npp.version=12.4.0.27 com.nvidia.nvjpeg.version=12.4.0.16 com.nvidia.cublasmp.version=0.4.0.789 com.nvidia.cal.version=0.4.4.50 com.nvidia.cudnn.version=9.10.1.4 com.nvidia.tensorrt.version=10.10.0.31 com.nvidia.tensorrtoss.version= com.nvidia.nsightsystems.version=2025.3.1.90 com.nvidia.nsightcompute.version=2025.2.0.11
RUN /bin/sh -c export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends build-essential git libglib2.0-0 less libhwloc15 libnl-route-3-200 libnl-3-dev libnl-route-3-dev libnuma-dev libnuma1 libpmi2-0-dev nano numactl openssh-client vim wget && rm -rf /var/lib/apt/lists/* # buildkit
COPY cuda-*.patch /tmp # buildkit
RUN /bin/sh -c export DEVEL=1 BASE=0 && /nvidia/build-scripts/installNCU.sh && /nvidia/build-scripts/installCUDA.sh && /nvidia/build-scripts/installLIBS.sh && if [ ! -f /etc/ld.so.conf.d/nvidia-tegra.conf ]; then /nvidia/build-scripts/installNCCL.sh; fi && /nvidia/build-scripts/installCUDNN.sh && /nvidia/build-scripts/installTRT.sh && /nvidia/build-scripts/installNSYS.sh && /nvidia/build-scripts/installCUSPARSELT.sh && if [ -f "/tmp/cuda-${_CUDA_VERSION_MAJMIN}.patch" ]; then patch -p0 < /tmp/cuda-${_CUDA_VERSION_MAJMIN}.patch; fi && rm -f /tmp/cuda-*.patch # buildkit
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs:
ENV NVIDIA_PRODUCT_NAME=PyTorch
ARG NVIDIA_PYTORCH_VERSION=25.05
ARG PYTORCH_BUILD_VERSION=2.8.0a0+5228986
ARG NVFUSER_BUILD_VERSION=9bf5aca
ENV PYTORCH_BUILD_VERSION=2.8.0a0+5228986 PYTORCH_VERSION=2.8.0a0+5228986 PYTORCH_BUILD_NUMBER=0 NVIDIA_PYTORCH_VERSION=25.05
ENV NVFUSER_BUILD_VERSION=9bf5aca NVFUSER_VERSION=9bf5aca
LABEL com.nvidia.pytorch.version=2.8.0a0+5228986
ARG TARGETARCH=amd64
ARG PYVER=3.12
ARG PYVER_MAJMIN=312
ENV PIP_BREAK_SYSTEM_PACKAGES=1
ARG L4T=0
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c export PYSFX=`echo "${PYVER}" | cut -c1-1` && export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends python$PYVER-dev python$PYSFX python$PYSFX-dev python$PYSFX-venv python-is-python$PYSFX autoconf automake libatlas-base-dev libgoogle-glog-dev libbz2-dev libc-ares2 libre2-dev libleveldb-dev liblmdb-dev libprotobuf-dev libsnappy-dev libtool nasm protobuf-compiler pkg-config unzip sox libsndfile1 libpng-dev libhdf5-dev gfortran rapidjson-dev ninja-build libedit-dev build-essential patchelf && rm -rf /var/lib/apt/lists/* # buildkit
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c DEBIAN_FRONTEND=noninteractive apt remove -y --force-yes python3-pip && curl -O https://bootstrap.pypa.io/get-pip.py && python get-pip.py && rm get-pip.py # buildkit
ENV PIP_CONSTRAINT=/etc/pip/constraint.txt
COPY constraint.txt /etc/pip/constraint.txt # buildkit
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c pip install pip setuptools && pip install cmake # buildkit
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c curl "https://gitlab-master.nvidia.com/api/v4/projects/105799/packages/generic/OpenBLAS/0.3.24-$(uname -m)/OpenBLAS-0.3.24-$(uname -m).tar.gz" --output OpenBLAS.tar.gz && tar -xf OpenBLAS.tar.gz -C /usr/local/ && rm OpenBLAS.tar.gz # buildkit
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c if [ $TARGETARCH = "arm64" ]; then cd /opt && curl "https://gitlab-master.nvidia.com/api/v4/projects/105799/packages/generic/nvpl_slim_24.04/sbsa/nvpl_slim_24.04.tar" --output nvpl_slim_24.04.tar && tar -xf nvpl_slim_24.04.tar && cp -r nvpl_slim_24.04/lib/* /usr/local/lib && cp -r nvpl_slim_24.04/include/* /usr/local/include && rm -rf nvpl_slim_24.04.tar nvpl_slim_24.04 ; fi # buildkit
ENV NVPL_LAPACK_MATH_MODE=PEDANTIC
WORKDIR /opt/pytorch
COPY . . # buildkit
ENV PYTHONIOENCODING=utf-8
ENV LC_ALL=C.UTF-8
ENV PIP_DEFAULT_TIMEOUT=100
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c pip install numpy scipy PyYAML astunparse typing_extensions cffi spacy mock tqdm librosa expecttest hypothesis xdoctest pytest pytest-xdist pytest-rerunfailures pytest-shard pytest-flakefinder pybind11 Cython regex protobuf six && if [[ $TARGETARCH = "amd64" ]] ; then pip install --no-cache-dir mkl mkl-include mkl-devel ; find /usr/local/lib -maxdepth 1 -type f -regex '.*\/lib\(tbb\|mkl\).*\.so\($\|\.[0-9]*\.[0-9]*\)' -exec rm -v {} + ; fi # buildkit
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c git config --global url."https://github".insteadOf git://github && pip install jupyterlab notebook tensorboard jupyterlab_code_formatter python-hostlist # buildkit
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c PATCHED_FILE=$(python -c "from tensorboard.plugins.core import core_plugin as _; print(_.__file__)") && sed -i 's/^\( *"--bind_all",\)$/\1 default=True,/' "$PATCHED_FILE" && test $(grep '^ *"--bind_all", default=True,$' "$PATCHED_FILE" | wc -l) -eq 1 # buildkit
WORKDIR /opt/pytorch
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c pip install --no-cache-dir /builder/*.whl jupytext black isort && mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/completer-extension/ && jupyter lab clean # buildkit
COPY jupyter_config/jupyter_notebook_config.py /usr/local/etc/jupyter/ # buildkit
COPY jupyter_config/manager.jupyterlab-settings /root/.jupyter/lab/user-settings/@jupyterlab/completer-extension/ # buildkit
COPY jupyter_config/settings.jupyterlab-settings /root/.jupyter/lab/user-settings/@jupyterlab/completer-extension/ # buildkit
ENV JUPYTER_PORT=8888
ENV TENSORBOARD_PORT=6006
EXPOSE map[8888/tcp:{}]
EXPOSE map[6006/tcp:{}]
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c OPENCV_VERSION=4.10.0 && cd / && wget -q -O - https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.tar.gz | tar -xzf - && cd /opencv-${OPENCV_VERSION} && cmake -GNinja -Bbuild -H. -DWITH_CUDA=OFF -DWITH_1394=OFF -DPYTHON3_PACKAGES_PATH="/usr/local/lib/python${PYVER}/dist-packages" -DBUILD_opencv_cudalegacy=OFF -DBUILD_opencv_stitching=OFF -DWITH_IPP=OFF -DWITH_PROTOBUF=OFF && cmake --build build --target install && cd modules/python/package && pip install -v . && rm -rf /opencv-${OPENCV_VERSION} # buildkit
ENV UCC_CL_BASIC_TLS=^sharp
ENV TORCH_CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0+PTX
ENV PYTORCH_HOME=/opt/pytorch/pytorch
ENV CUDA_HOME=/usr/local/cuda
ENV TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c echo "TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}" && pip install /opt/transfer/torch*.whl && patchelf --set-rpath '/usr/local/lib' /usr/local/lib/python${PYVER}/dist-packages/torch/lib/libtorch_global_deps.so # buildkit
COPY /usr/local/share/cmake/TorchVision/ /usr/local/share/cmake/TorchVision/ # buildkit
COPY /usr/local/include/torchvision/ /usr/local/include/torchvision/ # buildkit
COPY /usr/local/lib64/libtorchvision.so /usr/local/lib/libtorchvision.so.1.0 # buildkit
COPY /usr/local/lib64/libjpeg* /usr/local/lib/ # buildkit
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c patchelf --set-rpath '$ORIGIN:/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torchvision/' /usr/local/lib/libtorchvision.so.1.0 && patchelf --set-soname libtorchvision.so.1 --output /usr/local/lib/libtorchvision.so.1.0 /usr/local/lib/libtorchvision.so.1.0 && ldconfig && pushd /usr/local/lib && ln -s libtorchvision.so.1 /usr/local/lib/libtorchvision.so && popd && patchelf --set-soname libjpeg.so.62 --output /usr/local/lib/libjpeg.so.62 $(readlink -f $(ldd /usr/local/lib/python3.12/dist-packages/torchvision/image.so | grep libjpeg | awk '{print $3}')) # buildkit
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c cd pytorch && pip install -v -r /opt/pytorch/pytorch/requirements.txt # buildkit
RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c pip install --no-cache-dir /tmp/dist/*.whl # buildkit
ARG DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c if [ -z "${DALI_VERSION}" ] ; then echo "Not Installing DALI for L4T Build." ; exit 0; fi && export CUDA_VERSION_MAJOR=$(ls /usr/local/cuda/lib64/libcudart.so.*.*.* | cut -d . -f 3) && export DALI_PKG_SUFFIX="cuda${CUDA_VERSION_MAJOR}0" && if [ -z "${DALI_URL_SUFFIX}" ] ; then export DALI_EXTRA_INDEX_URL="${DALI_EXTRA_INDEX_URL}-qa"; fi && pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --extra-index-url "${DALI_EXTRA_INDEX_URL}" --trusted-host sqrl nvidia-dali-${DALI_PKG_SUFFIX}==${DALI_VERSION} # buildkit
ENV COCOAPI_VERSION=2.0+nv0.8.1
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c export COCOAPI_TAG=$(echo ${COCOAPI_VERSION} | sed 's/^.*+n//') && pip install git+https://github.com/nvidia/cocoapi.git@${COCOAPI_TAG}#subdirectory=PythonAPI # buildkit
COPY singularity/ /.singularity.d/ # buildkit
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c ( cd fuser && pip install -r requirements.txt && python setup.py -version-tag=a0+${NVFUSER_VERSION} install && python setup.py clean && cp $(find /usr/local/lib/python${PYVER}/dist-packages/ -name libnvfuser_codegen.so) /usr/local/lib/python${PYVER}/dist-packages/torch/lib/ ) && ( cd lightning-thunder && python setup.py install && rm -rf build *.egg-info) && ( cd lightning-thunder && mkdir tmp && cd tmp && git clone -b v${CUDNN_FRONTEND_VERSION} --recursive --single-branch https://github.com/NVIDIA/cudnn-frontend.git cudnn_frontend && cd cudnn_frontend && pip install --no-build-isolation . && cd ../../ && rm -rf tmp ) && ( cd pytorch/third_party/onnx && pip uninstall typing -y && CMAKE_ARGS="-DONNX_USE_PROTOBUF_SHARED_LIBS=ON" pip install --no-build-isolation . ) # buildkit
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c pip install tabulate # buildkit
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c if [ "${L4T}" = "1" ]; then echo "Not installing rapids for L4T build."; exit 0; fi && find /rapids -name "*-Linux.tar.gz" -exec tar -C /usr --exclude="*.a" --exclude="bin/xgboost" --strip-components=1 -xvf {} \; && find /rapids -name "*.whl" ! -name "tornado-*" ! -name "cugraph_dgl*" ! -name "cugraph_pyg*" ! -name "torch_geometric*" ! -name "Pillow-*" ! -name "certifi-*" ! -name "protobuf-*" ! -name "six-*" -exec pip install --no-cache-dir {} + # buildkit
WORKDIR /workspace
COPY NVREADME.md README.md # buildkit
COPY docker-examples docker-examples # buildkit
COPY tutorials tutorials # buildkit
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c chmod -R a+w . # buildkit
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c set -x && WHEELS=1 /nvidia/build-scripts/installTRT.sh # buildkit
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin:/opt/tensorrt/bin
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c pip --version && python -c 'import sys; print(sys.platform)' && pip install --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-tensorrt-pypi/simple --no-cache-dir "polygraphy==${POLYGRAPHY_VERSION}" && pip install --index-url https://gitlab-master.nvidia.com/api/v4/projects/omniml%2Fmodelopt/packages/pypi/simple --extra-index-url https://pypi.nvidia.com "nvidia-modelopt[torch]==${MODEL_OPT_VERSION}" && pip install nvidia-resiliency-ext==0.3.0 # buildkit
COPY torch_tensorrt/ /opt/pytorch/torch_tensorrt/ # buildkit
ARG PYVER=3.12
ENV LD_LIBRARY_PATH=/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV PATH=/usr/local/lib/python3.12/dist-packages/torch_tensorrt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin:/opt/tensorrt/bin
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c pip install --no-cache-dir /opt/pytorch/apex/dist/*.whl # buildkit
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c pip install --no-cache-dir /opt/pytorch/torch_tensorrt/dist/*.whl # buildkit
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c if [ "${L4T}" = "1" ]; then echo "Not installing Flash Attention wheel in iGPU as it is a requirement for Transformer Engine"; else pip install --no-cache-dir /opt/pytorch/flash_attn*.whl; fi # buildkit
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c if [ "${L4T}" = "1" ]; then echo "Not installing Transformer Engine in iGPU container until Version variable is set"; else /nvidia/build-scripts/installCAL.sh && /nvidia/build-scripts/installCUBLASMP.sh && /nvidia/build-scripts/installNVSHMEM.sh && git clone -b release_v${TRANSFORMER_ENGINE_VERSION} --single-branch --recursive https://github.com/NVIDIA/TransformerEngine.git && env NVTE_CUDA_ARCHS="70;80;89;90;100;120" NVTE_BUILD_THREADS_PER_JOB=8 pip install --no-cache-dir --no-build-isolation ./TransformerEngine && rm -rf TransformerEngine; fi # buildkit
ENV CUDA_MODULE_LOADING=LAZY
ENV TORCH_NCCL_USE_COMM_NONBLOCKING=0
RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c ln -sf ${_CUDA_COMPAT_PATH}/lib.real ${_CUDA_COMPAT_PATH}/lib && echo ${_CUDA_COMPAT_PATH}/lib > /etc/ld.so.conf.d/00-cuda-compat.conf && ldconfig && rm -f ${_CUDA_COMPAT_PATH}/lib # buildkit
COPY entrypoint.d/ /opt/nvidia/entrypoint.d/ # buildkit
ARG NVIDIA_BUILD_ID=170559088
ENV NVIDIA_BUILD_ID=170559088
LABEL com.nvidia.build.id=170559088
ARG NVIDIA_BUILD_REF=0f499560921269b0135bf88c85232c1f26bcecfb
LABEL com.nvidia.build.ref=0f499560921269b0135bf88c85232c1f26bcecfb
ENV NVIDIA_PRODUCT_NAME=NeMo Framework
ENV PIP_NO_CACHE_DIR=1
ARG NVIDIA_BIGNLP_VERSION
ENV NVIDIA_BIGNLP_VERSION=
LABEL com.nvidia.bignlp.version=
ENV DEBIAN_FRONTEND=noninteractive
RUN |1 NVIDIA_BIGNLP_VERSION= /bin/sh -c apt-get update && apt-get install -y --no-install-recommends git libsndfile1 sox openssh-server && rm -rf /var/lib/apt/lists/* && apt-get clean # buildkit
RUN |1 NVIDIA_BIGNLP_VERSION= /bin/sh -c apt-get remove --purge -y libslurm* && apt-get -y autoremove && pip uninstall -y onnx && pip install wheel && rm -rf /opt/pytorch/pytorch/third_party/onnx # buildkit
RUN |1 NVIDIA_BIGNLP_VERSION= /bin/sh -c cp /dev/null /etc/pip/constraint.txt # buildkit
WORKDIR /opt
ARG UV_VERSION=0.7.2
RUN |2 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 /bin/sh -c curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | XDG_BIN_HOME=/usr/local/bin sh # buildkit
ENV UV_PROJECT_ENVIRONMENT=/opt/venv
ENV UV_CACHE_DIR=/opt/uv_cache
ENV PATH=/opt/venv/bin:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin:/opt/tensorrt/bin
ENV UV_LINK_MODE=copy
RUN |2 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 /bin/sh -c uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages # buildkit
ARG REINSTALL_NSYS=True
ARG NSYS_VERSION=2025.1.1.103
ARG REINSTALL_CUDNN=True
ARG CUDNN_VERSION=9.11.0.98
ENV NSIGHT_SYSTEMS_VERSION=2025.1.1.103
ARG REINSTALL_NCCL=True
ARG NCCL_VERSION=2.27.3-1+cuda12.9
COPY docker/common/install_nccl.sh /opt/install_nccl.sh # buildkit
RUN |8 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 /bin/sh -c if [ $REINSTALL_NSYS = "True" ]; then /nvidia/build-scripts/installNSYS.sh; fi && if [ $REINSTALL_CUDNN = "True" ]; then /nvidia/build-scripts/installCUDNN.sh $CUDNN_VERSION; fi && if [ $REINSTALL_NCCL = "True" ]; then bash /opt/install_nccl.sh --NCCL_VER=$NCCL_VERSION; fi && rm /opt/install_nccl.sh # buildkit
ARG INSTALL_DEEPEP=True
ARG TARGET_ARCH=x86
ARG DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076
ARG DEEPEP_NVSHMEM_COMMIT=
ARG GDR_COPY_VERSION=v2.4.1
ENV CPATH=/usr/local/mpi/include:
ENV LD_LIBRARY_PATH=/usr/local/mpi/lib:/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.1/
COPY nemo2/external/nvshmem_src_3.2.5-1.txz /opt/nvshmem_src_3.2.5-1.txz # buildkit
RUN |13 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 /bin/sh -c if [ $INSTALL_DEEPEP = "True" ]; then apt-get update && apt-get install -y --no-install-recommends nvidia-dkms-535 devscripts debhelper fakeroot dkms check libsubunit0 libsubunit-dev && if [ $TARGET_ARCH = "x86" ]; then ln -s /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so; else ln -s /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/libmlx5.so; fi && rm -rf /var/lib/apt/lists/* && apt-get clean && git clone https://github.com/NVIDIA/gdrcopy.git && cd /opt/gdrcopy && git checkout $GDR_COPY_VERSION && CUDA=/usr/local/cuda packages/build-deb-packages.sh && dpkg -i gdrdrv-dkms_*.deb && dpkg -i libgdrapi_*.deb && dpkg -i gdrcopy-tests_*.deb && dpkg -i gdrcopy_*.deb && cd /opt && git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git fetch origin $DEEPEP_COMMIT && git checkout FETCH_HEAD && cd /opt && if [ ! -z $DEEPEP_NVSHMEM_COMMIT ]; then CI_JOB_TOKEN=$(cat /run/secrets/CI_JOB_TOKEN) && git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab-master.nvidia.com/arch_moe_exploration/deepep-nvshmem.git nvshmem_src && cd nvshmem_src && git fetch origin $DEEPEP_NVSHMEM_COMMIT && git checkout FETCH_HEAD && rm -rf /opt/nvshmem_src/.git; else mkdir -p /opt/nvshmem_src && tar xf /opt/nvshmem_src_3.2.5-1.txz -C /opt && cd /opt/nvshmem_src && git apply /opt/DeepEP/third-party/nvshmem.patch; fi && CI_JOB_TOKEN=$(cat /run/secrets/CI_JOB_TOKEN) CUDA_HOME=/usr/local/cuda NVSHMEM_SHMEM_SUPPORT=0 NVSHMEM_UCX_SUPPORT=0 NVSHMEM_USE_NCCL=0 NVSHMEM_IBGDA_SUPPORT=1 NVSHMEM_PMIX_SUPPORT=0 NVSHMEM_TIMEOUT_DEVICE_POLLING=0 NVSHMEM_USE_GDRCOPY=1 cmake -S . -B build/ -DNVSHMEM_BUILD_EXAMPLES=OFF -DCMAKE_INSTALL_PREFIX=/opt/nvshmem_src/install -DCMAKE_CUDA_ARCHITECTURES=90 && cd build && make install -j && rm -rf /opt/nvshmem_src/build && cd /opt/DeepEP && NVSHMEM_DIR=/opt/nvshmem_src/install /usr/bin/python setup.py develop && NVSHMEM_DIR=/opt/nvshmem_src/install /usr/bin/python setup.py install && apt-get remove --purge -y devscripts debhelper man-db check groff-base dkms kmod bsdextrautils fakeroot && apt-get -y autoremove && apt-get clean; fi && rm -rf /opt/nvshmem_src_3.2.5-1.txz # buildkit
ARG REINSTALL_APEX=False
ARG APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec
RUN |15 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec /bin/sh -c pip install packaging # buildkit
RUN |15 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec /bin/sh -c if [ $REINSTALL_APEX = "True" ]; then pip uninstall -y apex && git clone https://github.com/NVIDIA/apex && cd apex && if [ ! -z $APEX_COMMIT ]; then git fetch origin $APEX_COMMIT && git checkout FETCH_HEAD; fi && HEAD_APEX_COMMIT=$(git rev-parse HEAD) && echo "Container built with Apex commit hash: $HEAD_APEX_COMMIT" && pip install -e . -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam --group_norm --nccl_allocator"; fi # buildkit
RUN |15 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec /bin/sh -c if [ -d /tmp/TransformerEngine ]; then pip install --no-cache-dir --no-build-isolation /tmp/TransformerEngine/dist/transformer_engine*; fi # buildkit
ARG MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa
ARG MCORE_VERSION=0.13.1
ENV NEMO_FW_MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa
ENV NEMO_FW_MCORE_VERSION=0.13.1
RUN |17 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 /bin/sh -c CI_JOB_TOKEN=$(cat /run/secrets/CI_JOB_TOKEN) && git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab-master.nvidia.com/ADLR/megatron-lm.git && cd megatron-lm && git pull && if [ ! -z $MCORE_COMMIT ]; then git fetch origin $MCORE_COMMIT && git checkout FETCH_HEAD; fi && HEAD_MCORE_COMMIT=$(git rev-parse HEAD) && echo "Container built with megatron-lm commit hash: $HEAD_MCORE_COMMIT" && pip install -e . && rm -rf .git && cd megatron/core/datasets && make && rm -rf /root/.cache/bazel # buildkit
ENV PYTHONPATH=:/opt/megatron-lm
ARG TARGET_ARCH=x86
WORKDIR /opt
COPY /opt/tinycudann*.whl ./ # buildkit
RUN |17 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 /bin/sh -c pip install --no-cache-dir /tmp/build_mamba_dep/causal-conv1d/causal_conv1d*.whl /tmp/build_grouped_gemm/grouped_gemm*.whl /tmp/build_nvdiffrast/nvdiffrast*.whl /tmp/build_stable_dreamfusion/raymarching*.whl /tmp/build_stable_dreamfusion/shencoder*.whl /tmp/build_stable_dreamfusion/freqencoder*.whl /tmp/build_stable_dreamfusion/gridencoder*.whl && if [ -f /tmp/build_mamba_dep/mamba/mamba*.whl ]; then pip install --no-cache-dir /tmp/build_mamba_dep/mamba/mamba*.whl; fi && if [ -f /tmp/build_bitsandbytes/bitsandbytes/dist/bitsandbytes*.whl ]; then pip install --no-cache-dir /tmp/build_bitsandbytes/bitsandbytes/dist/bitsandbytes*.whl; fi # buildkit
ARG TARGET_ARCH=x86
RUN |17 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 /bin/sh -c echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && sed -i 's/# StrictHostKeyChecking ask/ StrictHostKeyChecking no/' /etc/ssh/ssh_config && mkdir -p /var/run/sshd # buildkit
ARG NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4
ARG NEMO_VERSION=2.4.1
ENV NEMO_FW_NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4
ENV NEMO_FW_NEMO_VERSION=2.4.1
RUN |19 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 /bin/sh -c git clone https://github.com/NVIDIA/NeMo.git && cd NeMo && git pull && if [ ! -z $NEMO_COMMIT ]; then git fetch origin $NEMO_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NEMO_COMMIT=$(git rev-parse HEAD) && echo "Container built with NeMo commit hash: $HEAD_NEMO_COMMIT" && pip uninstall -y nemo_toolkit sacrebleu && sed -i "/mamba-ssm/d" requirements/requirements_nlp.txt && if [ $TARGET_ARCH = "arm" ]; then sed -i "/torch/d" requirements/requirements.txt && sed -i "/decord/d" requirements/requirements_multimodal.txt && sed -i "/megatron_core/d" requirements/requirements_nlp.txt; fi && pip install -e ".[all]" && cd nemo/collections/nlp/data/language_modeling/megatron && make # buildkit
ARG NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995
ARG NEMO_EXPORT_DEPLOY_VERSION
ENV NEMO_FW_NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995
ENV NEMO_FW_NEMO_EXPORT_DEPLOY_VERSION=
ARG SKIP_TRTLLM=True
COPY export_deploy/uv_args.txt /opt/uv_args.txt # buildkit
RUN |22 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/Export-Deploy.git && cd Export-Deploy && git pull && if [ ! -z $NEMO_EXPORT_DEPLOY_COMMIT ]; then git fetch origin $NEMO_EXPORT_DEPLOY_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NEMO_EXPORT_DEPLOY_COMMIT=$(git rev-parse HEAD) && echo "Container built with NeMo-Export-Deploy commit hash: $HEAD_NEMO_EXPORT_DEPLOY_COMMIT" && UV_ARGS=`cat /opt/uv_args.txt` && if [ "$SKIP_TRTLLM" = "True" ]; then uv sync --link-mode symlink --locked --inexact $UV_ARGS; else uv sync --link-mode symlink --locked --inexact --extra trtllm $UV_ARGS; fi && rm ~/.netrc # buildkit
ENV PYTHONPATH=/opt/Export-Deploy:/opt/venv/lib/python3.12/site-packages/::/opt/megatron-lm
ARG NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f
ARG NEMO_EVAL_VERSION
ENV NEMO_FW_NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f
ENV NEMO_FW_NEMO_EVAL_VERSION=
RUN |24 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/Eval.git && cd Eval && git pull && if [ ! -z $NEMO_EVAL_COMMIT ]; then git fetch origin $NEMO_EVAL_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NEMO_EVAL_COMMIT=$(git rev-parse HEAD) && echo "Container built with NeMo-Eval commit hash: $HEAD_NEMO_EVAL_COMMIT" && pip install -e . && rm ~/.netrc # buildkit
ARG NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131
ARG NVFSDP_VERSION=0.2.0rc0
ENV NEMO_FW_NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131
ENV NEMO_FW_NVFSDP_VERSION=0.2.0rc0
RUN |26 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/nvFSDP.git && cd nvFSDP && git pull && if [ ! -z $NVFSDP_COMMIT ]; then git fetch origin $NVFSDP_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NVFSDP_COMMIT=$(git rev-parse HEAD) && echo "Container built with nvFSDP commit hash: $HEAD_NVFSDP_COMMIT" && pip install -e . && rm ~/.netrc # buildkit
ARG NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783
ARG NEMO_AUTOMODEL_VERSION=0.2.0rc0
ENV NEMO_FW_NEMO_AUTOMODEL_COMMIT=
ENV NEMO_FW_NEMO_AUTOMODEL_VERSION=0.2.0rc0
RUN |28 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/Automodel.git && cd Automodel && git pull && if [ ! -z $NEMO_AUTOMODEL_COMMIT ]; then git fetch origin $NEMO_AUTOMODEL_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NEMO_AUTOMODEL_COMMIT=$(git rev-parse HEAD) && echo "Container built with NeMo-Automodel commit hash: $HEAD_NEMO_AUTOMODEL_COMMIT" && pip install --no-deps "liger-kernel==0.5.8" && pip install --no-deps "cut-cross-entropy @ git+https://github.com/apple/ml-cross-entropy.git@87a86aba72cfd2f0d8abecaf81c13c4528ea07d8" && sed -i 's/torch==[0-9]\+\.[0-9]\+\.[0-9]\+/torch/g' pyproject.toml && sed -i '/cut-cross-entropy/d' pyproject.toml && sed -i '/liger-kernel/d' pyproject.toml && pip install -e .[vlm]; rm ~/.netrc # buildkit
ARG MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f
ARG MEGATRON_BRIDGE_VERSION=0.2.0rc0
ENV NEMO_FW_MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f
ENV NEMO_FW_MEGATRON_BRIDGE_VERSION=0.2.0rc0
RUN |30 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/Megatron-Bridge.git && cd Megatron-Bridge && git pull && if [ ! -z $MEGATRON_BRIDGE_COMMIT ]; then git fetch origin $MEGATRON_BRIDGE_COMMIT && git checkout FETCH_HEAD; fi && HEAD_MEGATRON_BRIDGE_COMMIT=$(git rev-parse HEAD) && echo "Container built with Megatron-Bridge commit hash: $HEAD_MEGATRON_BRIDGE_COMMIT" && sed -i 's/torch==[0-9]\+\.[0-9]\+\.[0-9]\+/torch/g' pyproject.toml && pip install -e "." && rm ~/.netrc # buildkit
ARG INSTALL_RESIL=False
ARG RESIL_COMMIT=not_installed
ARG RESIL_VERSION=not_installed
ENV NEMO_FW_RESIL_COMMIT=not_installed
ENV NEMO_FW_RESIL_VERSION=not_installed
RUN |33 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed /bin/sh -c if [ $INSTALL_RESIL = "True" ] && [ $TARGET_ARCH = "x86" ]; then pip install --no-cache-dir "git+https://github.com/NVIDIA/nvidia-resiliency-ext.git@${RESIL_COMMIT}"; fi # buildkit
ARG INSTALL_MODELOPT=True
ARG MODELOPT_VERSION=0.31.0
RUN |35 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 /bin/sh -c if [ $INSTALL_MODELOPT = "True" ]; then pip install --no-cache-dir nvidia-modelopt[torch]==$MODELOPT_VERSION; fi # buildkit
ARG NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0
ENV NEMO_FW_NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0
RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c git clone https://github.com/NVIDIA-NeMo/Run && cd Run && git pull && if [ ! -z $NEMO_RUN_COMMIT ]; then git fetch origin $NEMO_RUN_COMMIT && git checkout FETCH_HEAD; fi && pip install -e . # buildkit
RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c pip uninstall -y webdataset && pip install --no-cache-dir "webdataset==0.2.86" "pandas==2.2.3" "ctc_segmentation==1.7.1" # buildkit
RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c pip uninstall -y pytest-coverage levenshtein pillow future && pip install --no-cache-dir "jupyter-core==5.8.1" "pillow==11.3.0" && uv cache clean protobuf && uv pip install "protobuf==4.25.8" # buildkit
RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c apt-get remove --purge -y gdb fakeroot libfakeroot && apt-get -y autoremove && apt-get clean # buildkit
RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c <<"EOF" python from transformers import AutoTokenizer, Qwen2Tokenizer, CLIPImageProcessor _=AutoTokenizer.from_pretrained('gpt2') _=AutoTokenizer.from_pretrained('bert-base-cased') _=AutoTokenizer.from_pretrained('bert-large-cased') _=AutoTokenizer.from_pretrained('bert-large-uncased') _=AutoTokenizer.from_pretrained('bigcode/starcoder2-tokenizer') _=AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b") _=AutoTokenizer.from_pretrained("nvidia/Nemotron-H-8B-Base-8K") _=AutoTokenizer.from_pretrained("nvidia/Nemotron-H-47B-Base-8K") _=AutoTokenizer.from_pretrained("nvidia/Nemotron-H-56B-Base-8K") _=AutoTokenizer.from_pretrained('THUDM/chatglm2-6b',trust_remote_code=True) _=AutoTokenizer.from_pretrained('THUDM/chatglm3-6b',trust_remote_code=True) _=Qwen2Tokenizer.from_pretrained('qwen/Qwen1.5-7B',trust_remote_code=True) _=Qwen2Tokenizer.from_pretrained('qwen/Qwen1.5-14B',trust_remote_code=True) _=AutoTokenizer.from_pretrained('openai/clip-vit-large-patch14') _=CLIPImageProcessor.from_pretrained('openai/clip-vit-large-patch14') _=CLIPImageProcessor.from_pretrained('openai/clip-vit-large-patch14-336') EOF # buildkit
ARG URM_USER_ACCOUNT=donghyukc
RUN |37 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc /bin/sh -c URM_API_TOKEN=$(cat /run/secrets/URM_API_TOKEN) && wget --http-user $URM_USER_ACCOUNT --http-password ${URM_API_TOKEN} "https://urm.nvidia.com/artifactory/nemo-fw-generic-local/llama3_70b_tokenizer.tar.gz" && wget --http-user $URM_USER_ACCOUNT --http-password ${URM_API_TOKEN} "https://urm.nvidia.com/artifactory/nemo-fw-generic-local/llama3_8b_tokenizer.tar.gz" && mkdir -p /tmp_assets && tar -xzvf llama3_8b_tokenizer.tar.gz -C /tmp_assets && tar -xzvf llama3_70b_tokenizer.tar.gz -C /tmp_assets && cp -rf /tmp_assets/hub ~/.cache/huggingface && rm -rf /tmp_assets && rm llama3_8b_tokenizer.tar.gz llama3_70b_tokenizer.tar.gz # buildkit
RUN |37 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc /bin/sh -c <<"EOF" python from transformers import AutoTokenizer _=AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B', local_files_only=True) _=AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-70B', local_files_only=True) _=AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B-instruct', local_files_only=True) _=AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-70B-instruct', local_files_only=True) EOF # buildkit
RUN |37 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc /bin/sh -c sed -i '/^LayerId/s/^/#/' /usr/local/lib/python3.12/dist-packages/cv2/typing/__init__.py # buildkit
ARG PRE_COMPILE_QUANT_KERNELS=True
RUN |38 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc PRE_COMPILE_QUANT_KERNELS=True /bin/sh -c if [ $PRE_COMPILE_QUANT_KERNELS = "True" ]; then python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"; fi # buildkit
COPY nemo2/patches/triton-lang_triton_6570_lazy_init.patch /opt/ # buildkit
RUN |38 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc PRE_COMPILE_QUANT_KERNELS=True /bin/sh -c cd /usr/local/lib/python3.12/dist-packages && patch -p1 $(python -c "import triton; print(triton.__path__[0])")/runtime/autotuner.py /opt/triton-lang_triton_6570_lazy_init.patch && rm /opt/triton-lang_triton_6570_lazy_init.patch # buildkit
WORKDIR /workspace
RUN |38 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc PRE_COMPILE_QUANT_KERNELS=True /bin/sh -c chmod -R a+w /workspace # buildkit
ARG NVIDIA_BUILD_ID=203980512
ENV NVIDIA_BUILD_ID=203980512
LABEL com.nvidia.build.id=203980512
ARG NVIDIA_BUILD_REF=c66568ebda7cde9f5231a46a0d93240373c63a6c
LABEL com.nvidia.build.ref=c66568ebda7cde9f5231a46a0d93240373c63a6c
LABEL maintainer=eric@ericxliu.me
LABEL description=Cached base image for NVIDIA NeMo
LABEL nemo.version=
WORKDIR /workspace
CMD ["/bin/bash"]

Labels

Key Value
com.nvidia.bignlp.version
com.nvidia.build.id 203980512
com.nvidia.build.ref c66568ebda7cde9f5231a46a0d93240373c63a6c
com.nvidia.cal.version 0.4.4.50
com.nvidia.cublas.version 12.9.0.13
com.nvidia.cublasmp.version 0.4.0.789
com.nvidia.cuda.version 9.0
com.nvidia.cudnn.version 9.10.1.4
com.nvidia.cufft.version 11.4.0.6
com.nvidia.curand.version 10.3.10.19
com.nvidia.cusolver.version 11.7.4.40
com.nvidia.cusparse.version 12.5.9.5
com.nvidia.cusparselt.version 0.7.1.0
com.nvidia.nccl.version 2.26.5
com.nvidia.npp.version 12.4.0.27
com.nvidia.nsightcompute.version 2025.2.0.11
com.nvidia.nsightsystems.version 2025.3.1.90
com.nvidia.nvjpeg.version 12.4.0.16
com.nvidia.pytorch.version 2.8.0a0+5228986
com.nvidia.tensorrt.version 10.10.0.31
com.nvidia.tensorrtoss.version
com.nvidia.volumes.needed nvidia_driver
description Cached base image for NVIDIA NeMo
maintainer eric@ericxliu.me
nemo.version
org.opencontainers.image.ref.name ubuntu
org.opencontainers.image.version 24.04
Details
Container
2025-10-05 03:17:49 +00:00
15
OCI / Docker
linux/amd64
16 GiB
Versions (3) View all
latest 2025-10-06
24.12.01 2025-10-06
25.07 2025-10-05