Block a user
docker-memo (25.07)
Published 2025-10-05 03:17:49 +00:00 by eric
Installation
docker pull git.ericxliu.me/eric/docker-memo:25.07
sha256:c6be6b72a932c43ee6fda655070217048566f2dfbc1342ac8d5274922a516316
Image Layers
| ARG RELEASE |
| ARG LAUNCHPAD_BUILD_ARCH |
| LABEL org.opencontainers.image.ref.name=ubuntu |
| LABEL org.opencontainers.image.version=24.04 |
| ADD file:ad85a9d7b0a74c2140bd51d9c4559cca392991e0c95f84cb139347348e5d1f9a in / |
| CMD ["/bin/bash"] |
| ARG JETPACK_HOST_MOUNTS= |
| ENV NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS= |
| RUN |1 JETPACK_HOST_MOUNTS= /bin/sh -c if [ -n "${JETPACK_HOST_MOUNTS}" ]; then echo "/usr/lib/aarch64-linux-gnu/tegra" > /etc/ld.so.conf.d/nvidia-tegra.conf && echo "/usr/lib/aarch64-linux-gnu/tegra-egl" >> /etc/ld.so.conf.d/nvidia-tegra.conf; fi # buildkit |
| RUN |1 JETPACK_HOST_MOUNTS= /bin/sh -c export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends ca-certificates curl patch wget && rm -rf /var/lib/apt/lists/* && echo "hsts=0" > /root/.wgetrc # buildkit |
| ARG GDRCOPY_VERSION=2.4.4 |
| ARG HPCX_VERSION=2.23 |
| ARG RDMACORE_VERSION=50.0 |
| ARG MOFED_VERSION=5.4-rdmacore50.0 |
| ARG OPENUCX_VERSION=1.19.0 |
| ARG OPENMPI_VERSION=4.1.7 |
| ARG EFA_VERSION=1.38.1 |
| ARG AWS_OFI_NCCL_VERSION=1.14.0 |
| ENV GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 RDMACORE_VERSION=50.0 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 |
| ARG TARGETARCH=amd64 |
| RUN |10 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 /bin/sh -c cd /nvidia && ( export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends libibverbs1 libibverbs-dev librdmacm1 librdmacm-dev libibumad3 libibumad-dev ibverbs-utils ibverbs-providers && rm -rf /var/lib/apt/lists/* && rm $(dpkg-query -L libibverbs-dev librdmacm-dev libibumad-dev | grep "\(\.so\|\.a\)$") ) && ( cd opt/gdrcopy/ && dpkg -i libgdrapi_*.deb ) && ( cp -r opt/hpcx /opt/ && cp etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/ && ln -sf /opt/hpcx/ompi /usr/local/mpi && ln -sf /opt/hpcx/ucx /usr/local/ucx && sed -i 's/^\(hwloc_base_binding_policy\) = core$/\1 = none/' /opt/hpcx/ompi/etc/openmpi-mca-params.conf && sed -i 's/^\(btl = self\)$/#\1/' /opt/hpcx/ompi/etc/openmpi-mca-params.conf ) && ( if [ ! -f /etc/ld.so.conf.d/nvidia-tegra.conf ]; then cd opt/amazon/efa/ && dpkg -i libfabric*.deb && rm /opt/amazon/efa/lib/libfabric.a && echo "/opt/amazon/efa/lib" > /etc/ld.so.conf.d/efa.conf; fi ) && ldconfig # buildkit |
| ENV OPAL_PREFIX=/opt/hpcx/ompi PATH=/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin |
| ENV OMPI_MCA_coll_hcoll_enable=0 |
| ARG CUDA_VERSION=12.9.0.043 |
| ARG CUDA_DRIVER_VERSION=575.51.03 |
| ENV CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 |
| RUN |12 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 /bin/sh -c BASE=min /nvidia/build-scripts/installCUDA.sh # buildkit |
| RUN |12 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 /bin/sh -c cp -vprd /nvidia/. / && patch -p0 < /etc/startup_scripts.patch && rm -f /etc/startup_scripts.patch # buildkit |
| ENV _CUDA_COMPAT_PATH=/usr/local/cuda/compat ENV=/etc/shinit_v2 BASH_ENV=/etc/bash.bashrc SHELL=/bin/bash NVIDIA_REQUIRE_CUDA=cuda>=9.0 |
| LABEL com.nvidia.volumes.needed=nvidia_driver com.nvidia.cuda.version=9.0 |
| ARG NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 CUSPARSELT_VERSION=0.7.1.0 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0 |
| ENV NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSPARSELT_VERSION=0.7.1.0 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0 |
| ADD docs.tgz / # buildkit |
| RUN |39 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 CUSPARSELT_VERSION=0.7.1.0 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0 /bin/sh -c echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit |
| ARG _LIBPATH_SUFFIX= |
| ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin LD_LIBRARY_PATH=/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 NVIDIA_VISIBLE_DEVICES=all NVIDIA_DRIVER_CAPABILITIES=compute,utility,video |
| COPY /opt/amazon/aws-ofi-nccl /opt/amazon/aws-ofi-nccl # buildkit |
| RUN |40 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 CUSPARSELT_VERSION=0.7.1.0 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0 _LIBPATH_SUFFIX= /bin/sh -c if [ ! -f /etc/ld.so.conf.d/nvidia-tegra.conf ]; then echo "/opt/amazon/aws-ofi-nccl/lib" > /etc/ld.so.conf.d/aws-ofi-nccl.conf && ldconfig; fi # buildkit |
| COPY entrypoint/ /opt/nvidia/ # buildkit |
| ENV NVIDIA_PRODUCT_NAME=CUDA |
| ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] |
| RUN |40 JETPACK_HOST_MOUNTS= GDRCOPY_VERSION=2.4.4 HPCX_VERSION=2.23 RDMACORE_VERSION=50.0 MOFED_VERSION=5.4-rdmacore50.0 OPENUCX_VERSION=1.19.0 OPENMPI_VERSION=4.1.7 EFA_VERSION=1.38.1 AWS_OFI_NCCL_VERSION=1.14.0 TARGETARCH=amd64 CUDA_VERSION=12.9.0.043 CUDA_DRIVER_VERSION=575.51.03 NCCL_VERSION=2.26.5 CUBLAS_VERSION=12.9.0.13 CUFFT_VERSION=11.4.0.6 CURAND_VERSION=10.3.10.19 CUSPARSE_VERSION=12.5.9.5 CUSOLVER_VERSION=11.7.4.40 NPP_VERSION=12.4.0.27 NVJPEG_VERSION=12.4.0.16 CUFILE_VERSION=1.14.0.30 NVJITLINK_VERSION=12.9.41 CUBLASMP_VERSION=0.4.0.789 CAL_VERSION=0.4.4.50 NVSHMEM_VERSION=3.2.5 CUDNN_VERSION=9.10.1.4 CUDNN_FRONTEND_VERSION=1.11.0 TRT_VERSION=10.10.0.31 TRTOSS_VERSION= NSIGHT_SYSTEMS_VERSION=2025.3.1.90 NSIGHT_COMPUTE_VERSION=2025.2.0.11 CUSPARSELT_VERSION=0.7.1.0 DALI_VERSION=1.49.0 DALI_BUILD= DALI_URL_SUFFIX=120 POLYGRAPHY_VERSION=0.49.20 TRANSFORMER_ENGINE_VERSION=2.3 MODEL_OPT_VERSION=0.27.1 CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0 _LIBPATH_SUFFIX= /bin/sh -c mkdir -p /workspace && cp -f -p /opt/nvidia/entrypoint.d/30-container-license.txt /workspace/license.txt # buildkit |
| RUN /bin/sh -c export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends apt-utils build-essential libncurses6 libncursesw6 unzip jq gnupg libtcmalloc-minimal4 && rm -rf /var/lib/apt/lists/* # buildkit |
| RUN /bin/sh -c /nvidia/build-scripts/installCUDA.sh # buildkit |
| RUN /bin/sh -c /nvidia/build-scripts/installLIBS.sh && /nvidia/build-scripts/installCUDNN.sh && /nvidia/build-scripts/installTRT.sh && /nvidia/build-scripts/installNSYS.sh && /nvidia/build-scripts/installNCU.sh && /nvidia/build-scripts/installCUSPARSELT.sh && if [ -z "${JETPACK_HOST_MOUNTS}" ]; then /nvidia/build-scripts/installNCCL.sh; fi; # buildkit |
| LABEL com.nvidia.nccl.version=2.26.5 com.nvidia.cublas.version=12.9.0.13 com.nvidia.cufft.version=11.4.0.6 com.nvidia.curand.version=10.3.10.19 com.nvidia.cusparse.version=12.5.9.5 com.nvidia.cusparselt.version=0.7.1.0 com.nvidia.cusolver.version=11.7.4.40 com.nvidia.npp.version=12.4.0.27 com.nvidia.nvjpeg.version=12.4.0.16 com.nvidia.cublasmp.version=0.4.0.789 com.nvidia.cal.version=0.4.4.50 com.nvidia.cudnn.version=9.10.1.4 com.nvidia.tensorrt.version=10.10.0.31 com.nvidia.tensorrtoss.version= com.nvidia.nsightsystems.version=2025.3.1.90 com.nvidia.nsightcompute.version=2025.2.0.11 |
| RUN /bin/sh -c export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends build-essential git libglib2.0-0 less libhwloc15 libnl-route-3-200 libnl-3-dev libnl-route-3-dev libnuma-dev libnuma1 libpmi2-0-dev nano numactl openssh-client vim wget && rm -rf /var/lib/apt/lists/* # buildkit |
| COPY cuda-*.patch /tmp # buildkit |
| RUN /bin/sh -c export DEVEL=1 BASE=0 && /nvidia/build-scripts/installNCU.sh && /nvidia/build-scripts/installCUDA.sh && /nvidia/build-scripts/installLIBS.sh && if [ ! -f /etc/ld.so.conf.d/nvidia-tegra.conf ]; then /nvidia/build-scripts/installNCCL.sh; fi && /nvidia/build-scripts/installCUDNN.sh && /nvidia/build-scripts/installTRT.sh && /nvidia/build-scripts/installNSYS.sh && /nvidia/build-scripts/installCUSPARSELT.sh && if [ -f "/tmp/cuda-${_CUDA_VERSION_MAJMIN}.patch" ]; then patch -p0 < /tmp/cuda-${_CUDA_VERSION_MAJMIN}.patch; fi && rm -f /tmp/cuda-*.patch # buildkit |
| ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs: |
| ENV NVIDIA_PRODUCT_NAME=PyTorch |
| ARG NVIDIA_PYTORCH_VERSION=25.05 |
| ARG PYTORCH_BUILD_VERSION=2.8.0a0+5228986 |
| ARG NVFUSER_BUILD_VERSION=9bf5aca |
| ENV PYTORCH_BUILD_VERSION=2.8.0a0+5228986 PYTORCH_VERSION=2.8.0a0+5228986 PYTORCH_BUILD_NUMBER=0 NVIDIA_PYTORCH_VERSION=25.05 |
| ENV NVFUSER_BUILD_VERSION=9bf5aca NVFUSER_VERSION=9bf5aca |
| LABEL com.nvidia.pytorch.version=2.8.0a0+5228986 |
| ARG TARGETARCH=amd64 |
| ARG PYVER=3.12 |
| ARG PYVER_MAJMIN=312 |
| ENV PIP_BREAK_SYSTEM_PACKAGES=1 |
| ARG L4T=0 |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c export PYSFX=`echo "${PYVER}" | cut -c1-1` && export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install -y --no-install-recommends python$PYVER-dev python$PYSFX python$PYSFX-dev python$PYSFX-venv python-is-python$PYSFX autoconf automake libatlas-base-dev libgoogle-glog-dev libbz2-dev libc-ares2 libre2-dev libleveldb-dev liblmdb-dev libprotobuf-dev libsnappy-dev libtool nasm protobuf-compiler pkg-config unzip sox libsndfile1 libpng-dev libhdf5-dev gfortran rapidjson-dev ninja-build libedit-dev build-essential patchelf && rm -rf /var/lib/apt/lists/* # buildkit |
| ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c DEBIAN_FRONTEND=noninteractive apt remove -y --force-yes python3-pip && curl -O https://bootstrap.pypa.io/get-pip.py && python get-pip.py && rm get-pip.py # buildkit |
| ENV PIP_CONSTRAINT=/etc/pip/constraint.txt |
| COPY constraint.txt /etc/pip/constraint.txt # buildkit |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c pip install pip setuptools && pip install cmake # buildkit |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c curl "https://gitlab-master.nvidia.com/api/v4/projects/105799/packages/generic/OpenBLAS/0.3.24-$(uname -m)/OpenBLAS-0.3.24-$(uname -m).tar.gz" --output OpenBLAS.tar.gz && tar -xf OpenBLAS.tar.gz -C /usr/local/ && rm OpenBLAS.tar.gz # buildkit |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c if [ $TARGETARCH = "arm64" ]; then cd /opt && curl "https://gitlab-master.nvidia.com/api/v4/projects/105799/packages/generic/nvpl_slim_24.04/sbsa/nvpl_slim_24.04.tar" --output nvpl_slim_24.04.tar && tar -xf nvpl_slim_24.04.tar && cp -r nvpl_slim_24.04/lib/* /usr/local/lib && cp -r nvpl_slim_24.04/include/* /usr/local/include && rm -rf nvpl_slim_24.04.tar nvpl_slim_24.04 ; fi # buildkit |
| ENV NVPL_LAPACK_MATH_MODE=PEDANTIC |
| WORKDIR /opt/pytorch |
| COPY . . # buildkit |
| ENV PYTHONIOENCODING=utf-8 |
| ENV LC_ALL=C.UTF-8 |
| ENV PIP_DEFAULT_TIMEOUT=100 |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c pip install numpy scipy PyYAML astunparse typing_extensions cffi spacy mock tqdm librosa expecttest hypothesis xdoctest pytest pytest-xdist pytest-rerunfailures pytest-shard pytest-flakefinder pybind11 Cython regex protobuf six && if [[ $TARGETARCH = "amd64" ]] ; then pip install --no-cache-dir mkl mkl-include mkl-devel ; find /usr/local/lib -maxdepth 1 -type f -regex '.*\/lib\(tbb\|mkl\).*\.so\($\|\.[0-9]*\.[0-9]*\)' -exec rm -v {} + ; fi # buildkit |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c git config --global url."https://github".insteadOf git://github && pip install jupyterlab notebook tensorboard jupyterlab_code_formatter python-hostlist # buildkit |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c PATCHED_FILE=$(python -c "from tensorboard.plugins.core import core_plugin as _; print(_.__file__)") && sed -i 's/^\( *"--bind_all",\)$/\1 default=True,/' "$PATCHED_FILE" && test $(grep '^ *"--bind_all", default=True,$' "$PATCHED_FILE" | wc -l) -eq 1 # buildkit |
| WORKDIR /opt/pytorch |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c pip install --no-cache-dir /builder/*.whl jupytext black isort && mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/completer-extension/ && jupyter lab clean # buildkit |
| COPY jupyter_config/jupyter_notebook_config.py /usr/local/etc/jupyter/ # buildkit |
| COPY jupyter_config/manager.jupyterlab-settings /root/.jupyter/lab/user-settings/@jupyterlab/completer-extension/ # buildkit |
| COPY jupyter_config/settings.jupyterlab-settings /root/.jupyter/lab/user-settings/@jupyterlab/completer-extension/ # buildkit |
| ENV JUPYTER_PORT=8888 |
| ENV TENSORBOARD_PORT=6006 |
| EXPOSE map[8888/tcp:{}] |
| EXPOSE map[6006/tcp:{}] |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c OPENCV_VERSION=4.10.0 && cd / && wget -q -O - https://github.com/opencv/opencv/archive/${OPENCV_VERSION}.tar.gz | tar -xzf - && cd /opencv-${OPENCV_VERSION} && cmake -GNinja -Bbuild -H. -DWITH_CUDA=OFF -DWITH_1394=OFF -DPYTHON3_PACKAGES_PATH="/usr/local/lib/python${PYVER}/dist-packages" -DBUILD_opencv_cudalegacy=OFF -DBUILD_opencv_stitching=OFF -DWITH_IPP=OFF -DWITH_PROTOBUF=OFF && cmake --build build --target install && cd modules/python/package && pip install -v . && rm -rf /opencv-${OPENCV_VERSION} # buildkit |
| ENV UCC_CL_BASIC_TLS=^sharp |
| ENV TORCH_CUDA_ARCH_LIST=7.5 8.0 8.6 9.0 10.0 12.0+PTX |
| ENV PYTORCH_HOME=/opt/pytorch/pytorch |
| ENV CUDA_HOME=/usr/local/cuda |
| ENV TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1 |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c echo "TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}" && pip install /opt/transfer/torch*.whl && patchelf --set-rpath '/usr/local/lib' /usr/local/lib/python${PYVER}/dist-packages/torch/lib/libtorch_global_deps.so # buildkit |
| COPY /usr/local/share/cmake/TorchVision/ /usr/local/share/cmake/TorchVision/ # buildkit |
| COPY /usr/local/include/torchvision/ /usr/local/include/torchvision/ # buildkit |
| COPY /usr/local/lib64/libtorchvision.so /usr/local/lib/libtorchvision.so.1.0 # buildkit |
| COPY /usr/local/lib64/libjpeg* /usr/local/lib/ # buildkit |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c patchelf --set-rpath '$ORIGIN:/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torchvision/' /usr/local/lib/libtorchvision.so.1.0 && patchelf --set-soname libtorchvision.so.1 --output /usr/local/lib/libtorchvision.so.1.0 /usr/local/lib/libtorchvision.so.1.0 && ldconfig && pushd /usr/local/lib && ln -s libtorchvision.so.1 /usr/local/lib/libtorchvision.so && popd && patchelf --set-soname libjpeg.so.62 --output /usr/local/lib/libjpeg.so.62 $(readlink -f $(ldd /usr/local/lib/python3.12/dist-packages/torchvision/image.so | grep libjpeg | awk '{print $3}')) # buildkit |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c cd pytorch && pip install -v -r /opt/pytorch/pytorch/requirements.txt # buildkit |
| RUN |7 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 /bin/sh -c pip install --no-cache-dir /tmp/dist/*.whl # buildkit |
| ARG DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c if [ -z "${DALI_VERSION}" ] ; then echo "Not Installing DALI for L4T Build." ; exit 0; fi && export CUDA_VERSION_MAJOR=$(ls /usr/local/cuda/lib64/libcudart.so.*.*.* | cut -d . -f 3) && export DALI_PKG_SUFFIX="cuda${CUDA_VERSION_MAJOR}0" && if [ -z "${DALI_URL_SUFFIX}" ] ; then export DALI_EXTRA_INDEX_URL="${DALI_EXTRA_INDEX_URL}-qa"; fi && pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --extra-index-url "${DALI_EXTRA_INDEX_URL}" --trusted-host sqrl nvidia-dali-${DALI_PKG_SUFFIX}==${DALI_VERSION} # buildkit |
| ENV COCOAPI_VERSION=2.0+nv0.8.1 |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c export COCOAPI_TAG=$(echo ${COCOAPI_VERSION} | sed 's/^.*+n//') && pip install git+https://github.com/nvidia/cocoapi.git@${COCOAPI_TAG}#subdirectory=PythonAPI # buildkit |
| COPY singularity/ /.singularity.d/ # buildkit |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c ( cd fuser && pip install -r requirements.txt && python setup.py -version-tag=a0+${NVFUSER_VERSION} install && python setup.py clean && cp $(find /usr/local/lib/python${PYVER}/dist-packages/ -name libnvfuser_codegen.so) /usr/local/lib/python${PYVER}/dist-packages/torch/lib/ ) && ( cd lightning-thunder && python setup.py install && rm -rf build *.egg-info) && ( cd lightning-thunder && mkdir tmp && cd tmp && git clone -b v${CUDNN_FRONTEND_VERSION} --recursive --single-branch https://github.com/NVIDIA/cudnn-frontend.git cudnn_frontend && cd cudnn_frontend && pip install --no-build-isolation . && cd ../../ && rm -rf tmp ) && ( cd pytorch/third_party/onnx && pip uninstall typing -y && CMAKE_ARGS="-DONNX_USE_PROTOBUF_SHARED_LIBS=ON" pip install --no-build-isolation . ) # buildkit |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c pip install tabulate # buildkit |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c if [ "${L4T}" = "1" ]; then echo "Not installing rapids for L4T build."; exit 0; fi && find /rapids -name "*-Linux.tar.gz" -exec tar -C /usr --exclude="*.a" --exclude="bin/xgboost" --strip-components=1 -xvf {} \; && find /rapids -name "*.whl" ! -name "tornado-*" ! -name "cugraph_dgl*" ! -name "cugraph_pyg*" ! -name "torch_geometric*" ! -name "Pillow-*" ! -name "certifi-*" ! -name "protobuf-*" ! -name "six-*" -exec pip install --no-cache-dir {} + # buildkit |
| WORKDIR /workspace |
| COPY NVREADME.md README.md # buildkit |
| COPY docker-examples docker-examples # buildkit |
| COPY tutorials tutorials # buildkit |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c chmod -R a+w . # buildkit |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c set -x && WHEELS=1 /nvidia/build-scripts/installTRT.sh # buildkit |
| ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin:/opt/tensorrt/bin |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c pip --version && python -c 'import sys; print(sys.platform)' && pip install --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/sw-tensorrt-pypi/simple --no-cache-dir "polygraphy==${POLYGRAPHY_VERSION}" && pip install --index-url https://gitlab-master.nvidia.com/api/v4/projects/omniml%2Fmodelopt/packages/pypi/simple --extra-index-url https://pypi.nvidia.com "nvidia-modelopt[torch]==${MODEL_OPT_VERSION}" && pip install nvidia-resiliency-ext==0.3.0 # buildkit |
| COPY torch_tensorrt/ /opt/pytorch/torch_tensorrt/ # buildkit |
| ARG PYVER=3.12 |
| ENV LD_LIBRARY_PATH=/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 |
| ENV PATH=/usr/local/lib/python3.12/dist-packages/torch_tensorrt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin:/opt/tensorrt/bin |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c pip install --no-cache-dir /opt/pytorch/apex/dist/*.whl # buildkit |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c pip install --no-cache-dir /opt/pytorch/torch_tensorrt/dist/*.whl # buildkit |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c if [ "${L4T}" = "1" ]; then echo "Not installing Flash Attention wheel in iGPU as it is a requirement for Transformer Engine"; else pip install --no-cache-dir /opt/pytorch/flash_attn*.whl; fi # buildkit |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c if [ "${L4T}" = "1" ]; then echo "Not installing Transformer Engine in iGPU container until Version variable is set"; else /nvidia/build-scripts/installCAL.sh && /nvidia/build-scripts/installCUBLASMP.sh && /nvidia/build-scripts/installNVSHMEM.sh && git clone -b release_v${TRANSFORMER_ENGINE_VERSION} --single-branch --recursive https://github.com/NVIDIA/TransformerEngine.git && env NVTE_CUDA_ARCHS="70;80;89;90;100;120" NVTE_BUILD_THREADS_PER_JOB=8 pip install --no-cache-dir --no-build-isolation ./TransformerEngine && rm -rf TransformerEngine; fi # buildkit |
| ENV CUDA_MODULE_LOADING=LAZY |
| ENV TORCH_NCCL_USE_COMM_NONBLOCKING=0 |
| RUN |8 NVIDIA_PYTORCH_VERSION=25.05 PYTORCH_BUILD_VERSION=2.8.0a0+5228986 NVFUSER_BUILD_VERSION=9bf5aca TARGETARCH=amd64 PYVER=3.12 PYVER_MAJMIN=312 L4T=0 DALI_EXTRA_INDEX_URL=http://sqrl/nvdl/datasets/dali/pip-dali /bin/sh -c ln -sf ${_CUDA_COMPAT_PATH}/lib.real ${_CUDA_COMPAT_PATH}/lib && echo ${_CUDA_COMPAT_PATH}/lib > /etc/ld.so.conf.d/00-cuda-compat.conf && ldconfig && rm -f ${_CUDA_COMPAT_PATH}/lib # buildkit |
| COPY entrypoint.d/ /opt/nvidia/entrypoint.d/ # buildkit |
| ARG NVIDIA_BUILD_ID=170559088 |
| ENV NVIDIA_BUILD_ID=170559088 |
| LABEL com.nvidia.build.id=170559088 |
| ARG NVIDIA_BUILD_REF=0f499560921269b0135bf88c85232c1f26bcecfb |
| LABEL com.nvidia.build.ref=0f499560921269b0135bf88c85232c1f26bcecfb |
| ENV NVIDIA_PRODUCT_NAME=NeMo Framework |
| ENV PIP_NO_CACHE_DIR=1 |
| ARG NVIDIA_BIGNLP_VERSION |
| ENV NVIDIA_BIGNLP_VERSION= |
| LABEL com.nvidia.bignlp.version= |
| ENV DEBIAN_FRONTEND=noninteractive |
| RUN |1 NVIDIA_BIGNLP_VERSION= /bin/sh -c apt-get update && apt-get install -y --no-install-recommends git libsndfile1 sox openssh-server && rm -rf /var/lib/apt/lists/* && apt-get clean # buildkit |
| RUN |1 NVIDIA_BIGNLP_VERSION= /bin/sh -c apt-get remove --purge -y libslurm* && apt-get -y autoremove && pip uninstall -y onnx && pip install wheel && rm -rf /opt/pytorch/pytorch/third_party/onnx # buildkit |
| RUN |1 NVIDIA_BIGNLP_VERSION= /bin/sh -c cp /dev/null /etc/pip/constraint.txt # buildkit |
| WORKDIR /opt |
| ARG UV_VERSION=0.7.2 |
| RUN |2 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 /bin/sh -c curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | XDG_BIN_HOME=/usr/local/bin sh # buildkit |
| ENV UV_PROJECT_ENVIRONMENT=/opt/venv |
| ENV UV_CACHE_DIR=/opt/uv_cache |
| ENV PATH=/opt/venv/bin:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/mpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/amazon/efa/bin:/opt/tensorrt/bin |
| ENV UV_LINK_MODE=copy |
| RUN |2 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 /bin/sh -c uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages # buildkit |
| ARG REINSTALL_NSYS=True |
| ARG NSYS_VERSION=2025.1.1.103 |
| ARG REINSTALL_CUDNN=True |
| ARG CUDNN_VERSION=9.11.0.98 |
| ENV NSIGHT_SYSTEMS_VERSION=2025.1.1.103 |
| ARG REINSTALL_NCCL=True |
| ARG NCCL_VERSION=2.27.3-1+cuda12.9 |
| COPY docker/common/install_nccl.sh /opt/install_nccl.sh # buildkit |
| RUN |8 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 /bin/sh -c if [ $REINSTALL_NSYS = "True" ]; then /nvidia/build-scripts/installNSYS.sh; fi && if [ $REINSTALL_CUDNN = "True" ]; then /nvidia/build-scripts/installCUDNN.sh $CUDNN_VERSION; fi && if [ $REINSTALL_NCCL = "True" ]; then bash /opt/install_nccl.sh --NCCL_VER=$NCCL_VERSION; fi && rm /opt/install_nccl.sh # buildkit |
| ARG INSTALL_DEEPEP=True |
| ARG TARGET_ARCH=x86 |
| ARG DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 |
| ARG DEEPEP_NVSHMEM_COMMIT= |
| ARG GDR_COPY_VERSION=v2.4.1 |
| ENV CPATH=/usr/local/mpi/include: |
| ENV LD_LIBRARY_PATH=/usr/local/mpi/lib:/usr/local/lib/python3.12/dist-packages/torch/lib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 |
| ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.1/ |
| COPY nemo2/external/nvshmem_src_3.2.5-1.txz /opt/nvshmem_src_3.2.5-1.txz # buildkit |
| RUN |13 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 /bin/sh -c if [ $INSTALL_DEEPEP = "True" ]; then apt-get update && apt-get install -y --no-install-recommends nvidia-dkms-535 devscripts debhelper fakeroot dkms check libsubunit0 libsubunit-dev && if [ $TARGET_ARCH = "x86" ]; then ln -s /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so; else ln -s /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/libmlx5.so; fi && rm -rf /var/lib/apt/lists/* && apt-get clean && git clone https://github.com/NVIDIA/gdrcopy.git && cd /opt/gdrcopy && git checkout $GDR_COPY_VERSION && CUDA=/usr/local/cuda packages/build-deb-packages.sh && dpkg -i gdrdrv-dkms_*.deb && dpkg -i libgdrapi_*.deb && dpkg -i gdrcopy-tests_*.deb && dpkg -i gdrcopy_*.deb && cd /opt && git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git fetch origin $DEEPEP_COMMIT && git checkout FETCH_HEAD && cd /opt && if [ ! -z $DEEPEP_NVSHMEM_COMMIT ]; then CI_JOB_TOKEN=$(cat /run/secrets/CI_JOB_TOKEN) && git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab-master.nvidia.com/arch_moe_exploration/deepep-nvshmem.git nvshmem_src && cd nvshmem_src && git fetch origin $DEEPEP_NVSHMEM_COMMIT && git checkout FETCH_HEAD && rm -rf /opt/nvshmem_src/.git; else mkdir -p /opt/nvshmem_src && tar xf /opt/nvshmem_src_3.2.5-1.txz -C /opt && cd /opt/nvshmem_src && git apply /opt/DeepEP/third-party/nvshmem.patch; fi && CI_JOB_TOKEN=$(cat /run/secrets/CI_JOB_TOKEN) CUDA_HOME=/usr/local/cuda NVSHMEM_SHMEM_SUPPORT=0 NVSHMEM_UCX_SUPPORT=0 NVSHMEM_USE_NCCL=0 NVSHMEM_IBGDA_SUPPORT=1 NVSHMEM_PMIX_SUPPORT=0 NVSHMEM_TIMEOUT_DEVICE_POLLING=0 NVSHMEM_USE_GDRCOPY=1 cmake -S . 
-B build/ -DNVSHMEM_BUILD_EXAMPLES=OFF -DCMAKE_INSTALL_PREFIX=/opt/nvshmem_src/install -DCMAKE_CUDA_ARCHITECTURES=90 && cd build && make install -j && rm -rf /opt/nvshmem_src/build && cd /opt/DeepEP && NVSHMEM_DIR=/opt/nvshmem_src/install /usr/bin/python setup.py develop && NVSHMEM_DIR=/opt/nvshmem_src/install /usr/bin/python setup.py install && apt-get remove --purge -y devscripts debhelper man-db check groff-base dkms kmod bsdextrautils fakeroot && apt-get -y autoremove && apt-get clean; fi && rm -rf /opt/nvshmem_src_3.2.5-1.txz # buildkit |
| ARG REINSTALL_APEX=False |
| ARG APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec |
| RUN |15 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec /bin/sh -c pip install packaging # buildkit |
| RUN |15 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec /bin/sh -c if [ $REINSTALL_APEX = "True" ]; then pip uninstall -y apex && git clone https://github.com/NVIDIA/apex && cd apex && if [ ! -z $APEX_COMMIT ]; then git fetch origin $APEX_COMMIT && git checkout FETCH_HEAD; fi && HEAD_APEX_COMMIT=$(git rev-parse HEAD) && echo "Container built with Apex commit hash: $HEAD_APEX_COMMIT" && pip install -e . -v --no-build-isolation --disable-pip-version-check --no-cache-dir --config-settings "--build-option=--cpp_ext --cuda_ext --fast_layer_norm --distributed_adam --deprecated_fused_adam --group_norm --nccl_allocator"; fi # buildkit |
| RUN |15 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec /bin/sh -c if [ -d /tmp/TransformerEngine ]; then pip install --no-cache-dir --no-build-isolation /tmp/TransformerEngine/dist/transformer_engine*; fi # buildkit |
| ARG MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa |
| ARG MCORE_VERSION=0.13.1 |
| ENV NEMO_FW_MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa |
| ENV NEMO_FW_MCORE_VERSION=0.13.1 |
| RUN |17 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.11.0.98 REINSTALL_NCCL=True NCCL_VERSION=2.27.3-1+cuda12.9 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 /bin/sh -c CI_JOB_TOKEN=$(cat /run/secrets/CI_JOB_TOKEN) && git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab-master.nvidia.com/ADLR/megatron-lm.git && cd megatron-lm && git pull && if [ ! -z $MCORE_COMMIT ]; then git fetch origin $MCORE_COMMIT && git checkout FETCH_HEAD; fi && HEAD_MCORE_COMMIT=$(git rev-parse HEAD) && echo "Container built with megatron-lm commit hash: $HEAD_MCORE_COMMIT" && pip install -e . && rm -rf .git && cd megatron/core/datasets && make && rm -rf /root/.cache/bazel # buildkit |
| ENV PYTHONPATH=:/opt/megatron-lm |
| ARG TARGET_ARCH=x86 |
| WORKDIR /opt |
| COPY /opt/tinycudann*.whl ./ # buildkit |
| RUN |17 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 /bin/sh -c pip install --no-cache-dir /tmp/build_mamba_dep/causal-conv1d/causal_conv1d*.whl /tmp/build_grouped_gemm/grouped_gemm*.whl /tmp/build_nvdiffrast/nvdiffrast*.whl /tmp/build_stable_dreamfusion/raymarching*.whl /tmp/build_stable_dreamfusion/shencoder*.whl /tmp/build_stable_dreamfusion/freqencoder*.whl /tmp/build_stable_dreamfusion/gridencoder*.whl && if [ -f /tmp/build_mamba_dep/mamba/mamba*.whl ]; then pip install --no-cache-dir /tmp/build_mamba_dep/mamba/mamba*.whl; fi && if [ -f /tmp/build_bitsandbytes/bitsandbytes/dist/bitsandbytes*.whl ]; then pip install --no-cache-dir /tmp/build_bitsandbytes/bitsandbytes/dist/bitsandbytes*.whl; fi # buildkit |
| ARG TARGET_ARCH=x86 |
| RUN |17 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 /bin/sh -c echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config && sed -i 's/# StrictHostKeyChecking ask/ StrictHostKeyChecking no/' /etc/ssh/ssh_config && mkdir -p /var/run/sshd # buildkit |
| ARG NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 |
| ARG NEMO_VERSION=2.4.1 |
| ENV NEMO_FW_NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 |
| ENV NEMO_FW_NEMO_VERSION=2.4.1 |
| RUN |19 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 /bin/sh -c git clone https://github.com/NVIDIA/NeMo.git && cd NeMo && git pull && if [ ! -z $NEMO_COMMIT ]; then git fetch origin $NEMO_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NEMO_COMMIT=$(git rev-parse HEAD) && echo "Container built with NeMo commit hash: $HEAD_NEMO_COMMIT" && pip uninstall -y nemo_toolkit sacrebleu && sed -i "/mamba-ssm/d" requirements/requirements_nlp.txt && if [ $TARGET_ARCH = "arm" ]; then sed -i "/torch/d" requirements/requirements.txt && sed -i "/decord/d" requirements/requirements_multimodal.txt && sed -i "/megatron_core/d" requirements/requirements_nlp.txt; fi && pip install -e ".[all]" && cd nemo/collections/nlp/data/language_modeling/megatron && make # buildkit |
| ARG NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 |
| ARG NEMO_EXPORT_DEPLOY_VERSION |
| ENV NEMO_FW_NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 |
| ENV NEMO_FW_NEMO_EXPORT_DEPLOY_VERSION= |
| ARG SKIP_TRTLLM=True |
| COPY export_deploy/uv_args.txt /opt/uv_args.txt # buildkit |
| RUN |22 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/Export-Deploy.git && cd Export-Deploy && git pull && if [ ! -z $NEMO_EXPORT_DEPLOY_COMMIT ]; then git fetch origin $NEMO_EXPORT_DEPLOY_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NEMO_EXPORT_DEPLOY_COMMIT=$(git rev-parse HEAD) && echo "Container built with NeMo-Export-Deploy commit hash: $HEAD_NEMO_EXPORT_DEPLOY_COMMIT" && UV_ARGS=`cat /opt/uv_args.txt` && if [ "$SKIP_TRTLLM" = "True" ]; then uv sync --link-mode symlink --locked --inexact $UV_ARGS; else uv sync --link-mode symlink --locked --inexact --extra trtllm $UV_ARGS; fi && rm ~/.netrc # buildkit |
| ENV PYTHONPATH=/opt/Export-Deploy:/opt/venv/lib/python3.12/site-packages/::/opt/megatron-lm |
| ARG NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f |
| ARG NEMO_EVAL_VERSION |
| ENV NEMO_FW_NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f |
| ENV NEMO_FW_NEMO_EVAL_VERSION= |
| RUN |24 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/Eval.git && cd Eval && git pull && if [ ! -z $NEMO_EVAL_COMMIT ]; then git fetch origin $NEMO_EVAL_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NEMO_EVAL_COMMIT=$(git rev-parse HEAD) && echo "Container built with NeMo-Eval commit hash: $HEAD_NEMO_EVAL_COMMIT" && pip install -e . && rm ~/.netrc # buildkit |
| ARG NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 |
| ARG NVFSDP_VERSION=0.2.0rc0 |
| ENV NEMO_FW_NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 |
| ENV NEMO_FW_NVFSDP_VERSION=0.2.0rc0 |
| RUN |26 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/nvFSDP.git && cd nvFSDP && git pull && if [ ! -z $NVFSDP_COMMIT ]; then git fetch origin $NVFSDP_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NVFSDP_COMMIT=$(git rev-parse HEAD) && echo "Container built with nvFSDP commit hash: $HEAD_NVFSDP_COMMIT" && pip install -e . && rm ~/.netrc # buildkit |
| ARG NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 |
| ARG NEMO_AUTOMODEL_VERSION=0.2.0rc0 |
| ENV NEMO_FW_NEMO_AUTOMODEL_COMMIT= |
| ENV NEMO_FW_NEMO_AUTOMODEL_VERSION=0.2.0rc0 |
| RUN |28 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/Automodel.git && cd Automodel && git pull && if [ ! -z $NEMO_AUTOMODEL_COMMIT ]; then git fetch origin $NEMO_AUTOMODEL_COMMIT && git checkout FETCH_HEAD; fi && HEAD_NEMO_AUTOMODEL_COMMIT=$(git rev-parse HEAD) && echo "Container built with NeMo-Automodel commit hash: $HEAD_NEMO_AUTOMODEL_COMMIT" && pip install --no-deps "liger-kernel==0.5.8" && pip install --no-deps "cut-cross-entropy @ git+https://github.com/apple/ml-cross-entropy.git@87a86aba72cfd2f0d8abecaf81c13c4528ea07d8" && sed -i 's/torch==[0-9]\+\.[0-9]\+\.[0-9]\+/torch/g' pyproject.toml && sed -i '/cut-cross-entropy/d' pyproject.toml && sed -i '/liger-kernel/d' pyproject.toml && pip install -e .[vlm]; rm ~/.netrc # buildkit |
| ARG MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f |
| ARG MEGATRON_BRIDGE_VERSION=0.2.0rc0 |
| ENV NEMO_FW_MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f |
| ENV NEMO_FW_MEGATRON_BRIDGE_VERSION=0.2.0rc0 |
| RUN |30 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 /bin/sh -c export GITHUB_API_TOKEN=$(cat /run/secrets/GITHUB_API_TOKEN) && echo -e "machine github.com\n login token\n password $GITHUB_API_TOKEN" > ~/.netrc && chmod 600 ~/.netrc && git clone https://github.com/NVIDIA-NeMo/Megatron-Bridge.git && cd Megatron-Bridge && git pull && if [ ! -z $MEGATRON_BRIDGE_COMMIT ]; then git fetch origin $MEGATRON_BRIDGE_COMMIT && git checkout FETCH_HEAD; fi && HEAD_MEGATRON_BRIDGE_COMMIT=$(git rev-parse HEAD) && echo "Container built with Megatron-Bridge commit hash: $HEAD_MEGATRON_BRIDGE_COMMIT" && sed -i 's/torch==[0-9]\+\.[0-9]\+\.[0-9]\+/torch/g' pyproject.toml && pip install -e "." && rm ~/.netrc # buildkit |
| ARG INSTALL_RESIL=False |
| ARG RESIL_COMMIT=not_installed |
| ARG RESIL_VERSION=not_installed |
| ENV NEMO_FW_RESIL_COMMIT=not_installed |
| ENV NEMO_FW_RESIL_VERSION=not_installed |
| RUN |33 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed /bin/sh -c if [ $INSTALL_RESIL = "True" ] && [ $TARGET_ARCH = "x86" ]; then pip install --no-cache-dir "git+https://github.com/NVIDIA/nvidia-resiliency-ext.git@${RESIL_COMMIT}"; fi # buildkit |
| ARG INSTALL_MODELOPT=True |
| ARG MODELOPT_VERSION=0.31.0 |
| RUN |35 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 /bin/sh -c if [ $INSTALL_MODELOPT = "True" ]; then pip install --no-cache-dir nvidia-modelopt[torch]==$MODELOPT_VERSION; fi # buildkit |
| ARG NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 |
| ENV NEMO_FW_NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 |
| RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c git clone https://github.com/NVIDIA-NeMo/Run && cd Run && git pull && if [ ! -z $NEMO_RUN_COMMIT ]; then git fetch origin $NEMO_RUN_COMMIT && git checkout FETCH_HEAD; fi && pip install -e . # buildkit |
| RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c pip uninstall -y webdataset && pip install --no-cache-dir "webdataset==0.2.86" "pandas==2.2.3" "ctc_segmentation==1.7.1" # buildkit |
| RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c pip uninstall -y pytest-coverage levenshtein pillow future && pip install --no-cache-dir "jupyter-core==5.8.1" "pillow==11.3.0" && uv cache clean protobuf && uv pip install "protobuf==4.25.8" # buildkit |
| RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c apt-get remove --purge -y gdb fakeroot libfakeroot && apt-get -y autoremove && apt-get clean # buildkit |
| RUN |36 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 /bin/sh -c <<"EOF" python from transformers import AutoTokenizer, Qwen2Tokenizer, CLIPImageProcessor _=AutoTokenizer.from_pretrained('gpt2') _=AutoTokenizer.from_pretrained('bert-base-cased') _=AutoTokenizer.from_pretrained('bert-large-cased') _=AutoTokenizer.from_pretrained('bert-large-uncased') _=AutoTokenizer.from_pretrained('bigcode/starcoder2-tokenizer') _=AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b") _=AutoTokenizer.from_pretrained("nvidia/Nemotron-H-8B-Base-8K") _=AutoTokenizer.from_pretrained("nvidia/Nemotron-H-47B-Base-8K") _=AutoTokenizer.from_pretrained("nvidia/Nemotron-H-56B-Base-8K") _=AutoTokenizer.from_pretrained('THUDM/chatglm2-6b',trust_remote_code=True) _=AutoTokenizer.from_pretrained('THUDM/chatglm3-6b',trust_remote_code=True) 
_=Qwen2Tokenizer.from_pretrained('qwen/Qwen1.5-7B',trust_remote_code=True) _=Qwen2Tokenizer.from_pretrained('qwen/Qwen1.5-14B',trust_remote_code=True) _=AutoTokenizer.from_pretrained('openai/clip-vit-large-patch14') _=CLIPImageProcessor.from_pretrained('openai/clip-vit-large-patch14') _=CLIPImageProcessor.from_pretrained('openai/clip-vit-large-patch14-336') EOF # buildkit |
| ARG URM_USER_ACCOUNT=donghyukc |
| RUN |37 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc /bin/sh -c URM_API_TOKEN=$(cat /run/secrets/URM_API_TOKEN) && wget --http-user $URM_USER_ACCOUNT --http-password ${URM_API_TOKEN} "https://urm.nvidia.com/artifactory/nemo-fw-generic-local/llama3_70b_tokenizer.tar.gz" && wget --http-user $URM_USER_ACCOUNT --http-password ${URM_API_TOKEN} "https://urm.nvidia.com/artifactory/nemo-fw-generic-local/llama3_8b_tokenizer.tar.gz" && mkdir -p /tmp_assets && tar -xzvf llama3_8b_tokenizer.tar.gz -C /tmp_assets && tar -xzvf llama3_70b_tokenizer.tar.gz -C /tmp_assets && cp -rf /tmp_assets/hub ~/.cache/huggingface && rm -rf /tmp_assets && rm llama3_8b_tokenizer.tar.gz llama3_70b_tokenizer.tar.gz # buildkit |
| RUN |37 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc /bin/sh -c <<"EOF" python from transformers import AutoTokenizer _=AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B', local_files_only=True) _=AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-70B', local_files_only=True) _=AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-8B-instruct', local_files_only=True) _=AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3-70B-instruct', local_files_only=True) EOF # buildkit |
| RUN |37 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc /bin/sh -c sed -i '/^LayerId/s/^/#/' /usr/local/lib/python3.12/dist-packages/cv2/typing/__init__.py # buildkit |
| ARG PRE_COMPILE_QUANT_KERNELS=True |
| RUN |38 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc PRE_COMPILE_QUANT_KERNELS=True /bin/sh -c if [ $PRE_COMPILE_QUANT_KERNELS = "True" ]; then python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"; fi # buildkit |
| COPY nemo2/patches/triton-lang_triton_6570_lazy_init.patch /opt/ # buildkit |
| RUN |38 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc PRE_COMPILE_QUANT_KERNELS=True /bin/sh -c cd /usr/local/lib/python3.12/dist-packages && patch -p1 $(python -c "import triton; print(triton.__path__[0])")/runtime/autotuner.py /opt/triton-lang_triton_6570_lazy_init.patch && rm /opt/triton-lang_triton_6570_lazy_init.patch # buildkit |
| WORKDIR /workspace |
| RUN |38 NVIDIA_BIGNLP_VERSION= UV_VERSION=0.7.2 REINSTALL_NSYS=True NSYS_VERSION=2025.1.1.103 REINSTALL_CUDNN=True CUDNN_VERSION=9.10.1.4 REINSTALL_NCCL=True NCCL_VERSION=2.26.5 INSTALL_DEEPEP=True TARGET_ARCH=x86 DEEPEP_COMMIT=a84a24808fb0ea732f49b874cc456a69dde69076 DEEPEP_NVSHMEM_COMMIT= GDR_COPY_VERSION=v2.4.1 REINSTALL_APEX=False APEX_COMMIT=26bba57d62553d268319b4a20cc3d8aa990249ec MCORE_COMMIT=ea651a3b78b9b0c1dfd83975ff28853e38e5abaa MCORE_VERSION=0.13.1 NEMO_COMMIT=6489229cbb05c38ad20c2e00f7a2adde2e5bd8c4 NEMO_VERSION=2.4.1 NEMO_EXPORT_DEPLOY_COMMIT=75e25d47c2430c600dde63d317122034d10e2995 NEMO_EXPORT_DEPLOY_VERSION= SKIP_TRTLLM=True NEMO_EVAL_COMMIT=97331474f317d84768ea551fa32b16becbeaa56f NEMO_EVAL_VERSION= NVFSDP_COMMIT=c4180101296d8bbfcd6a5f65ca28416fce89f131 NVFSDP_VERSION=0.2.0rc0 NEMO_AUTOMODEL_COMMIT=36076d22c13695c1e0268d27d45db10fc8dc3783 NEMO_AUTOMODEL_VERSION=0.2.0rc0 MEGATRON_BRIDGE_COMMIT=039b26f78695a05ac4304de72a206781788de48f MEGATRON_BRIDGE_VERSION=0.2.0rc0 INSTALL_RESIL=False RESIL_COMMIT=not_installed RESIL_VERSION=not_installed INSTALL_MODELOPT=True MODELOPT_VERSION=0.31.0 NEMO_RUN_COMMIT=ddb1c4b4e406a11be291729e22f68c52f394b0f0 URM_USER_ACCOUNT=donghyukc PRE_COMPILE_QUANT_KERNELS=True /bin/sh -c chmod -R a+w /workspace # buildkit |
| ARG NVIDIA_BUILD_ID=203980512 |
| ENV NVIDIA_BUILD_ID=203980512 |
| LABEL com.nvidia.build.id=203980512 |
| ARG NVIDIA_BUILD_REF=c66568ebda7cde9f5231a46a0d93240373c63a6c |
| LABEL com.nvidia.build.ref=c66568ebda7cde9f5231a46a0d93240373c63a6c |
| LABEL maintainer=eric@ericxliu.me |
| LABEL description=Cached base image for NVIDIA NeMo |
| LABEL nemo.version= |
| WORKDIR /workspace |
| CMD ["/bin/bash"] |
Labels
| Key | Value |
|---|---|
| com.nvidia.bignlp.version | |
| com.nvidia.build.id | 203980512 |
| com.nvidia.build.ref | c66568ebda7cde9f5231a46a0d93240373c63a6c |
| com.nvidia.cal.version | 0.4.4.50 |
| com.nvidia.cublas.version | 12.9.0.13 |
| com.nvidia.cublasmp.version | 0.4.0.789 |
| com.nvidia.cuda.version | 9.0 |
| com.nvidia.cudnn.version | 9.10.1.4 |
| com.nvidia.cufft.version | 11.4.0.6 |
| com.nvidia.curand.version | 10.3.10.19 |
| com.nvidia.cusolver.version | 11.7.4.40 |
| com.nvidia.cusparse.version | 12.5.9.5 |
| com.nvidia.cusparselt.version | 0.7.1.0 |
| com.nvidia.nccl.version | 2.26.5 |
| com.nvidia.npp.version | 12.4.0.27 |
| com.nvidia.nsightcompute.version | 2025.2.0.11 |
| com.nvidia.nsightsystems.version | 2025.3.1.90 |
| com.nvidia.nvjpeg.version | 12.4.0.16 |
| com.nvidia.pytorch.version | 2.8.0a0+5228986 |
| com.nvidia.tensorrt.version | 10.10.0.31 |
| com.nvidia.tensorrtoss.version | |
| com.nvidia.volumes.needed | nvidia_driver |
| description | Cached base image for NVIDIA NeMo |
| maintainer | eric@ericxliu.me |
| nemo.version | |
| org.opencontainers.image.ref.name | ubuntu |
| org.opencontainers.image.version | 24.04 |