# sciml-full-base.def

  # Build from the stock Ubuntu 18.04 image on Docker Hub.
  Bootstrap: docker
  From: ubuntu:18.04
  #Stage: spython-base

  %environment
  # -----------------------------------------------------------------------------------
  # Variables exported into the container's runtime environment.
  # Append the inherited LD_LIBRARY_PATH only when it is non-empty: a trailing ':'
  # is an empty entry, which the dynamic loader treats as the current directory.
  export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
  export LC_ALL=C
  # Collective back-ends selected for Horovod.
  export HOROVOD_GPU_ALLREDUCE=NCCL
  export HOROVOD_GPU_ALLGATHER=MPI
  export HOROVOD_GPU_BROADCAST=MPI
  # Versions of the interpreter and frameworks installed by %post.
  export PYTHON_VERSION=3.8
  export TENSORFLOW_VERSION=2.3.0
  export PYTORCH_VERSION=1.10.0+cu113
  %post
  # -----------------------------------------------------------------------------------
  # NOTE(review): this looks like a Dockerfile-conversion leftover; confirm that a
  # bind path set during the build is actually wanted.
  export SINGULARITY_BINDPATH="$SINGULARITY_BINDPATH,$(echo /usr/bin/ | sed -e 's/ /,/g')"

  # Mellanox OFED release to download (target distro/arch and version).
  MLNX_ARCH=ubuntu18.04-x86_64
  MLNX_VERSION=5.2-1.0.4.0

  # Python 3.7 is supported by Ubuntu Bionic out of the box
  python=3.7
  PYTHON_VERSION=${python}

  # Set default shell to /bin/bash
  #SHELL ["/bin/bash", "-cu"]

  # Configure the timezone up front. This is a plain shell command: a Dockerfile
  # "RUN" prefix here would make the shell look for a program called RUN.
  TZ="Europe/London"
  ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
  # Install the compiler toolchain, Python ${PYTHON_VERSION} and assorted system
  # packages in a single non-interactive apt transaction.
  apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
      build-essential \
      cmake \
      g++-7 \
      git \
      curl \
      vim \
      wget \
      ca-certificates \
      "python${PYTHON_VERSION}" \
      "python${PYTHON_VERSION}-dev" \
      "python${PYTHON_VERSION}-distutils" \
      libpython-stdlib \
      python \
      udev \
      automake \
      gfortran \
      autotools-dev \
      chrpath \
      pkg-config \
      libnl-3-dev \
      tcl \
      pciutils \
      tk \
      libnl-route-3-dev \
      libltdl-dev \
      bison \
      autoconf \
      flex \
      graphviz \
      libssl1.0.0 \
      kmod \
      debhelper \
      libgfortran4 \
      ethtool \
      swig \
      lsof \
      libnl-route-3-200 \
      m4 \
      libnl-3-200 \
      dpatch \
      libnuma-dev \
      iputils-ping \
      iproute2 \
      apt-utils

  # Make the unversioned "python" command resolve to the selected interpreter.
  ln -sf "/usr/bin/python${PYTHON_VERSION}" /usr/bin/python

  # Bootstrap pip. The unversioned get-pip.py only supports current Python
  # releases, so fetch the script published for this interpreter version.
  # -f makes curl fail on an HTTP error instead of saving the error page.
  curl -fsSLO "https://bootstrap.pypa.io/pip/${PYTHON_VERSION}/get-pip.py" && \
      python get-pip.py && \
      rm get-pip.py
  # Download and install the user-space parts of Mellanox OFED, then remove the
  # unpacked installer tree and the tarball.
  ofed_pkg="MLNX_OFED_LINUX-${MLNX_VERSION}-${MLNX_ARCH}"
  wget "http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_VERSION}/${ofed_pkg}.tgz" && \
      tar -xzvf "${ofed_pkg}.tgz" --no-same-owner && \
      cd "${ofed_pkg}" && \
      ./mlnxofedinstall --user-space-only --without-fw-update --skip-repo --force && \
      cd .. && \
      rm -rf "${ofed_pkg}" "${ofed_pkg}.tgz"
  # Install Open MPI version 4.1
  # Built from source without verbs support; the source tree and tarball are
  # removed once the install has finished.
  wget https://www.open-mpi.org/software/ompi/v4.1/downloads/openmpi-4.1.0.tar.gz && \
      tar zxf openmpi-4.1.0.tar.gz && \
      cd openmpi-4.1.0 && \
      ./configure --enable-mpirun-prefix-by-default \
          --enable-shared \
          --without-verbs && \
      make -j "$(nproc)" all && \
      make install && \
      ldconfig && \
      cd .. && \
      rm -rf openmpi-4.1.0 openmpi-4.1.0.tar.gz
  cd /

  # Python bindings for the MPI library installed above.
  pip --no-cache-dir --disable-pip-version-check install mpi4py

  # Exported so that the commands that follow can see the Open MPI libraries;
  # the inherited value is appended only when it is non-empty.
  export LD_LIBRARY_PATH=/usr/local/lib/:/usr/local/lib/openmpi${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
  # --------------------------------------------------------------------------
  # this will install all necessary packages and prepare the container
  # TensorFlow version is tightly coupled to CUDA and cuDNN so it should be selected carefully
  # The base image is Ubuntu 18.04 (Bionic); from here on the interpreter is Python 3.8.
  export PYTHON_VERSION=3.8
  export TENSORFLOW_VERSION=2.3.0
  export PYTORCH_VERSION=1.10.0+cu113
  export DEBIAN_FRONTEND=noninteractive

  # Give NVIDIA's repository priority for the packages it provides.
  wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
  mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600

  # Register the CUDA apt repository. 3bf863cc is the signing key NVIDIA has
  # used since retiring 7fa2af80.
  apt-get update && apt-get install -y gnupg software-properties-common
  apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
  add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
  apt-get update

  # Pin the toolkit release: later steps of this file refer to /usr/local/cuda-11.5,
  # so an unpinned "cuda" (latest release) would not match those paths.
  apt-get -y install cuda-11-5
  # NOTE(review): cuDNN and NCCL are left unpinned; confirm that the versions apt
  # selects are built for the CUDA release above.
  apt-get install -y libcudnn8 libcudnn8-dev
  apt-get install -y libnccl2 libnccl-dev
  # Install the Python ${PYTHON_VERSION} interpreter and headers along with the
  # image libraries and build tools.
  apt-get -y update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
      build-essential \
      cmake \
      git \
      curl \
      vim \
      wget \
      ca-certificates \
      libjpeg-dev \
      libpng-dev \
      "python${PYTHON_VERSION}" \
      "python${PYTHON_VERSION}-dev"

  # Re-point the unversioned "python" command at the new interpreter.
  ln -sf "/usr/bin/python${PYTHON_VERSION}" /usr/bin/python

  # Bootstrap pip for this interpreter. The unversioned get-pip.py only supports
  # current Python releases, so fetch the script published for this version.
  # -f makes curl fail on an HTTP error instead of saving the error page.
  curl -fsSLO "https://bootstrap.pypa.io/pip/${PYTHON_VERSION}/get-pip.py" && \
      python get-pip.py && \
      rm get-pip.py

  # The earlier mpi4py install went into the previous interpreter's
  # site-packages; install it again for the interpreter "python" now points to.
  pip --no-cache-dir --disable-pip-version-check install mpi4py
  # Install TensorFlow, Keras and PyTorch
  # --no-cache-dir keeps the downloaded wheels out of the image.
  pip install --no-cache-dir "torch==${PYTORCH_VERSION}" torchvision==0.11.1+cu113 torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
  pip install --no-cache-dir mxnet-cu112 "tensorflow-gpu==${TENSORFLOW_VERSION}" keras h5py filelock matplotlib scikit-learn

  # Put the CUDA 11.5 tools on PATH for the Horovod build.
  cuda_root=/usr/local/cuda-11.5
  export PATH="${cuda_root}/bin:$PATH"

  # Install Horovod, temporarily using CUDA stubs
  # The first ldconfig adds the stub directory to the linker cache for the build;
  # the final bare ldconfig rebuilds the cache without it.
  ldconfig "${cuda_root}/targets/x86_64-linux/lib/stubs" && \
      HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_MXNET=1 HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 pip install --no-cache-dir horovod && \
      ldconfig
  # Set default NCCL parameters
  echo NCCL_DEBUG=INFO >> /etc/nccl.conf && \
      echo NCCL_SOCKET_IFNAME=^docker0 >> /etc/nccl.conf

  # Clean up CUDA
  # Called without sudo: the ubuntu:18.04 base image does not ship it, and the
  # package steps above already run apt-get directly.
  # NOTE(review): purging "cuda*" also removes the toolkit libraries under
  # /usr/local/cuda; confirm that the installed frameworks do not need them at run time.
  apt-get --purge -y remove "cuda*"
  apt-get --purge -y remove "nvidia*"