# sciml-bench-clean.def -- Singularity definition file for the sciml-bench container
  1. Bootstrap: docker
  2. From: ubuntu:18.04
  3. #Stage: spython-base
  4. %files
  5. # -----------------------------------------------------------------------------------
  6. sciml-bench/requirements.txt /sciml-benchmarks/requirements.txt
  7. sciml-bench/MANIFEST.in /sciml-benchmarks/MANIFEST.in
  8. sciml-bench/setup.py /sciml-benchmarks/setup.py
  9. sciml-bench/doc /sciml-benchmarks/doc
  10. sciml-bench/sciml_bench /sciml-benchmarks/sciml_bench
  11. %environment
  12. # -----------------------------------------------------------------------------------
  13. export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
  14. export LC_ALL=C
  15. export HOROVOD_GPU_ALLREDUCE=NCCL
  16. export HOROVOD_GPU_ALLGATHER=MPI
  17. export HOROVOD_GPU_BROADCAST=MPI
  18. #export HOROVOD_NCCL_HOME=/usr/local/cuda/nccl
  19. #export HOROVOD_NCCL_INCLUDE=/usr/local/cuda/nccl/include
  20. #export HOROVOD_NCCL_LIB=/usr/local/cuda/nccl/lib
  21. export PYTHON_VERSION=3.8
  22. export TENSORFLOW_VERSION=2.3.0
  23. export PYTORCH_VERSION=1.10.0+cu113
  24. %post
  25. # -----------------------------------------------------------------------------------
  26. export SINGULARITY_BINDPATH=$SINGULARITY_BINDPATH,$(echo /usr/bin/ | sed -e 's/ /,/g')
  27. MLNX_ARCH=ubuntu18.04-x86_64
  28. MLNX_VERSION=5.2-1.0.4.0
  29. # Python 3.7 is supported by Ubuntu Bionic out of the box
  30. python=3.7
  31. PYTHON_VERSION=${python}
  32. # Set default shell to /bin/bash
  33. #SHELL ["/bin/bash", "-cu"]
  34. TZ="Europe/London"
  35. RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
  36. apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
  37. build-essential \
  38. cmake \
  39. g++-7 \
  40. git \
  41. curl \
  42. vim \
  43. wget \
  44. ca-certificates \
  45. python${PYTHON_VERSION} \
  46. python${PYTHON_VERSION}-dev \
  47. python${PYTHON_VERSION}-distutils \
  48. libpython-stdlib \
  49. python \
  50. udev \
  51. automake \
  52. gfortran \
  53. autotools-dev \
  54. chrpath \
  55. pkg-config \
  56. libnl-3-dev \
  57. tcl \
  58. pciutils \
  59. tk \
  60. libnl-route-3-dev \
  61. libltdl-dev \
  62. bison \
  63. autoconf \
  64. flex \
  65. graphviz \
  66. libssl1.0.0 \
  67. kmod \
  68. debhelper \
  69. libgfortran4 \
  70. ethtool \
  71. swig \
  72. lsof \
  73. libnl-route-3-200 \
  74. m4 \
  75. libnl-3-200 \
  76. dpatch \
  77. libnuma-dev \
  78. iputils-ping \
  79. iproute2 \
  80. apt-utils
  81. ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python
  82. curl -O https://bootstrap.pypa.io/get-pip.py && \
  83. python get-pip.py && \
  84. rm get-pip.py
  85. wget http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_VERSION}/MLNX_OFED_LINUX-${MLNX_VERSION}-${MLNX_ARCH}.tgz && \
  86. tar -xzvf MLNX_OFED_LINUX-${MLNX_VERSION}-${MLNX_ARCH}.tgz --no-same-owner && \
  87. cd MLNX_OFED_LINUX-${MLNX_VERSION}-${MLNX_ARCH} && \
  88. ./mlnxofedinstall --user-space-only --without-fw-update --skip-repo --force && \
  89. cd .. && \
  90. rm -rf MLNX_OFED-LINUX-${MLNX_VERSION}-${MLNX_ARCH} && \
  91. rm -rf *.tgz
  92. # Install Open MPI version 4.1
  93. wget https://www.open-mpi.org/software/ompi/v4.1/downloads/openmpi-4.1.0.tar.gz && \
  94. tar zxf openmpi-4.1.0.tar.gz && \
  95. cd openmpi-4.1.0 && \
  96. ./configure --enable-mpirun-prefix-by-default \
  97. --enable-shared \
  98. --without-verbs && \
  99. make -j $(nproc) all && \
  100. make install && \
  101. ldconfig && \
  102. rm -rf /tmp/openmpi
  103. cd /
  104. pip --no-cache-dir --disable-pip-version-check install mpi4py
  105. LD_LIBRARY_PATH=/usr/local/lib/:/usr/local/lib/openmpi:$LD_LIBRARY_PATH
  106. # --------------------------------------------------------------------------
  107. # this will install all necessary packages and prepare the container
  108. # TensorFlow version is tightly coupled to CUDA and cuDNN so it should be selected carefully
  109. # Python 2.7 or 3.5 is supported by Ubuntu Xenial out of the box
  110. export PYTHON_VERSION=3.8
  111. export TENSORFLOW_VERSION=2.3.0
  112. export PYTORCH_VERSION=1.10.0+cu113
  113. wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
  114. mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
  115. apt-get update && apt-get install -y gnupg
  116. apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
  117. apt-get install -y software-properties-common
  118. apt-get update
  119. add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
  120. apt-get update
  121. export DEBIAN_FRONTEND=noninteractive
  122. apt-get -y install cuda
  123. apt-get update && apt-get install -y libcudnn8 libcudnn8-dev
  124. apt install libnccl2 libnccl-dev
  125. apt-get -y update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
  126. build-essential \
  127. cmake \
  128. git \
  129. curl \
  130. vim \
  131. wget \
  132. ca-certificates \
  133. libjpeg-dev \
  134. libpng-dev \
  135. python${PYTHON_VERSION} \
  136. python${PYTHON_VERSION}-dev
  137. ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python
  138. curl -O https://bootstrap.pypa.io/get-pip.py && \
  139. python get-pip.py && \
  140. rm get-pip.py
  141. # Install TensorFlow, Keras and PyTorch
  142. pip install torch==${PYTORCH_VERSION} torchvision==0.11.1+cu113 torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
  143. pip install mxnet-cu112 tensorflow-gpu==${TENSORFLOW_VERSION} keras h5py filelock matplotlib scikit-learn
  144. export PATH="/usr/local/cuda-11.5/bin:$PATH"
  145. # Install Horovod, temporarily using CUDA stubs
  146. ldconfig /usr/local/cuda-11.5/targets/x86_64-linux/lib/stubs && \
  147. HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_WITH_MXNET=1 HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 pip install --no-cache-dir horovod && \
  148. ldconfig
  149. # Set default NCCL parameters
  150. echo NCCL_DEBUG=INFO >> /etc/nccl.conf && \
  151. echo NCCL_SOCKET_IFNAME=^docker0 >> /etc/nccl.conf
  152. # Clean up cuda
  153. ls /usr/local/
  154. sudo apt-get --purge -y remove "cuda*"
  155. sudo apt-get --purge -y remove "nvidia*"
  156. # Install sciml-bench
  157. cd /sciml-benchmarks && pip install .
  158. %environment
  159. %runscript
  160. sciml-bench $@