|
@@ -0,0 +1,85 @@
|
|
|
|
+
|
|
|
|
+%files
|
|
|
|
+# -----------------------------------------------------------------------------------
|
|
|
|
+ ../sciml-bench/requirements.txt /sciml-benchmarks/requirements.txt
|
|
|
|
+ ../sciml-bench/MANIFEST.in /sciml-benchmarks/MANIFEST.in
|
|
|
|
+ ../sciml-bench/setup.py /sciml-benchmarks/setup.py
|
|
|
|
+ ../sciml-bench/doc /sciml-benchmarks/doc
|
|
|
|
+ ../sciml-bench/sciml_bench /sciml-benchmarks/sciml_bench
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+%environment
|
|
|
|
+# -----------------------------------------------------------------------------------
|
|
|
|
+
|
|
|
|
+ export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
|
|
|
|
+ export LC_ALL=C
|
|
|
|
+ export HOROVOD_GPU_ALLREDUCE=NCCL
|
|
|
|
+ export HOROVOD_GPU_ALLGATHER=MPI
|
|
|
|
+ export HOROVOD_GPU_BROADCAST=MPI
|
|
|
|
+ #export HOROVOD_NCCL_HOME=/usr/local/cuda/nccl
|
|
|
|
+ #export HOROVOD_NCCL_INCLUDE=/usr/local/cuda/nccl/include
|
|
|
|
+ #export HOROVOD_NCCL_LIB=/usr/local/cuda/nccl/lib
|
|
|
|
+ export PYTHON_VERSION=3.8
|
|
|
|
+ #export TENSORFLOW_VERSION=${TENSORFLOW_VERSION}
|
|
|
|
+ #export PYTORCH_VERSION=${PYTORCH_VERSION}
|
|
|
|
+
|
|
|
|
+%post
|
|
|
|
+# -----------------------------------------------------------------------------------
|
|
|
|
+# this will install all necessary packages and prepare the container
|
|
|
|
+
|
|
|
|
+# TensorFlow version is tightly coupled to CUDA and cuDNN so it should be selected carefully
|
|
|
|
+# The Python version selected below must be installable via apt on the base image
|
|
|
|
+
|
|
|
|
+ export PYTHON_VERSION=3.8
|
|
|
|
+ export TENSORFLOW_VERSION=${TENSORFLOW_VERSION}
|
|
|
|
+ export PYTORCH_VERSION=${PYTORCH_VERSION}
|
|
|
|
+
|
|
|
|
+ echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
|
|
|
|
+
|
|
|
|
+ apt-get -y update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
|
|
|
|
+ build-essential \
|
|
|
|
+ cmake \
|
|
|
|
+ git \
|
|
|
|
+ curl \
|
|
|
|
+ vim \
|
|
|
|
+ wget \
|
|
|
|
+ ca-certificates \
|
|
|
|
+ libjpeg-dev \
|
|
|
|
+ libpng-dev \
|
|
|
|
+ python${PYTHON_VERSION} \
|
|
|
|
+ python${PYTHON_VERSION}-dev
|
|
|
|
+
|
|
|
|
+ ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python
|
|
|
|
+
|
|
|
|
+ curl -O https://bootstrap.pypa.io/get-pip.py && \
|
|
|
|
+ python get-pip.py && \
|
|
|
|
+ rm get-pip.py
|
|
|
|
+
|
|
|
|
+# Install TensorFlow, Keras and PyTorch
|
|
|
|
+
|
|
|
|
+ ${PIP_CMD_1}
|
|
|
|
+ ${PIP_CMD_2}
|
|
|
|
+
|
|
|
|
+ export PATH="/usr/local/cuda-11.5/bin:$PATH"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+# Install Horovod, temporarily using CUDA stubs
|
|
|
|
+ ldconfig ${LDCONFIG_PATH} && \
|
|
|
|
+ ${PIP_CMD_3} && \
|
|
|
|
+ ldconfig
|
|
|
|
+
|
|
|
|
+# Set default NCCL parameters
|
|
|
|
+ echo NCCL_DEBUG=INFO >> /etc/nccl.conf && \
|
|
|
|
+ echo NCCL_SOCKET_IFNAME=^docker0 >> /etc/nccl.conf
|
|
|
|
+
|
|
|
|
+# Download examples
|
|
|
|
+    # GitHub no longer serves repositories over Subversion, so fetch the Horovod
+    # examples with a shallow git clone (git is installed by the apt step above)
+    # and keep only the examples directory at /examples.
+    cd / && \
|
|
|
|
+    git clone --depth 1 https://github.com/horovod/horovod.git /tmp/horovod-src && \
|
|
|
|
+    cp -r /tmp/horovod-src/examples /examples && \
|
|
|
|
+    rm -rf /tmp/horovod-src
|
|
|
|
+
|
|
|
|
+# Install sciml-bench
|
|
|
|
+ cd /sciml-benchmarks && pip install .
|
|
|
|
+
|
|
|
|
+%runscript
|
|
|
|
+    # Forward the container's arguments to sciml-bench unchanged; quoting "$@"
+    # keeps arguments containing spaces or glob characters intact.
+    sciml-bench "$@"
|