# sciml-config-body.txt (2.7 KB)

  1. %files
  2. # -----------------------------------------------------------------------------------
  3. ../sciml-bench/requirements.txt /sciml-benchmarks/requirements.txt
  4. ../sciml-bench/MANIFEST.in /sciml-benchmarks/MANIFEST.in
  5. ../sciml-bench/setup.py /sciml-benchmarks/setup.py
  6. ../sciml-bench/doc /sciml-benchmarks/doc
  7. ../sciml-bench/sciml_bench /sciml-benchmarks/sciml_bench
  8. %environment
  9. # -----------------------------------------------------------------------------------
  10. export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
  11. export LC_ALL=C
  12. export HOROVOD_GPU_ALLREDUCE=NCCL
  13. export HOROVOD_GPU_ALLGATHER=MPI
  14. export HOROVOD_GPU_BROADCAST=MPI
  15. #export HOROVOD_NCCL_HOME=/usr/local/cuda/nccl
  16. #export HOROVOD_NCCL_INCLUDE=/usr/local/cuda/nccl/include
  17. #export HOROVOD_NCCL_LIB=/usr/local/cuda/nccl/lib
  18. export PYTHON_VERSION=3.8
  19. #export TENSORFLOW_VERSION=${TENSORFLOW_VERSION}
  20. #export PYTORCH_VERSION=${PYTORCH_VERSION}
  21. %post
  22. # -----------------------------------------------------------------------------------
  23. # this will install all necessary packages and prepare the container
  24. # TensorFlow version is tightly coupled to CUDA and cuDNN so it should be selected carefully
  25. # Python 2.7 or 3.5 is supported by Ubuntu Xenial out of the box
  26. export PYTHON_VERSION=3.8
  27. export TENSORFLOW_VERSION=${TENSORFLOW_VERSION}
  28. export PYTORCH_VERSION=${PYTORCH_VERSION}
  29. echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list
  30. apt-get -y update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
  31. build-essential \
  32. cmake \
  33. git \
  34. curl \
  35. vim \
  36. wget \
  37. ca-certificates \
  38. libjpeg-dev \
  39. libpng-dev \
  40. python${PYTHON_VERSION} \
  41. python${PYTHON_VERSION}-dev
  42. ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python
  43. curl -O https://bootstrap.pypa.io/get-pip.py && \
  44. python get-pip.py && \
  45. rm get-pip.py
  46. # Install TensorFlow, Keras and PyTorch
  47. ${PIP_CMD_1}
  48. ${PIP_CMD_2}
  49. export PATH="/usr/local/cuda-11.5/bin:$PATH"
  50. # Install Horovod, temporarily using CUDA stubs
  51. ldconfig ${LDCONFIG_PATH} && \
  52. ${PIP_CMD_3} && \
  53. ldconfig
  54. # Set default NCCL parameters
  55. echo NCCL_DEBUG=INFO >> /etc/nccl.conf && \
  56. echo NCCL_SOCKET_IFNAME=^docker0 >> /etc/nccl.conf
  57. # Download examples
  58. cd / && \
  59. apt-get install -y --no-install-recommends subversion && \
  60. svn checkout https://github.com/uber/horovod/trunk/examples && \
  61. rm -rf /examples/.svn
  62. # Install sciml-bench
  63. cd /sciml-benchmarks && pip install .
  64. %runscript
  65. sciml-bench $@