4 anni fa · 5782bb6e04
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -44,3 +44,25 @@ jobs:
 
				 
			
 
				       - name: ShellCheck
			
 
				         uses: ludeeus/action-shellcheck@1.1.0
			
 
				+
			
 
				+  # Lint job: installs flake8 and mypy, then runs each of them over every
				+  # file matched by `find . -name '*.py'`.
				+  python:
			
 
				+    name: "Lint Python"
			
 
				+    runs-on: ubuntu-latest
			
 
				+
			
 
				+    steps:
			
 
				+      - name: Checkout
			
 
				+        uses: actions/checkout@v2
			
 
				+
			
 
				+      - name: Setup Python
			
 
				+        uses: actions/setup-python@v2
			
 
				+        with:
			
 
				+          # Quoted so YAML keeps the string '3.10' instead of the float 3.1
				+          python-version: '3.10'
			
 
				+
			
 
				+      - name: Install dependencies
			
 
				+        run: pip install flake8 mypy
			
 
				+
			
 
				+      - name: Flake8
			
 
				+        # The command substitution passes the path of each *.py file below
				+        # the working directory as a separate argument
				+        run: flake8 $(find . -name '*.py')
			
 
				+
			
 
				+      - name: Mypy
			
 
				+        run: mypy --strict $(find . -name '*.py')
			
--- a/base_containers/pytorch/pytorch_cu.def.template
+++ b/base_containers/pytorch/pytorch_cu.def.template
@@ -0,0 +1,17 @@
 
				+BootStrap: library

			
 
				+From: ubuntu:20.04

			
 
				+

			
 
				+%post

			
 
				+    apt-get -y update

			
 
				+

			
 
				+    # Add universe repository (necessary for python3-pip)

			
 
				+    apt-get -y install software-properties-common

			
 
				+    add-apt-repository -y -u universe

			
 
				+

			
 
				+    # Install python packages

			
 
				+    apt-get -y install python3 python3-pip

			
 
				+

			
 
				+    apt-get clean

			
 
				+

			
 
				+    # Install python dependencies

			
 
				+    # The package versions and the find-links URL below are placeholders
				+    # that template.py fills in for the selected CUDA version
				+    pip3 install --no-cache-dir torch==$torch_version torchvision==$torchvision_version torchaudio==$torchaudio_version -f $find_links

			
--- a/base_containers/pytorch/template.py
+++ b/base_containers/pytorch/template.py
@@ -0,0 +1,64 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+import argparse
			
 
				+from string import Template
			
 
				+
			
 
				+# Substitution values for pytorch_cu.def.template, keyed by CUDA version.
				+# Each entry supplies one value per placeholder used in the template:
				+# the pinned torch, torchvision and torchaudio versions and the URL
				+# passed to pip's -f (find-links) option.
				+CUDA = {
			
 
				+    '10.2': {
			
 
				+        'torch_version': '1.9.0+cu102',
			
 
				+        'torchvision_version': '0.10.0+cu102',
			
 
				+        'torchaudio_version': '0.9.0',
			
 
				+        'find_links': 'https://download.pytorch.org/whl/torch_stable.html'
			
 
				+    },
			
 
				+    '11.1': {
			
 
				+        'torch_version': '1.9.0+cu111',
			
 
				+        'torchvision_version': '0.10.0+cu111',
			
 
				+        'torchaudio_version': '0.9.0',
			
 
				+        'find_links': 'https://download.pytorch.org/whl/torch_stable.html'
			
 
				+    },
			
 
				+    '11.3': {
			
 
				+        'torch_version': '1.10.2+cu113',
			
 
				+        'torchvision_version': '0.11.3+cu113',
			
 
				+        'torchaudio_version': '0.10.2+cu113',
			
 
				+        # Parenthesised only to keep the long URL on its own line
				+        'find_links': (
			
 
				+            'https://download.pytorch.org/whl/cu113/torch_stable.html'
			
 
				+        )
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+
			
 
				+def render(cuda_version: str) -> str:
			
 
				+    with open('pytorch_cu.def.template', 'r', encoding='utf8') as f:
			
 
				+        template = Template(f.read())
			
 
				+
			
 
				+    return template.substitute(**CUDA[cuda_version])
			
 
				+
			
 
				+
			
 
				+def write_def(cuda_version: str, text: str) -> None:
			
 
				+    file_name = f'pytorch_cu_{cuda_version}.def'
			
 
				+    with open(file_name, 'w', encoding='utf8') as f:
			
 
				+        f.write(text)
			
 
				+
			
 
				+
			
 
				+def main() -> None:
			
 
				+    parser = argparse.ArgumentParser(
			
 
				+        description='Template Pytorch definition files'
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        'cuda',
			
 
				+        help='CUDA version',
			
 
				+        type=str,
			
 
				+        choices=['all'] + list(CUDA.keys())
			
 
				+    )
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    if args.cuda == 'all':
			
 
				+        for cuda_version in CUDA.keys():
			
 
				+            write_def(cuda_version, render(cuda_version))
			
 
				+    else:
			
 
				+        write_def(args.cuda, render(args.cuda))
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/workflows/pytorch_GAN_zoo/README.md
+++ b/workflows/pytorch_GAN_zoo/README.md
@@ -4,8 +4,8 @@ This example builds a singularity container for [Facebook Research's PyTorch GAN
 
				 Zoo](https://github.com/facebookresearch/pytorch_GAN_zoo).
			
 
				 
			
 
				 The singularity container will allow you to call all the scripts from the
			
 
				-project and includes are requirements. The container supports CUDA versions
			
 
				-10.1, 10.2 and 11.1 on the host.
			
 
				+project and includes all requirements. The container supports CUDA version 11.1
			
 
				+on the host.
			
 
				 
			
 
				 ## Building
			
 
				 
			
@@ -36,16 +36,23 @@ singularity exec pytorch_GAN_zoo.sif eval.py
 
				 Any flags or command line arguments can be declared after the script name.
			
 
				 
			
 
				 When training, you will need to supply the `--nv` flag to singularity so that
			
 
				-the host GPU may be used. You will also need to select a singularity app, using
			
 
				-the `--app` flag to select the appropriate CUDA version. The available apps are
			
 
				-`cu101`, `cu102`, and `cu111` for CUDA 10.1, 10.2 and 11.1 respectively.
			
 
				+the host GPU may be used.
			
 
				 
			
 
				-For example, to pre-process the dtd dataset and train a PGAN model on a host
			
 
				-with CUDA 10.2 you could run the following commands.
			
 
				+### Multiple GPUs
			
 
				+
			
 
				+PyTorch GAN zoo natively supports [parallelisation across multiple
			
 
				+GPUs](https://github.com/facebookresearch/pytorch_GAN_zoo/issues/57). The
			
 
				+devices to use can be selected using the `CUDA_VISIBLE_DEVICES` environment
			
 
				+variable. CUDA compatible GPUs are numbered from zero. For example, to use the
			
 
				+first and third CUDA accelerators you would set `CUDA_VISIBLE_DEVICES=0,2`.
			
 
				+
			
 
				+To pass this environment variable to singularity the `--env-file` flag must be
			
 
				+used as [passing environment variables with commas is not supported by the
			
 
				+`--env` flag](https://github.com/apptainer/singularity/issues/6088).
			
 
				 
			
 
				 ```bash
			
 
				-singularity exec --app cu102 pytorch_GAN_zoo.sif datasets.py dtd <path to dtd dataset>/images/
			
 
				-singularity exec --nv --app cu102 pytorch_GAN_zoo.sif train.py PGAN -c config_dtd.json --restart --no_vis -n dtd
			
 
				+echo 'CUDA_VISIBLE_DEVICES=0,1' > env.txt
			
 
				+singularity exec --nv --env-file env.txt pytorch_GAN_zoo.sif ...
			
 
				 ```
			
 
				 
			
 
				 ### Models
			
@@ -60,16 +67,14 @@ In each example the `--restart` flag is used so that checkpoints are
 
				 periodically written during the training. The `--no_vis` flag is used to disable
			
 
				 visdom visualisations.
			
 
				 
			
 
				-As above, these examples assume the host has CUDA 10.2 installed.
			
 
				-
			
 
				 #### DTD
			
 
				 
			
 
				 The DTD dataset requires no preprocessing, so the datasets script simply creates
			
 
				 a configuration file.
			
 
				 
			
 
				 ```bash
			
 
				-singularity exec --app cu102 pytorch_GAN_zoo.sif datasets.py dtd <path to dtd>/images
			
 
				-singularity exec --nv --app cu102 pytorch_GAN_zoo.sif train.py PGAN -c config_dtd.json --restart --no_vis -n dtd
			
 
				+singularity exec pytorch_GAN_zoo.sif datasets.py dtd <path to dtd>/images
			
 
				+singularity exec --nv pytorch_GAN_zoo.sif train.py PGAN -c config_dtd.json --restart --no_vis -n dtd
			
 
				 ```
			
 
				 
			
 
				 Where `<path to dtd>` is the path of the directory extracted from the dtd
			
@@ -82,8 +87,8 @@ A processed dataset will be written to a directory delcared using the `-o` flag,
 
				 `cifar10` in this example.
			
 
				 
			
 
				 ```bash
			
 
				-singularity exec --app cu102 pytorch_GAN_zoo.sif datasets.py cifar10 <path to cifar-10> -o cifar10
			
 
				-singularity exec --nv --app cu102 pytorch_GAN_zoo.sif train.py -c config_cifar10.json --restart --no_vis -n cifar10
			
 
				+singularity exec pytorch_GAN_zoo.sif datasets.py cifar10 <path to cifar-10> -o cifar10
			
 
				+singularity exec --nv pytorch_GAN_zoo.sif train.py -c config_cifar10.json --restart --no_vis -n cifar10
			
 
				 ```
			
 
				 
			
 
				 Where `<path to cifar-10>` is the path of the directory containing the pickle
			
--- a/workflows/pytorch_GAN_zoo/build.sh
+++ b/workflows/pytorch_GAN_zoo/build.sh
@@ -1,9 +1,26 @@
 
				-#!/bin/sh
			
 
				+#!/bin/bash
			
 
				 
			
 
				 _UID=$(id -u)
			
 
				 DEF_FILE="pytorch_GAN_zoo.def"
			
 
				 SIF_FILE="pytorch_GAN_zoo.sif"
			
 
				 
			
 
				+TORCH_DEF_FILE="pytorch_cu_11.1.def"
			
 
				+TORCH_SIF_FILE="pytorch_cu_11.1.sif"
			
 
				+
			
 
				+pushd ../../base_containers/pytorch/ || exit
			
 
				+# Build the PyTorch base image only when it does not exist yet; its
				+# definition file is generated from the template first if that is
				+# missing as well.
				+if ! [ -f "$TORCH_SIF_FILE" ]; then
				+    if ! [ -f "$TORCH_DEF_FILE" ]; then
				+        # Stop here instead of building from a missing definition file
				+        ./template.py 11.1 || exit
				+    fi
				+
				+    # Root builds directly, any other user builds with --fakeroot.
				+    # A failed build aborts the script so the dependent image is not
				+    # attempted without its base image.
				+    if [ "$_UID" = 0 ]; then
				+        singularity build "$TORCH_SIF_FILE" "$TORCH_DEF_FILE" || exit
				+    else
				+        singularity build --fakeroot "$TORCH_SIF_FILE" "$TORCH_DEF_FILE" || exit
				+    fi
				+fi
			
 
				+
			
 
				+popd || exit
			
 
				 if [ "$_UID" = 0 ]; then
			
 
				     singularity build $SIF_FILE $DEF_FILE
			
 
				 else
			
--- a/workflows/pytorch_GAN_zoo/pytorch_GAN_zoo.def
+++ b/workflows/pytorch_GAN_zoo/pytorch_GAN_zoo.def
@@ -1,5 +1,5 @@
 
				-BootStrap: library

			
 
				-From: ubuntu:20.04

			
 
				+BootStrap: localimage

			
 
				+From: ../../base_containers/pytorch/pytorch_cu_11.1.sif

			
 
				 

			
 
				 # Global settings

			
 
				 %files

			
@@ -29,47 +29,3 @@ From: ubuntu:20.04
 
				     # Install python dependencies

			
 
				     pip3 install --no-cache-dir -r requirements.txt

			
 
				     pip3 install --no-cache-dir imageio  # For image processing

			
 
				-

			
 
				-

			
 
				-# CUDA 11.1 app

			
 
				-%apphelp cu111

			
 
				-    Cuda 11.1 support, torch 1.9.0, torchvision 0.10.0, torchaudio 0.9.0

			
 
				-

			
 
				-%appinstall cu111

			
 
				-    python3 -m venv --system-site-packages ./venv_cu111

			
 
				-    . ./venv_cu111/bin/activate

			
 
				-    pip3 install --no-cache-dir torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html

			
 
				-    deactivate

			
 
				-

			
 
				-%appenv cu111

			
 
				-    PATH="/scif/apps/cu111/venv_cu111/bin:$PATH"

			
 
				-    export PATH

			
 
				-

			
 
				-# CUDA 10.2 app

			
 
				-%apphelp cu102

			
 
				-    Cuda 10.2 support, torch 1.9.0, torchvision 0.10.0, torchaudio 0.9.0

			
 
				-

			
 
				-%appinstall cu102

			
 
				-    python3 -m venv --system-site-packages ./venv_cu102

			
 
				-    . ./venv_cu102/bin/activate

			
 
				-    pip3 install --no-cache-dir torch torchvision torchaudio

			
 
				-    deactivate

			
 
				-

			
 
				-%appenv cu102

			
 
				-    PATH="/scif/apps/cu102/venv_cu102/bin:$PATH"

			
 
				-    export PATH

			
 
				-

			
 
				-

			
 
				-# CUDA 10.1 app

			
 
				-%apphelp cu101

			
 
				-    Cuda 10.1 support, torch 1.7.1, torchvision 0.8.2, torchaudio 0.7.2

			
 
				-

			
 
				-%appinstall cu101

			
 
				-    python3 -m venv --system-site-packages ./venv_cu101

			
 
				-    . ./venv_cu101/bin/activate

			
 
				-    pip3 install --no-cache-dir torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html

			
 
				-    deactivate

			
 
				-

			
 
				-%appenv cu101

			
 
				-    PATH="/scif/apps/cu101/venv_cu101/bin:$PATH"

			
 
				-    export PATH