Просмотр исходного кода

NV HPC SDK, new makefiles, Jupyter revamps

Mozhgan K. Chimeh 4 года назад
Родитель
Commit
69bbf17c2b
88 изменённых файлов: 272 добавления и 1706 удалений
  1. 21 124
      hpc/miniprofiler/Dockerfile
  2. 2 2
      hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-checkpoint.ipynb
  3. 2 2
      hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab1-checkpoint.ipynb
  4. 5 5
      hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab2-checkpoint.ipynb
  5. 3 3
      hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab3-checkpoint.ipynb
  6. 3 3
      hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab4-checkpoint.ipynb
  7. 9 9
      hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab5-checkpoint.ipynb
  8. 2 2
      hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab1.ipynb
  9. 5 5
      hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab2.ipynb
  10. 5 5
      hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab3.ipynb
  11. 3 3
      hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab4.ipynb
  12. 10 10
      hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab5.ipynb
  13. 2 2
      hpc/miniprofiler/English/C/jupyter_notebook/profiling-c.ipynb
  14. 1 2
      hpc/miniprofiler/English/C/source_code/lab1/Makefile
  15. 5 132
      hpc/miniprofiler/English/C/source_code/lab1/miniWeather_serial.cpp
  16. 1 2
      hpc/miniprofiler/English/C/source_code/lab2/Makefile
  17. 5 132
      hpc/miniprofiler/English/C/source_code/lab2/miniWeather_openacc.cpp
  18. 1 2
      hpc/miniprofiler/English/C/source_code/lab3/Makefile
  19. 5 132
      hpc/miniprofiler/English/C/source_code/lab3/miniWeather_openacc.cpp
  20. 1 2
      hpc/miniprofiler/English/C/source_code/lab4/Makefile
  21. 5 132
      hpc/miniprofiler/English/C/source_code/lab4/miniWeather_openacc.cpp
  22. 1 2
      hpc/miniprofiler/English/C/source_code/lab5/Makefile
  23. 5 132
      hpc/miniprofiler/English/C/source_code/lab5/miniWeather_openacc.cpp
  24. 1 2
      hpc/miniprofiler/English/C/source_code/solutions/Makefile
  25. 5 132
      hpc/miniprofiler/English/C/source_code/solutions/miniWeather_openacc.cpp
  26. 5 132
      hpc/miniprofiler/English/C/source_code/solutions/miniWeather_openacc_exr2.cpp
  27. 5 132
      hpc/miniprofiler/English/C/source_code/solutions/miniWeather_openacc_exr3.cpp
  28. 5 132
      hpc/miniprofiler/English/C/source_code/solutions/miniWeather_openacc_exr4.cpp
  29. 1 1
      hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-checkpoint.ipynb
  30. 2 2
      hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab1-checkpoint.ipynb
  31. 5 5
      hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab2-checkpoint.ipynb
  32. 3 3
      hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab3-checkpoint.ipynb
  33. 3 3
      hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab4-checkpoint.ipynb
  34. 10 10
      hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab5-checkpoint.ipynb
  35. 2 2
      hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab1.ipynb
  36. 5 5
      hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab2.ipynb
  37. 6 6
      hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab3.ipynb
  38. 3 3
      hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab4.ipynb
  39. 10 10
      hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab5.ipynb
  40. 1 1
      hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran.ipynb
  41. 1 1
      hpc/miniprofiler/English/Fortran/source_code/lab1/Makefile
  42. 3 101
      hpc/miniprofiler/English/Fortran/source_code/lab1/miniWeather_serial.f90
  43. 1 1
      hpc/miniprofiler/English/Fortran/source_code/lab2/Makefile
  44. 3 3
      hpc/miniprofiler/English/Fortran/source_code/lab2/miniWeather_openacc.f90
  45. 1 1
      hpc/miniprofiler/English/Fortran/source_code/lab3/Makefile
  46. 3 3
      hpc/miniprofiler/English/Fortran/source_code/lab3/miniWeather_openacc.f90
  47. 1 1
      hpc/miniprofiler/English/Fortran/source_code/lab4/Makefile
  48. 3 3
      hpc/miniprofiler/English/Fortran/source_code/lab4/miniWeather_openacc.f90
  49. 1 1
      hpc/miniprofiler/English/Fortran/source_code/lab5/Makefile
  50. 1 1
      hpc/miniprofiler/English/Fortran/source_code/solutions/Makefile
  51. 3 3
      hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc.f90
  52. 3 3
      hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc_exr2.f90
  53. 3 3
      hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc_exr3.f90
  54. 3 3
      hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc_exr4.f90
  55. 3 3
      hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc_exr5.f90
  56. 17 101
      hpc/miniprofiler/Singularity
  57. 2 2
      hpc/openacc/Dockerfile
  58. 1 1
      hpc/openacc/English/C/jupyter_notebook/.ipynb_checkpoints/openacc_c_lab3-bonus-checkpoint.ipynb
  59. 1 1
      hpc/openacc/English/C/jupyter_notebook/openacc_c_lab1-bonus.ipynb
  60. 1 1
      hpc/openacc/English/C/jupyter_notebook/openacc_c_lab1.ipynb
  61. 2 2
      hpc/openacc/English/C/jupyter_notebook/openacc_c_lab2.ipynb
  62. 2 2
      hpc/openacc/English/C/jupyter_notebook/openacc_c_lab3-bonus.ipynb
  63. 2 2
      hpc/openacc/English/C/jupyter_notebook/openacc_c_lab3.ipynb
  64. 1 1
      hpc/openacc/English/C/source_code/lab1/Makefile
  65. 1 1
      hpc/openacc/English/C/source_code/lab1/solutions/Makefile
  66. 2 2
      hpc/openacc/English/C/source_code/lab2/Makefile
  67. 2 2
      hpc/openacc/English/C/source_code/lab2/solutions/Makefile
  68. 2 2
      hpc/openacc/English/C/source_code/lab2/update/Makefile
  69. 2 2
      hpc/openacc/English/C/source_code/lab2/update/solution/Makefile
  70. 1 1
      hpc/openacc/English/C/source_code/lab3/Makefile
  71. 1 1
      hpc/openacc/English/C/source_code/lab3/solutions/collapse/Makefile
  72. 1 1
      hpc/openacc/English/C/source_code/lab3/solutions/tile/Makefile
  73. 1 1
      hpc/openacc/English/Fortran/jupyter_notebook/.ipynb_checkpoints/openacc_fortran_lab3-bonus-checkpoint.ipynb
  74. 1 1
      hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab1-bonus.ipynb
  75. 1 1
      hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab1.ipynb
  76. 2 2
      hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab2.ipynb
  77. 2 2
      hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab3-bonus.ipynb
  78. 2 2
      hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab3.ipynb
  79. 1 1
      hpc/openacc/English/Fortran/source_code/lab1/Makefile
  80. 1 1
      hpc/openacc/English/Fortran/source_code/lab1/solutions/Makefile
  81. 2 2
      hpc/openacc/English/Fortran/source_code/lab2/Makefile
  82. 2 2
      hpc/openacc/English/Fortran/source_code/lab2/solutions/Makefile
  83. 2 2
      hpc/openacc/English/Fortran/source_code/lab2/update/Makefile
  84. 2 2
      hpc/openacc/English/Fortran/source_code/lab2/update/solution/Makefile
  85. 1 1
      hpc/openacc/English/Fortran/source_code/lab3/Makefile
  86. 1 1
      hpc/openacc/English/Fortran/source_code/lab3/solutions/collapse/Makefile
  87. 1 1
      hpc/openacc/English/Fortran/source_code/lab3/solutions/tile/Makefile
  88. 2 2
      hpc/openacc/Singularity

+ 21 - 124
hpc/miniprofiler/Dockerfile

@@ -4,143 +4,40 @@
 
 # To run Jupyter inside the container: $ jupyter notebook --ip 0.0.0.0 --port 8888 --no-browser --allow-root
 
-FROM nvcr.io/hpc/pgi-compilers:ce
+FROM nvcr.io/nvidia/cuda:10.2-base-ubuntu18.04
 
-RUN apt-get update && \
-    apt-get dist-upgrade -y && \
-    apt-get install --no-install-recommends -y \
-    openssh-server m4 vim-nox emacs-nox nano zip && \
+RUN DEBIAN_FRONTEND=noninteractive apt-get -y update && \
+    DEBIAN_FRONTEND=noninteractive apt-get dist-upgrade -y && \
+    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
+    openssh-server m4 vim-nox emacs-nox nano zip wget python3 python3-pip python3-setuptools git-core inotify-tools curl git-lfs  build-essential && \
     rm -rf /var/lib/apt/cache/* 
-#useradd -k /etc/skel -m -s /usr/local/bin/entrypoint.sh -p openacc openacc && \
-#echo 'openacc:openacc' | chpasswd && \
-#mkdir /var/run/sshd 
 
-RUN apt-get install --no-install-recommends -y python3 python3-pip
 RUN pip3 install --upgrade pip
-RUN apt-get install --no-install-recommends -y python3-setuptools
-RUN apt-get install --no-install-recommends -y git-core
 RUN apt-get install --no-install-recommends -y openssh-client 
-RUN apt-get install --no-install-recommends -y inotify-tools 
 RUN pip3 install jupyter
 RUN pip3 install netcdf4
-RUN apt-get install --no-install-recommends -y curl
-RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
-RUN apt-get install --no-install-recommends -y git-lfs
-RUN git lfs install
 
-# NVIDIA Nsight Systems 
-RUN apt-get update -y && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    apt-transport-https \
-    ca-certificates \
-    gnupg \
-    wget && \
-    echo "deb https://developer.download.nvidia.com/devtools/repo-deb/x86_64/ /" >> /etc/apt/sources.list.d/nsight.list && \
-    apt-get update -y
+### NVIDIA HPC SDK 20.7
+RUN wget https://developer.download.nvidia.com/hpc-sdk/nvhpc_2020_207_Linux_x86_64_cuda_multi.tar.gz
+RUN tar xpzf nvhpc_2020_207_Linux_x86_64_cuda_multi.tar.gz
 
-RUN apt-get update -y
-
-# NVIDIA Nsight Systems 2020.3.1
-RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.3.1
-
-
-#RUN apt-get update && apt-get install -y --no-install-recommends && \
-#    echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
-#    echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list 
-
-# NVIDIA Nsight Systems 2019.3.7
-#RUN apt-get update -y && \ 
-#    apt-get install -y cuda-nsight-systems-10-1 nsight-systems-2019.3.7
-
-RUN apt-get install --no-install-recommends -y build-essential
-
-##### START : netcdf installation #####
-
-RUN cd /usr && \
-    mkdir netcdf && \
-    cd netcdf && \
-    mkdir WORK_DIR && \
-    cd /usr/netcdf/WORK_DIR && \
-    mkdir zlib hdf5 netCDF netCDF-C++ netCDF-Fortran
-
-RUN cd /usr/netcdf/WORK_DIR/zlib && \
-    wget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-4/zlib-1.2.8.tar.gz && \
-    tar -xvzf zlib-1.2.8.tar.gz
-
-RUN cd /usr/netcdf/WORK_DIR/hdf5 && \
-    wget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-4/hdf5-1.8.12.tar.gz && \
-    tar -xvzf hdf5-1.8.12.tar.gz
-
-RUN cd /usr/netcdf/WORK_DIR/netCDF && \
-    wget ftp://ftp.unidata.ucar.edu/pub/netcdf/old/netcdf-4.3.0.tar.gz && \
-    tar -xvzf netcdf-4.3.0.tar.gz 
-
-RUN cd /usr/netcdf/WORK_DIR/netCDF-C++ && \
-    wget https://github.com/Unidata/netcdf-cxx4/archive/v4.2.1.tar.gz && \
-    tar -xvzf v4.2.1.tar.gz
-
-RUN cd /usr/netcdf/WORK_DIR/netCDF-Fortran && \
-    wget ftp://ftp.unidata.ucar.edu/pub/netcdf/old/netcdf-fortran-4.2.tar.gz && \
-    tar -xvzf netcdf-fortran-4.2.tar.gz
-
-ENV CC=pgcc CFLAGS="-O tp=p7-64" CXX=pgc++ CXXFLAGS="-O tp=p7-64" FC=pgfortran FCFLAGS="-O tp=p7-64" F77=pgfortran FFLAGS="-O tp=p7-64" CPPFLAGS="-DpgiFortran" 
-
-RUN mkdir -p /usr/local && \
-    mkdir -p /usr/local/bin && \
-    mkdir -p /usr/local/include && \
-    mkdir -p /usr/local/lib 
-
-ENV PATH="$PATH:/usr/local/bin:/opt/anaconda3/bin:/usr/bin" LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" LIBRARY_PATH="$LIBRARY_PATH:/usr/local/lib" CPATH="$CPATH:/usr/local/include"
-
-# zlib 
-RUN cd /usr/netcdf/WORK_DIR/zlib/zlib-1.2.8 && \
-    ./configure --prefix=/usr/local && \
-    make  && \
-    make install 
-
-# hdf5
-RUN cd  /usr/netcdf/WORK_DIR/hdf5/hdf5-1.8.12 && \
-    unset CPP && \
-    ./configure --prefix=/usr/local --enable-fortran --enable-c++ && \
-    make  && \
-    make install 
-
-# netcdf
-RUN export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" && \
-    export CFLAGS="-O tp=p7-64 -I /usr/local/include" && \
-    export LDFLAGS="-L/usr/local/lib -L/usr/local/lib" && \
-    cd  /usr/netcdf/WORK_DIR/netCDF/netcdf-4.3.0 && \
-    ./configure --prefix=/usr/local && \
-    make  && \
-    make install  
-
-# netcdf-c++
-RUN export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/hdf5/lib:/usr/local/lib" && \
-    export CFLAGS="-O tp=p7-64 -I /usr/local/include -I /usr/local/include" && \
-    export CPPFLAGS="-DpgiFortran -I /usr/local/include -I /usr/local/include" && \
-    export LDFLAGS="-L/usr/local/lib -L/usr/local/lib" && \
-    cd  /usr/netcdf/WORK_DIR/netCDF-C++/netcdf-cxx4-4.2.1 && \
-    ./configure --prefix=/usr/local && \
-    make  && \
-    make install 
-
-# netcdf-fortran
-RUN export CFLAGS="-O tp=p7-64 -I /usr/local/include -I /usr/local/include" && \
-    export FCFLAGS="-O tp=p7-64 -I /usr/local/include -I /usr/local/include" && \
-    export FFLAGS="-O tp=p7-64 -I /usr/local/include -I /usr/local/include" && \
-    export CPPFLAGS="-DpgiFortran -I /usr/local/include -I /usr/local/include" && \
-    unset LDFLAGS && \
-    cd  /usr/netcdf/WORK_DIR/netCDF-Fortran/netcdf-fortran-4.2 && \
-    ./configure --prefix=/usr/local && \
-    make && \
-    make install   
-##### END : netcdf installation #####
+RUN nvhpc_2020_207_Linux_x86_64_cuda_multi/install
+###
 
 RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
     bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/anaconda3  && \
     rm Miniconda3-latest-Linux-x86_64.sh && \
     /opt/anaconda3/bin/conda install -y -q netcdf4
 
-ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/pgi/linux86-64-llvm/2019/cuda/10.1/lib64/" 
+ENV PATH="$PATH:/opt/pgi/linux86-64/19.10/bin:/usr/local/bin:/opt/anaconda3/bin:/usr/bin" \
+    LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" LIBRARY_PATH="$LIBRARY_PATH:/usr/local/lib" CPATH="$CPATH:/usr/local/include"
+
+ENV NVARCH=`uname -s`_`uname -m`; export NVARCH \
+    NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS \
+    MANPATH=$MANPATH:$NVCOMPILERS/$NVARCH/20.7/compilers/man; export MANPATH
+
+ENV PATH=$NVCOMPILERS/$NVARCH/20.7/compilers/bin:$PATH; export PATH \
+    PATH=$NVCOMPILERS/$NVARCH/20.7/comm_libs/mpi/bin:$PATH \
+    MANPATH=$MANPATH:$NVCOMPILERS/$NVARCH/20.7/comm_libs/mpi/man
 
-ADD miniapps-profiler /labs
+ADD English/ /labs

+ 2 - 2
hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-checkpoint.ipynb

@@ -164,7 +164,7 @@
     "This lab comprises of multiple exercises, each follows the optimization cycle method. For each exercise, build the code with a simple `make` by running the cell and profile it with `nsys`.\n",
     "\n",
     "\n",
-    "**NOTE**: Example screenshots are for reference only and you may not get identical profiler report."
+    "**NOTE**: Example screenshots are for reference only and you may not get identical profiler report. In other words, some **screenshots represents profiler report for the values of 400,200,1500.**"
    ]
   },
   {
@@ -190,7 +190,7 @@
     "\n",
     "[CUDA Toolkit Download](https://developer.nvidia.com/cuda-downloads)\n",
     "\n",
-    "**NOTE**: To be able to see the Nsight System profiler output, please download Nsight System version 2020.1 from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "**NOTE**: To be able to see the Nsight System profiler output, please download Nsight System latest version from [here](https://developer.nvidia.com/nsight-systems).\n",
     "\n",
     "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
     "\n",

+ 2 - 2
hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab1-checkpoint.ipynb

@@ -57,7 +57,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab1 && make"
+    "!cd ../source_code/lab1 && make clean && make"
    ]
   },
   {
@@ -73,7 +73,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We download the profiler output (`miniWeather_1.qdrep`) and open it via the Nsight Systems UI. From the timeline view, checkout the NVTX markers displays as part of threads. **Why are we using NVTX?** Please see the section on [Using NVIDIA Tools Extension (NVTX)](../profiling-c.ipynb#Using-NVIDIA-Tools-Extension-(NVTX))\n",
+    "We download the profiler output (`miniWeather_1.qdrep`) and open it via the Nsight Systems UI. From the timeline view, checkout the NVTX markers displays as part of threads. **Why are we using NVTX?** Please see the section on [Using NVIDIA Tools Extension (NVTX)](profiling-c.ipynb#Using-NVIDIA-Tools-Extension-(NVTX))\n",
     "\n",
     "<img src=\"images/e1-nvtx_gui.png\">\n",
     "\n",

+ 5 - 5
hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab2-checkpoint.ipynb

@@ -37,7 +37,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `English/C/source_code/lab2` directory and inspect the code before running below cells. We have already added OpenACC compute directives (`#pragma acc parallel`) around the expensive routines (loops) in the code.\n",
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `C/source_code/lab2` directory and inspect the code before running below cells. We have already added OpenACC compute directives (`#pragma acc parallel`) around the expensive routines (loops) in the code.\n",
     "\n",
     "Once done, compile the code with `make`. View the PGI compiler feedback (enabled by adding `-Minfo=accel` flag) and investigate the compiler feedback for the OpenACC code. The compiler feedback provides useful information about applied optimizations."
    ]
@@ -48,7 +48,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab2 && make"
+    "!cd ../source_code/lab2 && make clean && make"
    ]
   },
   {
@@ -67,7 +67,7 @@
     "\n",
     "It is very important to inspect the feedback to make sure the compiler is doing what you have asked of it.\n",
     "\n",
-    "Now, **Run** the application for small values of `nx_glob`,`nz_glob`, and `sim_time`: **400, 200, 10**"
+    "Now, let's **Run** the application for small values of `nx_glob`,`nz_glob`, and `sim_time`: **40, 20, 1000**"
    ]
   },
   {
@@ -76,7 +76,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab2 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_3 ./miniWeather 400 200 10"
+    "!cd ../source_code/lab2 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_3 ./miniWeather 40 20 1000"
    ]
   },
   {
@@ -85,7 +85,7 @@
    "source": [
     "You can see that the changes made actually slowed down the code and it runs slower compared to the non-accelerated CPU only version. Let's checkout the profiler's report. [Download the profiler output](../source_code/lab2/miniWeather_3.qdrep) and open it via the GUI. \n",
     "\n",
-    "From the \"timeline view\" on the top pane, double click on the \"CUDA\" from the function table on the left and expand it. Zoom in on the timeline and you can see a pattern similar to the screenshot below. The blue boxes are the compute kernels and each of these groupings of kernels is surrounded by purple and teal boxes (annotated with red color) representing data movements. \n",
+    "From the \"timeline view\" on the top pane, double click on the \"CUDA\" from the function table on the left and expand it. Zoom in on the timeline and you can see a pattern similar to the screenshot below. The blue boxes are the compute kernels and each of these groupings of kernels is surrounded by purple and teal boxes (annotated with red color) representing data movements. **Screenshots represents profiler report for the values of 400,200,1500.**\n",
     "\n",
     "<img src=\"images/nsys_slow.png\" width=\"80%\" height=\"80%\">\n",
     "\n",

+ 3 - 3
hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab3-checkpoint.ipynb

@@ -85,7 +85,7 @@
     "\n",
     "Now, add `collapse` clause to the code and make necessary changes to the loop directives. Once done, save the file, re-compile via `make`, and profile it again. \n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `English/C/source_code/lab3` directory. Remember to **SAVE** your code after changes, before running below cells."
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `C/source_code/lab3` directory. Remember to **SAVE** your code after changes, before running below cells."
    ]
   },
   {
@@ -94,7 +94,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab3 && make"
+    "!cd ../source_code/lab3 && make clean && make"
    ]
   },
   {
@@ -114,7 +114,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab3 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_4 ./miniWeather"
+    "!cd ../source_code/lab3 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_4 ./miniWeather"
    ]
   },
   {

+ 3 - 3
hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab4-checkpoint.ipynb

@@ -54,7 +54,7 @@
     "\n",
     "Now, add `data` directives to the code, save the file, re-compile via `make`, and profile it again.\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `English/C/source_code/lab4` directory. Remember to **SAVE** your code after changes, before running below cells."
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `C/source_code/lab4` directory. Remember to **SAVE** your code after changes, before running below cells."
    ]
   },
   {
@@ -63,7 +63,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab4 && make"
+    "!cd ../source_code/lab4 && make clean && make"
    ]
   },
   {
@@ -83,7 +83,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab4 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_5 ./miniWeather"
+    "!cd ../source_code/lab4 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_5 ./miniWeather"
    ]
   },
   {

Разница между файлами не показана из-за своего большого размера
+ 9 - 9
hpc/miniprofiler/English/C/jupyter_notebook/.ipynb_checkpoints/profiling-c-lab5-checkpoint.ipynb


+ 2 - 2
hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab1.ipynb

@@ -57,7 +57,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab1 && make"
+    "!cd ../source_code/lab1 && make clean && make"
    ]
   },
   {
@@ -73,7 +73,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We download the profiler output (`miniWeather_1.qdrep`) and open it via the Nsight Systems UI. From the timeline view, checkout the NVTX markers displays as part of threads. **Why are we using NVTX?** Please see the section on [Using NVIDIA Tools Extension (NVTX)](../profiling-c.ipynb#Using-NVIDIA-Tools-Extension-(NVTX))\n",
+    "We download the profiler output (`miniWeather_1.qdrep`) and open it via the Nsight Systems UI. From the timeline view, checkout the NVTX markers displays as part of threads. **Why are we using NVTX?** Please see the section on [Using NVIDIA Tools Extension (NVTX)](profiling-c.ipynb#Using-NVIDIA-Tools-Extension-(NVTX))\n",
     "\n",
     "<img src=\"images/e1-nvtx_gui.png\">\n",
     "\n",

+ 5 - 5
hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab2.ipynb

@@ -37,7 +37,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `English/C/source_code/lab2` directory and inspect the code before running below cells. We have already added OpenACC compute directives (`#pragma acc parallel`) around the expensive routines (loops) in the code.\n",
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `C/source_code/lab2` directory and inspect the code before running below cells. We have already added OpenACC compute directives (`#pragma acc parallel`) around the expensive routines (loops) in the code.\n",
     "\n",
     "Once done, compile the code with `make`. View the PGI compiler feedback (enabled by adding `-Minfo=accel` flag) and investigate the compiler feedback for the OpenACC code. The compiler feedback provides useful information about applied optimizations."
    ]
@@ -48,7 +48,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab2 && make"
+    "!cd ../source_code/lab2 && make clean && make"
    ]
   },
   {
@@ -67,7 +67,7 @@
     "\n",
     "It is very important to inspect the feedback to make sure the compiler is doing what you have asked of it.\n",
     "\n",
-    "Now, **Run** the application for small values of `nx_glob`,`nz_glob`, and `sim_time`: **400, 200, 10**"
+    "Now, let's **Run** the application for small values of `nx_glob`,`nz_glob`, and `sim_time`: **40, 20, 1000**"
    ]
   },
   {
@@ -76,7 +76,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab2 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_3 ./miniWeather 400 200 10"
+    "!cd ../source_code/lab2 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_3 ./miniWeather 40 20 1000"
    ]
   },
   {
@@ -85,7 +85,7 @@
    "source": [
     "You can see that the changes made actually slowed down the code and it runs slower compared to the non-accelerated CPU only version. Let's checkout the profiler's report. [Download the profiler output](../source_code/lab2/miniWeather_3.qdrep) and open it via the GUI. \n",
     "\n",
-    "From the \"timeline view\" on the top pane, double click on the \"CUDA\" from the function table on the left and expand it. Zoom in on the timeline and you can see a pattern similar to the screenshot below. The blue boxes are the compute kernels and each of these groupings of kernels is surrounded by purple and teal boxes (annotated with red color) representing data movements. \n",
+    "From the \"timeline view\" on the top pane, double click on the \"CUDA\" from the function table on the left and expand it. Zoom in on the timeline and you can see a pattern similar to the screenshot below. The blue boxes are the compute kernels and each of these groupings of kernels is surrounded by purple and teal boxes (annotated with red color) representing data movements. **Screenshots represents profiler report for the values of 400,200,1500.**\n",
     "\n",
     "<img src=\"images/nsys_slow.png\" width=\"80%\" height=\"80%\">\n",
     "\n",

+ 5 - 5
hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab3.ipynb

@@ -85,7 +85,7 @@
     "\n",
     "Now, add `collapse` clause to the code and make necessary changes to the loop directives. Once done, save the file, re-compile via `make`, and profile it again. \n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `English/C/source_code/lab3` directory. Remember to **SAVE** your code after changes, before running below cells."
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `C/source_code/lab3` directory. Remember to **SAVE** your code after changes, before running below cells."
    ]
   },
   {
@@ -94,7 +94,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab3 && make"
+    "!cd ../source_code/lab3 && make clean && make"
    ]
   },
   {
@@ -114,7 +114,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab3 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_4 ./miniWeather"
+    "!cd ../source_code/lab3 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_4 ./miniWeather"
    ]
   },
   {
@@ -125,7 +125,7 @@
     "\n",
     "<img src=\"images/occu-3.png\" width=\"40%\" height=\"40%\">\n",
     "\n",
-    "As you can see from the above screenshot, the theoretical occupancy is now 75% and the block dimension is now `<128,1,1>` where *128* is the vector size per gang. \n",
+    "As you can see from the above screenshot, the theoretical occupancy is now 75% and the block dimension is now `<128,1,1>` where *128* is the vector size per gang. **Screenshots represents profiler report for the values of 400,200,1500.**\n",
     "\n",
     "```cpp\n",
     "#pragma acc parallel loop collapse(3) private(indt, indf1, indf2) \n",
@@ -144,7 +144,7 @@
     "  }\n",
     "```\n",
     "\n",
-    "The iteration count for the collapsed loop is `NUM_VARS * nz * nx` where (in this example),\n",
+    "The iteration count for the collapsed loop is `NUM_VARS * nz * nx` where (in the example screenshot),\n",
     "\n",
     "- nz= 200,\n",
     "- nx = 400, and \n",

+ 3 - 3
hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab4.ipynb

@@ -54,7 +54,7 @@
     "\n",
     "Now, add `data` directives to the code, save the file, re-compile via `make`, and profile it again.\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `English/C/source_code/lab4` directory. Remember to **SAVE** your code after changes, before running below cells."
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.cpp` and `Makefile` from the current directory at `C/source_code/lab4` directory. Remember to **SAVE** your code after changes, before running below cells."
    ]
   },
   {
@@ -63,7 +63,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab4 && make"
+    "!cd ../source_code/lab4 && make clean && make"
    ]
   },
   {
@@ -83,7 +83,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab4 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_5 ./miniWeather"
+    "!cd ../source_code/lab4 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_5 ./miniWeather"
    ]
   },
   {

Разница между файлами не показана из-за своего большого размера
+ 10 - 10
hpc/miniprofiler/English/C/jupyter_notebook/profiling-c-lab5.ipynb


+ 2 - 2
hpc/miniprofiler/English/C/jupyter_notebook/profiling-c.ipynb

@@ -164,7 +164,7 @@
     "This lab comprises of multiple exercises, each follows the optimization cycle method. For each exercise, build the code with a simple `make` by running the cell and profile it with `nsys`.\n",
     "\n",
     "\n",
-    "**NOTE**: Example screenshots are for reference only and you may not get identical profiler report."
+    "**NOTE**: Example screenshots are for reference only and you may not get identical profiler report. In other words, some **screenshots represents profiler report for the values of 400,200,1500.**"
    ]
   },
   {
@@ -190,7 +190,7 @@
     "\n",
     "[CUDA Toolkit Download](https://developer.nvidia.com/cuda-downloads)\n",
     "\n",
-    "**NOTE**: To be able to see the Nsight System profiler output, please download Nsight System version 2020.1 from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "**NOTE**: To be able to see the Nsight Systems profiler output, please download the latest version of Nsight Systems from [here](https://developer.nvidia.com/nsight-systems).\n",
     "\n",
     "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
     "\n",

+ 1 - 2
hpc/miniprofiler/English/C/source_code/lab1/Makefile

@@ -1,11 +1,10 @@
 CC := pgc++
 CFLAGS := -O3 -w
 ACCFLAGS := -Minfo=accel
-LDFLAGS :=  -lnetcdf -ldl
 NVTXLIB := -I/opt/pgi/linux86-64-llvm/2019/cuda/10.1/include/
 
 miniWeather: miniWeather_serial.cpp
-	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_serial.cpp ${NVTXLIB} ${LDFLAGS}
+	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_serial.cpp ${NVTXLIB} 
 
 clean:
 	rm -f *.o miniWeather

+ 5 - 132
hpc/miniprofiler/English/C/source_code/lab1/miniWeather_serial.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 1000;   //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -131,7 +130,7 @@ int main(int argc, char **argv)
   init();
 
   //Output the initial state
-  output(state, etime);
+  //output(state, etime);
 
   ////////////////////////////////////////////////////
   // MAIN TIME STEP LOOP
@@ -163,7 +162,7 @@ int main(int argc, char **argv)
     if (output_counter >= output_freq)
     {
       output_counter = output_counter - output_freq;
-      output(state, etime);
+      //output(state, etime);
     }
   }
   nvtxRangePop();
@@ -622,132 +621,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("reference.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("reference.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 1 - 2
hpc/miniprofiler/English/C/source_code/lab2/Makefile

@@ -1,10 +1,9 @@
 CC := pgc++
 CFLAGS := -O3 -w
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
-LDFLAGS := -lnetcdf
 
 miniWeather: miniWeather_openacc.cpp
-	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp ${LDFLAGS}
+	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp 
 
 clean:
 	rm -f *.o miniWeather

+ 5 - 132
hpc/miniprofiler/English/C/source_code/lab2/miniWeather_openacc.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 1000;   //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -132,7 +131,7 @@ int main(int argc, char **argv)
   init();
 
   //Output the initial state
-  output(state, etime);
+  //output(state, etime);
 
   ////////////////////////////////////////////////////
   // MAIN TIME STEP LOOP
@@ -165,7 +164,7 @@ int main(int argc, char **argv)
     {
       output_counter = output_counter - output_freq;
 
-      output(state, etime);
+      //output(state, etime);
     }
   }
   nvtxRangePop();
@@ -626,132 +625,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("new.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("new.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 1 - 2
hpc/miniprofiler/English/C/source_code/lab3/Makefile

@@ -1,10 +1,9 @@
 CC := pgc++
 CFLAGS := -O3 -w
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
-LDFLAGS := -lnetcdf
 
 miniWeather: miniWeather_openacc.cpp
-	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp ${LDFLAGS}
+	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp 
 
 clean:
 	rm -f *.o miniWeather

+ 5 - 132
hpc/miniprofiler/English/C/source_code/lab3/miniWeather_openacc.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 1000;   //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -132,7 +131,7 @@ int main(int argc, char **argv)
   init();
 
   //Output the initial state
-  output(state, etime);
+  //output(state, etime);
 
   ////////////////////////////////////////////////////
   // MAIN TIME STEP LOOP
@@ -165,7 +164,7 @@ int main(int argc, char **argv)
     {
       output_counter = output_counter - output_freq;
 
-      output(state, etime);
+      //output(state, etime);
     }
   }
   nvtxRangePop();
@@ -626,132 +625,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("new.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("new.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 1 - 2
hpc/miniprofiler/English/C/source_code/lab4/Makefile

@@ -1,10 +1,9 @@
 CC := pgc++
 CFLAGS := -O3 -w
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
-LDFLAGS := -lnetcdf
 
 miniWeather: miniWeather_openacc.cpp
-	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp ${LDFLAGS}
+	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp 
 
 clean:
 	rm -f *.o miniWeather

+ 5 - 132
hpc/miniprofiler/English/C/source_code/lab4/miniWeather_openacc.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 1000;   //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -132,7 +131,7 @@ int main(int argc, char **argv)
   init();
 
   //Output the initial state
-  output(state, etime);
+  //output(state, etime);
 
   ////////////////////////////////////////////////////
   // MAIN TIME STEP LOOP
@@ -165,7 +164,7 @@ int main(int argc, char **argv)
     {
       output_counter = output_counter - output_freq;
 
-      output(state, etime);
+      //output(state, etime);
     }
   }
   nvtxRangePop();
@@ -626,132 +625,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("new.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("new.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 1 - 2
hpc/miniprofiler/English/C/source_code/lab5/Makefile

@@ -1,10 +1,9 @@
 CC := pgc++
 CFLAGS := -O3 -w
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
-LDFLAGS := -lnetcdf
 
 miniWeather: miniWeather_openacc.cpp
-	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp ${LDFLAGS}
+	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp 
 
 clean:
 	rm -f *.o miniWeather

+ 5 - 132
hpc/miniprofiler/English/C/source_code/lab5/miniWeather_openacc.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 10;     //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -136,7 +135,7 @@ int main(int argc, char **argv)
         copy(state [0:(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
   {
     //Output the initial state
-    output(state, etime);
+    //output(state, etime);
 
     ////////////////////////////////////////////////////
     // MAIN TIME STEP LOOP
@@ -169,7 +168,7 @@ int main(int argc, char **argv)
       {
         output_counter = output_counter - output_freq;
 #pragma acc update host(state[(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
-        output(state, etime);
+        //output(state, etime);
       }
     }
     nvtxRangePop();
@@ -630,132 +629,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("new.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("new.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 1 - 2
hpc/miniprofiler/English/C/source_code/solutions/Makefile

@@ -1,10 +1,9 @@
 CC := pgc++
 CFLAGS := -O3 -w
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
-LDFLAGS := -lnetcdf
 
 miniWeather: miniWeather_openacc.cpp
-	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp ${LDFLAGS}
+	${CC} ${CFLAGS} ${ACCFLAGS} -o miniWeather miniWeather_openacc.cpp 
 
 clean:
 	rm -f *.o miniWeather

+ 5 - 132
hpc/miniprofiler/English/C/source_code/solutions/miniWeather_openacc.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 1000;   //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -136,7 +135,7 @@ int main(int argc, char **argv)
         copy(state [0:(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
   {
     //Output the initial state
-    output(state, etime);
+    //output(state, etime);
 
     ////////////////////////////////////////////////////
     // MAIN TIME STEP LOOP
@@ -169,7 +168,7 @@ int main(int argc, char **argv)
       {
         output_counter = output_counter - output_freq;
 #pragma acc update host(state[(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
-        output(state, etime);
+        //output(state, etime);
       }
     }
     nvtxRangePop();
@@ -630,132 +629,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("new.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("new.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 5 - 132
hpc/miniprofiler/English/C/source_code/solutions/miniWeather_openacc_exr2.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 1000;   //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -132,7 +131,7 @@ int main(int argc, char **argv)
   init();
 
   //Output the initial state
-  output(state, etime);
+  //output(state, etime);
 
   ////////////////////////////////////////////////////
   // MAIN TIME STEP LOOP
@@ -165,7 +164,7 @@ int main(int argc, char **argv)
     {
       output_counter = output_counter - output_freq;
 
-      output(state, etime);
+      //output(state, etime);
     }
   }
   nvtxRangePop();
@@ -626,132 +625,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("new.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("new.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 5 - 132
hpc/miniprofiler/English/C/source_code/solutions/miniWeather_openacc_exr3.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 1000;   //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -132,7 +131,7 @@ int main(int argc, char **argv)
   init();
 
   //Output the initial state
-  output(state, etime);
+  //output(state, etime);
 
   ////////////////////////////////////////////////////
   // MAIN TIME STEP LOOP
@@ -165,7 +164,7 @@ int main(int argc, char **argv)
     {
       output_counter = output_counter - output_freq;
 
-      output(state, etime);
+      //    output(state, etime);
     }
   }
   nvtxRangePop();
@@ -626,132 +625,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("new.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("new.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 5 - 132
hpc/miniprofiler/English/C/source_code/solutions/miniWeather_openacc_exr4.cpp

@@ -8,7 +8,6 @@
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
-#include <netcdf.h>
 #include <nvtx3/nvToolsExt.h>
 
 const double pi = 3.14159265358979323846264338327;   //Pi
@@ -108,9 +107,9 @@ int main(int argc, char **argv)
   ///////////////////////////////////////////////////////////////////////////////////////
   //The x-direction length is twice as long as the z-direction length
   //So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400;     //Number of total cells in the x-dirction
-  nz_glob = 200;     //Number of total cells in the z-dirction
-  sim_time = 1500;   //How many seconds to run the simulation
+  nx_glob = 40;      //Number of total cells in the x-direction
+  nz_glob = 20;      //Number of total cells in the z-direction
+  sim_time = 1000;   //How many seconds to run the simulation
   output_freq = 100; //How frequently to output data to file (in seconds)
   ///////////////////////////////////////////////////////////////////////////////////////
   // END USER-CONFIGURABLE PARAMETERS
@@ -136,7 +135,7 @@ int main(int argc, char **argv)
         copy(state [0:(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
   {
     //Output the initial state
-    output(state, etime);
+    ////output(state, etime);
 
     ////////////////////////////////////////////////////
     // MAIN TIME STEP LOOP
@@ -169,7 +168,7 @@ int main(int argc, char **argv)
       {
         output_counter = output_counter - output_freq;
 #pragma acc update host(state[(nz + 2 * hs) * (nx + 2 * hs) * NUM_VARS])
-        output(state, etime);
+        ////output(state, etime);
       }
     }
     nvtxRangePop();
@@ -629,132 +628,6 @@ void hydro_const_theta(double z, double &r, double &t)
   r = rt / t;                                //Density at z
 }
 
-//Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-//The file I/O uses netcdf, the only external library required for this mini-app.
-//If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-void output(double *state, double etime)
-{
-  int ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid, dimids[3];
-  int i, k, ind_r, ind_u, ind_w, ind_t;
-
-  size_t st1[1], ct1[1], st3[3], ct3[3];
-
-  //Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-  double *dens, *uwnd, *wwnd, *theta;
-  double *etimearr;
-  //Inform the user
-
-  printf("*** OUTPUT ***\n");
-
-  //Allocate some (big) temp arrays
-  dens = (double *)malloc(nx * nz * sizeof(double));
-  uwnd = (double *)malloc(nx * nz * sizeof(double));
-  wwnd = (double *)malloc(nx * nz * sizeof(double));
-  theta = (double *)malloc(nx * nz * sizeof(double));
-  etimearr = (double *)malloc(1 * sizeof(double));
-
-  //If the elapsed time is zero, create the file. Otherwise, open the file
-  if (etime == 0)
-  {
-    //Create the file
-    ncwrap(nc_create("new.nc", NC_CLOBBER, &ncid), __LINE__);
-
-    //Create the dimensions
-    ncwrap(nc_def_dim(ncid, "t", NC_UNLIMITED, &t_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "x", nx_glob, &x_dimid), __LINE__);
-    ncwrap(nc_def_dim(ncid, "z", nz_glob, &z_dimid), __LINE__);
-
-    //Create the variables
-    dimids[0] = t_dimid;
-    ncwrap(nc_def_var(ncid, "t", NC_DOUBLE, 1, dimids, &t_varid), __LINE__);
-
-    dimids[0] = t_dimid;
-    dimids[1] = z_dimid;
-    dimids[2] = x_dimid;
-
-    ncwrap(nc_def_var(ncid, "dens", NC_DOUBLE, 3, dimids, &dens_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "uwnd", NC_DOUBLE, 3, dimids, &uwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "wwnd", NC_DOUBLE, 3, dimids, &wwnd_varid), __LINE__);
-    ncwrap(nc_def_var(ncid, "theta", NC_DOUBLE, 3, dimids, &theta_varid), __LINE__);
-
-    //End "define" mode
-    ncwrap(nc_enddef(ncid), __LINE__);
-  }
-  else
-  {
-    //Open the file
-    ncwrap(nc_open("new.nc", NC_WRITE, &ncid), __LINE__);
-
-    //Get the variable IDs
-    ncwrap(nc_inq_varid(ncid, "dens", &dens_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "uwnd", &uwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "wwnd", &wwnd_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "theta", &theta_varid), __LINE__);
-    ncwrap(nc_inq_varid(ncid, "t", &t_varid), __LINE__);
-  }
-
-  //Store perturbed values in the temp arrays for output
-  for (k = 0; k < nz; k++)
-  {
-    for (i = 0; i < nx; i++)
-    {
-      ind_r = ID_DENS * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_u = ID_UMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_w = ID_WMOM * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      ind_t = ID_RHOT * (nz + 2 * hs) * (nx + 2 * hs) + (k + hs) * (nx + 2 * hs) + i + hs;
-      dens[k * nx + i] = state[ind_r];
-      uwnd[k * nx + i] = state[ind_u] / (hy_dens_cell[k + hs] + state[ind_r]);
-      wwnd[k * nx + i] = state[ind_w] / (hy_dens_cell[k + hs] + state[ind_r]);
-      theta[k * nx + i] = (state[ind_t] + hy_dens_theta_cell[k + hs]) / (hy_dens_cell[k + hs] + state[ind_r]) - hy_dens_theta_cell[k + hs] / hy_dens_cell[k + hs];
-    }
-  }
-
-  //Write the grid data to file with all the processes writing collectively
-  st3[0] = num_out;
-  st3[1] = k_beg;
-  st3[2] = i_beg;
-  ct3[0] = 1;
-  ct3[1] = nz;
-  ct3[2] = nx;
-
-  ncwrap(nc_put_vara_double(ncid, dens_varid, st3, ct3, dens), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, uwnd_varid, st3, ct3, uwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, wwnd_varid, st3, ct3, wwnd), __LINE__);
-  ncwrap(nc_put_vara_double(ncid, theta_varid, st3, ct3, theta), __LINE__);
-
-  //Only the master process needs to write the elapsed time
-  //write elapsed time to file
-
-  st1[0] = num_out;
-  ct1[0] = 1;
-  etimearr[0] = etime;
-  ncwrap(nc_put_vara_double(ncid, t_varid, st1, ct1, etimearr), __LINE__);
-
-  //Close the file
-  ncwrap(nc_close(ncid), __LINE__);
-
-  //Increment the number of outputs
-  num_out = num_out + 1;
-
-  //Deallocate the temp arrays
-  free(dens);
-  free(uwnd);
-  free(wwnd);
-  free(theta);
-  free(etimearr);
-}
-
-//Error reporting routine for the NetCDF I/O
-void ncwrap(int ierr, int line)
-{
-  if (ierr != NC_NOERR)
-  {
-    printf("NetCDF Error at line: %d\n", line);
-    printf("%s\n", nc_strerror(ierr));
-    exit(-1);
-  }
-}
-
 void finalize()
 {
   free(state);

+ 1 - 1
hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-checkpoint.ipynb

@@ -175,7 +175,7 @@
     "This lab comprises of multiple exercises, each follows the optimization cycle method. For each exercise, build the code with a simple `make` by running the cell and profile it with `nsys`.\n",
     "\n",
     "\n",
-    "**NOTE**: Example screenshots are for reference only and you may not get identical profiler report."
+    "**NOTE**: Example screenshots are for reference only and you may not get identical profiler report. In other words, some **screenshots represents profiler report for the values of 400,200,1500.**"
    ]
   },
   {

+ 2 - 2
hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab1-checkpoint.ipynb

@@ -57,7 +57,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab1 && make"
+    "!cd ../source_code/lab1 && make clean && make"
    ]
   },
   {
@@ -73,7 +73,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We download the profiler output (`miniWeather_1.qdrep`) and open it via the Nsight Systems UI. From the timeline view, checkout the NVTX markers displays as part of threads. **Why are we using NVTX?** Please see the section on [Using NVIDIA Tools Extension (NVTX)](../profiling-fortran.ipynb#Using-NVIDIA-Tools-Extension-(NVTX))\n",
+    "We download the profiler output (`miniWeather_1.qdrep`) and open it via the Nsight Systems UI. From the timeline view, checkout the NVTX markers displays as part of threads. **Why are we using NVTX?** Please see the section on [Using NVIDIA Tools Extension (NVTX)](profiling-fortran.ipynb#Using-NVIDIA-Tools-Extension-(NVTX))\n",
     "\n",
     "<img src=\"images/e1-nvtx_gui.png\">\n",
     "\n",

+ 5 - 5
hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab2-checkpoint.ipynb

@@ -37,7 +37,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `English/Fortran/source_code/lab2` directory and inspect the code before running below cells.We have already added OpenACC compute directives (`!$acc parallel loop`) around the expensive routines (loops) in the code.\n",
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `Fortran/source_code/lab2` directory and inspect the code before running the cells below. We have already added OpenACC compute directives (`!$acc parallel loop`) around the expensive routines (loops) in the code.\n",
     "\n",
     "Once done, compile the code with `make`. View the PGI compiler feedback (enabled by adding `-Minfo=accel` flag) and investigate the compiler feedback for the OpenACC code. The compiler feedback provides useful information about applied optimizations."
    ]
@@ -48,7 +48,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab2 && make"
+    "!cd ../source_code/lab2 && make clean && make"
    ]
   },
   {
@@ -68,7 +68,7 @@
     "\n",
     "It is very important to inspect the feedback to make sure the compiler is doing what you have asked of it.\n",
     "\n",
-    "Now, **Run** the application for small values of `nx_glob`,`nz_glob`, and `sim_time`: **400, 200, 10**. "
+    "Now, **Run** the application for small values of `nx_glob`,`nz_glob`, and `sim_time`: **40, 20, 1000**. "
    ]
   },
   {
@@ -93,7 +93,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab2 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_3 ./miniWeather"
+    "!cd ../source_code/lab2 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_3 ./miniWeather"
    ]
   },
   {
@@ -102,7 +102,7 @@
    "source": [
     "You can see that the changes made actually slowed down the code and it runs slower compared to the non-accelerated CPU only version. Let's checkout the profiler's report. [Download the profiler output](../source_code/lab2/miniWeather_3.qdrep) and open it via the GUI. \n",
     "\n",
-    "From the \"timeline view\" on the top pane, double click on the \"CUDA\" from the function table on the left and expand it. Zoom in on the timeline and you can see a pattern similar to the screenshot below. The blue boxes are the compute kernels and each of these groupings of kernels is surrounded by purple and teal boxes (annotated with red color) representing data movements. \n",
+    "From the \"timeline view\" on the top pane, double click on the \"CUDA\" from the function table on the left and expand it. Zoom in on the timeline and you can see a pattern similar to the screenshot below. The blue boxes are the compute kernels and each of these groupings of kernels is surrounded by purple and teal boxes (annotated with red color) representing data movements. **The screenshots represent the profiler report for the values of 400, 200, 1500.**\n",
     "\n",
     "<img src=\"images/nsys_slow.png\" width=\"80%\" height=\"80%\">\n",
     "\n",

+ 3 - 3
hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab3-checkpoint.ipynb

@@ -79,7 +79,7 @@
     "\n",
     "Now, add `collapse` clause to the code and make necessary changes to the loop directives. Once done, save the file, re-compile via `make`, and profile it again. \n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `English/Fortran/source_code/lab3` directory. Remember to **SAVE** your code after changes, before running below cells."
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `Fortran/source_code/lab3` directory. Remember to **SAVE** your code after changes, before running below cells."
    ]
   },
   {
@@ -88,7 +88,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab3 && make"
+    "!cd ../source_code/lab3 && make clean && make"
    ]
   },
   {
@@ -108,7 +108,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab3 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_4 ./miniWeather"
+    "!cd ../source_code/lab3 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_4 ./miniWeather"
    ]
   },
   {

+ 3 - 3
hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab4-checkpoint.ipynb

@@ -55,7 +55,7 @@
     "Now, add `data` directives to the code, save the file, re-compile via `make`, and profile it again.\n",
     "\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `English/Fortran/source_code/lab4` directory. Remember to **SAVE** your code after changes, before running below cells."
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `Fortran/source_code/lab4` directory. Remember to **SAVE** your code after changes, before running below cells."
    ]
   },
   {
@@ -64,7 +64,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab4 && make"
+    "!cd ../source_code/lab4 && make clean && make"
    ]
   },
   {
@@ -84,7 +84,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab4 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_5 ./miniWeather"
+    "!cd ../source_code/lab4 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_5 ./miniWeather"
    ]
   },
   {

Разница между файлами не показана из-за своего большого размера
+ 10 - 10
hpc/miniprofiler/English/Fortran/jupyter_notebook/.ipynb_checkpoints/profiling-fortran-lab5-checkpoint.ipynb


+ 2 - 2
hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab1.ipynb

@@ -57,7 +57,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab1 && make"
+    "!cd ../source_code/lab1 && make clean && make"
    ]
   },
   {
@@ -73,7 +73,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We download the profiler output (`miniWeather_1.qdrep`) and open it via the Nsight Systems UI. From the timeline view, checkout the NVTX markers displays as part of threads. **Why are we using NVTX?** Please see the section on [Using NVIDIA Tools Extension (NVTX)](../profiling-fortran.ipynb#Using-NVIDIA-Tools-Extension-(NVTX))\n",
+    "We download the profiler output (`miniWeather_1.qdrep`) and open it via the Nsight Systems UI. From the timeline view, checkout the NVTX markers displays as part of threads. **Why are we using NVTX?** Please see the section on [Using NVIDIA Tools Extension (NVTX)](profiling-fortran.ipynb#Using-NVIDIA-Tools-Extension-(NVTX))\n",
     "\n",
     "<img src=\"images/e1-nvtx_gui.png\">\n",
     "\n",

+ 5 - 5
hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab2.ipynb

@@ -37,7 +37,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `English/Fortran/source_code/lab2` directory and inspect the code before running below cells.We have already added OpenACC compute directives (`!$acc parallel loop`) around the expensive routines (loops) in the code.\n",
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `Fortran/source_code/lab2` directory and inspect the code before running the cells below. We have already added OpenACC compute directives (`!$acc parallel loop`) around the expensive routines (loops) in the code.\n",
     "\n",
     "Once done, compile the code with `make`. View the PGI compiler feedback (enabled by adding `-Minfo=accel` flag) and investigate the compiler feedback for the OpenACC code. The compiler feedback provides useful information about applied optimizations."
    ]
@@ -48,7 +48,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab2 && make"
+    "!cd ../source_code/lab2 && make clean && make"
    ]
   },
   {
@@ -68,7 +68,7 @@
     "\n",
     "It is very important to inspect the feedback to make sure the compiler is doing what you have asked of it.\n",
     "\n",
-    "Now, **Run** the application for small values of `nx_glob`,`nz_glob`, and `sim_time`: **400, 200, 10**. "
+    "Now, **Run** the application for small values of `nx_glob`,`nz_glob`, and `sim_time`: **40, 20, 1000**. "
    ]
   },
   {
@@ -93,7 +93,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab2 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_3 ./miniWeather"
+    "!cd ../source_code/lab2 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_3 ./miniWeather"
    ]
   },
   {
@@ -102,7 +102,7 @@
    "source": [
     "You can see that the changes made actually slowed down the code and it runs slower compared to the non-accelerated CPU only version. Let's checkout the profiler's report. [Download the profiler output](../source_code/lab2/miniWeather_3.qdrep) and open it via the GUI. \n",
     "\n",
-    "From the \"timeline view\" on the top pane, double click on the \"CUDA\" from the function table on the left and expand it. Zoom in on the timeline and you can see a pattern similar to the screenshot below. The blue boxes are the compute kernels and each of these groupings of kernels is surrounded by purple and teal boxes (annotated with red color) representing data movements. \n",
+    "From the \"timeline view\" on the top pane, double click on the \"CUDA\" from the function table on the left and expand it. Zoom in on the timeline and you can see a pattern similar to the screenshot below. The blue boxes are the compute kernels and each of these groupings of kernels is surrounded by purple and teal boxes (annotated with red color) representing data movements. **The screenshots represent the profiler report for the values of 400, 200, 1500.**\n",
     "\n",
     "<img src=\"images/nsys_slow.png\" width=\"80%\" height=\"80%\">\n",
     "\n",

+ 6 - 6
hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab3.ipynb

@@ -79,7 +79,7 @@
     "\n",
     "Now, add `collapse` clause to the code and make necessary changes to the loop directives. Once done, save the file, re-compile via `make`, and profile it again. \n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `English/Fortran/source_code/lab3` directory. Remember to **SAVE** your code after changes, before running below cells."
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `Fortran/source_code/lab3` directory. Remember to **SAVE** your code after changes, before running below cells."
    ]
   },
   {
@@ -88,7 +88,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab3 && make"
+    "!cd ../source_code/lab3 && make clean && make"
    ]
   },
   {
@@ -108,7 +108,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab3 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_4 ./miniWeather"
+    "!cd ../source_code/lab3 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_4 ./miniWeather"
    ]
   },
   {
@@ -119,7 +119,7 @@
     "\n",
     "<img src=\"images/occu-3.png\" width=\"40%\" height=\"40%\">\n",
     "\n",
-    "As you can see from the above screenshot, the theoretical occupancy is now 75% and the block dimension is now `<128,1,1>` where *128* is the vector size per gang. \n",
+    "As you can see from the above screenshot, the theoretical occupancy is now 75% and the block dimension is now `<128,1,1>` where *128* is the vector size per gang. **The screenshots represent the profiler report for the values of 400, 200, 1500.**\n",
     "\n",
     "```fortran\n",
     "    !$acc parallel loop collapse(3) \n",
@@ -132,13 +132,13 @@
     "    enddo\n",
     "```\n",
     "\n",
-    "The iteration count for the collapsed loop is `NUM_VARS * nz * nx` where (in this example),\n",
+    "The iteration count for the collapsed loop is `NUM_VARS * nz * nx` where (in the example screenshot),\n",
     "\n",
     "- nz= 200,\n",
     "- nx = 400, and \n",
     "- NUM_VARS = 4\n",
     "\n",
-    "So, the interaction count for this particular loop inside the `compute_tendencies_z_383_gpu` function is 320K. This number divided by the vector length of *128* would gives us the grid dimension of `<2500,1,1>`.\n",
+    "So, the iteration count for this particular loop inside the `compute_tendencies_z_383_gpu` function is 320K. This number divided by the vector length of *128* would give us the grid dimension of `<2500,1,1>`. \n",
     "\n",
     "By creating a single iteration space across the nested loops and increasing the iteration count, we improved the occupancy and extracted more parallelism.\n",
     "\n",

+ 3 - 3
hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab4.ipynb

@@ -55,7 +55,7 @@
     "Now, add `data` directives to the code, save the file, re-compile via `make`, and profile it again.\n",
     "\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `English/Fortran/source_code/lab4` directory. Remember to **SAVE** your code after changes, before running below cells."
+    "From the top menu, click on *File*, and *Open* `miniWeather_openacc.f90` and `Makefile` from the current directory at `Fortran/source_code/lab4` directory. Remember to **SAVE** your code after changes, before running below cells."
    ]
   },
   {
@@ -64,7 +64,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab4 && make"
+    "!cd ../source_code/lab4 && make clean && make"
    ]
   },
   {
@@ -84,7 +84,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!cd ../source_code/lab4 && nsys profile -t nvtx --stats=true --force-overwrite true -o miniWeather_5 ./miniWeather"
+    "!cd ../source_code/lab4 && nsys profile -t nvtx,openacc --stats=true --force-overwrite true -o miniWeather_5 ./miniWeather"
    ]
   },
   {

Разница между файлами не показана из-за своего большого размера
+ 10 - 10
hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran-lab5.ipynb


+ 1 - 1
hpc/miniprofiler/English/Fortran/jupyter_notebook/profiling-fortran.ipynb

@@ -175,7 +175,7 @@
     "This lab comprises of multiple exercises, each follows the optimization cycle method. For each exercise, build the code with a simple `make` by running the cell and profile it with `nsys`.\n",
     "\n",
     "\n",
-    "**NOTE**: Example screenshots are for reference only and you may not get identical profiler report."
+    "**NOTE**: Example screenshots are for reference only and you may not get identical profiler report. In other words, some **screenshots represents profiler report for the values of 400,200,1500.**"
    ]
   },
   {

+ 1 - 1
hpc/miniprofiler/English/Fortran/source_code/lab1/Makefile

@@ -1,7 +1,7 @@
 FC := pgf90
 FCC := pgfortran
 FFLAGS := -fast
-LDFLAGS := -lnvToolsExt -lnetcdff
+LDFLAGS := -lnvToolsExt 
 ACCFLAGS := -Minfo=accel
 NVTXINC := -I/opt/pgi/linux86-64-llvm/2019/cuda/10.1/include/
 NVTXLIB := -L/opt/pgi/linux86-64-llvm/2019/cuda/10.0/lib64/

+ 3 - 101
hpc/miniprofiler/English/Fortran/source_code/lab1/miniWeather_serial.f90

@@ -83,9 +83,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 10 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS
@@ -550,102 +550,4 @@ contains
     deallocate(hy_pressure_int   )
   end subroutine finalize
 
-
-  !Output the fluid state (state) to a NetCDF file at a given elapsed model time (etime)
-  !The file I/O uses parallel-netcdf, the only external library required for this mini-app.
-  !If it's too cumbersome, you can comment the I/O out, but you'll miss out on some potentially cool graphics
-  subroutine output(state,etime)
-    implicit none
-
-    real(rp), intent(in) :: state(1-hs:nx+hs,1-hs:nz+hs,NUM_VARS)
-    real(rp), intent(in) :: etime
-    integer :: ncid, t_dimid, x_dimid, z_dimid, dens_varid, uwnd_varid, wwnd_varid, theta_varid, t_varid
-    integer :: i,k, __LINE__
-    integer, save :: num_out = 0
-    integer :: len, st1(1),ct1(1),st3(3),ct3(3)
-    !Temporary arrays to hold density, u-wind, w-wind, and potential temperature (theta)
-    real(rp), allocatable :: dens(:,:), uwnd(:,:), wwnd(:,:), theta(:,:)
-    real(rp) :: etimearr(1)
-    !Inform the user
-    write(*,*) '*** OUTPUT ***'
-    !Allocate some (big) temp arrays
-    allocate(dens (nx,nz))
-    allocate(uwnd (nx,nz))
-    allocate(wwnd (nx,nz))
-    allocate(theta(nx,nz))
-
-    !If the elapsed time is zero, create the file. Otherwise, open the file
-    if (etime == 0) then
-      !Create the file
-      call ncwrap( nf90_create('reference.nc' , nf90_clobber , ncid ) , __LINE__ )
-      !Create the dimensions
-      len=nf90_unlimited; call ncwrap( nf90_def_dim( ncid , 't' , len , t_dimid ) , __LINE__ )
-      len=nx_glob       ; call ncwrap( nf90_def_dim( ncid , 'x' , len , x_dimid ) , __LINE__ )
-      len=nz_glob       ; call ncwrap( nf90_def_dim( ncid , 'z' , len , z_dimid ) , __LINE__ )
-      !Create the variables
-      call ncwrap( nf_def_var( ncid , 't' , NF90_DOUBLE , 1 , (/ t_dimid /) , t_varid ) , __LINE__ )
-      call ncwrap( nf_def_var( ncid , 'dens'  , nf90_double , 3 , (/ x_dimid , z_dimid , t_dimid /) ,  dens_varid ) , __LINE__ )
-      call ncwrap( nf_def_var( ncid , 'uwnd'  , nf90_double , 3 , (/ x_dimid , z_dimid , t_dimid /) ,  uwnd_varid ) , __LINE__ )
-      call ncwrap( nf_def_var( ncid , 'wwnd'  , nf90_double , 3 , (/ x_dimid , z_dimid , t_dimid /) ,  wwnd_varid ) , __LINE__ )
-      call ncwrap( nf_def_var( ncid , 'theta' , nf90_double , 3 , (/ x_dimid , z_dimid , t_dimid /) , theta_varid ) , __LINE__ )
-      !End "define" mode
-      call ncwrap( nf90_enddef( ncid ) , __LINE__ )
-    else
-      !Open the file
-      call ncwrap( nf90_open( 'reference.nc' , nf90_write , ncid ) , __LINE__ )
-      !Get the variable IDs
-      call ncwrap( nf90_inq_varid( ncid , 'dens'  ,  dens_varid ) , __LINE__ )
-      call ncwrap( nf90_inq_varid( ncid , 'uwnd'  ,  uwnd_varid ) , __LINE__ )
-      call ncwrap( nf90_inq_varid( ncid , 'wwnd'  ,  wwnd_varid ) , __LINE__ )
-      call ncwrap( nf90_inq_varid( ncid , 'theta' , theta_varid ) , __LINE__ )
-      call ncwrap( nf90_inq_varid( ncid , 't'     ,     t_varid ) , __LINE__ )
-    endif
-
-    !Store perturbed values in the temp arrays for output
-    do k = 1 , nz
-      do i = 1 , nx
-        dens (i,k) = state(i,k,ID_DENS)
-        uwnd (i,k) = state(i,k,ID_UMOM) / ( hy_dens_cell(k) + state(i,k,ID_DENS) )
-        wwnd (i,k) = state(i,k,ID_WMOM) / ( hy_dens_cell(k) + state(i,k,ID_DENS) )
-        theta(i,k) = ( state(i,k,ID_RHOT) + hy_dens_theta_cell(k) ) / ( hy_dens_cell(k) + state(i,k,ID_DENS) ) - hy_dens_theta_cell(k) / hy_dens_cell(k)
-      enddo
-    enddo
-
-    !Write the grid data to file with all the processes writing collectively
-    st3=(/i_beg,k_beg,num_out+1/); ct3=(/nx,nz,1/); call ncwrap( nf_put_vara_double( ncid ,  dens_varid , st3 , ct3 , dens  ) , __LINE__ )
-    st3=(/i_beg,k_beg,num_out+1/); ct3=(/nx,nz,1/); call ncwrap( nf_put_vara_double( ncid ,  uwnd_varid , st3 , ct3 , uwnd  ) , __LINE__ )
-    st3=(/i_beg,k_beg,num_out+1/); ct3=(/nx,nz,1/); call ncwrap( nf_put_vara_double( ncid ,  wwnd_varid , st3 , ct3 , wwnd  ) , __LINE__ )
-    st3=(/i_beg,k_beg,num_out+1/); ct3=(/nx,nz,1/); call ncwrap( nf_put_vara_double( ncid , theta_varid , st3 , ct3 , theta ) , __LINE__ )
-
-    !Only the master process needs to write the elapsed time
-    !write elapsed time to file
-
-      st1=(/num_out+1/); ct1=(/1/); etimearr(1) = etime; call ncwrap( nf_put_vara_double( ncid , t_varid , st1 , ct1 , etimearr ) , __LINE__ )
-
-
-
-    !Close the file
-    call ncwrap( nf90_close(ncid) , __LINE__ )
-
-    !Increment the number of outputs
-    num_out = num_out + 1
-
-    !Deallocate the temp arrays
-    deallocate(dens )
-    deallocate(uwnd )
-    deallocate(wwnd )
-    deallocate(theta)
-  end subroutine output
-
-  !Error reporting routine for the NetCDF I/O
-  subroutine ncwrap( ierr , line )
-    implicit none
-    integer, intent(in) :: ierr
-    integer, intent(in) :: line
-    if (ierr /= nf90_noerr) then
-      write(*,*) 'NetCDF Error at line: ', line
-      write(*,*) nf90_strerror(ierr)
-      stop
-    endif
-  end subroutine ncwrap
 end program miniweather

+ 1 - 1
hpc/miniprofiler/English/Fortran/source_code/lab2/Makefile

@@ -1,7 +1,7 @@
 FC := pgf90
 FCC := pgfortran
 FFLAGS := -fast
-LDFLAGS := -lnvToolsExt -lnetcdff
+LDFLAGS := -lnvToolsExt 
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
 NVTXINC := -I/opt/pgi/linux86-64-llvm/2019/cuda/10.1/include/
 NVTXLIB := -L/opt/pgi/linux86-64-llvm/2019/cuda/10.0/lib64/

+ 3 - 3
hpc/miniprofiler/English/Fortran/source_code/lab2/miniWeather_openacc.f90

@@ -82,9 +82,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 10 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS

+ 1 - 1
hpc/miniprofiler/English/Fortran/source_code/lab3/Makefile

@@ -1,7 +1,7 @@
 FC := pgf90
 FCC := pgfortran
 FFLAGS := -fast
-LDFLAGS := -lnvToolsExt -lnetcdff
+LDFLAGS := -lnvToolsExt 
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
 NVTXINC := -I/opt/pgi/linux86-64-llvm/2019/cuda/10.1/include/
 NVTXLIB := -L/opt/pgi/linux86-64-llvm/2019/cuda/10.0/lib64/

+ 3 - 3
hpc/miniprofiler/English/Fortran/source_code/lab3/miniWeather_openacc.f90

@@ -82,9 +82,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 1500 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS

+ 1 - 1
hpc/miniprofiler/English/Fortran/source_code/lab4/Makefile

@@ -1,7 +1,7 @@
 FC := pgf90
 FCC := pgfortran
 FFLAGS := -fast
-LDFLAGS := -lnvToolsExt -lnetcdff
+LDFLAGS := -lnvToolsExt 
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
 NVTXINC := -I/opt/pgi/linux86-64-llvm/2019/cuda/10.1/include/
 NVTXLIB := -L/opt/pgi/linux86-64-llvm/2019/cuda/10.0/lib64/

+ 3 - 3
hpc/miniprofiler/English/Fortran/source_code/lab4/miniWeather_openacc.f90

@@ -82,9 +82,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 1500 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS

+ 1 - 1
hpc/miniprofiler/English/Fortran/source_code/lab5/Makefile

@@ -1,6 +1,6 @@
 FC := pgf90
 FFLAGS := -fast
-LDFLAGS := -lnvToolsExt -lnetcdff
+LDFLAGS := -lnvToolsExt 
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
 NVTXINC := -I/opt/pgi/linux86-64-llvm/2019/cuda/10.1/include/
 NVTXLIB := -L/opt/pgi/linux86-64-llvm/2019/cuda/10.0/lib64/

+ 1 - 1
hpc/miniprofiler/English/Fortran/source_code/solutions/Makefile

@@ -1,7 +1,7 @@
 FC := pgf90
 FCC := pgfortran
 FFLAGS := -fast
-LDFLAGS := -lnvToolsExt -lnetcdff
+LDFLAGS := -lnvToolsExt 
 ACCFLAGS := -ta=tesla:managed -Minfo=accel
 NVTXINC := -I/opt/pgi/linux86-64-llvm/2019/cuda/10.1/include/
 NVTXLIB := -L/opt/pgi/linux86-64-llvm/2019/cuda/10.0/lib64/

+ 3 - 3
hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc.f90

@@ -83,9 +83,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 1500 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS

+ 3 - 3
hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc_exr2.f90

@@ -82,9 +82,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 1500 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS

+ 3 - 3
hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc_exr3.f90

@@ -82,9 +82,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 1500 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS

+ 3 - 3
hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc_exr4.f90

@@ -83,9 +83,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 1500 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS

+ 3 - 3
hpc/miniprofiler/English/Fortran/source_code/solutions/miniWeather_openacc_exr5.f90

@@ -83,9 +83,9 @@ program miniweather
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !The x-direction length is twice as long as the z-direction length
   !So, you'll want to have nx_glob be twice as large as nz_glob
-  nx_glob = 400        !Number of total cells in the x-dirction
-  nz_glob = 200        !Number of total cells in the z-dirction
-  sim_time = 1500 !How many seconds to run the simulation
+  nx_glob = 40        !Number of total cells in the x-dirction
+  nz_glob = 20        !Number of total cells in the z-dirction
+  sim_time = 1000 !How many seconds to run the simulation
   output_freq = 100 !How frequently to output data to file (in seconds)
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !! END USER-CONFIGURABLE PARAMETERS

+ 17 - 101
hpc/miniprofiler/Singularity

@@ -1,22 +1,25 @@
 Bootstrap: docker
-FROM: nvcr.io/hpc/pgi-compilers:ce
+FROM: nvcr.io/nvidia/cuda:10.2-base-ubuntu18.04
 
 %environment
     export XDG_RUNTIME_DIR=
     export PATH="$PATH:/opt/pgi/linux86-64/19.10/bin:/usr/local/bin:/opt/anaconda3/bin:/usr/bin"
-    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib:/opt/pgi/linux86-64-llvm/2019/cuda/10.1/lib64/:/opt/pgi/linux86-64-llvm/19.10/lib" 
+    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" 
     export LIBRARY_PATH="$LIBRARY_PATH:/usr/local/lib" CPATH="$CPATH:/usr/local/include"
+    NVARCH=`uname -s`_`uname -m`; export NVARCH
+    NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS
+    MANPATH=$MANPATH:$NVCOMPILERS/$NVARCH/20.7/compilers/man; export MANPATH
+    PATH=$NVCOMPILERS/$NVARCH/20.7/compilers/bin:$PATH; export PATH
+    export PATH=$NVCOMPILERS/$NVARCH/20.7/comm_libs/mpi/bin:$PATH
+    export MANPATH=$MANPATH:$NVCOMPILERS/$NVARCH/20.7/comm_libs/mpi/man
 
 %post
     build_tmp=$(mktemp -d) && cd ${build_tmp}
-
-    export PATH=/opt/pgi/linux86-64/19.10/bin:$PATH
-
     apt-get -y update
-    apt-get -y dist-upgrade 
-    apt-get -y install --no-install-recommends \
+    DEBIAN_FRONTEND=noninteractive apt-get -y dist-upgrade 
+    DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends \
 	    m4 vim-nox emacs-nox nano zip \
- 	    python3 python3-pip python3-setuptools git-core inotify-tools \
+ 	    python3 python3-pip python3-setuptools git-core inotify-tools wget\
 	    curl git-lfs \
 	    build-essential
     rm -rf /var/lib/apt/cache/* 
@@ -24,110 +27,23 @@ FROM: nvcr.io/hpc/pgi-compilers:ce
     pip3 install --upgrade pip
     pip3 install jupyter netcdf4
 
-# NVIDIA Nsight Systems 2020.3.1
-    apt-get update -y 
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg wget
-    echo "deb https://developer.download.nvidia.com/devtools/repo-deb/x86_64/ /" >> /etc/apt/sources.list.d/nsight.list 
-    apt-get update -y 
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.3.1
-
-    apt-get install --no-install-recommends -y build-essential
-
-##### START : netcdf installation #####
-
-    cd /usr 
-    mkdir netcdf  
-    cd netcdf  
-    mkdir WORK_DIR  
-    cd /usr/netcdf/WORK_DIR  
-    mkdir zlib hdf5 netCDF netCDF-C++ netCDF-Fortran
-
-    cd /usr/netcdf/WORK_DIR/zlib  
-    wget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-4/zlib-1.2.8.tar.gz  
-    tar -xvzf zlib-1.2.8.tar.gz
-
-    cd /usr/netcdf/WORK_DIR/hdf5  
-    wget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-4/hdf5-1.8.12.tar.gz  
-    tar -xvzf hdf5-1.8.12.tar.gz
-
-    cd /usr/netcdf/WORK_DIR/netCDF  
-    wget ftp://ftp.unidata.ucar.edu/pub/netcdf/old/netcdf-4.3.0.tar.gz  
-    tar -xvzf netcdf-4.3.0.tar.gz 
-
-    cd /usr/netcdf/WORK_DIR/netCDF-C++  
-    wget https://github.com/Unidata/netcdf-cxx4/archive/v4.2.1.tar.gz  
-    tar -xvzf v4.2.1.tar.gz
+### NVIDIA HPC SDK 20.7
+    wget https://developer.download.nvidia.com/hpc-sdk/nvhpc_2020_207_Linux_x86_64_cuda_multi.tar.gz
+    tar xpzf nvhpc_2020_207_Linux_x86_64_cuda_multi.tar.gz
 
-    cd /usr/netcdf/WORK_DIR/netCDF-Fortran 
-    wget ftp://ftp.unidata.ucar.edu/pub/netcdf/old/netcdf-fortran-4.2.tar.gz 
-    tar -xvzf netcdf-fortran-4.2.tar.gz
-
-    export CC=pgcc CFLAGS="-O tp=p7-64" CXX=pgc++ CXXFLAGS="-O tp=p7-64" FC=pgfortran FCFLAGS="-O tp=p7-64" F77=pgfortran FFLAGS="-O tp=p7-64" CPPFLAGS="-DpgiFortran" 
-
-    mkdir -p /usr/local  
-    mkdir -p /usr/local/bin  
-    mkdir -p /usr/local/include  
-    mkdir -p /usr/local/lib 
-
-    export PATH="$PATH:/usr/local/bin" LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" LIBRARY_PATH="$LIBRARY_PATH:/usr/local/lib" CPATH="$CPATH:/usr/local/include"
-
-# zlib 
-    cd /usr/netcdf/WORK_DIR/zlib/zlib-1.2.8 
-    ./configure --prefix=/usr/local 
-    make  
-    make install 
-
-# hdf5
-    cd  /usr/netcdf/WORK_DIR/hdf5/hdf5-1.8.12 
-    unset CPP 
-    ./configure --prefix=/usr/local --enable-fortran --enable-c++ 
-    make 
-    make install 
-
-# netcdf
-    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" 
-    export CFLAGS="-O tp=p7-64 -I /usr/local/include" 
-    export LDFLAGS="-L/usr/local/lib -L/usr/local/lib" 
-    cd  /usr/netcdf/WORK_DIR/netCDF/netcdf-4.3.0 
-    ./configure --prefix=/usr/local 
-    make  
-    make install  
-
-# netcdf-c++
-    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/hdf5/lib:/usr/local/lib" 
-    export CFLAGS="-O tp=p7-64 -I /usr/local/include -I /usr/local/include" 
-    export CPPFLAGS="-DpgiFortran -I /usr/local/include -I /usr/local/include" 
-    export LDFLAGS="-L/usr/local/lib -L/usr/local/lib" 
-    cd  /usr/netcdf/WORK_DIR/netCDF-C++/netcdf-cxx4-4.2.1 
-    ./configure --prefix=/usr/local 
-    make  
-    make install 
-
-# netcdf-fortran
-    export CFLAGS="-O tp=p7-64 -I /usr/local/include -I /usr/local/include" 
-    export FCFLAGS="-O tp=p7-64 -I /usr/local/include -I /usr/local/include" 
-    export FFLAGS="-O tp=p7-64 -I /usr/local/include -I /usr/local/include" 
-    export CPPFLAGS="-DpgiFortran -I /usr/local/include -I /usr/local/include" 
-    unset LDFLAGS 
-    cd  /usr/netcdf/WORK_DIR/netCDF-Fortran/netcdf-fortran-4.2 
-    ./configure --prefix=/usr/local 
-    make 
-    make install   
-    
-##### END : netcdf installation #####
+    nvhpc_2020_207_Linux_x86_64_cuda_multi/install
+###
 
     wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 
     bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/anaconda3 
     rm Miniconda3-latest-Linux-x86_64.sh 
     /opt/anaconda3/bin/conda install -y -q netcdf4
 
-    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/pgi/linux86-64-llvm/2019/cuda/10.1/lib64/" 
-
     cd /
     rm -rf ${build_tmp}
 
 %files
-    miniapps-profiler /labs
+    English/ /labs
 
 %runscript
     "$@"

+ 2 - 2
hpc/openacc/Dockerfile

@@ -5,11 +5,11 @@
 FROM nvcr.io/hpc/pgi-compilers:ce
 
 RUN apt update && \
-    apt install -y --no-install-recommends python3-pip python3-setuptools nginx zip && \
+    apt install -y --no-install-recommends python3-pip python3-setuptools nginx zip build-essential && \
     rm -rf /var/lib/apt/lists/* && \
     pip3 install --no-cache-dir jupyter
 ADD docker-configs/default /etc/nginx/sites-available/default
 
-ADD labs/ /labs
+ADD English/ /labs
 WORKDIR /labs
 CMD service nginx start && jupyter notebook --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/labs

+ 1 - 1
hpc/openacc/English/C/jupyter_notebook/.ipynb_checkpoints/openacc_c_lab3-bonus-checkpoint.ipynb

@@ -202,7 +202,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pgcc -fast -ta=tesla -Minfo=accel -o laplace_gang_worker_vector jacobi.c laplace2d.c && ./laplace_gang_worker_vector"
+    "!cd ../source_code/lab3 && make clean && make && ./laplace"
    ]
   },
   {

+ 1 - 1
hpc/openacc/English/C/jupyter_notebook/openacc_c_lab1-bonus.ipynb

@@ -63,7 +63,7 @@
     "![kernels1](images/kernels1.png)\n",
     "![kernels2](images/kernels2.png)\n",
     "\n",
-    "OK, now it's your turn to try the `kernels` approach. From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `English/C/source_code/lab1` directory and replace your `acc parallel loop` directives with `acc kernels` and rerun the code. Remember to **SAVE** your code after changes, before running below cells.\n"
+    "OK, now it's your turn to try the `kernels` approach. From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `C/source_code/lab1` directory and replace your `acc parallel loop` directives with `acc kernels` and rerun the code. Remember to **SAVE** your code after changes, before running below cells.\n"
    ]
   },
   {

+ 1 - 1
hpc/openacc/English/C/jupyter_notebook/openacc_c_lab1.ipynb

@@ -351,7 +351,7 @@
    "source": [
     "### Parallelize the Example Code\n",
     "\n",
-    "At this point you have all of the tools you need to begin accelerating your application. The loops you will be parallelizing are in `laplace2d.c`. From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `English/C/source_code/lab1` directory. Remember to **SAVE** your code after changes, before running below cells.\n",
+    "At this point you have all of the tools you need to begin accelerating your application. The loops you will be parallelizing are in `laplace2d.c`. From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `C/source_code/lab1` directory. Remember to **SAVE** your code after changes, before running below cells.\n",
     "\n",
     "It is advisable to start with the `calcNext` routine and test your changes by compiling and running the code before moving on to the `swap` routine. OpenACC can be incrementally added to your application so that you can ensure each change is correct before getting too far along, which greatly simplifies debugging.\n",
     "\n",

Разница между файлами не показана из-за своего большого размера
+ 2 - 2
hpc/openacc/English/C/jupyter_notebook/openacc_c_lab2.ipynb


+ 2 - 2
hpc/openacc/English/C/jupyter_notebook/openacc_c_lab3-bonus.ipynb

@@ -189,7 +189,7 @@
    "source": [
     "### Implementing the Gang Worker Vector\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `English/C/source_code/lab3` directory. Replace our ealier clauses with **gang, worker, and vector** To reorganize our thread blocks. Try it using a few different numbers, but always keep the vector length as a **multiple of 32** to fully utilize **warps**.\n",
+    "From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `C/source_code/lab3` directory. Replace our ealier clauses with **gang, worker, and vector** To reorganize our thread blocks. Try it using a few different numbers, but always keep the vector length as a **multiple of 32** to fully utilize **warps**.\n",
     "\n",
     "Remember to **SAVE** your code after changes, before running below cells.\n",
     "\n",
@@ -202,7 +202,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pgcc -fast -ta=tesla -Minfo=accel -o laplace_gang_worker_vector jacobi.c laplace2d.c && ./laplace_gang_worker_vector"
+    "!cd ../source_code/lab3 && make clean && make && ./laplace"
    ]
   },
   {

+ 2 - 2
hpc/openacc/English/C/jupyter_notebook/openacc_c_lab3.ipynb

@@ -158,7 +158,7 @@
    "source": [
     "#### Implementing the Collapse Clause\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `English/C/source_code/lab3` directory. Use the **collapse clause** to collapse our multi-dimensional loops into a single dimensional loop.\n",
+    "From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `C/source_code/lab3` directory. Use the **collapse clause** to collapse our multi-dimensional loops into a single dimensional loop.\n",
     "\n",
     "Remember to **SAVE** your code after changes, before running below cells.\n",
     "\n",
@@ -281,7 +281,7 @@
    "source": [
     "#### Implementing the Tile Clause\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `English/C/source_code/lab3` directory. Replace the `collapse` clause with the `tile` clause to break our multi-dimensional loops into smaller tiles. Try using a variety of different tile sizes, but for now keep one of the dimensions as a **multiple of 32**. We will talk later about why this is important.\n",
+    "From the top menu, click on *File*, and *Open* `laplace2d.c` from the current directory at `C/source_code/lab3` directory. Replace the `collapse` clause with the `tile` clause to break our multi-dimensional loops into smaller tiles. Try using a variety of different tile sizes, but for now keep one of the dimensions as a **multiple of 32**. We will talk later about why this is important.\n",
     "\n",
     "Remember to **SAVE** your code after changes, before running below cells.\n",
     "\n",

+ 1 - 1
hpc/openacc/English/C/source_code/lab1/Makefile

@@ -2,7 +2,7 @@
 CC := pgcc
 ACCFLAGS_1 := -fast
 ACCFLAGS_2 := -fast -ta=multicore -Minfo=accel
-ACCFLAGS_3 := -fast -ta=tesla,managed -Minfo=accel
+ACCFLAGS_3 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_serial: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS_1} -o laplace jacobi.c laplace2d.c

+ 1 - 1
hpc/openacc/English/C/source_code/lab1/solutions/Makefile

@@ -2,7 +2,7 @@
 CC := pgcc
 ACCFLAGS_1 := -fast
 ACCFLAGS_2 := -fast -ta=multicore -Minfo=accel
-ACCFLAGS_3 := -fast -ta=tesla,managed -Minfo=accel
+ACCFLAGS_3 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_serial: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS_1} -o laplace jacobi.c laplace2d.c

+ 2 - 2
hpc/openacc/English/C/source_code/lab2/Makefile

@@ -1,7 +1,7 @@
 
 CC := pgcc
-ACCFLAGS_1 := -fast -ta=tesla:cc70 -Minfo=accel
-ACCFLAGS_2 := -fast -ta=tesla:cc70,managed -Minfo=accel
+ACCFLAGS_1 := -fast -ta=tesla -Minfo=accel
+ACCFLAGS_2 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_update: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS_1} -o laplace_update jacobi.c laplace2d.c

+ 2 - 2
hpc/openacc/English/C/source_code/lab2/solutions/Makefile

@@ -1,7 +1,7 @@
 
 CC := pgcc
-ACCFLAGS_1 := -fast -ta=tesla:cc70 -Minfo=accel
-ACCFLAGS_2 := -fast -ta=tesla:cc70,managed -Minfo=accel
+ACCFLAGS_1 := -fast -ta=tesla -Minfo=accel
+ACCFLAGS_2 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_update: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS_1} -o laplace_update jacobi.c laplace2d.c

+ 2 - 2
hpc/openacc/English/C/source_code/lab2/update/Makefile

@@ -1,7 +1,7 @@
 
 CC := pgcc
-ACCFLAGS_1 := -fast -ta=tesla:cc70 -Minfo=accel
-ACCFLAGS_2 := -fast -ta=tesla:cc70,managed -Minfo=accel
+ACCFLAGS_1 := -fast -ta=tesla -Minfo=accel
+ACCFLAGS_2 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_update: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS_1} -o laplace_update jacobi.c laplace2d.c

+ 2 - 2
hpc/openacc/English/C/source_code/lab2/update/solution/Makefile

@@ -1,7 +1,7 @@
 
 CC := pgcc
-ACCFLAGS_1 := -fast -ta=tesla:cc70 -Minfo=accel
-ACCFLAGS_2 := -fast -ta=tesla:cc70,managed -Minfo=accel
+ACCFLAGS_1 := -fast -ta=tesla -Minfo=accel
+ACCFLAGS_2 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_update: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS_1} -o laplace_update jacobi.c laplace2d.c

+ 1 - 1
hpc/openacc/English/C/source_code/lab3/Makefile

@@ -1,6 +1,6 @@
 
 CC := pgcc
-ACCFLAGS:= -fast -ta=tesla:cc70 -Minfo=accel
+ACCFLAGS:= -fast -ta=tesla -Minfo=accel
 
 laplace: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS} -o laplace jacobi.c laplace2d.c

+ 1 - 1
hpc/openacc/English/C/source_code/lab3/solutions/collapse/Makefile

@@ -1,6 +1,6 @@
 
 CC := pgcc
-ACCFLAGS:= -fast -ta=tesla:cc70 -Minfo=accel
+ACCFLAGS:= -fast -ta=tesla -Minfo=accel
 
 laplace: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS} -o laplace jacobi.c laplace2d.c

+ 1 - 1
hpc/openacc/English/C/source_code/lab3/solutions/tile/Makefile

@@ -1,6 +1,6 @@
 
 CC := pgcc
-ACCFLAGS:= -fast -ta=tesla:cc70 -Minfo=accel
+ACCFLAGS:= -fast -ta=tesla -Minfo=accel
 
 laplace: jacobi.c laplace2d.c
 	${CC} ${ACCFLAGS} -o laplace jacobi.c laplace2d.c

+ 1 - 1
hpc/openacc/English/Fortran/jupyter_notebook/.ipynb_checkpoints/openacc_fortran_lab3-bonus-checkpoint.ipynb

@@ -181,7 +181,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pgfortran -fast -ta=tesla -Minfo=accel -o laplace_gang_worker_vector laplace2d.f90 jacobi.f90 && ./laplace_gang_worker_vector"
+    "!cd ../source_code/lab3 && make clean && make && ./laplace"
    ]
   },
   {

+ 1 - 1
hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab1-bonus.ipynb

@@ -59,7 +59,7 @@
     "![kernels1](images/kernels1f.png)\n",
     "![kernels2](images/kernels2f.png)\n",
     "\n",
-    "OK, now it's your turn to try the `kernels` approach.From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `English/Fortran/source_code/lab1` and replace your `acc parallel loop` directives with `acc kernels` and rerun the code. Remember to **SAVE** your code after changes, before running below cells.\n"
+    "OK, now it's your turn to try the `kernels` approach.From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `Fortran/source_code/lab1` and replace your `acc parallel loop` directives with `acc kernels` and rerun the code. Remember to **SAVE** your code after changes, before running below cells.\n"
    ]
   },
   {

+ 1 - 1
hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab1.ipynb

@@ -358,7 +358,7 @@
    "source": [
     "### Parallelize the Example Code\n",
     "\n",
-    "At this point you have all of the tools you need to begin accelerating your application. The loops you will be parallelizing are in `laplace2d.f90`. From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `English/Fortran/source_code/lab1` directory. Remember to **SAVE** your code after changes, before running below cells.\n",
+    "At this point you have all of the tools you need to begin accelerating your application. The loops you will be parallelizing are in `laplace2d.f90`. From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `Fortran/source_code/lab1` directory. Remember to **SAVE** your code after changes, before running below cells.\n",
     "\n",
     "It is advisable to start with the `calcNext` routine and test your changes by compiling and running the code before moving on to the `swap` routine. OpenACC can be incrementally added to your application so that you can ensure each change is correct before getting too far along, which greatly simplifies debugging.\n",
     "\n",

Разница между файлами не показана из-за своего большого размера
+ 2 - 2
hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab2.ipynb


+ 2 - 2
hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab3-bonus.ipynb

@@ -168,7 +168,7 @@
    "source": [
     "### Implementing the Gang, Worker, and Vector\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `English/Fortran/source_code/lab3` directory. Replace our earlier clauses with **gang, worker, and vector** To reorganize our thread blocks. Try it using a few different numbers, but always keep the vector length as a **multiple of 32** to fully utilize **warps**.\n",
+    "From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `Fortran/source_code/lab3` directory. Replace our earlier clauses with **gang, worker, and vector** To reorganize our thread blocks. Try it using a few different numbers, but always keep the vector length as a **multiple of 32** to fully utilize **warps**.\n",
     "\n",
     "Remember to **SAVE** your code after changes, before running below cells.\n",
     "\n",
@@ -181,7 +181,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pgfortran -fast -ta=tesla -Minfo=accel -o laplace_gang_worker_vector laplace2d.f90 jacobi.f90 && ./laplace_gang_worker_vector"
+    "!cd ../source_code/lab3 && make clean && make && ./laplace"
    ]
   },
   {

+ 2 - 2
hpc/openacc/English/Fortran/jupyter_notebook/openacc_fortran_lab3.ipynb

@@ -155,7 +155,7 @@
    "source": [
     "#### Implementing the Collapse Clause\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `English/Fortran/source_code/lab3` directory. Use the **collapse clause** to collapse our multi-dimensional loops into a single dimensional loop.\n",
+    "From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `Fortran/source_code/lab3` directory. Use the **collapse clause** to collapse our multi-dimensional loops into a single dimensional loop.\n",
     "\n",
     "Remember to **SAVE** your code after changes, before running below cells.\n",
     "\n",
@@ -265,7 +265,7 @@
    "source": [
     "#### Implementing the Tile Clause\n",
     "\n",
-    "From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `English/Fortran/source_code/lab3` directory.  Replace the **collapse clause** with the **tile clause** to break our multi-dimensional loops into smaller tiles. Try using a variety of different tile sizes, but always keep one of the dimensions as a **multiple of 32**. We will talk later about why this is important.\n",
+    "From the top menu, click on *File*, and *Open* `laplace2d.f90` from the current directory at `Fortran/source_code/lab3` directory.  Replace the **collapse clause** with the **tile clause** to break our multi-dimensional loops into smaller tiles. Try using a variety of different tile sizes, but always keep one of the dimensions as a **multiple of 32**. We will talk later about why this is important.\n",
     "\n",
     "Remember to **SAVE** your code after changes, before running below cells.\n",
     "\n",

+ 1 - 1
hpc/openacc/English/Fortran/source_code/lab1/Makefile

@@ -2,7 +2,7 @@
 FC := pgfortran
 ACCFLAGS_1 := -fast
 ACCFLAGS_2 := -fast -ta=multicore -Minfo=accel
-ACCFLAGS_3 := -fast -ta=tesla,managed -Minfo=accel
+ACCFLAGS_3 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_serial: laplace2d.f90 jacobi.f90 
 	${FC} ${ACCFLAGS_1} -o laplace laplace2d.f90 jacobi.f90

+ 1 - 1
hpc/openacc/English/Fortran/source_code/lab1/solutions/Makefile

@@ -2,7 +2,7 @@
 FC := pgfortran
 ACCFLAGS_1 := -fast
 ACCFLAGS_2 := -fast -ta=multicore -Minfo=accel
-ACCFLAGS_3 := -fast -ta=tesla,managed -Minfo=accel
+ACCFLAGS_3 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_serial: laplace2d.f90 jacobi.f90 
 	${FC} ${ACCFLAGS_1} -o laplace laplace2d.f90 jacobi.f90

+ 2 - 2
hpc/openacc/English/Fortran/source_code/lab2/Makefile

@@ -1,7 +1,7 @@
 
 FC := pgfortran
-ACCFLAGS_1 := -fast -ta=tesla:cc70 -Minfo=accel
-ACCFLAGS_2 := -fast -ta=tesla:cc70,managed -Minfo=accel
+ACCFLAGS_1 := -fast -ta=tesla -Minfo=accel
+ACCFLAGS_2 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_update: laplace2d.f90 jacobi.f90
 	${FC} ${ACCFLAGS_1} -o laplace_update laplace2d.f90 jacobi.f90

+ 2 - 2
hpc/openacc/English/Fortran/source_code/lab2/solutions/Makefile

@@ -1,7 +1,7 @@
 
 FC := pgfortran
-ACCFLAGS_1 := -fast -ta=tesla:cc70 -Minfo=accel
-ACCFLAGS_2 := -fast -ta=tesla:cc70,managed -Minfo=accel
+ACCFLAGS_1 := -fast -ta=tesla -Minfo=accel
+ACCFLAGS_2 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_update: laplace2d.f90 jacobi.f90
 	${FC} ${ACCFLAGS_1} -o laplace_update laplace2d.f90 jacobi.f90

+ 2 - 2
hpc/openacc/English/Fortran/source_code/lab2/update/Makefile

@@ -1,7 +1,7 @@
 
 FC := pgfortran
-ACCFLAGS_1 := -fast -ta=tesla:cc70 -Minfo=accel
-ACCFLAGS_2 := -fast -ta=tesla:cc70,managed -Minfo=accel
+ACCFLAGS_1 := -fast -ta=tesla -Minfo=accel
+ACCFLAGS_2 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_update: laplace2d.f90 jacobi.f90
 	${FC} ${ACCFLAGS_1} -o laplace_update laplace2d.f90 jacobi.f90

+ 2 - 2
hpc/openacc/English/Fortran/source_code/lab2/update/solution/Makefile

@@ -1,7 +1,7 @@
 
 FC := pgfortran
-ACCFLAGS_1 := -fast -ta=tesla:cc70 -Minfo=accel
-ACCFLAGS_2 := -fast -ta=tesla:cc70,managed -Minfo=accel
+ACCFLAGS_1 := -fast -ta=tesla -Minfo=accel
+ACCFLAGS_2 := -fast -ta=tesla:managed -Minfo=accel
 
 laplace_update: laplace2d.f90 jacobi.f90
 	${FC} ${ACCFLAGS_1} -o laplace_update laplace2d.f90 jacobi.f90

+ 1 - 1
hpc/openacc/English/Fortran/source_code/lab3/Makefile

@@ -1,6 +1,6 @@
 
 FC := pgfortran
-ACCFLAGS:= -fast -ta=tesla:cc70 -Minfo=accel
+ACCFLAGS:= -fast -ta=tesla -Minfo=accel
 
 laplace: laplace2d.f90 jacobi.f90
 	${FC} ${ACCFLAGS} -o laplace laplace2d.f90 jacobi.f90

+ 1 - 1
hpc/openacc/English/Fortran/source_code/lab3/solutions/collapse/Makefile

@@ -1,6 +1,6 @@
 
 FC := pgfortran
-ACCFLAGS:= -fast -ta=tesla:cc70 -Minfo=accel
+ACCFLAGS:= -fast -ta=tesla -Minfo=accel
 
 laplace: laplace2d.f90 jacobi.f90
 	${FC} ${ACCFLAGS} -o laplace laplace2d.f90 jacobi.f90

+ 1 - 1
hpc/openacc/English/Fortran/source_code/lab3/solutions/tile/Makefile

@@ -1,6 +1,6 @@
 
 FC := pgfortran
-ACCFLAGS:= -fast -ta=tesla:cc70 -Minfo=accel
+ACCFLAGS:= -fast -ta=tesla -Minfo=accel
 
 laplace: laplace2d.f90 jacobi.f90
 	${FC} ${ACCFLAGS} -o laplace laplace2d.f90 jacobi.f90

+ 2 - 2
hpc/openacc/Singularity

@@ -8,13 +8,13 @@ From: nvcr.io/hpc/pgi-compilers:ce
 %post
 
     apt-get -y update
-    apt-get -y install --no-install-recommends python3-pip python3-setuptools zip
+    apt-get -y install --no-install-recommends python3-pip python3-setuptools zip build-essential
     rm -rf /var/lib/apt/lists/*
     pip3 install --no-cache-dir jupyter
 
 %files
 
-labs /labs
+    English/ /labs
 
 %environment
 XDG_RUNTIME_DIR=