Browse Source

added nways-MD

Mozhgan K. Chimeh 3 years ago
parent
commit
ddaa493981
100 changed files with 1911 additions and 3 deletions
  1. 2 0
      .gitignore
  2. 17 2
      README.md
  3. 13 1
      hpc/README.md
  4. 36 0
      hpc/nways/Dockerfile
  5. 56 0
      hpc/nways/README.md
  6. 51 0
      hpc/nways/Singularity
  7. 23 0
      hpc/nways/nways_labs/nways_MD/English/C/LICENSE
  8. 118 0
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/Final_Remarks.ipynb
  9. 50 0
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/GPU_Architecture_Terminologies.ipynb
  10. 401 0
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/cudac/nways_cuda.ipynb
  11. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/Nsight Diagram.png
  12. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/Optimization_Cycle.jpg
  13. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/UM.png
  14. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/allsection-compute.png
  15. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/baseline-compute.png
  16. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/charts-compute.png
  17. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cli-out.png
  18. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/coalesced_mem.png
  19. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/collapse_feedback.png
  20. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/collapse_pre.png
  21. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/collapse_thread.png
  22. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute.png
  23. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute_analyz.png
  24. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute_command.png
  25. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute_command_line.png
  26. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute_open.png
  27. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cpu.png
  28. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda.png
  29. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_hw_sw.png
  30. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_indexing.png
  31. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile.png
  32. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_api.png
  33. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_timeline.png
  34. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_vec_add.png
  35. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/data_feedback.png
  36. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/data_thread.png
  37. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/diagram.png
  38. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gang_128.png
  39. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gang_256.png
  40. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gang_32.png
  41. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gang_vector.png
  42. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gpu_feedback.png
  43. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kernel_feedback.png
  44. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kernel_indep_feedback.png
  45. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kokkos_abstraction.png
  46. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kokkos_ecosystem.png
  47. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kokkos_mirror_view.png
  48. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/laplas3.png
  49. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/launch-compute.png
  50. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nsight_open.png
  51. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx.PNG
  52. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx_gpu.png
  53. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx_multicore.png
  54. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx_serial.png
  55. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc correlation.png
  56. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_3_directives.png
  57. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_construct.png
  58. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_copyclause.png
  59. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_parallel.png
  60. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_parallel2.png
  61. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_parallel_loop.png
  62. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_feedback.png
  63. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_feedback_collapse.png
  64. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_fork_join.png
  65. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_parallel_construct.png
  66. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_parallelfor_construct.png
  67. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_target_distribute.png
  68. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_target_teams.png
  69. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_teams.png
  70. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_teams_for.png
  71. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/page-compute.png
  72. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_data.png
  73. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_data_feedback.png
  74. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_detailed.png
  75. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_expand.png
  76. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_loop.png
  77. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_timeline.png
  78. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_unified.png
  79. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/rdf.png
  80. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/roofline_collapse.png
  81. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/rule-compute.png
  82. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/scheduler_collapse.png
  83. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/serial.png
  84. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/sol.png
  85. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/sol_baseline.png
  86. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_collapse.png
  87. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_hover.png
  88. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_loc.png
  89. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_sass.png
  90. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_sass_collapse.png
  91. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/stdpar_gpu.png
  92. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/stdpar_um.png
  93. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/summary-compute.png
  94. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/thread.png
  95. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/uncoalesced_hint.png
  96. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/warp_collapse.png
  97. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/workflow.png
  98. 457 0
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/kokkos/nways_kokkos.ipynb
  99. 687 0
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc.ipynb
  100. 0 0
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt.ipynb

+ 2 - 0
.gitignore

@@ -1,2 +1,4 @@
 .ipynb_checkpoints
 */.ipynb_checkpoints/*
+**/input/
+input/

+ 17 - 2
README.md

@@ -1,2 +1,17 @@
-# gpubootcamp
-This repository consists for gpu bootcamp material for HPC and AI
+#  GPUBootcamp Official Training Materials
+This repository consists of GPU bootcamp material for both HPC and AI:
+
+- [AI](https://github.com/gpuhackathons-org/gpubootcamp/tree/master/ai)
+
+- [HPC](https://github.com/gpuhackathons-org/gpubootcamp/tree/master/hpc)
+
+- [HPC_AI](https://github.com/gpuhackathons-org/gpubootcamp/tree/master/hpc_ai)
+
+# System Requirements
+Each lab contains docker and singularity definition files. Follow the readme files inside each on how to build the container and run the labs inside it. 
+
+# Slides:
+The slides associated with these training materials can be downloaded from [Google Slides](https://drive.google.com/drive/folders/1laRYdu6mtSA29M6Xthc1jP8AEOtVnbBo?usp=sharing)
+
+## Questions?
+Please join [OpenACC Slack Channel](https://openacclang.slack.com/messages/openaccusergroup) for questions.

+ 13 - 1
hpc/README.md

@@ -1,2 +1,14 @@
- hpc: This directory contains labs related to HPC(Simulation) and parallel computing
+**HPC**: This directory contains labs related to HPC (Simulation) and parallel computing. It comprises the following labs:
+- Introduction to OpenACC
+- Introduction to Nsight Profiler tool
+- Nways to GPU programming
 
+Each lab contains docker and singularity definition files. Follow the readme files inside each on how to build the container and run the labs inside it. 
+
+
+# Slides:
+The slides associated with these training materials can be downloaded from [Google Slides](https://drive.google.com/drive/folders/1nYd_oHbmA4cxdDPesg5CwQkrvr0E3ruf?usp=sharing)
+
+
+## Questions?
+Please join [OpenACC Slack Channel](https://openacclang.slack.com/messages/openaccusergroup) for questions.

+ 36 - 0
hpc/nways/Dockerfile

@@ -0,0 +1,36 @@
+# Copyright (c) 2021 NVIDIA Corporation.  All rights reserved. 
+
+# To build the docker container, run: $ sudo docker build -t openacc-labs:latest .
+# To run: $ sudo docker run --rm -it --runtime nvidia -p 8888:8888 openacc-labs:latest
+# Finally, open http://localhost:8888/
+
+FROM nvcr.io/nvidia/nvhpc:20.9-devel-ubuntu20.04
+
+RUN apt-get -y update && \
+        DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends python3-pip python3-setuptools nginx zip make build-essential && \
+        rm -rf /var/lib/apt/lists/* && \
+        pip3 install --no-cache-dir jupyter &&\
+        mkdir -p /home/openacc/labs
+
+############################################
+# NVIDIA nsight-systems-2020.5.1, nsight-compute-2020.2.1
+RUN apt-get update -y && \
+        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        apt-transport-https \
+        ca-certificates \
+        gnupg \
+        wget && \
+        apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80 && \
+        echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list &&\
+        apt-get update -y
+
+RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.5.1 nsight-compute-2020.2.1 
+
+#################################################
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib:/opt/nvidia/hpc_sdk/Linux_x86_64/20.9/cuda/11.0/lib64/"
+ENV PATH="/opt/nvidia/nsight-systems/2020.5.1/bin:/opt/nvidia/nsight-compute/2020.2.1:/opt/nvidia/hpc_sdk/Linux_x86_64/20.9/cuda/11.0/include:$PATH"
+#################################################
+
+ADD nways_labs/ /labs
+WORKDIR /labs
+CMD service nginx start && jupyter notebook --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/labs

File diff suppressed because it is too large
+ 56 - 0
hpc/nways/README.md


+ 51 - 0
hpc/nways/Singularity

@@ -0,0 +1,51 @@
+# Copyright (c) 2021 NVIDIA Corporation.  All rights reserved. 
+
+Bootstrap: docker
+FROM: nvcr.io/nvidia/nvhpc:20.9-devel-ubuntu20.04
+
+%environment
+    export XDG_RUNTIME_DIR=
+    export PATH="$PATH:/usr/local/bin:/opt/anaconda3/bin:/usr/bin"
+    export PATH=/opt/nvidia/nsight-systems/2020.5.1/bin:/opt/nvidia/nsight-compute/2020.2.1:$PATH
+    export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib:/opt/nvidia/hpc_sdk/Linux_x86_64/20.9/cuda/11.0/lib64/"
+
+%post
+    build_tmp=$(mktemp -d) && cd ${build_tmp}
+
+    apt-get -y update
+    apt-get -y dist-upgrade 
+    DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends \
+	    m4 vim-nox emacs-nox nano zip\
+ 	    python3-pip python3-setuptools git-core inotify-tools \
+	    curl git-lfs \
+	    build-essential
+    rm -rf /var/lib/apt/cache/* 
+
+    pip3 install --upgrade pip
+    pip3 install --no-cache-dir jupyter
+
+    apt-get install --no-install-recommends -y build-essential 
+
+# NVIDIA nsight-systems-2020.5.1, nsight-compute-2020.2.1
+    apt-get update -y   
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg wget
+    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80
+    echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list 
+    apt-get update -y 
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.5.1 nsight-compute-2020.2.1 
+    apt-get install --no-install-recommends -y build-essential
+
+    wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 
+    bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/anaconda3 
+    rm Miniconda3-latest-Linux-x86_64.sh 
+
+    cd /
+    rm -rf ${build_tmp}
+
+%files
+    nways_labs/ /labs
+%runscript
+    "$@"
+
+%labels
+    AUTHOR mozhgank

+ 23 - 0
hpc/nways/nways_labs/nways_MD/English/C/LICENSE

@@ -0,0 +1,23 @@
+Copyright (c) 2018, National Center for Computational Sciences, Oak Ridge National Laboratory
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 118 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/Final_Remarks.ipynb

@@ -0,0 +1,118 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Final Remarks\n",
+    "\n",
+    "In this tutorial we took an approach where the same algorithm was ported to GPU using different popular methods. Each method has its strengths and serves the purpose for which it was created. From a developer's point of view, listed below are some key parameters which are crucial to any development exercise: \n",
+    "\n",
+    "- **Ease of Programming**: How much in-depth knowledge of processor architecture is required for a developer before starting to convert the code to GPU?\n",
+    "- **Performance**: How much effort is required to reach desirable performance on a particular architecture.\n",
+    "- **Portability**: To what extent does the same code run on multiple architecture? What provisions are provided by programming approach to target different platforms?\n",
+    "- **Support**: The overall ecosystem and support by the community.\n",
+    "    - Which all compilers implement the standard?\n",
+    "    - Which all languages are supported?\n",
+    "    - Which all applications make use of it?\n",
+    "    - How easy or difficult it is to profile/debug the application?\n",
+    "    \n",
+    "Let us try to create high-level buckets for each of the parameters above, with a limited scope of GPU support:\n",
+    "\n",
+    "| | |  |  | \n",
+    "| :--- | :--- | :--- | :--- |\n",
+    "| Ease of Programming | Low: Minimal architecture specific knowledge needed  | Intermediate: Minimal changes expected in code design.  Using these along with architecture knowledge helps in better performance | High: In-Depth GPU architecture knowledge is a must |\n",
+    "| Performance  | Depends: Based on the complexity/type of application the performance may vary | High: Exposes methods to get good performance. These methods are integral part of design and exposed to programmer at various granularities | Best: Full control to developers to control parallelism and memory access |\n",
+    "| Portability | Integral: Part of the key objective  | Limited: Works only on specific platform | | \n",
+    "| Support | Established: Proven over years and support by multiple vendors for GPU | Emerging: Gaining traction by multiple vendors for GPU  | |\n",
+    "\n",
+    "There is a very thin line between these categories and within that limited scope and view we could categorize different approaches as follows:\n",
+    "\n",
+    " \n",
+    "| | OpenACC | OpenMP | stdpar | Kokkos | CUDA Languages |\n",
+    "| --- | --- | --- | --- | --- | --- |\n",
+    "| Ease | High  | High | High  | Intermediate | Low |\n",
+    "| Performance  | Depends | Depends | Depends | High | Best |\n",
+    "| Portability | Integral  | Integral | Integral | Integral | Limited |\n",
+    "| Support | Established | Emerging | Emerging | Established | Established |\n",
+    "\n",
+    "Below given are points that will help users as there is no one programming model that fits all needs.\n",
+    "\n",
+    "## Ease of Programming\n",
+    "- The directive‐based OpenMP and OpenACC programming models are generally least intrusive when applied to the loops. \n",
+    "- Kokkos required restructuring of the existing code for the parallel dispatch via functors or lambda functions\n",
+    "- CUDA required a comparable amount of rewriting effort, in particular, to map the loops onto a CUDA grid of threads and thread blocks\n",
+    "- stdpar also required us to change the constructs to make use of C++17 templates and may be preferred for new developments having C++ template style coding. \n",
+    "- The overhead for OpenMP and OpenACC in terms of lines of code is the smallest, followed by stdpar and Kokkos\n",
+    "\n",
+    "## Performance\n",
+    "While we have not gone into the details of optimization for any of these programming models, the analysis provided here is based on the general design of the programming model itself.\n",
+    "\n",
+    "- Kokkos when compiled enables the use of correct compiler optimization flags for the respective platform, while for the other frameworks, the user has to set these flags manually. This gives Kokkos an upper hand over OpenACC and OpenMP. \n",
+    "- OpenACC and OpenMP abstract model defines a least common denominator for accelerator devices, but cannot represent architectural specifics of these devices without making the language less portable.\n",
+    "- stdpar on the other hand is more abstract and gives less control to developers to optimize the code\n",
+    "\n",
+    "## Portability\n",
+    "We observed the same code being run on both multicore and GPU using OpenMP, OpenACC, Kokkos and stdpar. The point we highlight here is how a programming model supports the divergent cases where developers may choose to use different directive variants to get more performance. In a real application the tolerance for this portability/performance trade-off will vary according to the needs of the programmer and application \n",
+    "- OpenMP supports [Metadirective](https://www.openmp.org/spec-html/5.0/openmpsu28.html) where the developer can choose to activate different directive variant based on the condition selected.\n",
+    "- In OpenACC, when using the ```kernels``` construct, the compiler is responsible for mapping and partitioning the program to the underlying hardware. Since the compiler will mostly take care of the parallelization issues, the descriptive approach may generate performant code for a specific architecture. The downside is that the quality of the generated accelerated code depends significantly on the capability of the compiler used and hence the term \"may\".\n",
+    "\n",
+    "\n",
+    "## Support\n",
+    "- Kokkos project is very well documented and the developers support on GitHub is excellent \n",
+    "- OpenACC implementation is present in most popular compilers like NVIDIA HPC SDK, PGI, GCC, Clang and CRAY. \n",
+    "- OpenMP GPU support is currently available on limited compilers but being the most supported programming model for multicore it is a matter of time before it comes on par with other models for GPU support.\n",
+    "- stdpar being part of the C++ standard is bound to become an integral part of most compilers supporting parallelism. \n",
+    "\n",
+    "\n",
+    "Parallel Computing in general has been a difficult task and requires developers not just to know a programming approach but also to think in parallel. While this tutorial provides you with a good start, it is highly recommended to go through Profiling and Optimization bootcamps as next steps.\n",
+    "\n",
+    "-----\n",
+    "\n",
+    "# <div style=\"text-align: center ;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\">[HOME](../../nways_MD_start.ipynb)</div>\n",
+    "\n",
+    "-----\n",
+    "\n",
+    "# Links and Resources\n",
+    "[OpenACC API guide](https://www.openacc.org/sites/default/files/inline-files/OpenACC%20API%202.6%20Reference%20Guide.pdf)\n",
+    "\n",
+    "[NVIDIA Nsight System](https://docs.nvidia.com/nsight-systems/)\n",
+    "\n",
+    "[NVIDIA Nsight Compute](https://developer.nvidia.com/nsight-compute)\n",
+    "\n",
+    "[CUDA Toolkit Download](https://developer.nvidia.com/cuda-downloads)\n",
+    "\n",
+    "**NOTE**: To be able to see the Nsight System profiler output, please download Nsight System latest version from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "\n",
+    "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
+    "\n",
+    "--- \n",
+    "\n",
+    "## Licensing \n",
+    "\n",
+    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 50 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/GPU_Architecture_Terminologies.ipynb

@@ -0,0 +1,50 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Unified Memory\n",
+    "\n",
+    "With every new CUDA and GPU architecture release, new features are added. These new features provide more performance and ease of programming or allow developers to implement new algorithms that otherwise weren't possible to port on GPUs using CUDA.\n",
+    "One such important feature that was released from CUDA 6.0 onward and finds its implementation from the Kepler GPU architecture is unified memory (UM). \n",
+    "\n",
+    "In simpler words, UM provides the user with a view of single memory space that's accessible by all GPUs and CPUs in the system. This is illustrated in the following diagram:\n",
+    "\n",
+    "<img src=\"./images/UM.png\">\n",
+    "\n",
+    "UM simplifies programming effort for beginners to CUDA as developers need not explicitly manage copying data to and from GPU. We will be using this feature of the latest CUDA release and GPU architecture in the labs."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Licensing \n",
+    "\n",
+    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

File diff suppressed because it is too large
+ 401 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/cudac/nways_cuda.ipynb


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/Nsight Diagram.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/Optimization_Cycle.jpg


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/UM.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/allsection-compute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/baseline-compute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/charts-compute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cli-out.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/coalesced_mem.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/collapse_feedback.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/collapse_pre.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/collapse_thread.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute_analyz.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute_command.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute_command_line.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/compute_open.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cpu.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_hw_sw.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_indexing.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_api.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_timeline.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_vec_add.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/data_feedback.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/data_thread.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/diagram.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gang_128.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gang_256.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gang_32.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gang_vector.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/gpu_feedback.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kernel_feedback.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kernel_indep_feedback.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kokkos_abstraction.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kokkos_ecosystem.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/kokkos_mirror_view.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/laplas3.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/launch-compute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nsight_open.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx.PNG


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx_gpu.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx_multicore.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx_serial.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc correlation.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_3_directives.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_construct.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_copyclause.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_parallel.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_parallel2.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openacc_parallel_loop.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_feedback.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_feedback_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_fork_join.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_parallel_construct.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_parallelfor_construct.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_target_distribute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_target_teams.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_teams.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_teams_for.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/page-compute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_data.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_data_feedback.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_detailed.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_expand.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_loop.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_timeline.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/parallel_unified.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/rdf.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/roofline_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/rule-compute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/scheduler_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/serial.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/sol.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/sol_baseline.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_hover.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_loc.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_sass.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/source_sass_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/stdpar_gpu.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/stdpar_um.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/summary-compute.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/thread.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/uncoalesced_hint.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/warp_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/workflow.png


File diff suppressed because it is too large
+ 457 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/kokkos/nways_kokkos.ipynb


File diff suppressed because it is too large
+ 687 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc.ipynb


+ 0 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt.ipynb


Some files were not shown because too many files changed in this diff