Browse Source

Merge pull request #63 from mozhgan-kch/nways_md_c_opt

Nways md optimization - OpenMP and OpenACC + Nsight Compute
Mozhgan Kabiri Chimeh 3 years ago
parent
commit
8437a89d4f
100 changed files with 3401 additions and 561 deletions
  1. 2 0
      .gitignore
  2. 10 12
      hpc/nways/Dockerfile
  3. 6 6
      hpc/nways/Singularity
  4. 1 1
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/Final_Remarks.ipynb
  5. 52 3
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/GPU_Architecture_Terminologies.ipynb
  6. 3 3
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/cudac/nways_cuda.ipynb
  7. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/grid.png
  8. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/mapping.png
  9. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/memory.png
  10. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx_multicore (copy).png
  11. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_baseline.png
  12. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_reg.png
  13. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_reg_memory.png
  14. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_reg_occupancy.png
  15. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_reg_roofline.png
  16. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_gpu_collapse.png
  17. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_collapse.png
  18. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_occupancy.png
  19. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_roofline.png
  20. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_split_cmp.png
  21. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_split_cmp2.png
  22. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_split_grid.png
  23. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_warp_cmp.png
  24. BIN
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/stdpar_gpu.png
  25. 1 8
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/kokkos/nways_kokkos.ipynb
  26. 2 2
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc.ipynb
  27. 8 119
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt.ipynb
  28. 226 0
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt_2.ipynb
  29. 15 13
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp.ipynb
  30. 245 0
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp_opt_1.ipynb
  31. 101 85
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp_opt.ipynb
  32. 4 4
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/serial/rdf_overview.ipynb
  33. 2 2
      hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/stdpar/nways_stdpar.ipynb
  34. 4 1
      hpc/nways/nways_labs/nways_MD/English/C/source_code/openmp/SOLUTION/rdf_offload_collapse_num.cpp
  35. 198 0
      hpc/nways/nways_labs/nways_MD/English/C/source_code/openmp/SOLUTION/rdf_offload_split_num.cpp
  36. 2 2
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/Final_Remarks.ipynb
  37. 52 3
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/GPU_Architecture_Terminologies.ipynb
  38. 2 2
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/cudafortran/nways_cuda.ipynb
  39. 2 2
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/doconcurrent/nways_doconcurrent.ipynb
  40. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_collapse_feedback.png
  41. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_collapse_thread.png
  42. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_compute_analyz.png
  43. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_compute_command.png
  44. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_data_thread.png
  45. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_gang_128.png
  46. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_gang_32.png
  47. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_gang_vector.png
  48. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_memory_collapse.png
  49. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_memory_sec.png
  50. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_offload_compare_nvtx.png
  51. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_offload_grid.png
  52. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openacc_data_directive.png
  53. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_baseline.png
  54. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_reg.png
  55. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_reg_memory.png
  56. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_reg_occupancy.png
  57. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_reg_roofline.png
  58. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_feedback_offload_split.png
  59. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_gpu.png
  60. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_gpu_collapse.png
  61. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_multicore.png
  62. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_collapse.png
  63. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_occupancy.png
  64. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_roofline.png
  65. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_split_cmp.png
  66. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_split_cmp2.png
  67. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_split_grid.png
  68. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_warp_cmp.png
  69. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_roofline_collapse.png
  70. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_sol.png
  71. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_sol_baseline.png
  72. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_source_hover.png
  73. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_source_loc.png
  74. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_source_sass.png
  75. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/grid.png
  76. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/mapping.png
  77. BIN
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/memory.png
  78. 7 7
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openacc/nways_openacc.ipynb
  79. 81 159
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openacc/nways_openacc_opt.ipynb
  80. 224 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openacc/nways_openacc_opt_2.ipynb
  81. 16 108
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openmp/nways_openmp.ipynb
  82. 246 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openmp/nways_openmp_opt_1.ipynb
  83. 501 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openmp/nways_openmp_opt_2.ipynb
  84. 4 4
      hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/serial/rdf_overview.ipynb
  85. 72 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/nvtx.f90
  86. 164 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/rdf_collapse.f90
  87. 165 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/rdf_gang_vector.f90
  88. 165 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/rdf_gang_vector_length.f90
  89. 165 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/rdf_gang_vector_worker.f90
  90. 106 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/nvtx.mod
  91. 119 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/readdata.mod
  92. 72 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openmp/SOLUTION/nvtx.f90
  93. 166 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openmp/SOLUTION/rdf_offload_split.f90
  94. 166 0
      hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openmp/SOLUTION/rdf_offload_split_num.f90
  95. 15 8
      hpc/nways/nways_labs/nways_MD/English/nways_MD_start.ipynb
  96. 9 7
      hpc/nways/nways_labs/nways_start.ipynb
  97. BIN
      hpc/nways/nways_labs/profiler/English/jupyter_notebook/images/SOL-compute.png
  98. BIN
      hpc/nways/nways_labs/profiler/English/jupyter_notebook/images/baseline-compute.png
  99. BIN
      hpc/nways/nways_labs/profiler/English/jupyter_notebook/images/baseline1-compute.png
  100. 0 0
      hpc/nways/nways_labs/profiler/English/jupyter_notebook/images/compute-cli-1.png

+ 2 - 0
.gitignore

@@ -2,3 +2,5 @@
 */.ipynb_checkpoints/*
 alk.traj.dcd
 *.simg
+*/.ses/*
+*/.log/*

+ 10 - 12
hpc/nways/Dockerfile

@@ -16,17 +16,14 @@ RUN apt-get -y update && \
 
 ############################################
 # NVIDIA nsight-systems-2020.5.1 ,nsight-compute-2
-RUN apt-get update -y && \
-        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-        apt-transport-https \
-        ca-certificates \
-        gnupg \
-        wget && \
-        apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80 && \
-        echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list &&\
-        apt-get update -y
-
-RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.5.1 nsight-compute-2020.2.1 
+#RUN apt-get update -y && \
+#        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+#       apt-transport-https ca-certificates gnupg wget && \
+#        apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80 && \
+#        echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list &&\
+RUN apt-get update -y
+
+# RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.5.1 nsight-compute-2020.2.1 
 
 # TO COPY the data
 COPY nways_labs/ /labs/
@@ -36,7 +33,8 @@ RUN python3 /labs/nways_MD/English/Fortran/source_code/dataset.py
 
 #################################################
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib:/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/cuda/11.3/lib64/"
-ENV PATH="/opt/nvidia/nsight-systems/2020.5.1/bin:/opt/nvidia/nsight-compute/2020.2.1:/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/cuda/11.2/include:/usr/local/bin:/opt/anaconda3/bin:/usr/bin:$PATH"
+#ENV PATH="/opt/nvidia/nsight-systems/2020.5.1/bin:/opt/nvidia/nsight-compute/2020.2.1:/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/cuda/11.2/include:/usr/local/bin:/opt/anaconda3/bin:/usr/bin:$PATH"
+ENV PATH="/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/cuda/11.2/include:/usr/local/bin:/opt/anaconda3/bin:/usr/bin:$PATH"
 #################################################
 
 ADD nways_labs/ /labs

+ 6 - 6
hpc/nways/Singularity

@@ -7,7 +7,7 @@ FROM: nvcr.io/nvidia/nvhpc:21.3-devel-cuda_multi-ubuntu20.04
 %environment
     export XDG_RUNTIME_DIR=
     export PATH="$PATH:/usr/local/bin:/opt/anaconda3/bin:/usr/bin"
-    export PATH=/opt/nvidia/nsight-systems/2020.5.1/bin:/opt/nvidia/nsight-compute/2020.2.1:$PATH
+   # export PATH=/opt/nvidia/nsight-systems/2020.5.1/bin:/opt/nvidia/nsight-compute/2020.2.1:$PATH
     export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib:/opt/nvidia/hpc_sdk/Linux_x86_64/21.3/cuda/11.2/lib64/"
 
 %post
@@ -33,12 +33,12 @@ FROM: nvcr.io/nvidia/nvhpc:21.3-devel-cuda_multi-ubuntu20.04
 
 
 # NVIDIA nsight-systems-2020.5.1 ,nsight-compute-2
-    apt-get update -y   
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg wget
-    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80
-    echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list 
+  # apt-get update -y   
+  #  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg wget
+  #  apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80
+  #  echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list 
     apt-get update -y 
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.5.1 nsight-compute-2020.2.1 
+  #  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.5.1 nsight-compute-2020.2.1 
     apt-get install --no-install-recommends -y build-essential
 
     wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh 

+ 1 - 1
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/Final_Remarks.ipynb

@@ -95,7 +95,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],

+ 52 - 3
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/GPU_Architecture_Terminologies.ipynb

@@ -4,14 +4,63 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Unified Memory\n",
+    "### Thread\n",
+    "A basic element of the data to be processed on the GPU.\n",
+    "\n",
+    "### CUDA Blocks\n",
+    "A group of threads which can communicate within their own block.\n",
+    "### Grid\n",
+    "CUDA blocks are grouped into a grid. Blocks are independent of each other.\n",
+    "\n",
+    "### Kernel\n",
+    "A kernel is executed as a grid of blocks of threads.\n",
+    "\n",
+    "<img src=\"images/grid.png\" width=\"50%\" height=\"50%\">\n",
+    "\n",
+    "### Streaming Multiprocessor (SM) \n",
+    "A streaming multiprocessor contains multiple processing cores. Each CUDA block is executed by one streaming multiprocessor (SM) and cannot be migrated to other SMs in the GPU. One SM can run several concurrent CUDA blocks, depending on the resources those blocks need. Each kernel is executed on one device, and CUDA supports running multiple kernels on a device at one time. The figure below shows kernel execution and its mapping onto the hardware resources available in the GPU.\n",
+    "\n",
+    "<img src=\"images/mapping.png\" width=\"50%\" height=\"50%\">\n",
+    "\n",
+    "### Warp\n",
+    "32 threads form a warp. The SM has a maximum number of warps that can be active at once. \n",
+    "\n",
+    "### Memory Hierarchy\n",
+    "CUDA-capable GPUs have a memory hierarchy as shown below:\n",
+    "\n",
+    "<img src=\"images/memory.png\" width=\"50%\" height=\"50%\">\n",
+    "\n",
+    "The following memories are exposed by the GPU architecture:\n",
+    "\n",
+    "- **Registers** : These are private to each thread, which means that registers assigned to a thread are not visible to other threads. The compiler makes decisions about register utilization.\n",
+    "- **L1/Shared memory (SMEM)** : Every SM has a fast, on-chip scratchpad memory that can be used as L1 cache and shared memory. All threads in a CUDA block can share shared memory, and all CUDA blocks running on a given SM can share the physical memory resource provided by the SM.\n",
+    "- **Read-only memory** : Each SM has an instruction cache, constant memory, texture memory, and a read-only cache, all of which are read-only to kernel code.\n",
+    "- **L2 cache** : The L2 cache is shared across all SMs, so every thread in every CUDA block can access this memory. The NVIDIA A100 GPU has increased the L2 cache size to 40 MB as compared to 6 MB in V100 GPUs.\n",
+    "- **Global memory** : This is the GPU's DRAM (the framebuffer).\n",
+    "\n",
+    "To learn more, please check out the CUDA Refresher series at https://developer.nvidia.com/blog/tag/cuda-refresher/ .\n",
+    "\n",
+    "\n",
+    "### Occupancy\n",
+    "The Streaming Multiprocessor (SM) has a maximum number of warps that can be active at once. Occupancy is the ratio of active warps to maximum supported active warps. Occupancy is 100% if the number of active warps equals the maximum. If this factor is limiting active blocks, occupancy cannot be increased. \n",
+    "\n",
+    "The Streaming Multiprocessor (SM) also has a maximum number of blocks that can be active at once. If occupancy is below 100% and this factor is limiting active blocks, each block does not contain enough warps to reach 100% occupancy when the device's active block limit is reached. Occupancy can be increased by increasing the block size. \n",
+    "\n",
+    "To learn more about occupancy, check out https://docs.nvidia.com/gameworks/content/developertools/desktop/analysis/report/cudaexperiments/kernellevel/achievedoccupancy.htm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Unified Memory\n",
     "\n",
     "With every new CUDA and GPU architecture release, new features are added. These new features provide more performance and ease of programming or allow developers to implement new algorithms that otherwise weren't possible to port on GPUs using CUDA.\n",
     "One such important feature that was released from CUDA 6.0 onward and finds its implementation from the Kepler GPU architecture is unified memory (UM). \n",
     "\n",
     "In simpler words, UM provides the user with a view of single memory space that's accessible by all GPUs and CPUs in the system. This is illustrated in the following diagram:\n",
     "\n",
-    "<img src=\"./images/UM.png\">\n",
+    "<img src=\"./images/UM.png\" width=\"80%\" height=\"80%\">\n",
     "\n",
     "UM simplifies programming effort for beginners to CUDA as developers need not explicitly manage copying data to and from GPU. We will be using this feature of latest CUDA release and GPU architecture in labs."
    ]
@@ -22,7 +71,7 @@
    "source": [
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],

+ 3 - 3
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/cudac/nways_cuda.ipynb

@@ -78,7 +78,7 @@
     "\n",
     "The diagram below shows a higher level of abstraction of components of GPU hardware and its respective programming model mapping. \n",
     "\n",
-    "<img src=\"../images/cuda_hw_sw.png\">\n",
+    "<img src=\"../images/cuda_hw_sw.png\" width=\"80%\" height=\"80%\">\n",
     "\n",
    "As shown in the diagram above, the CUDA programming model is tightly coupled with the hardware design. This makes CUDA one of the most efficient parallel programming models for shared memory systems. Another way to look at the diagram shown above is given below: \n",
     "\n",
@@ -398,7 +398,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],
@@ -418,7 +418,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/grid.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/mapping.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/memory.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/nvtx_multicore (copy).png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_baseline.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_reg.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_reg_memory.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_reg_occupancy.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_collapse_reg_roofline.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_gpu_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_occupancy.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_roofline.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_split_cmp.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_split_cmp2.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_offload_split_grid.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/openmp_warp_cmp.png


BIN
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/stdpar_gpu.png


+ 1 - 8
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/kokkos/nways_kokkos.ipynb

@@ -422,15 +422,8 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

+ 2 - 2
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc.ipynb

@@ -670,7 +670,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],
@@ -690,7 +690,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

File diff is too large
+ 8 - 119
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt.ipynb


File diff is too large
+ 226 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt_2.ipynb


File diff is too large
+ 15 - 13
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp.ipynb


File diff is too large
+ 245 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp_opt_1.ipynb


File diff is too large
+ 101 - 85
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp_opt.ipynb


+ 4 - 4
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/serial/rdf_overview.ipynb

@@ -44,11 +44,11 @@
     "\n",
     "-----\n",
     "\n",
-    "# <div style=\"text-align: center ;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\">[Profiling lab](../../../../../profiler/English/jupyter_notebook/profiling-c.ipynb)</div> \n",
+    "# <div style=\"text-align: center ;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\">[Profiling lab](../../../../../profiler/English/jupyter_notebook/nsight_systems.ipynb)</div> \n",
     "\n",
     "-----\n",
     "\n",
-    "Now, that we are familiar with the Nsight Profiler and know how to [NVTX](../../../../../profiler/English/jupyter_notebook/profiling-c.ipynb#nvtx), let's profile the serial code and checkout the output."
+    "Now that we are familiar with the Nsight Profiler and know how to use [NVTX](../../../../../profiler/English/jupyter_notebook/nsight_systems.ipynb#nvtx), let's profile the serial code and check out the output."
    ]
   },
   {
@@ -101,7 +101,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],
@@ -121,7 +121,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

+ 2 - 2
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/stdpar/nways_stdpar.ipynb

@@ -287,7 +287,7 @@
     "\n",
     "If you inspect the output of the profiler closer, you can see the usage of *Unified Memory* annotated with green rectangle which was explained in previous sections.\n",
     "\n",
-    "Moreover, if you compare the NVTX marker `Pair_Calculation` (from the NVTX row) in both multicore and GPU version, you can see how much improvement you achieved. In the *example screenshot*, we were able to reduce that range from 1.52 seconds to 188.4 mseconds.\n",
+    "Moreover, if you compare the NVTX marker `Pair_Calculation` (from the NVTX row) in both the multicore and GPU versions, you can see how much improvement you achieved. In the *example screenshot*, we were able to reduce that range from 1.52 seconds to 48.8 milliseconds.\n",
     "\n",
     "Feel free to checkout the [solution](../../source_code/stdpar/SOLUTION/rdf.cpp) to help you understand better or compare your implementation with the sample solution."
    ]
@@ -361,7 +361,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],

+ 4 - 1
hpc/nways/nways_labs/nways_MD/English/C/source_code/openmp/SOLUTION/rdf_offload_collapse_num.cpp

@@ -166,12 +166,15 @@ void pair_gpu(const double *d_x, const double *d_y, const double *d_z,
     cut = box * 0.5;
     int count = 0;
     printf("\n %d %d ", nconf, numatm);
+
     for (int frame = 0; frame < nconf; frame++)
     {
         printf("\n %d  ", frame);
-#pragma omp target teams distribute parallel for private(dx, dy, dz, r, ig2) collapse(2) num_threads(256)
+#pragma omp target teams distribute
+
         for (int id1 = 0; id1 < numatm; id1++)
         {
+#pragma omp parallel for private(dx, dy, dz, r, ig2)
             for (int id2 = 0; id2 < numatm; id2++)
             {
                 dx = d_x[frame * numatm + id1] - d_x[frame * numatm + id2];

+ 198 - 0
hpc/nways/nways_labs/nways_MD/English/C/source_code/openmp/SOLUTION/rdf_offload_split_num.cpp

@@ -0,0 +1,198 @@
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <math.h>
+#include <cstring>
+#include <cstdio>
+#include <iomanip>
+#include <omp.h>
+#include "dcdread.h"
+#include <assert.h>
+#include <nvtx3/nvToolsExt.h>
+
+void pair_gpu(const double *d_x, const double *d_y, const double *d_z,
+              unsigned int *d_g2, int numatm, int nconf,
+              const double xbox, const double ybox, const double zbox,
+              int d_bin);
+
+int main(int argc, char *argv[])
+{
+    double xbox, ybox, zbox;
+    double *h_x, *h_y, *h_z;
+    unsigned int *h_g2;
+    int nbin;
+    int numatm, nconf, inconf;
+    string file;
+
+    ///////////////////////////////////////////////////////////////
+
+    inconf = 10;
+    nbin = 2000;
+    file = "../input/alk.traj.dcd";
+    ///////////////////////////////////////
+    std::ifstream infile;
+    infile.open(file.c_str());
+    if (!infile)
+    {
+        cout << "file " << file.c_str() << " not found\n";
+        return 1;
+    }
+    assert(infile);
+
+    ofstream pairfile, stwo;
+    pairfile.open("RDF.dat");
+    stwo.open("Pair_entropy.dat");
+
+    /////////////////////////////////////////////////////////
+    dcdreadhead(&numatm, &nconf, infile);
+    cout << "Dcd file has " << numatm << " atoms and " << nconf << " frames" << endl;
+    if (inconf > nconf)
+        cout << "nconf is reset to " << nconf << endl;
+    else
+    {
+        nconf = inconf;
+    }
+    cout << "Calculating RDF for " << nconf << " frames" << endl;
+    ////////////////////////////////////////////////////////
+
+    unsigned long long int sizef = nconf * numatm * sizeof(double);
+    unsigned long long int sizebin = nbin * sizeof(unsigned int);
+
+    h_x = (double *)malloc(sizef);
+    h_y = (double *)malloc(sizef);
+    h_z = (double *)malloc(sizef);
+    h_g2 = (unsigned int *)malloc(sizebin);
+
+    memset(h_g2, 0, sizebin);
+
+    /////////reading coordinates//////////////////////////////////////////////
+    nvtxRangePush("Read_File");
+
+    double ax[numatm], ay[numatm], az[numatm];
+    for (int i = 0; i < nconf; i++)
+    {
+        dcdreadframe(ax, ay, az, infile, numatm, xbox, ybox, zbox);
+        for (int j = 0; j < numatm; j++)
+        {
+            h_x[i * numatm + j] = ax[j];
+            h_y[i * numatm + j] = ay[j];
+            h_z[i * numatm + j] = az[j];
+        }
+    }
+    nvtxRangePop(); // pop for reading file
+    cout << "Reading of input file is completed" << endl;
+//////////////////////////////////////////////////////////////////////////
+#pragma omp target data map(h_x [0:nconf * numatm], h_y [0:nconf * numatm], h_z [0:nconf * numatm], h_g2 [0:nbin])
+    {
+        nvtxRangePush("Pair_Calculation");
+        pair_gpu(h_x, h_y, h_z, h_g2, numatm, nconf, xbox, ybox, zbox, nbin);
+        nvtxRangePop(); //Pop for Pair Calculation
+    }
+    ////////////////////////////////////////////////////////////////////////
+    double pi = acos(-1.0);
+    double rho = (numatm) / (xbox * ybox * zbox);
+    double norm = (4.0l * pi * rho) / 3.0l;
+    double rl, ru, nideal;
+    double g2[nbin];
+    double r, gr, lngr, lngrbond, s2 = 0.0l, s2bond = 0.0l;
+    double box = min(xbox, ybox);
+    box = min(box, zbox);
+    double del = box / (2.0l * nbin);
+    nvtxRangePush("Entropy_Calculation");
+    for (int i = 0; i < nbin; i++)
+    {
+        rl = (i)*del;
+        ru = rl + del;
+        nideal = norm * (ru * ru * ru - rl * rl * rl);
+        g2[i] = (double)h_g2[i] / ((double)nconf * (double)numatm * nideal);
+        r = (i)*del;
+        pairfile << (i + 0.5l) * del << " " << g2[i] << endl;
+        if (r < 2.0l)
+        {
+            gr = 0.0l;
+        }
+        else
+        {
+            gr = g2[i];
+        }
+        if (gr < 1e-5)
+        {
+            lngr = 0.0l;
+        }
+        else
+        {
+            lngr = log(gr);
+        }
+
+        if (g2[i] < 1e-6)
+        {
+            lngrbond = 0.0l;
+        }
+        else
+        {
+            lngrbond = log(g2[i]);
+        }
+        s2 = s2 - 2.0l * pi * rho * ((gr * lngr) - gr + 1.0l) * del * r * r;
+        s2bond = s2bond - 2.0l * pi * rho * ((g2[i] * lngrbond) - g2[i] + 1.0l) * del * r * r;
+    }
+    nvtxRangePop(); //Pop for Entropy Calculation
+    stwo << "s2 value is " << s2 << endl;
+    stwo << "s2bond value is " << s2bond << endl;
+
+    cout << "#Freeing Host memory" << endl;
+    free(h_x);
+    free(h_y);
+    free(h_z);
+    free(h_g2);
+
+    cout << "#Number of atoms processed: " << numatm << endl
+         << endl;
+    cout << "#Number of confs processed: " << nconf << endl
+         << endl;
+    return 0;
+}
+void pair_gpu(const double *d_x, const double *d_y, const double *d_z,
+              unsigned int *d_g2, int numatm, int nconf,
+              const double xbox, const double ybox, const double zbox, int d_bin)
+{
+    double r, cut, dx, dy, dz;
+    int ig2;
+    double box;
+    box = min(xbox, ybox);
+    box = min(box, zbox);
+
+    double del = box / (2.0 * d_bin);
+    cut = box * 0.5;
+    int count = 0;
+    printf("\n %d %d ", nconf, numatm);
+
+    for (int frame = 0; frame < nconf; frame++)
+    {
+        printf("\n %d  ", frame);
+#pragma omp target teams distribute num_teams(65535)
+        for (int id1 = 0; id1 < numatm; id1++)
+        {
+#pragma omp parallel for private(dx, dy, dz, r, ig2) 
+            for (int id2 = 0; id2 < numatm; id2++)
+            {
+                dx = d_x[frame * numatm + id1] - d_x[frame * numatm + id2];
+                dy = d_y[frame * numatm + id1] - d_y[frame * numatm + id2];
+                dz = d_z[frame * numatm + id1] - d_z[frame * numatm + id2];
+
+                dx = dx - xbox * (round(dx / xbox));
+                dy = dy - ybox * (round(dy / ybox));
+                dz = dz - zbox * (round(dz / zbox));
+
+                r = sqrt(dx * dx + dy * dy + dz * dz);
+                if (r < cut)
+                {
+                    ig2 = (int)(r / del);
+#pragma omp atomic
+                    d_g2[ig2] = d_g2[ig2] + 1;
+                }
+            }
+        }
+    } //frame ends
+}

+ 2 - 2
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/Final_Remarks.ipynb

@@ -93,7 +93,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
    ]
   }
  ],
@@ -113,7 +113,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

+ 52 - 3
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/GPU_Architecture_Terminologies.ipynb

@@ -4,14 +4,63 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Unified Memory\n",
+    "### Thread\n",
+    "A basic element of the data to be processed on the GPU.\n",
+    "\n",
+    "### CUDA Blocks\n",
+    "A collection or group  of threads which can communicate within their own block.\n",
+    "### Grid\n",
+    "CUDA blocks are grouped into a grid. Blocks are independent of each other.\n",
+    "\n",
+    "### Kernel\n",
+    "A kernel is executed as a grid of blocks of threads.\n",
+    "\n",
+    "<img src=\"images/grid.png\" width=\"50%\" height=\"50%\">\n",
+    "\n",
+    "### Streaming Multiprocessor (SM) \n",
+    "Streaming multi-processors with multiple processing cores. Each CUDA block is executed by one streaming multiprocessor (SM) and cannot be migrated to other SMs in GPU. One SM can run several concurrent CUDA blocks depending on the resources needed by CUDA blocks. Each kernel is executed on one device and CUDA supports running multiple kernels on a device at one time. Below figure shows the kernel execution and mapping on hardware resources available in GPU.\n",
+    "\n",
+    "<img src=\"images/mapping.png\" width=\"50%\" height=\"50%\">\n",
+    "\n",
+    "### Warp\n",
+    "32 threads form a warp.The SM has a maximum number of warps that can be active at once. \n",
+    "\n",
+    "### Memory Hierarchy\n",
+    "CUDA-capable GPUs have a memory hierarchy as shown below:\n",
+    "\n",
+    "<img src=\"images/memory.png\" width=\"50%\" height=\"50%\">\n",
+    "\n",
+    "The following memories are exposed by the GPU architecture:\n",
+    "\n",
+    "- **Registers** : These are private to each thread, which means that registers assigned to a thread are not visible to other threads. The compiler makes decisions about register utilization.\n",
+    "- **L1/Shared memory (SMEM)** : Every SM has a fast, on-chip scratchpad memory that can be used as L1 cache and shared memory. All threads in a CUDA block can share shared memory, and all CUDA blocks running on a given SM can share the physical memory resource provided by the SM..\n",
+    "- **Read-only memory** : Each SM has an instruction cache, constant memory,  texture memory and RO cache, which is read-only to kernel code.\n",
+    "- **L2 cache** : The L2 cache is shared across all SMs, so every thread in every CUDA block can access this memory. The NVIDIA A100 GPU has increased the L2 cache size to 40 MB as compared to 6 MB in V100 GPUs.\n",
+    "- **Global memory** : This is the framebuffer size of the GPU and DRAM sitting in the GPU.\n",
+    "\n",
+    "To learn more, please checkout the CUDA Refresher series at https://developer.nvidia.com/blog/tag/cuda-refresher/ .\n",
+    "\n",
+    "\n",
+    "### Occupancy\n",
+    "The Streaming Multiprocessor (SM) has a maximum number of warps that can be active at once. Occupancy is the ratio of active warps to maximum supported active warps. Occupancy is 100% if the number of active warps equals the maximum. If this factor is limiting active blocks, occupancy cannot be increased. \n",
+    "\n",
+    "The Streaming Multiprocessor (SM)  has a maximum number of blocks that can be active at once. If occupancy is below 100% and this factor is limiting active blocks, it means each block does not contain enough warps to reach 100% occupancy when the device's active block limit is reached. Occupancy can be increased by increasing block size. \n",
+    "\n",
+    "To learn more about occupancy, checkout https://docs.nvidia.com/gameworks/content/developertools/desktop/analysis/report/cudaexperiments/kernellevel/achievedoccupancy.htm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Unified Memory\n",
     "\n",
     "With every new CUDA and GPU architecture release, new features are added. These new features provide more performance and ease of programming or allow developers to implement new algorithms that otherwise weren't possible to port on GPUs using CUDA.\n",
     "One such important feature that was released from CUDA 6.0 onward and finds its implementation from the Kepler GPU architecture is unified memory (UM). \n",
     "\n",
     "In simpler words, UM provides the user with a view of single memory space that's accessible by all GPUs and CPUs in the system. This is illustrated in the following diagram:\n",
     "\n",
-    "<img src=\"./images/UM.png\">\n",
+    "<img src=\"./images/UM.png\" width=\"80%\" height=\"80%\">\n",
     "\n",
     "UM simplifies programming effort for beginners to CUDA as developers need not explicitly manage copying data to and from GPU. We will be using this feature of latest CUDA release and GPU architecture in labs."
    ]
@@ -22,7 +71,7 @@
    "source": [
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],

+ 2 - 2
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/cudafortran/nways_cuda.ipynb

@@ -494,7 +494,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],
@@ -514,7 +514,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

+ 2 - 2
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/doconcurrent/nways_doconcurrent.ipynb

@@ -323,7 +323,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],
@@ -343,7 +343,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_collapse_feedback.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_collapse_thread.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_compute_analyz.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_compute_command.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_data_thread.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_gang_128.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_gang_32.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_gang_vector.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_memory_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_memory_sec.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_offload_compare_nvtx.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_offload_grid.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openacc_data_directive.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_baseline.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_reg.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_reg_memory.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_reg_occupancy.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_collapse_reg_roofline.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_feedback_offload_split.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_gpu.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_gpu_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_multicore.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_occupancy.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_roofline.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_split_cmp.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_split_cmp2.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_offload_split_grid.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_openmp_warp_cmp.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_roofline_collapse.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_sol.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_sol_baseline.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_source_hover.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_source_loc.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/f_source_sass.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/grid.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/mapping.png


BIN
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/images/memory.png


+ 7 - 7
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openacc/nways_openacc.ipynb

@@ -26,7 +26,7 @@
     "- Learn how to run the same code on both a multicore CPU and a GPU using the OpenACC programming model\n",
     "- Understand the key directives and steps involved in making a sequential code parallel\n",
     "- Learn how to interpret the compiler feedback\n",
-    "- Learn and understand the Nsight systems profiler report\n",
+    "- Learn and understand the Nsight Systems profiler report\n",
     "\n",
     "We do not intend to cover:\n",
     "- Optimization techniques in details\n",
@@ -41,11 +41,11 @@
     "\n",
     "**!$acc** in Fortran is what's known as a \"compiler hint.\" These are very similar to programmer comments, however, the compiler will actually read our comments. They are a way for the programmer to \"guide\" the compiler, without running the chance damaging the code. If the compiler does not understand the comment, it can ignore it, rather than throw a syntax error.\n",
     "\n",
-    "**acc** specifies that this is an OpenACC related directive that will folow. Any non-OpenACC compiler will ignore this. \n",
+    "**acc** specifies that this is an OpenACC related directive that will follow. Any non-OpenACC compiler will ignore this. \n",
     "\n",
     "**directives** are commands in OpenACC that will tell the compiler to do some action. For now, we will only use directives that allow the compiler to parallelize our code.\n",
     "\n",
-    "**clauses** are additions/alterations to our directives. These include (but are not limited to) optimizations. One way to think about it: directives describe a general action for our compiler to do (such as, paralellize our code), and clauses allow the programmer to be more specific (such as, how we specifically want the code to be parallelized).\n",
+    "**clauses** are additions/alterations to our directives. These include (but are not limited to) optimizations. One way to think about it: directives describe a general action for our compiler to do (such as, parallelize our code), and clauses allow the programmer to be more specific (such as, how we specifically want the code to be parallelized).\n",
     "\n",
     "## 3 Key Directives\n",
     "\n",
@@ -322,7 +322,7 @@
     "\n",
     "| Compiler | Latest Version | Maintained by | Full or Partial Support |\n",
     "| --- | --- | --- | --- |\n",
-    "| HPC SDK| 21.3 | NVIDIA HPC SDK | Full 2.5 spec |\n",
+    "| HPC SDK| 21.7 | NVIDIA HPC SDK | Full 2.5 spec |\n",
     "| GCC | 10 | Mentor Graphics, SUSE | 2.0 spec, Limited Kernel directive support, No Unified Memory |\n",
     "| CCE| latest | Cray | 2.0 Spec | \n"
    ]
@@ -640,7 +640,7 @@
    "source": [
     "Let's checkout the profiler's report. [Download the profiler output](../../source_code/openacc/rdf_no_managed.qdrep) and open it via the GUI. Have a look at the example expected profiler report below:\n",
     "\n",
-    "<img src=\"../images/parallel_data.jpg\">\n",
+    "<img src=\"../images/f_openacc_data_directive.png\">\n",
     "\n",
     "Have a look at the data movements annotated with green color and compare it with the previous versions. We have accelerated the application and reduced the execution time by eliminating the unnecessary data transfers between CPU and GPU.\n",
     "\n",
@@ -706,7 +706,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],
@@ -726,7 +726,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

Diff file is too large
+ 81 - 159
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openacc/nways_openacc_opt.ipynb


Diff file is too large
+ 224 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openacc/nways_openacc_opt_2.ipynb


Diff file is too large
+ 16 - 108
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openmp/nways_openmp.ipynb


Diff file is too large
+ 246 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openmp/nways_openmp_opt_1.ipynb


Diff file is too large
+ 501 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/openmp/nways_openmp_opt_2.ipynb


+ 4 - 4
hpc/nways/nways_labs/nways_MD/English/Fortran/jupyter_notebook/serial/rdf_overview.ipynb

@@ -42,11 +42,11 @@
     "\n",
     "-----\n",
     "\n",
-    "# <div style=\"text-align: center ;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\">[Profiling lab](../../../../../profiler/English/jupyter_notebook/profiling.ipynb)</div> \n",
+    "# <div style=\"text-align: center ;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\">[Profiling lab](../../../../../profiler/English/jupyter_notebook/nsight_systems.ipynb)</div> \n",
     "\n",
     "-----\n",
     "\n",
-    "Now, that we are familiar with the Nsight Profiler and know how to [NVTX](../../../../../profiler/English/jupyter_notebook/profiling.ipynb#nvtx), let's profile the serial code and checkout the output."
+    "Now, that we are familiar with the Nsight Profiler and know how to [NVTX](../../../../../profiler/English/jupyter_notebook/nsight_systems.ipynb#nvtx), let's profile the serial code and checkout the output."
    ]
   },
   {
@@ -99,7 +99,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],
@@ -119,7 +119,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

+ 72 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/nvtx.f90

@@ -0,0 +1,72 @@
+module nvtx
+
+use iso_c_binding
+implicit none
+
+integer,private :: col(7) = [ Z'0000ff00', Z'000000ff', Z'00ffff00', Z'00ff00ff', Z'0000ffff', Z'00ff0000', Z'00ffffff']
+character,private,target :: tempName(256)
+
+type, bind(C):: nvtxEventAttributes
+  integer(C_INT16_T):: version=1
+  integer(C_INT16_T):: size=48 !
+  integer(C_INT):: category=0
+  integer(C_INT):: colorType=1 ! NVTX_COLOR_ARGB = 1
+  integer(C_INT):: color
+  integer(C_INT):: payloadType=0 ! NVTX_PAYLOAD_UNKNOWN = 0
+  integer(C_INT):: reserved0
+  integer(C_INT64_T):: payload   ! union uint,int,double
+  integer(C_INT):: messageType=1  ! NVTX_MESSAGE_TYPE_ASCII     = 1 
+  type(C_PTR):: message  ! ascii char
+end type
+
+interface nvtxRangePush
+  ! push range with custom label and standard color
+  subroutine nvtxRangePushA(name) bind(C, name='nvtxRangePushA')
+  use iso_c_binding
+  character(kind=C_CHAR) :: name(256)
+  end subroutine
+
+  ! push range with custom label and custom color
+  subroutine nvtxRangePushEx(event) bind(C, name='nvtxRangePushEx')
+  use iso_c_binding
+  import:: nvtxEventAttributes
+  type(nvtxEventAttributes):: event
+  end subroutine
+end interface
+
+interface nvtxRangePop
+  subroutine nvtxRangePop() bind(C, name='nvtxRangePop')
+  end subroutine
+end interface
+
+contains
+
+subroutine nvtxStartRange(name,id)
+  character(kind=c_char,len=*) :: name
+  integer, optional:: id
+  type(nvtxEventAttributes):: event
+  character(kind=c_char,len=256) :: trimmed_name
+  integer:: i
+
+  trimmed_name=trim(name)//c_null_char
+
+  ! move scalar trimmed_name into character array tempName
+  do i=1,LEN(trim(name)) + 1
+     tempName(i) = trimmed_name(i:i)
+  enddo
+
+
+  if ( .not. present(id)) then
+    call nvtxRangePush(tempName)
+  else
+    event%color=col(mod(id,7)+1)
+    event%message=c_loc(tempName)
+    call nvtxRangePushEx(event)
+  end if
+end subroutine
+
+subroutine nvtxEndRange
+  call nvtxRangePop
+end subroutine
+
+end module nvtx

+ 164 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/rdf_collapse.f90

@@ -0,0 +1,164 @@
+!/////////////////////////////////////////////////////////////////////////////////////////
+!// Author: Manish Agarwal and Gourav Shrivastava  , IIT Delhi
+!/////////////////////////////////////////////////////////////////////////////////////////
+
+! Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
+
+module readdata
+      contains
+      subroutine readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      integer i,j
+      integer maxframes,maxatoms
+
+      double precision d(6),xbox,ybox,zbox
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+
+      real*4 dummyr
+      integer*4 nset, natoms, dummyi,nframes,tframes
+      character*4 dummyc
+      
+      open(10,file='../input/alk.traj.dcd',status='old',form='unformatted')
+      read(10) dummyc, tframes,(dummyi,i=1,8),dummyr, (dummyi,i=1,9)
+      read(10) dummyi, dummyr,dummyr
+      read(10) natoms
+      print*,"Total number of frames and atoms are",tframes,natoms
+
+      allocate ( x(maxframes,natoms) )
+      allocate ( y(maxframes,natoms) )
+      allocate ( z(maxframes,natoms) )
+
+      do i = 1,nframes
+           read(10) (d(j),j=1, 6)
+              
+           read(10) (x(i,j),j=1,natoms)
+           read(10) (y(i,j),j=1,natoms)
+           read(10) (z(i,j),j=1,natoms)
+      end do
+      
+      xbox=d(1)
+      ybox=d(3)
+      zbox=d(6)
+      
+      print*,"File reading is done: xbox,ybox,zbox",xbox,ybox,zbox
+      return
+
+      end subroutine readdcd
+ end module readdata
+
+program rdf
+      use readdata
+      use nvtx
+      implicit none
+      integer n,i,j,iconf,ind
+      integer natoms,nframes,nbin
+      integer maxframes,maxatoms
+      parameter (maxframes=10,maxatoms=60000,nbin=2000)
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+      double precision dx,dy,dz
+      double precision xbox,ybox,zbox,cut
+      double precision vol,r,del,s2,s2bond
+      double precision, allocatable   ::  g(:)
+      double precision rho,gr,lngr,lngrbond,pi,const,nideal,rf
+      double precision rlower,rupper
+      character  atmnm*4
+      real*4 start,finish
+        
+      open(23,file='RDF.dat',status='unknown')
+      open(24,file='Pair_entropy.dat',status='unknown')
+
+      nframes=10
+         
+      call cpu_time(start)
+
+      print*,"Going to read coordinates"
+      call nvtxStartRange("Read File")
+      call readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      call nvtxEndRange
+
+      allocate ( g(nbin) )
+      g = 0.0d0
+ 
+      pi=dacos(-1.0d0)
+      vol=xbox*ybox*zbox
+      rho=dble(natoms)/vol
+
+      del=xbox/dble(2.0*nbin)
+      write(*,*) "bin width is : ",del
+      cut = dble(xbox * 0.5);
+
+      !pair calculation
+      call nvtxStartRange("Pair Calculation")
+      !$acc data copy(g(:)) copyin(x(:,:),z(:,:),y(:,:))
+      do iconf=1,nframes
+         if (mod(iconf,1).eq.0) print*,iconf
+         !$acc parallel loop collapse(2) default(present)
+         do i=1,natoms
+            do j=1,natoms
+               dx=x(iconf,i)-x(iconf,j)
+               dy=y(iconf,i)-y(iconf,j)
+               dz=z(iconf,i)-z(iconf,j)
+
+               dx=dx-nint(dx/xbox)*xbox
+               dy=dy-nint(dy/ybox)*ybox
+               dz=dz-nint(dz/zbox)*zbox
+   
+               r=dsqrt(dx**2+dy**2+dz**2)
+               ind=int(r/del)+1
+               if(r<cut)then
+                  !$acc atomic
+                  g(ind)=g(ind)+1.0d0
+               endif
+            enddo
+         enddo
+      enddo
+      !$acc end data
+      call nvtxEndRange
+
+      !entropy calculation
+      s2=0.01d0
+      s2bond=0.01d0 
+      const=(4.0d0/3.0d0)*pi*rho
+      call nvtxStartRange("Entropy Calculation")
+      do i=1,nbin
+          rlower=dble((i-1)*del)
+          rupper=rlower+del
+          nideal=const*(rupper**3-rlower**3)
+          g(i)=g(i)/(dble(nframes)*dble(natoms)*nideal)
+          r=dble(i)*del
+          if (r.lt.2.0) then
+            gr=0.0
+          else
+            gr=g(i)
+          endif
+
+          if (gr.lt.1e-5) then
+            lngr=0.0
+          else
+            lngr=dlog(gr)
+          endif
+          if (g(i).lt.1e-6) then
+            lngrbond=0.01
+          else
+            lngrbond=dlog(g(i))
+          endif
+
+          s2=s2-2*pi*rho*((gr*lngr)-gr+1)*del*r**2.0
+          s2bond=s2bond-2*pi*rho*((g(i)*lngrbond)-g(i)+1)*del*r*r
+
+          
+          rf=dble(i-.5)*del
+          write(23,*) rf,g(i)
+      enddo
+      call nvtxEndRange
+
+      write(24,*)"s2      : ",s2
+      write(24,*)"s2bond  : ",s2bond
+      call cpu_time(finish)
+      print*,"starting at time",start,"and ending at",finish
+      stop
+      deallocate(x,y,z,g)
+end

+ 165 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/rdf_gang_vector.f90

@@ -0,0 +1,165 @@
+!/////////////////////////////////////////////////////////////////////////////////////////
+!// Author: Manish Agarwal and Gourav Shrivastava  , IIT Delhi
+!/////////////////////////////////////////////////////////////////////////////////////////
+
+! Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
+
+module readdata
+      contains
+      subroutine readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      integer i,j
+      integer maxframes,maxatoms
+
+      double precision d(6),xbox,ybox,zbox
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+
+      real*4 dummyr
+      integer*4 nset, natoms, dummyi,nframes,tframes
+      character*4 dummyc
+      
+      open(10,file='../input/alk.traj.dcd',status='old',form='unformatted')
+      read(10) dummyc, tframes,(dummyi,i=1,8),dummyr, (dummyi,i=1,9)
+      read(10) dummyi, dummyr,dummyr
+      read(10) natoms
+      print*,"Total number of frames and atoms are",tframes,natoms
+
+      allocate ( x(maxframes,natoms) )
+      allocate ( y(maxframes,natoms) )
+      allocate ( z(maxframes,natoms) )
+
+      do i = 1,nframes
+           read(10) (d(j),j=1, 6)
+              
+           read(10) (x(i,j),j=1,natoms)
+           read(10) (y(i,j),j=1,natoms)
+           read(10) (z(i,j),j=1,natoms)
+      end do
+      
+      xbox=d(1)
+      ybox=d(3)
+      zbox=d(6)
+      
+      print*,"File reading is done: xbox,ybox,zbox",xbox,ybox,zbox
+      return
+
+      end subroutine readdcd
+ end module readdata
+
+program rdf
+      use readdata
+      use nvtx
+      implicit none
+      integer n,i,j,iconf,ind
+      integer natoms,nframes,nbin
+      integer maxframes,maxatoms
+      parameter (maxframes=10,maxatoms=60000,nbin=2000)
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+      double precision dx,dy,dz
+      double precision xbox,ybox,zbox,cut
+      double precision vol,r,del,s2,s2bond
+      double precision, allocatable   ::  g(:)
+      double precision rho,gr,lngr,lngrbond,pi,const,nideal,rf
+      double precision rlower,rupper
+      character  atmnm*4
+      real*4 start,finish
+        
+      open(23,file='RDF.dat',status='unknown')
+      open(24,file='Pair_entropy.dat',status='unknown')
+
+      nframes=10
+         
+      call cpu_time(start)
+
+      print*,"Going to read coordinates"
+      call nvtxStartRange("Read File")
+      call readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      call nvtxEndRange
+
+      allocate ( g(nbin) )
+      g = 0.0d0
+ 
+      pi=dacos(-1.0d0)
+      vol=xbox*ybox*zbox
+      rho=dble(natoms)/vol
+
+      del=xbox/dble(2.0*nbin)
+      write(*,*) "bin width is : ",del
+      cut = dble(xbox * 0.5);
+
+      !pair calculation
+      call nvtxStartRange("Pair Calculation")
+      !$acc data copy(g(:)) copyin(x(:,:),z(:,:),y(:,:))
+      do iconf=1,nframes
+         if (mod(iconf,1).eq.0) print*,iconf
+         !$acc parallel loop gang default(present)
+         do i=1,natoms
+            !$acc loop vector
+            do j=1,natoms
+               dx=x(iconf,i)-x(iconf,j)
+               dy=y(iconf,i)-y(iconf,j)
+               dz=z(iconf,i)-z(iconf,j)
+
+               dx=dx-nint(dx/xbox)*xbox
+               dy=dy-nint(dy/ybox)*ybox
+               dz=dz-nint(dz/zbox)*zbox
+   
+               r=dsqrt(dx**2+dy**2+dz**2)
+               ind=int(r/del)+1
+               if(r<cut)then
+                  !$acc atomic
+                  g(ind)=g(ind)+1.0d0
+               endif
+            enddo
+         enddo
+      enddo
+      !$acc end data
+      call nvtxEndRange
+
+      !entropy calculation
+      s2=0.01d0
+      s2bond=0.01d0 
+      const=(4.0d0/3.0d0)*pi*rho
+      call nvtxStartRange("Entropy Calculation")
+      do i=1,nbin
+          rlower=dble((i-1)*del)
+          rupper=rlower+del
+          nideal=const*(rupper**3-rlower**3)
+          g(i)=g(i)/(dble(nframes)*dble(natoms)*nideal)
+          r=dble(i)*del
+          if (r.lt.2.0) then
+            gr=0.0
+          else
+            gr=g(i)
+          endif
+
+          if (gr.lt.1e-5) then
+            lngr=0.0
+          else
+            lngr=dlog(gr)
+          endif
+          if (g(i).lt.1e-6) then
+            lngrbond=0.01
+          else
+            lngrbond=dlog(g(i))
+          endif
+
+          s2=s2-2*pi*rho*((gr*lngr)-gr+1)*del*r**2.0
+          s2bond=s2bond-2*pi*rho*((g(i)*lngrbond)-g(i)+1)*del*r*r
+
+          
+          rf=dble(i-.5)*del
+          write(23,*) rf,g(i)
+      enddo
+      call nvtxEndRange
+
+      write(24,*)"s2      : ",s2
+      write(24,*)"s2bond  : ",s2bond
+      call cpu_time(finish)
+      print*,"starting at time",start,"and ending at",finish
+      deallocate(x,y,z,g)
+      stop
+end

+ 165 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/rdf_gang_vector_length.f90

@@ -0,0 +1,165 @@
+!/////////////////////////////////////////////////////////////////////////////////////////
+!// Author: Manish Agarwal and Gourav Shrivastava  , IIT Delhi
+!/////////////////////////////////////////////////////////////////////////////////////////
+
+! Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
+
+module readdata
+      contains
+      subroutine readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      integer i,j
+      integer maxframes,maxatoms
+
+      double precision d(6),xbox,ybox,zbox
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+
+      real*4 dummyr
+      integer*4 nset, natoms, dummyi,nframes,tframes
+      character*4 dummyc
+      
+      open(10,file='../input/alk.traj.dcd',status='old',form='unformatted')
+      read(10) dummyc, tframes,(dummyi,i=1,8),dummyr, (dummyi,i=1,9)
+      read(10) dummyi, dummyr,dummyr
+      read(10) natoms
+      print*,"Total number of frames and atoms are",tframes,natoms
+
+      allocate ( x(maxframes,natoms) )
+      allocate ( y(maxframes,natoms) )
+      allocate ( z(maxframes,natoms) )
+
+      do i = 1,nframes
+           read(10) (d(j),j=1, 6)
+              
+           read(10) (x(i,j),j=1,natoms)
+           read(10) (y(i,j),j=1,natoms)
+           read(10) (z(i,j),j=1,natoms)
+      end do
+      
+      xbox=d(1)
+      ybox=d(3)
+      zbox=d(6)
+      
+      print*,"File reading is done: xbox,ybox,zbox",xbox,ybox,zbox
+      return
+
+      end subroutine readdcd
+ end module readdata
+
+program rdf
+      use readdata
+      use nvtx
+      implicit none
+      integer n,i,j,iconf,ind
+      integer natoms,nframes,nbin
+      integer maxframes,maxatoms
+      parameter (maxframes=10,maxatoms=60000,nbin=2000)
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+      double precision dx,dy,dz
+      double precision xbox,ybox,zbox,cut
+      double precision vol,r,del,s2,s2bond
+      double precision, allocatable   ::  g(:)
+      double precision rho,gr,lngr,lngrbond,pi,const,nideal,rf
+      double precision rlower,rupper
+      character  atmnm*4
+      real*4 start,finish
+        
+      open(23,file='RDF.dat',status='unknown')
+      open(24,file='Pair_entropy.dat',status='unknown')
+
+      nframes=10
+         
+      call cpu_time(start)
+
+      print*,"Going to read coordinates"
+      call nvtxStartRange("Read File")
+      call readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      call nvtxEndRange
+
+      allocate ( g(nbin) )
+      g = 0.0d0
+ 
+      pi=dacos(-1.0d0)
+      vol=xbox*ybox*zbox
+      rho=dble(natoms)/vol
+
+      del=xbox/dble(2.0*nbin)
+      write(*,*) "bin width is : ",del
+      cut = dble(xbox * 0.5)
+
+      !pair calculation
+      call nvtxStartRange("Pair Calculation")
+      !$acc data copy(g(:)) copyin(x(:,:),z(:,:),y(:,:))
+      do iconf=1,nframes
+         print*,iconf
+         !$acc parallel loop gang vector_length(128) default(present)
+         do i=1,natoms
+            !$acc loop vector
+            do j=1,natoms
+               dx=x(iconf,i)-x(iconf,j)
+               dy=y(iconf,i)-y(iconf,j)
+               dz=z(iconf,i)-z(iconf,j)
+
+               dx=dx-nint(dx/xbox)*xbox
+               dy=dy-nint(dy/ybox)*ybox
+               dz=dz-nint(dz/zbox)*zbox
+   
+               r=dsqrt(dx**2+dy**2+dz**2)
+               ind=int(r/del)+1
+               if(r<cut)then
+                  !$acc atomic
+                  g(ind)=g(ind)+1.0d0
+               endif
+            enddo
+         enddo
+      enddo
+      !$acc end data
+      call nvtxEndRange
+
+      !entropy calculation
+      s2=0.01d0
+      s2bond=0.01d0 
+      const=(4.0d0/3.0d0)*pi*rho
+      call nvtxStartRange("Entropy Calculation")
+      do i=1,nbin
+          rlower=dble((i-1)*del)
+          rupper=rlower+del
+          nideal=const*(rupper**3-rlower**3)
+          g(i)=g(i)/(dble(nframes)*dble(natoms)*nideal)
+          r=dble(i)*del
+          if (r.lt.2.0) then
+            gr=0.0
+          else
+            gr=g(i)
+          endif
+
+          if (gr.lt.1e-5) then
+            lngr=0.0
+          else
+            lngr=dlog(gr)
+          endif
+          if (g(i).lt.1e-6) then
+            lngrbond=0.01
+          else
+            lngrbond=dlog(g(i))
+          endif
+
+          s2=s2-2*pi*rho*((gr*lngr)-gr+1)*del*r**2.0
+          s2bond=s2bond-2*pi*rho*((g(i)*lngrbond)-g(i)+1)*del*r*r
+
+          
+          rf=dble(i-.5)*del
+          write(23,*) rf,g(i)
+      enddo
+      call nvtxEndRange
+
+      write(24,*)"s2      : ",s2
+      write(24,*)"s2bond  : ",s2bond
+      call cpu_time(finish)
+      print*,"starting at time",start,"and ending at",finish
+      deallocate(x,y,z,g)
+      stop
+end

+ 165 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/SOLUTION/rdf_gang_vector_worker.f90

@@ -0,0 +1,165 @@
+!/////////////////////////////////////////////////////////////////////////////////////////
+!// Author: Manish Agarwal and Gourav Shrivastava  , IIT Delhi
+!/////////////////////////////////////////////////////////////////////////////////////////
+
+! Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
+
+module readdata
+      contains
+      subroutine readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      integer i,j
+      integer maxframes,maxatoms
+
+      double precision d(6),xbox,ybox,zbox
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+
+      real*4 dummyr
+      integer*4 nset, natoms, dummyi,nframes,tframes
+      character*4 dummyc
+      
+      open(10,file='../input/alk.traj.dcd',status='old',form='unformatted')
+      read(10) dummyc, tframes,(dummyi,i=1,8),dummyr, (dummyi,i=1,9)
+      read(10) dummyi, dummyr,dummyr
+      read(10) natoms
+      print*,"Total number of frames and atoms are",tframes,natoms
+
+      allocate ( x(maxframes,natoms) )
+      allocate ( y(maxframes,natoms) )
+      allocate ( z(maxframes,natoms) )
+
+      do i = 1,nframes
+           read(10) (d(j),j=1, 6)
+              
+           read(10) (x(i,j),j=1,natoms)
+           read(10) (y(i,j),j=1,natoms)
+           read(10) (z(i,j),j=1,natoms)
+      end do
+      
+      xbox=d(1)
+      ybox=d(3)
+      zbox=d(6)
+      
+      print*,"File reading is done: xbox,ybox,zbox",xbox,ybox,zbox
+      return
+
+      end subroutine readdcd
+ end module readdata
+
+program rdf
+      use readdata
+      use nvtx
+      implicit none
+      integer n,i,j,iconf,ind
+      integer natoms,nframes,nbin
+      integer maxframes,maxatoms
+      parameter (maxframes=10,maxatoms=60000,nbin=2000)
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+      double precision dx,dy,dz
+      double precision xbox,ybox,zbox,cut
+      double precision vol,r,del,s2,s2bond
+      double precision, allocatable   ::  g(:)
+      double precision rho,gr,lngr,lngrbond,pi,const,nideal,rf
+      double precision rlower,rupper
+      character  atmnm*4
+      real*4 start,finish
+        
+      open(23,file='RDF.dat',status='unknown')
+      open(24,file='Pair_entropy.dat',status='unknown')
+
+      nframes=10
+         
+      call cpu_time(start)
+
+      print*,"Going to read coordinates"
+      call nvtxStartRange("Read File")
+      call readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      call nvtxEndRange
+
+      allocate ( g(nbin) )
+      g = 0.0d0
+ 
+      pi=dacos(-1.0d0)
+      vol=xbox*ybox*zbox
+      rho=dble(natoms)/vol
+
+      del=xbox/dble(2.0*nbin)
+      write(*,*) "bin width is : ",del
+      cut = dble(xbox * 0.5)
+
+      !pair calculation
+      call nvtxStartRange("Pair Calculation")
+      !$acc data copy(g(:)) copyin(x(:,:),z(:,:),y(:,:))
+      do iconf=1,nframes
+         print*,iconf
+         !$acc parallel loop gang worker num_workers(32) vector_length(32) default(present)
+         do i=1,natoms
+            !$acc loop vector
+            do j=1,natoms
+               dx=x(iconf,i)-x(iconf,j)
+               dy=y(iconf,i)-y(iconf,j)
+               dz=z(iconf,i)-z(iconf,j)
+
+               dx=dx-nint(dx/xbox)*xbox
+               dy=dy-nint(dy/ybox)*ybox
+               dz=dz-nint(dz/zbox)*zbox
+   
+               r=dsqrt(dx**2+dy**2+dz**2)
+               ind=int(r/del)+1
+               if(r<cut)then
+                  !$acc atomic
+                  g(ind)=g(ind)+1.0d0
+               endif
+            enddo
+         enddo
+      enddo
+      !$acc end data
+      call nvtxEndRange
+
+      !entropy calculation
+      s2=0.01d0
+      s2bond=0.01d0 
+      const=(4.0d0/3.0d0)*pi*rho
+      call nvtxStartRange("Entropy Calculation")
+      do i=1,nbin
+          rlower=dble((i-1)*del)
+          rupper=rlower+del
+          nideal=const*(rupper**3-rlower**3)
+          g(i)=g(i)/(dble(nframes)*dble(natoms)*nideal)
+          r=dble(i)*del
+          if (r.lt.2.0) then
+            gr=0.0
+          else
+            gr=g(i)
+          endif
+
+          if (gr.lt.1e-5) then
+            lngr=0.0
+          else
+            lngr=dlog(gr)
+          endif
+          if (g(i).lt.1e-6) then
+            lngrbond=0.01
+          else
+            lngrbond=dlog(g(i))
+          endif
+
+          s2=s2-2*pi*rho*((gr*lngr)-gr+1)*del*r**2.0
+          s2bond=s2bond-2*pi*rho*((g(i)*lngrbond)-g(i)+1)*del*r*r
+
+          
+          rf=dble(i-.5)*del
+          write(23,*) rf,g(i)
+      enddo
+      call nvtxEndRange
+
+      write(24,*)"s2      : ",s2
+      write(24,*)"s2bond  : ",s2bond
+      call cpu_time(finish)
+      print*,"starting at time",start,"and ending at",finish
+      deallocate(x,y,z,g)
+      stop
+end

+ 106 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/nvtx.mod

@@ -0,0 +1,106 @@
+V34 :0x24 nvtx
+8 nvtx.f90 S624 0
+08/16/2021  10:55:55
+use iso_c_binding public 0 direct
+enduse
+B 525 iso_c_binding c_loc
+B 526 iso_c_binding c_funloc
+B 527 iso_c_binding c_associated
+B 528 iso_c_binding c_f_pointer
+B 529 iso_c_binding c_f_procpointer
+B 608 iso_c_binding c_sizeof
+D 58 26 643 8 642 7
+D 67 26 646 8 645 7
+D 76 23 6 1 11 72 0 0 0 0 0
+ 0 72 11 11 72 72
+D 82 23 22 1 11 82 0 0 0 0 0
+ 0 82 11 11 82 82
+D 85 26 724 48 723 7
+D 91 23 7 1 0 11 0 0 0 0 0
+ 0 11 0 11 11 0
+D 94 20 84
+D 96 23 22 1 11 82 0 0 0 0 0
+ 0 82 11 11 82 82
+D 99 20 85
+D 101 20 86
+S 624 24 0 0 0 6 1 0 5013 10005 0 A 0 0 0 0 B 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 nvtx
+R 642 25 6 iso_c_binding c_ptr
+R 643 5 7 iso_c_binding val c_ptr
+R 645 25 9 iso_c_binding c_funptr
+R 646 5 10 iso_c_binding val c_funptr
+R 680 6 44 iso_c_binding c_null_ptr$ac
+R 682 6 46 iso_c_binding c_null_funptr$ac
+R 683 26 47 iso_c_binding ==
+R 685 26 49 iso_c_binding !=
+S 710 7 4 0 4 76 1 624 5724 80001c 100 A 0 0 0 0 B 0 6 0 0 0 0 0 0 0 0 0 0 752 0 0 0 0 0 0 0 0 0 0 624 0 0 0 0 col
+S 712 3 0 0 0 7 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 720 7 4 0 4 82 1 624 5728 800014 108 A 0 0 0 0 B 0 7 0 0 0 0 0 0 0 0 0 0 753 0 0 0 0 0 0 0 0 0 0 624 0 0 0 0 tempname
+S 722 3 0 0 0 7 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 256 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 723 25 0 0 0 85 1 624 5737 1000000c 810010 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 736 0 0 0 0 0 0 0 735 0 0 0 624 0 0 0 0 nvtxeventattributes
+S 724 5 0 0 0 5 725 624 5757 800004 0 A 0 0 0 0 B 0 10 0 0 0 0 0 0 85 0 0 0 0 0 0 0 0 0 0 0 1 724 0 624 0 0 0 0 version
+S 725 5 0 0 0 5 727 624 2875 800004 0 A 0 0 0 0 B 0 0 0 0 0 2 0 0 85 0 0 0 0 0 0 0 0 0 0 0 724 725 0 624 0 0 0 0 size
+S 726 3 0 0 0 6 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 48 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6
+S 727 5 0 0 0 6 728 624 5765 800004 0 A 0 0 0 0 B 0 12 0 0 0 4 0 0 85 0 0 0 0 0 0 0 0 0 0 0 725 727 0 624 0 0 0 0 category
+S 728 5 0 0 0 6 729 624 5774 800004 0 A 0 0 0 0 B 0 13 0 0 0 8 0 0 85 0 0 0 0 0 0 0 0 0 0 0 727 728 0 624 0 0 0 0 colortype
+S 729 5 0 0 0 6 730 624 5784 800004 0 A 0 0 0 0 B 0 14 0 0 0 12 0 0 85 0 0 0 0 0 0 0 0 0 0 0 728 729 0 624 0 0 0 0 color
+S 730 5 0 0 0 6 731 624 5790 800004 0 A 0 0 0 0 B 0 15 0 0 0 16 0 0 85 0 0 0 0 0 0 0 0 0 0 0 729 730 0 624 0 0 0 0 payloadtype
+S 731 5 0 0 0 6 732 624 5802 800004 0 A 0 0 0 0 B 0 16 0 0 0 20 0 0 85 0 0 0 0 0 0 0 0 0 0 0 730 731 0 624 0 0 0 0 reserved0
+S 732 5 0 0 0 7 733 624 5812 800004 0 A 0 0 0 0 B 0 17 0 0 0 24 0 0 85 0 0 0 0 0 0 0 0 0 0 0 731 732 0 624 0 0 0 0 payload
+S 733 5 0 0 0 6 734 624 5820 800004 0 A 0 0 0 0 B 0 18 0 0 0 32 0 0 85 0 0 0 0 0 0 0 0 0 0 0 732 733 0 624 0 0 0 0 messagetype
+S 734 5 0 0 0 58 1 624 5832 800004 0 A 0 0 0 0 B 0 19 0 0 0 40 0 0 85 0 0 0 0 0 0 0 0 0 0 0 733 734 0 624 0 0 0 0 message
+S 735 8 5 0 0 91 1 624 5840 40822004 1220 A 0 0 0 0 B 0 20 0 0 0 0 0 85 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 624 0 0 0 0 nvtx$nvtxeventattributes$td
+S 736 6 4 0 0 85 1 624 5868 80004e 0 A 0 0 0 0 B 800 20 0 0 0 0 0 0 0 0 0 0 754 0 0 0 0 0 0 0 0 0 0 624 0 0 0 0 ._dtInit0085
+S 737 19 0 0 0 6 1 624 5881 4000 0 A 0 0 0 0 B 0 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 11 2 0 0 0 0 0 624 0 0 0 0 nvtxrangepush
+O 737 2 746 740
+S 738 3 0 0 0 6 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6
+S 739 3 0 0 0 94 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 5895 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 14 6e 76 74 78 52 61 6e 67 65 50 75 73 68 41
+S 740 14 5 0 0 0 1 624 5910 0 18000 A 1000000 0 0 0 B 0 24 0 0 0 0 0 13 1 0 0 0 0 0 0 0 0 0 0 0 0 24 0 624 0 0 739 0 nvtxrangepusha nvtxrangepusha 
+F 740 1 741
+S 741 7 3 0 0 96 1 740 5925 802004 2000 A 0 0 0 0 B 0 24 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 name
+S 744 3 0 0 0 6 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6
+S 745 3 0 0 0 99 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 5943 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 15 6e 76 74 78 52 61 6e 67 65 50 75 73 68 45 78
+S 746 14 5 0 0 0 1 624 5959 0 18000 A 1000000 0 0 0 B 0 30 0 0 0 0 0 14 1 0 0 0 0 0 0 0 0 0 0 0 0 30 0 624 0 0 745 0 nvtxrangepushex nvtxrangepushex 
+F 746 1 747
+S 747 1 3 0 0 85 1 746 5975 2004 2000 A 0 0 0 0 B 0 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 event
+S 748 19 0 0 0 6 1 624 5981 4000 0 A 0 0 0 0 B 0 37 0 0 0 0 0 0 0 751 0 0 0 0 0 0 12 1 0 0 0 0 0 624 0 0 0 0 nvtxrangepop
+O 748 1 751
+S 749 3 0 0 0 6 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6
+S 750 3 0 0 0 101 0 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 5994 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20 12 6e 76 74 78 52 61 6e 67 65 50 6f 70
+S 751 14 5 0 0 0 1 624 5981 0 18000 A 1000000 0 0 0 B 0 0 0 0 0 0 0 15 0 748 0 0 0 0 0 0 0 0 0 0 0 38 0 624 0 0 750 0 nvtxrangepop nvtxrangepop 
+F 751 0
+S 752 11 0 0 4 9 689 624 6007 40800010 805000 A 0 0 0 0 B 0 42 0 0 0 28 0 0 710 710 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 _nvtx$12
+S 753 11 0 0 4 9 752 624 6016 40800010 805000 A 0 0 0 0 B 0 42 0 0 0 256 0 0 720 720 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 _nvtx$5
+S 754 11 0 0 0 9 753 624 6024 40800000 805000 A 0 0 0 0 B 0 42 0 0 0 48 0 0 736 736 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 _nvtx$8
+S 755 23 5 0 0 0 758 624 6032 0 0 A 0 0 0 0 B 0 44 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 nvtxstartrange
+S 756 1 3 0 0 30 1 755 5925 4 43000 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 name
+S 757 1 3 0 0 6 1 755 6047 80000004 3000 A 0 0 0 0 B 0 44 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 id
+S 758 14 5 0 0 0 1 755 6032 0 400000 A 0 0 0 0 B 0 44 0 0 0 0 0 16 2 0 0 0 0 0 0 0 0 0 0 0 0 44 0 624 0 0 0 0 nvtxstartrange nvtxstartrange 
+F 758 2 756 757
+S 759 23 5 0 0 0 760 624 6050 0 0 A 0 0 0 0 B 0 68 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 nvtxendrange
+S 760 14 5 0 0 0 1 759 6050 0 400000 A 0 0 0 0 B 0 68 0 0 0 0 0 19 0 0 0 0 0 0 0 0 0 0 0 0 0 68 0 624 0 0 0 0 nvtxendrange nvtxendrange 
+F 760 0
+A 67 1 0 0 0 58 680 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+A 70 1 0 0 0 67 682 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+A 72 2 0 0 0 7 712 0 0 0 72 0 0 0 0 0 0 0 0 0 0 0
+A 82 2 0 0 0 7 722 0 0 0 82 0 0 0 0 0 0 0 0 0 0 0
+A 83 2 0 0 0 6 726 0 0 0 83 0 0 0 0 0 0 0 0 0 0 0
+A 84 2 0 0 0 6 738 0 0 0 84 0 0 0 0 0 0 0 0 0 0 0
+A 85 2 0 0 50 6 744 0 0 0 85 0 0 0 0 0 0 0 0 0 0 0
+A 86 2 0 0 0 6 749 0 0 0 86 0 0 0 0 0 0 0 0 0 0 0
+Z
+J 131 1 1
+V 67 58 7 0
+S 0 58 0 0 0
+A 0 6 0 0 1 2 0
+J 132 1 1
+V 70 67 7 0
+S 0 67 0 0 0
+A 0 6 0 0 1 2 0
+T 723 85 0 3 0 0
+A 724 6 0 0 1 3 1
+A 725 6 0 0 1 83 1
+A 727 6 0 0 1 2 1
+A 728 6 0 0 1 3 1
+A 730 6 0 0 1 2 1
+A 733 6 0 0 1 3 0
+Z

+ 119 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openacc/readdata.mod

@@ -0,0 +1,119 @@
+V34 :0x24 readdata
+7 rdf.f90 S624 0
+08/16/2021  10:55:55
+enduse
+D 58 23 9 2 15 33 0 1 0 0 1
+ 17 21 23 17 21 19
+ 25 29 31 25 29 27
+D 61 23 7 1 0 12 0 0 0 0 0
+ 0 12 0 11 12 0
+D 64 23 9 2 35 46 0 1 0 0 1
+ 36 39 40 36 39 37
+ 41 44 45 41 44 42
+D 67 23 7 1 0 12 0 0 0 0 0
+ 0 12 0 11 12 0
+D 70 23 9 2 48 59 0 1 0 0 1
+ 49 52 53 49 52 50
+ 54 57 58 54 57 55
+D 73 23 7 1 0 12 0 0 0 0 0
+ 0 12 0 11 12 0
+S 624 24 0 0 0 9 1 0 5013 10005 0 A 0 0 0 0 B 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 readdata
+S 625 23 5 0 0 0 636 624 5022 0 0 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 readdcd
+S 626 1 3 0 0 6 1 625 5030 4 3000 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 maxframes
+S 627 1 3 0 0 6 1 625 5040 4 3000 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 maxatoms
+S 628 7 3 0 0 58 1 625 5049 10a00004 3050 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 638 0 0 0 0 0 0 0 0 x
+S 629 7 3 0 0 64 1 625 5051 10a00004 3050 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 650 0 0 0 0 0 0 0 0 y
+S 630 7 3 0 0 70 1 625 5053 10a00004 3050 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 654 0 0 0 0 0 0 0 0 z
+S 631 1 3 0 0 10 1 625 5055 4 3000 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 xbox
+S 632 1 3 0 0 10 1 625 5060 4 3000 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ybox
+S 633 1 3 0 0 10 1 625 5065 4 3000 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 zbox
+S 634 1 3 0 0 6 1 625 5070 4 3000 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 natoms
+S 635 1 3 0 0 6 1 625 5077 4 3000 A 0 0 0 0 B 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 nframes
+S 636 14 5 0 0 0 1 625 5022 0 400000 A 0 0 0 0 B 0 9 0 0 0 0 0 2 10 0 0 0 0 0 0 0 0 0 0 0 0 9 0 624 0 0 0 0 readdcd readdcd 
+F 636 10 626 627 628 629 630 631 632 633 634 635
+S 637 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 638 8 1 0 0 61 1 625 5085 40822004 1020 A 0 0 0 0 B 0 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 x$sd
+S 642 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 643 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 644 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 645 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 646 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 647 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 648 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 649 3 0 0 0 7 1 1 0 0 0 A 0 0 0 0 B 0 0 0 0 0 0 0 0 0 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7
+S 650 8 1 0 0 67 1 625 5107 40822004 1020 A 0 0 0 0 B 0 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 y$sd
+S 654 8 1 0 0 73 1 625 5129 40822004 1020 A 0 0 0 0 B 0 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 z$sd
+A 12 2 0 0 0 7 637 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0
+A 13 1 0 1 0 61 638 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+A 14 2 0 0 0 7 649 0 0 0 14 0 0 0 0 0 0 0 0 0 0 0
+A 15 10 0 0 0 7 13 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 14
+A 16 2 0 0 0 7 642 0 0 0 16 0 0 0 0 0 0 0 0 0 0 0
+A 17 10 0 0 15 7 13 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 16
+A 18 2 0 0 0 7 643 0 0 0 18 0 0 0 0 0 0 0 0 0 0 0
+A 19 10 0 0 17 7 13 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 18
+A 20 4 0 0 0 7 19 0 11 0 0 0 0 2 0 0 0 0 0 0 0 0
+A 21 4 0 0 0 7 17 0 20 0 0 0 0 1 0 0 0 0 0 0 0 0
+A 22 2 0 0 0 7 646 0 0 0 22 0 0 0 0 0 0 0 0 0 0 0
+A 23 10 0 0 19 7 13 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 22
+A 24 2 0 0 0 7 644 0 0 0 24 0 0 0 0 0 0 0 0 0 0 0
+A 25 10 0 0 23 7 13 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 24
+A 26 2 0 0 0 7 645 0 0 0 26 0 0 0 0 0 0 0 0 0 0 0
+A 27 10 0 0 25 7 13 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 26
+A 28 4 0 0 0 7 27 0 11 0 0 0 0 2 0 0 0 0 0 0 0 0
+A 29 4 0 0 0 7 25 0 28 0 0 0 0 1 0 0 0 0 0 0 0 0
+A 30 2 0 0 0 7 647 0 0 0 30 0 0 0 0 0 0 0 0 0 0 0
+A 31 10 0 0 27 7 13 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 30
+A 32 2 0 0 0 7 648 0 0 0 32 0 0 0 0 0 0 0 0 0 0 0
+A 33 10 0 0 31 7 13 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 32
+A 34 1 0 1 0 67 650 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+A 35 10 0 0 0 7 34 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 14
+A 36 10 0 0 35 7 34 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 16
+A 37 10 0 0 36 7 34 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 18
+A 38 4 0 0 0 7 37 0 11 0 0 0 0 2 0 0 0 0 0 0 0 0
+A 39 4 0 0 0 7 36 0 38 0 0 0 0 1 0 0 0 0 0 0 0 0
+A 40 10 0 0 37 7 34 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 22
+A 41 10 0 0 40 7 34 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 24
+A 42 10 0 0 41 7 34 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 26
+A 43 4 0 0 0 7 42 0 11 0 0 0 0 2 0 0 0 0 0 0 0 0
+A 44 4 0 0 0 7 41 0 43 0 0 0 0 1 0 0 0 0 0 0 0 0
+A 45 10 0 0 42 7 34 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 30
+A 46 10 0 0 45 7 34 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 32
+A 47 1 0 1 0 73 654 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+A 48 10 0 0 0 7 47 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 14
+A 49 10 0 0 48 7 47 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 16
+A 50 10 0 0 49 7 47 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 18
+A 51 4 0 0 0 7 50 0 11 0 0 0 0 2 0 0 0 0 0 0 0 0
+A 52 4 0 0 0 7 49 0 51 0 0 0 0 1 0 0 0 0 0 0 0 0
+A 53 10 0 0 50 7 47 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 22
+A 54 10 0 0 53 7 47 13 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 24
+A 55 10 0 0 54 7 47 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 26
+A 56 4 0 0 0 7 55 0 11 0 0 0 0 2 0 0 0 0 0 0 0 0
+A 57 4 0 0 0 7 54 0 56 0 0 0 0 1 0 0 0 0 0 0 0 0
+A 58 10 0 0 55 7 47 19 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 30
+A 59 10 0 0 58 7 47 22 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+X 1 32
+Z
+Z

+ 72 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openmp/SOLUTION/nvtx.f90

@@ -0,0 +1,72 @@
+module nvtx
+
+use iso_c_binding
+implicit none
+
+integer,private :: col(7) = [ Z'0000ff00', Z'000000ff', Z'00ffff00', Z'00ff00ff', Z'0000ffff', Z'00ff0000', Z'00ffffff']
+character,private,target :: tempName(256)
+
+type, bind(C):: nvtxEventAttributes
+  integer(C_INT16_T):: version=1
+  integer(C_INT16_T):: size=48 !
+  integer(C_INT):: category=0
+  integer(C_INT):: colorType=1 ! NVTX_COLOR_ARGB = 1
+  integer(C_INT):: color
+  integer(C_INT):: payloadType=0 ! NVTX_PAYLOAD_UNKNOWN = 0
+  integer(C_INT):: reserved0
+  integer(C_INT64_T):: payload   ! union uint,int,double
+  integer(C_INT):: messageType=1  ! NVTX_MESSAGE_TYPE_ASCII     = 1 
+  type(C_PTR):: message  ! ascii char
+end type
+
+interface nvtxRangePush
+  ! push range with custom label and standard color
+  subroutine nvtxRangePushA(name) bind(C, name='nvtxRangePushA')
+  use iso_c_binding
+  character(kind=C_CHAR) :: name(256)
+  end subroutine
+
+  ! push range with custom label and custom color
+  subroutine nvtxRangePushEx(event) bind(C, name='nvtxRangePushEx')
+  use iso_c_binding
+  import:: nvtxEventAttributes
+  type(nvtxEventAttributes):: event
+  end subroutine
+end interface
+
+interface nvtxRangePop
+  subroutine nvtxRangePop() bind(C, name='nvtxRangePop')
+  end subroutine
+end interface
+
+contains
+
+subroutine nvtxStartRange(name,id)
+  character(kind=c_char,len=*) :: name
+  integer, optional:: id
+  type(nvtxEventAttributes):: event
+  character(kind=c_char,len=256) :: trimmed_name
+  integer:: i
+
+  trimmed_name=trim(name)//c_null_char
+
+  ! move scalar trimmed_name into character array tempName
+  do i=1,LEN(trim(name)) + 1
+     tempName(i) = trimmed_name(i:i)
+  enddo
+
+
+  if ( .not. present(id)) then
+    call nvtxRangePush(tempName)
+  else
+    event%color=col(mod(id,7)+1)
+    event%message=c_loc(tempName)
+    call nvtxRangePushEx(event)
+  end if
+end subroutine
+
+subroutine nvtxEndRange
+  call nvtxRangePop
+end subroutine
+
+end module nvtx

+ 166 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openmp/SOLUTION/rdf_offload_split.f90

@@ -0,0 +1,166 @@
+!/////////////////////////////////////////////////////////////////////////////////////////
+!// Author: Manish Agarwal and Gourav Shrivastava  , IIT Delhi
+!/////////////////////////////////////////////////////////////////////////////////////////
+
+! Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
+
+module readdata
+      contains
+      subroutine readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      integer i,j
+      integer maxframes,maxatoms
+
+      double precision d(6),xbox,ybox,zbox
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+
+      real*4 dummyr
+      integer*4 nset, natoms, dummyi,nframes,tframes
+      character*4 dummyc
+      
+      open(10,file='../input/alk.traj.dcd',status='old',form='unformatted')
+      read(10) dummyc, tframes,(dummyi,i=1,8),dummyr, (dummyi,i=1,9)
+      read(10) dummyi, dummyr,dummyr
+      read(10) natoms
+      print*,"Total number of frames and atoms are",tframes,natoms
+
+      allocate ( x(maxframes,natoms) )
+      allocate ( y(maxframes,natoms) )
+      allocate ( z(maxframes,natoms) )
+
+      do i = 1,nframes
+           read(10) (d(j),j=1, 6)
+              
+           read(10) (x(i,j),j=1,natoms)
+           read(10) (y(i,j),j=1,natoms)
+           read(10) (z(i,j),j=1,natoms)
+      end do
+      
+      xbox=d(1)
+      ybox=d(3)
+      zbox=d(6)
+      
+      print*,"File reading is done: xbox,ybox,zbox",xbox,ybox,zbox
+      return
+
+      end subroutine readdcd
+ end module readdata
+
+program rdf
+      use readdata
+      use nvtx
+      implicit none
+      integer n,i,j,iconf,ind
+      integer natoms,nframes,nbin
+      integer maxframes,maxatoms
+      parameter (maxframes=10,maxatoms=60000,nbin=2000)
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+      double precision dx,dy,dz
+      double precision xbox,ybox,zbox,cut
+      double precision vol,r,del,s2,s2bond
+      double precision, allocatable   ::  g(:)
+      double precision rho,gr,lngr,lngrbond,pi,const,nideal,rf
+      double precision rlower,rupper
+      character  atmnm*4
+      real*4 start,finish
+        
+      open(23,file='RDF.dat',status='unknown')
+      open(24,file='Pair_entropy.dat',status='unknown')
+
+      nframes=10
+         
+      call cpu_time(start)
+
+      print*,"Going to read coordinates"
+      call nvtxStartRange("Read File")
+      call readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      call nvtxEndRange
+
+      allocate ( g(nbin) )
+      g = 0.0d0
+ 
+      pi=dacos(-1.0d0)
+      vol=xbox*ybox*zbox
+      rho=dble(natoms)/vol
+
+      del=xbox/dble(2.0*nbin)
+      write(*,*) "bin width is : ",del
+      cut = dble(xbox * 0.5)
+
+      !pair calculation
+      !$omp target data map(x(:,:), y (:,:), z (:,:), g (:))
+      call nvtxStartRange("Pair Calculation")
+      do iconf=1,nframes
+         print*,iconf
+         !$omp target teams distribute
+         do i=1,natoms
+            !$omp parallel do private(dx,dy,dz,r,ind)
+            do j=1,natoms
+               dx=x(iconf,i)-x(iconf,j)
+               dy=y(iconf,i)-y(iconf,j)
+               dz=z(iconf,i)-z(iconf,j)
+
+               dx=dx-nint(dx/xbox)*xbox
+               dy=dy-nint(dy/ybox)*ybox
+               dz=dz-nint(dz/zbox)*zbox
+   
+               r=dsqrt(dx**2+dy**2+dz**2)
+               ind=int(r/del)+1
+               if(r<cut)then
+                  !$omp atomic
+                  g(ind)=g(ind)+1.0d0
+               endif
+            enddo
+         enddo
+      enddo
+      call nvtxEndRange
+      !$omp end target data
+
+      !entropy calculation
+      s2=0.01d0
+      s2bond=0.01d0 
+      const=(4.0d0/3.0d0)*pi*rho
+      call nvtxStartRange("Entropy Calculation")
+      do i=1,nbin
+          rlower=dble((i-1)*del)
+          rupper=rlower+del
+          nideal=const*(rupper**3-rlower**3)
+          g(i)=g(i)/(dble(nframes)*dble(natoms)*nideal)
+          r=dble(i)*del
+          if (r.lt.2.0) then
+            gr=0.0
+          else
+            gr=g(i)
+          endif
+
+          if (gr.lt.1e-5) then
+            lngr=0.0
+          else
+            lngr=dlog(gr)
+          endif
+          if (g(i).lt.1e-6) then
+            lngrbond=0.01
+          else
+            lngrbond=dlog(g(i))
+          endif
+
+          s2=s2-2*pi*rho*((gr*lngr)-gr+1)*del*r**2.0
+          s2bond=s2bond-2*pi*rho*((g(i)*lngrbond)-g(i)+1)*del*r*r
+
+          
+          rf=dble(i-.5)*del
+          write(23,*) rf,g(i)
+      enddo
+      call nvtxEndRange
+
+      write(24,*)"s2      : ",s2
+      write(24,*)"s2bond  : ",s2bond
+      call cpu_time(finish)
+      print*,"starting at time",start,"and ending at",finish
+      deallocate(x,y,z,g)
+      stop
+end

+ 166 - 0
hpc/nways/nways_labs/nways_MD/English/Fortran/source_code/openmp/SOLUTION/rdf_offload_split_num.f90

@@ -0,0 +1,166 @@
+!/////////////////////////////////////////////////////////////////////////////////////////
+!// Author: Manish Agarwal and Gourav Shrivastava  , IIT Delhi
+!/////////////////////////////////////////////////////////////////////////////////////////
+
+! Copyright (c) 2021 NVIDIA Corporation.  All rights reserved.
+
+module readdata
+      contains
+      subroutine readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      integer i,j
+      integer maxframes,maxatoms
+
+      double precision d(6),xbox,ybox,zbox
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+
+      real*4 dummyr
+      integer*4 nset, natoms, dummyi,nframes,tframes
+      character*4 dummyc
+      
+      open(10,file='../input/alk.traj.dcd',status='old',form='unformatted')
+      read(10) dummyc, tframes,(dummyi,i=1,8),dummyr, (dummyi,i=1,9)
+      read(10) dummyi, dummyr,dummyr
+      read(10) natoms
+      print*,"Total number of frames and atoms are",tframes,natoms
+
+      allocate ( x(maxframes,natoms) )
+      allocate ( y(maxframes,natoms) )
+      allocate ( z(maxframes,natoms) )
+
+      do i = 1,nframes
+           read(10) (d(j),j=1, 6)
+              
+           read(10) (x(i,j),j=1,natoms)
+           read(10) (y(i,j),j=1,natoms)
+           read(10) (z(i,j),j=1,natoms)
+      end do
+      
+      xbox=d(1)
+      ybox=d(3)
+      zbox=d(6)
+      
+      print*,"File reading is done: xbox,ybox,zbox",xbox,ybox,zbox
+      return
+
+      end subroutine readdcd
+ end module readdata
+
+program rdf
+      use readdata
+      use nvtx
+      implicit none
+      integer n,i,j,iconf,ind
+      integer natoms,nframes,nbin
+      integer maxframes,maxatoms
+      parameter (maxframes=10,maxatoms=60000,nbin=2000)
+      real*4, allocatable   :: x(:,:)
+      real*4, allocatable   :: y(:,:)
+      real*4, allocatable   :: z(:,:)
+      double precision dx,dy,dz
+      double precision xbox,ybox,zbox,cut
+      double precision vol,r,del,s2,s2bond
+      double precision, allocatable   ::  g(:)
+      double precision rho,gr,lngr,lngrbond,pi,const,nideal,rf
+      double precision rlower,rupper
+      character  atmnm*4
+      real*4 start,finish
+        
+      open(23,file='RDF.dat',status='unknown')
+      open(24,file='Pair_entropy.dat',status='unknown')
+
+      nframes=10
+         
+      call cpu_time(start)
+
+      print*,"Going to read coordinates"
+      call nvtxStartRange("Read File")
+      call readdcd(maxframes,maxatoms,x,y,z,xbox,ybox,zbox,natoms,nframes)
+      call nvtxEndRange
+
+      allocate ( g(nbin) )
+      g = 0.0d0
+ 
+      pi=dacos(-1.0d0)
+      vol=xbox*ybox*zbox
+      rho=dble(natoms)/vol
+
+      del=xbox/dble(2.0*nbin)
+      write(*,*) "bin width is : ",del
+      cut = dble(xbox * 0.5);
+
+      !pair calculation
+      !$omp target data map(x(:,:), y (:,:), z (:,:), g (:))
+      call nvtxStartRange("Pair Calculation")
+      do iconf=1,nframes
+         if (mod(iconf,1).eq.0) print*,iconf
+         !$omp target teams distribute num_teams(65535)
+         do i=1,natoms
+            !$omp parallel do private(dx,dy,dz,r,ind)
+            do j=1,natoms
+               dx=x(iconf,i)-x(iconf,j)
+               dy=y(iconf,i)-y(iconf,j)
+               dz=z(iconf,i)-z(iconf,j)
+
+               dx=dx-nint(dx/xbox)*xbox
+               dy=dy-nint(dy/ybox)*ybox
+               dz=dz-nint(dz/zbox)*zbox
+   
+               r=dsqrt(dx**2+dy**2+dz**2)
+               ind=int(r/del)+1
+               !if (ind.le.nbin) then
+               if(r<cut)then
+                  !$omp atomic
+                  g(ind)=g(ind)+1.0d0
+               endif
+            enddo
+         enddo
+      enddo
+      call nvtxEndRange
+      !$omp end target data
+
+      !entropy calculation
+      s2=0.01d0
+      s2bond=0.01d0 
+      const=(4.0d0/3.0d0)*pi*rho
+      call nvtxStartRange("Entropy Calculation")
+      do i=1,nbin
+          rlower=dble((i-1)*del)
+          rupper=rlower+del
+          nideal=const*(rupper**3-rlower**3)
+          g(i)=g(i)/(dble(nframes)*dble(natoms)*nideal)
+          r=dble(i)*del
+          if (r.lt.2.0) then
+            gr=0.0
+          else
+            gr=g(i)
+          endif
+
+          if (gr.lt.1e-5) then
+            lngr=0.0
+          else
+            lngr=dlog(gr)
+          endif
+          if (g(i).lt.1e-6) then
+            lngrbond=0.01
+          else
+            lngrbond=dlog(g(i))
+          endif
+
+          s2=s2-2*pi*rho*((gr*lngr)-gr+1)*del*r**2.0
+          s2bond=s2bond-2*pi*rho*((g(i)*lngrbond)-g(i)+1)*del*r*r
+
+          
+          rf=dble(i-.5)*del
+          write(23,*) rf,g(i)
+      enddo
+      call nvtxEndRange
+
+      write(24,*)"s2      : ",s2
+      write(24,*)"s2bond  : ",s2bond
+      call cpu_time(finish)
+      print*,"starting at time",start,"and ending at",finish
+      deallocate(x,y,z,g)
+      stop
+end
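As an aside, the two pieces of arithmetic at the heart of the pair-calculation loop above, the minimum-image wrap (`dx = dx - nint(dx/xbox)*xbox`) and the 1-based histogram binning (`ind = int(r/del) + 1`), can be sketched in Python. The box length, bin width, and separation below are made-up values for illustration only:

```python
def minimum_image(d, box):
    # Wrap a coordinate difference into roughly (-box/2, box/2], mirroring
    # dx = dx - nint(dx/xbox)*xbox in the Fortran pair loop.
    # (Python's round() uses round-half-to-even while Fortran's nint rounds
    # half away from zero; they differ only exactly at the half-box edge.)
    return d - round(d / box) * box

def bin_index(r, delta):
    # 1-based histogram bin, mirroring ind = int(r/del) + 1 in the Fortran code.
    return int(r / delta) + 1

# Illustration with made-up numbers: a separation of 9.0 in a box of
# length 10.0 wraps to -1.0, which lands in bin 5 for a bin width of 0.25.
dx = minimum_image(9.0, 10.0)   # -1.0
ind = bin_index(abs(dx), 0.25)  # 5
```

Because `r < cut` with `cut = xbox/2` and `del = xbox/(2*nbin)`, every accepted pair maps to a bin index no larger than `nbin`, which is why the commented-out `ind.le.nbin` guard in the loop is redundant.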

+ 15 - 8
hpc/nways/nways_labs/nways_MD/English/nways_MD_start.ipynb

@@ -32,19 +32,26 @@
     "<!--**IMPORTANT**: Before we start please download the input file needed for this application from the [Google drive](https://drive.google.com/drive/folders/1aQ_MFyrjBIDMhCczse0S2GQ36MlR6Q_s?usp=sharing) and upload it to the input folder. From the top menu, click on *File*, and *Open* and navigate to `C/source_code/input` directory and copy paste the downloaded input file (`alk.traj.dcd`).-->\n",
     "\n",
     "\n",
-    "### Tutorial Outline\n",
+    "### Bootcamp Outline\n",
     "\n",
     " We will be following the cycle of Analysis - Parallelization - Optimization cycle throughout. To start with let us understand the Nsight tool ecosystem:   \n",
     "\n",
-    "- [Introduction to Profiling](../../profiler/English/jupyter_notebook/profiling.ipynb)\n",
+    "- [Nsight Systems](../../profiler/English/jupyter_notebook/nsight_systems.ipynb)\n",
     "    - Overview of Nsight profiler tools\n",
     "    - Introduction to Nsight Systems\n",
+    "    - How to view the report\n",
     "    - How to use NVTX APIs\n",
-    "    - Introduction to Nsight Compute\n",
     "    - Optimization Steps to parallel programming \n",
     "    \n",
-    "We will be working on porting a radial distribution function (RDF) to GPUs. Please choose one of the programming language to proceed working on RDF. \n",
-    "\n",
+    "- [Nsight Compute](../../profiler/English/jupyter_notebook/nsight_compute.ipynb)\n",
+    "    - Introduction to Nsight Compute\n",
+    "    - Overview of sections\n",
+    "    - Roofline Charts\n",
+    "    - Memory Charts\n",
+    "    - Profiling a kernel using CLI\n",
+    "    - How to view the report\n",
+    "  \n",
+    "We will be working on porting a radial distribution function (RDF) to GPUs. Please choose one of the programming language to proceed working on RDF. Note: Learn about all terminologies used throught the notebooks in the [GPU Architecture Terminologies](C/jupyter_notebook/GPU_Architecture_Terminologies.ipynb) notebook.\n",
     "\n",
     "#### C Programming Language\n",
     "    \n",
@@ -67,7 +74,7 @@
     "Below is the list of GPU programming approaches we will be covering during this course, click on the link below to start exploring:\n",
     "\n",
     "1. [do-concurrent](Fortran/jupyter_notebook/doconcurrent/nways_doconcurrent.ipynb)\n",
-    "2. [OpenACC](Fortran/jupyter_notebook/openacc/nways_openacc.ipynb)<!-- , [OpenACC Advanced](C/jupyter_notebook/openacc/nways_openacc_opt.ipynb)-->\n",
+    "2. [OpenACC](Fortran/jupyter_notebook/openacc/nways_openacc.ipynb)<!-- , [OpenACC Advanced](Fortran/jupyter_notebook/openacc/nways_openacc_opt.ipynb)-->\n",
     "<!--3. [Kokkos](C/jupyter_notebook/kokkos/nways_kokkos.ipynb)-->\n",
     "3. [OpenMP](Fortran/jupyter_notebook/openmp/nways_openmp.ipynb) \n",
     "4. [CUDA Fortran](Fortran/jupyter_notebook/cudafortran/nways_cuda.ipynb) \n",
@@ -81,7 +88,7 @@
    "source": [
     "\n",
     "\n",
-    "### Tutorial Duration\n",
+    "### Bootcamp Duration\n",
     "The lab material will be presented in a 8hr session. Link to material is available for download at the end of the lab.\n",
     "\n",
     "### Content Level\n",
@@ -100,7 +107,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],

+ 9 - 7
hpc/nways/nways_labs/nways_start.ipynb

@@ -29,20 +29,22 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Tutorial Outline\n",
+    "### Bootcamp Outline\n",
     "\n",
-    "During this lab, we will be working on porting mini-applications in Molecular Simulation (MD) domain to GPUs. You can choose to work with either version of this application. Please click on one of the links below to start N Ways to GPU Programming in **MD** for:\n",
+    "During this lab, we will be working on porting mini applications in Molecular Simulation (MD) domain to GPUs. You can choose to work with either version of this application. Please click on one of the below links to start N Ways to GPU Programming in **MD** for:\n",
     "\n",
-    "- [ C and Fortran ](nways_MD/English/nways_MD_start.ipynb) domain\n",
-    "- [Python ](nways_MD/English/nways_MD_start_python.ipynb) domain\n"
+    "- [C and Fortran](nways_MD/English/nways_MD_start.ipynb) domain\n",
+    "- [Python](nways_MD/English/nways_MD_start_python.ipynb) domain"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Tutorial Duration\n",
+
+    "### Bootcamp Duration\n",
     "The lab material will be presented in an 8-hour session. A Link to the material is available for download at the end of the lab.\n",
+
     "\n",
     "### Content Level\n",
     "Beginner, Intermediate\n",
@@ -56,7 +58,7 @@
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
    ]
   }
  ],
@@ -76,7 +78,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.7.4"
   }
  },
  "nbformat": 4,

BIN
hpc/nways/nways_labs/profiler/English/jupyter_notebook/images/SOL-compute.png


BIN
hpc/nways/nways_labs/profiler/English/jupyter_notebook/images/baseline-compute.png


BIN
hpc/nways/nways_labs/profiler/English/jupyter_notebook/images/baseline1-compute.png


+ 0 - 0
hpc/nways/nways_labs/profiler/English/jupyter_notebook/images/compute-cli-1.png


Some files were not shown because too many files changed in this diff