
Merge branch 'gpuhackathons-org:master' into master

Aswinkumar 3 years ago
parent
commit
11f2a5c7bc
100 files changed, 121610 insertions, 692 deletions
  1. 0 662
      ai/DeepStream/English/python/jupyter_notebook/Multi-stream_Multi_DNN_Solution.ipynb
  2. 54 0
      hpc/nways/Dockerfile_python
  3. 40 6
      hpc/nways/README.md
  4. 56 0
      hpc/nways/Singularity_python
  5. 23 0
      hpc/nways/nways_labs/nways_MD/English/Python/LICENSE
  6. 110 0
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/Final_Remarks.ipynb
  7. 323 0
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/cupy/cupy_RDF.ipynb
  8. 814 0
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/cupy/cupy_guide.ipynb
  9. 412 0
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/cupy/serial_RDF.ipynb
  10. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/2d_array.png
  11. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/2d_col_mult.png
  12. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cuda_cupy.png
  13. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy.JPG
  14. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_arch.png
  15. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_intro.png
  16. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_kernel_memory.png
  17. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_nsys1.png
  18. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_nsys2.png
  19. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_nsys3.png
  20. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_summary.png
  21. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/dcdfile.png
  22. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/matrix_block.png
  23. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/matrix_grid.png
  24. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/memory_architecture.png
  25. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_nsys1.png
  26. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_nsys2.png
  27. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_output_files.png
  28. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_summary.png
  29. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_summary1.png
  30. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/output_files.png
  31. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/pair_gpu.png
  32. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/pair_gpu_analysis.png
  33. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/rapids_package.png
  34. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/raw_kernel.png
  35. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/rdf.png
  36. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_cpu_rdf1.png
  37. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_cpu_rdf2.png
  38. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_cupy_profile.png
  39. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_numba_profile.png
  40. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_output1.png
  41. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_output_file.png
  42. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_profile.png
  43. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_profiler1.png
  44. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/thread_blocks.JPG
  45. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/thread_blocks.png
  46. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/thread_position.png
  47. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/ufunc.png
  48. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/workflow.png
  49. 312 0
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/numba/numba_RDF.ipynb
  50. 603 0
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/numba/numba_guide.ipynb
  51. 410 0
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/numba/serial_RDF.ipynb
  52. 137 0
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/serial/rdf_overview.ipynb
  53. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/serial/serial_cpu_rdf.qdrep
  54. 210 0
      hpc/nways/nways_labs/nways_MD/English/Python/source_code/cupy/cupy_rdf.py
  55. 9 0
      hpc/nways/nways_labs/nways_MD/English/Python/source_code/dataset.py
  56. 4 0
      hpc/nways/nways_labs/nways_MD/English/Python/source_code/input/.gitignore
  57. 193 0
      hpc/nways/nways_labs/nways_MD/English/Python/source_code/numba/numba_rdf.py
  58. 160 0
      hpc/nways/nways_labs/nways_MD/English/Python/source_code/serial/nways_serial.py
  59. Binary
      hpc/nways/nways_labs/nways_MD/English/Python/source_code/serial/serial_cpu_rdf.qdrep
  60. 3 10
      hpc/nways/nways_labs/nways_MD/English/nways_MD_start.ipynb
  61. 109 0
      hpc/nways/nways_labs/nways_MD/English/nways_MD_start_python.ipynb
  62. 27 3
      hpc/nways/nways_labs/nways_MD/README.md
  63. 12 11
      hpc/nways/nways_labs/nways_start.ipynb
  64. 45 0
      hpc_ai/PINN/English/python/Start_Here.ipynb
  65. 45 0
      hpc_ai/PINN/English/python/jupyter_notebook/Start_Here.ipynb
  66. 360 0
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/Challenge_1_template.ipynb
  67. 356 0
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/Challenge_2_template.ipynb
  68. 187 0
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/Challenge_3_template.ipynb
  69. 99 0
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/Challenge_CFD_Problem_Notebook.ipynb
  70. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/challenge_results.png
  71. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/challenge_results_param_updated.png
  72. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/chip_2d_geom.png
  73. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/chip_2d_parameterized.png
  74. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/multi_GPU_1.png
  75. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/multi_GPU_2.png
  76. 56943 0
      hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/openfoam/2D_chip_fluid0.csv
  77. 877 0
      hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/Diffusion_Problem_Notebook.ipynb
  78. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/diffusion_bar_geometry.png
  79. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/image_diffusion_problem_bootcamp.png
  80. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/image_diffusion_problem_bootcamp_parameterized.png
  81. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/image_tensorboard.png
  82. 214 0
      hpc_ai/PINN/English/python/jupyter_notebook/introduction/Introductory_Notebook.ipynb
  83. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/introduction/SimNet_v21.06_User_Guide.pdf
  84. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/introduction/every_parabola.png
  85. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/introduction/image_data_driven_cons.png
  86. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/introduction/inverse_parabola.png
  87. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/introduction/inverse_parabola_2.png
  88. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/introduction/single_parabola.png
  89. 606 0
      hpc_ai/PINN/English/python/jupyter_notebook/spring_mass/Spring_Mass_Problem_Notebook.ipynb
  90. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/spring_mass/comparison.png
  91. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/spring_mass/comparison_spring_mass.png
  92. Binary
      hpc_ai/PINN/English/python/jupyter_notebook/spring_mass/spring_mass_drawing.png
  93. 79 0
      hpc_ai/PINN/English/python/source_code/chip_2d/chip_2d_inverse_template.py
  94. 170 0
      hpc_ai/PINN/English/python/source_code/chip_2d/chip_2d_parameterized_template.py
  95. 143 0
      hpc_ai/PINN/English/python/source_code/chip_2d/chip_2d_template.py
  96. 56943 0
      hpc_ai/PINN/English/python/source_code/chip_2d/openfoam/2D_chip_fluid0.csv
  97. 27 0
      hpc_ai/PINN/English/python/source_code/chip_2d/sample_plotting_script.py
  98. 242 0
      hpc_ai/PINN/English/python/source_code/diffusion_1d/diffusion_bar.py
  99. 253 0
      hpc_ai/PINN/English/python/source_code/diffusion_1d/diffusion_bar_paramaterized.py
  100. 0 0
      hpc_ai/PINN/English/python/source_code/diffusion_1d/plot_results.py

+ 0 - 662
ai/DeepStream/English/python/jupyter_notebook/Multi-stream_Multi_DNN_Solution.ipynb

@@ -1,662 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Hackathon Solution : Multi-stream - Multi-DNN pipeline\n",
-    "\n",
-    "In this notebook, you will build a Multi-stream Multi-DNN pipeline using the concepts learned from the previous notebooks. \n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Building the pipeline\n",
-    "\n",
-    "We will build on the Multi-DNN pipeline from [Notebook 3](Introduction_to_Multi-DNN_pipeline.ipynb) and combine it with the batched multi-stream concepts learned in [Notebook 4](Multi-stream_pipeline.ipynb). \n",
-    "\n",
-    "\n",
-    "Here are the illustrations of the Pipeline \n",
-    "![test2](images/test2.png)\n",
-    "![test3](images/test3.png)\n",
-    "\n",
-    "Let us get started with the notebook. You will have to fill in the `TODO` parts of the code to complete the pipeline. Feel free to refer to the previous notebooks for the commands."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Import required libraries \n",
-    "import sys\n",
-    "sys.path.append('../source_code')\n",
-    "import gi\n",
-    "import configparser\n",
-    "gi.require_version('Gst', '1.0')\n",
-    "from gi.repository import GObject, Gst\n",
-    "from gi.repository import GLib\n",
-    "from ctypes import *\n",
-    "import time\n",
-    "import sys\n",
-    "import math\n",
-    "import platform\n",
-    "from common.bus_call import bus_call\n",
-    "from common.FPS import GETFPS\n",
-    "import pyds\n",
-    "\n",
-    "\n",
-    "# Define variables to be used later\n",
-    "fps_streams={}\n",
-    "\n",
-    "PGIE_CLASS_ID_VEHICLE = 0\n",
-    "PGIE_CLASS_ID_BICYCLE = 1\n",
-    "PGIE_CLASS_ID_PERSON = 2\n",
-    "PGIE_CLASS_ID_ROADSIGN = 3\n",
-    "\n",
-    "MUXER_OUTPUT_WIDTH=1920\n",
-    "MUXER_OUTPUT_HEIGHT=1080\n",
-    "\n",
-    "TILED_OUTPUT_WIDTH=1920\n",
-    "TILED_OUTPUT_HEIGHT=1080\n",
-    "OSD_PROCESS_MODE= 0\n",
-    "OSD_DISPLAY_TEXT= 0\n",
-    "pgie_classes_str= [\"Vehicle\", \"TwoWheeler\", \"Person\",\"RoadSign\"]\n",
-    "\n",
-    "################ Three Stream Pipeline ###########\n",
-    "# Define Input and output Stream information \n",
-    "num_sources = 3 \n",
-    "INPUT_VIDEO_1 = '/opt/nvidia/deepstream/deepstream-5.0/samples/streams/sample_720p.h264'\n",
-    "INPUT_VIDEO_2 = '/opt/nvidia/deepstream/deepstream-5.0/samples/streams/sample_720p.h264'\n",
-    "INPUT_VIDEO_3 = '/opt/nvidia/deepstream/deepstream-5.0/samples/streams/sample_720p.h264'\n",
-    "OUTPUT_VIDEO_NAME = \"../source_code/N4/ds_out.mp4\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We define a function `make_elm_or_print_err()` to create our elements and report any errors if the creation fails.\n",
-    "\n",
-    "Elements are created using the `Gst.ElementFactory.make()` function, which is part of the GStreamer library."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "## Make Element or Print Error and any other detail\n",
-    "def make_elm_or_print_err(factoryname, name, printedname, detail=\"\"):\n",
-    "  print(\"Creating\", printedname)\n",
-    "  elm = Gst.ElementFactory.make(factoryname, name)\n",
-    "  if not elm:\n",
-    "     sys.stderr.write(\"Unable to create \" + printedname + \" \\n\")\n",
-    "  if detail:\n",
-    "     sys.stderr.write(detail)\n",
-    "  return elm"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Initialise GStreamer and Create an Empty Pipeline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for i in range(0,num_sources):\n",
-    "        fps_streams[\"stream{0}\".format(i)]=GETFPS(i)\n",
-    "\n",
-    "# Standard GStreamer initialization\n",
-    "GObject.threads_init()\n",
-    "Gst.init(None)\n",
-    "\n",
-    "# Create gstreamer elements */\n",
-    "# Create Pipeline element that will form a connection of other elements\n",
-    "print(\"Creating Pipeline \\n \")\n",
-    "pipeline = Gst.Pipeline()\n",
-    "\n",
-    "if not pipeline:\n",
-    "    sys.stderr.write(\" Unable to create Pipeline \\n\")\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Create Elements that are required for our pipeline\n",
-    "\n",
-    "Compared to the first notebook, we use several queues in this notebook to buffer data as it moves from one plugin to another."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "########### Create Elements required for the Pipeline ########### \n",
-    "\n",
-    "######### Defining Stream 1 \n",
-    "# Source element for reading from the file\n",
-    "source1 = make_elm_or_print_err(\"filesrc\", \"file-source-1\",'file-source-1')\n",
-    "# Since the data format in the input file is elementary h264 stream, we need a h264parser\n",
-    "h264parser1 = make_elm_or_print_err(\"h264parse\", \"h264-parser-1\",\"h264-parser-1\")\n",
-    "# Use nvdec_h264 for hardware accelerated decode on GPU\n",
-    "decoder1 = make_elm_or_print_err(\"nvv4l2decoder\", \"nvv4l2-decoder-1\",\"nvv4l2-decoder-1\")\n",
-    "   \n",
-    "##########\n",
-    "\n",
-    "########## Defining Stream 2 \n",
-    "# Source element for reading from the file\n",
-    "source2 = make_elm_or_print_err(\"filesrc\", \"file-source-2\",\"file-source-2\")\n",
-    "# Since the data format in the input file is elementary h264 stream, we need a h264parser\n",
-    "h264parser2 = make_elm_or_print_err(\"h264parse\", \"h264-parser-2\", \"h264-parser-2\")\n",
-    "# Use nvdec_h264 for hardware accelerated decode on GPU\n",
-    "decoder2 = make_elm_or_print_err(\"nvv4l2decoder\", \"nvv4l2-decoder-2\",\"nvv4l2-decoder-2\")\n",
-    "########### \n",
-    "\n",
-    "########## Defining Stream 3\n",
-    "# Source element for reading from the file\n",
-    "source3 = make_elm_or_print_err(\"filesrc\", \"file-source-3\",\"file-source-3\")\n",
-    "# Since the data format in the input file is elementary h264 stream, we need a h264parser\n",
-    "h264parser3 = make_elm_or_print_err(\"h264parse\", \"h264-parser-3\", \"h264-parser-3\")\n",
-    "# Use nvdec_h264 for hardware accelerated decode on GPU\n",
-    "decoder3 = make_elm_or_print_err(\"nvv4l2decoder\", \"nvv4l2-decoder-3\",\"nvv4l2-decoder-3\")\n",
-    "########### \n",
-    "    \n",
-    "# Create nvstreammux instance to form batches from one or more sources.\n",
-    "streammux = make_elm_or_print_err(\"nvstreammux\", \"Stream-muxer\",\"Stream-muxer\") \n",
-    "# Use nvinfer to run inferencing on decoder's output, behaviour of inferencing is set through config file\n",
-    "pgie = make_elm_or_print_err(\"nvinfer\", \"primary-inference\" ,\"pgie\")\n",
-    "# Use nvtracker to give objects unique-ids\n",
-    "tracker = make_elm_or_print_err(\"nvtracker\", \"tracker\",'tracker')\n",
-    "# Secondary inference for Finding Car Color\n",
-    "sgie1 = make_elm_or_print_err(\"nvinfer\", \"secondary1-nvinference-engine\",'sgie1')\n",
-    "# Secondary inference for Finding Car Make\n",
-    "sgie2 = make_elm_or_print_err(\"nvinfer\", \"secondary2-nvinference-engine\",'sgie2')\n",
-    "# Secondary inference for Finding Car Type\n",
-    "sgie3 = make_elm_or_print_err(\"nvinfer\", \"secondary3-nvinference-engine\",'sgie3')\n",
-    "# Creating Tiler to present more than one streams\n",
-    "tiler=make_elm_or_print_err(\"nvmultistreamtiler\", \"nvtiler\",\"nvtiler\")\n",
-    "# Use convertor to convert from NV12 to RGBA as required by nvosd\n",
-    "nvvidconv = make_elm_or_print_err(\"nvvideoconvert\", \"convertor\",\"nvvidconv\")\n",
-    "# Create OSD to draw on the converted RGBA buffer\n",
-    "nvosd = make_elm_or_print_err(\"nvdsosd\", \"onscreendisplay\",\"nvosd\")\n",
-    "# Creating queue's to buffer incoming data from pgie\n",
-    "queue1=make_elm_or_print_err(\"queue\",\"queue1\",\"queue1\")\n",
-    "# Creating queue's to buffer incoming data from tiler\n",
-    "queue2=make_elm_or_print_err(\"queue\",\"queue2\",\"queue2\")\n",
-    "# Creating queue's to buffer incoming data from nvvidconv\n",
-    "queue3=make_elm_or_print_err(\"queue\",\"queue3\",\"queue3\")\n",
-    "# Creating queue's to buffer incoming data from nvosd\n",
-    "queue4=make_elm_or_print_err(\"queue\",\"queue4\",\"queue4\")\n",
-    "# Creating queue's to buffer incoming data from nvvidconv2\n",
-    "queue5=make_elm_or_print_err(\"queue\",\"queue5\",\"queue5\")\n",
-    "# Creating queue's to buffer incoming data from nvtracker\n",
-    "queue6=make_elm_or_print_err(\"queue\",\"queue6\",\"queue6\")\n",
-    "# Creating queue's to buffer incoming data from sgie1\n",
-    "queue7=make_elm_or_print_err(\"queue\",\"queue7\",\"queue7\")\n",
-    "# Creating queue's to buffer incoming data from sgie2\n",
-    "queue8=make_elm_or_print_err(\"queue\",\"queue8\",\"queue8\")\n",
-    "# Creating queue's to buffer incoming data from sgie3\n",
-    "queue9=make_elm_or_print_err(\"queue\",\"queue9\",\"queue9\")\n",
-    "# Use convertor to convert from NV12 to RGBA as required by nvosd\n",
-    "nvvidconv2 = make_elm_or_print_err(\"nvvideoconvert\", \"convertor2\",\"nvvidconv2\")\n",
-    "# Place an encoder instead of OSD to save as video file\n",
-    "encoder = make_elm_or_print_err(\"avenc_mpeg4\", \"encoder\", \"Encoder\")\n",
-    "# Parse output from Encoder \n",
-    "codeparser = make_elm_or_print_err(\"mpeg4videoparse\", \"mpeg4-parser\", 'Code Parser')\n",
-    "# Create a container\n",
-    "container = make_elm_or_print_err(\"qtmux\", \"qtmux\", \"Container\")\n",
-    "# Create Sink for storing the output \n",
-    "sink = make_elm_or_print_err(\"filesink\", \"filesink\", \"Sink\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now that we have created the elements, we can set the various properties of our pipeline. The configuration files are the same as in the [Multi-DNN Notebook](Introduction_to_Multi-DNN_pipeline.ipynb)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "############ Set properties for the Elements ############\n",
-    "# Set Input Video files \n",
-    "source1.set_property('location', INPUT_VIDEO_1)\n",
-    "source2.set_property('location', INPUT_VIDEO_2)\n",
-    "source3.set_property('location', INPUT_VIDEO_3)\n",
-    "# Set Input Width , Height and Batch Size \n",
-    "streammux.set_property('width', 1920)\n",
-    "streammux.set_property('height', 1080)\n",
-    "streammux.set_property('batch-size', num_sources)\n",
-    "# Timeout in microseconds to wait after the first buffer is available \n",
-    "# to push the batch even if a complete batch is not formed.\n",
-    "streammux.set_property('batched-push-timeout', 4000000)\n",
-    "# Set configuration files for nvinfer \n",
-    "pgie.set_property('config-file-path', \"../source_code/N4/dstest4_pgie_config.txt\")\n",
-    "sgie1.set_property('config-file-path', \"../source_code/N4/dstest4_sgie1_config.txt\")\n",
-    "sgie2.set_property('config-file-path', \"../source_code/N4/dstest4_sgie2_config.txt\")\n",
-    "sgie3.set_property('config-file-path', \"../source_code/N4/dstest4_sgie3_config.txt\")\n",
-    "#Set properties of tracker from tracker_config\n",
-    "config = configparser.ConfigParser()\n",
-    "config.read('../source_code/N4/dstest4_tracker_config.txt')\n",
-    "config.sections()\n",
-    "for key in config['tracker']:\n",
-    "    if key == 'tracker-width' :\n",
-    "        tracker_width = config.getint('tracker', key)\n",
-    "        tracker.set_property('tracker-width', tracker_width)\n",
-    "    if key == 'tracker-height' :\n",
-    "        tracker_height = config.getint('tracker', key)\n",
-    "        tracker.set_property('tracker-height', tracker_height)\n",
-    "    if key == 'gpu-id' :\n",
-    "        tracker_gpu_id = config.getint('tracker', key)\n",
-    "        tracker.set_property('gpu_id', tracker_gpu_id)\n",
-    "    if key == 'll-lib-file' :\n",
-    "        tracker_ll_lib_file = config.get('tracker', key)\n",
-    "        tracker.set_property('ll-lib-file', tracker_ll_lib_file)\n",
-    "    if key == 'll-config-file' :\n",
-    "        tracker_ll_config_file = config.get('tracker', key)\n",
-    "        tracker.set_property('ll-config-file', tracker_ll_config_file)\n",
-    "    if key == 'enable-batch-process' :\n",
-    "        tracker_enable_batch_process = config.getint('tracker', key)\n",
-    "        tracker.set_property('enable_batch_process', tracker_enable_batch_process)\n",
-    "        \n",
-    "## Set batch size \n",
-    "pgie_batch_size=pgie.get_property(\"batch-size\")\n",
-    "print(\"PGIE batch size :\",end='')\n",
-    "print(pgie_batch_size)\n",
-    "if(pgie_batch_size != num_sources):\n",
-    "    print(\"WARNING: Overriding infer-config batch-size\",pgie_batch_size,\" with number of sources \", num_sources,\" \\n\")\n",
-    "    pgie.set_property(\"batch-size\",num_sources)\n",
-    "    \n",
-    "## Set batch size \n",
-    "sgie1_batch_size=sgie1.get_property(\"batch-size\")\n",
-    "print(\"SGIE1 batch size :\",end='')\n",
-    "print(sgie1_batch_size)\n",
-    "if(sgie1_batch_size != num_sources):\n",
-    "    print(\"WARNING: Overriding infer-config batch-size\",sgie1_batch_size,\" with number of sources \", num_sources,\" \\n\")\n",
-    "    sgie1.set_property(\"batch-size\",num_sources)\n",
-    "    \n",
-    "## Set batch size \n",
-    "sgie2_batch_size=sgie2.get_property(\"batch-size\")\n",
-    "print(\"SGIE2 batch size :\",end='')\n",
-    "print(sgie2_batch_size)\n",
-    "if(sgie2_batch_size != num_sources):\n",
-    "    print(\"WARNING: Overriding infer-config batch-size\",sgie2_batch_size,\" with number of sources \", num_sources,\" \\n\")\n",
-    "    sgie2.set_property(\"batch-size\",num_sources)\n",
-    "\n",
-    "## Set batch size \n",
-    "sgie3_batch_size=sgie3.get_property(\"batch-size\")\n",
-    "print(\"SGIE3 batch size :\",end='')\n",
-    "print(sgie3_batch_size)\n",
-    "if(sgie3_batch_size != num_sources):\n",
-    "    print(\"WARNING: Overriding infer-config batch-size\",sgie3_batch_size,\" with number of sources \", num_sources,\" \\n\")\n",
-    "    sgie3.set_property(\"batch-size\",num_sources)\n",
-    "    \n",
-    "# Set display configurations for nvmultistreamtiler    \n",
-    "tiler_rows=int(2)\n",
-    "tiler_columns=int(2)\n",
-    "tiler.set_property(\"rows\",tiler_rows)\n",
-    "tiler.set_property(\"columns\",tiler_columns)\n",
-    "tiler.set_property(\"width\", TILED_OUTPUT_WIDTH)\n",
-    "tiler.set_property(\"height\", TILED_OUTPUT_HEIGHT)\n",
-    "\n",
-    "# Set encoding properties and Sink configs\n",
-    "encoder.set_property(\"bitrate\", 2000000)\n",
-    "sink.set_property(\"location\", OUTPUT_VIDEO_NAME)\n",
-    "sink.set_property(\"sync\", 0)\n",
-    "sink.set_property(\"async\", 0)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "We now link all the elements in the required order and create a GStreamer bus to feed all messages through it. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "########## Add and Link Elements in the Pipeline ########## \n",
-    "\n",
-    "print(\"Adding elements to Pipeline \\n\")\n",
-    "pipeline.add(source1)\n",
-    "pipeline.add(h264parser1)\n",
-    "pipeline.add(decoder1)\n",
-    "pipeline.add(source2)\n",
-    "pipeline.add(h264parser2)\n",
-    "pipeline.add(decoder2)\n",
-    "pipeline.add(source3)\n",
-    "pipeline.add(h264parser3)\n",
-    "pipeline.add(decoder3)\n",
-    "pipeline.add(streammux)\n",
-    "pipeline.add(pgie)\n",
-    "pipeline.add(tracker)\n",
-    "pipeline.add(sgie1)\n",
-    "pipeline.add(sgie2)\n",
-    "pipeline.add(sgie3)\n",
-    "pipeline.add(tiler)\n",
-    "pipeline.add(nvvidconv)\n",
-    "pipeline.add(nvosd)\n",
-    "pipeline.add(queue1)\n",
-    "pipeline.add(queue2)\n",
-    "pipeline.add(queue3)\n",
-    "pipeline.add(queue4)\n",
-    "pipeline.add(queue5)\n",
-    "pipeline.add(queue6)\n",
-    "pipeline.add(queue7)\n",
-    "pipeline.add(queue8)\n",
-    "pipeline.add(queue9)\n",
-    "pipeline.add(nvvidconv2)\n",
-    "pipeline.add(encoder)\n",
-    "pipeline.add(codeparser)\n",
-    "pipeline.add(container)\n",
-    "pipeline.add(sink)\n",
-    "\n",
-    "print(\"Linking elements in the Pipeline \\n\")\n",
-    "\n",
-    "source1.link(h264parser1)\n",
-    "h264parser1.link(decoder1)\n",
-    "\n",
-    "\n",
-    "###### Create Sink pad and connect to decoder's source pad \n",
-    "sinkpad1 = streammux.get_request_pad(\"sink_0\")\n",
-    "if not sinkpad1:\n",
-    "    sys.stderr.write(\" Unable to get the sink pad of streammux \\n\")\n",
-    "    \n",
-    "srcpad1 = decoder1.get_static_pad(\"src\")\n",
-    "if not srcpad1:\n",
-    "    sys.stderr.write(\" Unable to get source pad of decoder \\n\")\n",
-    "    \n",
-    "srcpad1.link(sinkpad1)\n",
-    "\n",
-    "######\n",
-    "\n",
-    "###### Create Sink pad and connect to decoder's source pad \n",
-    "source2.link(h264parser2)\n",
-    "h264parser2.link(decoder2)\n",
-    "\n",
-    "sinkpad2 = streammux.get_request_pad(\"sink_1\")\n",
-    "if not sinkpad2:\n",
-    "    sys.stderr.write(\" Unable to get the sink pad of streammux \\n\")\n",
-    "    \n",
-    "srcpad2 = decoder2.get_static_pad(\"src\")\n",
-    "if not srcpad2:\n",
-    "    sys.stderr.write(\" Unable to get source pad of decoder \\n\")\n",
-    "    \n",
-    "srcpad2.link(sinkpad2)\n",
-    "\n",
-    "######\n",
-    "\n",
-    "###### Create Sink pad and connect to decoder's source pad \n",
-    "source3.link(h264parser3)\n",
-    "h264parser3.link(decoder3)\n",
-    "\n",
-    "sinkpad3 = streammux.get_request_pad(\"sink_2\")\n",
-    "if not sinkpad3:\n",
-    "    sys.stderr.write(\" Unable to get the sink pad of streammux \\n\")\n",
-    "    \n",
-    "srcpad3 = decoder3.get_static_pad(\"src\")\n",
-    "if not srcpad3:\n",
-    "    sys.stderr.write(\" Unable to get source pad of decoder \\n\")\n",
-    "    \n",
-    "srcpad3.link(sinkpad3)\n",
-    "\n",
-    "######\n",
-    "\n",
-    "\n",
-    "streammux.link(queue1)\n",
-    "queue1.link(pgie)\n",
-    "pgie.link(queue2)\n",
-    "queue2.link(tracker)\n",
-    "tracker.link(queue3)\n",
-    "queue3.link(sgie1)\n",
-    "sgie1.link(queue4)\n",
-    "queue4.link(sgie2)\n",
-    "sgie2.link(queue5)\n",
-    "queue5.link(sgie3)\n",
-    "sgie3.link(queue6)\n",
-    "queue6.link(tiler)\n",
-    "tiler.link(queue7)\n",
-    "queue7.link(nvvidconv)\n",
-    "nvvidconv.link(queue8)\n",
-    "queue8.link(nvosd)\n",
-    "nvosd.link(queue9)\n",
-    "queue9.link(nvvidconv2)\n",
-    "nvvidconv2.link(encoder)\n",
-    "encoder.link(codeparser)\n",
-    "codeparser.link(container)\n",
-    "container.link(sink)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create an event loop and feed GStreamer bus messages to it\n",
-    "loop = GObject.MainLoop()\n",
-    "bus = pipeline.get_bus()\n",
-    "bus.add_signal_watch()\n",
-    "bus.connect (\"message\", bus_call, loop)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Our pipeline now carries the metadata forward, but so far we have not done anything with it. As mentioned in the pipeline diagram above, we will now create a callback function that writes the relevant data onto the frame, and attach it to a pad as a buffer probe so that it is called for every buffer. \n",
-    "\n",
-    "This callback function is the same as used in the previous notebook."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# tiler_src_pad_buffer_probe will extract metadata received on the probed pad\n",
-    "# and update params for drawing rectangles, object information, etc.\n",
-    "def tiler_src_pad_buffer_probe(pad,info,u_data):\n",
-    "    # Initializing object counter with 0.\n",
-    "    obj_counter = {\n",
-    "        PGIE_CLASS_ID_VEHICLE:0,\n",
-    "        PGIE_CLASS_ID_PERSON:0,\n",
-    "        PGIE_CLASS_ID_BICYCLE:0,\n",
-    "        PGIE_CLASS_ID_ROADSIGN:0\n",
-    "    }\n",
-    "    # Set frame_number & rectangles to draw as 0 \n",
-    "    frame_number=0\n",
-    "    num_rects=0\n",
-    "    \n",
-    "    gst_buffer = info.get_buffer()\n",
-    "    if not gst_buffer:\n",
-    "        print(\"Unable to get GstBuffer \")\n",
-    "        return\n",
-    "\n",
-    "    # Retrieve batch metadata from the gst_buffer\n",
-    "    # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the\n",
-    "    # C address of gst_buffer as input, which is obtained with hash(gst_buffer)\n",
-    "    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))\n",
-    "    l_frame = batch_meta.frame_meta_list\n",
-    "    while l_frame is not None:\n",
-    "        try:\n",
-    "            # Note that l_frame.data needs a cast to pyds.NvDsFrameMeta\n",
-    "            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)\n",
-    "        except StopIteration:\n",
-    "            break\n",
-    "        \n",
-    "        # Get frame number, number of rectangles to draw and object metadata\n",
-    "        frame_number=frame_meta.frame_num\n",
-    "        num_rects = frame_meta.num_obj_meta\n",
-    "        l_obj=frame_meta.obj_meta_list\n",
-    "        \n",
-    "        while l_obj is not None:\n",
-    "            try:\n",
-    "                # Casting l_obj.data to pyds.NvDsObjectMeta\n",
-    "                obj_meta=pyds.NvDsObjectMeta.cast(l_obj.data)\n",
-    "            except StopIteration:\n",
-    "                break\n",
-    "            # Increment count for the object's class and set the box border color\n",
-    "            obj_counter[obj_meta.class_id] += 1\n",
-    "            obj_meta.rect_params.border_color.set(0.0, 0.0, 1.0, 0.0)\n",
-    "            try: \n",
-    "                l_obj=l_obj.next\n",
-    "            except StopIteration:\n",
-    "                break\n",
-    "        ################## Setting Metadata Display configuration ############### \n",
-    "        # Acquiring a display meta object.\n",
-    "        display_meta=pyds.nvds_acquire_display_meta_from_pool(batch_meta)\n",
-    "        display_meta.num_labels = 1\n",
-    "        py_nvosd_text_params = display_meta.text_params[0]\n",
-    "        # Setting display text to be shown on screen\n",
-    "        py_nvosd_text_params.display_text = \"Frame Number={} Number of Objects={} Vehicle_count={} Person_count={}\".format(frame_number, num_rects, obj_counter[PGIE_CLASS_ID_VEHICLE], obj_counter[PGIE_CLASS_ID_PERSON])\n",
-    "        # Now set the offsets where the string should appear\n",
-    "        py_nvosd_text_params.x_offset = 10\n",
-    "        py_nvosd_text_params.y_offset = 12\n",
-    "        # Font , font-color and font-size\n",
-    "        py_nvosd_text_params.font_params.font_name = \"Serif\"\n",
-    "        py_nvosd_text_params.font_params.font_size = 10\n",
-    "        # Set(red, green, blue, alpha); Set to White\n",
-    "        py_nvosd_text_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)\n",
-    "        # Text background color\n",
-    "        py_nvosd_text_params.set_bg_clr = 1\n",
-    "        # Set(red, green, blue, alpha); set to Black\n",
-    "        py_nvosd_text_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)\n",
-    "        # Using pyds.get_string() to get display_text as string to print in notebook\n",
-    "        print(pyds.get_string(py_nvosd_text_params.display_text))\n",
-    "        pyds.nvds_add_display_meta_to_frame(frame_meta, display_meta)\n",
-    "        \n",
-    "        ############################################################################\n",
-    "        # Get frame rate through this probe\n",
-    "        fps_streams[\"stream{0}\".format(frame_meta.pad_index)].get_fps()\n",
-    "        try:\n",
-    "            l_frame=l_frame.next\n",
-    "        except StopIteration:\n",
-    "            break\n",
-    "\n",
-    "    return Gst.PadProbeReturn.OK\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tiler_src_pad=sgie3.get_static_pad(\"src\")\n",
-    "if not tiler_src_pad:\n",
-    "    sys.stderr.write(\" Unable to get src pad \\n\")\n",
-    "else:\n",
-    "    tiler_src_pad.add_probe(Gst.PadProbeType.BUFFER, tiler_src_pad_buffer_probe, 0)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now, with everything defined, we can start the playback and listen to the events."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# List the sources\n",
-    "print(\"Now playing...\")\n",
-    "start_time = time.time()\n",
-    "print(\"Starting pipeline \\n\")\n",
-    "# start playback and listen to events\n",
-    "pipeline.set_state(Gst.State.PLAYING)\n",
-    "try:\n",
-    "    loop.run()\n",
-    "except:\n",
-    "    pass\n",
-    "# cleanup\n",
-    "print(\"Exiting app\\n\")\n",
-    "pipeline.set_state(Gst.State.NULL)\n",
-    "print(\"--- %s seconds ---\" % (time.time() - start_time))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Convert video profile to be compatible with Jupyter notebook\n",
-    "!ffmpeg -loglevel panic -y -an -i ../source_code/N4/ds_out.mp4 -vcodec libx264 -pix_fmt yuv420p -profile:v baseline -level 3 ../source_code/N4/output.mp4"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Display the Output\n",
-    "from IPython.display import HTML\n",
-    "HTML(\"\"\"\n",
-    " <video width=\"960\" height=\"540\" controls>\n",
-    " <source src=\"../source_code/N4/output.mp4\">\n",
-    " </video>\n",
-    "\"\"\".format())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Licensing\n",
-    "  \n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
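
Note (illustration only, not part of this commit): the deleted solution notebook above wires up its three input branches by hand (source1/h264parser1/decoder1, and so on) and connects each decoder to a requested `sink_N` pad on `nvstreammux`. Below is a minimal sketch of that same pattern written as a loop; it assumes the DeepStream/GStreamer Python environment the notebook imports (`gi`, `Gst`, the `nvv4l2decoder` and `nvstreammux` plugins), and the `INPUT_VIDEOS` paths are hypothetical placeholders.

```python
# Sketch only: per-stream branch creation for an nvstreammux-based pipeline.
# Assumes the DeepStream plugins used in the notebook are installed; the
# INPUT_VIDEOS list below is a hypothetical placeholder.
import sys
import gi
gi.require_version('Gst', '1.0')
from gi.repository import Gst

Gst.init(None)
pipeline = Gst.Pipeline()
streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
pipeline.add(streammux)

INPUT_VIDEOS = ["stream0.h264", "stream1.h264", "stream2.h264"]

for i, path in enumerate(INPUT_VIDEOS):
    # filesrc -> h264parse -> nvv4l2decoder: one branch per input stream
    src = Gst.ElementFactory.make("filesrc", f"file-source-{i}")
    parser = Gst.ElementFactory.make("h264parse", f"h264-parser-{i}")
    decoder = Gst.ElementFactory.make("nvv4l2decoder", f"nvv4l2-decoder-{i}")
    if not all((src, parser, decoder)):
        sys.stderr.write(f"Unable to create elements for stream {i}\n")
        continue
    src.set_property("location", path)
    for elem in (src, parser, decoder):
        pipeline.add(elem)
    src.link(parser)
    parser.link(decoder)
    # Request a new sink pad on the muxer and link the decoder's src pad to it
    sinkpad = streammux.get_request_pad(f"sink_{i}")
    srcpad = decoder.get_static_pad("src")
    if not sinkpad or not srcpad:
        sys.stderr.write(f"Unable to get pads for stream {i}\n")
    else:
        srcpad.link(sinkpad)

# The muxer batch size should match the number of linked streams
streammux.set_property("batch-size", len(INPUT_VIDEOS))
```

Keeping the stream list in one place makes it easier to keep the muxer `batch-size` consistent with the number of sources, mirroring the notebook's `num_sources` checks.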

+ 54 - 0
hpc/nways/Dockerfile_python

@@ -0,0 +1,54 @@
+
+# Copyright (c) 2021 NVIDIA Corporation.  All rights reserved. 
+
+# To build the docker container, run: $ sudo docker build -t nways-labs:latest .
+# To run: $ sudo docker run --rm -it --runtime nvidia -p 8888:8888 nways-labs:latest
+# Finally, open http://localhost:8888/
+
+#FROM nvcr.io/nvidia/nvhpc:20.11-devel-cuda_multi-ubuntu20.04
+FROM nvidia/cuda:11.2.2-devel-ubuntu20.04
+
+
+RUN apt-get -y update && \
+        DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends \
+        python3-dev \
+        python3-pip python3-setuptools nginx zip make build-essential libtbb-dev && \
+        rm -rf /var/lib/apt/lists/*
+
+RUN pip3 install --no-cache-dir -U setuptools pip
+RUN pip3 install gdown
+RUN pip3 install --no-cache-dir jupyter
+RUN pip3 install --no-cache-dir "cupy-cuda112==8.6.0" \
+    numba numpy scipy 
+       
+        
+############################################
+# NVIDIA nsight-systems-2020.5.1, nsight-compute-2020.2.1
+RUN apt-get update -y && \
+        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        apt-transport-https \
+        ca-certificates \
+        gnupg \
+        wget && \
+        apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80 && \
+        echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list &&\
+        apt-get update -y
+        
+
+RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.5.1 nsight-compute-2020.2.1 
+
+# TO COPY the data
+COPY nways_labs/ /labs/
+
+
+RUN python3 /labs/nways_MD/English/Python/source_code/dataset.py
+
+#################################################
+ENV LD_LIBRARY_PATH="/usr/local/lib:/usr/local/lib/python3.8/dist-packages:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+ENV PATH="/opt/nvidia/nsight-systems/2020.5.1/bin:/opt/nvidia/nsight-compute/2020.2.1:/usr/local/bin:/bin:/usr/local/cuda/bin:/usr/bin${PATH:+:${PATH}}"
+
+RUN pip3 install --no-cache-dir MDAnalysis
+
+ADD nways_labs/ /labs
+WORKDIR /labs
+CMD service nginx start && jupyter notebook --no-browser --allow-root --ip=0.0.0.0 --port=8888 --NotebookApp.token="" --notebook-dir=/labs
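
Note (not part of this commit): the snippet below is a minimal sanity check, assuming the packages installed above (cupy-cuda112, numba, MDAnalysis) and a visible NVIDIA GPU; it can be run with `python3` inside the built container to confirm the environment before starting the Python labs.

```python
# Hedged sketch: quick environment check for the image built from Dockerfile_python.
# Assumes cupy-cuda112, numba and MDAnalysis were installed as in the Dockerfile
# and that a CUDA-capable GPU is visible inside the container.
import cupy as cp
import numba
import MDAnalysis

print("CuPy:", cp.__version__)
print("Numba:", numba.__version__)
print("MDAnalysis:", MDAnalysis.__version__)

# Trivial device round-trip to confirm the CUDA runtime is usable
x = cp.arange(10)
print("Sum on GPU:", int(x.sum()))
print("Device:", cp.cuda.runtime.getDeviceProperties(0)["name"])
```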

+ 40 - 6
hpc/nways/README.md

@@ -1,10 +1,25 @@
 # Nways to GPU programming
-This repository contains mini applications for GPU Bootcamps (**Tested on NVIDIA driver 440.82**). This labs comprises Nways to GPU programming and contains below topics:
+This repository contains mini applications for GPU Bootcamps (**Tested on NVIDIA driver 440.82**). This lab comprises Nways to GPU programming implemented with the following programming approaches:
+
+**C programming language**
   - OpenACC
   - Kokkos
   - PSTL
   - OpenMP
   - CUDA C
+  
+  
+  
+**Fortran programming language**
+  - do-concurrent
+  - OpenACC
+  - OpenMP
+  - CUDA Fortran
+  
+  
+**Python programming language**
+  - CuPy
+  - Numba
 
 We showcase the above approaches using mini applications from the MD and CFD domains.
 
@@ -18,13 +33,25 @@ To run this tutorial you will need a machine with NVIDIA GPU.
 To start with, you will have to build a Docker or Singularity container.
 
 ### Docker Container
-To build a docker container, run: 
+To build a docker container for **C & Fortran**, run:
+
 `sudo docker build -t <imagename>:<tagnumber> .`
 
-For instance:
+For instance :
+
+
 `sudo docker build -t myimage:1.0 .`
 
-The code labs have been written using Jupyter notebooks and a Dockerfile has been built to simplify deployment. In order to serve the docker instance for a student, it is necessary to expose port 8000 from the container, for instance, the following command would expose port 8000 inside the container as port 8000 on the lab machine:
+While in the case of **Python**, you have to specify the Dockerfile name using the **"-f"** flag, so run:
+
+`sudo docker build -f <dockerfile name> -t <imagename>:<tagnumber> .`
+
+For example :
+
+`sudo docker build -f Dockerfile_python -t myimage:1.0 .`
+
+
+For C, Fortran, and Python, the code labs have been written using Jupyter notebooks and a Dockerfile has been built to simplify deployment. In order to serve the docker instance for a student, it is necessary to expose port 8888 from the container. For example, the following command would expose port 8888 inside the container as port 8888 on the lab machine:
 
 `sudo docker run --rm -it --gpus=all -p 8888:8888 myimage:1.0`
 
@@ -40,13 +67,20 @@ Once inside the container, open the jupyter notebook in browser: http://localhos
 
 ### Singularity Container
 
-To build the singularity container, run: 
+To build the singularity container for **C & Fortran**, run: 
+
 `singularity build nways.simg Singularity`
 
-and copy the files to your local machine to make sure changes are stored locally:
+While in the case of **Python**, run:
+
+`singularity build nways.simg Singularity_python`
+
+Thereafter, for C, Fortran, and Python, copy the files to your local machine to make sure changes are stored locally:
+
 `singularity run nways.simg cp -rT /labs ~/labs`
 
 Then, run the container:
+
 `singularity run --nv nways.simg jupyter notebook --notebook-dir=~/labs`
 
 Once inside the container, open the jupyter notebook in browser: http://localhost:8888, and start the lab by clicking on the `nways_start.ipynb` notebook.

+ 56 - 0
hpc/nways/Singularity_python

@@ -0,0 +1,56 @@
+# Copyright (c) 2021 NVIDIA Corporation.  All rights reserved. 
+
+Bootstrap: docker
+#FROM: nvcr.io/nvidia/nvhpc:20.11-devel-cuda_multi-ubuntu20.04
+FROM:  nvidia/cuda:11.2.2-devel-ubuntu20.04
+
+%environment
+    export XDG_RUNTIME_DIR=
+    export PATH="$PATH:/usr/local/bin:/usr/bin"
+    export PATH=/opt/nvidia/nsight-systems/2020.5.1/bin:/opt/nvidia/nsight-compute/2020.2.1:/bin:/usr/local/cuda/bin:$PATH
+    export LD_LIBRARY_PATH="/usr/include/python3.8:/usr/local/lib:/usr/local/lib/python3.8/dist-packages:/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+    
+
+%post
+    build_tmp=$(mktemp -d) && cd ${build_tmp}
+
+    apt-get -y update
+    apt-get -y dist-upgrade 
+    DEBIAN_FRONTEND=noninteractive apt-get -yq install --no-install-recommends python3-dev \
+	    m4 vim-nox emacs-nox nano zip \
+ 	    python3-pip python3-setuptools nginx zip make build-essential libtbb-dev
+    rm -rf /var/lib/apt/cache/* 
+
+    pip3 install --no-cache-dir -U setuptools pip
+    pip3 install gdown
+    pip3 install --no-cache-dir jupyter
+    pip3 install --no-cache-dir "cupy-cuda112==8.6.0" \
+    numba numpy scipy
+    pip3 install --upgrade MDAnalysis
+
+    apt-get install --no-install-recommends -y build-essential 
+
+    python3 /labs/nways_MD/English/Python/source_code/dataset.py
+
+
+# NVIDIA nsight-systems-2020.5.1, nsight-compute-2020.2.1
+    apt-get update -y   
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg wget
+    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys F60F4B3D7FA2AF80
+    echo "deb https://developer.download.nvidia.com/devtools/repos/ubuntu2004/amd64/ /" >> /etc/apt/sources.list.d/nsight.list 
+    apt-get update -y 
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends nsight-systems-2020.5.1 nsight-compute-2020.2.1 
+    apt-get install --no-install-recommends -y build-essential
+    
+    
+    
+    cd /
+    rm -rf ${build_tmp}
+
+%files
+    nways_labs/ /labs
+%runscript
+    "$@"
+
+%labels
+    AUTHOR Tosin

+ 23 - 0
hpc/nways/nways_labs/nways_MD/English/Python/LICENSE

@@ -0,0 +1,23 @@
+Copyright (c) 2018, National Center for Computational Sciences, Oak Ridge National Laboratory
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff is too large to display
+ 110 - 0
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/Final_Remarks.ipynb


+ 323 - 0
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/cupy/cupy_RDF.ipynb

@@ -0,0 +1,323 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# \n",
+    "\n",
+    "\n",
+    "# CuPy Lab 3:  Solution\n",
+    "---\n",
+    "\n",
+    "#### [<<--CuPy Lab 2](serial_RDF.ipynb)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import cupy as cp\n",
+    "import numpy as np\n",
+    "import math\n",
+    "import cupy.cuda.nvtx as nvtx\n",
+    "from MDAnalysis.lib.formats.libdcd import DCDFile\n",
+    "from timeit import default_timer as timer\n",
+    "\n",
+    "\n",
+    "\n",
+    "def dcdreadhead(infile):\n",
+    "    nconf   = infile.n_frames\n",
+    "    _infile = infile.header\n",
+    "    numatm  = _infile['natoms']\n",
+    "    return numatm, nconf\n",
+    "\n",
+    "def dcdreadframe(infile, numatm, nconf):\n",
+    "\n",
+    "    d_x = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "    d_y = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "    d_z = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "\n",
+    "    for i in range(nconf):\n",
+    "        data = infile.readframes(i, i+1)\n",
+    "        box = data[1]\n",
+    "        atomset = data[0][0]\n",
+    "        xbox = round(box[0][0], 8)\n",
+    "        ybox = round(box[0][2],8)\n",
+    "        zbox = round(box[0][5], 8)\n",
+    "\n",
+    "        for row in range(numatm):\n",
+    "            d_x[i * numatm + row] = round(atomset[row][0], 8) # 0 is column\n",
+    "            d_y[i * numatm + row] = round(atomset[row][1], 8)  # 1 is column\n",
+    "            d_z[i * numatm + row] = round(atomset[row][2], 8)  # 2 is column\n",
+    "\n",
+    "    return xbox, ybox, zbox, d_x, d_y, d_z"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### The pair_gpu Raw Kernel acceleration code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raw_kernel = cp.RawKernel(r'''\n",
+    "extern \"C\"\n",
+    "__global__ void pair_gpu(\n",
+    "\t\tconst double* d_x, const double* d_y, const double* d_z, \n",
+    "\t\tunsigned long long int *d_g2, int numatm, int nconf, \n",
+    "\t\tconst double xbox,const double ybox,const double zbox,int d_bin,  unsigned long long int bl)\n",
+    "{\n",
+    "\tdouble r,cut,dx,dy,dz;\n",
+    "\tint ig2,id1,id2;\n",
+    "\tdouble box;\n",
+    "\tbox=min(xbox,ybox);\n",
+    "\tbox=min(box,zbox);\n",
+    "\n",
+    "\tdouble del=box/(2.0*d_bin);\n",
+    "\tcut=box*0.5;\n",
+    "\tint thisi;\n",
+    "\tdouble n;\n",
+    "\n",
+    "\tint i = blockIdx.x * blockDim.x + threadIdx.x;\n",
+    "\tint maxi = min(int(0.5*numatm*(numatm-1)-(bl*65535*128)),(65535*128));\n",
+    "\n",
+    "\tif ( i < maxi ) {\n",
+    "\t\tthisi=bl*65535*128+i;\n",
+    "\n",
+    "\t\tn=(0.5)*(1+ ((double) sqrt (1.0+4.0*2.0*thisi)));\n",
+    "\t\tid1=int(n);\n",
+    "\t\tid2=thisi-(0.5*id1*(id1-1));\n",
+    "\n",
+    "\t\tfor (int frame=0;frame<nconf;frame++){\n",
+    "\t\t\tdx=d_x[frame*numatm+id1]-d_x[frame*numatm+id2];\n",
+    "\t\t\tdy=d_y[frame*numatm+id1]-d_y[frame*numatm+id2];\n",
+    "\t\t\tdz=d_z[frame*numatm+id1]-d_z[frame*numatm+id2];\n",
+    "\n",
+    "\t\t\tdx=dx-xbox*(round(dx/xbox));\n",
+    "\t\t\tdy=dy-ybox*(round(dy/ybox));\n",
+    "\t\t\tdz=dz-zbox*(round(dz/zbox));\n",
+    "\n",
+    "\t\t\tr=sqrtf(dx*dx+dy*dy+dz*dz);\n",
+    "\t\t\tif (r<cut) {\n",
+    "\t\t\t\tig2=(int)(r/del);\n",
+    "\t\t\t\tatomicAdd(&d_g2[ig2],2) ;\n",
+    "\t\t\t}\n",
+    "\t\t}\n",
+    "\t}\n",
+    "}\n",
+    "''', 'pair_gpu')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### The Main Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from MDAnalysis.lib.formats.libdcd import DCDFile\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "def main():\n",
+    "   # start = timer()\n",
+    "    ########## Input Details ###########\n",
+    "    global xbox, ybox, zbox\n",
+    "    inconf = 10\n",
+    "    nbin   =np.int32(2000)\n",
+    "    xbox   = np.float32(0)\n",
+    "    ybox   =np.float32(0)\n",
+    "    zbox   = np.float32(0)\n",
+    "    \n",
+    "    fileDir = os.path.dirname(os.path.realpath('__file__'))\n",
+    "    dataRoot = Path(fileDir).parents[1]\n",
+    "    file = os.path.join(dataRoot, 'source_code/input/alk.traj.dcd')\n",
+    "    \n",
+    "    infile = DCDFile(file)\n",
+    "    pairfile = open(\"cupy_RDF.dat\", \"w+\")\n",
+    "    stwo = open(\"cupy_Pair_entropy.dat\", \"w+\")\n",
+    "\n",
+    "    numatm, nconf = dcdreadhead(infile)\n",
+    "    print(\"Dcd file has {} atoms and {} frames\".format(numatm, nconf))\n",
+    "    if inconf > nconf:\n",
+    "        print(\"nconf is reset to {}\".format(nconf))\n",
+    "    else:\n",
+    "        nconf = inconf\n",
+    "    print(\"Calculating RDF for {} frames\".format(nconf))\n",
+    "    #numatm = 100\n",
+    "    sizef =  nconf * numatm\n",
+    "    sizebin = nbin\n",
+    "\n",
+    "    ########### reading cordinates ##############\n",
+    "    nvtx.RangePush(\"Read_File\")\n",
+    "    xbox, ybox, zbox, d_x, d_y, d_z = dcdreadframe(infile, numatm, nconf)\n",
+    "    nvtx.RangePop()  # pop for reading file\n",
+    "    print(\"Reading of input file is completed\")\n",
+    "    ############# Stream from Host to Device #########################\n",
+    "    d_x = cp.asarray(d_x)\n",
+    "    d_y = cp.asarray(d_y)\n",
+    "    d_z = cp.asarray(d_z)\n",
+    "    d_g2 = cp.zeros(sizebin, dtype=cp.int64)\n",
+    "\n",
+    "    ############################## RAW KERNEL #################################################\n",
+    "    nthreads = 128;\n",
+    "    near2 = nthreads * (int(0.5 * numatm * (numatm - 1) / nthreads) + 1);\n",
+    "    nblock = (near2 / nthreads);\n",
+    "    print(\" Initial blocks are {} and now changing to\".format(nblock))\n",
+    "    maxblock = 65535\n",
+    "    blockloop = int(nblock / maxblock)\n",
+    "    if blockloop != 0:\n",
+    "        nblock = maxblock\n",
+    "    print(\"{} and will run over {} blockloops\".format(nblock, blockloop+1))\n",
+    "\n",
+    "    nvtx.RangePush(\"CuPy_Pair_gpu_Circulation\")\n",
+    "    #t1 = timer()\n",
+    "    for bl in range(blockloop+1):\n",
+    "        raw_kernel((nblock,),(nthreads,), (d_x, d_y, d_z, d_g2, numatm, nconf, xbox, ybox, zbox, nbin, bl)) ## cupy raw kernel\n",
+    "    \n",
+    "    cp.cuda.Device(0).synchronize()\n",
+    "    #print(\"Kernel compute time:\", timer() - t1)\n",
+    "    \n",
+    "    d_g2 = cp.asnumpy(d_g2)\n",
+    "    nvtx.RangePop()  # pop for Pair Calculation\n",
+    "    #############################################################################################\n",
+    "    pi = math.acos(np.int64(-1.0))\n",
+    "    rho = (numatm) / (xbox * ybox * zbox)\n",
+    "    norm = (np.int64(4.0) * pi * rho) / np.int64(3.0)\n",
+    "    g2 = np.zeros(nbin, dtype=np.float32)\n",
+    "    s2 =np.int64(0.0); s2bond = np.int64(0.0)\n",
+    "    lngrbond = np.float32(0.0)\n",
+    "    box = min(xbox, ybox)\n",
+    "    box = min(box, zbox)\n",
+    "    _del =box / (np.int64(2.0) * nbin)\n",
+    "    gr = np.float32(0.0)\n",
+    "    # loop to calculate entropy\n",
+    "    nvtx.RangePush(\"Entropy_Calculation\")\n",
+    "    for i in range(nbin):\n",
+    "        rl = (i) * _del\n",
+    "        ru = rl + _del\n",
+    "        nideal = norm * (ru * ru * ru - rl * rl * rl)\n",
+    "        g2[i] = d_g2[i] / (nconf * numatm * nideal)\n",
+    "        r = (i) * _del\n",
+    "        temp = (i + 0.5) * _del\n",
+    "        pairfile.write(str(temp) + \" \" + str(g2[i]) + \"\\n\")\n",
+    "\n",
+    "        if r < np.int64(2.0):\n",
+    "            gr = np.int64(0.0)\n",
+    "        else:\n",
+    "            gr = g2[i]\n",
+    "        if gr < 1e-5:\n",
+    "            lngr = np.int64(0.0)\n",
+    "        else:\n",
+    "            lngr = math.log(gr)\n",
+    "        if g2[i] < 1e-6:\n",
+    "            lngrbond = np.int64(0.0)\n",
+    "        else:\n",
+    "            lngrbond = math.log(g2[i])\n",
+    "        s2 = s2 - (np.int64(2.0) * pi * rho * ((gr * lngr) - gr + np.int64(1.0)) * _del * r * r)\n",
+    "        s2bond = s2bond - np.int64(2.0) * pi * rho * ((g2[i] * lngrbond) - g2[i] + np.int64(1.0)) * _del * r * r\n",
+    "\n",
+    "    nvtx.RangePop()  # pop for entropy Calculation\n",
+    "    stwo.writelines(\"s2 value is {}\\n\".format(s2))\n",
+    "    stwo.writelines(\"s2bond value is {}\".format(s2bond))\n",
+    "    \n",
+    "    print(\"\\n s2 value is {}\\n\".format(s2))\n",
+    "    print(\"s2bond value is {}\\n\".format(s2bond))\n",
+    "\n",
+    "    print(\"#Freeing Host memory\")\n",
+    "    del (d_x)\n",
+    "    del (d_y)\n",
+    "    del (d_z)\n",
+    "    del (d_g2)\n",
+    "    print(\"#Number of atoms processed: {}  \\n\".format(numatm))\n",
+    "    print(\"#number of confs processed: {} \\n\".format(nconf))\n",
+    "    #total_time = timer() - start\n",
+    "    #print(\"total time spent:\", total_time)\n",
+    "    \n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    main()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "### Output Files\n",
+    "\n",
+    "<img src=\"../images/output_files.png\"/>\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### Profiling\n",
+    "\n",
+    "<img src=\"../images/cupy_nsys1.png\"/>\n",
+    "<img src=\"../images/cupy_nsys3.png\"/>\n",
+    "\n",
+    "\n",
+    "---\n",
+    "\n",
+    "# <p style=\"text-align:center;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\"> <a href=../../../nways_MD_start_python.ipynb>HOME</a></p>\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "# Links and Resources\n",
+    "\n",
+    "[NVIDIA Nsight Systems](https://docs.nvidia.com/nsight-systems/)\n",
+    "\n",
+    "[NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-downloads)\n",
+    "\n",
+    "**NOTE**: To be able to see the Nsight Systems profiler output, please download the latest version of Nsight Systems from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "\n",
+    "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "## Licensing \n",
+    "\n",
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

File diff is too large to display
+ 814 - 0
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/cupy/cupy_guide.ipynb


+ 412 - 0
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/cupy/serial_RDF.ipynb

@@ -0,0 +1,412 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# \n",
+    "\n",
+    "\n",
+    "# CuPy Lab 2:  Serial Code Lab Assignment\n",
+    "---\n",
+    "\n",
+    "#### [<<--CuPy Lab 1](cupy_guide.ipynb)\n",
+    "\n",
+    "\n",
+    "## A Recap on RDF\n",
+    "\n",
+    "- The radial distribution function (RDF) denoted as g(r) defines the probability of finding a particle at a distance r from another tagged particle. The RDF is strongly dependent on the type of matter so will vary greatly for solids, gases and liquids. You can read more [here](https://en.wikibooks.org/wiki/Molecular_Simulation/Radial_Distribution_Functions).\n",
+    "- The code complexity of the algorithm is $N^{2}$. \n",
+    "- The input data for the serial code is fetched from a DCD binary trajectory file.\n",
+    "\n",
+    "\n",
+    "### The Serial Code\n",
+    "- The cell below consists of two functions, namely **dcdreadhead** and **dcdreadframe**\n",
+    "- The **dcdreadhead** function computes the total number of frames and atoms from the DCDFile **(input/alk.traj.dcd)**, while the **dcdreadframe** function reads 10 frames and 6720 atoms (note: each frame contains 6720 atoms) using the MDAnalysis library. \n",
+    "- Both functions run on the Host (CPU) and are being called from the function **main()**.\n",
+    "### <u>Cell 1</u>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import cupy as cp\n",
+    "import numpy as np\n",
+    "import math\n",
+    "import cupy.cuda.nvtx as nvtx\n",
+    "from MDAnalysis.lib.formats.libdcd import DCDFile\n",
+    "from timeit import default_timer as timer\n",
+    "\n",
+    "\n",
+    "def dcdreadhead(infile):\n",
+    "    nconf   = infile.n_frames\n",
+    "    _infile = infile.header\n",
+    "    numatm  = _infile['natoms']\n",
+    "    return numatm, nconf\n",
+    "\n",
+    "def dcdreadframe(infile, numatm, nconf):\n",
+    "\n",
+    "    d_x = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "    d_y = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "    d_z = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "\n",
+    "    for i in range(nconf):\n",
+    "        data = infile.readframes(i, i+1)\n",
+    "        box = data[1]\n",
+    "        atomset = data[0][0]\n",
+    "        xbox = round(box[0][0], 8)\n",
+    "        ybox = round(box[0][2],8)\n",
+    "        zbox = round(box[0][5], 8)\n",
+    "\n",
+    "        for row in range(numatm):\n",
+    "            d_x[i * numatm + row] = round(atomset[row][0], 8) # 0 is column\n",
+    "            d_y[i * numatm + row] = round(atomset[row][1], 8)  # 1 is column\n",
+    "            d_z[i * numatm + row] = round(atomset[row][2], 8)  # 2 is column\n",
+    "\n",
+    "    return xbox, ybox, zbox, d_x, d_y, d_z"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "##  pair_gpu function\n",
+    "\n",
+    "- The pair_gpu is the function where the main task of the RDF serial implementation is being executed. The function computes differences in xyz DCD frames.\n",
+    "- The essence of njit(just-in-time) decorator is to get pair_gpu function to compile under no python mode, and this is important for good performance. \n",
+    "- The decorator **@njit** or **@jit(nopython=True)** ensures that an exception is raised when compilation fails as a way to alert the user that a bug is found within the decorated function. You can read more [here](https://numba.pydata.org/numba-doc/latest/user/performance-tips.html).\n",
+    "\n",
+    "### <u>Cell 2</u>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from numba import njit\n",
+    "\n",
+    "@njit()\n",
+    "def pair_gpu(d_x, d_y, d_z, d_g2, numatm, nconf, xbox, ybox, zbox, d_bin):\n",
+    "    box = min(xbox, ybox)\n",
+    "    box = min(box, zbox)\n",
+    "    _del = box / (2.0 * d_bin)\n",
+    "    cut = box * 0.5\n",
+    "\n",
+    "    for frame in range(nconf):\n",
+    "       # print(\"\\n {}\".format(frame))\n",
+    "        for id1 in range(numatm):\n",
+    "            for id2 in range(numatm):\n",
+    "                dx = d_x[frame * numatm + id1] - d_x[frame * numatm + id2]\n",
+    "                dy = d_y[frame * numatm + id1] - d_y[frame * numatm + id2]\n",
+    "                dz = d_z[frame * numatm + id1] - d_z[frame * numatm + id2 ]\n",
+    "                dx = dx - xbox * (round(dx / xbox))\n",
+    "                dy = dy - ybox * (round(dy / ybox))\n",
+    "                dz = dz - zbox * (round(dz / zbox))\n",
+    "\n",
+    "                r = math.sqrt(dx * dx + dy * dy + dz * dz)\n",
+    "                if r < cut :\n",
+    "                    ig2  = int((r/_del))\n",
+    "                    d_g2[ig2] = d_g2[ig2] + 1\n",
+    "\n",
+    "    return d_g2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Brief Analysis on Tasks Performed within pair_gpu function\n",
+    "- The graphic below identifies the various operations executed in the pair_gpu function. This function executes three nested loops using tricky indexing manipulation within the arrays.\n",
+    "\n",
+    "\n",
+    "<img src=\"../images/pair_gpu.png\" width=\"80%\"/>\n",
+    "\n",
+    "- The indexing flow for the operation 1 is simulated using the graphic below. Each green box simulates the subtraction operation within the two inner loops (id1 & id2) while the indexes written in blue signifies the outer-most loop (frame) which iterates 10 times. \n",
+    "\n",
+    "<img src=\"../images/pair_gpu_analysis.png\" width=\"80%\"/>\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "### The Main Function\n",
+    "- This is the entry point of the program where every other function including the **pair_gpu** function are called. The output of the main function is written into two files. An image version of the output files (\"**cupy_RDF.dat**\" & \"**cupy_Pair_entropy.dat**\") are displayed below the code cell.\n",
+    "\n",
+    "### <u>Cell 3</u>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from MDAnalysis.lib.formats.libdcd import DCDFile\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "def main():\n",
+    "    start = timer()\n",
+    "    ########## Input Details ###########\n",
+    "    inconf = 10\n",
+    "    nbin   = 2000\n",
+    "    global xbox, ybox, zbox\n",
+    "    \n",
+    "    fileDir = os.path.dirname(os.path.realpath('__file__'))\n",
+    "    dataRoot = Path(fileDir).parents[1]\n",
+    "    file = os.path.join(dataRoot, 'source_code/input/alk.traj.dcd')\n",
+    "    \n",
+    "    infile = DCDFile(file)\n",
+    "    pairfile = open(\"RDF.dat\", \"w+\")\n",
+    "    stwo     = open(\"Pair_entropy.dat\", \"w+\")\n",
+    "\n",
+    "    numatm, nconf = dcdreadhead(infile)\n",
+    "    print(\"Dcd file has {} atoms and {} frames\".format(numatm, nconf))\n",
+    "    if inconf > nconf:\n",
+    "        print(\"nconf is reset to {}\".format(nconf))\n",
+    "    else:\n",
+    "        nconf = inconf\n",
+    "    print(\"Calculating RDF for {} frames\".format(nconf))\n",
+    "    #numatm = 50\n",
+    "    sizef =  nconf * numatm\n",
+    "    sizebin = nbin\n",
+    "    ########### reading cordinates ##############\n",
+    "    nvtx.RangePush(\"Read_File\")\n",
+    "    xbox, ybox, zbox, h_x, h_y, h_z = dcdreadframe(infile, numatm, nconf)\n",
+    "    nvtx.RangePop() # pop for reading file\n",
+    "\n",
+    "    h_g2 = np.zeros(sizebin, dtype=np.longlong)\n",
+    "    print(\"Reading of input file is completed\")\n",
+    "   \n",
+    "    print(\"\\n {} {}\".format(nconf, numatm))\n",
+    "    ############# This where we will concentrate #########################\n",
+    "    nvtx.RangePush(\"Pair_Circulation\")\n",
+    "    h_g2 = pair_gpu(h_x, h_y, h_z, h_g2, numatm, nconf, xbox, ybox, zbox, nbin)\n",
+    "    nvtx.RangePop() #pop for Pair Calculation\n",
+    "    ######################################################################\n",
+    "    \n",
+    "    pi = math.acos(np.int64(-1.0))\n",
+    "    rho = (numatm) / (xbox * ybox * zbox)\n",
+    "    norm = (np.int64(4.0) * pi * rho) / np.int64(3.0)\n",
+    "    g2 = np.zeros(nbin, dtype=np.float32)\n",
+    "    s2 = np.int64(0.0);\n",
+    "    s2bond = np.int64(0.0)\n",
+    "    lngrbond = np.int64(0.0)\n",
+    "    box = min(xbox, ybox)\n",
+    "    box = min(box, zbox)\n",
+    "    _del = box / (np.int64(2.0) * nbin)\n",
+    "    gr = np.float32(0.0)\n",
+    "    # loop to calculate entropy\n",
+    "    nvtx.RangePush(\"Entropy_Calculation\")\n",
+    "    for i in range(nbin):\n",
+    "        rl = (i) * _del\n",
+    "        ru = rl + _del\n",
+    "        nideal = norm * (ru * ru * ru - rl * rl * rl)\n",
+    "        g2[i] = h_g2[i] / (nconf * numatm * nideal)\n",
+    "        r = (i) * _del\n",
+    "        temp = (i + 0.5) * _del\n",
+    "        \n",
+    "        #writing to file\n",
+    "        pairfile.write(str(temp) + \" \" + str(g2[i]) + \"\\n\")\n",
+    "\n",
+    "        if r < np.int64(2.0):\n",
+    "            gr = np.int64(0.0)\n",
+    "        else:\n",
+    "            gr = g2[i]\n",
+    "        if gr < 1e-5:\n",
+    "            lngr = np.int64(0.0)\n",
+    "        else:\n",
+    "            lngr = math.log(gr)\n",
+    "        if g2[i] < 1e-6:\n",
+    "            lngrbond = np.int64(0.0)\n",
+    "        else:\n",
+    "            lngrbond = math.log(g2[i])\n",
+    "        s2 = s2 - (np.int64(2.0) * pi * rho * ((gr * lngr) - gr + np.int64(1.0)) * _del * r * r)\n",
+    "        s2bond = s2bond - np.int64(2.0) * pi * rho * ((g2[i] * lngrbond) - g2[i] + np.int64(1.0)) * _del * r * r\n",
+    "\n",
+    "    nvtx.RangePop() # pop for entropy Calculation\n",
+    "    \n",
+    "    #writing s2 and s2bond to file\n",
+    "    stwo.writelines(\"s2 value is {}\\n\".format(s2))\n",
+    "    stwo.writelines(\"s2bond value is {}\".format(s2bond))\n",
+    "    \n",
+    "    # printing s2 and s2bond to jupyter output\n",
+    "    print(\"\\n s2 value is {}\\n\".format(s2))\n",
+    "    print(\"s2bond value is {}\\n\".format(s2bond))\n",
+    "\n",
+    "    print(\"#Freeing Host memory\")\n",
+    "    del(h_x)\n",
+    "    del(h_y)\n",
+    "    del(h_z)\n",
+    "    del(h_g2)\n",
+    "    print(\"#Number of atoms processed: {}  \\n\".format(numatm))\n",
+    "    print(\"#number of confs processed: {} \\n\".format(nconf))\n",
+    "    \n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    #main()  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "###  Output Files\n",
+    "<table>\n",
+    "    <tr>\n",
+    "    <td>\n",
+    "         <img src=\"../images/serial_output_file.png\" width=\"95%\" />\n",
+    "    </td>\n",
+    "    </tr>\n",
+    "</table>\n",
+    "\n",
+    "\n",
+    "---\n",
+    "\n",
+    "# Lab Task \n",
+    "\n",
+    "1. **Run the serial code from cell 1, 2, & 3**.\n",
+    "    - Remove the **\"#\"** behind the **main()** before running the cell 3:\n",
+    "    ```python\n",
+    "       if __name__ == \"__main__\":\n",
+    "                main()\n",
+    "    ```\n",
+    "2. **Now, let's start modifying the original code to CuPy code constructs.**\n",
+    "> From the top menu, click on File, and Open **nways_serial.py** from the current directory at **Python/source_code/cupy** directory. Remember to SAVE your code after changes, and then run the cell below. \n",
+    "> Hints: focus on the **pair_gpu** function and you may need to modify few lines in the **main** function as well."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%run ../../source_code/serial/nways_serial.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The output should be the following:\n",
+    "\n",
+    "```\n",
+    "s2 value is -2.43191\n",
+    "s2bond value is -3.87014\n",
+    "```\n",
+    "\n",
+    "3. **Profile the code by running the cell bellow** "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!cd ../../source_code/serial&& nsys profile --stats=true --force-overwrite true -o serial_cpu_rdf python3 nways_serial.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To view the profiler report, you need to [download the profiler output](../../source_code/serial/serial_cpu_rdf.qdrep) and open it via the graphical user interface (GUI). A sample expected profile report is shown below:\n",
+    "\n",
+    "<img src=\"../images/cupy_nsys1.png\"/>\n",
+    "<img src=\"../images/cupy_nsys3.png\"/>\n",
+    "\n",
+    "From the profile report, we can see that the pair_gpu function now takes milliseconds to run as compared to the serial version which takes more than 3 seconds as shown [here](../serial/rdf_overview.ipynb). \n",
+    " \n",
+    "\n",
+    "---\n",
+    "### [View ](../../source_code/cupy/cupy_rdf.py) or [Run](../../jupyter_notebook/cupy/cupy_RDF.ipynb) Solution\n",
+    "---\n",
+    "\n",
+    "\n",
+    "\n",
+    "## Post-Lab Summary\n",
+    "\n",
+    "If you would like to download this lab for later viewing, we recommend you go to your browsers File menu (not the Jupyter notebook file menu) and save the complete web page. This will ensure the images are copied as well. You can also execute the following cell block to create a zip-file of the files you've been working on and download it with the link below.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "cd ..\n",
+    "rm -f nways_files.zip\n",
+    "zip -r nways_files.zip *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**After** executing the above zip command, you should be able to download the zip file [here](../nways_files.zip).\n",
+    "\n",
+    "**IMPORTANT**: Please click on **HOME** to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "# <p style=\"text-align:center;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\"> <a href=../../../nways_MD_start_python.ipynb>HOME</a></p>\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "## Links and Resources\n",
+    "\n",
+    "[NVIDIA Nsight System](https://docs.nvidia.com/nsight-systems/)\n",
+    "\n",
+    "[NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-downloads)\n",
+    "\n",
+    "**NOTE**: To be able to see the Nsight System profiler output, please download the latest version of the Nsight System from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "\n",
+    "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
+    "\n",
+    "--- \n",
+    "\n",
+    "\n",
+    "## Licensing \n",
+    "\n",
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/2d_array.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/2d_col_mult.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cuda_cupy.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy.JPG


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_arch.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_intro.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_kernel_memory.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_nsys1.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_nsys2.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_nsys3.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/cupy_summary.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/dcdfile.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/matrix_block.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/matrix_grid.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/memory_architecture.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_nsys1.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_nsys2.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_output_files.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_summary.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/numba_summary1.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/output_files.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/pair_gpu.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/pair_gpu_analysis.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/rapids_package.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/raw_kernel.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/rdf.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_cpu_rdf1.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_cpu_rdf2.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_cupy_profile.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_numba_profile.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_output1.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_output_file.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_profile.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/serial_profiler1.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/thread_blocks.JPG


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/thread_blocks.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/thread_position.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/ufunc.png


Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/images/workflow.png


+ 312 - 0
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/numba/numba_RDF.ipynb

@@ -0,0 +1,312 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "# \n",
+    "\n",
+    "\n",
+    "# Numba Lab 3: Solution\n",
+    "---\n",
+    "\n",
+    "#### [<<-- Numba Lab 2](serial_RDF.ipynb)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import math\n",
+    "import cupy.cuda.nvtx as nvtx\n",
+    "from MDAnalysis.lib.formats.libdcd import DCDFile\n",
+    "from timeit import default_timer as timer\n",
+    "import numba.cuda as cuda\n",
+    "\n",
+    "\n",
+    "def dcdreadhead(infile):\n",
+    "    nconf   = infile.n_frames\n",
+    "    _infile = infile.header\n",
+    "    numatm  = _infile['natoms']\n",
+    "    return numatm, nconf\n",
+    "\n",
+    "def dcdreadframe(infile, numatm, nconf):\n",
+    "\n",
+    "    d_x = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "    d_y = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "    d_z = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "\n",
+    "    for i in range(nconf):\n",
+    "        data = infile.readframes(i, i+1)\n",
+    "        box = data[1]\n",
+    "        atomset = data[0][0]\n",
+    "        xbox = round(box[0][0], 8)\n",
+    "        ybox = round(box[0][2],8)\n",
+    "        zbox = round(box[0][5], 8)\n",
+    "\n",
+    "        for row in range(numatm):\n",
+    "            d_x[i * numatm + row] = round(atomset[row][0], 8) # 0 is column\n",
+    "            d_y[i * numatm + row] = round(atomset[row][1], 8)  # 1 is column\n",
+    "            d_z[i * numatm + row] = round(atomset[row][2], 8)  # 2 is column\n",
+    "\n",
+    "    return xbox, ybox, zbox, d_x, d_y, d_z\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "\n",
+    "### The Numba CUDA-jit  pair_gpu  acceleration code "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@cuda.jit\n",
+    "def pair_gpu_kernel(d_x, d_y,d_z, d_g2, numatm, nconf, xbox, ybox,zbox,d_bin, bl):\n",
+    "    box = min(xbox, ybox)\n",
+    "    box = min(box, zbox)\n",
+    "    _del= box / (2.0 * d_bin)\n",
+    "    cut = box * 0.5;\n",
+    "    i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x\n",
+    "    maxi = min(int(0.5 * numatm * (numatm - 1) - (bl * 65535 * 128)), (65535 * 128))\n",
+    "\n",
+    "    if i < maxi:\n",
+    "        thisi=bl * 65535 * 128+i\n",
+    "        n = (0.5) * (1+ ( math.sqrt (1.0+4.0 * 2.0 * thisi)))\n",
+    "        id1 = int(n)\n",
+    "        id2 = thisi-(0.5 * id1 * (id1-1))\n",
+    "        for frame in range(0, nconf):\n",
+    "            t1 = int(frame * numatm+id1)\n",
+    "            t2 = int(frame * numatm+id2)\n",
+    "            dx = d_x[t1] - d_x[t2]\n",
+    "            dy = d_y[t1] - d_y[t2]\n",
+    "            dz = d_z[t1] - d_z[t2]\n",
+    "            dx = dx - xbox * (round(dx / xbox))\n",
+    "            dy = dy - ybox * (round(dy / ybox))\n",
+    "            dz = dz - zbox * (round(dz / zbox))\n",
+    "\n",
+    "            r= math.sqrt(dx * dx+dy * dy+dz * dz)\n",
+    "            if r < cut:\n",
+    "                ig2=(int)(r / _del )\n",
+    "                cuda.atomic.add(d_g2, ig2, 2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "### The Main Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from MDAnalysis.lib.formats.libdcd import DCDFile\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "def main():\n",
+    "    #start = timer()\n",
+    "    ########## Input Details ###########\n",
+    "    global xbox, ybox, zbox\n",
+    "    inconf = 10\n",
+    "    nbin   =np.int32(2000)\n",
+    "    xbox   = np.float32(0)\n",
+    "    ybox   =np.float32(0)\n",
+    "    zbox   = np.float32(0)\n",
+    "    \n",
+    "    fileDir = os.path.dirname(os.path.realpath('__file__'))\n",
+    "    dataRoot = Path(fileDir).parents[1]\n",
+    "    file = os.path.join(dataRoot, 'source_code/input/alk.traj.dcd')\n",
+    "    \n",
+    "    infile = DCDFile(file)\n",
+    "\n",
+    "    pairfile = open(\"numba_RDF.dat\", \"w+\")\n",
+    "    stwo = open(\"numba_Pair_entropy.dat\", \"w+\")\n",
+    "\n",
+    "    numatm, nconf = dcdreadhead(infile)\n",
+    "\n",
+    "    print(\"Dcd file has {} atoms and {} frames\".format(numatm, nconf))\n",
+    "    if inconf > nconf:\n",
+    "        print(\"nconf is reset to {}\".format(nconf))\n",
+    "    else:\n",
+    "        nconf = inconf\n",
+    "    print(\"Calculating RDF for {} frames\".format(nconf))\n",
+    "\n",
+    "    #numatm = 100\n",
+    "    sizef =  nconf * numatm\n",
+    "    sizebin = nbin\n",
+    "\n",
+    "    ########### reading cordinates ##############\n",
+    "    nvtx.RangePush(\"Read_File\")\n",
+    "    xbox, ybox, zbox, d_x, d_y, d_z = dcdreadframe(infile, numatm, nconf)\n",
+    "    nvtx.RangePop()  # pop for reading file\n",
+    "    print(\"Reading of input file is completed\")\n",
+    "\n",
+    "    ############################## Numba KERNEL #################################################\n",
+    "\n",
+    "    nthreads = 128;\n",
+    "    near2 = nthreads * (int(0.5 * numatm * (numatm - 1) / nthreads) + 1);\n",
+    "    nblock = (near2 / nthreads);\n",
+    "    print(\" Initial blocks are {} and now changing to\".format(nblock))\n",
+    "    maxblock = 65535\n",
+    "    blockloop = int(nblock / maxblock)\n",
+    "    if blockloop != 0:\n",
+    "        nblock = maxblock\n",
+    "    print(\"{} and will run over {} blockloops \\n\".format(nblock, blockloop+1))\n",
+    "\n",
+    "    # cp.cuda.runtime.memset(d_g2,0,sizebin)\n",
+    "    d_g2 = np.zeros(sizebin, dtype=np.int64)\n",
+    "    d_g2 = cuda.to_device(d_g2) #numba copy to device\n",
+    "\n",
+    "    nvtx.RangePush(\"Pair_Circulation_Numba\")\n",
+    "    #t1 = timer()\n",
+    "    for bl in range(blockloop+1):\n",
+    "        pair_gpu_kernel[nblock,nthreads ](d_x, d_y, d_z, d_g2, numatm, nconf, xbox, ybox, zbox, nbin, bl)  ## numba jit kernel\n",
+    "    \n",
+    "    cuda.synchronize()\n",
+    "   # print(\"Kernel compute time:\", timer() - t1)\n",
+    "    d_g2  = d_g2.copy_to_host() ## numba copy to host\n",
+    "    nvtx.RangePop()  # pop for Pair Calculation\n",
+    "\n",
+    "    pi = math.acos(np.int64(-1.0))\n",
+    "    rho = (numatm) / (xbox * ybox * zbox)\n",
+    "    norm = (np.int64(4.0) * pi * rho) / np.int64(3.0)\n",
+    "    g2 = np.zeros(nbin, dtype=np.float32)\n",
+    "    s2 =np.int64(0.0); s2bond = np.int64(0.0)\n",
+    "    lngrbond = np.float32(0.0)\n",
+    "    box = min(xbox, ybox)\n",
+    "    box = min(box, zbox)\n",
+    "    _del =box / (np.int64(2.0) * nbin)\n",
+    "    gr = np.float32(0.0)\n",
+    "    # loop to calculate entropy\n",
+    "    nvtx.RangePush(\"Entropy_Calculation\")\n",
+    "    for i in range(nbin):\n",
+    "        rl = (i) * _del\n",
+    "        ru = rl + _del\n",
+    "        nideal = norm * (ru * ru * ru - rl * rl * rl)\n",
+    "        g2[i] = d_g2[i] / (nconf * numatm * nideal)\n",
+    "        r = (i) * _del\n",
+    "        temp = (i + 0.5) * _del\n",
+    "        pairfile.write(str(temp) + \" \" + str(g2[i]) + \"\\n\")\n",
+    "\n",
+    "        if r < np.int64(2.0):\n",
+    "            gr = np.int64(0.0)\n",
+    "        else:\n",
+    "            gr = g2[i]\n",
+    "        if gr < 1e-5:\n",
+    "            lngr = np.int64(0.0)\n",
+    "        else:\n",
+    "            lngr = math.log(gr)\n",
+    "        if g2[i] < 1e-6:\n",
+    "            lngrbond = np.int64(0.0)\n",
+    "        else:\n",
+    "            lngrbond = math.log(g2[i])\n",
+    "        s2 = s2 - (np.int64(2.0) * pi * rho * ((gr * lngr) - gr + np.int64(1.0)) * _del * r * r)\n",
+    "        s2bond = s2bond - np.int64(2.0) * pi * rho * ((g2[i] * lngrbond) - g2[i] + np.int64(1.0)) * _del * r * r\n",
+    "\n",
+    "    nvtx.RangePop()  # pop for entropy Calculation\n",
+    "    stwo.writelines(\"s2 value is {}\\n\".format(s2))\n",
+    "    stwo.writelines(\"s2bond value is {}\".format(s2bond))\n",
+    "    \n",
+    "    print(\"\\n s2 value is {}\\n\".format(s2))\n",
+    "    print(\"s2bond value is {}\\n\".format(s2bond))\n",
+    "     \n",
+    "    print(\"#Freeing Host memory\")\n",
+    "    del (d_x)\n",
+    "    del (d_y)\n",
+    "    del (d_z)\n",
+    "    del (d_g2)\n",
+    "    print(\"#Number of atoms processed: {}  \\n\".format(numatm))\n",
+    "    print(\"#number of confs processed: {} \\n\".format(nconf))\n",
+    "    #total_time = timer() - start\n",
+    "    #print(\"total time spent:\", total_time)\n",
+    "    \n",
+    "if __name__ == \"__main__\":\n",
+    "    main()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Output Files\n",
+    "\n",
+    "<img src=\"../images/numba_output_files.png\"/>\n",
+    "\n",
+    "### Profiling Sample\n",
+    "\n",
+    "<img src=\"../images/numba_nsys1.png\"/>\n",
+    "<img src=\"../images/numba_nsys2.png\"/>\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "**IMPORTANT**: Please click on **HOME** to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "# <p style=\"text-align:center;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\"> <a href=../../../nways_MD_start_python.ipynb>HOME</a></p>\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "# Links and Resources\n",
+    "\n",
+    "[NVIDIA Nsight System](https://docs.nvidia.com/nsight-systems/)\n",
+    "\n",
+    "[NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-downloads)\n",
+    "\n",
+    "**NOTE**: To be able to see the Nsight System profiler output, please download the latest version of the Nsight System from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "\n",
+    "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "## Licensing \n",
+    "\n",
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 603 - 0
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/numba/numba_guide.ipynb

@@ -0,0 +1,603 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "\n",
+    "# \n",
+    "\n",
+    "#  Numba Lab1: Numba For CUDA GPU\n",
+    "---\n",
+    "\n",
+    "## Learning Objectives\n",
+    "- **The goal of this lab is to:**\n",
+    "    -   enable you to quickly start using Numba (beginner to advanced level)\n",
+    "    -   teach you to apply the concepts of CUDA GPU programming to HPC field(s); and\n",
+    "    -   show you how to achieve computational speedup on GPUs to maximize the throughput of your HPC implementation.\n",
+    "\n",
+    "\n",
+    "Before we begin, let's execute the cell below to display information about the CUDA driver and GPUs running on the server by running the `nvidia-smi` command. To do this, execute the cell block below by clicking on it with your mouse, and pressing Ctrl-Enter, or pressing the play button in the toolbar above. You should see some output returned below the grey cell."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!nvidia-smi"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "     \n",
+    "##  Introduction\n",
+    "- Numba is a just-in-time (jit) compiler for Python that works best on code that uses NumPy arrays, functions, and loops. Numba has sets of decorators that can be specified at the top of user-defined functions to determine how they are compiled.  \n",
+    "- Numba supports CUDA GPU programming model. Decorated function written in python is compiled into a CUDA kernel to speed up the execution rate. \n",
+    "- A kernel written in Numba automatically has direct access to NumPy arrays. This shows great support for data visibility between the host (CPU) and the device (GPU). \n",
+    "\n",
+    "\n",
+    "###  Definition of Terms\n",
+    "- The CPU is called a **Host**.  \n",
+    "- The GPU is called a **Device**.\n",
+    "- A GPU function launched by the host and executed on the device is called a **Kernel**.\n",
+    "- A GPU function executed on the device and can only be called from the device is called a **Device function**.\n",
+    "\n",
+    "### Note\n",
+    "- It is recommended to visit the NVIDIA official documentation web page and read through [CUDA C programming guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide), because most CUDA programming features exposed by Numba map directly to the CUDA C language offered by NVIDIA. \n",
+    "- Numba does not implement these CUDA features:\n",
+    "     - dynamic parallelism\n",
+    "     - texture memory\n",
+    "\n",
+    "## CUDA Kernel\n",
+    "- In CUDA, written code can be executed by hundreds or thousands of threads at a single run, hence, a solution is modeled after the following thread hierarchy: \n",
+    "    - **Grid**: A kernel executed as a collection of blocks. \n",
+    "    - **Thread Block**: Collection of threads that can communicate via shared memory. Each thread is executed by a core.\n",
+    "    - **Thread**: Single execution units that run kernels on GPU.\n",
+    "- Numba exposes three kinds of GPU memory: \n",
+    "    - global device memory  \n",
+    "    - shared memory \n",
+    "    - local memory \n",
+    "- Memory access should be carefully considered in order to keep bandwidth contention at minimal.\n",
+    "\n",
+    " <img src=\"../images/thread_blocks.JPG\"/> <img src=\"../images/memory_architecture.png\"/> \n",
+    "\n",
+    "### Kernel Declaration\n",
+    "- A kernel function is a GPU function that is called from a CPU code. It requires specifying the number of blocks and threads per block and cannot explicitly return a value except through a passed array. \n",
+    "- A kernel can be called multiple times with varying number of blocks per grid and threads per block after it has been compiled once.\n",
+    "\n",
+    "Example:\n",
+    "\n",
+    "```python\n",
+    "@cuda.jit\n",
+    "def arrayAdd(array_A, array_B, array_out):\n",
+    "    #...code body ...\n",
+    "```\n",
+    "###### Kernel Invocation\n",
+    "- A kernel is typically launched in the following way:\n",
+    "```python\n",
+    "threadsperblock = 128\n",
+    "N = array_out.size\n",
+    "blockspergrid = ( N + (threadsperblock - 1))// threadsperblock\n",
+    "arrayAdd[blockspergrid, threadsperblock](array_A, array_B, array_out)\n",
+    "```\n",
+    "\n",
+    "###### Choosing Block Size\n",
+    "- The block size determines how many threads share a given area of the shared memory.\n",
+    "- The block size must be large enough to accommodate all computation units. See more details [here](https://docs.nvidia.com/cuda/cuda-c-programming-guide/).\n",
+    "\n",
+    "### Thread Positioning \n",
+    "- When running a kernel, the kernel function’s code is executed by every thread once. Therefore, it is important to uniquely identify distinct threads.\n",
+    "- The default way to determine a thread position in a grid and block is to manually compute the corresponding array positions:\n",
+    "\n",
+    "\n",
+    "<img src=\"../images/thread_position.png\"/>\n",
+    "\n",
+    "\n",
+    "```python\n",
+    "threadsperblock = 128\n",
+    "N = array_out.size\n",
+    "\n",
+    "@cuda.jit\n",
+    "def arrayAdd(array_A, array_B, array_out):\n",
+    "    tid = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x\n",
+    "    if tid < N: #Check array boundaries\n",
+    "        array_out[tid] =  array_A[tid] + array_B[tid]\n",
+    "\n",
+    "#Unless you are sure the block size and grid size are a divisor of your array size, you must check boundaries as shown in the code block above. \n",
+    "```\n",
+    "### Example 1: Addition on 1D-Arrays\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numba.cuda as cuda\n",
+    "import numpy as np\n",
+    "\n",
+    "N = 500000\n",
+    "threadsperblock = 1000\n",
+    "\n",
+    "@cuda.jit()\n",
+    "def arrayAdd(array_A, array_B, array_out):\n",
+    "    tid = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x\n",
+    "    if tid < N:\n",
+    "        array_out[tid] = array_A[tid] + array_B[tid]\n",
+    "\n",
+    "\n",
+    "        \n",
+    "array_A = np.arange(N, dtype=np.int32)\n",
+    "array_B = np.arange(N, dtype=np.int32)\n",
+    "array_out = np.zeros(N, dtype=np.int32)\n",
+    "\n",
+    "blockpergrid  = N + (threadsperblock - 1) // threadsperblock\n",
+    "\n",
+    "arrayAdd[blockpergrid, threadsperblock](array_A, array_B, array_out)\n",
+    "\n",
+    "print(\"result: {} \".format(array_out))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**From Example 1:** \n",
+    "> - N is the size of the array and the number of threads in a single block is 128.\n",
+    "> - The **cuda.jit()** decorator indicates that the function (arrayAdd) below is a device kernel and should run parallel. The **tid** is the estimate of a unique index for each thread in the device memory grid: \n",
+    ">> **tid = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x**.\n",
+    "> - **array_A** and **array_B** are input data, while **array_out** is the output array and is already preload with zeros.\n",
+    "> - The statement **blockpergrid  = N + (threadsperblock - 1) // threadsperblock** computes the size of block per grid. This line of code is commonly use as the default formular to estimate the number of blocks per grid in GPU programming documentations.\n",
+    "> - **arrayAdd[blockpergrid, threadsperblock](array_A, array_B, array_out)** indicate a call to a kernel function **arrayAdd** having the number of blocks per grid and number of threads per block in a square bracket, while kernel arguments are in a round bracket.\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "###  Matrix Multiplication on 2D Array \n",
+    "\n",
+    "<img src=\"../images/2d_array.png\"/>\n",
+    "\n",
+    "<img src=\"../images/2d_col_mult.png\"/>\n",
+    "\n",
+    "> **Note**\n",
+    "> - **Approach 2** would not be possible if the matrix size exceeds the maximum number of threads per block on the device, while **Approach 1** would continue to execute. The latest GPUs have maximum of 1024 threads per thread block. \n",
+    "\n",
+    "### Example 2:  Matrix multiplication "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numba.cuda as cuda\n",
+    "import numpy as np\n",
+    "import math\n",
+    "\n",
+    "N = 4\n",
+    "@cuda.jit()\n",
+    "def MatrixMul2D(array_A, array_B, array_out):\n",
+    "   row, col = cuda.grid(2)\n",
+    "   if row < array_out.shape[0] and col < array_out.shape[1]:\n",
+    "      for k in range(N):\n",
+    "         array_out[row][col]+= array_A[row][k] * array_B[k][col]\n",
+    "\n",
+    "\n",
+    "array_A   = np.array([[0,0,0,0],[1,1,1,1],[2,2,2,2],[3,3,3,3]], dtype=np.int32)\n",
+    "array_B   = np.array([[0,1,2,3],[0,1,2,3],[0,1,2,3],[0,1,2,3]], dtype=np.int32)\n",
+    "array_out = np.zeros(N*N, dtype=np.int32).reshape(N, N)\n",
+    "\n",
+    "threadsperblock = (2,2)\n",
+    "blockpergrid_x  = (math.ceil( N / threadsperblock[0]))\n",
+    "blockpergrid_y  = (math.ceil( N / threadsperblock[1]))\n",
+    "blockpergrid    = (blockpergrid_x, blockpergrid_y)\n",
+    "\n",
+    "MatrixMul2D[blockpergrid,threadsperblock](array_A, array_B, array_out)\n",
+    "\n",
+    "print(\"array_A:\\n {}\\n\".format(array_A))\n",
+    "print(\"array_B:\\n {}\\n\".format(array_B))\n",
+    "print(\"array_A * array_B:\\n {}\".format(array_out))\n",
+    "\n",
+    "#Note\n",
+    "#The cuda.grid() returns the thread ID in X and Y (row & col) direction of the memory grid\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example 3: A 225 × 225 Matrix Multiplication"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "N = 225\n",
+    "\n",
+    "@cuda.jit()\n",
+    "def MatrixMul2D(array_A, array_B, array_out):\n",
+    "   x, y = cuda.grid(2)\n",
+    "   if x < array_out.shape[0] and y < array_out.shape[1]:\n",
+    "      for k in range(N):\n",
+    "         array_out[x][y] += array_A[x][k] * array_B[k][y]\n",
+    "\n",
+    "threadsperblock = (25,25)\n",
+    "array_A = np.arange((N*N), dtype=np.int32).reshape(N,N)\n",
+    "array_B = np.arange((N*N), dtype=np.int32).reshape(N,N)\n",
+    "array_out = np.zeros((N*N), dtype=np.int32).reshape(N,N)\n",
+    "\n",
+    "blockpergrid_x  = (math.ceil( N / threadsperblock[0]))\n",
+    "blockpergrid_y  = (math.ceil( N / threadsperblock[1]))\n",
+    "blockpergrid    = (blockpergrid_x, blockpergrid_y)\n",
+    "\n",
+    "MatrixMul2D[blockpergrid,threadsperblock](array_A, array_B, array_out)\n",
+    "\n",
+    "print(array_out)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Thread Reuse \n",
+    "\n",
+    "- It is possible to specify a few numbers of threads for a data size such that threads are reused to complete the computation of the entire data. This is one of the approaches used when a data to be computed is larger than the maximum number of threads available in a device memory. \n",
+    "- This statement is used in a while loop: ***tid += cuda.blockDim.x * cuda.gridDim.x***\n",
+    "- An example is given below to illustrate thread reuse. In the example, a small number of threads is specified on purpose in order to show the possibility of this approach. \n",
+    "\n",
+    "\n",
+    "#### Example 4: "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numba.cuda as cuda\n",
+    "import numpy as np\n",
+    "\n",
+    "N = 500000\n",
+    "threadsperblock = 1000\n",
+    "\n",
+    "@cuda.jit\n",
+    "def arrayAdd(array_A, array_B, array_out):\n",
+    "   tid = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x\n",
+    "   while tid < N:\n",
+    "      array_out[tid] = array_A[tid] + array_B[tid]\n",
+    "      tid += cuda.blockDim.x * cuda.gridDim.x\n",
+    "\n",
+    "array_A = np.arange(N, dtype=np.int32)\n",
+    "array_B = np.arange(N, dtype=np.int32)\n",
+    "array_out = np.zeros(N, dtype=np.int32)\n",
+    "\n",
+    "arrayAdd[1, threadsperblock](array_A, array_B, array_out)\n",
+    "\n",
+    "print(\"result: {} \".format(array_out))\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "> **Note**\n",
+    "> - The task in **example 4** is the same as in **example 1** but with limited number of threads specified, however, the same result was achieved. \n",
+    "> - Note that this approach may delegate more threads than required. In the code above, an excess of 1 block of threads may be delegated.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Memory Management\n",
+    "\n",
+    "### Data Transfer \n",
+    "- When a kernel is executed, Numba automatically transfers NumPy arrays to the device and vice versa.\n",
+    "- In order to avoid the unnecessary transfer for read-only arrays, the following APIs can be used to manually control the transfer.\n",
+    "\n",
+    "##### 1.  Copy host to device\n",
+    "```python\n",
+    "import numba.cuda as cuda\n",
+    "import numpy as np\n",
+    "\n",
+    "N = 500000\n",
+    "h_A = np.arange(N, dtype=np.int)\n",
+    "h_B = np.arange(N, dtype=np.int)\n",
+    "h_C = np.zeros(N, dtype=np.int)\n",
+    "\n",
+    "d_A = cuda.to_device(h_A)\n",
+    "d_B = cuda.to_device(h_B)\n",
+    "d_C = cuda.to_device(h_C)\n",
+    "```\n",
+    "##### 2.  Enqueue the transfer to a stream\n",
+    "```python\n",
+    "h_A    = np.arange(N, dtype=np.int)\n",
+    "stream = cuda.stream()\n",
+    "d_A    = cuda.to_device(h_A, stream=stream)\n",
+    "```\n",
+    "##### 3.  Copy device to host / enqueue the transfer to a stream \n",
+    "```python\n",
+    "h_C = d_C.copy_to_host()\n",
+    "h_C = d_C.copy_to_host(stream=stream)\n",
+    "```\n",
+    "### Example 5:  Data Movement "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numba.cuda as cuda\n",
+    "import numpy as np\n",
+    "N = 200\n",
+    "threadsperblock = 25\n",
+    "\n",
+    "@cuda.jit\n",
+    "def arrayAdd(d_A, d_B, d_C):\n",
+    "   tid = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x\n",
+    "   if tid < N:\n",
+    "      d_C[tid] = d_A[tid] + d_B[tid]\n",
+    "      \n",
+    "h_A = np.arange(N, dtype=np.int32)\n",
+    "h_B = np.arange(N, dtype=np.int32)\n",
+    "h_C = np.zeros(N, dtype=np.int32)\n",
+    "\n",
+    "d_A = cuda.to_device(h_A)\n",
+    "d_B = cuda.to_device(h_B)\n",
+    "d_C = cuda.to_device(h_C)\n",
+    "\n",
+    "blockpergrid  = N + (threadsperblock - 1) // threadsperblock\n",
+    "arrayAdd[blockpergrid, threadsperblock](d_A, d_B, d_C)\n",
+    "\n",
+    "h_C = d_C.copy_to_host()\n",
+    "print(h_C)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Atomic Operation\n",
+    "\n",
+    "- Atomic operation is required when multiple threads attempt to modify a common portion of the memory. \n",
+    "- A typical example includes simultaneous withdrawal from a bank account through ATM machine or a large number of threads modifying a particular index of an array based on certain condition(s).\n",
+    "- List of presently implemented atomic operations supported by Numba are:\n",
+    "> **import numba.cuda as cuda**\n",
+    "> - cuda.atomic.add(array, index, value)\n",
+    "> - cuda.atomic.min(array, index, value)\n",
+    "> - cuda.atomic.max(array, index, value)\n",
+    "> - cuda.atomic.nanmax(array, index, value)\n",
+    "> - cuda.atomic.nanmin(array, index, value)\n",
+    "> - cuda.atomic.compare_and_swap(array, old_value, current_value)\n",
+    "> - cuda.atomic.sub(array, index, value)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Task ==> sum of an array: [1,2,3,4,5,6,7,8,9,10] in parallel\n",
+    "# Note that threads are executed randomly\n",
+    "\n",
+    "# atomic operation example \n",
+    "size = 10\n",
+    "nthread = 10\n",
+    "@cuda.jit()\n",
+    "def add_atomic(my_array, total):\n",
+    "   tid = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x\n",
+    "   cuda.atomic.add(total,0, my_array[tid])\n",
+    "\n",
+    "my_array = np.array([1,2,3,4,5,6,7,8,9,10], dtype=np.int32)\n",
+    "total = np.zeros(1, dtype=np.int32)\n",
+    "nblock = int(size / nthread)\n",
+    "add_atomic[nblock, nthread](my_array, total)\n",
+    "print(\"Atomic:\", total)\n",
+    "\n",
+    "######################################################################################\n",
+    "# Non-atomic operation example  \n",
+    "size = 10\n",
+    "nthread = 10\n",
+    "@cuda.jit()\n",
+    "def add_atomic(my_array, total):\n",
+    "   tid = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x\n",
+    "   total[0] += my_array[tid]\n",
+    "   \n",
+    "\n",
+    "my_array = np.array([1,2,3,4,5,6,7,8,9,10], dtype=np.int32)\n",
+    "total = np.zeros(1, dtype=np.int32)\n",
+    "nblock = int(size / nthread)\n",
+    "add_atomic[nblock, nthread](my_array, total)\n",
+    "print(\"Non atomic: \", total)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 7. CUDA Ufuncs\n",
+    "\n",
+    "- The CUDA ufunc supports passing intra-device arrays to reduce traffic over the PCI-express bus. \n",
+    "- It also supports asynchronous mode by using stream keyword.\n",
+    "\n",
+    "<img src=\"../images/ufunc.png\"/>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# example: c = (a - b) * (a + b)\n",
+    "# size of each array(A, B, C) is N = 10000\n",
+    "\n",
+    "from numba import vectorize\n",
+    "import numba.cuda as cuda\n",
+    "import numpy as np\n",
+    "\n",
+    "@vectorize(['float32(float32, float32)'],target='cuda')\n",
+    "def compute(a, b):\n",
+    "    return (a - b) * (a + b)\n",
+    "\n",
+    "N = 10000\n",
+    "A = np.arange(N , dtype=np.float32)\n",
+    "B = np.arange(N, dtype=np.float32)\n",
+    "C = compute(A, B)\n",
+    "\n",
+    "print(C.reshape(100,100))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Device Function\n",
+    "\n",
+    "- The CUDA device functions can only be invoked from within the device and can return a value like normal functions. The device function is usually placed before the CUDA ufunc kernel otherwise a call to the device function may not be visible inside the ufunc kernel.\n",
+    "- The attributes <i>device=True</i> and <i>inline=true</i> indicate that <i>\"device_ufunc\"</i> is a device function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#example: c = sqrt((a - b) * (a + b))\n",
+    "\n",
+    "from numba import vectorize\n",
+    "import numba.cuda as cuda\n",
+    "import numpy as np\n",
+    "import math\n",
+    "\n",
+    "@cuda.jit('float32(float32)', device=True, inline=True)\n",
+    "def device_ufunc(c):\n",
+    "   return math.sqrt(c)\n",
+    "\n",
+    "@vectorize(['float32(float32, float32)'],target='cuda')\n",
+    "def compute(a, b):\n",
+    "    c = (a - b) * (a + b)\n",
+    "    return device_ufunc(c)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary\n",
+    "\n",
+    "<img src=\"../images/numba_summary1.png\"/>\n",
+    "\n",
+    "\n",
+    "---\n",
+    "\n",
+    "## Lab Task\n",
+    "\n",
+    "In this section, you are expected to click on the **Serial Code Lab Assignment** link and proceed to Lab 2. In this lab you will find three python serial code functions. You are required to revise the **pair_gpu** function to run on the GPU, and likewise do a few modifications within the **main** function.\n",
+    "\n",
+    "## <div style=\"text-align:center; color:#FF0000; border:3px solid red;height:80px;\"> <b><br/> [Serial Code Lab Assignment](serial_RDF.ipynb) </b> </div>\n",
+    "\n",
+    "---\n",
+    "\n",
+    "## Post-Lab Summary\n",
+    "\n",
+    "If you would like to download this lab for later viewing, we recommend you go to your browser's File menu (not the Jupyter notebook file menu) and save the complete web page. This will ensure the images are copied as well. You can also execute the following cell block to create a zip-file of the files you've been working on and download it with the link below.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "cd ..\n",
+    "rm -f nways_files.zip\n",
+    "zip -r nways_files.zip *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "**After** executing the above zip command, you should be able to download the zip file [here](../nways_files.zip).\n",
+    "\n",
+    "**IMPORTANT**: Please click on **HOME** to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "# <p style=\"text-align:center;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\"> <a href=../../../nways_MD_start_python.ipynb>HOME</a></p>\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "# Links and Resources\n",
+    "\n",
+    "[NVIDIA Nsight System](https://docs.nvidia.com/nsight-systems/)\n",
+    "\n",
+    "[NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-downloads)\n",
+    "\n",
+    "**NOTE**: To be able to see the Nsight System profiler output, please download the latest version Nsight System from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "\n",
+    "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "## References\n",
+    "\n",
+    "- Numba Documentation, Release 0.52.0-py3.7-linux-x86_64.egg, Anaconda, Nov 30, 2020.\n",
+    "- Bhaumik Vaidya, Hands-On GPU-Accelerated Computer Vision with OpenCV and CUDA, Packt Publishing, 2018.\n",
+    "- https://docs.nvidia.com/cuda/cuda-c-programming-guide/\n",
+    "\n",
+    "\n",
+    "--- \n",
+    "\n",
+    "## Licensing \n",
+    "\n",
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0  International (CC BY 4.0)."
+   ]
+  }
+ ],
+ "metadata": {
+  "celltoolbar": "Raw Cell Format",
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 410 - 0
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/numba/serial_RDF.ipynb

@@ -0,0 +1,410 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# \n",
+    "\n",
+    "\n",
+    "# Numba Lab 2: HPC Approach with Serial Code\n",
+    "---\n",
+    "\n",
+    "#### [<<--Numba Lab 1](numba_guide.ipynb)\n",
+    "\n",
+    "\n",
+    "## A Recap on RDF\n",
+    "\n",
+    "- The radial distribution function (RDF) denoted as g(r) defines the probability of finding a particle at a distance r from another tagged particle. The RDF is strongly dependent on the type of matter so will vary greatly for solids, gases and liquids. You can read more [here](https://en.wikibooks.org/wiki/Molecular_Simulation/Radial_Distribution_Functions).\n",
+    "- The code complexity of the algorithm is $N^{2}$. \n",
+    "- The input data for the serial code is fetched from a DCD binary trajectory file.\n",
+    "\n",
+    "\n",
+    "### The Serial Code\n",
+    "- The cell below consists of two functions, namely **dcdreadhead** and **dcdreadframe**\n",
+    "- The **dcdreadhead** function computes the total number of frames and atoms from the DCDFile **(input/alk.traj.dcd)**, while the **dcdreadframe** function reads 10 frames and 6720 atoms (note: each frame contains 6720 atoms) using the MDAnalysis library. \n",
+    "- Both functions run on the Host (CPU) and are being called from the function **main()**.\n",
+    "\n",
+    "### <u>Cell 1</u>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import cupy as cp\n",
+    "import numpy as np\n",
+    "import math\n",
+    "import cupy.cuda.nvtx as nvtx\n",
+    "from MDAnalysis.lib.formats.libdcd import DCDFile\n",
+    "from timeit import default_timer as timer\n",
+    "\n",
+    "\n",
+    "def dcdreadhead(infile):\n",
+    "    nconf   = infile.n_frames\n",
+    "    _infile = infile.header\n",
+    "    numatm  = _infile['natoms']\n",
+    "    return numatm, nconf\n",
+    "\n",
+    "def dcdreadframe(infile, numatm, nconf):\n",
+    "\n",
+    "    d_x = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "    d_y = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "    d_z = np.zeros(numatm * nconf, dtype=np.float64)\n",
+    "\n",
+    "    for i in range(nconf):\n",
+    "        data = infile.readframes(i, i+1)\n",
+    "        box = data[1]\n",
+    "        atomset = data[0][0]\n",
+    "        xbox = round(box[0][0], 8)\n",
+    "        ybox = round(box[0][2],8)\n",
+    "        zbox = round(box[0][5], 8)\n",
+    "\n",
+    "        for row in range(numatm):\n",
+    "            d_x[i * numatm + row] = round(atomset[row][0], 8) # 0 is column\n",
+    "            d_y[i * numatm + row] = round(atomset[row][1], 8)  # 1 is column\n",
+    "            d_z[i * numatm + row] = round(atomset[row][2], 8)  # 2 is column\n",
+    "\n",
+    "    return xbox, ybox, zbox, d_x, d_y, d_z"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##  pair_gpu function\n",
+    "\n",
+    "- The pair_gpu is the function where the main task of the RDF serial implementation is being executed. The function computes differences in xyz DCD frames.\n",
+    "- The essence of njit(just-in-time) decorator is to get pair_gpu function to compile under no python mode, and this is important for good performance. \n",
+    "- The decorator **@njit** or **@jit(nopython=True)** ensures that an exception is raised when compilation fails as a way to alert the user that a bug is found within the decorated function. You can read more [here](https://numba.pydata.org/numba-doc/latest/user/performance-tips.html).\n",
+    "\n",
+    "### <u>Cell 2</u>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from numba import njit\n",
+    "\n",
+    "@njit()\n",
+    "def pair_gpu(d_x, d_y, d_z, d_g2, numatm, nconf, xbox, ybox, zbox, d_bin):\n",
+    "    box = min(xbox, ybox)\n",
+    "    box = min(box, zbox)\n",
+    "    _del = box / (2.0 * d_bin)\n",
+    "    cut = box * 0.5\n",
+    "    #print(\"\\n {} {}\".format(nconf, numatm))\n",
+    "\n",
+    "    for frame in range(nconf):\n",
+    "        #print(\"\\n {}\".format(frame))\n",
+    "        for id1 in range(numatm):\n",
+    "            for id2 in range(numatm):\n",
+    "                dx = d_x[frame * numatm + id1] - d_x[frame * numatm + id2]\n",
+    "                dy = d_y[frame * numatm + id1] - d_y[frame * numatm + id2]\n",
+    "                dz = d_z[frame * numatm + id1] - d_z[frame * numatm + id2 ]\n",
+    "                dx = dx - xbox * (round(dx / xbox))\n",
+    "                dy = dy - ybox * (round(dy / ybox))\n",
+    "                dz = dz - zbox * (round(dz / zbox))\n",
+    "\n",
+    "                r = math.sqrt(dx * dx + dy * dy + dz * dz)\n",
+    "                if r < cut :\n",
+    "                    ig2  = int((r/_del))\n",
+    "                    d_g2[ig2] = d_g2[ig2] + 1\n",
+    "\n",
+    "    return d_g2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Brief Analysis on Tasks Performed within pair_gpu function\n",
+    "- The graphic below identifies the various operations executed in the pair_gpu function. This function executes three nested loops using tricky indexing manipulation within the arrays.\n",
+    "\n",
+    "\n",
+    "<img src=\"../images/pair_gpu.png\" width=\"80%\"/>\n",
+    "\n",
+    "- The indexing flow for the operation 1 is simulated using the graphic below. Each green box simulates the subtraction operation within the two inner loops (id1 & id2) while the indexes written in blue signifies the outer-most loop (frame) which iterates 10 times. \n",
+    "\n",
+    "<img src=\"../images/pair_gpu_analysis.png\" width=\"80%\"/>\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "### The Main Function\n",
+    "- This is the entry point of the program where every other function including the **pair_gpu** function are called. The output of the main function is written into two files. An image version of the output files (\"**cupy_RDF.dat**\" & \"**cupy_Pair_entropy.dat**\") are displayed below the code cell.\n",
+    "\n",
+    "\n",
+    "### <u>Cell 3</u>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from MDAnalysis.lib.formats.libdcd import DCDFile\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "def main():\n",
+    "    \n",
+    "    ########## Input Details ###########\n",
+    "    inconf = 10\n",
+    "    nbin   = 2000\n",
+    "    global xbox, ybox, zbox\n",
+    "    \n",
+    "    fileDir = os.path.dirname(os.path.realpath('__file__'))\n",
+    "    dataRoot = Path(fileDir).parents[1]\n",
+    "    file = os.path.join(dataRoot, 'source_code/input/alk.traj.dcd')\n",
+    "    \n",
+    "    infile = DCDFile(file)\n",
+    "    pairfile = open(\"RDF.dat\", \"w+\")\n",
+    "    stwo     = open(\"Pair_entropy.dat\", \"w+\")\n",
+    "\n",
+    "    numatm, nconf = dcdreadhead(infile)\n",
+    "    print(\"Dcd file has {} atoms and {} frames\".format(numatm, nconf))\n",
+    "    if inconf > nconf:\n",
+    "        print(\"nconf is reset to {}\".format(nconf))\n",
+    "    else:\n",
+    "        nconf = inconf\n",
+    "    print(\"Calculating RDF for {} frames\".format(nconf))\n",
+    "    #numatm = 50\n",
+    "    sizef =  nconf * numatm\n",
+    "    sizebin = nbin\n",
+    "    ########### reading cordinates ##############\n",
+    "    nvtx.RangePush(\"Read_File\")\n",
+    "    xbox, ybox, zbox, h_x, h_y, h_z = dcdreadframe(infile, numatm, nconf)\n",
+    "    nvtx.RangePop() # pop for reading file\n",
+    "\n",
+    "    h_g2 = np.zeros(sizebin, dtype=np.longlong)\n",
+    "    print(\"Reading of input file is completed\")\n",
+    "    print(\"\\n {} {}\".format(nconf, numatm))\n",
+    "    ############# This where we will concentrate #########################\n",
+    "    nvtx.RangePush(\"Pair_Circulation\")\n",
+    "    h_g2 = pair_gpu(h_x, h_y, h_z, h_g2, numatm, nconf, xbox, ybox, zbox, nbin)\n",
+    "    nvtx.RangePop() #pop for Pair Calculation\n",
+    "    ######################################################################\n",
+    "    \n",
+    "    pi = math.acos(np.int64(-1.0))\n",
+    "    rho = (numatm) / (xbox * ybox * zbox)\n",
+    "    norm = (np.int64(4.0) * pi * rho) / np.int64(3.0)\n",
+    "    g2 = np.zeros(nbin, dtype=np.float32)\n",
+    "    s2 = np.int64(0.0);\n",
+    "    s2bond = np.int64(0.0)\n",
+    "    lngrbond = np.int64(0.0)\n",
+    "    box = min(xbox, ybox)\n",
+    "    box = min(box, zbox)\n",
+    "    _del = box / (np.int64(2.0) * nbin)\n",
+    "    gr = np.float32(0.0)\n",
+    "    # loop to calculate entropy\n",
+    "    nvtx.RangePush(\"Entropy_Calculation\")\n",
+    "    for i in range(nbin):\n",
+    "        rl = (i) * _del\n",
+    "        ru = rl + _del\n",
+    "        nideal = norm * (ru * ru * ru - rl * rl * rl)\n",
+    "        g2[i] = h_g2[i] / (nconf * numatm * nideal)\n",
+    "        r = (i) * _del\n",
+    "        temp = (i + 0.5) * _del\n",
+    "        \n",
+    "        #writing to file\n",
+    "        pairfile.write(str(temp) + \" \" + str(g2[i]) + \"\\n\")\n",
+    "\n",
+    "        if r < np.int64(2.0):\n",
+    "            gr = np.int64(0.0)\n",
+    "        else:\n",
+    "            gr = g2[i]\n",
+    "        if gr < 1e-5:\n",
+    "            lngr = np.int64(0.0)\n",
+    "        else:\n",
+    "            lngr = math.log(gr)\n",
+    "        if g2[i] < 1e-6:\n",
+    "            lngrbond = np.int64(0.0)\n",
+    "        else:\n",
+    "            lngrbond = math.log(g2[i])\n",
+    "        s2 = s2 - (np.int64(2.0) * pi * rho * ((gr * lngr) - gr + np.int64(1.0)) * _del * r * r)\n",
+    "        s2bond = s2bond - np.int64(2.0) * pi * rho * ((g2[i] * lngrbond) - g2[i] + np.int64(1.0)) * _del * r * r\n",
+    "\n",
+    "    nvtx.RangePop() # pop for entropy Calculation\n",
+    "    \n",
+    "    #writing s2 and s2bond to file\n",
+    "    stwo.writelines(\"s2 value is {}\\n\".format(s2))\n",
+    "    stwo.writelines(\"s2bond value is {}\".format(s2bond))\n",
+    "    \n",
+    "    # printing s2 and s2bond to jupyter output\n",
+    "    print(\"\\n s2 value is {}\\n\".format(s2))\n",
+    "    print(\"s2bond value is {}\\n\".format(s2bond))\n",
+    "\n",
+    "    print(\"#Freeing Host memory\")\n",
+    "    del(h_x)\n",
+    "    del(h_y)\n",
+    "    del(h_z)\n",
+    "    del(h_g2)\n",
+    "    print(\"#Number of atoms processed: {}  \\n\".format(numatm))\n",
+    "    print(\"#number of confs processed: {} \\n\".format(nconf))\n",
+    "    \n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    #main()  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "\n",
+    "### Console Output and Output Files\n",
+    "<table>\n",
+    "    <tr>\n",
+    "    <td>\n",
+    "         <img src=\"../images/serial_output_file.png\" width=\"95%\" />\n",
+    "    </td>\n",
+    "    </tr>\n",
+    "</table>\n",
+    "\n",
+    "\n",
+    "---\n",
+    "\n",
+    "## Lab Task \n",
+    "\n",
+    "1. 1. **Run the serial code from cell 1, 2, & 3**.\n",
+    "    - Remove the **\"#\"** behind the **main()** before running the cell 3:\n",
+    "    ```python\n",
+    "       if __name__ == \"__main__\":\n",
+    "                main()\n",
+    "    ```\n",
+    "2. **Now, lets start modifying the original code to Numba code constructs.**\n",
+    "> From the top menu, click on File, and Open **nways_serial.py** from the current directory at **Python/source_code/numba** directory. Remember to SAVE your code after changes, and then run the cell below. \n",
+    "> Hints: focus on the **pair_gpu** function and you may need to modify few lines in the **main** function as well."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%run ../../source_code/serial/nways_serial.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The output should be the following:\n",
+    "\n",
+    "```\n",
+    "s2 value is -2.43191\n",
+    "s2bond value is -3.87014\n",
+    "```\n",
+    "\n",
+    "3. **Profile the code by running the cell bellow** "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!cd ../../source_code/serial&& nsys profile --stats=true --force-overwrite true -o serial_cpu_rdf python3 nways_serial.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To view the profiler report, you need to [download the profiler output](../../source_code/serial/serial_cpu_rdf.qdrep) and open it via the graphical user interface (GUI). A sample expected profile report is given below:\n",
+    "\n",
+    "<img src=\"../images/numba_nsys1.png\"/>\n",
+    "<img src=\"../images/numba_nsys2.png\"/>\n",
+    "\n",
+    "From the profile report, we can see that the pair_gpu function now takes milliseconds to run as compared to the serial version which takes more than 3 seconds as shown [here](../serial/rdf_overview.ipynb). \n",
+    "\n",
+    "---\n",
+    "### [View](../../source_code/numba/numba_rdf.py) or [Run](../../jupyter_notebook/numba/numba_RDF.ipynb)  Solution \n",
+    "--- \n",
+    "\n",
+    "## Post-Lab Summary\n",
+    "\n",
+    "If you would like to download this lab for later viewing, we recommend you go to your browsers File menu (not the Jupyter notebook file menu) and save the complete web page. This will ensure the images are copied as well. You can also execute the following cell block to create a zip-file of the files you've been working on and download it with the link below.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "cd ..\n",
+    "rm -f nways_files.zip\n",
+    "zip -r nways_files.zip *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "**After** executing the above zip command, you should be able to download the zip file [here](../nways_files.zip).\n",
+    "\n",
+    "**IMPORTANT**: Please click on **HOME** to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "# <p style=\"text-align:center;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\"> <a href=../../../nways_MD_start_python.ipynb>HOME</a></p>\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "# Links and Resources\n",
+    "\n",
+    "[NVIDIA Nsight System](https://docs.nvidia.com/nsight-systems/)\n",
+    "\n",
+    "[NVIDIA CUDA Toolkit](https://developer.nvidia.com/cuda-downloads)\n",
+    "\n",
+    "**NOTE**: To be able to see the Nsight System profiler output, please download the latest version of the Nsight System from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "\n",
+    "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "\n",
+    "## Licensing \n",
+    "\n",
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0)."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 137 - 0
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/serial/rdf_overview.ipynb

@@ -0,0 +1,137 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## RDF\n",
+    "The radial distribution function (RDF) denoted as g(r) defines the probability of finding a particle at a distance r from another tagged particle. The RDF is strongly dependent on the type of matter so will vary greatly for solids, gases and liquids. You can read more [here](https://en.wikibooks.org/wiki/Molecular_Simulation/Radial_Distribution_Functions).\n",
+    "<img src=\"../images/rdf.png\" width=\"50%\" height=\"50%\">\n",
+    " \n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The code complexity of the algorithm is $N^{2}$ . Let us get into details of the serial code by clicking on the link below:\n",
+    "\n",
+    "[RDF Serial Code](../../source_code/serial/nways_serial.py)\n",
+    "\n",
+    "\n",
+    "Open the downloaded file, analyze and understand the code if possible, and run the cell below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "%run ../../source_code/serial/nways_serial.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We plan to follow a typical optimization cycle that every code need to go through\n",
+    "<img src=\"../images/workflow.png\" width=\"70%\" height=\"70%\">\n",
+    "\n",
+    "In order to analyze the application, we will make use of the NVIDIA Nsight System profiler \"nsys\" and add NVIDIA Tools Extension SDK  for annotation \"nvtx\" marking within the code to get more information out of the serial code. Before running the cell below, let's first start by diving into the profiler lab to learn more about the tools. Using profiler identifies the hotspots and helps us understand which function(s) are most important to parallelize.\n",
+    "\n",
+    "-----\n",
+    "\n",
+    "# <div style=\"text-align: center ;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\">[Profiling lab](../../../../../profiler/English/jupyter_notebook/profiling-c.ipynb)</div> \n",
+    "\n",
+    "-----\n",
+    "\n",
+    "Now, that we are familiar with the Nsight Profiler and know how to use [NVTX](../../../../../profiler/English/jupyter_notebook/profiling-c.ipynb#nvtx), let's profile the serial code and evaluate the output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!cd ../../source_code/serial&& nsys profile --stats=true --force-overwrite true -o serial_cpu_rdf python3 nways_serial.py"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once you run the above cell, you should see the following in the terminal:\n",
+    "\n",
+    "<img src=\"../images/serial_cpu_rdf1.png\" width=\"700px\" height=\"600px\"/>\n",
+    "<img src=\"../images/serial_cpu_rdf2.png\" width=\"700px\" height=\"400px\"/>\n",
+    "\n",
+    "To view the profiler report, you need to [download the profiler output](../../source_code/serial/serial_cpu_rdf.qdrep) and open it via the graphical user interface (GUI). For more information on how to open the report via the GUI, please check out the section on [how to view the report](../../../../../profiler/English/jupyter_notebook/profiling-c.ipynb#gui-report). \n",
+    "\n",
+    "From the timeline view, right click on the nvtx row and click the \"show in events view\". You can see the nvtx statistic at the bottom of the window which shows the duration of each range. In the following labs, we will explore the profiler report in more detail. \n",
+    "\n",
+    "<img src=\"../images/serial_profile.png\" width=\"100%\" height=\"100%\"/>\n",
+    "\n",
+    "The next step is to make the **Pair Calculation** algorithm parallel using existing approaches within GPU Programming. Please follow the link below and choose one approach to parallelize the serial code.\n",
+    "\n",
+    "-----\n",
+    "\n",
+    "# <div style=\"text-align: center ;border:3px; border-style:solid; border-color:#FF0000  ; padding: 1em\">[HOME](../../../nways_MD_start_python.ipynb)</div> \n",
+    "-----\n",
+    "\n",
+    "\n",
+    "# Links and Resources\n",
+    "<!--[OpenACC API guide](https://www.openacc.org/sites/default/files/inline-files/OpenACC%20API%202.6%20Reference%20Guide.pdf)-->\n",
+    "\n",
+    "[NVIDIA Nsight System](https://docs.nvidia.com/nsight-systems/)\n",
+    "\n",
+    "<!--[NVIDIA Nsight Compute](https://developer.nvidia.com/nsight-compute)-->\n",
+    "\n",
+    "<!--[CUDA Toolkit Download](https://developer.nvidia.com/cuda-downloads)-->\n",
+    "\n",
+    "[Profiling timelines with NVTX](https://devblogs.nvidia.com/cuda-pro-tip-generate-custom-application-profile-timelines-nvtx/)\n",
+    "\n",
+    "**NOTE**: To be able to see the Nsight System profiler output, please download the latest version of NVIDIA Nsight System from [here](https://developer.nvidia.com/nsight-systems).\n",
+    "\n",
+    "Don't forget to check out additional [OpenACC Resources](https://www.openacc.org/resources) and join our [OpenACC Slack Channel](https://www.openacc.org/community#slack) to share your experience and get more help from the community.\n",
+    "\n",
+    "--- \n",
+    "\n",
+    "## Licensing \n",
+    "\n",
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

Binary
hpc/nways/nways_labs/nways_MD/English/Python/jupyter_notebook/serial/serial_cpu_rdf.qdrep


+ 210 - 0
hpc/nways/nways_labs/nways_MD/English/Python/source_code/cupy/cupy_rdf.py

@@ -0,0 +1,210 @@
+# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
+
+import cupy as cp
+import numpy as np
+import math
+import cupy.cuda.nvtx as nvtx
+from MDAnalysis.lib.formats.libdcd import DCDFile
+from timeit import default_timer as timer
+import os
+from pathlib import Path
+
+#pool = cp.cuda.MemoryPool(cp.cuda.malloc_managed)
+#cp.cuda.set_allocator(pool.malloc)
+
+
+def dcdreadhead(infile):
+    nconf   = infile.n_frames
+    _infile = infile.header
+    numatm  = _infile['natoms']
+    return numatm, nconf
+
+def dcdreadframe(infile, numatm, nconf):
+
+    d_x = np.zeros(numatm * nconf, dtype=np.float64)
+    d_y = np.zeros(numatm * nconf, dtype=np.float64)
+    d_z = np.zeros(numatm * nconf, dtype=np.float64)
+
+    for i in range(nconf):
+        data = infile.readframes(i, i+1)
+        box = data[1]
+        atomset = data[0][0]
+        xbox = round(box[0][0], 8)
+        ybox = round(box[0][2],8)
+        zbox = round(box[0][5], 8)
+
+        for row in range(numatm):
+            d_x[i * numatm + row] = round(atomset[row][0], 8) # 0 is column
+            d_y[i * numatm + row] = round(atomset[row][1], 8)  # 1 is column
+            d_z[i * numatm + row] = round(atomset[row][2], 8)  # 2 is column
+        
+    return xbox, ybox, zbox, d_x, d_y, d_z
+
+
+def main():
+    start = timer()
+    ########## Input Details ###########
+    global xbox, ybox, zbox
+    inconf = 10
+    nbin   =np.int32(2000)
+    xbox   = np.float32(0)
+    ybox   =np.float32(0)
+    zbox   = np.float32(0)
+
+    ########use on jupyter notebook#######
+    fileDir = os.path.dirname(os.path.realpath('__file__'))
+    dataRoot = Path(fileDir).parents[1]
+    file = os.path.join(dataRoot, 'source_code/input/alk.traj.dcd')
+
+    ########use on local computer##########
+    #file   = "input/alk.traj.dcd"
+    #######################################
+    infile = DCDFile(file)
+    pairfile = open("cupy_RDF.dat", "w+")
+    stwo = open("cupy_Pair_entropy.dat", "w+")
+
+    numatm, nconf = dcdreadhead(infile)
+    print("Dcd file has {} atoms and {} frames".format(numatm, nconf))
+    if inconf > nconf:
+        print("nconf is reset to {}".format(nconf))
+    else:
+        nconf = inconf
+    print("Calculating RDF for {} frames".format(nconf))
+    #numatm = 10
+    sizef =  nconf * numatm
+    sizebin = nbin
+
+    ########### reading coordinates ##############
+    nvtx.RangePush("Read_File")
+    xbox, ybox, zbox, d_x, d_y, d_z = dcdreadframe(infile, numatm, nconf)
+    nvtx.RangePop()  # pop for reading file
+    print("Reading of input file is completed")
+    ############# Stream from Host to Device #########################
+    d_x = cp.asarray(d_x)
+    d_y = cp.asarray(d_y)
+    d_z = cp.asarray(d_z)
+    d_g2 = np.zeros(sizebin, dtype=np.int64)
+    d_g2 = cp.asarray(d_g2)
+    ############################## RAW KERNEL #################################################
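+    # Launch configuration: one GPU thread per unique atom pair (lower triangle of the pair matrix).
+    # The grid is capped at 65535 blocks below, so the pair index space is swept in 'blockloop'
+    # chunks of 65535 * 128 threads, with the chunk id passed to the kernel as 'bl'.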
+    nthreads = 128
+    near2 = nthreads * (int(0.5 * numatm * (numatm - 1) / nthreads) + 1)
+    nblock = near2 // nthreads
+    print(" Initial blocks are {} and now changing to".format(nblock))
+    maxblock = 65535
+    blockloop = int(nblock / maxblock)
+    if blockloop != 0:
+        nblock = maxblock
+    print("{} and will run over {} blockloops".format(nblock, blockloop+1))
+
+    nvtx.RangePush("CuPy_Pair_Circulation")
+    #################################
+    t1 = timer()
+    for bl in range(blockloop+1):
+        raw_kernel((nblock,),(nthreads,), (d_x, d_y, d_z, d_g2, numatm, nconf, xbox, ybox, zbox, nbin, bl)) ## cupy raw kernel
+    cp.cuda.Device(0).synchronize()
+    print("Kernel compute time:", timer() - t1)
+    d_g2 = cp.asnumpy(d_g2)
+    nvtx.RangePop()  # pop for Pair Calculation
+    ######################################################################
+    pi = math.acos(np.int64(-1.0))
+    rho = (numatm) / (xbox * ybox * zbox)
+    norm = (np.int64(4.0) * pi * rho) / np.int64(3.0)
+    g2 = np.zeros(nbin, dtype=np.float32)
+    s2 =np.int64(0.0); s2bond = np.int64(0.0)
+    lngrbond = np.float32(0.0)
+    box = min(xbox, ybox)
+    box = min(box, zbox)
+    _del =box / (np.int64(2.0) * nbin)
+    gr = np.float32(0.0)
+    # loop to calculate entropy
+    nvtx.RangePush("Entropy_Calculation")
+    for i in range(nbin):
+        rl = (i) * _del
+        ru = rl + _del
+        nideal = norm * (ru * ru * ru - rl * rl * rl)
+        g2[i] = d_g2[i] / (nconf * numatm * nideal)
+        r = (i) * _del
+        temp = (i + 0.5) * _del
+        pairfile.write(str(temp) + " " + str(g2[i]) + "\n")
+
+        if r < np.int64(2.0):
+            gr = np.int64(0.0)
+        else:
+            gr = g2[i]
+        if gr < 1e-5:
+            lngr = np.int64(0.0)
+        else:
+            lngr = math.log(gr)
+        if g2[i] < 1e-6:
+            lngrbond = np.int64(0.0)
+        else:
+            lngrbond = math.log(g2[i])
+        s2 = s2 - (np.int64(2.0) * pi * rho * ((gr * lngr) - gr + np.int64(1.0)) * _del * r * r)
+        s2bond = s2bond - np.int64(2.0) * pi * rho * ((g2[i] * lngrbond) - g2[i] + np.int64(1.0)) * _del * r * r
+
+    nvtx.RangePop()  # pop for entropy Calculation
+    stwo.writelines("s2 value is {}\n".format(s2))
+    stwo.writelines("s2bond value is {}".format(s2bond))
+
+    print("#Freeing Host memory")
+    del (d_x)
+    del (d_y)
+    del (d_z)
+    del (d_g2)
+    print("#Number of atoms processed: {}  \n".format(numatm))
+    print("#number of confs processed: {} \n".format(nconf))
+    total_time = timer() - start
+    print("total time spent:", total_time)
+
+##################################################################################
+
+raw_kernel = cp.RawKernel(r'''
+extern "C"
+__global__ void cupy_pair_gpu(
+		const double* d_x, const double* d_y, const double* d_z, 
+		unsigned long long int *d_g2, int numatm, int nconf, 
+		const double xbox,const double ybox,const double zbox,int d_bin,  unsigned long long int bl)
+{
+	double r,cut,dx,dy,dz;
+	int ig2,id1,id2;
+	double box;
+	box=min(xbox,ybox);
+	box=min(box,zbox);
+
+	double del=box/(2.0*d_bin);
+	cut=box*0.5;
+	int thisi;
+	double n;
+
+	int i = blockIdx.x * blockDim.x + threadIdx.x;
+	int maxi = min(int(0.5*numatm*(numatm-1)-(bl*65535*128)),(65535*128));
+
+	if ( i < maxi ) {
+		thisi=bl*65535*128+i;
+
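+		/* recover the (id1, id2) atom pair encoded by the linear lower-triangle index 'thisi' */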
+		n=(0.5)*(1+ ((double) sqrt (1.0+4.0*2.0*thisi)));
+		id1=int(n);
+		id2=thisi-(0.5*id1*(id1-1));
+
+		for (int frame=0;frame<nconf;frame++){
+			dx=d_x[frame*numatm+id1]-d_x[frame*numatm+id2];
+			dy=d_y[frame*numatm+id1]-d_y[frame*numatm+id2];
+			dz=d_z[frame*numatm+id1]-d_z[frame*numatm+id2];
+
+			dx=dx-xbox*(round(dx/xbox));
+			dy=dy-ybox*(round(dy/ybox));
+			dz=dz-zbox*(round(dz/zbox));
+
+			r=sqrtf(dx*dx+dy*dy+dz*dz);
+			if (r<cut) {
+				ig2=(int)(r/del);
+				atomicAdd(&d_g2[ig2],2) ;
+			}
+		}
+	}
+}
+''', 'cupy_pair_gpu')
+
+if __name__ == "__main__":
+    main()
+

+ 9 - 0
hpc/nways/nways_labs/nways_MD/English/Python/source_code/dataset.py

@@ -0,0 +1,9 @@
+# Copyright (c) 2021 NVIDIA Corporation.  All rights reserved. 
+
+import gdown
+import os
+
+## alk.traj.dcd input file 
+url = 'https://drive.google.com/uc?id=1WZ0rtXZ-uMLfy7htT0gaU4EQ_Rq61QTF&export=download'
+output_python = '/labs/nways_MD/English/Python/source_code/input/alk.traj.dcd'
+gdown.download(url, output_python, quiet=False, proxy=None)

+ 4 - 0
hpc/nways/nways_labs/nways_MD/English/Python/source_code/input/.gitignore

@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore

+ 193 - 0
hpc/nways/nways_labs/nways_MD/English/Python/source_code/numba/numba_rdf.py

@@ -0,0 +1,193 @@
+# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
+
+import numpy as np
+import math
+import cupy.cuda.nvtx as nvtx
+from MDAnalysis.lib.formats.libdcd import DCDFile
+from timeit import default_timer as timer
+import numba.cuda as cuda
+import os
+from pathlib import Path
+
+def dcdreadhead(infile):
+    nconf   = infile.n_frames
+    _infile = infile.header
+    numatm  = _infile['natoms']
+    return numatm, nconf
+
+def dcdreadframe(infile, numatm, nconf):
+
+    d_x = np.zeros(numatm * nconf, dtype=np.float64)
+    d_y = np.zeros(numatm * nconf, dtype=np.float64)
+    d_z = np.zeros(numatm * nconf, dtype=np.float64)
+
+    for i in range(nconf):
+        data = infile.readframes(i, i+1)
+        box = data[1]
+        atomset = data[0][0]
+        xbox = round(box[0][0], 8)
+        ybox = round(box[0][2],8)
+        zbox = round(box[0][5], 8)
+
+        for row in range(numatm):
+            d_x[i * numatm + row] = round(atomset[row][0], 8) # 0 is column
+            d_y[i * numatm + row] = round(atomset[row][1], 8)  # 1 is column
+            d_z[i * numatm + row] = round(atomset[row][2], 8)  # 2 is column
+
+    return xbox, ybox, zbox, d_x, d_y, d_z
+
+def main():
+    start = timer()
+    ########## Input Details ###########
+    global xbox, ybox, zbox
+    inconf = 10
+    nbin   =np.int32(2000)
+    xbox   = np.float32(0)
+    ybox   =np.float32(0)
+    zbox   = np.float32(0)
+
+    ########use on jupyter notebook#######
+    fileDir = os.path.dirname(os.path.realpath('__file__'))
+    dataRoot = Path(fileDir).parents[1]
+    file = os.path.join(dataRoot, 'source_code/input/alk.traj.dcd')
+
+    ########use on local computer##########
+    # file   = "input/alk.traj.dcd"
+    #######################################
+
+    infile = DCDFile(file)
+
+    pairfile = open("numba_RDF.dat", "w+")
+    stwo = open("numba_Pair_entropy.dat", "w+")
+
+    numatm, nconf = dcdreadhead(infile)
+
+    print("Dcd file has {} atoms and {} frames".format(numatm, nconf))
+    if inconf > nconf:
+        print("nconf is reset to {}".format(nconf))
+    else:
+        nconf = inconf
+    print("Calculating RDF for {} frames".format(nconf))
+
+    #numatm = 100
+    sizef =  nconf * numatm
+    sizebin = nbin
+
+    ########### reading coordinates ##############
+    nvtx.RangePush("Read_File")
+    xbox, ybox, zbox, d_x, d_y, d_z = dcdreadframe(infile, numatm, nconf)
+    nvtx.RangePop()  # pop for reading file
+    print("Reading of input file is completed")
+
+    ############################## Numba KERNEL #################################################
+
+    nthreads = 128
+    near2 = nthreads * (int(0.5 * numatm * (numatm - 1) / nthreads) + 1)
+    nblock = near2 // nthreads
+    print(" Initial blocks are {} and now changing to".format(nblock))
+    maxblock = 65535
+    blockloop = int(nblock / maxblock)
+    if blockloop != 0:
+        nblock = maxblock
+    print("{} and will run over {} blockloops".format(nblock, blockloop+1))
+
+    # cp.cuda.runtime.memset(d_g2,0,sizebin)
+    d_g2 = np.zeros(sizebin, dtype=np.int64)
+    d_g2 = cuda.to_device(d_g2) #numba copy to device
+
+    nvtx.RangePush("Pair_Circulation_Numba")
+    t1 = timer()
+    for bl in range(blockloop+1):
+        pair_gpu_kernel[nblock,nthreads ](d_x, d_y, d_z, d_g2, numatm, nconf, xbox, ybox, zbox, nbin, bl)  ## numba jit kernel
+    cuda.synchronize()
+    print("Kernel compute time:", timer() - t1)
+    d_g2  = d_g2.copy_to_host() ## numba copy to host
+    nvtx.RangePop()  # pop for Pair Calculation
+
+    pi = math.acos(np.int64(-1.0))
+    rho = (numatm) / (xbox * ybox * zbox)
+    norm = (np.int64(4.0) * pi * rho) / np.int64(3.0)
+    g2 = np.zeros(nbin, dtype=np.float32)
+    s2 =np.int64(0.0); s2bond = np.int64(0.0)
+    lngrbond = np.float32(0.0)
+    box = min(xbox, ybox)
+    box = min(box, zbox)
+    _del =box / (np.int64(2.0) * nbin)
+    gr = np.float32(0.0)
+    # loop to calculate entropy
+    nvtx.RangePush("Entropy_Calculation")
+    for i in range(nbin):
+        rl = (i) * _del
+        ru = rl + _del
+        nideal = norm * (ru * ru * ru - rl * rl * rl)
+        g2[i] = d_g2[i] / (nconf * numatm * nideal)
+        r = (i) * _del
+        temp = (i + 0.5) * _del
+        pairfile.write(str(temp) + " " + str(g2[i]) + "\n")
+
+        if r < np.int64(2.0):
+            gr = np.int64(0.0)
+        else:
+            gr = g2[i]
+        if gr < 1e-5:
+            lngr = np.int64(0.0)
+        else:
+            lngr = math.log(gr)
+        if g2[i] < 1e-6:
+            lngrbond = np.int64(0.0)
+        else:
+            lngrbond = math.log(g2[i])
+        s2 = s2 - (np.int64(2.0) * pi * rho * ((gr * lngr) - gr + np.int64(1.0)) * _del * r * r)
+        s2bond = s2bond - np.int64(2.0) * pi * rho * ((g2[i] * lngrbond) - g2[i] + np.int64(1.0)) * _del * r * r
+
+    nvtx.RangePop()  # pop for entropy Calculation
+    stwo.writelines("s2 value is {}\n".format(s2))
+    stwo.writelines("s2bond value is {}".format(s2bond))
+
+    print("#Freeing Host memory")
+    del (d_x)
+    del (d_y)
+    del (d_z)
+    del (d_g2)
+    print("#Number of atoms processed: {}  \n".format(numatm))
+    print("#number of confs processed: {} \n".format(nconf))
+    total_time = timer() - start
+    print("total time spent:", total_time)
+
+
+##--------------------------NUMBA KERNEL---------------------------------------------------------##
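+# Each GPU thread handles one unique atom pair (id1, id2) across all frames; the histogram is
+# updated with atomic adds, and 'bl' selects which chunk of 65535*128 pairs this launch covers.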
+@cuda.jit
+def pair_gpu_kernel(d_x, d_y,d_z, d_g2, numatm, nconf, xbox, ybox,zbox,d_bin, bl):
+    box = min(xbox, ybox)
+    box = min(box, zbox)
+    _del= box / (2.0 * d_bin)
+    cut = box * 0.5;
+    i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
+    maxi = min(int(0.5 * numatm * (numatm - 1) - (bl * 65535 * 128)), (65535 * 128))
+
+    if i < maxi:
+        thisi=bl * 65535 * 128+i
+        n = (0.5) * (1+ ( math.sqrt (1.0+4.0 * 2.0 * thisi)))
+        id1 = int(n)
+        id2 = thisi-(0.5 * id1 * (id1-1))
+        for frame in range(0, nconf):
+            t1 = int(frame * numatm+id1)
+            t2 = int(frame * numatm+id2)
+            dx = d_x[t1] - d_x[t2]
+            dy = d_y[t1] - d_y[t2]
+            dz = d_z[t1] - d_z[t2]
+            dx = dx - xbox * (round(dx / xbox))
+            dy = dy - ybox * (round(dy / ybox))
+            dz = dz - zbox * (round(dz / zbox))
+
+            r= math.sqrt(dx * dx+dy * dy+dz * dz)
+            if r < cut:
+                ig2=(int)(r / _del )
+                cuda.atomic.add(d_g2, ig2, 2)
+
+##--------------------------END NUMBA KERNEL---------------------------------------------------------##
+
+
+if __name__ == "__main__":
+    main()
+

+ 160 - 0
hpc/nways/nways_labs/nways_MD/English/Python/source_code/serial/nways_serial.py

@@ -0,0 +1,160 @@
+# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
+
+import numpy as np
+import math
+import cupy.cuda.nvtx as nvtx
+from MDAnalysis.lib.formats.libdcd import DCDFile
+from timeit import default_timer as timer
+from numba import  njit
+import os
+from pathlib import Path
+
+def dcdreadhead(infile):
+    nconf   = infile.n_frames
+    _infile = infile.header
+    numatm  = _infile['natoms']
+    return numatm, nconf
+
+def dcdreadframe(infile, numatm, nconf):
+
+    d_x = np.zeros(numatm * nconf, dtype=np.float64)
+    d_y = np.zeros(numatm * nconf, dtype=np.float64)
+    d_z = np.zeros(numatm * nconf, dtype=np.float64)
+
+    for i in range(nconf):
+        data = infile.readframes(i, i+1)
+        box = data[1]
+        atomset = data[0][0]
+        xbox = round(box[0][0], 8)
+        ybox = round(box[0][2],8)
+        zbox = round(box[0][5], 8)
+
+        for row in range(numatm):
+            d_x[i * numatm + row] = round(atomset[row][0], 8) # 0 is column
+            d_y[i * numatm + row] = round(atomset[row][1], 8)  # 1 is column
+            d_z[i * numatm + row] = round(atomset[row][2], 8)  # 2 is column
+
+    return xbox, ybox, zbox, d_x, d_y, d_z
+
+def main():
+    start = timer()
+    ########## Input Details ###########
+    inconf = 10
+    nbin   = 2000
+    global xbox, ybox, zbox
+    ######## for jupyter notebook ########################
+    fileDir = os.path.dirname(os.path.realpath('__file__'))
+    dataRoot = Path(fileDir).parents[1]
+    file = os.path.join(dataRoot, 'source_code/input/alk.traj.dcd')
+
+    ######## local computer #############
+    #file = "input/alk.traj.dcd"
+
+    infile = DCDFile(file)
+    pairfile = open("RDF.dat", "w+")
+    stwo     = open("Pair_entropy.dat", "w+")
+
+    numatm, nconf = dcdreadhead(infile)
+    print("Dcd file has {} atoms and {} frames".format(numatm, nconf))
+    if inconf > nconf:
+        print("nconf is reset to {}".format(nconf))
+    else:
+        nconf = inconf
+    print("Calculating RDF for {} frames".format(nconf))
+    #numatm = 50
+    sizef =  nconf * numatm
+    sizebin = nbin
+    ########### reading coordinates ##############
+    nvtx.RangePush("Read_File")
+    xbox, ybox, zbox, h_x, h_y, h_z = dcdreadframe(infile, numatm, nconf)
+    nvtx.RangePop() # pop for reading file
+
+    h_g2 = np.zeros(sizebin, dtype=np.longlong)
+    print("Reading of input file is completed")
+    ############# This is where we will concentrate #########################
+    nvtx.RangePush("Pair_Circulation")
+    h_g2 = pair_gpu(h_x, h_y, h_z, h_g2, numatm, nconf, xbox, ybox, zbox, nbin)
+    nvtx.RangePop() #pop for Pair Calculation
+    ######################################################################
+    pi = math.acos(np.int64(-1.0))
+    rho = (numatm) / (xbox * ybox * zbox)
+    norm = (np.int64(4.0) * pi * rho) / np.int64(3.0)
+    g2 = np.zeros(nbin, dtype=np.float32)
+    s2 = np.int64(0.0);
+    s2bond = np.int64(0.0)
+    lngrbond = np.float32(0.0)
+    box = min(xbox, ybox)
+    box = min(box, zbox)
+    _del = box / (np.int64(2.0) * nbin)
+    gr = np.float32(0.0)
+    # loop to calculate entropy
+    nvtx.RangePush("Entropy_Calculation")
+    for i in range(nbin):
+        rl = (i) * _del
+        ru = rl + _del
+        nideal = norm * (ru * ru * ru - rl * rl * rl)
+        g2[i] = h_g2[i] / (nconf * numatm * nideal)
+        r = (i) * _del
+        temp = (i + 0.5) * _del
+        pairfile.write(str(temp) + " " + str(g2[i]) + "\n")
+
+        if r < np.int64(2.0):
+            gr = np.int64(0.0)
+        else:
+            gr = g2[i]
+        if gr < 1e-5:
+            lngr = np.int64(0.0)
+        else:
+            lngr = math.log(gr)
+        if g2[i] < 1e-6:
+            lngrbond = np.int64(0.0)
+        else:
+            lngrbond = math.log(g2[i])
+        s2 = s2 - (np.int64(2.0) * pi * rho * ((gr * lngr) - gr + np.int64(1.0)) * _del * r * r)
+        s2bond = s2bond - np.int64(2.0) * pi * rho * ((g2[i] * lngrbond) - g2[i] + np.int64(1.0)) * _del * r * r
+
+    nvtx.RangePop() # pop for entropy Calculation
+    stwo.writelines("s2 value is {}\n".format(s2))
+    stwo.writelines("s2bond value is {}".format(s2bond))
+
+    print("\n s2 value is {}\n".format(s2))
+    print("s2bond value is {}\n".format(s2bond))
+    print("#Freeing Host memory")
+    del(h_x)
+    del(h_y)
+    del(h_z)
+    del(h_g2)
+    print("#Number of atoms processed: {}  \n".format(numatm))
+    print("#number of confs processed: {} \n".format(nconf))
+    total_time = timer() - start
+    print("total time spent:", total_time)
+
+@njit()
+def pair_gpu(d_x, d_y, d_z, d_g2, numatm, nconf, xbox, ybox, zbox, d_bin):
+    box = min(xbox, ybox)
+    box = min(box, zbox)
+    _del = box / (2.0 * d_bin)
+    cut = box * 0.5
+    #print("\n {} {}".format(nconf, numatm))
+
+    for frame in range(nconf):
+        #print("\n {}".format(frame))
+        for id1 in range(numatm):
+            for id2 in range(numatm):
+                dx = d_x[frame * numatm + id1] - d_x[frame * numatm + id2]
+                dy = d_y[frame * numatm + id1] - d_y[frame * numatm + id2]
+                dz = d_z[frame * numatm + id1] - d_z[frame * numatm + id2 ]
+                dx = dx - xbox * (round(dx / xbox))
+                dy = dy - ybox * (round(dy / ybox))
+                dz = dz - zbox * (round(dz / zbox))
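+                # the three subtractions above apply the periodic minimum-image convention to each displacement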
+
+                r = math.sqrt(dx * dx + dy * dy + dz * dz)
+                if r < cut :
+                    ig2  = int((r/_del))
+                    d_g2[ig2] = d_g2[ig2] + 1
+
+    return d_g2
+
+
+if __name__ == "__main__":
+    main()

Binary
hpc/nways/nways_labs/nways_MD/English/Python/source_code/serial/serial_cpu_rdf.qdrep


+ 3 - 10
hpc/nways/nways_labs/nways_MD/English/nways_MD_start.ipynb

@@ -72,7 +72,7 @@
     "3. [OpenMP](Fortran/jupyter_notebook/openmp/nways_openmp.ipynb) \n",
     "4. [CUDA Fortran](Fortran/jupyter_notebook/cudafortran/nways_cuda.ipynb) \n",
     "\n",
-    "To finish the lab let us go through some final [remarks](Fortran/jupyter_notebook/Final_Remarks.ipynb)\n"
+    "To finish the lab let us go through some final [remarks](Fortran/jupyter_notebook/Final_Remarks.ipynb)"
    ]
   },
   {
@@ -90,7 +90,7 @@
     "### Target Audience and Prerequisites\n",
     "The target audience for this lab is researchers/graduate students and developers who are interested in learning about programming various ways to programming GPUs to accelerate their scientific applications.\n",
     "\n",
-    "Basic experience with Fortran programming is needed. No GPU programming knowledge is required.\n",
+    "Basic experience with C/C++ or Fortran programming is needed. No GPU programming knowledge is required.\n",
     "\n",
     "-----\n",
     "\n",
@@ -102,13 +102,6 @@
     "\n",
     "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -127,7 +120,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,

File diff too large to display
+ 109 - 0
hpc/nways/nways_labs/nways_MD/English/nways_MD_start_python.ipynb


+ 27 - 3
hpc/nways/nways_labs/nways_MD/README.md

@@ -11,13 +11,27 @@ To run this tutorial you will need a machine with NVIDIA GPU.
 To start with, you will have to build a Docker or Singularity container.
 
 ### Docker Container
-To build a docker container, run: 
+
+#### C & Fortran
+To build a docker container, run:
+
 `sudo docker build -t <imagename>:<tagnumber> .`
 
 For instance:
+
 `sudo docker build -t myimage:1.0 .`
 
-The code labs have been written using Jupyter notebooks and a Dockerfile has been built to simplify deployment. In order to serve the docker instance for a student, it is necessary to expose port 8000 from the container, for instance, the following command would expose port 8000 inside the container as port 8000 on the lab machine:
+#### Python
+To build a docker container, you have to specify the Dockerfile name using the **"-f"** flag, so run:
+
+`sudo docker build -f <dockerfile name> -t <imagename>:<tagnumber> .`
+
+For example:
+
+`sudo docker build -f Dockerfile_python -t myimage:1.0 .`
+
+
+For C, Fortran, and Python, the code labs have been written using Jupyter notebooks and a Dockerfile has been built to simplify deployment. In order to serve the docker instance for a student, it is necessary to expose port 8888 from the container. For example, the following command would expose port 8888 inside the container as port 8888 on the lab machine:
 
 `sudo docker run --rm -it --gpus=all -p 8888:8888 myimage:1.0`
 
@@ -33,13 +47,23 @@ Once inside the container, open the jupyter notebook in browser: http://localhos
 
 ### Singularity Container
 
+#### C & Fortran
 To build the singularity container, run: 
+
 `singularity build nways.simg Singularity`
 
-and copy the files to your local machine to make sure changes are stored locally:
+#### Python
+To build the singularity container, run: 
+
+`singularity build nways.simg Singularity_python`
+
+
+For C, Fortran, and Python, copy the files to your local machine to make sure changes are stored locally:
+
 `singularity run nways.simg cp -rT /labs ~/labs`
 
 Then, run the container:
+
 `singularity run --nv nways.simg jupyter notebook --notebook-dir=~/labs`
 
 Once inside the container, open the jupyter notebook in browser: http://localhost:8888, and start the lab by clicking on the `START_nways.ipynb` notebook.

+ 12 - 11
hpc/nways/nways_labs/nways_start.ipynb

@@ -6,14 +6,14 @@
    "source": [
     "## N Ways to GPU Programming\n",
     "\n",
-    "## Learning objectives\n",
-    "With the release of CUDA in 2007, different approaches to programming GPUs have evolved. Each approach has its own advantages and disadvantages. By the end of this bootcamp session, students will have a broader perspective on GPU programming approaches to help them select a programming model that better fits their applications' needs and constraints. The bootcamp will teach how to accelerate a real world scientific application  using the following methods:\n",
+    "## Learning Objectives\n",
+    "With the release of NVIDIA CUDA in 2007, different approaches to GPU programming have evolved. Each approach has its own advantages and disadvantages. By the end of this bootcamp session, participants will have a broader perspective on GPU programming approaches to help them select a programming model that better fits their application's needs and constraints. The bootcamp will teach how to accelerate a real-world scientific application using the following methods:\n",
     "* Standard: C++ stdpar, Fortran Do-Concurrent\n",
     "* Directives: OpenACC, OpenMP\n",
     "* Frameworks: Kokkos\n",
-    "* Programming Language Extension: CUDA C, CUDA Fortran\n",
+    "* Programming Language Extension: CUDA C, CUDA Fortran, Python CuPy, Python Numba\n",
     "\n",
-    "Let's start with testing the CUDA Driver and GPU you are running the code on in this lab:"
+    "Let's start by testing the CUDA Driver and GPU you are running the code on in this lab:"
    ]
   },
   {
@@ -31,9 +31,10 @@
    "source": [
     "### Tutorial Outline\n",
     "\n",
-    "During this lab, we will be working on porting mini applications in Molecular Simulation (MD) domain to GPUs. You can choose to work with either of this application. Please click on one of the below links to start:\n",
+    "During this lab, we will be working on porting mini-applications in Molecular Simulation (MD) domain to GPUs. You can choose to work with either version of this application. Please click on one of the links below to start N Ways to GPU Programming in **MD** for:\n",
     "\n",
-    "- N Ways to GPU Programming in [MD](nways_MD/English/nways_MD_start.ipynb) domain\n"
+    "- [ C and Fortran ](nways_MD/English/nways_MD_start.ipynb) domain\n",
+    "- [Python ](nways_MD/English/nways_MD_start_python.ipynb) domain\n"
    ]
   },
   {
@@ -41,21 +42,21 @@
    "metadata": {},
    "source": [
     "### Tutorial Duration\n",
-    "The lab material will be presented in a 8hr session. Link to material is available for download at the end of the lab.\n",
+    "The lab material will be presented in an 8-hour session. A Link to the material is available for download at the end of the lab.\n",
     "\n",
     "### Content Level\n",
     "Beginner, Intermediate\n",
     "\n",
     "### Target Audience and Prerequisites\n",
-    "The target audience for this lab is researchers/graduate students and developers who are interested in learning about programming various ways to programming GPUs to accelerate their scientific applications.\n",
+    "The target audience for this lab are researchers/graduate students and developers who are interested in learning about various ways of GPU programming to accelerate their scientific applications.\n",
     "\n",
-    "Basic experience with C/C++ or Fortran programming is needed. No GPU programming knowledge is required. \n",
+    "Basic experience with C/C++ or Python or Fortran programming is needed. No GPU programming knowledge is required. \n",
     "\n",
     "--- \n",
     "\n",
     "## Licensing \n",
     "\n",
-    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
+    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
    ]
   }
  ],
@@ -75,7 +76,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.2"
+   "version": "3.8.5"
   }
  },
  "nbformat": 4,

+ 45 - 0
hpc_ai/PINN/English/python/Start_Here.ipynb

@@ -0,0 +1,45 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Welcome to AI for Science Bootcamp using SimNet\n",
+    "\n",
+    "The objective of this bootcamp is to provide fundamental difference between Data driven and Physics Driven approach  to using Artificial Intelligence (AI) algorithms in Science ( High Performance Computing(HPC) Simulations ). \n",
+    "\n",
+    "This bootcamp will introduce participants to Physics Informed Neural Network ( PINN ) with help of Nvidia toolkit SimNet.\n",
+    "\n",
+    "1. [Introduction](jupyter_notebook/introduction/Introductory_Notebook.ipynb) : In this notebook we will see the advantages of Physics Informed modeling over data-driven modeling and will also outline the brief theory about Physics Informed Neural Networks (PINNs).\n",
+    "\n",
+    "2. [Solving PDEs using PINNs](jupyter_notebook/diffusion_1d/Diffusion_Problem_Notebook.ipynb)  : This notebook give you a headstart in solving your own Partial Differential Equations (PDEs) using neural networks. You will also see how to solve parameterized PDEs. \n",
+    "\n",
+    "3. [Solving transient problems and inverse problems](jupyter_notebook/spring_mass/Spring_Mass_Problem_Notebook.ipynb) : In this tutorial we will see how to solve the transient problems over small time intervals easily by treating time as a continuous variable. We will also cover how to solve inverse problems. \n",
+    "\n",
+    "4. [Challenge](jupyter_notebook/chip_2d/Challenge_CFD_Problem_Notebook.ipynb) : A small exercise to solve  a fluid mechanics problem involving solution to the Navier Stokes equations.\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 45 - 0
hpc_ai/PINN/English/python/jupyter_notebook/Start_Here.ipynb

@@ -0,0 +1,45 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Welcome to AI for Science Bootcamp using SimNet\n",
+    "\n",
+    "The objective of this bootcamp is to provide fundamental difference between Data driven and Physics Driven approach  to using Artificial Intelligence (AI) algorithms in Science ( High Performance Computing(HPC) Simulations ). \n",
+    "\n",
+    "This bootcamp will introduce participants to Physics Informed Neural Network ( PINN ) with help of Nvidia toolkit SimNet.\n",
+    "\n",
+    "1. [Introduction](introduction/Introductory_Notebook.ipynb) : In this notebook we will see the advantages of Physics Informed modeling over data-driven modeling and will also outline the brief theory about Physics Informed Neural Networks (PINNs).\n",
+    "\n",
+    "2. [Solving PDEs using PINNs](diffusion_1d/Diffusion_Problem_Notebook.ipynb)  : This notebook give you a headstart in solving your own Partial Differential Equations (PDEs) using neural networks. You will also see how to solve parameterized PDEs. \n",
+    "\n",
+    "3. [Solving transient problems and inverse problems](spring_mass/Spring_Mass_Problem_Notebook.ipynb) : In this tutorial we will see how to solve the transient problems over small time intervals easily by treating time as a continuous variable. We will also cover how to solve inverse problems. \n",
+    "\n",
+    "4. [Challenge](chip_2d/Challenge_CFD_Problem_Notebook.ipynb) : A small exercise to solve  a fluid mechanics problem involving solution to the Navier Stokes equations.\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

File diff too large to display
+ 360 - 0
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/Challenge_1_template.ipynb


File diff too large to display
+ 356 - 0
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/Challenge_2_template.ipynb


+ 187 - 0
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/Challenge_3_template.ipynb

@@ -0,0 +1,187 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Problem Statement\n",
+    "\n",
+    "Use PINNs along with the OpenFOAM data to solve the inverse problem of finding the viscosity of the flow, given the flow field data\n",
+    "\n",
+    "## Challenge\n",
+    "\n",
+    "The main challenge in this problem is to correctly formulate the problem using PINNs. In order to achieve that, you will have to complete the following parts successfully:\n",
+    "1. Assimilate the OpenFOAM data\n",
+    "2. Set-up the correct equation residuals to miminize\n",
+    "3. Create the neural network and solve the inverse problem\n",
+    "\n",
+    "The viscosity in the OpenFOAM simulation was set to $0.02 \\text{ }m^2/s$. A successful completion of the problem should result in infering out the same viscosity within 10% error margin. The OpenFOAM data that was used for validation in the previous parts would now be used as a training input. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let us start by importing the required packages and modules\n",
+    "\n",
+    "**Note: You need to edit the `chip_2d_inverse_template.py` script that is placed in the ../source_code/chip_2d/ directory.**\n",
+    "\n",
+    "From the top menu, click on File, and Open `chip_2d_inverse_template.py` from the current directory at `../source_code/chip_2d` directory. Remember to SAVE your code after changes, before running below cells."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```python\n",
+    "from sympy import Symbol\n",
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "from simnet.solver import Solver\n",
+    "from simnet.dataset import TrainDomain, ValidationDomain, MonitorDomain\n",
+    "from simnet.data import Validation, Monitor, BC\n",
+    "from simnet.sympy_utils.geometry_2d import Rectangle, Line, Channel2D\n",
+    "from simnet.sympy_utils.functions import parabola\n",
+    "from simnet.csv_utils.csv_rw import csv_to_dict\n",
+    "from simnet.PDES.navier_stokes import IntegralContinuity, NavierStokes\n",
+    "from simnet.controller import SimNetController\n",
+    "from simnet.architecture import FourierNetArch\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For this problem, since we are interested in only assimilating the OpenFOAM data and getting the inference out of it, you do not need create the geometry. Fill in the `Chip2DTrain` based on the data generated from OpenFOAM. Remember to add appropriate keys to the dataset to solve for the correct equations. "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```python\n",
+    "#TODO: Replace all the placeholders with appropriate values\n",
+    "\n",
+    "# define sympy variables to parametrize domain curves\n",
+    "x, y = Symbol('x'), Symbol('y')\n",
+    "\n",
+    "# OpenFOAM data\n",
+    "mapping = {'Points:0': 'x', 'Points:1': 'y',\n",
+    "           'U:0': 'u', 'U:1': 'v', 'p': 'p'}\n",
+    "openfoam_var = csv_to_dict('openfoam/2D_chip_fluid0.csv', mapping)\n",
+    "openfoam_var['x'] -= 2.5 # normalize pos\n",
+    "openfoam_var['y'] -= 0.5\n",
+    "openfoam_invar_numpy = {key: value for key, value in openfoam_var.items() if key in ['x', 'y']}\n",
+    "openfoam_outvar_numpy = {key: value for key, value in openfoam_var.items() if key in ['u', 'v', 'p']}\n",
+    "\n",
+    "#TODO: Add keys and appropriate values for continuity and momentum equations in x and y directions here:\n",
+    "openfoam_outvar_numpy['continuity'] = placeholder\n",
+    "openfoam_outvar_numpy['momentum_x'] = placeholder\n",
+    "openfoam_outvar_numpy['momentum_y'] = placeholder\n",
+    "\n",
+    "class Chip2DTrain(TrainDomain):\n",
+    "  def __init__(self, **config):\n",
+    "    super(Chip2DTrain, self).__init__()\n",
+    "    \n",
+    "    # fill in the appropriate parameters for the from_numpy function\n",
+    "    interior=BC.from_numpy(placeholder, placeholder, batch_size=placeholder)\n",
+    "    self.add(interior, name=\"Interior\")\n",
+    "```"
+   ]
+  },
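+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a hint, in an inverse problem of this kind the Navier-Stokes residuals are expected to vanish at the measurement points, so the residual keys are typically set to zero arrays of the same shape as the data. The snippet below is only a sketch of one possible fill-in (it is not the official solution, and the batch size of 1024 is an assumption):\n",
+    "\n",
+    "```python\n",
+    "# sketch only: one possible way to fill the placeholders above\n",
+    "openfoam_outvar_numpy['continuity'] = np.zeros_like(openfoam_outvar_numpy['u'])\n",
+    "openfoam_outvar_numpy['momentum_x'] = np.zeros_like(openfoam_outvar_numpy['u'])\n",
+    "openfoam_outvar_numpy['momentum_y'] = np.zeros_like(openfoam_outvar_numpy['u'])\n",
+    "\n",
+    "# inside Chip2DTrain.__init__: feed the data points (inputs and targets)\n",
+    "# as the interior constraint; the batch size is an assumption\n",
+    "interior = BC.from_numpy(openfoam_invar_numpy, openfoam_outvar_numpy, batch_size=1024)\n",
+    "```"
+   ]
+  },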
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a `MonitorDomain` to monitor the average viscosity predicted by the model "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```python\n",
+    "class Chip2DMonitor(MonitorDomain):\n",
+    "  def __init__(self, **config):\n",
+    "    super(Chip2DMonitor, self).__init__()\n",
+    "    \n",
+    "    global_monitor = Monitor(openfoam_invar_numpy, {'average_nu': lambda var: tf.reduce_mean(var['nu'])})\n",
+    "    self.add(global_monitor, 'GlobalMonitor')\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now complete the last part of the code by creating the `ChipSolver` to solve our problem. Don't forget to stop the gradients for appropriate variables while setting the equations. "
+   ]
+  },
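+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A partial sketch of one possible completion of the template below is given here (it is not the official solution). Passing `nu='nu'` makes the viscosity a symbolic quantity produced by the `invert_net`, and the `stop_gradients` list should contain the flow variables and derivatives whose gradients are to be frozen; the exact list is left as part of the exercise, so verify it against the SimNet User Guide.\n",
+    "\n",
+    "```python\n",
+    "# sketch only: possible values for the simpler placeholders\n",
+    "class ChipSolver(Solver):\n",
+    "  train_domain = Chip2DTrain\n",
+    "  monitor_domain = Chip2DMonitor\n",
+    "\n",
+    "  def __init__(self, **config):\n",
+    "    super(ChipSolver, self).__init__(**config)\n",
+    "    # nu is produced by invert_net, so it is passed symbolically as the string 'nu';\n",
+    "    # the stop_gradients list is deliberately left as a placeholder here\n",
+    "    self.equations = (NavierStokes(nu='nu', rho=1, dim=2, time=False)\n",
+    "                      .make_node(stop_gradients=[placeholder]))\n",
+    "```"
+   ]
+  },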
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```python\n",
+    "#TODO: Replace all the placeholders with appropriate values\n",
+    "class ChipSolver(Solver):\n",
+    "  train_domain = placeholder\n",
+    "  monitor_domain = placeholder\n",
+    "\n",
+    "  def __init__(self, **config):\n",
+    "    super(ChipSolver, self).__init__(**config)\n",
+    "\n",
+    "    self.equations = (NavierStokes(nu=placeholder, rho=1, dim=2, time=False).make_node(stop_gradients=[placeholder]))\n",
+    "\n",
+    "    flow_net = self.arch.make_node(name='flow_net',\n",
+    "                                   inputs=['x', 'y'],\n",
+    "                                   outputs=['u', 'v', 'p'])\n",
+    "    invert_net = self.arch.make_node(name='invert_net',\n",
+    "                                     inputs=['x', 'y'],\n",
+    "                                     outputs=['nu'])\n",
+    "    self.nets = [flow_net, invert_net]\n",
+    "\n",
+    "  @classmethod\n",
+    "  def update_defaults(cls, defaults):\n",
+    "    defaults.update({\n",
+    "        'network_dir': './network_checkpoint_chip_2d_inverse',\n",
+    "        'rec_results': True,\n",
+    "        'rec_results_freq': 100,\n",
+    "        'start_lr': 3e-4,\n",
+    "        'max_steps': 40000,\n",
+    "        'decay_steps': 100,\n",
+    "        'xla': True\n",
+    "        })\n",
+    "if __name__ == '__main__':\n",
+    "  ctr = SimNetController(ChipSolver)\n",
+    "  ctr.run()\n",
+    "```\n",
+    "\n",
+    "\n",
+    "# Licensing\n",
+    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 99 - 0
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/Challenge_CFD_Problem_Notebook.ipynb

@@ -0,0 +1,99 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&ensp;\n",
+    "[Home Page](../../Start_Here.ipynb)\n",
+    "\n",
+    "[Previous Notebook](../spring_mass/Spring_Mass_Problem_Notebook.ipynb)\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&ensp;\n",
+    "[1](../introduction/Introductory_Notebook.ipynb)\n",
+    "[2](../diffusion_1d/Diffusion_Problem_Notebook.ipynb)\n",
+    "[3](../spring_mass/Spring_Mass_Problem_Notebook.ipynb)\n",
+    "[4]\n",
+    "   \n",
+    "\n",
+    "\n",
+    "# Steady State 2D Laminar Flow over a Chip"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now that you are familiar with the PINNs and the SimNet library, let's solve a fluid mechanics problem involving solution to the Navier Stokes equations. The problem is defined as follows:\n",
+    "\n",
+    "A 2D chip is placed inside a 2D channel. The flow enters inlet (a parabolic profile is used with $u_{max}=1.5\\text{ m/s}$) and exits through the outlet which is a $0 Pa$. All the other walls are treated as no-slip. The kinematic viscosity $(\\nu)$ for the flow is $0.02 \\text{ }m^2/s$ and the density $(\\rho)$ is $1 \\text{ }kg/m^3$. The problem is shown in the figure below.\n",
+    "\n",
+    "<img src=\"chip_2d_geom.png\" alt=\"Drawing\" style=\"width: 800px;\"/>"
+   ]
+  },
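+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, the parabolic inlet profile vanishes at the channel walls and reaches $u_{max}$ at the centerline. Writing the wall positions as $y_{min}$ and $y_{max}$ (in the accompanying template code the channel spans $-0.5 \le y \le 0.5$), one common form of such a profile is\n",
+    "\n",
+    "$$u(y) = \frac{4\,u_{max}\,(y - y_{min})\,(y_{max} - y)}{(y_{max} - y_{min})^2}, \qquad u_{max} = 1.5 \text{ m/s}$$\n",
+    "\n",
+    "which is presumably what the `parabola` helper used in the templates constructs from its interval and peak-value arguments."
+   ]
+  },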
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Objectives \n",
+    "The objectives of this problem are following:\n",
+    "1. [Fluid Flow](Challenge_1_template.ipynb) Use PINNs to solve the fluid flow for the given geometry and flow parameters\n",
+    "2. [Parameterized](Challenge_2_template.ipynb)Use PINNs to solve the parameterized problem by varying the chip's geometry (width and height)\n",
+    "3. [Inverse Problem](Challenge_3_template.ipynb)Use PINNs along with the OpenFOAM data to solve the inverse problem of finding the viscosity of the flow, given the flow field data \n",
+    "\n",
+    "\n",
+    "\n",
+    "# Licensing\n",
+    "This material is released by NVIDIA Corporation under the Creative Commons Attribution 4.0 International (CC BY 4.0)\n",
+    "\n",
+    "\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&ensp;\n",
+    "[Home Page](../../Start_Here.ipynb)\n",
+    "\n",
+    "[Previous Notebook](../spring_mass/Spring_Mass_Problem_Notebook.ipynb)\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&emsp;&emsp;&emsp;\n",
+    "&emsp;&emsp;&ensp;\n",
+    "[1](../introduction/Introductory_Notebook.ipynb)\n",
+    "[2](../diffusion_1d/Diffusion_Problem_Notebook.ipynb)\n",
+    "[3](../spring_mass/Spring_Mass_Problem_Notebook.ipynb)\n",
+    "[4]\n",
+    "   "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

Binary
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/challenge_results.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/challenge_results_param_updated.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/chip_2d_geom.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/chip_2d_parameterized.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/multi_GPU_1.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/multi_GPU_2.png


File diff too large to display
+ 56943 - 0
hpc_ai/PINN/English/python/jupyter_notebook/chip_2d/openfoam/2D_chip_fluid0.csv


File diff too large to display
+ 877 - 0
hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/Diffusion_Problem_Notebook.ipynb


Binary
hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/diffusion_bar_geometry.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/image_diffusion_problem_bootcamp.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/image_diffusion_problem_bootcamp_parameterized.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/diffusion_1d/image_tensorboard.png


File diff too large to display
+ 214 - 0
hpc_ai/PINN/English/python/jupyter_notebook/introduction/Introductory_Notebook.ipynb


Binary
hpc_ai/PINN/English/python/jupyter_notebook/introduction/SimNet_v21.06_User_Guide.pdf


Binary
hpc_ai/PINN/English/python/jupyter_notebook/introduction/every_parabola.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/introduction/image_data_driven_cons.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/introduction/inverse_parabola.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/introduction/inverse_parabola_2.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/introduction/single_parabola.png


File diff too large to display
+ 606 - 0
hpc_ai/PINN/English/python/jupyter_notebook/spring_mass/Spring_Mass_Problem_Notebook.ipynb


Binary
hpc_ai/PINN/English/python/jupyter_notebook/spring_mass/comparison.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/spring_mass/comparison_spring_mass.png


Binary
hpc_ai/PINN/English/python/jupyter_notebook/spring_mass/spring_mass_drawing.png


+ 79 - 0
hpc_ai/PINN/English/python/source_code/chip_2d/chip_2d_inverse_template.py

@@ -0,0 +1,79 @@
+from sympy import Symbol
+import numpy as np
+import tensorflow as tf
+from simnet.solver import Solver
+from simnet.dataset import TrainDomain, ValidationDomain, MonitorDomain
+from simnet.data import Validation, Monitor, BC
+from simnet.sympy_utils.geometry_2d import Rectangle, Line, Channel2D
+from simnet.sympy_utils.functions import parabola
+from simnet.csv_utils.csv_rw import csv_to_dict
+from simnet.PDES.navier_stokes import IntegralContinuity, NavierStokes
+from simnet.controller import SimNetController
+from simnet.architecture import FourierNetArch
+
+#TODO: Replace all the placeholders with appropriate values
+
+# define sympy variables to parametrize domain curves
+x, y = Symbol('x'), Symbol('y')
+
+# OpenFOAM data
+mapping = {'Points:0': 'x', 'Points:1': 'y',
+           'U:0': 'u', 'U:1': 'v', 'p': 'p'}
+openfoam_var = csv_to_dict('openfoam/2D_chip_fluid0.csv', mapping)
+openfoam_var['x'] -= 2.5 # normalize pos
+openfoam_var['y'] -= 0.5
+openfoam_invar_numpy = {key: value for key, value in openfoam_var.items() if key in ['x', 'y']}
+openfoam_outvar_numpy = {key: value for key, value in openfoam_var.items() if key in ['u', 'v', 'p']}
+
+#TODO: Add keys and appropriate values for continuity and momentum equations in x and y directions here:
+openfoam_outvar_numpy['continuity'] = placeholder
+openfoam_outvar_numpy['momentum_x'] = placeholder
+openfoam_outvar_numpy['momentum_y'] = placeholder
+
+class Chip2DTrain(TrainDomain):
+  def __init__(self, **config):
+    super(Chip2DTrain, self).__init__()
+
+    # fill in the appropriate parameters for the from_numpy function
+    interior=BC.from_numpy(placeholder, placeholder, batch_size=placeholder)
+    self.add(interior, name="Interior")
+
+class Chip2DMonitor(MonitorDomain):
+  def __init__(self, **config):
+    super(Chip2DMonitor, self).__init__()
+
+    global_monitor = Monitor(openfoam_invar_numpy, {'average_nu': lambda var: tf.reduce_mean(var['nu'])})
+    self.add(global_monitor, 'GlobalMonitor')
+
+#TODO: Replace all the placeholders with appropriate values
+class ChipSolver(Solver):
+  train_domain = placeholder
+  monitor_domain = placeholder
+
+  def __init__(self, **config):
+    super(ChipSolver, self).__init__(**config)
+
+    self.equations = (NavierStokes(nu=placeholder, rho=1, dim=2, time=False).make_node(stop_gradients=[placeholder]))
+
+    flow_net = self.arch.make_node(name='flow_net',
+                                   inputs=['x', 'y'],
+                                   outputs=['u', 'v', 'p'])
+    invert_net = self.arch.make_node(name='invert_net',
+                                     inputs=['x', 'y'],
+                                     outputs=['nu'])
+    self.nets = [flow_net, invert_net]
+
+  @classmethod
+  def update_defaults(cls, defaults):
+    defaults.update({
+        'network_dir': './network_checkpoint_chip_2d_inverse',
+        'rec_results': True,
+        'rec_results_freq': 100,
+        'start_lr': 3e-4,
+        'max_steps': 40000,
+        'decay_steps': 100,
+        'xla': True
+        })
+if __name__ == '__main__':
+  ctr = SimNetController(ChipSolver)
+  ctr.run()

+ 170 - 0
hpc_ai/PINN/English/python/source_code/chip_2d/chip_2d_parameterized_template.py

@@ -0,0 +1,170 @@
+from sympy import Symbol
+import numpy as np
+import tensorflow as tf
+from simnet.solver import Solver
+from simnet.dataset import TrainDomain, ValidationDomain, InferenceDomain
+from simnet.data import Validation, Inference
+from simnet.sympy_utils.geometry_2d import Rectangle, Line, Channel2D
+from simnet.sympy_utils.functions import parabola
+from simnet.csv_utils.csv_rw import csv_to_dict
+from simnet.PDES.navier_stokes import IntegralContinuity, NavierStokes
+from simnet.controller import SimNetController
+from simnet.architecture import FourierNetArch
+from simnet.learning_rate import ExponentialDecayLRWithWarmup
+
+# simulation params
+channel_length = (-2.5, 2.5)
+channel_width = (-0.5, 0.5)
+chip_pos = -1.0
+#chip_height = 0.6         # Not fixed anymore
+#chip_width = 1.0          # Not fixed anymore
+inlet_vel = 1.5
+
+# parametric variables
+chip_height = Symbol('chip_height')
+chip_width = Symbol('chip_width')
+
+chip_height_range = (0.4, 0.8)
+chip_width_range  = (0.6, 1.4)
+
+param_ranges = {chip_height: chip_height_range,
+                chip_width: chip_width_range}
+
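+# chip_height and chip_width are now sympy Symbols sampled from the ranges above:
+# every boundary/interior constraint below has to receive param_ranges, and the
+# flow network has to take the two parameters as additional inputs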
+#TODO: Replace all the placeholders with appropriate geometry constructions
+# define geometry here
+# you may use the geometry generated in the previous challenge problem as a reference
+
+channel = placeholder
+# define inlet and outlet
+inlet = placeholder
+outlet = placeholder
+# define the chip
+rec = placeholder
+# create a geometry for higher sampling of point cloud near the fin
+flow_rec = placeholder
+
+# fluid area
+geo = placeholder
+geo_hr = placeholder
+geo_lr = placeholder
+
+x_pos = Symbol('x_pos')
+integral_line = placeholder
+x_pos_range = {x_pos: lambda batch_size: np.full((batch_size, 1), np.random.uniform(channel_length[0], channel_length[1]))}
+
+#TODO: Replace all the placeholders with appropriate values
+
+# define sympy variables to parametrize domain curves
+x, y = Symbol('x'), Symbol('y')
+
+class Chip2DTrain(TrainDomain):
+  def __init__(self, **config):
+    super(Chip2DTrain, self).__init__()
+
+    # inlet
+    inlet_parabola = parabola(y, channel_width[0], channel_width[1], inlet_vel)
+    inlet_bc = inlet.boundary_bc(outvar_sympy={'u': inlet_parabola, 'v': 0},
+                                 batch_size_per_area=64,
+                                 param_ranges=param_ranges)
+    self.add(inlet_bc, name="Inlet")
+
+    # outlet
+    outlet_bc = outlet.boundary_bc(outvar_sympy={placeholder},
+                                   batch_size_per_area=placeholder,
+                                   param_ranges=placeholder)
+    self.add(outlet_bc, name="Outlet")
+
+    # noslip
+    noslip = geo.boundary_bc(outvar_sympy={placeholder},
+                             batch_size_per_area=placeholder,
+                             param_ranges=placeholder)
+    self.add(noslip, name="ChipNS")
+
+    # interior lr
+    interior_lr = geo_lr.interior_bc(outvar_sympy={placeholder},
+                                     bounds={placeholder},
+                                     lambda_sympy={placeholder},
+                                     batch_size_per_area=placeholder,
+                                     param_ranges=placeholder)
+    self.add(interior_lr, name="InteriorLR")
+
+    # interior hr
+    interior_hr = geo_hr.interior_bc(outvar_sympy={placeholder},
+                                     bounds={placeholder},
+                                     lambda_sympy={placeholder},
+                                     batch_size_per_area=placeholder,
+                                     param_ranges=placeholder)
+    self.add(interior_hr, name="InteriorHR")
+
+
+    # integral continuity
+    for i in range(4):
+      IC = integral_line.boundary_bc(outvar_sympy={placeholder},
+                                     batch_size_per_area=placeholder,
+                                     lambda_sympy={placeholder},
+                                     criteria=placeholder,
+                                     param_ranges={placeholder},
+                                     fixed_var=placeholder)
+      self.add(IC, name="IntegralContinuity_"+str(i))
+
+# validation data
+mapping = {'Points:0': 'x', 'Points:1': 'y',
+           'U:0': 'u', 'U:1': 'v', 'p': 'p'}
+openfoam_var = csv_to_dict('openfoam/2D_chip_fluid0.csv', mapping)
+openfoam_var['x'] -= 2.5 # normalize pos
+openfoam_var['y'] -= 0.5
+
+#TODO: Add the arrays for 'chip_height' and 'chip_width'
+
+openfoam_invar_numpy = {key: value for key, value in openfoam_var.items() if key in ['x', 'y', 'chip_height', 'chip_width']}
+openfoam_outvar_numpy = {key: value for key, value in openfoam_var.items() if key in ['u', 'v', 'p']}
+
+class Chip2DVal(ValidationDomain):
+  def __init__(self, **config):
+    super(Chip2DVal, self).__init__()
+    val = Validation.from_numpy(openfoam_invar_numpy, openfoam_outvar_numpy)
+    self.add(val, name='Val')
+
+class Chip2DInf(InferenceDomain):
+  def __init__(self, **config):
+    super(Chip2DInf, self).__init__()
+    inf = Inference(geo.sample_interior(2048, bounds={x: channel_length, y: channel_width}, 
+                                        param_ranges={chip_height: 0.4, chip_width: 1.4}),
+                    ['u', 'v', 'p'])
+    self.add(inf, name='Inference')
+
+#TODO: Replace all the placeholders with appropriate values
+class ChipSolver(Solver):
+  train_domain = placeholder
+  val_domain = placeholder
+  arch = FourierNetArch
+  lr = ExponentialDecayLRWithWarmup
+  inference_domain = placeholder
+
+  def __init__(self, **config):
+    super(ChipSolver, self).__init__(**config)
+
+    self.frequencies = ('axis,diagonal', [i/5. for i in range(25)]) 
+
+    self.equations = (placeholder)
+    flow_net = self.arch.make_node(name='flow_net',
+                                   inputs=[placeholder],
+                                   outputs=[placeholder])
+    self.nets = [flow_net]
+
+  @classmethod
+  def update_defaults(cls, defaults):
+    defaults.update({
+        'network_dir': './network_checkpoint_chip_2d_parameterized',
+        'rec_results': True,
+        'rec_results_freq': 5000,
+        'max_steps': 20000,
+        'decay_steps': 400,
+        'warmup_type': 'gradual',
+        'warmup_steps': 2000,
+        'xla': True
+        })
+if __name__ == '__main__':
+  ctr = SimNetController(ChipSolver)
+  ctr.run()
+

+ 143 - 0
hpc_ai/PINN/English/python/source_code/chip_2d/chip_2d_template.py

@@ -0,0 +1,143 @@
+from sympy import Symbol
+import numpy as np
+import tensorflow as tf
+from simnet.solver import Solver
+from simnet.dataset import TrainDomain, ValidationDomain
+from simnet.data import Validation
+from simnet.sympy_utils.geometry_2d import Rectangle, Line, Channel2D
+from simnet.sympy_utils.functions import parabola
+from simnet.csv_utils.csv_rw import csv_to_dict
+from simnet.PDES.navier_stokes import IntegralContinuity, NavierStokes
+from simnet.controller import SimNetController
+from simnet.architecture import FourierNetArch
+
+# simulation params
+channel_length = (-2.5, 2.5)
+channel_width = (-0.5, 0.5)
+chip_pos = -1.0
+chip_height = 0.6
+chip_width = 1.0
+inlet_vel = 1.5
+#TODO: Replace x1, y1, x2, y2, and X's with appropriate values
+
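+# Hint (not the official solution): the channel spans channel_length x channel_width,
+# the inlet and outlet are the vertical lines at the two channel ends (with outward
+# normals of opposite sign), and the chip is a rectangle of size chip_width x
+# chip_height placed at x = chip_pos on one of the channel walls -- check the
+# problem figure in the accompanying notebook for the exact placement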
+# define geometry
+# define channel
+channel = Channel2D((x1, y1), (x2, y2))
+# define inlet and outlet
+inlet = Line((x1, y1), (x1, y2), normal= X)
+outlet = Line((x1, y1), (x1, y2), normal= X)
+# define the chip
+rec = Rectangle((x1, y1), (x2, y2))
+# create a geometry for higher sampling of point cloud near the fin
+flow_rec = Rectangle((chip_pos-0.25, channel_width[0]),
+                     (chip_pos+chip_width+0.25, channel_width[1]))
+# fluid area
+geo = channel - rec
+geo_hr = geo & flow_rec
+geo_lr = geo - flow_rec
+
+# Optional integral continuity planes to speed up convergence
+x_pos = Symbol('x_pos')
+integral_line = Line((x_pos, channel_width[0]),
+                     (x_pos, channel_width[1]),
+                     1)
+x_pos_range = {x_pos: lambda batch_size: np.full((batch_size, 1), np.random.uniform(channel_length[0], channel_length[1]))}
+
+# TODO: Replace all the placeholders with appropriate values
+
+# define sympy variables to parametrize domain curves
+x, y = Symbol('x'), Symbol('y')
+
+class Chip2DTrain(TrainDomain):
+  def __init__(self, **config):
+    super(Chip2DTrain, self).__init__()
+
+    # inlet
+    inlet_parabola = parabola(y, channel_width[0], channel_width[1], inlet_vel)
+    inlet_bc = inlet.boundary_bc(outvar_sympy={'u': inlet_parabola, 'v': 0},
+                                 batch_size_per_area=64)
+    self.add(inlet_bc, name="Inlet")
+
+    # outlet
+    outlet_bc = outlet.boundary_bc(outvar_sympy={placeholder},
+                                   batch_size_per_area=placeholder)
+    self.add(outlet_bc, name="Outlet")
+
+    # noslip
+    noslip = geo.boundary_bc(outvar_sympy={placeholder},
+                             batch_size_per_area=placeholder)
+    self.add(noslip, name="ChipNS")
+
+    # interior lr
+    interior_lr = geo_lr.interior_bc(outvar_sympy={placeholder},
+                                     bounds={placeholder},
+                                     lambda_sympy={placeholder},
+                                     batch_size_per_area=placeholder)
+    self.add(interior_lr, name="InteriorLR")
+
+    # interior hr
+    interior_hr = geo_hr.interior_bc(outvar_sympy=placeholder,
+                                     bounds=placeholder,
+                                     lambda_sympy=placeholder,
+                                     batch_size_per_area=placeholder)
+    self.add(interior_hr, name="InteriorHR")
+
+
+    # integral continuity
+    for i in range(4):
+      IC = integral_line.boundary_bc(outvar_sympy={'integral_continuity': 1.0},
+                                     batch_size_per_area=512,
+                                     lambda_sympy={'lambda_integral_continuity': 1.0},
+                                     criteria=geo.sdf>0,
+                                     param_ranges=x_pos_range,
+                                     fixed_var=False)
+      self.add(IC, name="IntegralContinuity_"+str(i))
+
+# TODO: Set the appropriate normalization for the validation data
+# The validation data has domain extents of (0,0) to (5,1). Normalize this based on your definition of the domain
+
+# validation data
+mapping = {'Points:0': 'x', 'Points:1': 'y',
+           'U:0': 'u', 'U:1': 'v', 'p': 'p'}
+openfoam_var = csv_to_dict('openfoam/2D_chip_fluid0.csv', mapping)
+openfoam_var['x'] -= 2.5 #TODO: Sample normalization of position. Edit based on your geometry definition
+openfoam_var['y'] -= 0.5
+openfoam_invar_numpy = {key: value for key, value in openfoam_var.items() if key in ['x', 'y']}
+openfoam_outvar_numpy = {key: value for key, value in openfoam_var.items() if key in ['u', 'v', 'p']}
+
+class Chip2DVal(ValidationDomain):
+  def __init__(self, **config):
+    super(Chip2DVal, self).__init__()
+    val = Validation.from_numpy(placeholder)
+    self.add(val, name='Val')
+
+#TODO: Replace all the placeholders with appropriate values
+class ChipSolver(Solver):
+  train_domain = placeholder
+  val_domain = placeholder
+  arch = FourierNetArch
+
+  def __init__(self, **config):
+    super(ChipSolver, self).__init__(**config)
+
+    self.frequencies = ('axis,diagonal', [i/5. for i in range(25)]) 
+
+    self.equations = (placeholder)
+    flow_net = self.arch.make_node(name='flow_net',
+                                   inputs=[placeholder],
+                                   outputs=[placeholder])
+    self.nets = [flow_net]
+
+  @classmethod
+  def update_defaults(cls, defaults):
+    defaults.update({
+        'network_dir': './network_checkpoint_chip_2d',
+        'rec_results': True,
+        'rec_results_freq': 5000,
+        'max_steps': 10000,
+        'decay_steps': 100,
+        'xla': True
+        })
+if __name__ == '__main__':
+  ctr = SimNetController(ChipSolver)
+  ctr.run()

File diff too large to display
+ 56943 - 0
hpc_ai/PINN/English/python/source_code/chip_2d/openfoam/2D_chip_fluid0.csv


+ 27 - 0
hpc_ai/PINN/English/python/source_code/chip_2d/sample_plotting_script.py

@@ -0,0 +1,27 @@
+# template script to create some easy plots for the chip problem
+import numpy as np
+import matplotlib.pyplot as plt
+
+import simnet as sn
+
+# set the path for the .npz files
+base_dir = 'network_checkpoint_chip_2d/val_domain/results/'
+
+# load the .npz files
+pred_data = np.load(base_dir + 'Val_pred.npz', allow_pickle=True)
+true_data = np.load(base_dir + 'Val_true.npz', allow_pickle=True)
+
+pred_data = np.atleast_1d(pred_data.f.arr_0)[0]
+true_data = np.atleast_1d(true_data.f.arr_0)[0]
+
+# remove the variables created for parameterization (uncomment when visualizing parametric results)
+#pred_data.pop('chip_width')
+#pred_data.pop('chip_height')
+#true_data.pop('chip_width')
+#true_data.pop('chip_height')
+
+# plot only one set of variables
+sn.plot_utils.field.plot_field(pred_data, 'chip_predicted', coordinates=['x', 'y'], resolution=256)
+
+# plot the comparison between a set of variables
+sn.plot_utils.field.plot_field_compare(true_data, pred_data, 'chip_comparison', coordinates=['x', 'y'], resolution=256)

+ 242 - 0
hpc_ai/PINN/English/python/source_code/diffusion_1d/diffusion_bar.py

@@ -0,0 +1,242 @@
+# import SimNet library
+from sympy import Symbol, sin, Eq, Abs, exp
+import numpy as np
+import sys
+sys.path.append('../../')
+from simnet.solver import Solver
+from simnet.dataset import TrainDomain, ValidationDomain, MonitorDomain
+from simnet.data import Validation, BC, Monitor
+from simnet.sympy_utils.geometry_1d import Line1D
+#from simnet.PDES.diffusion import Diffusion
+from simnet.controller import SimNetController
+from simnet.node import Node
+from simnet.pdes import PDES
+import tensorflow as tf
+from sympy import Symbol, Function, Number
+
+from simnet.pdes import PDES
+from simnet.node import Node
+from simnet.variables import Variables
+
+
+# params for domain
+L1 = Line1D(0,1)
+L2 = Line1D(1,2)
+
+D1 = 1e1
+D2 = 1e-1
+
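+# boundary temperatures: Ta is imposed at x = 0 and Tc at x = 2; Tb is the
+# analytical interface temperature at x = 1, obtained from steady-state flux
+# continuity between the two bars, D1*(Tb - Ta) = D2*(Tc - Tb)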
+Tc = 100
+Ta = 0
+Tb = (Tc + (D1/D2)*Ta)/(1 + (D1/D2))
+
+#Tb = Tc - u_1__x*D1/D2 
+#Ta = Tb - u_1__x
+print(Ta)
+print(Tb)
+print(Tc)
+#exit()
+
+class Diffusion(PDES):
+  name = 'Diffusion'
+ 
+  def __init__(self, T='T', D='D', Q=0, dim=3, time=True):
+    # set params
+    self.T = T
+    self.dim = dim
+    self.time = time
+
+    # coordinates
+    x, y, z = Symbol('x'), Symbol('y'), Symbol('z')
+
+    # time
+    t = Symbol('t')
+
+    # make input variables 
+    input_variables = {'x':x,'y':y,'z':z,'t':t}
+    if self.dim == 1:
+      input_variables.pop('y')
+      input_variables.pop('z')
+    elif self.dim == 2:
+      input_variables.pop('z')
+    if not self.time: 
+      input_variables.pop('t')
+
+    # Temperature
+    assert type(T) == str, "T needs to be string"
+    T = Function(T)(*input_variables)
+
+    # Diffusivity
+    if type(D) is str:
+      D = Function(D)(*input_variables)
+    elif type(D) in [float, int]:
+      D = Number(D)
+
+    # Source
+    if type(Q) is str:
+      Q = Function(Q)(*input_variables)
+    elif type(Q) in [float, int]:
+      Q = Number(Q)
+
+    # set equations
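+    # diffusion equation residual: dT/dt - div(D * grad(T)) - Q = 0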
+    self.equations = Variables()
+    self.equations['diffusion_'+self.T] = (T.diff(t)
+                                            - (D*T.diff(x)).diff(x)
+                                            - (D*T.diff(y)).diff(y)
+                                            - (D*T.diff(z)).diff(z)
+                                            - Q)
+
+class DiffusionInterface(PDES):
+  name = 'DiffusionInterface'
+
+  def __init__(self, T_1, T_2, D_1, D_2, dim=3, time=True):
+    # set params
+    self.T_1 = T_1
+    self.T_2 = T_2
+    self.D_1 = D_1
+    self.D_2 = D_2
+    self.dim = dim
+    self.time = time
+ 
+    # coordinates
+    x, y, z = Symbol('x'), Symbol('y'), Symbol('z')
+    normal_x, normal_y, normal_z = Symbol('normal_x'), Symbol('normal_y'), Symbol('normal_z')
+
+    # time
+    t = Symbol('t')
+
+    # make input variables 
+    input_variables = {'x':x,'y':y,'z':z,'t':t}
+    if self.dim == 1:
+      input_variables.pop('y')
+      input_variables.pop('z')
+    elif self.dim == 2:
+      input_variables.pop('z')
+    if not self.time: 
+      input_variables.pop('t')
+
+    # variables to match the boundary conditions (example Temperature)
+    T_1 = Function(T_1)(*input_variables)
+    T_2 = Function(T_2)(*input_variables)
+
+    # set equations
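+    # interface conditions: temperature continuity (T_1 = T_2) and flux
+    # continuity (D_1 * dT_1/dn = D_2 * dT_2/dn) across the shared boundary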
+    self.equations = Variables()
+    self.equations['diffusion_interface_dirichlet_'+self.T_1+'_'+self.T_2] = T_1 - T_2
+    flux_1 = self.D_1 * (normal_x * T_1.diff(x) + normal_y * T_1.diff(y) + normal_z * T_1.diff(z))
+    flux_2 = self.D_2 * (normal_x * T_2.diff(x) + normal_y * T_2.diff(y) + normal_z * T_2.diff(z))
+    self.equations['diffusion_interface_neumann_'+self.T_1+'_'+self.T_2] = flux_1 - flux_2
+
+class DiffusionTrain(TrainDomain):
+  def __init__(self, **config):
+    super(DiffusionTrain, self).__init__()
+    # sympy variables
+    x = Symbol('x')
+    c = Symbol('c')
+    
+    # right hand side (x = 2) Pt c
+    IC = L2.boundary_bc(outvar_sympy={'u_2': Tc},
+                        batch_size_per_area=1,
+                        criteria=Eq(x, 2))
+    self.add(IC, name="RightHandSide")
+    
+    # left hand side (x = 0) Pt a
+    IC = L1.boundary_bc(outvar_sympy={'u_1': Ta},
+                        batch_size_per_area=1,
+                        criteria=Eq(x, 0))
+    self.add(IC, name="LeftHandSide")
+    
+    # interface 1-2
+    IC = L1.boundary_bc(outvar_sympy={'diffusion_interface_dirichlet_u_1_u_2': 0,
+                                      'diffusion_interface_neumann_u_1_u_2': 0},
+                        lambda_sympy={'lambda_diffusion_interface_dirichlet_u_1_u_2': 1,
+                                      'lambda_diffusion_interface_neumann_u_1_u_2': 1},
+                        batch_size_per_area=1,
+                        criteria=Eq(x, 1))
+    self.add(IC, name="Interface1n2")
+    
+    # interior 1
+    interior = L1.interior_bc(outvar_sympy={'diffusion_u_1': 0},
+                              lambda_sympy={'lambda_diffusion_u_1': 1},
+                              bounds={x: (0, 1)},
+                              batch_size_per_area=200)
+    self.add(interior, name="Interior1")
+    
+    # interior 2
+    interior = L2.interior_bc(outvar_sympy={'diffusion_u_2': 0},
+                              lambda_sympy={'lambda_diffusion_u_2': 1},
+                              bounds={x: (1, 2)},
+                              batch_size_per_area=200)
+    self.add(interior, name="Interior2")
+
+class DiffusionVal(ValidationDomain):
+  def __init__(self, **config):
+    super(DiffusionVal, self).__init__()
+    # make validation data line 1
+    #Tc = u_2__2
+    #Tb = Tc - u_1__x*D1/D2 
+    #Ta = Tb - u_1__x
+
+    x = np.expand_dims(np.linspace(0, 1, 100), axis=-1)
+    u_1 = x*Tb + (1-x)*Ta
+    invar_numpy = {'x': x}
+    outvar_numpy = {'u_1': u_1}
+    val = Validation.from_numpy(invar_numpy, outvar_numpy)
+    self.add(val, name='Val1')
+    
+    # make validation data line 2
+    x = np.expand_dims(np.linspace(1, 2, 100), axis=-1)
+    u_2 = (x-1)*Tc + (2-x)*Tb
+    invar_numpy = {'x': x}
+    outvar_numpy = {'u_2': u_2}
+    val = Validation.from_numpy(invar_numpy, outvar_numpy)
+    self.add(val, name='Val2')
+
+class DiffusionMonitor(MonitorDomain):
+  def __init__(self, **config):
+    super(DiffusionMonitor, self).__init__()
+    x = Symbol('x')
+
+    # flux in U1 at x = 1 
+    fluxU1 = Monitor(L1.sample_boundary(10, criteria=Eq(x, 1)),
+                    {'flux_U1': lambda var: tf.reduce_mean(D1*var['u_1__x'])})
+    self.add(fluxU1, 'FluxU1')
+
+    # flux in U2 at x = 1 
+    fluxU2 = Monitor(L2.sample_boundary(10, criteria=Eq(x, 1)),
+                    {'flux_U2': lambda var: tf.reduce_mean(D2*var['u_2__x'])})
+    self.add(fluxU2, 'FluxU2')
+
+# Define neural network
+class DiffusionSolver(Solver):
+  train_domain = DiffusionTrain
+  val_domain = DiffusionVal
+  monitor_domain = DiffusionMonitor
+
+  def __init__(self, **config):
+    super(DiffusionSolver, self).__init__(**config)
+
+    self.equations = (Diffusion(T='u_1', D=D1, dim=1, time=False).make_node()
+                      + Diffusion(T='u_2', D=D2, dim=1, time=False).make_node()
+                      + DiffusionInterface('u_1', 'u_2', D1, D2, dim=1, time=False).make_node())
+    diff_net_u_1 = self.arch.make_node(name='diff_net_u_1',
+                                   inputs=['x'],
+                                   outputs=['u_1'])
+    diff_net_u_2 = self.arch.make_node(name='diff_net_u_2',
+                                   inputs=['x'],
+                                   outputs=['u_2'])
+    self.nets = [diff_net_u_1, diff_net_u_2]
+
+  @classmethod  # overrides the default training hyperparameters for this solver
+  def update_defaults(cls, defaults):
+    defaults.update({
+        'network_dir': './network_checkpoint_diff',
+        'max_steps': 5000,
+        'decay_steps': 100,
+        'start_lr': 1e-4,
+        'xla': True,
+        #'end_lr': 1e-6,
+        })
+
+if __name__ == '__main__':
+  ctr = SimNetController(DiffusionSolver)
+  ctr.run()

+ 253 - 0
hpc_ai/PINN/English/python/source_code/diffusion_1d/diffusion_bar_paramaterized.py

@@ -0,0 +1,253 @@
+# import SimNet library
+from sympy import Symbol, sin, Eq, Abs, exp
+import numpy as np
+import sys
+sys.path.append('../../')
+from simnet.solver import Solver
+from simnet.dataset import TrainDomain, ValidationDomain, MonitorDomain
+from simnet.data import Validation, BC, Monitor
+from simnet.sympy_utils.geometry_1d import Line1D
+from simnet.controller import SimNetController
+from simnet.node import Node
+from simnet.pdes import PDES
+import tensorflow as tf
+from sympy import Symbol, Function, Number
+
+from simnet.pdes import PDES
+from simnet.node import Node
+from simnet.variables import Variables
+
+
+# params for domain
+L1 = Line1D(0,1)
+L2 = Line1D(1,2)
+
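+# D1 is now a model parameter: a sympy Symbol sampled from D1_range during
+# training and passed to the networks as an extra input, while validation and
+# monitoring are evaluated at the fixed value D1_validation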
+D1 = Symbol('D1')
+D1_range = {D1: (5, 25)}
+D1_validation = 1e1
+
+D2 = 1e-1
+
+Tc = 100
+Ta = 0
+Tb = (Tc + (D1/D2)*Ta)/(1 + (D1/D2))
+
+Tb_validation = float(Tb.evalf(subs={D1: 1e1}))
+
+class Diffusion(PDES):
+  name = 'Diffusion'
+ 
+  def __init__(self, T='T', D='D', Q=0, dim=3, time=True):
+    # set params
+    self.T = T
+    self.dim = dim
+    self.time = time
+
+    # coordinates
+    x, y, z = Symbol('x'), Symbol('y'), Symbol('z')
+
+    # time
+    t = Symbol('t')
+
+    # make input variables 
+    input_variables = {'x':x,'y':y,'z':z,'t':t}
+    if self.dim == 1:
+      input_variables.pop('y')
+      input_variables.pop('z')
+    elif self.dim == 2:
+      input_variables.pop('z')
+    if not self.time: 
+      input_variables.pop('t')
+
+    # Temperature
+    assert type(T) == str, "T needs to be string"
+    T = Function(T)(*input_variables)
+
+    # Diffusivity
+    if type(D) is str:
+      D = Function(D)(*input_variables)
+    elif type(D) in [float, int]:
+      D = Number(D)
+
+    # Source
+    if type(Q) is str:
+      Q = Function(Q)(*input_variables)
+    elif type(Q) in [float, int]:
+      Q = Number(Q)
+
+    # set equations
+    self.equations = Variables()                  
+    self.equations['diffusion_'+self.T] = (T.diff(t)
+                                            - (D*T.diff(x)).diff(x)
+                                            - (D*T.diff(y)).diff(y)
+                                            - (D*T.diff(z)).diff(z)
+                                            - Q)
+
+class DiffusionInterface(PDES):
+  name = 'DiffusionInterface'
+
+  def __init__(self, T_1='T_1', T_2='T_2', D_1='D_1', D_2='D_2', dim=3, time=True):
+    # set params
+    self.T_1 = T_1
+    self.T_2 = T_2
+    self.dim = dim
+    self.time = time
+ 
+    # coordinates
+    x, y, z = Symbol('x'), Symbol('y'), Symbol('z')
+    normal_x, normal_y, normal_z = Symbol('normal_x'), Symbol('normal_y'), Symbol('normal_z')
+
+    # time
+    t = Symbol('t')
+
+    # make input variables 
+    input_variables = {'x':x,'y':y,'z':z,'t':t}
+    if self.dim == 1:
+      input_variables.pop('y')
+      input_variables.pop('z')
+    elif self.dim == 2:
+      input_variables.pop('z')
+    if not self.time: 
+      input_variables.pop('t')
+
+    # variables to match the boundary conditions (example Temperature)
+    T_1 = Function(T_1)(*input_variables)
+    T_2 = Function(T_2)(*input_variables)
+    
+    # Diffusivity D_1
+    if type(D_1) is str:
+      D_1 = Function(D_1)(*input_variables)
+    elif type(D_1) in [float, int]:
+      D_1 = Number(D_1)
+
+    # Diffusivity D_2
+    if type(D_2) is str:
+      D_2 = Function(D_2)(*input_variables)
+    elif type(D_2) in [float, int]:
+      D_2 = Number(D_2)
+
+    # set equations
+    self.equations = Variables()
+    self.equations['diffusion_interface_dirichlet_'+self.T_1+'_'+self.T_2] = T_1 - T_2
+    flux_1 = D_1 * (normal_x * T_1.diff(x) + normal_y * T_1.diff(y) + normal_z * T_1.diff(z))
+    flux_2 = D_2 * (normal_x * T_2.diff(x) + normal_y * T_2.diff(y) + normal_z * T_2.diff(z))
+    self.equations['diffusion_interface_neumann_'+self.T_1+'_'+self.T_2] = flux_1 - flux_2
+
+class DiffusionTrain(TrainDomain):
+  def __init__(self, **config):
+    super(DiffusionTrain, self).__init__()
+    # sympy variables
+    x = Symbol('x')
+    c = Symbol('c')
+    
+    # right hand side (x = 2) Pt c
+    IC = L2.boundary_bc(outvar_sympy={'u_2': Tc},
+                        batch_size_per_area=10,
+                        criteria=Eq(x, 2),
+                        param_ranges=D1_range)
+    self.add(IC, name="RightHandSide")
+    
+    # left hand side (x = 0) Pt a
+    IC = L1.boundary_bc(outvar_sympy={'u_1': Ta},
+                        batch_size_per_area=10,
+                        criteria=Eq(x, 0),
+                        param_ranges=D1_range)
+    self.add(IC, name="LeftHandSide")
+    
+    # interface 1-2
+    IC = L1.boundary_bc(outvar_sympy={'diffusion_interface_dirichlet_u_1_u_2': 0,
+                                      'diffusion_interface_neumann_u_1_u_2': 0},
+                        lambda_sympy={'lambda_diffusion_interface_dirichlet_u_1_u_2': 1,
+                                      'lambda_diffusion_interface_neumann_u_1_u_2': 1},
+                        batch_size_per_area=10,
+                        criteria=Eq(x, 1),
+                        param_ranges=D1_range)
+    self.add(IC, name="Interface1n2")
+    
+    # interior 1
+    interior = L1.interior_bc(outvar_sympy={'diffusion_u_1': 0},
+                              lambda_sympy={'lambda_diffusion_u_1': 1},
+                              bounds={x: (0, 1)},
+                              batch_size_per_area=400,
+                              param_ranges=D1_range)
+    self.add(interior, name="Interior1")
+    
+    # interior 2
+    interior = L2.interior_bc(outvar_sympy={'diffusion_u_2': 0},
+                              lambda_sympy={'lambda_diffusion_u_2': 1},
+                              bounds={x: (1, 2)},
+                              batch_size_per_area=400,
+                              param_ranges=D1_range)
+    self.add(interior, name="Interior2")
+
+class DiffusionVal(ValidationDomain):
+  def __init__(self, **config):
+    super(DiffusionVal, self).__init__()
+    # make validation data line 1
+    x = np.expand_dims(np.linspace(0, 1, 100), axis=-1)
+    D1 = np.zeros_like(x) + D1_validation                      # For creating D1 input array
+    u_1 = x*Tb_validation + (1-x)*Ta
+    invar_numpy = {'x': x}                                    # Set the invars for the required D1 
+    invar_numpy.update({'D1': np.full_like(invar_numpy['x'], D1_validation)})
+    outvar_numpy = {'u_1': u_1}
+    val = Validation.from_numpy(invar_numpy, outvar_numpy)
+    self.add(val, name='Val1')
+    
+    # make validation data line 2
+    x = np.expand_dims(np.linspace(1, 2, 100), axis=-1)
+    u_2 = (x-1)*Tc + (2-x)*Tb_validation
+    invar_numpy = {'x': x}                           # Set the invars for the required D1 
+    invar_numpy.update({'D1': np.full_like(invar_numpy['x'], D1_validation)})
+    outvar_numpy = {'u_2': u_2}
+    val = Validation.from_numpy(invar_numpy, outvar_numpy)
+    self.add(val, name='Val2')
+
+class DiffusionMonitor(MonitorDomain):
+  def __init__(self, **config):
+    super(DiffusionMonitor, self).__init__()
+    x = Symbol('x')
+
+    # flux in U1 at x = 1 
+    fluxU1 = Monitor(L1.sample_boundary(10, criteria=Eq(x, 1), param_ranges={D1: D1_validation}),   # Set the parameter range for the required D1 
+                    {'flux_U1': lambda var: tf.reduce_mean(D1_validation*var['u_1__x'])})
+    self.add(fluxU1, 'FluxU1')
+
+    # flux in U2 at x = 1 
+    fluxU2 = Monitor(L2.sample_boundary(10, criteria=Eq(x, 1), param_ranges={D1: D1_validation}),   # Set the parameter range for the required D1 
+                    {'flux_U2': lambda var: tf.reduce_mean(D2*var['u_2__x'])})
+    self.add(fluxU2, 'FluxU2')
+
+# Define neural network
+class DiffusionSolver(Solver):
+  train_domain = DiffusionTrain
+  val_domain = DiffusionVal
+  monitor_domain = DiffusionMonitor
+
+  def __init__(self, **config):
+    super(DiffusionSolver, self).__init__(**config)
+
+    self.equations = (Diffusion(T='u_1', D='D1', dim=1, time=False).make_node()         # Symbolic input to the equation
+                      + Diffusion(T='u_2', D=D2, dim=1, time=False).make_node()
+                      + DiffusionInterface('u_1', 'u_2', 'D1', D2, dim=1, time=False).make_node())
+    diff_net_u_1 = self.arch.make_node(name='diff_net_u_1',
+                                   inputs=['x', 'D1'],                                  # Add the parameters to the network
+                                   outputs=['u_1'])
+    diff_net_u_2 = self.arch.make_node(name='diff_net_u_2',
+                                   inputs=['x', 'D1'],
+                                   outputs=['u_2'])
+    self.nets = [diff_net_u_1, diff_net_u_2]
+
+  @classmethod  # overrides the default training hyperparameters for this solver
+  def update_defaults(cls, defaults):
+    defaults.update({
+        'network_dir': './network_checkpoint_diff_parameterized',
+        'max_steps': 10000,
+        'decay_steps': 200,
+        'start_lr': 1e-4,
+        'layer_size': 256,
+        'xla': True,
+        })
+
+if __name__ == '__main__':
+  ctr = SimNetController(DiffusionSolver)
+  ctr.run()

+ 0 - 0
hpc_ai/PINN/English/python/source_code/diffusion_1d/plot_results.py


Some files were not shown because too many files changed in this diff