5 роки тому · c252ad8118
--- a/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile.png
+++ b/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile.png
--- a/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_api.png
+++ b/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_api.png
--- a/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_timeline.png
+++ b/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_timeline.png
--- a/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc.ipynb
+++ b/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc.ipynb
@@ -29,7 +29,7 @@
 
				     "- Learn and understand the Nsight systems profiler report\n",
			
 
				     "\n",
			
 
				     "We do not intend to cover:\n",
			
 
				-    "- Optimization techniques in details, while you are free to test optimization concepts in the **Additional Section** at the end of the lab.\n",
			
 
				+    "- Optimization techniques in detail\n",
			
 
				     "\n",
			
 
				     "\n",
			
 
				     "# OpenACC Directives\n",
			
@@ -41,7 +41,7 @@
 
				     "\n",
			
 
				     "**#pragma** in C/C++ is what's known as a \"compiler hint.\" These are very similar to programmer comments, however, the compiler will actually read our pragmas. Pragmas are a way for the programmer to \"guide\" the compiler, without running the chance damaging the code. If the compiler does not understand the pragma, it can ignore it, rather than throw a syntax error.\n",
			
 
				     "\n",
			
 
				-    "**acc** is an addition to our pragma. It specifies that this is an OpenACC pragma. Any non-OpenACC compiler will ignore this pragma. Even our PGI compiler can be told to ignore them. (which lets us run our parallel code sequentially!)\n",
			
 
				+    "**acc** is an addition to our pragma. It specifies that this is an OpenACC pragma. Any non-OpenACC compiler will ignore this pragma. \n",
			
 
				     "\n",
			
 
				     "**directives** are commands in OpenACC that will tell the compiler to do some action. For now, we will only use directives that allow the compiler to parallelize our code.\n",
			
 
				     "\n",
			
@@ -150,7 +150,7 @@
 
				    "outputs": [],
			
 
				    "source": [
			
 
				     "#Compile the code for multicore\n",
			
 
				-    "!cd ../../source_code/openacc && nvc++ -acc -ta=multicore -Minfo=accel -o rdf rdf.cpp -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include"
			
 
				+    "!cd ../../source_code/openacc && nvc++ -acc -ta=multicore -Minfo=accel -o rdf rdf.cpp -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt"
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -235,7 +235,7 @@
 
				    "outputs": [],
			
 
				    "source": [
			
 
				     "#compile for Tesla GPU\n",
			
 
				-    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla:managed,lineinfo  -Minfo=accel -o rdf rdf.cpp"
			
 
				+    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla:managed,lineinfo  -Minfo=accel -o rdf rdf.cpp -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt"
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -316,8 +316,7 @@
 
				     "\n",
			
 
				     "| Compiler | Latest Version | Maintained by | Full or Partial Support |\n",
			
 
				     "| --- | --- | --- | --- |\n",
			
 
				-    "| HPC SDK| 20.9 | NVIDIA HPC SDK | Full 2.5 spec |\n",
			
 
				-    "| PGI | 19.10 | NVIDIA PGI | Full 2.5 spec |\n",
			
 
				+    "| HPC SDK| 20.11 | NVIDIA HPC SDK | Full 2.5 spec |\n",
			
 
				     "| GCC | 10 | Mentor Graphics, SUSE | 2.0 spec, Limited Kernel directive support, No Unified Memory |\n",
			
 
				     "| CCE| latest | Cray | 2.0 Spec | \n"
			
 
				    ]
			
@@ -363,7 +362,7 @@
 
				    "outputs": [],
			
 
				    "source": [
			
 
				     "#compile for Tesla GPU\n",
			
 
				-    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla:managed,lineinfo  -Minfo=accel -o rdf rdf.cpp"
			
 
				+    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla:managed,lineinfo  -Minfo=accel -o rdf rdf.cpp -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt"
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -550,7 +549,7 @@
 
				    "outputs": [],
			
 
				    "source": [
			
 
				     "#compile for Tesla GPU without managed memory\n",
			
 
				-    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla,lineinfo -Minfo=accel -o rdf rdf.cpp"
			
 
				+    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla,lineinfo -Minfo=accel -o rdf rdf.cpp -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt"
			
 
				    ]
			
 
				   },
			
 
				   {
			
@@ -642,7 +641,7 @@
 
				     "**IMPORTANT**: If you would like to continue and optimize this application further with OpenACC, please click on the **NEXT** button, otherwise click on **HOME** to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
			
 
				     "-->\n",
			
 
				     "\n",
			
 
				-    "**IMPORTANT**: Please click on the **NEXT** button to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
			
 
				+    "**IMPORTANT**: Please click on the **HOME** button to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
			
 
				     "\n",
			
 
				     "-----\n",
			
 
				     "\n",
			
--- a/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt.ipynb
+++ b/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt.ipynb
@@ -401,6 +401,7 @@
 
				    "source": [
			
 
				     "**After** executing the above zip command, you should be able to download the zip file [here](../nways_files.zip). Let us now go back to parallelizing our code using other approaches.\n",
			
 
				     "\n",
			
 
				+    "**IMPORTANT**: Please click on **HOME** to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
			
 
				     "\n",
			
 
				     "-----\n",
			
 
				     "\n",
			
--- a/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp.ipynb
+++ b/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp.ipynb
@@ -27,7 +27,7 @@
 
				     "- Understand the key directives and steps involved in making a sequential code parallel\n",
			
 
				     "\n",
			
 
				     "We do not intend to cover:\n",
			
 
				-    "- Optimization techniques in details, while you are free to test optimization concepts in the **Additional Section** at the end of the lab.\n",
			
 
				+    "- Optimization techniques in detail\n",
			
 
				     "\n",
			
 
				     "\n",
			
 
				     "# OpenMP Directives\n",
			
@@ -437,7 +437,7 @@
 
				     "| CCE| latest | Cray | 4.5 partial spec supported | \n",
			
 
				     "| XL | latest | IBM | 4.5 partial spec supported |\n",
			
 
				     "| Clang | 9.0 | Community | 4.5 partial spec supported |\n",
			
 
				-    "| HPC SDK | 20.9 | NVIDIA HPC SDK | 5.0 spec supported |\n",
			
 
				+    "| HPC SDK | 20.11 | NVIDIA HPC SDK | 5.0 spec supported |\n",
			
 
				     "\n"
			
 
				    ]
			
 
				   },
			
--- a/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/stdpar/nways_stdpar.ipynb
+++ b/hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/stdpar/nways_stdpar.ipynb
--- a/hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/SOLUTION/rdf_malloc.cu
+++ b/hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/SOLUTION/rdf_malloc.cu
@@ -183,7 +183,7 @@ int main(int argc , char* argv[] )
 
				 		s2bond=s2bond-2.0l*pi*rho*((g2[i]*lngrbond)-g2[i]+1.0l)*del*r*r;
			
 
				 
			
 
				 	}
			
 
				-	nvtxRangePush("Entropy_Calculation");
			
 
				+	nvtxRangePop(); //Pop for Entropy Calculation
			
 
				 	stwo<<"s2 value is "<<s2<<endl;
			
 
				 	stwo<<"s2bond value is "<<s2bond<<endl;
			
 
				 
			
--- a/hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/SOLUTION/rdf_unified_memory.cu
+++ b/hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/SOLUTION/rdf_unified_memory.cu
@@ -165,7 +165,7 @@ int main(int argc , char* argv[] )
 
				 		s2bond=s2bond-2.0l*pi*rho*((g2[i]*lngrbond)-g2[i]+1.0l)*del*r*r;
			
 
				 
			
 
				 	}
			
 
				-	nvtxRangePush("Entropy_Calculation");
			
 
				+	nvtxRangePop(); //Pop for Entropy Calculation
			
 
				 	stwo<<"s2 value is "<<s2<<endl;
			
 
				 	stwo<<"s2bond value is "<<s2bond<<endl;
			
 
				 
			
--- a/hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/rdf.cu
+++ b/hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/rdf.cu
@@ -184,7 +184,7 @@ int main(int argc , char* argv[] )
 
				 							s2bond=s2bond-2.0l*pi*rho*((g2[i]*lngrbond)-g2[i]+1.0l)*del*r*r;
			
 
				 
			
 
				 						}
			
 
				-						nvtxRangePush("Entropy_Calculation");
			
 
				+						nvtxRangePop(); //Pop for Entropy Calculation
			
 
				 						stwo<<"s2 value is "<<s2<<endl;
			
 
				 						stwo<<"s2bond value is "<<s2bond<<endl;
			
 
				 
			
--- a/hpc/nways/nways_labs/nways_MD/English/C/source_code/openacc/Makefile
+++ b/hpc/nways/nways_labs/nways_MD/English/C/source_code/openacc/Makefile
@@ -4,7 +4,7 @@ CC := nvc++
 
				 ACCFLAGS := -ta=tesla:managed,lineinfo -Minfo=accel
			
 
				 
			
 
				 rdf: rdf.cpp
			
 
				-	${CC} ${CFLAGS} ${ACCFLAGS} -o rdf rdf.cpp 
			
 
				+	${CC} ${CFLAGS} ${ACCFLAGS} -o rdf rdf.cpp -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt
			
 
				 
			
 
				 clean:
			
 
				 	rm -f *.o rdf
			
--- a/hpc/nways/nways_labs/nways_MD/English/C/source_code/serial/Makefile
+++ b/hpc/nways/nways_labs/nways_MD/English/C/source_code/serial/Makefile
@@ -3,7 +3,7 @@
 
				 CC := nvc++
			
 
				 CFLAGS := -O3 -w -ldl
			
 
				 ACCFLAGS := -Minfo=accel
			
 
				-NVTXLIB := -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include
			
 
				+NVTXLIB := -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt
			
 
				 
			
 
				 rdf: rdf.cpp
			
 
				 	${CC} ${CFLAGS} ${ACCFLAGS} -o rdf rdf.cpp ${NVTXLIB}