
Fixed CUDA NVTX

Mozhgan K. Chimeh, 4 years ago
commit c252ad8118

BIN  hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile.png
BIN  hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_api.png
BIN  hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/images/cuda_profile_timeline.png

+ 8 - 9
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc.ipynb

@@ -29,7 +29,7 @@
     "- Learn and understand the Nsight systems profiler report\n",
     "\n",
     "We do not intend to cover:\n",
-    "- Optimization techniques in details, while you are free to test optimization concepts in the **Additional Section** at the end of the lab.\n",
+    "- Optimization techniques in details\n",
     "\n",
     "\n",
     "# OpenACC Directives\n",
@@ -41,7 +41,7 @@
     "\n",
     "**#pragma** in C/C++ is what's known as a \"compiler hint.\" These are very similar to programmer comments, however, the compiler will actually read our pragmas. Pragmas are a way for the programmer to \"guide\" the compiler, without running the chance damaging the code. If the compiler does not understand the pragma, it can ignore it, rather than throw a syntax error.\n",
     "\n",
-    "**acc** is an addition to our pragma. It specifies that this is an OpenACC pragma. Any non-OpenACC compiler will ignore this pragma. Even our PGI compiler can be told to ignore them. (which lets us run our parallel code sequentially!)\n",
+    "**acc** is an addition to our pragma. It specifies that this is an OpenACC pragma. Any non-OpenACC compiler will ignore this pragma. \n",
     "\n",
     "**directives** are commands in OpenACC that will tell the compiler to do some action. For now, we will only use directives that allow the compiler to parallelize our code.\n",
     "\n",
@@ -150,7 +150,7 @@
    "outputs": [],
    "source": [
     "#Compile the code for multicore\n",
-    "!cd ../../source_code/openacc && nvc++ -acc -ta=multicore -Minfo=accel -o rdf rdf.cpp -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include"
+    "!cd ../../source_code/openacc && nvc++ -acc -ta=multicore -Minfo=accel -o rdf rdf.cpp -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt"
    ]
   },
   {
@@ -235,7 +235,7 @@
    "outputs": [],
    "source": [
     "#compile for Tesla GPU\n",
-    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla:managed,lineinfo  -Minfo=accel -o rdf rdf.cpp"
+    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla:managed,lineinfo  -Minfo=accel -o rdf rdf.cpp -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt"
    ]
   },
   {
@@ -316,8 +316,7 @@
     "\n",
     "| Compiler | Latest Version | Maintained by | Full or Partial Support |\n",
     "| --- | --- | --- | --- |\n",
-    "| HPC SDK| 20.9 | NVIDIA HPC SDK | Full 2.5 spec |\n",
-    "| PGI | 19.10 | NVIDIA PGI | Full 2.5 spec |\n",
+    "| HPC SDK| 20.11 | NVIDIA HPC SDK | Full 2.5 spec |\n",
     "| GCC | 10 | Mentor Graphics, SUSE | 2.0 spec, Limited Kernel directive support, No Unified Memory |\n",
     "| CCE| latest | Cray | 2.0 Spec | \n"
    ]
@@ -363,7 +362,7 @@
    "outputs": [],
    "source": [
     "#compile for Tesla GPU\n",
-    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla:managed,lineinfo  -Minfo=accel -o rdf rdf.cpp"
+    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla:managed,lineinfo  -Minfo=accel -o rdf rdf.cpp -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt"
    ]
   },
   {
@@ -550,7 +549,7 @@
    "outputs": [],
    "source": [
     "#compile for Tesla GPU without managed memory\n",
-    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla,lineinfo -Minfo=accel -o rdf rdf.cpp"
+    "!cd ../../source_code/openacc && nvc++ -acc -ta=tesla,lineinfo -Minfo=accel -o rdf rdf.cpp -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt"
    ]
   },
   {
@@ -642,7 +641,7 @@
     "**IMPORTANT**: If you would like to continue and optimize this application further with OpenACC, please click on the **NEXT** button, otherwise click on **HOME** to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
     "-->\n",
     "\n",
-    "**IMPORTANT**: Please click on the **NEXT** button to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
+    "**IMPORTANT**: Please click on the **HOME** button to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
     "\n",
     "-----\n",
     "\n",

+ 1 - 0
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openacc/nways_openacc_opt.ipynb

@@ -401,6 +401,7 @@
    "source": [
     "**After** executing the above zip command, you should be able to download the zip file [here](../nways_files.zip). Let us now go back to parallelizing our code using other approaches.\n",
     "\n",
+    "**IMPORTANT**: Please click on **HOME** to go back to the main notebook for *N ways of GPU programming for MD* code.\n",
     "\n",
     "-----\n",
     "\n",

+ 2 - 2
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/openmp/nways_openmp.ipynb

@@ -27,7 +27,7 @@
     "- Understand the key directives and steps involved in making a sequential code parallel\n",
     "\n",
     "We do not intend to cover:\n",
-    "- Optimization techniques in details, while you are free to test optimization concepts in the **Additional Section** at the end of the lab.\n",
+    "- Optimization techniques in details\n",
     "\n",
     "\n",
     "# OpenMP Directives\n",
@@ -437,7 +437,7 @@
     "| CCE| latest | Cray | 4.5 partial spec supported | \n",
     "| XL | latest | IBM | 4.5 partial spec supported |\n",
     "| Clang | 9.0 | Community | 4.5 partial spec supported |\n",
-    "| HPC SDK | 20.9 | NVIDIA HPC SDK | 5.0 spec supported |\n",
+    "| HPC SDK | 20.11 | NVIDIA HPC SDK | 5.0 spec supported |\n",
     "\n"
    ]
   },

+ 14 - 13
hpc/nways/nways_labs/nways_MD/English/C/jupyter_notebook/stdpar/nways_stdpar.ipynb
File diff suppressed because it is too large


+ 1 - 1
hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/SOLUTION/rdf_malloc.cu

@@ -183,7 +183,7 @@ int main(int argc , char* argv[] )
 		s2bond=s2bond-2.0l*pi*rho*((g2[i]*lngrbond)-g2[i]+1.0l)*del*r*r;
 
 	}
-	nvtxRangePush("Entropy_Calculation");
+	nvtxRangePop(); //Pop for Entropy Calculation
 	stwo<<"s2 value is "<<s2<<endl;
 	stwo<<"s2bond value is "<<s2bond<<endl;
 

+ 1 - 1
hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/SOLUTION/rdf_unified_memory.cu

@@ -165,7 +165,7 @@ int main(int argc , char* argv[] )
 		s2bond=s2bond-2.0l*pi*rho*((g2[i]*lngrbond)-g2[i]+1.0l)*del*r*r;
 
 	}
-	nvtxRangePush("Entropy_Calculation");
+	nvtxRangePop(); //Pop for Entropy Calculation
 	stwo<<"s2 value is "<<s2<<endl;
 	stwo<<"s2bond value is "<<s2bond<<endl;
 

+ 1 - 1
hpc/nways/nways_labs/nways_MD/English/C/source_code/cudac/rdf.cu

@@ -184,7 +184,7 @@ int main(int argc , char* argv[] )
 							s2bond=s2bond-2.0l*pi*rho*((g2[i]*lngrbond)-g2[i]+1.0l)*del*r*r;
 
 						}
-						nvtxRangePush("Entropy_Calculation");
+						nvtxRangePop(); //Pop for Entropy Calculation
 						stwo<<"s2 value is "<<s2<<endl;
 						stwo<<"s2bond value is "<<s2bond<<endl;
 

+ 1 - 1
hpc/nways/nways_labs/nways_MD/English/C/source_code/openacc/Makefile

@@ -4,7 +4,7 @@ CC := nvc++
 ACCFLAGS := -ta=tesla:managed,lineinfo -Minfo=accel
 
 rdf: rdf.cpp
-	${CC} ${CFLAGS} ${ACCFLAGS} -o rdf rdf.cpp 
+	${CC} ${CFLAGS} ${ACCFLAGS} -o rdf rdf.cpp -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt
 
 clean:
 	rm -f *.o rdf

+ 1 - 1
hpc/nways/nways_labs/nways_MD/English/C/source_code/serial/Makefile

@@ -3,7 +3,7 @@
 CC := nvc++
 CFLAGS := -O3 -w -ldl
 ACCFLAGS := -Minfo=accel
-NVTXLIB := -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include
+NVTXLIB := -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt
 
 rdf: rdf.cpp
 	${CC} ${CFLAGS} ${ACCFLAGS} -o rdf rdf.cpp ${NVTXLIB}
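With NVTXLIB extended, the serial build rule above should expand to roughly the following (assuming the CC, CFLAGS, and ACCFLAGS values defined at the top of this Makefile):

    nvc++ -O3 -w -ldl -Minfo=accel -o rdf rdf.cpp -I/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/include -L/opt/nvidia/hpc_sdk/Linux_x86_64/20.11/cuda/11.0/lib64 -lnvToolsExt

Once rdf links against libnvToolsExt, a run under Nsight Systems, for example nsys profile -t nvtx --stats=true ./rdf, should show the named NVTX ranges in the timeline and in the stats summary.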