@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "boxed-privilege",
+   "id": "charged-allen",
    "metadata": {},
    "source": [
     "# \n",
@@ -50,7 +50,31 @@
   },
   {
    "cell_type": "markdown",
-   "id": "royal-holiday",
+   "id": "continuing-passport",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "# Hint:\n",
+    "### open a terminal and type **nvidia-smi** to monitor the GPUs' utilization and power consumption \n",
+    "### remember to fill up the GPU memory\n",
+    ""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "corrected-bacteria",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "## modify and rerun the cell below to get an even bigger GPT model \n",
+    "<a id=\"MODIFY_CELL\"></a>\n",
+    "\n",
+    "<a href=\"./Day3-5_run_Megatron_with_varying_config.ipynb#Rerun_Cell\">Jump to ReRun Cell</a> "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dramatic-opinion",
    "metadata": {},
    "source": [
     "<a id=\"Rerun_Cell\"></a>"
@@ -58,18 +82,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
-   "id": "opening-description",
+   "execution_count": 1,
+   "id": "massive-industry",
    "metadata": {},
    "outputs": [],
    "source": [
-    "!rm -fr ./Megatron-LM/sv_ckpt/* "
+    "!rm -fr ../sv_ckpt/* "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
-   "id": "future-explorer",
+   "execution_count": 2,
+   "id": "understood-swimming",
    "metadata": {},
    "outputs": [
     {
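A note on the retargeted cleanup path: wiping `../sv_ckpt/` before rerunning matters because a model resized via LAYERS/HIDDEN_SZ cannot load checkpoints saved with the old tensor shapes, so with the directory empty Megatron falls back to random initialization (see the "could not find the metadata file" warning later in this log). A hedged equivalent outside the notebook:

```bash
# Clear stale checkpoints so the resized model starts from random init;
# -f keeps rm quiet when the directory is already empty.
rm -rf ../sv_ckpt/*
ls ../sv_ckpt/   # should print nothing before a fresh run
```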
@@ -89,22 +113,22 @@
     "NODE_RANK=0\n",
     "\n",
     "### modify this section to point the file to its own path \n",
-    "CHECKPOINT_PATH='./Megatron-LM/sv_ckpt/'\n",
-    "DATA_PATH='../dataset/SV/webnyheter2013_text_document'\n",
-    "VOCAB_FILE='../dataset/SV/32k/vocab.json'\n",
-    "MERGE_FILE='../dataset/SV/32k/merges.txt'\n",
-    "PROFILE_OUTPUT_PATH='/home/zcharpy/profiles/DLprof/2ndrun/nsys_improved' # modify this to your own profile path\n",
+    "CHECKPOINT_PATH='../sv_ckpt/'\n",
+    "DATA_PATH='../dataset/SV/webnyheter2013_56kvocab_text_document'\n",
+    "VOCAB_FILE='../dataset/SV/56k/vocab.json'\n",
+    "MERGE_FILE='../dataset/SV/56k/merges.txt'\n",
+    "PROFILE_OUTPUT_PATH='../profiles/SV/nsys_sv_' # modify this to your own profile path\n",
     "\n",
     "#### [TODO]--------------- params in the following block are allowed to change -----------#### \n",
-    "WORLD_SIZE=8 # <--- remember to change the number of GPUs you actually have in your system\n",
-    "GPUS_PER_NODE=8 # <--- remember to change the number of GPUs you actually have in your system\n",
+    "WORLD_SIZE=2 # <--- remember to change the number of GPUs you actually have in your system\n",
+    "GPUS_PER_NODE=2 # <--- remember to change the number of GPUs you actually have in your system\n",
     "\n",
-    "TENSOR_MP_SIZE=8\n",
+    "TENSOR_MP_SIZE=2\n",
     "PIPELINE_MP_SIZE=1\n",
-    "LAYERS=64\n",
-    "HIDDEN_SZ=2048\n",
+    "LAYERS=32\n",
+    "HIDDEN_SZ=4096\n",
     "NUM_ATTN_HEADS=32\n",
-    "MICRO_BZ=64\n",
+    "MICRO_BZ=8\n",
     "GLOBAL_BZ=512\n",
     "SEQ_LEN=512\n",
     "MAX_POS_EM=512\n",
@@ -151,26 +175,27 @@
   },
   {
    "cell_type": "markdown",
-   "id": "confident-prerequisite",
+   "id": "monetary-trial",
    "metadata": {},
    "source": [
     "---\n",
     "## check how big is your model - \n",
-    "I got 1 Billion :)  what about you ?"
+    "modify the parameters in [params_cnt.sh](./params_cnt.sh)\n",
+    "I got 6 Billion :)  what about you ?"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
-   "id": "affecting-function",
+   "execution_count": 3,
+   "id": "afraid-promise",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "3\n",
-      "3289513984\n"
+      "6\n",
+      "6675628032\n"
      ]
     }
    ],
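The `6675628032` printed above can be reproduced by hand. params_cnt.sh itself is not part of this diff, so the formula below is an assumption (the usual GPT-style count: vocab and position embeddings plus 12*h^2 + 13*h parameters per transformer layer), but it lands exactly on the cell output for the 56k vocabulary:

```bash
VOCAB=56000; SEQ=512; LAYERS=32; HIDDEN_SZ=4096     # values from the config cell above

EMBED=$(( (VOCAB + SEQ) * HIDDEN_SZ ))                        # token + position embeddings
PER_LAYER=$(( 12 * HIDDEN_SZ * HIDDEN_SZ + 13 * HIDDEN_SZ ))  # attention, MLP, layernorms, biases
TOTAL=$(( EMBED + LAYERS * PER_LAYER ))

echo $(( TOTAL / 10**9 ))   # -> 6 (billions)
echo ${TOTAL}               # -> 6675628032
```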
@@ -180,7 +205,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "hairy-dominican",
+   "id": "portuguese-freedom",
    "metadata": {},
    "source": [
     "---\n",
@@ -203,48 +228,37 @@
    ]
   },
   {
+   "cell_type": "markdown",
+   "id": "competent-romania",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "# Re-run the cell below to get an even bigger GPT model\n",
+    "## remember to modify the [params count](./params_cnt.sh) to check how big your model is\n",
+    "## click the link below to go back and modify profile_SVGPT_BIG.sh \n",
+    "<a href=\"./Day3-5_run_Megatron_with_varying_config.ipynb#MODIFY_CELL\">Jump back to modify and overwrite profile_SVGPT_BIG.sh </a> \n",
+    "<a id=\"Rerun_Cell\"></a>"
+   ]
+  },
+  {
    "cell_type": "code",
-   "execution_count": 31,
-   "id": "acknowledged-brake",
+   "execution_count": 4,
+   "id": "injured-pasta",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Initializing NVTX monkey patches\n",
-      "Initializing NVTX monkey patches\n",
-      "Initializing NVTX monkey patches\n",
-      "Initializing NVTX monkey patches\n",
-      "Initializing NVTX monkey patches\n",
-      "Initializing NVTX monkey patches\n",
-      "Initializing NVTX monkey patches\n",
-      "Initializing NVTX monkey patches\n",
-      "/opt/conda/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py:144: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n",
-      "  warnings.warn(\"torch.distributed.reduce_op is deprecated, please use \"\n",
-      "Done with NVTX monkey patching\n",
-      "/opt/conda/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py:144: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n",
-      "  warnings.warn(\"torch.distributed.reduce_op is deprecated, please use \"\n",
-      "Done with NVTX monkey patching\n",
-      "/opt/conda/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py:144: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n",
-      "  warnings.warn(\"torch.distributed.reduce_op is deprecated, please use \"\n",
-      "Done with NVTX monkey patching\n",
-      "/opt/conda/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py:144: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n",
-      "  warnings.warn(\"torch.distributed.reduce_op is deprecated, please use \"\n",
-      "Done with NVTX monkey patching\n",
-      "/opt/conda/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py:144: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n",
-      "  warnings.warn(\"torch.distributed.reduce_op is deprecated, please use \"\n",
-      "Done with NVTX monkey patching\n",
-      "/opt/conda/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py:144: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n",
-      "  warnings.warn(\"torch.distributed.reduce_op is deprecated, please use \"\n",
+      "Initializing NVTX monkey patchesInitializing NVTX monkey patches\n",
+      "\n",
       "/opt/conda/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py:144: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n",
       "  warnings.warn(\"torch.distributed.reduce_op is deprecated, please use \"\n",
-      "Done with NVTX monkey patching\n",
-      "Done with NVTX monkey patching\n",
       "/opt/conda/lib/python3.8/site-packages/torch/distributed/distributed_c10d.py:144: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n",
       "  warnings.warn(\"torch.distributed.reduce_op is deprecated, please use \"\n",
-      "Done with NVTX monkey patching\n",
-      "using world size: 8, data-parallel-size: 1, tensor-model-parallel size: 8, pipeline-model-parallel size: 1 \n",
+      "Done with NVTX monkey patchingDone with NVTX monkey patching\n",
+      "\n",
+      "using world size: 2, data-parallel-size: 1, tensor-model-parallel size: 2, pipeline-model-parallel size: 1 \n",
       "using torch.float16 for parameters ...\n",
       "------------------------ arguments ------------------------\n",
       "  accumulate_allreduce_grads_in_fp32 .............. False\n",
@@ -272,7 +286,7 @@
      "  consumed_valid_samples .......................... 0\n",
      "  data_impl ....................................... mmap\n",
      "  data_parallel_size .............................. 1\n",
-      "  data_path ....................................... ['1.', '../dataset/SV/webnyheter2013_text_document']\n",
+      "  data_path ....................................... ['1.', '../dataset/SV/webnyheter2013_56kvocab_text_document']\n",
      "  dataloader_type ................................. single\n",
      "  DDP_impl ........................................ local\n",
      "  decoder_seq_length .............................. None\n",
@@ -286,14 +300,14 @@
      "  evidence_data_path .............................. None\n",
      "  exit_duration_in_mins ........................... None\n",
      "  exit_interval ................................... None\n",
-      "  ffn_hidden_size ................................. 8192\n",
+      "  ffn_hidden_size ................................. 16384\n",
      "  finetune ........................................ False\n",
      "  fp16 ............................................ True\n",
      "  fp16_lm_cross_entropy ........................... False\n",
      "  fp32_residual_connection ........................ False\n",
      "  global_batch_size ............................... 512\n",
      "  hidden_dropout .................................. 0.1\n",
-      "  hidden_size ..................................... 2048\n",
+      "  hidden_size ..................................... 4096\n",
      "  hysteresis ...................................... 2\n",
      "  ict_head_size ................................... None\n",
      "  ict_load ........................................ None\n",
@@ -303,10 +317,10 @@
      "  init_method_std ................................. 0.02\n",
      "  init_method_xavier_uniform ...................... False\n",
      "  initial_loss_scale .............................. 4294967296\n",
-      "  kv_channels ..................................... 64\n",
+      "  kv_channels ..................................... 128\n",
      "  layernorm_epsilon ............................... 1e-05\n",
      "  lazy_mpu_init ................................... None\n",
-      "  load ............................................ ./Megatron-LM/sv_ckpt/\n",
+      "  load ............................................ ../sv_ckpt/\n",
      "  local_rank ...................................... 0\n",
      "  log_batch_size_to_tensorboard ................... False\n",
      "  log_interval .................................... 10\n",
@@ -329,8 +343,8 @@
      "  mask_prob ....................................... 0.15\n",
      "  masked_softmax_fusion ........................... True\n",
      "  max_position_embeddings ......................... 512\n",
-      "  merge_file ...................................... ../dataset/SV/32k/merges.txt\n",
-      "  micro_batch_size ................................ 64\n",
+      "  merge_file ...................................... ../dataset/SV/56k/merges.txt\n",
+      "  micro_batch_size ................................ 8\n",
      "  min_loss_scale .................................. 1.0\n",
      "  min_lr .......................................... 1e-05\n",
      "  mmap_warmup ..................................... False\n",
@@ -341,7 +355,7 @@
      "  num_attention_heads ............................. 32\n",
      "  num_channels .................................... 3\n",
      "  num_classes ..................................... 1000\n",
-      "  num_layers ...................................... 64\n",
+      "  num_layers ...................................... 32\n",
      "  num_layers_per_virtual_pipeline_stage ........... None\n",
      "  num_workers ..................................... 2\n",
      "  onnx_safe ....................................... None\n",
@@ -360,7 +374,7 @@
      "  retriever_score_scaling ......................... False\n",
      "  retriever_seq_length ............................ 256\n",
      "  sample_rate ..................................... 1.0\n",
-      "  save ............................................ ./Megatron-LM/sv_ckpt/\n",
+      "  save ............................................ ../sv_ckpt/\n",
      "  save_interval ................................... 100\n",
      "  scatter_gather_tensors_in_pipeline .............. True\n",
      "  seed ............................................ 1234\n",
@@ -368,7 +382,7 @@
      "  sgd_momentum .................................... 0.9\n",
      "  short_seq_prob .................................. 0.1\n",
      "  split ........................................... 949,50,1\n",
-      "  tensor_model_parallel_size ...................... 8\n",
+      "  tensor_model_parallel_size ...................... 2\n",
      "  tensorboard_dir ................................. None\n",
      "  tensorboard_log_interval ........................ 1\n",
      "  tensorboard_queue_size .......................... 1000\n",
@@ -382,60 +396,54 @@
      "  use_one_sent_docs ............................... False\n",
      "  virtual_pipeline_model_parallel_size ............ None\n",
      "  vocab_extra_ids ................................. 0\n",
-      "  vocab_file ...................................... ../dataset/SV/32k/vocab.json\n",
+      "  vocab_file ...................................... ../dataset/SV/56k/vocab.json\n",
      "  weight_decay .................................... 0.01\n",
-      "  world_size ...................................... 8\n",
+      "  world_size ...................................... 2\n",
      "-------------------- end of arguments ---------------------\n",
-      "setting number of micro-batches to constant 8\n",
+      "setting number of micro-batches to constant 64\n",
      "> building GPT2BPETokenizer tokenizer ...\n",
-      " > padded vocab (size: 32000) with 768 dummy tokens (new size: 32768)\n",
+      " > padded vocab (size: 56000) with 64 dummy tokens (new size: 56064)\n",
      "> initializing torch distributed ...\n",
-      "> initializing tensor model parallel with size 8\n",
+      "> initializing tensor model parallel with size 2\n",
      "> initializing pipeline model parallel with size 1\n",
      "> setting random seeds to 1234 ...\n",
      "> initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234\n",
      "> compiling dataset index builder ...\n",
-      "make: Entering directory '/home/zcharpy/bootcamp/jupyter_notebook/Megatron-LM/megatron/data'\n",
+      "make: Entering directory '/proj/guest_at_nsc/users/zcharpy/gpubootcamp/ai/Megatron/English/Python/jupyter_notebook/Megatron-LM/megatron/data'\n",
      "make: Nothing to be done for 'default'.\n",
-      "make: Leaving directory '/home/zcharpy/bootcamp/jupyter_notebook/Megatron-LM/megatron/data'\n",
-      ">>> done with dataset index builder. Compilation time: 0.167 seconds\n",
+      "make: Leaving directory '/proj/guest_at_nsc/users/zcharpy/gpubootcamp/ai/Megatron/English/Python/jupyter_notebook/Megatron-LM/megatron/data'\n",
+      ">>> done with dataset index builder. Compilation time: 0.145 seconds\n",
      "> compiling and loading fused kernels ...\n",
      "Detected CUDA files, patching ldflags\n",
-      "Emitting ninja build file /home/zcharpy/bootcamp/jupyter_notebook/Megatron-LM/megatron/fused_kernels/build/build.ninja...\n",
+      "Emitting ninja build file /proj/guest_at_nsc/users/zcharpy/gpubootcamp/ai/Megatron/English/Python/jupyter_notebook/Megatron-LM/megatron/fused_kernels/build/build.ninja...\n",
      "Building extension module scaled_upper_triang_masked_softmax_cuda...\n",
      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
      "ninja: no work to do.\n",
      "Loading extension module scaled_upper_triang_masked_softmax_cuda...\n",
      "Detected CUDA files, patching ldflags\n",
-      "Emitting ninja build file /home/zcharpy/bootcamp/jupyter_notebook/Megatron-LM/megatron/fused_kernels/build/build.ninja...\n",
+      "Emitting ninja build file /proj/guest_at_nsc/users/zcharpy/gpubootcamp/ai/Megatron/English/Python/jupyter_notebook/Megatron-LM/megatron/fused_kernels/build/build.ninja...\n",
      "Building extension module scaled_masked_softmax_cuda...\n",
      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
      "ninja: no work to do.\n",
      "Loading extension module scaled_masked_softmax_cuda...\n",
      "Detected CUDA files, patching ldflags\n",
-      "Emitting ninja build file /home/zcharpy/bootcamp/jupyter_notebook/Megatron-LM/megatron/fused_kernels/build/build.ninja...\n",
+      "Emitting ninja build file /proj/guest_at_nsc/users/zcharpy/gpubootcamp/ai/Megatron/English/Python/jupyter_notebook/Megatron-LM/megatron/fused_kernels/build/build.ninja...\n",
      "Building extension module fused_mix_prec_layer_norm_cuda...\n",
      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
      "ninja: no work to do.\n",
      "Loading extension module fused_mix_prec_layer_norm_cuda...\n",
-      ">>> done with compiling and loading fused kernels. Compilation time: 18.065 seconds\n",
-      "time to initialize megatron (seconds): 90.261\n",
-      "[after megatron is initialized] datetime: 2021-08-30 08:59:22 \n",
+      ">>> done with compiling and loading fused kernels. Compilation time: 2.868 seconds\n",
+      "time to initialize megatron (seconds): 43.936\n",
+      "[after megatron is initialized] datetime: 2021-09-15 11:55:55 \n",
      "building GPT model ...\n",
-      " > number of parameters on (tensor, pipeline) model parallel rank (1, 0): 412995584\n",
-      " > number of parameters on (tensor, pipeline) model parallel rank (4, 0): 412995584\n",
-      " > number of parameters on (tensor, pipeline) model parallel rank (6, 0): 412995584\n",
-      " > number of parameters on (tensor, pipeline) model parallel rank (7, 0): 412995584\n",
-      " > number of parameters on (tensor, pipeline) model parallel rank (0, 0): 412995584\n",
-      " > number of parameters on (tensor, pipeline) model parallel rank (2, 0): 412995584\n",
-      " > number of parameters on (tensor, pipeline) model parallel rank (3, 0): 412995584\n",
-      " > number of parameters on (tensor, pipeline) model parallel rank (5, 0): 412995584\n",
+      " > number of parameters on (tensor, pipeline) model parallel rank (0, 0): 3339395072\n",
+      " > number of parameters on (tensor, pipeline) model parallel rank (1, 0): 3339395072\n",
      "setting training iterations to 0\n",
      "> learning rate decay style: cosine\n",
-      "WARNING: could not find the metadata file ./Megatron-LM/sv_ckpt/latest_checkpointed_iteration.txt \n",
+      "WARNING: could not find the metadata file ../sv_ckpt/latest_checkpointed_iteration.txt \n",
      "    will not load any checkpoints and will start from random\n",
-      "time (ms) | load-checkpoint: 25.10\n",
-      "[after model, optimizer, and learning rate scheduler are built] datetime: 2021-08-30 08:59:28 \n",
+      "time (ms) | load-checkpoint: 2.66\n",
+      "[after model, optimizer, and learning rate scheduler are built] datetime: 2021-09-15 11:55:56 \n",
      "> building train, validation, and test datasets ...\n",
      " > datasets target sizes (minimum size):\n",
      "    train:      100\n",
@@ -448,7 +456,7 @@
      "    reading document index...\n",
      "    creating numpy buffer of mmap...\n",
      "    creating memory view of numpy buffer...\n",
-      " > finished creating indexed dataset in 0.004143 seconds\n",
+      " > finished creating indexed dataset in 0.004941 seconds\n",
      "    number of documents: 1249010\n",
      " > dataset split:\n",
      "    train:\n",
@@ -457,24 +465,57 @@
      "     document indices in [1185311, 1247761) total of 62450 documents\n",
      "    test:\n",
      "     document indices in [1247761, 1249010) total of 1249 documents\n",
-      " > loading doc-idx mapping from ../dataset/SV/webnyheter2013_text_document_train_indexmap_101ns_512sl_1234s_doc_idx.npy\n",
-      " > loading sample-idx mapping from ../dataset/SV/webnyheter2013_text_document_train_indexmap_101ns_512sl_1234s_sample_idx.npy\n",
-      " > loading shuffle-idx mapping from ../dataset/SV/webnyheter2013_text_document_train_indexmap_101ns_512sl_1234s_shuffle_idx.npy\n",
+      " > WARNING: could not find index map files, building the indices on rank 0 ...\n",
+      " > only one epoch required, setting separate_last_epoch to False\n",
+      " > elasped time to build and save doc-idx mapping (seconds): 0.066494\n",
+      "    using:\n",
+      "     number of documents:       1185311\n",
+      "     number of epochs:          1\n",
+      "     sequence length:           512\n",
+      "     total number of samples:   51303\n",
+      " > elasped time to build and save sample-idx mapping (seconds): 0.008808\n",
+      " > building shuffle index with split [0, 51303) and [51303, 51303) ...\n",
+      " > elasped time to build and save shuffle-idx mapping (seconds): 0.002738\n",
+      " > loading doc-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_train_indexmap_101ns_512sl_1234s_doc_idx.npy\n",
+      " > loading sample-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_train_indexmap_101ns_512sl_1234s_sample_idx.npy\n",
+      " > loading shuffle-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_train_indexmap_101ns_512sl_1234s_shuffle_idx.npy\n",
      "    loaded indexed file in 0.005 seconds\n",
-      "    total number of samples: 53948\n",
+      "    total number of samples: 51304\n",
      "    total number of epochs: 1\n",
-      " > loading doc-idx mapping from ../dataset/SV/webnyheter2013_text_document_valid_indexmap_5146ns_512sl_1234s_doc_idx.npy\n",
-      " > loading sample-idx mapping from ../dataset/SV/webnyheter2013_text_document_valid_indexmap_5146ns_512sl_1234s_sample_idx.npy\n",
-      " > loading shuffle-idx mapping from ../dataset/SV/webnyheter2013_text_document_valid_indexmap_5146ns_512sl_1234s_shuffle_idx.npy\n",
-      "    loaded indexed file in 0.003 seconds\n",
-      "    total number of samples: 5695\n",
+      " > WARNING: could not find index map files, building the indices on rank 0 ...\n",
+      " > last epoch number of samples (2438) is larger than 80% of number of samples per epoch (2708), setting separate_last_epoch to False\n",
+      " > elasped time to build and save doc-idx mapping (seconds): 0.005265\n",
+      "    using:\n",
+      "     number of documents:       62450\n",
+      "     number of epochs:          2\n",
+      "     sequence length:           512\n",
+      "     total number of samples:   5416\n",
+      " > elasped time to build and save sample-idx mapping (seconds): 0.001357\n",
+      " > building shuffle index with split [0, 5416) and [5416, 5416) ...\n",
+      " > elasped time to build and save shuffle-idx mapping (seconds): 0.002597\n",
+      " > loading doc-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_valid_indexmap_5146ns_512sl_1234s_doc_idx.npy\n",
+      " > loading sample-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_valid_indexmap_5146ns_512sl_1234s_sample_idx.npy\n",
+      " > loading shuffle-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_valid_indexmap_5146ns_512sl_1234s_shuffle_idx.npy\n",
+      "    loaded indexed file in 0.002 seconds\n",
+      "    total number of samples: 5417\n",
      "    total number of epochs: 2\n",
-      " > loading doc-idx mapping from ../dataset/SV/webnyheter2013_text_document_test_indexmap_5146ns_512sl_1234s_doc_idx.npy\n",
-      " > loading sample-idx mapping from ../dataset/SV/webnyheter2013_text_document_test_indexmap_5146ns_512sl_1234s_sample_idx.npy\n",
-      " > loading shuffle-idx mapping from ../dataset/SV/webnyheter2013_text_document_test_indexmap_5146ns_512sl_1234s_shuffle_idx.npy\n",
-      "    loaded indexed file in 0.003 seconds\n",
-      "    total number of samples: 5192\n",
-      "    total number of epochs: 91\n",
+      " > WARNING: could not find index map files, building the indices on rank 0 ...\n",
+      " > last epoch number of samples (12) is smaller than 80% of number of samples per epoch (54), setting separate_last_epoch to True\n",
+      " > elasped time to build and save doc-idx mapping (seconds): 0.004714\n",
+      "    using:\n",
+      "     number of documents:       1249\n",
+      "     number of epochs:          96\n",
+      "     sequence length:           512\n",
+      "     total number of samples:   5188\n",
+      " > elasped time to build and save sample-idx mapping (seconds): 0.001624\n",
+      " > building shuffle index with split [0, 5134) and [5134, 5188) ...\n",
+      " > elasped time to build and save shuffle-idx mapping (seconds): 0.001298\n",
+      " > loading doc-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_test_indexmap_5146ns_512sl_1234s_doc_idx.npy\n",
+      " > loading sample-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_test_indexmap_5146ns_512sl_1234s_sample_idx.npy\n",
+      " > loading shuffle-idx mapping from ../dataset/SV/webnyheter2013_56kvocab_text_document_test_indexmap_5146ns_512sl_1234s_shuffle_idx.npy\n",
+      "    loaded indexed file in 0.002 seconds\n",
+      "    total number of samples: 5189\n",
+      "    total number of epochs: 96\n",
      "> building indices for blendable datasets ...\n",
      " > sample ratios:\n",
      "   dataset 0, input: 1, achieved: 1\n",
@@ -488,17 +529,17 @@
      "   dataset 0, input: 1, achieved: 1\n",
      "> elapsed time for building blendable dataset indices: 0.00 (sec)\n",
      "> finished creating GPT datasets ...\n",
-      "[after dataloaders are built] datetime: 2021-08-30 08:59:32 \n",
+      "[after dataloaders are built] datetime: 2021-09-15 11:55:58 \n",
      "done with setup ...\n",
      "training ...\n",
-      "time (ms) | model-and-optimizer-setup: 6065.80 | train/valid/test-data-iterators-setup: 2661.91\n",
-      "[after training is done] datetime: 2021-08-30 08:59:32 \n",
+      "time (ms) | model-and-optimizer-setup: 929.42 | train/valid/test-data-iterators-setup: 1004.53\n",
+      "[after training is done] datetime: 2021-09-15 11:55:58 \n",
      "------------------------------------------------------------------------------------------------------------------\n",
-      " validation loss at the end of training for val data | lm loss value: 1.081321E+01 | lm loss PPL: 4.967259E+04 | \n",
+      " validation loss at the end of training for val data | lm loss value: 1.171452E+01 | lm loss PPL: 1.223352E+05 | \n",
      "------------------------------------------------------------------------------------------------------------------\n",
      "Evaluating iter 10/10\n",
      "-------------------------------------------------------------------------------------------------------------------\n",
-      " validation loss at the end of training for test data | lm loss value: 1.081394E+01 | lm loss PPL: 4.970880E+04 | \n",
+      " validation loss at the end of training for test data | lm loss value: 1.171400E+01 | lm loss PPL: 1.222719E+05 | \n",
      "-------------------------------------------------------------------------------------------------------------------\n"
      ]
     }
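The PPL columns in the updated output are just the exponential of the reported LM loss, which gives a quick way to sanity-check such lines; for the validation value above:

```bash
# exp(11.71452) ~ 1.2233e+05, matching "lm loss PPL: 1.223352E+05" in the log
awk 'BEGIN { printf "%.6e\n", exp(11.71452) }'
```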
@@ -509,7 +550,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "determined-right",
+   "id": "entertaining-transparency",
    "metadata": {},
    "source": [
     "## Remember to copy and paste your output on Slack or Zoom\n",
@@ -518,7 +559,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "searching-worthy",
+   "id": "hidden-minister",
    "metadata": {},
    "source": [
     "-----\n",