4 jaren geleden · 18f0793867
--- a/ai/Megatron/English/Python/jupyter_notebook/Lab2-4_customize_process2mmap.ipynb
+++ b/ai/Megatron/English/Python/jupyter_notebook/Lab2-4_customize_process2mmap.ipynb
@@ -2,7 +2,7 @@
 
																  "cells": [
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "crazy-behalf",
															
 
																+   "id": "encouraging-melissa",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "## Customize preprocess_data.py\n",
															
@@ -27,7 +27,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "suburban-coast",
															
 
																+   "id": "regulation-medium",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "1. Convert the extracted raw Swedish text from webnyheter2013.txt to webnyheter2013.json."
															
@@ -36,7 +36,7 @@
 
																   {
															
 
																    "cell_type": "code",
															
 
																    "execution_count": null,
															
 
																-   "id": "parliamentary-accountability",
															
 
																+   "id": "alone-asian",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -45,7 +45,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "pursuant-ghost",
															
 
																+   "id": "corporate-honor",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Below is the expected outputs :\n",
															
@@ -58,7 +58,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "insured-excitement",
															
 
																+   "id": "diagnostic-mercury",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "2. Generate the mmap format files by default preprocess_data.py as the first step to ensure we have data necessary for the next notebook to run, in case time runs out."
															
@@ -66,22 +66,22 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "code",
															
 
																-   "execution_count": 7,
															
 
																-   "id": "palestinian-locking",
															
 
																+   "execution_count": null,
															
 
																+   "id": "unique-assistant",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
 
																     "INPUT_JSON_FILE='../dataset/SV/webnyheter2013.json'\n",
															
 
																-    "OUTPUT_PATH='../dataset/SV/webnyheter2013_56kvocab'\n",
															
 
																-    "VOCAB_FILE='../dataset/SV/56k/vocab.json'\n",
															
 
																-    "MERGE_FILE='../dataset/SV/56k/merges.txt'\n",
															
 
																+    "OUTPUT_PATH='../dataset/SV/webnyheter2013_32kvocab'\n",
															
 
																+    "VOCAB_FILE='../dataset/SV/32k/vocab.json'\n",
															
 
																+    "MERGE_FILE='../dataset/SV/32k/merges.txt'\n",
															
 
																     "NUM_CPUS=16"
															
 
																    ]
															
 
																   },
															
 
																   {
															
 
																    "cell_type": "code",
															
 
																    "execution_count": null,
															
 
																-   "id": "collect-soccer",
															
 
																+   "id": "affecting-brave",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -99,7 +99,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "italian-mount",
															
 
																+   "id": "mathematical-crawford",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Below is the expected outputs :\n",
															
@@ -116,7 +116,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "surrounded-clothing",
															
 
																+   "id": "involved-geneva",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Now we get the default mmap files (xxx.bin and xxx.idx) and therefore guarantee we have the data needed for the next notebook to run, regardless of whether we finish the mini-challenge or not. \n",
															
@@ -130,8 +130,8 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "code",
															
 
																-   "execution_count": 2,
															
 
																-   "id": "addressed-month",
															
 
																+   "execution_count": null,
															
 
																+   "id": "understood-poultry",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -140,7 +140,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "growing-restriction",
															
 
																+   "id": "north-pension",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "<a id=\"Custom-Sentence-Splitter\"></a>"
															
@@ -148,7 +148,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "chemical-selection",
															
 
																+   "id": "textile-australian",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "The custom sentence-splitter `cut_sentence_with_quotation_marks` function is provided below for your convenience, please integrate this custom function into `MYpreprocess_data.py`."
															
@@ -156,8 +156,8 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "code",
															
 
																-   "execution_count": 3,
															
 
																-   "id": "vital-latino",
															
 
																+   "execution_count": null,
															
 
																+   "id": "swedish-column",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -192,7 +192,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "musical-benjamin",
															
 
																+   "id": "heated-drunk",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "<a id=\"Mini-Challenge\"></a>"
															
@@ -200,7 +200,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "robust-apparel",
															
 
																+   "id": "normal-rachel",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "---\n",
															
@@ -221,8 +221,8 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "code",
															
 
																-   "execution_count": 2,
															
 
																-   "id": "decimal-enlargement",
															
 
																+   "execution_count": null,
															
 
																+   "id": "adopted-yeast",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -435,7 +435,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "innocent-delight",
															
 
																+   "id": "accessory-banana",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Below cell block specify all the input parameters in order to run `MYpreprocess_data.py`. \n",
															
@@ -445,8 +445,8 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "code",
															
 
																-   "execution_count": 11,
															
 
																-   "id": "geographic-convention",
															
 
																+   "execution_count": null,
															
 
																+   "id": "overhead-hydrogen",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -459,7 +459,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "unavailable-steps",
															
 
																+   "id": "interior-healthcare",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Below code block is a ReRun cell to launch `MYpreprocess_data.py` and produce the customSentenceSplit_text_document.bin and customSentenceSplit_text_document.idx files, if the script runs successfully.\n",
															
@@ -472,7 +472,7 @@
 
																   {
															
 
																    "cell_type": "code",
															
 
																    "execution_count": null,
															
 
																-   "id": "smoking-memory",
															
 
																+   "id": "compact-access",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -490,7 +490,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "strange-maldives",
															
 
																+   "id": "automatic-gravity",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Check whether these two files : `customSentenceSplit_text_document.bin` and `customSentenceSplit_text_document.idx` files were successfully generated and is in the correct folder under dataset."
															
@@ -499,7 +499,7 @@
 
																   {
															
 
																    "cell_type": "code",
															
 
																    "execution_count": null,
															
 
																-   "id": "difficult-library",
															
 
																+   "id": "substantial-spare",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -509,7 +509,7 @@
 
																   {
															
 
																    "cell_type": "code",
															
 
																    "execution_count": null,
															
 
																-   "id": "strategic-confusion",
															
 
																+   "id": "civic-airplane",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -519,7 +519,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "temporal-spring",
															
 
																+   "id": "velvet-tennessee",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "-----\n",
															
@@ -528,7 +528,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "parental-tourism",
															
 
																+   "id": "rising-ready",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "-----\n",
															
--- a/ai/Megatron/English/Python/jupyter_notebook/Lab2-5_run_Megatron_with_varying_config.ipynb
+++ b/ai/Megatron/English/Python/jupyter_notebook/Lab2-5_run_Megatron_with_varying_config.ipynb
@@ -2,7 +2,7 @@
 
																  "cells": [
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "rising-software",
															
 
																+   "id": "alike-prisoner",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "## Scale up model size\n",
															
@@ -37,7 +37,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "historic-eating",
															
 
																+   "id": "material-finland",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "\n",
															
@@ -47,7 +47,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "cleared-toolbox",
															
 
																+   "id": "driven-drawing",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Modify and rerun the code blocks below to obtain a even bigger GPT model. \n",
															
@@ -59,7 +59,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "large-buying",
															
 
																+   "id": "proprietary-marketing",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "<a id=\"MODIFY_CELL\"></a>"
															
@@ -67,7 +67,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "approved-beatles",
															
 
																+   "id": "adjustable-engineer",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Always clean the checkpoint folder to ensure training starts from scratch."
															
@@ -75,71 +75,64 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "code",
															
 
																-   "execution_count": 1,
															
 
																-   "id": "attended-vault",
															
 
																+   "execution_count": null,
															
 
																+   "id": "other-parts",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
 
																-    "!rm -fr ../sv_ckpt/* "
															
 
																+    "!rm -fr ../sv_ckpt/* \n",
															
 
																+    "!rm -fr ../dataset/SV/*.npy"
															
 
																    ]
															
 
																   },
															
 
																   {
															
 
																    "cell_type": "code",
															
 
																-   "execution_count": 2,
															
 
																-   "id": "engaging-ocean",
															
 
																+   "execution_count": null,
															
 
																+   "id": "invisible-pepper",
															
 
																    "metadata": {},
															
 
																-   "outputs": [
															
 
																-    {
															
 
																-     "name": "stdout",
															
 
																-     "output_type": "stream",
															
 
																-     "text": [
															
 
																-      "Overwriting ./Megatron-LM/profile_SVGPT_BIG.sh\n"
															
 
																-     ]
															
 
																-    }
															
 
																-   ],
															
 
																+   "outputs": [],
															
 
																    "source": [
															
 
																     "%%writefile ./Megatron-LM/SV_GPT_goingBIG.sh\n",
															
 
																     "# Copyright (c) 2020 NVIDIA Corporation.  All rights reserved.\n",
															
 
																+    "# Copyright (c) 2020 NVIDIA Corporation.  All rights reserved.\n",
															
 
																+    "GPUS_PER_NODE=2 # <--- remember to change this to the number of GPUs you actually have in your system\n",
															
 
																+    "# Change for multinode config\n",
															
 
																     "MASTER_ADDR=localhost\n",
															
 
																     "MASTER_PORT=6000\n",
															
 
																     "NNODES=1 #<-- currently we are using 1 node multigpus\n",
															
 
																     "NODE_RANK=0\n",
															
 
																-    "WORLD_SIZE=2 \n",
															
 
																-    "GPUS_PER_NODE=2  \n",
															
 
																-    "\n",
															
 
																+    "WORLD_SIZE=2 # <--- remember to change this to the number of GPUs you actually have in your system\n",
															
 
																     "\n",
															
 
																-    "CHECKPOINT_PATH='../sv_ckpt/'\n",
															
 
																-    "DATA_PATH='../dataset/SV/webnyheter2013_56kvocab_text_document'\n",
															
 
																-    "VOCAB_FILE='../dataset/SV/56k/vocab.json'\n",
															
 
																-    "MERGE_FILE='../dataset/SV/56k/merges.txt'\n",
															
 
																-    "PROFILE_OUTPUT_PATH='../profiles/SV/nsys_sv_' # modify this to your own profile path\n",
															
 
																-    "\n",
															
 
																-    "# -------------------- #####  Begin of modifiable block ##### -------------------- \n",
															
 
																+    "### modify this section to point the file to its own path \n",
															
 
																+    "CHECKPOINT_PATH='../sv_ckpt/' ## modify this path if you customize it \n",
															
 
																+    "DATA_PATH='../dataset/SV/webnyheter2013_32kvocab_text_document' ## modify this path if you customize it \n",
															
 
																+    "VOCAB_FILE='../dataset/SV/32k/vocab.json' ## modify this path if you customize it \n",
															
 
																+    "MERGE_FILE='../dataset/SV/32k/merges.txt' ## modify this path if you customize it \n",
															
 
																+    "PROFILE_OUTPUT_PATH='../profiles/SV/nsys_improved2' # modify this to your own profile path\n",
															
 
																     "\n",
															
 
																+    "################   Beginning of modifiable section    ####################\n",
															
 
																     "TENSOR_MP_SIZE=<FILL_IN>\n",
															
 
																     "PIPELINE_MP_SIZE=<FILL_IN>\n",
															
 
																-    "LAYERS=<FILL_IN>\n",
															
 
																-    "HIDDEN_SZ=<FILL_IN>\n",
															
 
																+    "NUM_LYS=<FILL_IN>\n",
															
 
																+    "HIDDEN_SIZE=<FILL_IN>\n",
															
 
																     "NUM_ATTN_HEADS=<FILL_IN>\n",
															
 
																-    "MICRO_BZ=<FILL_IN>\n",
															
 
																-    "GLOBAL_BZ=<FILL_IN>\n",
															
 
																     "SEQ_LEN=<FILL_IN>\n",
															
 
																     "MAX_POS_EM=<FILL_IN>\n",
															
 
																+    "MICRO_BZ=<FILL_IN>\n",
															
 
																+    "GLOBAL_BZ=<FILL_IN>\n",
															
 
																     "\n",
															
 
																-    "# -------------------- #####  End of modifiable blocks ##### ------------------------ \n",
															
 
																+    "##############   end of modifiable section, do NOT modify anything below this line    ####################\n",
															
 
																     "\n",
															
 
																-    "##################  DO NOT modify anything below this line ##################\n",
															
 
																     "export OMP_NUM_THREADS=1\n",
															
 
																     "DISTRIBUTED_ARGS=\"--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT\"\n",
															
 
																     "\n",
															
 
																-    "## We turn off nsys profiling decoration to avoid the small overhead\n",
															
 
																+    "## for nsys run\n",
															
 
																     "#nsys profile --stats=false --force-overwrite=true --duration=300 --trace=cudnn,cuda,osrt,nvtx -o $PROFILE_OUTPUT_PATH \\\n",
															
 
																     "python -m torch.distributed.launch $DISTRIBUTED_ARGS \\\n",
															
 
																-    "    ./Megatron-LM/Dlprof_pretrain_gpt.py \\\n",
															
 
																+    "    ./Megatron-LM/pretrain_gpt.py \\\n",
															
 
																     "       --tensor-model-parallel-size ${TENSOR_MP_SIZE} \\\n",
															
 
																     "       --pipeline-model-parallel-size ${PIPELINE_MP_SIZE} \\\n",
															
 
																-    "       --num-layers ${LAYERS} \\\n",
															
 
																-    "       --hidden-size ${HIDDEN_SZ} \\\n",
															
 
																+    "       --num-layers ${NUM_LYS} \\\n",
															
 
																+    "       --hidden-size ${HIDDEN_SIZE} \\\n",
															
 
																     "       --num-attention-heads ${NUM_ATTN_HEADS} \\\n",
															
 
																     "       --micro-batch-size ${MICRO_BZ} \\\n",
															
 
																     "       --global-batch-size ${GLOBAL_BZ} \\\n",
															
@@ -148,7 +141,7 @@
 
																     "       --train-samples 100 \\\n",
															
 
																     "       --save ${CHECKPOINT_PATH} \\\n",
															
 
																     "       --load ${CHECKPOINT_PATH} \\\n",
															
 
																-    "       --data-path 1. ${DATA_PATH} \\\n",
															
 
																+    "       --data-path ${DATA_PATH} \\\n",
															
 
																     "       --vocab-file ${VOCAB_FILE} \\\n",
															
 
																     "       --merge-file ${MERGE_FILE} \\\n",
															
 
																     "       --data-impl mmap \\\n",
															
@@ -170,12 +163,12 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "determined-cliff",
															
 
																+   "id": "formal-turner",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																-    "Check how big is your model. By modify the parameters in the [params_cnt.sh](./params_cnt.sh)\n",
															
 
																+    "Check how big your model is by modifying the parameters in [params_cnt.sh](./params_cnt.sh) to match the training parameters above.\n",
															
 
																     "\n",
															
 
																-    "I got 6.6 Billion :)  what about you ?\n",
															
 
																+    "I got 1.6 Billion :)  what about you ?\n",
															
 
																     "\n",
															
 
																     "Modify the [params count](./params_cnt.sh) according to your training configuration.\n",
															
 
																     "\n",
															
@@ -185,7 +178,7 @@
 
																   {
															
 
																    "cell_type": "code",
															
 
																    "execution_count": null,
															
 
																-   "id": "green-magic",
															
 
																+   "id": "welcome-donor",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
@@ -194,18 +187,18 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "awful-candle",
															
 
																+   "id": "noticed-trinity",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Below is an example of expected outputs:\n",
															
 
																     "    \n",
															
 
																-    "        6 <-- One could get different number depend on your training config\n",
															
 
																-    "        6675628032 <-- One could get different number depend on your training config\n"
															
 
																+    "        1 <-- One could get different number depend on your training config\n",
															
 
																+    "        1678049280 <-- One could get different number depend on your training config\n"
															
 
																    ]
															
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "great-league",
															
 
																+   "id": "convenient-ontario",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Re-run this cell below to get an even bigger GPT model\n",
															
@@ -220,16 +213,16 @@
 
																   {
															
 
																    "cell_type": "code",
															
 
																    "execution_count": null,
															
 
																-   "id": "italian-karma",
															
 
																+   "id": "representative-kentucky",
															
 
																    "metadata": {},
															
 
																    "outputs": [],
															
 
																    "source": [
															
 
																-    "!./Megatron-LM/SV_GPT_goingBIG.sh"
															
 
																+    "!bash ./Megatron-LM/SV_GPT_goingBIG.sh"
															
 
																    ]
															
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "outstanding-application",
															
 
																+   "id": "unnecessary-african",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "Below is an example of expected outputs:\n",
															
@@ -252,7 +245,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "blessed-grammar",
															
 
																+   "id": "pretty-handle",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "---\n",
															
@@ -263,7 +256,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "velvet-nylon",
															
 
																+   "id": "caroline-induction",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "-----\n",
															
@@ -272,7 +265,7 @@
 
																   },
															
 
																   {
															
 
																    "cell_type": "markdown",
															
 
																-   "id": "framed-blood",
															
 
																+   "id": "ranking-pillow",
															
 
																    "metadata": {},
															
 
																    "source": [
															
 
																     "-----\n",