4 anos atrás · 8e1ad51f3d
--- a/ai/Megatron/English/Python/jupyter_notebook/Day2-2_MegatronFundementals.ipynb
+++ b/ai/Megatron/English/Python/jupyter_notebook/Day2-2_MegatronFundementals.ipynb
@@ -2,7 +2,7 @@
 
				  "cells": [
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "prerequisite-disaster",
			
 
				+   "id": "selected-material",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "#  Understanding Megatron-LM's core - MPU\n",
			
@@ -26,7 +26,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "peaceful-article",
			
 
				+   "id": "renewable-simon",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "---------------------------------------------------------------------------\n",
			
@@ -74,7 +74,7 @@
 
				   {
			
 
				    "cell_type": "code",
			
 
				    "execution_count": 1,
			
 
				-   "id": "reflected-israeli",
			
 
				+   "id": "greek-simpson",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -154,7 +154,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "moving-strip",
			
 
				+   "id": "transparent-myanmar",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "---\n",
			
@@ -175,7 +175,7 @@
 
				   {
			
 
				    "cell_type": "code",
			
 
				    "execution_count": 2,
			
 
				-   "id": "promotional-stack",
			
 
				+   "id": "physical-lightweight",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				     {
			
@@ -215,7 +215,7 @@
 
				   {
			
 
				    "cell_type": "code",
			
 
				    "execution_count": 3,
			
 
				-   "id": "loose-haven",
			
 
				+   "id": "confidential-mills",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				     {
			
@@ -257,7 +257,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "nearby-immunology",
			
 
				+   "id": "designed-guidance",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "----------------------------------------------------------------------\n",
			
@@ -268,8 +268,8 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 5,
			
 
				-   "id": "consolidated-operation",
			
 
				+   "execution_count": 3,
			
 
				+   "id": "organized-orange",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -282,14 +282,14 @@
 
				     "import random\n",
			
 
				     "from megatron import *\n",
			
 
				     "from megatron.mpu.tests import *\n",
			
 
				-    "tensor_model_parallel_size=4\n",
			
 
				+    "\n",
			
 
				     "from megatron.mpu.utils import *"
			
 
				    ]
			
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 6,
			
 
				-   "id": "expressed-builder",
			
 
				+   "execution_count": 4,
			
 
				+   "id": "compound-morning",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -391,8 +391,8 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 7,
			
 
				-   "id": "elect-detail",
			
 
				+   "execution_count": 5,
			
 
				+   "id": "editorial-refund",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -444,10 +444,9 @@
 
				     "    print(\"per_partition_per_stride_size \",per_partition_per_stride_size)\n",
			
 
				     "    weight_list = torch.split(master_weight, per_partition_per_stride_size,\n",
			
 
				     "                              dim=partition_dim)\n",
			
 
				-    "    \n",
			
 
				-    "    #print(\"weight_list\", [wl.size() for wl in weight_list] , len(weight_list))\n",
			
 
				-    "    #print(\"----\"*5)\n",
			
 
				-    "    tensor_model_parallel_gp=[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13], [14, 15]]\n",
			
 
				+    "    ########  tensor_model_parallel_gp below is hard-coded for tensor_model_parallel_size= 2 , pipeline_model_parallel_size= 4 ########\n",
			
 
				+    "    ########    if you use other model parallel configuration , please copy and paste it below    ########\n",
			
 
				+    "    tensor_model_parallel_gp=[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13], [14, 15]] \n",
			
 
				     "    my_weight_list = get_weight_list(master_weight,tensor_model_parallel_gp)\n",
			
 
				     "    \n",
			
 
				     "    with torch.no_grad():\n",
			
@@ -459,7 +458,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "dress-proportion",
			
 
				+   "id": "distinguished-rhythm",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "## Peek inside Column Parallel Class"
			
@@ -467,8 +466,8 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 8,
			
 
				-   "id": "german-method",
			
 
				+   "execution_count": 6,
			
 
				+   "id": "under-secondary",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				     {
			
@@ -486,8 +485,8 @@
 
				     }
			
 
				    ],
			
 
				    "source": [
			
 
				-    "tensor_model_parallel_size= 4 \n",
			
 
				-    "pipeline_model_parallel_size= 2  \n",
			
 
				+    "tensor_model_parallel_size= 2 \n",
			
 
				+    "pipeline_model_parallel_size= 4  \n",
			
 
				     "input_size = 1024 # 1024 rows\n",
			
 
				     "output_size = 512 # 256 columns\n",
			
 
				     "which_model_parallel='col'\n",
			
@@ -500,8 +499,8 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 9,
			
 
				-   "id": "ethical-secondary",
			
 
				+   "execution_count": 7,
			
 
				+   "id": "selective-snake",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -511,8 +510,8 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 10,
			
 
				-   "id": "republican-saint",
			
 
				+   "execution_count": 8,
			
 
				+   "id": "inside-france",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				     {
			
@@ -521,7 +520,7 @@
 
				        "__main__.myColumnParallelLinear"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 10,
			
 
				+     "execution_count": 8,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -532,8 +531,8 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 11,
			
 
				-   "id": "sealed-eclipse",
			
 
				+   "execution_count": 9,
			
 
				+   "id": "agricultural-marine",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				     {
			
@@ -542,7 +541,7 @@
 
				        "(1024, 512)"
			
 
				       ]
			
 
				      },
			
 
				-     "execution_count": 11,
			
 
				+     "execution_count": 9,
			
 
				      "metadata": {},
			
 
				      "output_type": "execute_result"
			
 
				     }
			
@@ -553,7 +552,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "played-romantic",
			
 
				+   "id": "experienced-profit",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "----------------------------------------------------------------------\n",
			
@@ -564,8 +563,8 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 53,
			
 
				-   "id": "logical-union",
			
 
				+   "execution_count": 10,
			
 
				+   "id": "mineral-adapter",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -672,8 +671,8 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "code",
			
 
				-   "execution_count": 74,
			
 
				-   "id": "annual-commonwealth",
			
 
				+   "execution_count": 11,
			
 
				+   "id": "nearby-latino",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				     {
			
@@ -707,8 +706,8 @@
 
				     }
			
 
				    ],
			
 
				    "source": [
			
 
				-    "tensor_model_parallel_size= 4 \n",
			
 
				-    "pipeline_model_parallel_size= 2  \n",
			
 
				+    "tensor_model_parallel_size= 2 \n",
			
 
				+    "pipeline_model_parallel_size= 4  \n",
			
 
				     "input_size = 1024 # first dimension of the matrix\n",
			
 
				     "output_size = 512 # 2nd dimension of the matrix\n",
			
 
				     "print(\"this is how A is sliced Row-wised ...\\n\")\n",
			
@@ -723,7 +722,7 @@
 
				   {
			
 
				    "cell_type": "code",
			
 
				    "execution_count": 72,
			
 
				-   "id": "complex-ultimate",
			
 
				+   "id": "economic-istanbul",
			
 
				    "metadata": {},
			
 
				    "outputs": [],
			
 
				    "source": [
			
@@ -734,7 +733,7 @@
 
				   {
			
 
				    "cell_type": "code",
			
 
				    "execution_count": 58,
			
 
				-   "id": "neither-johnson",
			
 
				+   "id": "pursuant-denial",
			
 
				    "metadata": {},
			
 
				    "outputs": [
			
 
				     {
			
@@ -754,7 +753,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "wired-contents",
			
 
				+   "id": "stretch-creature",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "--- \n",
			
@@ -768,7 +767,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "patent-caution",
			
 
				+   "id": "stopped-software",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "---\n",
			
@@ -780,7 +779,7 @@
 
				   },
			
 
				   {
			
 
				    "cell_type": "markdown",
			
 
				-   "id": "together-paragraph",
			
 
				+   "id": "dated-garbage",
			
 
				    "metadata": {},
			
 
				    "source": [
			
 
				     "-----\n",
			
--- a/ai/Megatron/English/Python/jupyter_notebook/Day2_0_intro.ipynb
+++ b/ai/Megatron/English/Python/jupyter_notebook/Day2_0_intro.ipynb
@@ -1,102 +0,0 @@
 
				-{
			
 
				- "cells": [
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "trained-reform",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "## The Bootcamp computer environment, a SuperPOD cluster  -\n",
			
 
				-    "\n",
			
 
				-    "For this bootcamp we will get access to NVIDIA DGX A100 systems. In general, it is highly recommended to have access to a large compute cluster when training very large language models.\n",
			
 
				-    "\n"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "enabling-jason",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "## Learning Objectives\n",
			
 
				-    "This bootcamp is designed to help you quickly go through the default Megatron workflow once ( Day2 ); thereafter ( Day3 ) we will focus on catering to the specifics of local language needs, in this case Swedish. We will give recommendations which can be optionally applied to your workflow and include some practical, useful scripts to help you kick-start your own journey in training local language Megatron GPT2/3 models. \n"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "fifth-argument",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "<!--**IMPORTANT**: Before we start please download the files specified in **Preparation work**.-->\n",
			
 
				-    "\n",
			
 
				-    "### Bootcamp Outline ( Day 2 )\n",
			
 
				-    "This is day 2 of the bootcamp, where we focus on familiarizing ourselves with the Megatron default workflow,\n",
			
 
				-    "given the superPOD environment with ? gpus / per attendees. \n",
			
 
				-    "We will quickly get ourselves up and running with [Megatron repo](https://github.com/NVIDIA/Megatron-LM) and aim to understand how to utilize GPU performance by experimenting with various Megatron GPT training configurations. \n",
			
 
				-    "\n",
			
 
				-    "- [Estimate hours/days needed to execute one end-to-end run per Megatron configuration](./Day2-1_EstimateComputeDaysNeeded.ipynb)\n",
			
 
				-    "- [Understanding the core of Megatron - mpu ](./Day2-2_MegatronFundementals.ipynb)\n",
			
 
				-    "- [About GPT's tokenizer](./Day2-3_GPT_vocab_merge_files.ipynb)\n",
			
 
				-    "- [Data preprocessing](./Day2-4_jsonfy_and_process2mmap.ipynb)\n",
			
 
				-    "- [Megatron runs vs config](./BootCampDay2-4_Verify_GPT_runs_locally.ipynb)\n",
			
 
				-    "\n",
			
 
				-    "\n",
			
 
				-    "### Tutorial Duration\n",
			
 
				-    "The lab material will be presented in a 4-hour session. A link to the scripts (without the data) is available for download at the end of the bootcamp.\n",
			
 
				-    "\n",
			
 
				-    "### Content Level\n",
			
 
				-    "Intermediate , advanced \n",
			
 
				-    "\n",
			
 
				-    "### Target Audience and Prerequisites\n",
			
 
				-    "The target audience for this lab is NLP researchers, data scientists and NLP engineers who are interested in adopting Megatron to train their own GPT2/3 models on their own language.\n",
			
 
				-    "\n",
			
 
				-    "Basic experience with Python programming is needed. No GPU programming knowledge is required."
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "fleet-subject",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "---\n",
			
 
				-    "## Up Next : \n",
			
 
				-    "\n",
			
 
				-    "[Estimate Compute hours/ Days Needed](./Day2-1_EstimateComputeDaysNeeded.ipynb)\n",
			
 
				-    "\n",
			
 
				-    "## Back To Start Menu\n",
			
 
				-    "[start menu](../Start_Here.ipynb)"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "accessible-palestine",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "---\n",
			
 
				-    "\n",
			
 
				-    "## Licensing \n",
			
 
				-    "\n",
			
 
				-    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
			
 
				-   ]
			
 
				-  }
			
 
				- ],
			
 
				- "metadata": {
			
 
				-  "kernelspec": {
			
 
				-   "display_name": "Python 3",
			
 
				-   "language": "python",
			
 
				-   "name": "python3"
			
 
				-  },
			
 
				-  "language_info": {
			
 
				-   "codemirror_mode": {
			
 
				-    "name": "ipython",
			
 
				-    "version": 3
			
 
				-   },
			
 
				-   "file_extension": ".py",
			
 
				-   "mimetype": "text/x-python",
			
 
				-   "name": "python",
			
 
				-   "nbconvert_exporter": "python",
			
 
				-   "pygments_lexer": "ipython3",
			
 
				-   "version": "3.8.8"
			
 
				-  }
			
 
				- },
			
 
				- "nbformat": 4,
			
 
				- "nbformat_minor": 5
			
 
				-}
			
--- a/ai/Megatron/English/Python/jupyter_notebook/Day3-0_overview.ipynb
+++ b/ai/Megatron/English/Python/jupyter_notebook/Day3-0_overview.ipynb
@@ -1,100 +0,0 @@
 
				-{
			
 
				- "cells": [
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "whole-offset",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "## The bootcamp computer environment BerzeLiUs ( SuperPOD in Sweden ) -\n",
			
 
				-    "The [BerzeLiUS](https://blogs.nvidia.com/blog/2021/03/23/ai-supercomputer-sweden/) system consists of 60 NVIDIA DGX A100 systems, \n",
			
 
				-    "linked on a 200 Gbit/second NVIDIA Mellanox InfiniBand HDR network. \n",
			
 
				-    "The same network links the processors to 1.5 petabytes of flash memory on four storage servers from DataDirect Networks.\n"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "martial-cathedral",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "## Learning Objectives\n",
			
 
				-    "Today ( Day3 ) we will focus on catering to the specifics of local language needs, in this case Swedish. We will give recommendations which can be optionally applied to your workflow and include some practical, useful scripts to help you kick-start your own journey in training local language Megatron GPT2/3 models. \n",
			
 
				-    "\n",
			
 
				-    "## Dataset -\n",
			
 
				-    "Today we will be fetching and extracting Swedish data from [Språkbank webnyheter2013 ](https://spraakbanken.gu.se/en/resources/webbnyheter2013)\n",
			
 
				-    "\n",
			
 
				-    "### Bootcamp Outline ( Day 3 )\n",
			
 
				-    "This is day 3 of the bootcamp, where we focus on familiarizing ourselves with the Megatron default workflow,\n",
			
 
				-    "given the superPOD environment with ? gpus / per attendees. \n",
			
 
				-    "We will start from data cleansing [Megatron repo](https://github.com/NVIDIA/Megatron-LM/tools/openwebtext) and aim to understand how to utilize GPU performance by experimenting with various Megatron GPT training configurations. \n",
			
 
				-    "\n",
			
 
				-    "\n",
			
 
				-    "    \n",
			
 
				-    "- [Fetch and extract Swedish data](./Megatron-LM/tools/openwebtext/Day3-1_acquiring_data.ipynb)\n",
			
 
				-    "- [Find sentence boundary and deduplicate your data](./Megatron-LM/tools/openwebtext/Day3-2_SentenceBoundary_and_Deduplicate.ipynb)\n",
			
 
				-    "    - [mini challenge - approaching groundtruth](./Megatron-LM/tools/openwebtext/Day3-2_SentenceBoundary_and_Deduplicate.ipynb#TheChallenge)\n",
			
 
				-    "- [Train your own GPTBPE Tokenizer on your own data ](./Day3-3_train_own_GPT2BPETokenizer.ipynb)\n",
			
 
				-    "- [customize preprocess data python script and convert to mmap](./Day3-4_customize_process2mmap.ipynb)\n",
			
 
				-    "- [The Challenge - Go Big or go home!](./Day3-4_run_Megatron_with_varying_config.ipynb)\n",
			
 
				-    "\n",
			
 
				-    "\n",
			
 
				-    "### Tutorial Duration\n",
			
 
				-    "The lab material will be presented in a 4-hour session. A link to the scripts (without the data) is available for download at the end of the bootcamp.\n",
			
 
				-    "\n",
			
 
				-    "### Content Level\n",
			
 
				-    "Intermediate , advanced \n",
			
 
				-    "\n",
			
 
				-    "### Target Audience and Prerequisites\n",
			
 
				-    "The target audience for this lab is NLP researchers, data scientists and NLP engineers who are interested in adopting Megatron to train their own GPT2/3 models on their own language.\n",
			
 
				-    "\n",
			
 
				-    "Basic experience with Python programming is needed. No GPU programming knowledge is required.\n",
			
 
				-    "\n"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "nearby-village",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "---\n",
			
 
				-    "## Up Next : \n",
			
 
				-    "\n",
			
 
				-    "[Fetch and extract Swedish data](./Megatron-LM/tools/openwebtext/Day3-1_acquiring_data.ipynb)\n",
			
 
				-    "\n",
			
 
				-    "## Back To Start Menu\n",
			
 
				-    "[start menu](../Start_Here.ipynb)"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "markdown",
			
 
				-   "id": "lined-participation",
			
 
				-   "metadata": {},
			
 
				-   "source": [
			
 
				-    "-----\n",
			
 
				-    "## Licensing \n",
			
 
				-    "\n",
			
 
				-    "This material is released by OpenACC-Standard.org, in collaboration with NVIDIA Corporation, under the Creative Commons Attribution 4.0 International (CC BY 4.0). "
			
 
				-   ]
			
 
				-  }
			
 
				- ],
			
 
				- "metadata": {
			
 
				-  "kernelspec": {
			
 
				-   "display_name": "Python 3",
			
 
				-   "language": "python",
			
 
				-   "name": "python3"
			
 
				-  },
			
 
				-  "language_info": {
			
 
				-   "codemirror_mode": {
			
 
				-    "name": "ipython",
			
 
				-    "version": 3
			
 
				-   },
			
 
				-   "file_extension": ".py",
			
 
				-   "mimetype": "text/x-python",
			
 
				-   "name": "python",
			
 
				-   "nbconvert_exporter": "python",
			
 
				-   "pygments_lexer": "ipython3",
			
 
				-   "version": "3.8.8"
			
 
				-  }
			
 
				- },
			
 
				- "nbformat": 4,
			
 
				- "nbformat_minor": 5
			
 
				-}