@@ -1,12 +1,12 @@
 # Copyright (c) 2020 NVIDIA Corporation.  All rights reserved.
-GPUS_PER_NODE=8 # <--- remember to change the number of GPUs you actually have in your system
+GPUS_PER_NODE=2 # <--- remember to change the number of GPUs you actually have in your system
 # Change for multinode config
 MASTER_ADDR=localhost
 MASTER_PORT=6000
 NNODES=1 #<-- currently we are using 1 node multigpus
 NODE_RANK=0
-WORLD_SIZE=8 # <--- remember to change the number of GPUs you actually have in your system
-TENSOR_MP_SIZE=8
+WORLD_SIZE=2 # <--- remember to change the number of GPUs you actually have in your system
+TENSOR_MP_SIZE=2
 PIPELINE_MP_SIZE=1
 ### modify this section to point the file to its own path
 CHECKPOINT_PATH='../sv_ckpt/' ## modify this path if you customize it
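Note: the three values changed above must stay consistent with each other: WORLD_SIZE should equal GPUS_PER_NODE * NNODES, and TENSOR_MP_SIZE * PIPELINE_MP_SIZE must divide WORLD_SIZE. They feed the launcher invocation in the next hunk; in typical Megatron-LM example scripts DISTRIBUTED_ARGS is assembled from them roughly as in the sketch below (the exact line sits outside this excerpt, so treat it as an assumption):

# Sketch (assumption): how DISTRIBUTED_ARGS is usually built from the variables above
# in Megatron-LM example scripts, using the standard torch.distributed.launch options.
DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES \
                  --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"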
@@ -25,12 +25,12 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
        --tensor-model-parallel-size $TENSOR_MP_SIZE \
        --pipeline-model-parallel-size $PIPELINE_MP_SIZE \
        --num-layers 32 \
-       --hidden-size 1024 \
+       --hidden-size 2048 \
        --num-attention-heads 32 \
-       --micro-batch-size 64 \
-       --global-batch-size 512 \
-       --seq-length 512 \
-       --max-position-embeddings 512 \
+       --micro-batch-size 16 \
+       --global-batch-size 128 \
+       --seq-length 1024 \
+       --max-position-embeddings 1024 \
        --train-samples 100 \
        --save $CHECKPOINT_PATH \
        --load $CHECKPOINT_PATH \
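For reference, with the values chosen above the implied data-parallel degree and gradient-accumulation count work out as in the sketch below (a sanity check of the arithmetic Megatron-LM applies; the helper variables are illustrative and not part of the script):

# Sketch (assumption): sanity-check arithmetic for the batch-size settings above.
# data-parallel size      = WORLD_SIZE / (TENSOR_MP_SIZE * PIPELINE_MP_SIZE) = 2 / (2 * 1) = 1
# accumulation steps      = global-batch-size / (micro-batch-size * data-parallel size) = 128 / (16 * 1) = 8
DP_SIZE=$(( WORLD_SIZE / (TENSOR_MP_SIZE * PIPELINE_MP_SIZE) ))
MICRO_BATCH=16    # illustrative copies of the hard-coded launch arguments
GLOBAL_BATCH=128
if (( GLOBAL_BATCH % (MICRO_BATCH * DP_SIZE) != 0 )); then
    echo "global batch size must be divisible by micro-batch-size * data-parallel size" >&2
fi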