Hamid Shojanazeri
							
						 | 
						
							
							
								5b916114eb
							
							merge main branch
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								668c364f6b
							
							add rank to save_train_params
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								231c9e7da9
							
							adding train_param.yaml saving for fsdp checkpoint loading for inference
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								475e67b4ec
							
							clean up
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								50e9d17045
							
							add the default option for find the HF model_name/path from train_param.yaml
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								41dd7ff1cb
							
							Merge branch 'main' into checkpoint_handler_path_fix
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								31d6ce8bf6
							
							adding expnadable sgement and dist debug flag info
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								a955ed1999
							
							added checks for dist barrier and commented cuda exapnadable segements and dist_dbug
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								a2403c7c1a
							
							clean up
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								e9559d2669
							
							fixing the train/eval_loss calcualtion
						 | 
						2 年之前 | 
					
				
					
						
							
								   Geeta Chauhan
							
						 | 
						
							
							
								2243b962fa
							
							Create spellcheck.yml (#50)
						 | 
						2 年之前 | 
					
				
					
						
							
								   Geeta Chauhan
							
						 | 
						
							
							
								3cc2b3787f
							
							Fix broken links in Dataset.md (#49)
						 | 
						2 年之前 | 
					
				
					
						
							
								   Geeta Chauhan
							
						 | 
						
							
							
								021ed8e312
							
							adding active mem stat (#44)
						 | 
						2 年之前 | 
					
				
					
						
							
								   Geeta Chauhan
							
						 | 
						
							
							
								09db361d23
							
							Templates updates (#67)
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								4ba4400a75
							
							adding dist barrier before and after checkpointing
						 | 
						2 年之前 | 
					
				
					
						
							
								   chauhang
							
						 | 
						
							
							
								95d59afcb8
							
							Update PR template
						 | 
						2 年之前 | 
					
				
					
						
							
								   chauhang
							
						 | 
						
							
							
								857a3ade4e
							
							Add PR template
						 | 
						2 年之前 | 
					
				
					
						
							
								   chauhang
							
						 | 
						
							
							
								9f9532d34c
							
							comm
						 | 
						2 年之前 | 
					
				
					
						
							
								   Christian Miller
							
						 | 
						
							
							
								9b2f72e1f5
							
							update README: python 3.8 rec + fix formatting
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								a49a2c2804
							
							adding PT cuda allocation expand flag
						 | 
						2 年之前 | 
					
				
					
						
							
								   Geeta Chauhan
							
						 | 
						
							
							
								905f633dab
							
							adding issue tempalte (#57)
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								b814704b5f
							
							adding issue tempalte
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								442c1ccf7c
							
							adding barrier to end of trainer loop
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								f74d57dc08
							
							printing scores based on fsdp usage or single gpu
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								3d887ea483
							
							update with active memory and removing rank0 for eval score
						 | 
						2 年之前 | 
					
				
					
						
							
								   sekyonda
							
						 | 
						
							
							
								0d9c1a909f
							
							Update markdown_link_check_config.json
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								bedb96b78a
							
							fixing the full state path in checkpoint handler
						 | 
						2 年之前 | 
					
				
					
						
							
								   sekyondaMeta
							
						 | 
						
							
							
								b625dceb9b
							
							Create spellcheck.yml
						 | 
						2 年之前 | 
					
				
					
						
							
								   Kaiser Pister
							
						 | 
						
							
							
								b61c45d31d
							
							Fix broken links in Dataset.md
						 | 
						2 年之前 | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								569f8b7976
							
							fixed arg names
						 | 
						2 年之前 |