lchu
							
						 | 
						
							
							
								feaa344af3
							
							resolve conflicts
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								75f291fe1c
							
							resolved conflicts
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								44ef280d31
							
							adding flash attention and xformer memory efficient through PT SDPA
						 | 
						2 rokov pred | 
					
				
					
						
							
								   luoyifan
							
						 | 
						
							
							
								79d0d4fc4e
							
							fix some typos.
						 | 
						2 rokov pred | 
					
				
					
						
							
								   lchu
							
						 | 
						
							
							
								80a4c36707
							
							further fix #90
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								88d3e1febc
							
							fix the save_train_param condition
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								62be60355a
							
							resolving conflicts
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								017cadd04b
							
							Merge branch 'checkpoint_handler_path_fix' of https://github.com/facebookresearch/llama-recipes into checkpoint_handler_path_fix
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								4f70348b94
							
							remove the redundant lr step
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								5b916114eb
							
							merge main branch
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								668c364f6b
							
							add rank to save_train_params
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								231c9e7da9
							
							adding train_param.yaml saving for fsdp checkpoint loading for inference
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								41dd7ff1cb
							
							Merge branch 'main' into checkpoint_handler_path_fix
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								a955ed1999
							
							added checks for dist barrier and commented cuda exapnadable segements and dist_dbug
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								a2403c7c1a
							
							clean up
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								e9559d2669
							
							fixing the train/eval_loss calcualtion
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								4ba4400a75
							
							adding dist barrier before and after checkpointing
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								a49a2c2804
							
							adding PT cuda allocation expand flag
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								442c1ccf7c
							
							adding barrier to end of trainer loop
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								f74d57dc08
							
							printing scores based on fsdp usage or single gpu
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								3d887ea483
							
							update with active memory and removing rank0 for eval score
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								bedb96b78a
							
							fixing the full state path in checkpoint handler
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								563e572f7c
							
							adding active mem stat
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								bd01f64cbd
							
							Merge branch 'main' into fix-cuda_id
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Andrew Gu
							
						 | 
						
							
							
								71fdc4920a
							
							Save memory and fix typos
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								a7156dfb5d
							
							fixing the cuda id
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								707af7ea24
							
							adding cuda:0 for non-fsdp situations
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								6678be75ad
							
							fixing identation
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								6a84e9e4d5
							
							fixing scaler for both fsdp and non fsdp
						 | 
						2 rokov pred | 
					
				
					
						
							
								   Hamid Shojanazeri
							
						 | 
						
							
							
								065ddaa77b
							
							fixing the condition for moving to cuda
						 | 
						2 rokov pred |