@@ -432,7 +432,7 @@ def setup_environ_flags(rank):
     os.environ["NCCL_ASYNC_ERROR_HANDLING"] = str(1)
# os.environ["TORCH_DISTRIBUTED_DEBUG"] = "DETAIL"
     # This flag will help with CUDA memory fragmentations that can lead into OOM in some cases.
-    # Note this is only availble in PyTorch Nighlies (as of July 30 2023)
+    # Note this is only available in PyTorch Nightlies (as of July 30 2023)
     # os.environ['PYTORCH_CUDA_ALLOC_CONF']='expandable_segments:True'
     if rank == 0:
         print(f"--> Running with torch dist debug set to detail")
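
For anyone trying the commented-out allocator flag, here is a minimal sketch of enabling it (assuming a PyTorch build that supports `expandable_segments`, which per the comment above was nightly-only as of July 30 2023; the tensor shape is illustrative):

```python
import os

# Must be visible before the CUDA caching allocator initializes, i.e. before
# the first tensor is placed on a GPU; set afterwards, it has no effect.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch

# Subsequent CUDA allocations use expandable segments, which can reduce the
# fragmentation-driven OOMs mentioned in the comment above.
x = torch.ones(4096, 4096, device="cuda")
```

Exporting the variable in the launcher's environment (e.g. before invoking `torchrun`) works just as well, since it only needs to be set before the first CUDA allocation in the process.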