@@ -65,7 +65,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "c7963d43806d432aaa3d00e2055e355c",
+ "model_id": "68838a4f42f84545912e95b339a31034",
"version_major": 2,
"version_minor": 0
},
@@ -75,13 +75,6 @@
},
"metadata": {},
"output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
- ]
}
],
"source": [
@@ -101,6 +94,7 @@
"train_config.context_length = 1024 if torch.cuda.get_device_properties(0).total_memory < 16e9 else 2048 # T4 16GB or A10 24GB\n",
"train_config.batching_strategy = \"packing\"\n",
"train_config.output_dir = \"meta-llama-samsum\"\n",
+ "train_config.use_peft = True\n",
"\n",
"from transformers import BitsAndBytesConfig\n",
"config = BitsAndBytesConfig(\n",
@@ -205,7 +199,7 @@
"model_input = tokenizer(eval_prompt, return_tensors=\"pt\").to(\"cuda\")\n",
"\n",
"model.eval()\n",
- "with torch.no_grad():\n",
+ "with torch.inference_mode():\n",
" print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))"
]
},
@@ -230,34 +224,20 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/datasets/load.py:1486: FutureWarning: The repository for samsum contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/samsum\n",
- "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
- "Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.\n",
- " warnings.warn(\n",
- "Preprocessing dataset: 100%|██████████| 14732/14732 [00:02<00:00, 6124.69it/s]\n"
+ "/home/ubuntu/llama-recipes/src/llama_recipes/model_checkpointing/checkpoint_handler.py:17: DeprecationWarning: `torch.distributed._shard.checkpoint` will be deprecated, use `torch.distributed.checkpoint` instead\n",
+ " from torch.distributed._shard.checkpoint import (\n",
+ "Preprocessing dataset: 100%|██████████| 14732/14732 [00:02<00:00, 5872.02it/s]\n"
]
}
],
"source": [
"from llama_recipes.configs.datasets import samsum_dataset\n",
- "from llama_recipes.data.concatenator import ConcatDataset\n",
- "from llama_recipes.utils.config_utils import get_dataloader_kwargs\n",
- "from llama_recipes.utils.dataset_utils import get_preprocessed_dataset\n",
- "\n",
- "train_dataset = get_preprocessed_dataset(tokenizer, samsum_dataset, 'train')\n",
- "\n",
- "train_dl_kwargs = get_dataloader_kwargs(train_config, train_dataset, tokenizer, \"train\")\n",
+ "from llama_recipes.utils.dataset_utils import get_dataloader\n",
"\n",
- "if train_config.batching_strategy == \"packing\":\n",
- " train_dataset = ConcatDataset(train_dataset, chunk_size=train_config.context_length)\n",
+ "samsum_dataset.trust_remote_code = True\n",
"\n",
- "# Create DataLoaders for the training and validation dataset\n",
- "train_dataloader = torch.utils.data.DataLoader(\n",
- " train_dataset,\n",
- " num_workers=train_config.num_workers_dataloader,\n",
- " pin_memory=True,\n",
- " **train_dl_kwargs,\n",
- ")"
+ "train_dataloader = get_dataloader(tokenizer, samsum_dataset, train_config)\n",
+ "eval_dataloader = get_dataloader(tokenizer, samsum_dataset, train_config, \"val\")"
]
},
{
@@ -310,17 +290,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/cuda/memory.py:330: FutureWarning: torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.\n",
+ "/home/ubuntu/llama-recipes/src/llama_recipes/utils/train_utils.py:92: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n",
+ " scaler = torch.cuda.amp.GradScaler()\n",
+ "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/cuda/memory.py:343: FutureWarning: torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.\n",
" warnings.warn(\n",
"Training Epoch: 1: 0%|\u001b[34m \u001b[0m| 0/319 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
- "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
- " warnings.warn(\n",
+ "/home/ubuntu/llama-recipes/src/llama_recipes/utils/train_utils.py:151: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n",
+ " with autocast():\n",
+ "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py:600: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
+ " return fn(*args, **kwargs)\n",
"/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/bitsandbytes/autograd/_functions.py:316: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
" warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
- "Training Epoch: 1/1, step 1278/1279 completed (loss: 0.27870458364486694): : 320it [2:07:09, 23.84s/it] 3.94s/it] \n"
+ "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n",
+ "Training Epoch: 1/1, step 1278/1279 completed (loss: 0.28094857931137085): : 320it [2:08:50, 24.16s/it] 4.21s/it] \n"
]
},
{
@@ -332,7 +318,7 @@
"Peak active CUDA memory was 15 GB\n",
"CUDA Malloc retries : 0\n",
"CPU Total Peak Memory consumed during the train (max): 2 GB\n",
- "Epoch 1: train_perplexity=1.3403, train_epoch_loss=0.2929, epoch time 7630.169942979002s\n"
+ "Epoch 1: train_perplexity=1.3404, train_epoch_loss=0.2930, epoch time 7730.981359725998s\n"
]
}
],
@@ -354,7 +340,7 @@
"results = train(\n",
" model,\n",
" train_dataloader,\n",
- " None,\n",
+ " eval_dataloader,\n",
" tokenizer,\n",
" optimizer,\n",
" scheduler,\n",
@@ -380,16 +366,7 @@
"cell_type": "code",
"execution_count": 7,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
- " warnings.warn(\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"model.save_pretrained(train_config.output_dir)"
]
@@ -440,13 +417,13 @@
"A: He said he’d name it after his dead hamster – Lemmy - he's a great Motorhead fan :-)))\n",
"---\n",
"Summary:\n",
- "A wants to get a puppy for her son. She will take him to the animal shelter tomorrow. B is not sure if he can go with her, but he's willing to.\n"
+ "A wants to get a puppy for his son. A took him to the animal shelter last Monday and he showed A one he really liked. A wants to get him one of those little dogs. A and B agree that raising a dog is a tough issue.\n"
]
}
],
"source": [
"model.eval()\n",
- "with torch.no_grad():\n",
+ "with torch.inference_mode():\n",
" print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))\n"
]
}
@@ -467,7 +444,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.14"
+ "version": "3.11.9"
},
"vscode": {
"interpreter": {