|
@@ -30,7 +30,7 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": null,
|
|
|
|
|
|
+ "execution_count": 1,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
@@ -52,9 +52,31 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": null,
|
|
|
|
|
|
+ "execution_count": 2,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [],
|
|
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "application/vnd.jupyter.widget-view+json": {
|
|
|
|
+ "model_id": "c7963d43806d432aaa3d00e2055e355c",
|
|
|
|
+ "version_major": 2,
|
|
|
|
+ "version_minor": 0
|
|
|
|
+ },
|
|
|
|
+ "text/plain": [
|
|
|
|
+ "Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "display_data"
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stderr",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
"source": [
|
|
"source": [
|
|
"import torch\n",
|
|
"import torch\n",
|
|
"from transformers import LlamaForCausalLM, AutoTokenizer\n",
|
|
"from transformers import LlamaForCausalLM, AutoTokenizer\n",
|
|
@@ -103,9 +125,52 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": null,
|
|
|
|
|
|
+ "execution_count": 3,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [],
|
|
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "name": "stderr",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stdout",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "\n",
|
|
|
|
+ "Summarize this dialog:\n",
|
|
|
|
+ "A: Hi Tom, are you busy tomorrow’s afternoon?\n",
|
|
|
|
+ "B: I’m pretty sure I am. What’s up?\n",
|
|
|
|
+ "A: Can you go with me to the animal shelter?.\n",
|
|
|
|
+ "B: What do you want to do?\n",
|
|
|
|
+ "A: I want to get a puppy for my son.\n",
|
|
|
|
+ "B: That will make him so happy.\n",
|
|
|
|
+ "A: Yeah, we’ve discussed it many times. I think he’s ready now.\n",
|
|
|
|
+ "B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) \n",
|
|
|
|
+ "A: I'll get him one of those little dogs.\n",
|
|
|
|
+ "B: One that won't grow up too big;-)\n",
|
|
|
|
+ "A: And eat too much;-))\n",
|
|
|
|
+ "B: Do you know which one he would like?\n",
|
|
|
|
+ "A: Oh, yes, I took him there last Monday. He showed me one that he really liked.\n",
|
|
|
|
+ "B: I bet you had to drag him away.\n",
|
|
|
|
+ "A: He wanted to take it home right away ;-).\n",
|
|
|
|
+ "B: I wonder what he'll name it.\n",
|
|
|
|
+ "A: He said he’d name it after his dead hamster – Lemmy - he's a great Motorhead fan :-)))\n",
|
|
|
|
+ "---\n",
|
|
|
|
+ "Summary:\n",
|
|
|
|
+ "A: Hi Tom, are you busy tomorrow’s afternoon?\n",
|
|
|
|
+ "B: I’m pretty sure I am. What’s up?\n",
|
|
|
|
+ "A: Can you go with me to the animal shelter?.\n",
|
|
|
|
+ "B: What do you want to do?\n",
|
|
|
|
+ "A: I want to get a puppy for my son.\n",
|
|
|
|
+ "B: That will make him so happy.\n",
|
|
|
|
+ "A: Yeah, we’ve discussed it many times. I think he’s ready now.\n",
|
|
|
|
+ "B: That’s good. Raising a dog is a tough issue\n"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
"source": [
|
|
"source": [
|
|
"eval_prompt = \"\"\"\n",
|
|
"eval_prompt = \"\"\"\n",
|
|
"Summarize this dialog:\n",
|
|
"Summarize this dialog:\n",
|
|
@@ -151,9 +216,21 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": null,
|
|
|
|
|
|
+ "execution_count": 4,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [],
|
|
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "name": "stderr",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/datasets/load.py:1486: FutureWarning: The repository for samsum contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/samsum\n",
|
|
|
|
+ "You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
|
|
|
|
+ "Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.\n",
|
|
|
|
+ " warnings.warn(\n",
|
|
|
|
+ "Preprocessing dataset: 100%|██████████| 14732/14732 [00:02<00:00, 6124.69it/s]\n"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
"source": [
|
|
"source": [
|
|
"from llama_recipes.configs.datasets import samsum_dataset\n",
|
|
"from llama_recipes.configs.datasets import samsum_dataset\n",
|
|
"from llama_recipes.data.concatenator import ConcatDataset\n",
|
|
"from llama_recipes.data.concatenator import ConcatDataset\n",
|
|
@@ -188,7 +265,7 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": null,
|
|
|
|
|
|
+ "execution_count": 5,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
@@ -219,9 +296,39 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": null,
|
|
|
|
|
|
+ "execution_count": 6,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [],
|
|
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "name": "stderr",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/cuda/memory.py:330: FutureWarning: torch.cuda.reset_max_memory_allocated now calls torch.cuda.reset_peak_memory_stats, which resets /all/ peak memory stats.\n",
|
|
|
|
+ " warnings.warn(\n",
|
|
|
|
+ "Training Epoch: 1: 0%|\u001b[34m \u001b[0m| 0/319 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
|
|
|
+ "To disable this warning, you can either:\n",
|
|
|
|
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
|
|
|
|
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
|
|
|
|
+ "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/torch/utils/checkpoint.py:464: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
|
|
|
|
+ " warnings.warn(\n",
|
|
|
|
+ "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/bitsandbytes/autograd/_functions.py:316: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\n",
|
|
|
|
+ " warnings.warn(f\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\")\n",
|
|
|
|
+ "Training Epoch: 1/1, step 1278/1279 completed (loss: 0.27870458364486694): : 320it [2:07:09, 23.84s/it] 3.94s/it] \n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stdout",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "Max CUDA memory allocated was 15 GB\n",
|
|
|
|
+ "Max CUDA memory reserved was 16 GB\n",
|
|
|
|
+ "Peak active CUDA memory was 15 GB\n",
|
|
|
|
+ "CUDA Malloc retries : 0\n",
|
|
|
|
+ "CPU Total Peak Memory consumed during the train (max): 2 GB\n",
|
|
|
|
+ "Epoch 1: train_perplexity=1.3403, train_epoch_loss=0.2929, epoch time 7630.169942979002s\n"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
"source": [
|
|
"source": [
|
|
"import torch.optim as optim\n",
|
|
"import torch.optim as optim\n",
|
|
"from llama_recipes.utils.train_utils import train\n",
|
|
"from llama_recipes.utils.train_utils import train\n",
|
|
@@ -264,9 +371,18 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": null,
|
|
|
|
|
|
+ "execution_count": 7,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [],
|
|
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "name": "stderr",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "/home/ubuntu/miniconda3/envs/llama/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
|
|
|
|
+ " warnings.warn(\n"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
"source": [
|
|
"source": [
|
|
"model.save_pretrained(train_config.output_dir)"
|
|
"model.save_pretrained(train_config.output_dir)"
|
|
]
|
|
]
|
|
@@ -282,9 +398,45 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": null,
|
|
|
|
|
|
+ "execution_count": 8,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [],
|
|
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "name": "stderr",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stdout",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "\n",
|
|
|
|
+ "Summarize this dialog:\n",
|
|
|
|
+ "A: Hi Tom, are you busy tomorrow’s afternoon?\n",
|
|
|
|
+ "B: I’m pretty sure I am. What’s up?\n",
|
|
|
|
+ "A: Can you go with me to the animal shelter?.\n",
|
|
|
|
+ "B: What do you want to do?\n",
|
|
|
|
+ "A: I want to get a puppy for my son.\n",
|
|
|
|
+ "B: That will make him so happy.\n",
|
|
|
|
+ "A: Yeah, we’ve discussed it many times. I think he’s ready now.\n",
|
|
|
|
+ "B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) \n",
|
|
|
|
+ "A: I'll get him one of those little dogs.\n",
|
|
|
|
+ "B: One that won't grow up too big;-)\n",
|
|
|
|
+ "A: And eat too much;-))\n",
|
|
|
|
+ "B: Do you know which one he would like?\n",
|
|
|
|
+ "A: Oh, yes, I took him there last Monday. He showed me one that he really liked.\n",
|
|
|
|
+ "B: I bet you had to drag him away.\n",
|
|
|
|
+ "A: He wanted to take it home right away ;-).\n",
|
|
|
|
+ "B: I wonder what he'll name it.\n",
|
|
|
|
+ "A: He said he’d name it after his dead hamster – Lemmy - he's a great Motorhead fan :-)))\n",
|
|
|
|
+ "---\n",
|
|
|
|
+ "Summary:\n",
|
|
|
|
+ "A wants to get a puppy for her son. She will take him to the animal shelter tomorrow. B is not sure if he can go with her, but he's willing to.\n"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
"source": [
|
|
"source": [
|
|
"model.eval()\n",
|
|
"model.eval()\n",
|
|
"with torch.no_grad():\n",
|
|
"with torch.no_grad():\n",
|