Browse Source

Created using Colaboratory

Maxime Labonne 1 year ago
parent
commit
f29adf96b1
1 changed file with 16 additions and 16 deletions
  1. 16 16
      Fine_tune_Llama_2_in_Google_Colab.ipynb

+ 16 - 16
Fine_tune_Llama_2_in_Google_Colab.ipynb

@@ -6,7 +6,7 @@
       "provenance": [],
       "machine_shape": "hm",
       "gpuType": "T4",
-      "authorship_tag": "ABX9TyOiM3qH7dGB3tSOPgaXFX6a",
+      "authorship_tag": "ABX9TyO7/3B03I6/nYxIak9u97IH",
       "include_colab_link": true
     },
     "kernelspec": {
@@ -7223,7 +7223,7 @@
         "\n",
         "❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da).\n",
         "\n",
-        "This notebook runs on a T4 GPU with high RAM. (Last update: 28 Jul 2023)\n"
+        "This notebook runs on a T4 GPU with high RAM. (Last update: 30 Jul 2023)\n"
       ],
       "metadata": {
         "id": "OSHlAbqzDFDq"
@@ -7231,7 +7231,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": null,
       "metadata": {
         "id": "GLXwJqbjtPho"
       },
@@ -7261,14 +7261,14 @@
       "metadata": {
         "id": "nAMzy_0FtaUZ"
       },
-      "execution_count": 2,
+      "execution_count": null,
       "outputs": []
     },
     {
       "cell_type": "code",
       "source": [
         "# The model that you want to train from the Hugging Face hub\n",
-        "model_name = \"daryl149/llama-2-7b-chat-hf\"\n",
+        "model_name = \"NousResearch/llama-2-7b-chat-hf\"\n",
         "\n",
         "# The instruction dataset to use\n",
         "dataset_name = \"mlabonne/guanaco-llama2-1k\"\n",
@@ -7335,7 +7335,7 @@
         "max_grad_norm = 0.3\n",
         "\n",
         "# Initial learning rate (AdamW optimizer)\n",
-        "learning_rate = 2e-4\n",
+        "learning_rate = 1e-5\n",
         "\n",
         "# Weight decay to apply to all layers except bias/LayerNorm weights\n",
         "weight_decay = 0.001\n",
@@ -7343,8 +7343,8 @@
         "# Optimizer to use\n",
         "optim = \"paged_adamw_32bit\"\n",
         "\n",
-        "# Learning rate schedule (constant a bit better than cosine)\n",
-        "lr_scheduler_type = \"constant\"\n",
+        "# Learning rate schedule\n",
+        "lr_scheduler_type = \"cosine\"\n",
         "\n",
         "# Number of training steps (overrides num_train_epochs)\n",
         "max_steps = -1\n",
@@ -7357,7 +7357,7 @@
         "group_by_length = True\n",
         "\n",
         "# Save checkpoint every X updates steps\n",
-        "save_steps = 25\n",
+        "save_steps = 0\n",
         "\n",
         "# Log every X updates steps\n",
         "logging_steps = 25\n",
@@ -7378,7 +7378,7 @@
       "metadata": {
         "id": "ib_We3NLtj2E"
       },
-      "execution_count": 3,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -7664,7 +7664,7 @@
         },
         "outputId": "a29901b3-5257-45a7-983c-f3f5f4eb7b94"
       },
-      "execution_count": 4,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -7999,7 +7999,7 @@
       "metadata": {
         "id": "crj9svNe4hU5"
       },
-      "execution_count": 5,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -8021,7 +8021,7 @@
         },
         "outputId": "757676f4-1d4c-4777-e25b-3c92d37c51d8"
       },
-      "execution_count": 6,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -8098,7 +8098,7 @@
         },
         "outputId": "463dab09-f8e9-4d99-ac87-19df4a51dbb5"
       },
-      "execution_count": 4,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "display_data",
@@ -8128,7 +8128,7 @@
         "id": "x-xPb-_qB0dz",
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 373,
+          "height": 385,
           "referenced_widgets": [
             "8d703173f8ea40f1ab04cb87ef1f083b",
             "c603d7b4c2d243ac9c5892cde3b0d2d5",
@@ -8167,7 +8167,7 @@
         },
         "outputId": "97d27e69-4112-4e02-dfcc-83a96be1686f"
       },
-      "execution_count": 5,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",