2 年之前 · 21f7408d61
--- a/Mergekit.ipynb
+++ b/Mergekit.ipynb
@@ -5,7 +5,7 @@
 
				     "colab": {
			
 
				       "provenance": [],
			
 
				       "machine_shape": "hm",
			
 
				-      "authorship_tag": "ABX9TyMuGevIbBdnvORov5ZLmtGx",
			
 
				+      "authorship_tag": "ABX9TyNkCdo3uzEUbLA4CS6VfaEM",
			
 
				       "include_colab_link": true
			
 
				     },
			
 
				     "kernelspec": {
			
@@ -1481,67 +1481,42 @@
 
				       "cell_type": "code",
			
 
				       "execution_count": null,
			
 
				       "metadata": {
			
 
				-        "colab": {
			
 
				-          "base_uri": "https://localhost:8080/"
			
 
				-        },
			
 
				-        "id": "NPNPie5Eo3EZ",
			
 
				-        "outputId": "450c623b-7fc8-44df-c437-ea72b44a5a75"
			
 
				+        "id": "NPNPie5Eo3EZ"
			
 
				       },
			
 
				-      "outputs": [
			
 
				-        {
			
 
				-          "output_type": "stream",
			
 
				-          "name": "stdout",
			
 
				-          "text": [
			
 
				-            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
			
 
				-            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
			
 
				-            "  Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
			
 
				-            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
			
 
				-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
			
 
				-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m258.1/258.1 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
			
 
				-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m395.4/395.4 kB\u001b[0m \u001b[31m20.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
			
 
				-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m168.3/168.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
			
 
				-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
			
 
				-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m17.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
			
 
				-            "\u001b[?25h  Building wheel for mergekit (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
			
 
				-            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
			
 
				-            "lida 0.0.10 requires fastapi, which is not installed.\n",
			
 
				-            "lida 0.0.10 requires kaleido, which is not installed.\n",
			
 
				-            "lida 0.0.10 requires python-multipart, which is not installed.\n",
			
 
				-            "lida 0.0.10 requires uvicorn, which is not installed.\n",
			
 
				-            "llmx 0.0.15a0 requires cohere, which is not installed.\n",
			
 
				-            "llmx 0.0.15a0 requires openai, which is not installed.\n",
			
 
				-            "llmx 0.0.15a0 requires tiktoken, which is not installed.\n",
			
 
				-            "tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
			
 
				-            "\u001b[0m"
			
 
				-          ]
			
 
				-        }
			
 
				-      ],
			
 
				+      "outputs": [],
			
 
				       "source": [
			
 
				         "!git clone https://github.com/cg123/mergekit.git\n",
			
 
				         "%cd mergekit\n",
			
 
				-        "!pip install -qe ."
			
 
				+        "%pip install -e ."
			
 
				       ]
			
 
				     },
			
 
				     {
			
 
				       "cell_type": "code",
			
 
				       "source": [
			
 
				-        "MODEL_NAME = \"NeuralPipe-9B-merged\"\n",
			
 
				+        "MODEL_NAME = \"Marcoro14-7B-slerp\"\n",
			
 
				         "yaml_config = \"\"\"\n",
			
 
				         "slices:\n",
			
 
				         "  - sources:\n",
			
 
				-        "    - model: OpenPipe/mistral-ft-optimized-1218\n",
			
 
				-        "      layer_range: [0, 32]\n",
			
 
				-        "  - sources:\n",
			
 
				-        "    - model: mlabonne/NeuralHermes-2.5-Mistral-7B\n",
			
 
				-        "      layer_range: [24, 32]\n",
			
 
				-        "merge_method: passthrough\n",
			
 
				+        "      - model: AIDC-ai-business/Marcoroni-7B-v3\n",
			
 
				+        "        layer_range: [0, 32]\n",
			
 
				+        "      - model: EmbeddedLLM/Mistral-7B-Merge-14-v0.1\n",
			
 
				+        "        layer_range: [0, 32]\n",
			
 
				+        "merge_method: slerp\n",
			
 
				+        "base_model: AIDC-ai-business/Marcoroni-7B-v3\n",
			
 
				+        "parameters:\n",
			
 
				+        "  t:\n",
			
 
				+        "    - filter: self_attn\n",
			
 
				+        "      value: [0, 0.5, 0.3, 0.7, 1]\n",
			
 
				+        "    - filter: mlp\n",
			
 
				+        "      value: [1, 0.5, 0.7, 0.3, 0]\n",
			
 
				+        "    - value: 0.5\n",
			
 
				         "dtype: bfloat16\n",
			
 
				         "\"\"\""
			
 
				       ],
			
 
				       "metadata": {
			
 
				         "id": "LGd7jlfCpNcg"
			
 
				       },
			
 
				-      "execution_count": null,
			
 
				+      "execution_count": 21,
			
 
				       "outputs": []
			
 
				     },
			
 
				     {
			
@@ -1611,9 +1586,17 @@
 
				         "# Create a Jinja template object\n",
			
 
				         "jinja_template = Template(template_text.strip())\n",
			
 
				         "\n",
			
 
				+        "# Get list of models from config (where they live depends on the config layout)\n",
			
 
				+        "if \"models\" in data:  # flat `models` list; entries without `parameters` are skipped\n",
			
 
				+        "    models = [entry[\"model\"] for entry in data[\"models\"] if \"parameters\" in entry]\n",
			
 
				+        "elif \"parameters\" in data and \"slices\" in data:  # top-level `parameters`: all sources of the first slice\n",
			
 
				+        "    models = [source[\"model\"] for source in data[\"slices\"][0][\"sources\"]]\n",
			
 
				+        "elif \"slices\" in data:  # otherwise: the first source of every slice\n",
			
 
				+        "    models = [piece[\"sources\"][0][\"model\"] for piece in data[\"slices\"]]\n",
			
 
				+        "else:  # neither `models` nor `slices`: fail with a clear message instead of a bare KeyError\n",
			
 
				+        "    raise Exception(\"No models or slices found in yaml config\")\n",
			
 
				+        "\n",
			
 
				         "# Fill the template\n",
			
 
				-        "models = [data[\"slices\"][i][\"sources\"][0][\"model\"] for i in range(len(data[\"slices\"]))]\n",
			
 
				-        "# models = [\"OpenPipe/mistral-ft-optimized-1218\", \"mlabonne/NeuralHermes-2.5-Mistral-7B\"]\n",
			
 
				         "content = jinja_template.render(\n",
			
 
				         "    model_name=MODEL_NAME,\n",
			
 
				         "    models=models,\n",