|
@@ -29,7 +29,7 @@
|
|
|
"!pip install -Uqqq chromadb==0.3.21 --progress-bar off\n",
|
|
|
"!pip install -Uqqq tiktoken==0.3.3 --progress-bar off\n",
|
|
|
"!pip install -Uqqq youtube-transcript-api==0.5.0 --progress-bar off\n",
|
|
|
- "!pip install -Uqqq pytube==12.1.3 --progress-bar offff\n",
|
|
|
+ "!pip install -Uqqq pytube==12.1.3 --progress-bar off\n",
|
|
|
"!pip install -Uqqq unstructured[local-inference]==0.5.12 --progress-bar off"
|
|
|
]
|
|
|
},
|
|
@@ -54,7 +54,7 @@
|
|
|
"metadata": {
|
|
|
"id": "qWD240smBU6z"
|
|
|
},
|
|
|
- "execution_count": 102,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -70,7 +70,7 @@
|
|
|
"id": "3FFyV3BNBSnd",
|
|
|
"outputId": "24e25ebc-ba5c-445c-ff71-95658117db2e"
|
|
|
},
|
|
|
- "execution_count": 2,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -105,7 +105,7 @@
|
|
|
"metadata": {
|
|
|
"id": "GUqPyc20MxUh"
|
|
|
},
|
|
|
- "execution_count": 3,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -116,7 +116,7 @@
|
|
|
"metadata": {
|
|
|
"id": "C-6WIHwKJ-ev"
|
|
|
},
|
|
|
- "execution_count": 53,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -132,7 +132,7 @@
|
|
|
"id": "IV7etHSwC4jm",
|
|
|
"outputId": "d1cbc8dc-138e-4a32-836b-1d964f69fa0e"
|
|
|
},
|
|
|
- "execution_count": 10,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -154,7 +154,7 @@
|
|
|
"metadata": {
|
|
|
"id": "O9URGZ4EJs8p"
|
|
|
},
|
|
|
- "execution_count": 35,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -169,7 +169,7 @@
|
|
|
"id": "buDonlV1J5uT",
|
|
|
"outputId": "2e8f4563-d085-4525-c562-87ee470625fa"
|
|
|
},
|
|
|
- "execution_count": 36,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -194,7 +194,7 @@
|
|
|
"id": "W3Z7djTcMBJz",
|
|
|
"outputId": "22794e99-0516-44b1-f8a9-f16f0ca916aa"
|
|
|
},
|
|
|
- "execution_count": 27,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -222,7 +222,7 @@
|
|
|
"id": "wf0aLZD4MrDU",
|
|
|
"outputId": "71e8cd56-72b6-417d-8360-dac56f678ef2"
|
|
|
},
|
|
|
- "execution_count": 34,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -254,7 +254,7 @@
|
|
|
"metadata": {
|
|
|
"id": "TlIgJ0hsD1gX"
|
|
|
},
|
|
|
- "execution_count": 37,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -270,7 +270,7 @@
|
|
|
"id": "Q626C3NND5fh",
|
|
|
"outputId": "def93b6b-17f5-42a3-fdcf-8ec3b90f5063"
|
|
|
},
|
|
|
- "execution_count": 39,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -293,12 +293,12 @@
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
"base_uri": "https://localhost:8080/",
|
|
|
- "height": 107
|
|
|
+ "height": 125
|
|
|
},
|
|
|
"id": "Ca9PaoBUECde",
|
|
|
"outputId": "d63ad7b5-1252-4961-9464-1a9f58d69008"
|
|
|
},
|
|
|
- "execution_count": 40,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -329,7 +329,7 @@
|
|
|
"id": "062XoRf2Uhcp",
|
|
|
"outputId": "5e733687-e107-4e27-bdac-52073b31c0ea"
|
|
|
},
|
|
|
- "execution_count": 12,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -352,7 +352,7 @@
|
|
|
"id": "ReR1tt38kDDz",
|
|
|
"outputId": "fc56c30f-0757-414b-9930-e6ed85427099"
|
|
|
},
|
|
|
- "execution_count": 22,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -378,7 +378,7 @@
|
|
|
"id": "9radjxmDa__J",
|
|
|
"outputId": "944ed615-7f1c-413a-b952-af48562ee2ce"
|
|
|
},
|
|
|
- "execution_count": 13,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -415,7 +415,7 @@
|
|
|
"id": "FdUOyWP1iFNf",
|
|
|
"outputId": "c2f8406f-de0c-4eb4-a930-2394fee20d96"
|
|
|
},
|
|
|
- "execution_count": 48,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -441,7 +441,7 @@
|
|
|
"id": "sH1j_Gosj2j1",
|
|
|
"outputId": "d9a2a64f-a400-490f-9dfa-ed57eb26a099"
|
|
|
},
|
|
|
- "execution_count": 49,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -467,7 +467,7 @@
|
|
|
"id": "Zz8mt_iciQO8",
|
|
|
"outputId": "48aefe51-7eed-435b-9a01-bc214e9c6b29"
|
|
|
},
|
|
|
- "execution_count": 50,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -493,7 +493,7 @@
|
|
|
"id": "kfXXJ1ZviSpM",
|
|
|
"outputId": "c8492d7e-2d9e-406c-d57a-4363244f4fa8"
|
|
|
},
|
|
|
- "execution_count": 51,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -525,7 +525,7 @@
|
|
|
"metadata": {
|
|
|
"id": "PTJdcEPFzGPw"
|
|
|
},
|
|
|
- "execution_count": 108,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -537,12 +537,12 @@
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
"base_uri": "https://localhost:8080/",
|
|
|
- "height": 125
|
|
|
+ "height": 161
|
|
|
},
|
|
|
"id": "ZsvwTQSC0nPK",
|
|
|
"outputId": "0daaffba-0359-46c6-b6c4-0580e9ec15e0"
|
|
|
},
|
|
|
- "execution_count": 120,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -572,7 +572,7 @@
|
|
|
"id": "xaxpig8Vzl9X",
|
|
|
"outputId": "09e3fb26-a2cf-4846-ccf0-bcb9bae360e9"
|
|
|
},
|
|
|
- "execution_count": 115,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -598,7 +598,7 @@
|
|
|
"id": "hoaqgGy20ICi",
|
|
|
"outputId": "ccd778b7-e8c8-4f2f-dd21-33720df2d193"
|
|
|
},
|
|
|
- "execution_count": 116,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -629,7 +629,7 @@
|
|
|
"metadata": {
|
|
|
"id": "OepRIjzu7POT"
|
|
|
},
|
|
|
- "execution_count": 117,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -645,7 +645,7 @@
|
|
|
"id": "b1mJ8ksB0dCE",
|
|
|
"outputId": "3296cd99-9966-42ff-ea81-b3a9388fdade"
|
|
|
},
|
|
|
- "execution_count": 119,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -662,35 +662,35 @@
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
"source": [
|
|
|
- "hf_embedding[0][:10]"
|
|
|
+ "openai_embedding[0][:10]"
|
|
|
],
|
|
|
"metadata": {
|
|
|
"colab": {
|
|
|
"base_uri": "https://localhost:8080/"
|
|
|
},
|
|
|
"id": "s3x6CRko0hqC",
|
|
|
- "outputId": "c88df9c5-89d4-4dbc-9c7c-1d773c8ff975"
|
|
|
+ "outputId": "a6939f92-8967-495b-cb93-859852cad02e"
|
|
|
},
|
|
|
- "execution_count": 122,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
|
"data": {
|
|
|
"text/plain": [
|
|
|
- "[-0.0012547640362754464,\n",
|
|
|
- " 0.05444266274571419,\n",
|
|
|
- " -0.041984450072050095,\n",
|
|
|
- " -0.019023854285478592,\n",
|
|
|
- " 0.007353615947067738,\n",
|
|
|
- " -0.012013374827802181,\n",
|
|
|
- " 0.06387557089328766,\n",
|
|
|
- " -0.02246193215250969,\n",
|
|
|
- " -0.04335080459713936,\n",
|
|
|
- " -0.04206854850053787]"
|
|
|
+ "[-0.0034319146679993133,\n",
|
|
|
+ " 0.016217479770247397,\n",
|
|
|
+ " 0.020403068874950882,\n",
|
|
|
+ " -0.03693009233481942,\n",
|
|
|
+ " 0.01301435869943405,\n",
|
|
|
+ " 0.025678797149630162,\n",
|
|
|
+ " -0.00714645780273548,\n",
|
|
|
+ " 0.017321074689020152,\n",
|
|
|
+ " -0.03157361652884209,\n",
|
|
|
+ " -0.020618405559186648]"
|
|
|
]
|
|
|
},
|
|
|
"metadata": {},
|
|
|
- "execution_count": 122
|
|
|
+ "execution_count": 129
|
|
|
}
|
|
|
]
|
|
|
},
|
|
@@ -715,7 +715,7 @@
|
|
|
"id": "5eBmO0Wy7npk",
|
|
|
"outputId": "41fa8d8a-cd51-482a-dc91-6f28c389eca6"
|
|
|
},
|
|
|
- "execution_count": 82,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -738,7 +738,7 @@
|
|
|
"id": "FWDSsxEDpLE4",
|
|
|
"outputId": "3ff57635-c131-4dc8-b4c3-1aee62a0aba5"
|
|
|
},
|
|
|
- "execution_count": 83,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -780,7 +780,7 @@
|
|
|
"id": "PkWBekh9o8Ie",
|
|
|
"outputId": "64aa811c-f9f8-4f32-975e-083455ae7834"
|
|
|
},
|
|
|
- "execution_count": 87,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -803,7 +803,7 @@
|
|
|
"id": "0DO8OgwXtU-L",
|
|
|
"outputId": "c50133e2-f86b-4b5e-c521-a81b7bd02bc0"
|
|
|
},
|
|
|
- "execution_count": 88,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -828,7 +828,7 @@
|
|
|
"metadata": {
|
|
|
"id": "EiRCXYiWpJFz"
|
|
|
},
|
|
|
- "execution_count": 89,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -852,7 +852,7 @@
|
|
|
"id": "JB6kMo0GqT1h",
|
|
|
"outputId": "1d34d0c0-3b1d-4cef-c661-c20644e30080"
|
|
|
},
|
|
|
- "execution_count": 90,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -875,7 +875,7 @@
|
|
|
"id": "7T0R9eofq5hN",
|
|
|
"outputId": "735d3713-4bdd-496f-e324-790f24fc4518"
|
|
|
},
|
|
|
- "execution_count": 91,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "execute_result",
|
|
@@ -913,7 +913,7 @@
|
|
|
"metadata": {
|
|
|
"id": "aIoylGwastp8"
|
|
|
},
|
|
|
- "execution_count": 97,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -925,7 +925,7 @@
|
|
|
"metadata": {
|
|
|
"id": "B2LeOFLwtkho"
|
|
|
},
|
|
|
- "execution_count": 100,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": []
|
|
|
},
|
|
|
{
|
|
@@ -940,7 +940,7 @@
|
|
|
"id": "uYVgNvqxtqv2",
|
|
|
"outputId": "a34f53b5-3fcb-49f1-f842-53ec3eba6618"
|
|
|
},
|
|
|
- "execution_count": 123,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -967,7 +967,7 @@
|
|
|
"id": "mR_yuCXVC5tV",
|
|
|
"outputId": "f67c3286-f566-4bc2-97ca-7e2e440400ce"
|
|
|
},
|
|
|
- "execution_count": 128,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|
|
@@ -1002,7 +1002,7 @@
|
|
|
"id": "BbaGBZhMB2IC",
|
|
|
"outputId": "20cfd89e-9de4-45ae-d4cf-cadd873a988e"
|
|
|
},
|
|
|
- "execution_count": 127,
|
|
|
+ "execution_count": null,
|
|
|
"outputs": [
|
|
|
{
|
|
|
"output_type": "stream",
|