Browse Source

updated llama3 notebook

RichmondAlake 1 year ago
parent
commit
652ffc6af6

+ 7 - 4
recipes/use_cases/chatbots/RAG_chatbot/vectorstore/mongodb/rag_mongodb_llama3_huggingface_open_source.ipynb

@@ -25,9 +25,9 @@
         " or sign in to your existing Atlas account.\n",
         "\n",
         "2. [Follow the instructions](https://www.mongodb.com/docs/atlas/tutorial/deploy-free-tier-cluster/)\n",
-        " (select Atlas UI as the procedure)  to deploy your first cluster.\n",
+        " (select Atlas UI as the procedure) to deploy your first cluster, which distributes your data across multiple servers for improved performance and redundancy.\n",
         "\n",
-        "3. Create the database: `movies`.\n",
+        "3. Create the database: `knowledge_base`, and the collection: `research_papers`.\n",
         "\n",
         "\n"
       ]
@@ -81,10 +81,13 @@
         "# Load Dataset\n",
         "from datasets import load_dataset\n",
         "import pandas as pd\n",
+        "import os\n",
         "\n",
        "# Make sure you have a Hugging Face token (HF_TOKEN) in your development environment before running the code below\n",
         "# How to get a token: https://huggingface.co/docs/hub/en/security-tokens\n",
         "# Dataset Location: https://huggingface.co/datasets/MongoDB/subset_arxiv_papers_with_embeddings\n",
+        "os.environ[\"HF_TOKEN\"] = \"place_hugging_face_access_token_here\" # Do not use this in a production environment; use a .env file instead\n",
+        "\n",
         "dataset = load_dataset(\"MongoDB/subset_arxiv_papers_with_embeddings\")\n",
         "\n",
         "# Convert the dataset to a pandas dataframe\n",
@@ -662,8 +665,8 @@
         "mongo_client = get_mongo_client(mongo_uri)\n",
         "\n",
         "# Ingest data into MongoDB\n",
-        "db = mongo_client['movies']\n",
-        "collection = db['movie_collection_2']"
+        "db = mongo_client['knowledge_base']\n",
+        "collection = db['research_papers']"
       ]
     },
     {