|
@@ -25,9 +25,9 @@
|
|
|
" or sign in to your existing Atlas account.\n",
|
|
|
"\n",
|
|
|
"2. [Follow the instructions](https://www.mongodb.com/docs/atlas/tutorial/deploy-free-tier-cluster/)\n",
|
|
|
- " (select Atlas UI as the procedure) to deploy your first cluster.\n",
|
|
|
+ " (select Atlas UI as the procedure) to deploy your first cluster, which distributes your data across multiple servers for improved performance and redundancy.\n",
|
|
|
"\n",
|
|
|
- "3. Create the database: `movies`.\n",
|
|
|
+ "3. Create the database `knowledge_base` and the collection `research_papers`.\n",
|
|
|
"\n",
|
|
|
"\n"
|
|
|
]
|
|
@@ -81,10 +81,13 @@
|
|
|
"# Load Dataset\n",
|
|
|
"from datasets import load_dataset\n",
|
|
|
"import pandas as pd\n",
|
|
|
+ "import os\n",
|
|
|
"\n",
|
|
|
"# Make sure you have a Hugging Face token (HF_TOKEN) in your development environment before running the code below\n",
|
|
|
"# How to get a token: https://huggingface.co/docs/hub/en/security-tokens\n",
|
|
|
"# Dataset Location: https://huggingface.co/datasets/MongoDB/subset_arxiv_papers_with_embeddings\n",
|
|
|
+ "os.environ[\"HF_TOKEN\"] = \"place_hugging_face_access_token here\" # Do not hardcode the token in a production environment; load it from a .env file instead\n",
|
|
|
+ "\n",
|
|
|
"dataset = load_dataset(\"MongoDB/subset_arxiv_papers_with_embeddings\")\n",
|
|
|
"\n",
|
|
|
"# Convert the dataset to a pandas dataframe\n",
|
|
@@ -662,8 +665,8 @@
|
|
|
"mongo_client = get_mongo_client(mongo_uri)\n",
|
|
|
"\n",
|
|
|
"# Ingest data into MongoDB\n",
|
|
|
- "db = mongo_client['movies']\n",
|
|
|
- "collection = db['movie_collection_2']"
|
|
|
+ "db = mongo_client['knowledge_base']\n",
|
|
|
+ "collection = db['research_papers']"
|
|
|
]
|
|
|
},
|
|
|
{
|