|
@@ -1,13 +1,21 @@
|
|
|
{
|
|
|
"cells": [
|
|
|
{
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "1f53f753-12c6-4fac-b910-6e96677d8a49",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/langgraph-rag-agent-local.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
"cell_type": "code",
|
|
|
"execution_count": null,
|
|
|
- "id": "8520d840-fcf6-4458-b85c-8a2ff80a34eb",
|
|
|
+ "id": "6b9ab14a-fd80-4ca2-afc5-efe1c39532bf",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "! pip install -U langchain-nomic langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python gpt4all"
|
|
|
+ "! pip install -U langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python sentence-transformers"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
@@ -20,15 +28,15 @@
|
|
|
"id": "0216de30-29cf-4464-9cc3-6e9a6d6c3e40",
|
|
|
"metadata": {},
|
|
|
"source": [
|
|
|
- "# Local LangGraph RAG agent with LLaMA3\n",
|
|
|
+ "# Local LangGraph RAG agent with Llama 3\n",
|
|
|
"\n",
|
|
|
- "Previously, we showed how to build simple agents with LangGraph and Llama3.\n",
|
|
|
+ "Previously, we showed how to build simple agents with LangGraph and Llama 3.\n",
|
|
|
"\n",
|
|
|
- "Now, we'll pick a more advanced use-case: advanced RAG, with the requirment that it runs locally (on my laptop!).\n",
|
|
|
+ "Now, we'll pick a more advanced use-case: advanced RAG, with the requirement that it runs locally.\n",
|
|
|
"\n",
|
|
|
"## Ideas\n",
|
|
|
"\n",
|
|
|
- "We'll combine ideas from paper RAG papers into a RAG agent:\n",
|
|
|
+ "We'll combine ideas from three RAG papers into a RAG agent:\n",
|
|
|
"\n",
|
|
|
"- **Routing:** Adaptive RAG ([paper](https://arxiv.org/abs/2403.14403)). Route questions to different retrieval approaches\n",
|
|
|
"- **Fallback:** Corrective RAG ([paper](https://arxiv.org/pdf/2401.15884.pdf)). Fallback to web search if docs are not relevant to query\n",
|
|
@@ -70,7 +78,7 @@
|
|
|
"### Tracing (optional)\n",
|
|
|
"os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n",
|
|
|
"os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n",
|
|
|
- "os.environ['LANGCHAIN_API_KEY'] = <your-api-key>\n",
|
|
|
+ "os.environ['LANGCHAIN_API_KEY'] = 'LANGCHAIN_API_KEY'\n",
|
|
|
"```\n",
|
|
|
"\n",
|
|
|
"### Search\n",
|
|
@@ -85,12 +93,18 @@
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
- "os.environ['TAVILY_API_KEY'] = <your-api-key>"
|
|
|
+ "import os\n",
|
|
|
+ "\n",
|
|
|
+ "os.environ['LANGCHAIN_TRACING_V2'] = 'true'\n",
|
|
|
+ "os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'\n",
|
|
|
+ "os.environ['LANGCHAIN_API_KEY'] = 'LANGCHAIN_API_KEY'\n",
|
|
|
+ "\n",
|
|
|
+ "os.environ['TAVILY_API_KEY'] = 'TAVILY_API_KEY'"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 1,
|
|
|
+ "execution_count": null,
|
|
|
"id": "2096d49c-d3dc-4329-ada7-aff56d210198",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
@@ -102,7 +116,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 4,
|
|
|
+ "execution_count": null,
|
|
|
"id": "267c63e1-4c2f-439d-8d95-4c6aa01f41cf",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
@@ -112,7 +126,7 @@
|
|
|
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
|
|
"from langchain_community.document_loaders import WebBaseLoader\n",
|
|
|
"from langchain_community.vectorstores import Chroma\n",
|
|
|
- "from langchain_community.embeddings import GPT4AllEmbeddings\n",
|
|
|
+ "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
|
|
|
"\n",
|
|
|
"urls = [\n",
|
|
|
" \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n",
|
|
@@ -132,25 +146,17 @@
|
|
|
"vectorstore = Chroma.from_documents(\n",
|
|
|
" documents=doc_splits,\n",
|
|
|
" collection_name=\"rag-chroma\",\n",
|
|
|
- " embedding=GPT4AllEmbeddings(),\n",
|
|
|
+ " embedding=HuggingFaceEmbeddings(),\n",
|
|
|
")\n",
|
|
|
"retriever = vectorstore.as_retriever()"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 5,
|
|
|
+ "execution_count": null,
|
|
|
"id": "b008df98-8394-49da-8fb8-aefe2c90d03c",
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "name": "stdout",
|
|
|
- "output_type": "stream",
|
|
|
- "text": [
|
|
|
- "{'score': 'yes'}\n"
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"### Retrieval Grader \n",
|
|
|
"\n",
|
|
@@ -162,14 +168,18 @@
|
|
|
"llm = ChatOllama(model=local_llm, format=\"json\", temperature=0)\n",
|
|
|
"\n",
|
|
|
"prompt = PromptTemplate(\n",
|
|
|
- " template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance \n",
|
|
|
+ " template=\"\"\"You are a grader assessing relevance \n",
|
|
|
" of a retrieved document to a user question. If the document contains keywords related to the user question, \n",
|
|
|
- " grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \\n\n",
|
|
|
- " Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \\n\n",
|
|
|
+ " grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n",
|
|
|
+ " \n",
|
|
|
+ " Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.\n",
|
|
|
" Provide the binary score as a JSON with a single key 'score' and no premable or explaination.\n",
|
|
|
- " <|eot_id|><|start_header_id|>user<|end_header_id|>\n",
|
|
|
- " Here is the retrieved document: \\n\\n {document} \\n\\n\n",
|
|
|
- " Here is the user question: {question} \\n <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n",
|
|
|
+ " \n",
|
|
|
+ " Here is the retrieved document: \n",
|
|
|
+ " {document}\n",
|
|
|
+ " \n",
|
|
|
+ " Here is the user question: \n",
|
|
|
+ " {question}\n",
|
|
|
" \"\"\",\n",
|
|
|
" input_variables=[\"question\", \"document\"],\n",
|
|
|
")\n",
|
|
@@ -183,18 +193,10 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 6,
|
|
|
+ "execution_count": null,
|
|
|
"id": "1d531a81-6d4d-405e-975a-01ef1c9679fa",
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "name": "stdout",
|
|
|
- "output_type": "stream",
|
|
|
- "text": [
|
|
|
- "The context mentions that the memory component of an LLM-powered autonomous agent system includes a long-term memory module (external database) that records a comprehensive list of agents' experience in natural language, referred to as \"memory stream\". This suggests that the agent has some form of memory or recall mechanism.\n"
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"### Generate\n",
|
|
|
"\n",
|
|
@@ -204,12 +206,13 @@
|
|
|
"\n",
|
|
|
"# Prompt\n",
|
|
|
"prompt = PromptTemplate(\n",
|
|
|
- " template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. \n",
|
|
|
+ " template=\"\"\"You are an assistant for question-answering tasks. \n",
|
|
|
" Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. \n",
|
|
|
- " Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>\n",
|
|
|
+ " Use three sentences maximum and keep the answer concise:\n",
|
|
|
" Question: {question} \n",
|
|
|
" Context: {context} \n",
|
|
|
- " Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n",
|
|
|
+ " Answer: \n",
|
|
|
+ " \"\"\",\n",
|
|
|
" input_variables=[\"question\", \"document\"],\n",
|
|
|
")\n",
|
|
|
"\n",
|
|
@@ -231,21 +234,10 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 7,
|
|
|
+ "execution_count": null,
|
|
|
"id": "0261a9a4-de13-4dd8-b082-95305a3e43ca",
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "data": {
|
|
|
- "text/plain": [
|
|
|
- "{'score': 'yes'}"
|
|
|
- ]
|
|
|
- },
|
|
|
- "execution_count": 7,
|
|
|
- "metadata": {},
|
|
|
- "output_type": "execute_result"
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"### Hallucination Grader \n",
|
|
|
"\n",
|
|
@@ -254,15 +246,17 @@
|
|
|
"\n",
|
|
|
"# Prompt\n",
|
|
|
"prompt = PromptTemplate(\n",
|
|
|
- " template=\"\"\" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether \n",
|
|
|
+ " template=\"\"\"You are a grader assessing whether \n",
|
|
|
" an answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' score to indicate \n",
|
|
|
" whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a \n",
|
|
|
- " single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>\n",
|
|
|
+ " single key 'score' and no preamble or explanation.\n",
|
|
|
+ " \n",
|
|
|
" Here are the facts:\n",
|
|
|
- " \\n ------- \\n\n",
|
|
|
" {documents} \n",
|
|
|
- " \\n ------- \\n\n",
|
|
|
- " Here is the answer: {generation} <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n",
|
|
|
+ "\n",
|
|
|
+ " Here is the answer: \n",
|
|
|
+ " {generation}\n",
|
|
|
+ " \"\"\",\n",
|
|
|
" input_variables=[\"generation\", \"documents\"],\n",
|
|
|
")\n",
|
|
|
"\n",
|
|
@@ -272,21 +266,10 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 8,
|
|
|
+ "execution_count": null,
|
|
|
"id": "df9f6944-4fee-4971-b3a7-2b81b44ed433",
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "data": {
|
|
|
- "text/plain": [
|
|
|
- "{'score': 'yes'}"
|
|
|
- ]
|
|
|
- },
|
|
|
- "execution_count": 8,
|
|
|
- "metadata": {},
|
|
|
- "output_type": "execute_result"
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"### Answer Grader \n",
|
|
|
"\n",
|
|
@@ -295,14 +278,15 @@
|
|
|
"\n",
|
|
|
"# Prompt\n",
|
|
|
"prompt = PromptTemplate(\n",
|
|
|
- " template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an \n",
|
|
|
+ " template=\"\"\"You are a grader assessing whether an \n",
|
|
|
" answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is \n",
|
|
|
" useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.\n",
|
|
|
- " <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:\n",
|
|
|
- " \\n ------- \\n\n",
|
|
|
+ " \n",
|
|
|
+ " Here is the answer:\n",
|
|
|
" {generation} \n",
|
|
|
- " \\n ------- \\n\n",
|
|
|
- " Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n",
|
|
|
+ "\n",
|
|
|
+ " Here is the question: {question}\n",
|
|
|
+ " \"\"\",\n",
|
|
|
" input_variables=[\"generation\", \"question\"],\n",
|
|
|
")\n",
|
|
|
"\n",
|
|
@@ -312,26 +296,10 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 9,
|
|
|
+ "execution_count": null,
|
|
|
"id": "a9c910c1-738c-4bf7-bf9e-801862b227eb",
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "name": "stderr",
|
|
|
- "output_type": "stream",
|
|
|
- "text": [
|
|
|
- "/Users/rlm/miniforge3/envs/llama-test-env/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead.\n",
|
|
|
- " warn_deprecated(\n"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "name": "stdout",
|
|
|
- "output_type": "stream",
|
|
|
- "text": [
|
|
|
- "{'datasource': 'vectorstore'}\n"
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"### Router\n",
|
|
|
"\n",
|
|
@@ -343,12 +311,15 @@
|
|
|
"llm = ChatOllama(model=local_llm, format=\"json\", temperature=0)\n",
|
|
|
"\n",
|
|
|
"prompt = PromptTemplate(\n",
|
|
|
- " template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an expert at routing a \n",
|
|
|
+ " template=\"\"\"You are an expert at routing a \n",
|
|
|
" user question to a vectorstore or web search. Use the vectorstore for questions on LLM agents, \n",
|
|
|
" prompt engineering, and adversarial attacks. You do not need to be stringent with the keywords \n",
|
|
|
" in the question related to these topics. Otherwise, use web-search. Give a binary choice 'web_search' \n",
|
|
|
" or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and \n",
|
|
|
- " no premable or explaination. Question to route: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>\"\"\",\n",
|
|
|
+ " no preamble or explanation. \n",
|
|
|
+ " \n",
|
|
|
+ " Question to route: \n",
|
|
|
+ " {question}\"\"\",\n",
|
|
|
" input_variables=[\"question\"],\n",
|
|
|
")\n",
|
|
|
"\n",
|
|
@@ -361,7 +332,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 10,
|
|
|
+ "execution_count": null,
|
|
|
"id": "023ff2db-eb4e-4d44-904c-ea061abc16d9",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
@@ -382,7 +353,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 11,
|
|
|
+ "execution_count": null,
|
|
|
"id": "07fa3d08-6a86-4705-a28b-e2721070bc5e",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
@@ -616,7 +587,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 12,
|
|
|
+ "execution_count": null,
|
|
|
"id": "d9a4b9e4-3ba8-47d6-958c-e5a7112ac6f4",
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
@@ -653,49 +624,10 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 13,
|
|
|
+ "execution_count": null,
|
|
|
"id": "13043b0f-17c7-49d3-9ea7-8f2c0f0c8691",
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "name": "stdout",
|
|
|
- "output_type": "stream",
|
|
|
- "text": [
|
|
|
- "---ROUTE QUESTION---\n",
|
|
|
- "What are the types of agent memory?\n",
|
|
|
- "{'datasource': 'vectorstore'}\n",
|
|
|
- "vectorstore\n",
|
|
|
- "---ROUTE QUESTION TO RAG---\n",
|
|
|
- "---RETRIEVE---\n",
|
|
|
- "'Finished running: retrieve:'\n",
|
|
|
- "---CHECK DOCUMENT RELEVANCE TO QUESTION---\n",
|
|
|
- "---GRADE: DOCUMENT RELEVANT---\n",
|
|
|
- "---GRADE: DOCUMENT RELEVANT---\n",
|
|
|
- "---GRADE: DOCUMENT RELEVANT---\n",
|
|
|
- "---GRADE: DOCUMENT RELEVANT---\n",
|
|
|
- "---ASSESS GRADED DOCUMENTS---\n",
|
|
|
- "---DECISION: GENERATE---\n",
|
|
|
- "'Finished running: grade_documents:'\n",
|
|
|
- "---GENERATE---\n",
|
|
|
- "---CHECK HALLUCINATIONS---\n",
|
|
|
- "---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n",
|
|
|
- "---GRADE GENERATION vs QUESTION---\n",
|
|
|
- "---DECISION: GENERATION ADDRESSES QUESTION---\n",
|
|
|
- "'Finished running: generate:'\n",
|
|
|
- "('According to the provided context, there are several types of memory '\n",
|
|
|
- " 'mentioned:\\n'\n",
|
|
|
- " '\\n'\n",
|
|
|
- " '1. Sensory Memory: This is the earliest stage of memory, providing the '\n",
|
|
|
- " 'ability to retain impressions of sensory information (visual, auditory, etc) '\n",
|
|
|
- " 'after the original stimuli have ended.\\n'\n",
|
|
|
- " '2. Maximum Inner Product Search (MIPS): This is a long-term memory module '\n",
|
|
|
- " \"that records a comprehensive list of agents' experience in natural \"\n",
|
|
|
- " 'language.\\n'\n",
|
|
|
- " '\\n'\n",
|
|
|
- " 'These are the types of agent memory mentioned in the context.')\n"
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"# Compile\n",
|
|
|
"app = workflow.compile()\n",
|
|
@@ -721,32 +653,10 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 14,
|
|
|
+ "execution_count": null,
|
|
|
"id": "fbfcec3e-a09a-40b4-9c15-fead97bf4e0a",
|
|
|
"metadata": {},
|
|
|
- "outputs": [
|
|
|
- {
|
|
|
- "name": "stdout",
|
|
|
- "output_type": "stream",
|
|
|
- "text": [
|
|
|
- "---ROUTE QUESTION---\n",
|
|
|
- "Who are the Bears expected to draft first in the NFL draft?\n",
|
|
|
- "{'datasource': 'web_search'}\n",
|
|
|
- "web_search\n",
|
|
|
- "---ROUTE QUESTION TO WEB SEARCH---\n",
|
|
|
- "---WEB SEARCH---\n",
|
|
|
- "'Finished running: websearch:'\n",
|
|
|
- "---GENERATE---\n",
|
|
|
- "---CHECK HALLUCINATIONS---\n",
|
|
|
- "---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n",
|
|
|
- "---GRADE GENERATION vs QUESTION---\n",
|
|
|
- "---DECISION: GENERATION ADDRESSES QUESTION---\n",
|
|
|
- "'Finished running: generate:'\n",
|
|
|
- "('The Bears are expected to draft Caleb Williams, a quarterback from USC, as '\n",
|
|
|
- " 'their first pick in the NFL draft.')\n"
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
+ "outputs": [],
|
|
|
"source": [
|
|
|
"# Compile\n",
|
|
|
"app = workflow.compile()\n",
|