Browse source code

Merge branch 'main' into 3p/e2b-ai-analyst

James Murdza 3 months ago
parent
commit
b31acd501e
70 changed files with 3231 additions and 349 deletions
  1. +21 -0    .github/scripts/spellcheck_conf/wordlist.txt
  2. +3 -3     docs/FAQ.md
  3. +0 -0     docs/img/a_colorful_llama_doing_ai_programming.jpeg
  4. +0 -0     docs/img/cat.jpeg
  5. +0 -0     docs/img/gnocchi_alla_romana.jpeg
  6. +0 -0     docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg
  7. +0 -0     docs/img/llama-mobile-confirmed.png
  8. +0 -0     docs/img/llama-recipes.png
  9. +0 -0     docs/img/llama_stack.png
  10. +0 -0    docs/img/meta_release.png
  11. BIN      docs/img/resized_image.jpg
  12. +0 -0    docs/img/thumbnail_IMG_1329.jpg
  13. +0 -0    docs/img/thumbnail_IMG_1440.jpg
  14. +0 -0    docs/img/thumbnail_IMG_6385.jpg
  15. +2 -2    docs/multi_gpu.md
  16. +3 -3    recipes/3p_integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb
  17. +1 -1    recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md
  18. +1 -1    recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py
  19. +2 -2    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl
  20. +2 -2    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl
  21. +2 -2    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl
  22. +2 -2    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl
  23. +4 -4    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl
  24. +4 -4    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl
  25. +2 -2    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl
  26. +4 -4    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl
  27. +4 -4    recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl
  28. +1 -1    recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md
  29. +71 -0   recipes/3p_integrations/modal/many-llamas-human-eval/README.md
  30. +64 -0   recipes/3p_integrations/modal/many-llamas-human-eval/download.py
  31. +96 -0   recipes/3p_integrations/modal/many-llamas-human-eval/eval.py
  32. +248 -0  recipes/3p_integrations/modal/many-llamas-human-eval/generate.py
  33. +149 -0  recipes/3p_integrations/modal/many-llamas-human-eval/inference.py
  34. +194 -0  recipes/3p_integrations/modal/many-llamas-human-eval/plot.py
  35. +21 -0   recipes/3p_integrations/modal/many-llamas-human-eval/run_e2e.sh
  36. +3 -4    recipes/3p_integrations/octoai/video_summary.ipynb
  37. +4 -4    recipes/3p_integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb
  38. +3 -3    recipes/3p_integrations/togetherai/structured_text_extraction_from_images.ipynb
  39. +2 -2    recipes/3p_integrations/vllm/README.md
  40. +1 -1    recipes/experimental/long_context/H2O/README.md
  41. +1 -1    recipes/experimental/long_context/H2O/src/streaming.sh
  42. +1 -1    recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb
  43. +4 -4    recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_101.ipynb
  44. +1 -1    recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_201.ipynb
  45. +28 -28  recipes/quickstart/build_with_Llama_3_2.ipynb
  46. +2 -2    recipes/quickstart/finetuning/README.md
  47. +1 -1    recipes/quickstart/inference/local_inference/README.md
  48. +1 -1    recipes/responsible_ai/prompt_guard/inference.py
  49. +688 -0  recipes/use_cases/browser_use/agent/browser-use-quickstart.ipynb
  50. BIN      recipes/use_cases/browser_use/agent/sample_screenshot.png
  51. +334 -0  recipes/use_cases/coding/text2sql/quickstart.ipynb
  52. +0 -244  recipes/use_cases/coding/text2sql/structured_llama.ipynb
  53. +1 -1    recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md
  54. +1 -1    recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md
  55. BIN      recipes/use_cases/email_agent/1.png
  56. BIN      recipes/use_cases/email_agent/2.png
  57. BIN      recipes/use_cases/email_agent/3.png
  58. +335 -0  recipes/use_cases/email_agent/README.md
  59. BIN      recipes/use_cases/email_agent/email_agent.png
  60. +624 -0  recipes/use_cases/email_agent/email_agent.py
  61. +241 -0  recipes/use_cases/email_agent/functions_prompt.py
  62. +32 -0   recipes/use_cases/email_agent/main.py
  63. +9 -0    recipes/use_cases/email_agent/requirements.txt
  64. +3 -3    recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval.py
  65. +2 -2    recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval_config.yaml
  66. +3 -3    recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_utils.py
  67. +1 -1    src/llama_recipes/inference/prompt_format_utils.py
  68. +1 -1    src/llama_recipes/inference/safety_utils.py
  69. +1 -1    src/llama_recipes/policies/anyprecision_optimizer.py
  70. +2 -2    src/llama_recipes/utils/train_utils.py

+ 21 - 0
.github/scripts/spellcheck_conf/wordlist.txt

@@ -1508,3 +1508,24 @@ xTTS
 TogetherAI
 Vercel's
 echarts
+pydantic
+Deloitte
+Deloitte's
+Felicis
+Gmail
+LangSmith
+Letta
+NLU
+Norvig's
+OAuth
+Ollama's
+Weng
+dropdown
+globals
+gmail
+multiagent
+yyy
+jpeg
+toend
+codellama
+DIFFLOG

+ 3 - 3
docs/FAQ.md

@@ -29,20 +29,20 @@ Here we discuss frequently asked questions that may occur and we found useful al
 
 7. How to handle CUDA memory fragmentations during fine-tuning that may lead into an OOM?
 
-    In some cases you may experience that after model checkpointing specially with FSDP (this usually does not happen with PEFT methods), the reserved and allocated CUDA memory has increased. This might be due to CUDA memory fragmentations. PyTorch recenly added an enviroment variable that helps to better manage memory fragmentation (this feature in available on PyTorch nightlies at the time of writing this doc July 30 2023). You can set this in your main training script as follows:
+    In some cases you may experience that after model checkpointing, especially with FSDP (this usually does not happen with PEFT methods), the reserved and allocated CUDA memory has increased. This might be due to CUDA memory fragmentation. PyTorch recently added an environment variable that helps to better manage memory fragmentation (this feature is available on PyTorch nightlies at the time of writing this doc July 30 2023). You can set this in your main training script as follows:
 
     ```bash
 
     os.environ['PYTORCH_CUDA_ALLOC_CONF']='expandable_segments:True'
 
     ```
-    We also added this enviroment variable in `setup_environ_flags` of the [train_utils.py](../src/llama_recipes/utils/train_utils.py), feel free to uncomment it if required.
+    We also added this environment variable in `setup_environ_flags` of the [train_utils.py](../src/llama_recipes/utils/train_utils.py), feel free to uncomment it if required.
 
 8. Additional debugging flags?
 
     The environment variable `TORCH_DISTRIBUTED_DEBUG` can be used to trigger additional useful logging and collective synchronization checks to ensure all ranks are synchronized appropriately. `TORCH_DISTRIBUTED_DEBUG` can be set to either OFF (default), INFO, or DETAIL depending on the debugging level required. Please note that the most verbose option, DETAIL may impact the application performance and thus should only be used when debugging issues.
 
-    We also added this enviroment variable in `setup_environ_flags` of the [train_utils.py](../src/llama_recipes/utils/train_utils.py), feel free to uncomment it if required.
+    We also added this environment variable in `setup_environ_flags` of the [train_utils.py](../src/llama_recipes/utils/train_utils.py), feel free to uncomment it if required.
 
 9. I am getting import errors when running inference.
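
    For reference, a minimal sketch that sets both of the flags discussed in items 7 and 8 above at the top of a training script (values taken from the text; `DETAIL` is just one of the documented options):

    ```python
    import os

    # Item 7: ask the CUDA caching allocator to use expandable segments to reduce fragmentation.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

    # Item 8: extra distributed logging and collective sync checks; OFF (default), INFO, or DETAIL.
    os.environ["TORCH_DISTRIBUTED_DEBUG"] = "DETAIL"

    import torch  # set the variables before CUDA is initialized
    ```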
 

recipes/quickstart/images/a_colorful_llama_doing_ai_programming.jpeg → docs/img/a_colorful_llama_doing_ai_programming.jpeg


recipes/quickstart/images/cat.jpeg → docs/img/cat.jpeg


recipes/quickstart/images/gnocchi_alla_romana.jpeg → docs/img/gnocchi_alla_romana.jpeg


recipes/quickstart/images/grocery_shopping_bascket_with_salmon_in_package.jpeg → docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg


recipes/quickstart/images/llama-mobile-confirmed.png → docs/img/llama-mobile-confirmed.png


recipes/quickstart/images/llama-recipes.png → docs/img/llama-recipes.png


recipes/quickstart/images/llama_stack.png → docs/img/llama_stack.png


recipes/quickstart/images/meta_release.png → docs/img/meta_release.png


BIN
docs/img/resized_image.jpg


recipes/quickstart/images/thumbnail_IMG_1329.jpg → docs/img/thumbnail_IMG_1329.jpg


recipes/quickstart/images/thumbnail_IMG_1440.jpg → docs/img/thumbnail_IMG_1440.jpg


recipes/quickstart/images/thumbnail_IMG_6385.jpg → docs/img/thumbnail_IMG_6385.jpg


+ 2 - 2
docs/multi_gpu.md

@@ -174,7 +174,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
     * `mixed_precision` boolean flag to specify using mixed precision, defatults to true.
 
-    * `use_fp16` boolean flag to specify using FP16 for mixed precision, defatults to False. We recommond not setting this flag, and only set `mixed_precision` that will use `BF16`, this will help with speed and memory savings while avoiding challenges of scaler accuracies with `FP16`.
+    * `use_fp16` boolean flag to specify using FP16 for mixed precision, defaults to False. We recommend not setting this flag, and only set `mixed_precision` that will use `BF16`, this will help with speed and memory savings while avoiding challenges of scaler accuracies with `FP16`.
 
     *  `sharding_strategy` this specifies the sharding strategy for FSDP, it can be:
         * `FULL_SHARD` that shards model parameters, gradients and optimizer states, results in the most memory savings.
@@ -187,7 +187,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
 * `checkpoint_type` specifies the state dict checkpoint type for saving the model. `FULL_STATE_DICT` streams state_dict of each model shard from a rank to CPU and assembels the full state_dict on CPU. `SHARDED_STATE_DICT` saves one checkpoint per rank, and enables the re-loading the model in a different world size.
 
-* `fsdp_activation_checkpointing` enables activation checkpoining for FSDP, this saves significant amount of memory with the trade off of recomputing itermediate activations during the backward pass. The saved memory can be re-invested in higher batch sizes to increase the throughput. We recommond you use this option.
+* `fsdp_activation_checkpointing` enables activation checkpointing for FSDP, this saves a significant amount of memory with the trade-off of recomputing intermediate activations during the backward pass. The saved memory can be re-invested in higher batch sizes to increase the throughput. We recommend you use this option.
 
 * `fsdp_config.pure_bf16` it moves the  model to `BFloat16` and if `optimizer` is set to `anyprecision` then optimizer states will be kept in `BFloat16` as well. You can use this option if necessary.
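
To make the options above concrete, here is a minimal sketch of wrapping a model with FSDP using the recommended BF16 mixed precision, full sharding, and activation checkpointing (standard PyTorch FSDP API assumed; llama-recipes wires these settings up through its own config classes, so this is illustrative rather than the repo's exact code):

```python
import torch
import torch.nn as nn
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.distributed.fsdp import MixedPrecision, ShardingStrategy
from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import (
    apply_activation_checkpointing,
)

# Assumes torch.distributed is already initialized (e.g. launched via torchrun).
model = nn.TransformerEncoderLayer(d_model=512, nhead=8)  # stand-in for the real model

# `mixed_precision` without `use_fp16` -> BF16 everywhere, no grad scaler needed.
bf16_policy = MixedPrecision(
    param_dtype=torch.bfloat16,
    reduce_dtype=torch.bfloat16,
    buffer_dtype=torch.bfloat16,
)

model = FSDP(
    model,
    sharding_strategy=ShardingStrategy.FULL_SHARD,  # shard params, grads, optimizer states
    mixed_precision=bf16_policy,
)

# `fsdp_activation_checkpointing`: recompute intermediate activations in the backward pass.
apply_activation_checkpointing(
    model,
    check_fn=lambda m: isinstance(m, nn.TransformerEncoderLayer),
)
```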
 

+ 3 - 3
recipes/3p_integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb

@@ -758,7 +758,7 @@
     "\n",
     "Adding specific examples of your desired output generally results in more accurate, consistent output. This technique is called \"few-shot prompting\".\n",
     "\n",
-    "In this example, the generated response follows our desired format that offers a more nuanced sentiment classifer that gives a positive, neutral, and negative response confidence percentage.\n",
+    "In this example, the generated response follows our desired format that offers a more nuanced sentiment classifier that gives a positive, neutral, and negative response confidence percentage.\n",
     "\n",
     "See also: [Zhao et al. (2021)](https://arxiv.org/abs/2102.09690), [Liu et al. (2021)](https://arxiv.org/abs/2101.06804), [Su et al. (2022)](https://arxiv.org/abs/2209.01975), [Rubin et al. (2022)](https://arxiv.org/abs/2112.08633).\n",
     "\n"
@@ -1045,7 +1045,7 @@
    "source": [
     "### Self-Consistency\n",
     "\n",
-    "LLMs are probablistic, so even with Chain-of-Thought, a single generation might produce incorrect results. Self-Consistency ([Wang et al. (2022)](https://arxiv.org/abs/2203.11171)) introduces enhanced accuracy by selecting the most frequent answer from multiple generations (at the cost of higher compute):"
+    "LLMs are probabilistic, so even with Chain-of-Thought, a single generation might produce incorrect results. Self-Consistency ([Wang et al. (2022)](https://arxiv.org/abs/2203.11171)) introduces enhanced accuracy by selecting the most frequent answer from multiple generations (at the cost of higher compute):"
    ]
   },
   {
@@ -1179,7 +1179,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Retrieval-Augmented Generation, or RAG, describes the practice of including information in the prompt you've retrived from an external database ([Lewis et al. (2020)](https://arxiv.org/abs/2005.11401v4)). It's an effective way to incorporate facts into your LLM application and is more affordable than fine-tuning which may be costly and negatively impact the foundational model's capabilities.\n",
+    "Retrieval-Augmented Generation, or RAG, describes the practice of including information in the prompt you've retrieved from an external database ([Lewis et al. (2020)](https://arxiv.org/abs/2005.11401v4)). It's an effective way to incorporate facts into your LLM application and is more affordable than fine-tuning which may be costly and negatively impact the foundational model's capabilities.\n",
     "\n",
     "This could be as simple as a lookup table or as sophisticated as a [vector database]([FAISS](https://github.com/facebookresearch/faiss)) containing all of your company's knowledge:"
    ]
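
The Self-Consistency passage above selects the most frequent answer across several generations; a minimal sketch of that majority vote, assuming `generate(prompt)` is any callable that returns one completion string:

```python
from collections import Counter

def self_consistent_answer(generate, prompt: str, n: int = 5) -> str:
    """Sample n chain-of-thought completions and return the most common final answer."""
    answers = []
    for _ in range(n):
        completion = generate(prompt)
        # Assumption: the final non-empty line of each completion carries the answer.
        answers.append(completion.strip().splitlines()[-1])
    answer, _count = Counter(answers).most_common(1)[0]
    return answer
```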

+ 1 - 1
recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md

@@ -1,6 +1,6 @@
 # Presidential Speeches RAG with Pinecone
 
-This repository contains a command line application that allows users to ask questions about US presidental speeches by applying Retrieval-Augmented Generation (RAG) over a Pinecone vector database. The application uses RAG to answer the user's question by retrieving the most relevant presidential speeches and using them to supplant the LLM response.
+This repository contains a command line application that allows users to ask questions about US presidential speeches by applying Retrieval-Augmented Generation (RAG) over a Pinecone vector database. The application uses RAG to answer the user's question by retrieving the most relevant presidential speeches and using them to supplant the LLM response.
 
 ## Features
 

+ 1 - 1
recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py

@@ -55,7 +55,7 @@ def presidential_speech_chat_completion(client, model, user_question, relevant_e
             },
             {
                 "role": "user",
-                "content": "User Question: " + user_question + "\n\nRelevant Speech Exerpt(s):\n\n" + relevant_excerpts,
+                "content": "User Question: " + user_question + "\n\nRelevant Speech Excerpt(s):\n\n" + relevant_excerpts,
             }
         ],
         model = model
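
The README above describes retrieving the most relevant speeches from a Pinecone vector database before building this prompt; a minimal sketch of how `relevant_excerpts` could be produced (hypothetical index name and metadata key; the current Pinecone client API is assumed and may differ from the template's actual code):

```python
from pinecone import Pinecone  # assumes the v3+ pinecone client


def retrieve_excerpts(question_embedding: list[float], top_k: int = 3) -> str:
    """Fetch the most relevant speech excerpts for an embedded user question."""
    pc = Pinecone(api_key="YOUR_PINECONE_API_KEY")   # assumption: key supplied via env/config
    index = pc.Index("presidential-speeches")        # hypothetical index name
    result = index.query(vector=question_embedding, top_k=top_k, include_metadata=True)
    # Assumption: each vector was upserted with its excerpt text under the "text" metadata key.
    return "\n\n".join(match["metadata"]["text"] for match in result["matches"])
```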

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl

@@ -29,8 +29,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql" : "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql" : "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql" : "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql" : "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql" : "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl

@@ -9,8 +9,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql" : "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql" : "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql" : "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql" : "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql" : "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl

@@ -209,8 +209,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl

@@ -117,8 +117,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "sql" : "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "sql" : "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "sql" : "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "sql" : "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "sql" : "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 4 - 4
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl

@@ -118,8 +118,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}
@@ -148,8 +148,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 4 - 4
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl

@@ -1108,8 +1108,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}
@@ -1138,8 +1138,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl

@@ -321,8 +321,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 4 - 4
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl

@@ -160,8 +160,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}
@@ -215,8 +215,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 4 - 4
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl

@@ -1188,8 +1188,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}
@@ -1243,8 +1243,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 1 - 1
recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md

@@ -1,6 +1,6 @@
 # Building Agentic RAG with Llamaindex
 
-The folder here containts the Llama 3 ported notebooks of the DLAI short course [Building Agentic RAG with Llamaindex](https://www.deeplearning.ai/short-courses/building-agentic-rag-with-llamaindex/).
+The folder here contains the Llama 3 ported notebooks of the DLAI short course [Building Agentic RAG with Llamaindex](https://www.deeplearning.ai/short-courses/building-agentic-rag-with-llamaindex/).
 
 1. [Building Agentic RAG with Llamaindex L1 Router Engine](../../../quickstart/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb) shows how to implement a simple agentic RAG, a router that will pick up one of several query tools (question answering or summarization) to execute a query on a single document. Note this notebook is located in the `quickstart` folder.
 

+ 71 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/README.md

@@ -0,0 +1,71 @@
+# Many-Llamas Human-Eval
+
+In this directory, we run an experiment answering the question:
+
+*If we run enough Llama models in parallel, can they outperform GPT-4o on HumanEval?*
+
+It seeks to increase model performance not through scaling parameters, but by scaling compute time.
+
+### Technical Blog
+
+This experiment was built by the team at [Modal](https://modal.com), and is described in the following blog post:
+
+[Beat GPT-4o at Python by searching with 100 small Llamas](https://modal.com/blog/llama-human-eval)
+
+The experiment has since been upgraded to use the [Llama 3.2 3B Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) model, and is runnable end-to-end using the Modal serverless platform.
+
+## Run it yourself
+
+### Install the Modal CLI
+From within your virtual environment, run:
+```bash
+pip install modal
+```
+And if you're new to Modal, authenticate with:
+```bash
+modal setup
+# or if that doesn't work, try 
+# python -m modal setup
+```
+
+That's all!
+
+This CLI will execute your modal apps, which build and run containers on the cloud, on your GPU of choice.
+
+### HuggingFace Pull Access
+
+To download the model, you'll first need to accept the [Llama 3.2 License](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on HuggingFace and be approved for access.
+
+Then, create a [modal secret](https://modal.com/secrets) named `huggingface`, to which you'll add your HF_TOKEN as an environment variable.
+
+### Run The Experiment
+
+This command will run every step for you:
+```bash
+bash run_e2e.sh
+```
+
+Or if you prefer to run it manually, you can step through each of the modal commands in [the script](./run_e2e.sh).
+
+This will execute:
+1. Downloading the Llama 3.2 3B Instruct model to a cloud volume
+2. Deploying a vLLM inference server to GPUs
+3. Running hundreds of parallel generations on the HumanEval test set
+4. Running the evaluation script to compute pass@k and fail@k
+5. Generating graphs of pass@k and fail@k
+
+### Results
+
+The resulting plots of the evals will be saved locally to:
+- `/tmp/plot-pass-k.jpeg`
+- `/tmp/plot-fail-k.jpeg`
+
+`/tmp/plot-pass-k.jpeg` shows pass@k for the Llama 3.2 3B Instruct model vs pass@1 for GPT-4o. 
+
+![plot-pass-k](https://github.com/user-attachments/assets/11e9dc6e-4322-4d44-b928-4ed7c4ce8262)
+
+You'll see that at 100 generations, the Llama model is able to perform on-par with GPT-4o. At higher scale, the Llama model will outperform GPT-4o.
+
+`/tmp/plot-fail-k.jpeg` shows fail@k across a log-scale, showing smooth scaling of this method.
+
+![plot-fail-k](https://github.com/user-attachments/assets/7286e4ff-5090-4288-bd62-8a078c6dc5a1)
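
For reference, pass@k in plots like these is conventionally computed with the unbiased estimator from the original HumanEval paper; a minimal sketch (the repository's `eval.py` and `plot.py` may compute it differently):

```python
import numpy as np

def pass_at_k(n: int, c: int, k: int) -> float:
    """Unbiased pass@k estimator (Chen et al., 2021).
    n: samples generated per task, c: samples that passed the tests, k: evaluation budget."""
    if n - c < k:
        return 1.0
    return float(1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1)))

# fail@k is simply the complement: 1.0 - pass_at_k(n, c, k)
```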

+ 64 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/download.py

@@ -0,0 +1,64 @@
+# ## Downloading Llama 3.2 3B Instruct Model
+# This script uses a Modal Function to download the model into a cloud Volume.
+#
+# Run it with:
+#    modal run download
+
+import modal
+
+MODELS_DIR = "/llamas"
+DEFAULT_NAME = "meta-llama/Llama-3.2-3B-Instruct"
+
+MINUTES = 60
+HOURS = 60 * MINUTES
+
+# Create a modal Volume to store the model
+volume = modal.Volume.from_name("llamas", create_if_missing=True)
+
+# This defines the image to use for the modal function
+image = (
+    modal.Image.debian_slim(python_version="3.10")
+    .pip_install(
+        [
+            "huggingface_hub",  # download models from the Hugging Face Hub
+            "hf-transfer",  # download models faster with Rust
+        ]
+    )
+    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
+)
+
+# We run the function from a modal App, which will have our HF_SECRET env var set.
+# Add your HuggingFace secret access token here: https://modal.com/secrets
+# secret name: huggingface
+# env var name: HF_TOKEN
+app = modal.App(image=image, secrets=[modal.Secret.from_name("huggingface")])
+
+# This function will be run in the cloud, with the volume mounted.
+@app.function(volumes={MODELS_DIR: volume}, timeout=4 * HOURS)
+def download_model(model_name, force_download=False):
+    from huggingface_hub import snapshot_download
+
+    volume.reload()
+
+    snapshot_download(
+        model_name,
+        local_dir=MODELS_DIR + "/" + model_name,
+        ignore_patterns=[
+            "*.pt",
+            "*.bin",
+            "*.pth",
+            "original/*",
+        ],  # Ensure safetensors
+        force_download=force_download,
+    )
+
+    volume.commit()
+
+    print("Model successfully downloaded")
+
+@app.local_entrypoint()
+def main(
+    model_name: str = DEFAULT_NAME,
+    force_download: bool = False,
+):
+    download_model.remote(model_name, force_download)

+ 96 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/eval.py

@@ -0,0 +1,96 @@
+# ## Evaluating HumanEval Results using Modal Sandboxes
+# This script will take generated results and evaluate them.
+# We use Modal Sandboxes to safely evaluate LLM-generated results.
+#
+# Run it with:
+#    modal run eval
+
+from pathlib import Path
+
+import modal
+
+app = modal.App("many-llamas-human-eval")
+
+volume = modal.Volume.from_name("humaneval", create_if_missing=True)
+
+sandbox_image = (
+    modal.Image.debian_slim()
+    .apt_install("git")
+    .run_commands(
+        "git clone https://github.com/modal-labs/human-eval.git",
+        "pip install -e human-eval",
+    )
+)
+
+MINUTES = 60
+
+@app.function(volumes={"/humaneval": volume}, timeout=10 * MINUTES)
+def eval_single_task(sample_file_path: str, problem_file_path: str):
+    with modal.Volume.ephemeral() as vol:
+        with vol.batch_upload() as batch:
+            batch.put_file(sample_file_path, "samples.jsonl")
+            batch.put_file(problem_file_path, "problems.jsonl")
+
+        print(f"Starting sandbox for {sample_file_path}")
+        sandbox = modal.Sandbox.create(
+            "bash",
+            "-c",
+            "evaluate_functional_correctness vol/samples.jsonl --problem_file=vol/problems.jsonl --n_workers=32",
+            image=sandbox_image,
+            volumes={"/vol": vol},
+            timeout=10 * MINUTES,
+            cpu=32,
+        )
+
+        try:
+            sandbox.wait()
+            print(f"Finished sandbox for {sample_file_path}")
+        except modal.exception.FunctionTimeoutError:
+            print("Sandbox timed out")
+
+        if sandbox.returncode == 0:
+            print(sandbox.stdout.read())
+            data = b""
+            for chunk in vol.read_file("samples.jsonl_results.jsonl"):
+                data += chunk
+            with open(f"{sample_file_path}_results.jsonl", "wb") as f:
+                f.write(data)
+        else:
+            print(f"Tests failed with code {sandbox.returncode}")
+            print(sandbox.stderr.read())
+
+
+@app.function(volumes={"/humaneval": volume}, timeout=10 * MINUTES)
+def eval_all_tasks():
+    import os
+
+    volume.reload()
+
+    # Find all files matching /humaneval/{env}/{run}/{id}.jsonl
+    envs = [element for element in Path("/humaneval").iterdir() if element.is_dir()]
+    for env in envs:
+        print(f"looking in {env}")
+        problem_file = env / "data.jsonl"
+
+        pattern = "*/*.jsonl"
+        handles = []
+        for file_path in env.glob(pattern):            
+            # Skip files that end with _results.jsonl
+            if str(file_path).endswith("_results.jsonl"):
+                continue
+
+            print(f"Checking {file_path}")
+            # Check if the corresponding results file exists
+            results_file = f"{file_path}_results.jsonl"
+            if not os.path.exists(results_file):
+                # If it doesn't exist, run do_eval
+                print("Spawning on", file_path, problem_file)
+                handles.append(eval_single_task.spawn(file_path, problem_file))
+
+        for handle in handles:
+            handle.get()
+
+
+@app.local_entrypoint()
+def main():
+    eval_all_tasks.remote()
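Each sandbox run writes a `*_results.jsonl` file next to the corresponding samples file; as in the upstream human-eval harness (and as plot.py below assumes), every line carries a boolean `passed` field. A minimal local sketch for spot-checking one of those files:

```python
# Minimal sketch: compute the raw pass rate of a single human-eval results file.
# Assumes each JSON line has a boolean "passed" field, as plot.py below also assumes.
import json

def raw_pass_rate(results_path: str) -> float:
    with open(results_path) as f:
        rows = [json.loads(line) for line in f if line.strip()]
    return sum(r["passed"] for r in rows) / len(rows)

# e.g. raw_pass_rate("/humaneval/test/run-<timestamp>/0.jsonl_results.jsonl")
```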

+ 248 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/generate.py

@@ -0,0 +1,248 @@
+# ## Generating HumanEval Results with our Llama 3.2 3B Instruct Model
+# This app starts many parallel clients to send requests to the vLLM server.
+# 
+# For each of the tasks in the HumanEval test set, we'll run a client to request 1000 completions.
+# Results are saved to our mounted volume.
+#
+# Run it with:
+#    modal run generate --data-dir test --no-dry-run --n 1000 --subsample 100
+
+from datetime import datetime
+import json
+from pathlib import Path
+from dataclasses import dataclass, asdict
+
+import modal
+
+# This defines the image to use for running openai clients in parallel
+image = modal.Image.debian_slim(python_version="3.11").pip_install(
+    "openai==1.38.0", "datasets==2.20.0"
+)
+
+app = modal.App("many-llamas-human-eval", image=image)
+
+volume = modal.Volume.from_name("humaneval", create_if_missing=True)
+DATA_DIR = Path("/mnt/humaneval")
+
+default_system_prompt = "Write the body for the Python function provided in the prompt below. Do not write anything else. Your output will be directly concatenated with the prompt and the resulting function executed against tests."
+
+MINUTES = 60  # seconds
+HOURS = 60 * MINUTES
+
+@dataclass
+class CompletionParams:
+    model: str = None
+    max_tokens: int = 1024
+    temperature: float = 0.7
+    top_p: float = 0.9
+    frequency_penalty: float = 0
+    presence_penalty: float = 0
+    n: int = 1
+    stop: str = None
+    seed: int = None
+
+@dataclass
+class ClientParams:
+    app_name: str = "many-llamas-human-eval"
+    workspace: str = None
+    api_key: str = "super-secret-token" # match the secret in inference.py
+
+    @property
+    def url(self):
+        return f"https://{self.workspace}--{self.app_name}-serve.modal.run/v1"
+
+
+@app.local_entrypoint()
+def main(
+    app_name: str = "many-llamas-human-eval",
+    workspace: str = None,
+    api_key: str = "super-secret-token",
+    model: str = None,
+    max_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.9,
+    frequency_penalty: float = 0,
+    presence_penalty: float = 0,
+    n: int = 1,
+    stop: str = None,
+    seed: int = None,
+    data_dir: str = "dev-llm",
+    subsample: int = 1, # percent of the test split to read
+    system_prompt: str = default_system_prompt,
+    dry_run: bool = True,
+):
+    if workspace is None:
+        workspace = modal.config._profile
+
+    client_params = ClientParams(app_name, workspace, api_key)
+
+    completion_params = CompletionParams(
+        model=model,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        presence_penalty=presence_penalty,
+        n=n,
+        stop=stop,
+        seed=seed,
+    )
+
+    # Run a remote download function to save the HumanEval dataset in the cloud volume
+    save_dataset.remote(path=data_dir, subsample=subsample)
+
+    # Run a remote generation function
+    results = run_human_eval.remote(
+        client_params=client_params,
+        completion_params=completion_params,
+        system_prompt=system_prompt,
+        data_dir=data_dir,
+        dry_run=dry_run,
+    )
+    if results:
+        with open("/tmp/results.jsonl", "w") as f:
+            f.writelines(json.dumps(result) + "\n" for result in results)
+        print(f"results saved locally to {f.name}")
+
+# This is the parent function that spawns a client for each eval task
+@app.function(volumes={DATA_DIR: volume}, timeout=1 * HOURS)
+def run_human_eval(
+    client_params: ClientParams,
+    completion_params: CompletionParams,
+    data_dir="dev-llm",
+    system_prompt: str = default_system_prompt,
+    dry_run=True,
+):
+    dataset = load_dataset(data_dir)
+
+    timestamp = datetime.utcnow().isoformat() + "Z"
+    output_dir = Path(DATA_DIR) / data_dir / f"run-{timestamp}"
+    output_dir.mkdir(parents=True, exist_ok=True)
+    handles = []
+    print(f"Eval set contains {len(dataset)} items")
+
+    # For each eval item in the dataset, spawn a parallel OpenAI client worker that generates n completions each
+    print(Colors.BOLD, f"Spawning clients for each eval item. You may notice a brief wait while the inference server(s) boot.", Colors.END, sep="")
+    for i, item in enumerate(dataset):
+        handles.append(
+            run_item.spawn(
+                item,
+                client_params,
+                completion_params,
+                system_prompt,
+                output_dir,
+                dry_run,
+            )
+        )
+
+    for handle in handles:
+        result = handle.get()
+
+    if not dry_run:
+        return result
+
+# This function is responsible for generating n completions for a single eval item
+# It calls into our deployed vLLM server and saves results to the cloud volume
+@app.function(volumes={DATA_DIR: volume}, timeout=1 * HOURS)
+def run_item(
+    item: dict,
+    client_params: ClientParams,
+    completion_params: CompletionParams,
+    system_prompt: str,
+    output_dir: Path,
+    dry_run: bool,
+):
+    client = create_client(client_params)
+    if not completion_params.model:
+        model = client.models.list().data[0]
+        model = model.id
+        completion_params.model = model
+
+    prompt = item["prompt"]
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt},
+    ]
+
+    per_request = 250
+    ct, completions = completion_params.n, []
+    if not dry_run:
+        while ct > 0:
+            response = get_completion(
+                client,
+                messages=messages,
+                **asdict(completion_params) | dict(n=min(ct, per_request)),
+            )
+            if response:
+                completions += [
+                    {
+                        "task_id": item["task_id"],
+                        "completion": choice.message.content,
+                    }
+                    for choice in response.choices
+                ]
+            ct -= per_request
+
+        index = item["task_id"].split("/")[-1]
+        output_path = output_dir / f"{index}.jsonl"
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(output_path, "w") as f:
+            f.writelines(json.dumps(completion) + "\n" for completion in completions)
+
+        print(Colors.GREEN + f"Completions saved to {output_path}" + Colors.END)
+
+
+class Colors:
+    """ANSI color codes"""
+
+    GREEN = "\033[0;32m"
+    RED = "\033[0;31m"
+    BLUE = "\033[0;34m"
+    GRAY = "\033[0;90m"
+    BOLD = "\033[1m"
+    END = "\033[0m"
+
+
+def get_completion(client, **kwargs):
+    try:
+        response = client.chat.completions.create(**kwargs)
+        return response
+    except Exception as e:
+        print(Colors.RED, f"Error during API call: {e}", Colors.END, sep="")
+        return None
+
+
+def create_client(client_params: ClientParams):
+    from openai import OpenAI
+
+    client = OpenAI(api_key=client_params.api_key)
+    client.base_url = client_params.url
+
+    return client
+
+# This function downloads the HumanEval dataset
+@app.function(volumes={DATA_DIR: volume})
+def save_dataset(path="dev-llm", subsample: int = 1):
+    import datasets
+
+    path = DATA_DIR / path
+
+    ds = datasets.load_dataset(
+        "openai/openai_humaneval",
+        # reads 0% to subsample% of the test split
+        split=datasets.ReadInstruction("test", to=subsample, unit="%"),
+    )
+
+    ds.to_json(path / "data.jsonl")
+
+    volume.commit()
+
+
+def load_dataset(path="dev-llm"):
+    import datasets
+
+    path = DATA_DIR / path
+
+    ds = datasets.load_dataset(path=str(path), data_files="data.jsonl")
+
+    return ds["train"]

+ 149 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/inference.py

@@ -0,0 +1,149 @@
+# ## Serving Llama 3.2 3B Instruct Model With vLLM
+# This app runs a vLLM server on an A100 GPU.
+#
+# Run it with:
+#    modal deploy inference
+
+import modal
+
+# This defines the image to use for the vLLM server container
+vllm_image = modal.Image.debian_slim(python_version="3.10").pip_install(
+    "vllm==0.5.3post1"
+)
+
+
+MODELS_DIR = "/llamas"
+MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"
+
+# Ensure the model is downloaded and the volume exists
+try:
+    volume = modal.Volume.lookup("llamas", create_if_missing=False)
+except modal.exception.NotFoundError:
+    raise Exception("Download models first with modal run download")
+
+app = modal.App("many-llamas-human-eval")
+
+N_GPU = 1  # tip: for best results, first upgrade to more powerful GPUs, and only then increase GPU count
+TOKEN = (
+    "super-secret-token"  # auth token. for production use, replace with a modal.Secret
+)
+
+MINUTES = 60  # seconds
+HOURS = 60 * MINUTES
+
+@app.function(
+    image=vllm_image,
+    gpu=modal.gpu.A100(count=N_GPU, size="40GB"),
+    container_idle_timeout=5 * MINUTES,
+    timeout=24 * HOURS,
+    allow_concurrent_inputs=20, # VLLM will batch requests so many can be received at once
+    volumes={MODELS_DIR: volume},
+    concurrency_limit=10, # max 10 GPUs
+)
+@modal.asgi_app()
+def serve():
+    import fastapi
+    import vllm.entrypoints.openai.api_server as api_server
+    from vllm.engine.arg_utils import AsyncEngineArgs
+    from vllm.engine.async_llm_engine import AsyncLLMEngine
+    from vllm.entrypoints.logger import RequestLogger
+    from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
+    from vllm.entrypoints.openai.serving_completion import (
+        OpenAIServingCompletion,
+    )
+    from vllm.usage.usage_lib import UsageContext
+
+    volume.reload()  # ensure we have the latest version of the weights
+
+    # create a fastAPI app that uses vLLM's OpenAI-compatible router
+    web_app = fastapi.FastAPI(
+        title=f"OpenAI-compatible {MODEL_NAME} server",
+        description="Run an OpenAI-compatible LLM server with vLLM on modal.com",
+        version="0.0.1",
+        docs_url="/docs",
+    )
+
+    # security: CORS middleware for external requests
+    http_bearer = fastapi.security.HTTPBearer(
+        scheme_name="Bearer Token",
+        description="See code for authentication details.",
+    )
+    web_app.add_middleware(
+        fastapi.middleware.cors.CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # security: inject dependency on authed routes
+    async def is_authenticated(api_key: str = fastapi.Security(http_bearer)):
+        if api_key.credentials != TOKEN:
+            raise fastapi.HTTPException(
+                status_code=fastapi.status.HTTP_401_UNAUTHORIZED,
+                detail="Invalid authentication credentials",
+            )
+        return {"username": "authenticated_user"}
+
+    router = fastapi.APIRouter(dependencies=[fastapi.Depends(is_authenticated)])
+
+    # wrap vllm's router in auth router
+    router.include_router(api_server.router)
+    # add authed vllm to our fastAPI app
+    web_app.include_router(router)
+
+    engine_args = AsyncEngineArgs(
+        model=MODELS_DIR + "/" + MODEL_NAME,
+        tensor_parallel_size=N_GPU,
+        gpu_memory_utilization=0.90,
+        max_model_len=2048,
+        enforce_eager=False,  # capture the graph for faster inference, but slower cold starts (30s > 20s)
+    )
+
+    engine = AsyncLLMEngine.from_engine_args(
+        engine_args, usage_context=UsageContext.OPENAI_API_SERVER
+    )
+
+    model_config = get_model_config(engine)
+
+    request_logger = RequestLogger(max_log_len=2048)
+
+    api_server.openai_serving_chat = OpenAIServingChat(
+        engine,
+        model_config=model_config,
+        served_model_names=[MODEL_NAME],
+        chat_template=None,
+        response_role="assistant",
+        lora_modules=[],
+        prompt_adapters=[],
+        request_logger=request_logger,
+    )
+    api_server.openai_serving_completion = OpenAIServingCompletion(
+        engine,
+        model_config=model_config,
+        served_model_names=[MODEL_NAME],
+        lora_modules=[],
+        prompt_adapters=[],
+        request_logger=request_logger,
+    )
+
+    return web_app
+
+
+def get_model_config(engine):
+    import asyncio
+
+    try:  # adapted from vLLM source -- https://github.com/vllm-project/vllm/blob/507ef787d85dec24490069ffceacbd6b161f4f72/vllm/entrypoints/openai/api_server.py#L235C1-L247C1
+        event_loop = asyncio.get_running_loop()
+    except RuntimeError:
+        event_loop = None
+
+    if event_loop is not None and event_loop.is_running():
+        # If the current process is instanced by Ray Serve,
+        # there is already a running event loop
+        model_config = event_loop.run_until_complete(engine.get_model_config())
+    else:
+        # When using single vLLM without engine_use_ray
+        model_config = asyncio.run(engine.get_model_config())
+
+    return model_config
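Once deployed, every route on this server sits behind the bearer-token dependency defined in `serve` above, at a URL matching the pattern `ClientParams` builds in generate.py. A rough sketch of exercising it directly over HTTP (assumes `requests` is installed locally and `your-workspace` is your Modal workspace name):

```python
# Sketch: call the deployed OpenAI-compatible server directly, passing the bearer token
# that the is_authenticated dependency above checks.
import requests

WORKSPACE = "your-workspace"  # placeholder for your Modal workspace name
BASE_URL = f"https://{WORKSPACE}--many-llamas-human-eval-serve.modal.run/v1"

resp = requests.get(
    f"{BASE_URL}/models",
    headers={"Authorization": "Bearer super-secret-token"},  # TOKEN from inference.py
)
resp.raise_for_status()
print(resp.json())  # should list the served Llama 3.2 3B Instruct model
```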

+ 194 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/plot.py

@@ -0,0 +1,194 @@
+# ## Plotting HumanEval Results
+# This script will calculate pass@k and fail@k for our experiment and plot them.
+#
+# Run it with:
+#    modal run plot
+
+import io
+import json
+from pathlib import Path
+from typing import List, Union
+import itertools
+
+import modal
+
+try:
+    volume = modal.Volume.lookup("humaneval", create_if_missing=False)
+except modal.exception.NotFoundError:
+    raise Exception("Generate results first with modal run generate --data-dir test --no-dry-run --n 1000 --subsample 100")
+
+
+image = modal.Image.debian_slim(python_version="3.11").pip_install(
+    "numpy==1.26.4",
+    "pandas==2.2.3",
+    "matplotlib==3.9.2",
+    "seaborn==0.13.2",
+)
+
+app = modal.App("many-llamas-human-eval", image=image)
+
+DATA_DIR = Path("/mnt/humaneval")
+
+with image.imports():
+    import numpy as np
+    import pandas as pd
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+
+@app.function(volumes={DATA_DIR: volume})
+def render_plots():
+    run_dirs = list(sorted((DATA_DIR / "test").glob("run-*")))
+
+    for run_dir in reversed(run_dirs):
+        if len(list(run_dir.iterdir())) < 150:
+            print(f"skipping incomplete run {run_dir}")
+        else:
+            break
+
+    all_result_paths = list(run_dir.glob("*.jsonl_results.jsonl"))
+
+    data = []
+    for path in all_result_paths:
+        data += [json.loads(line) for line in path.read_text(encoding='utf-8').splitlines()]
+
+    for element in data:
+        del element["completion"]
+
+    df = pd.DataFrame.from_records(data)
+
+    gb = df.groupby("task_id")
+    passes = gb["passed"].sum()
+
+    def estimate_pass_at_k(
+        num_samples: Union[int, List[int], np.ndarray],
+        num_correct: Union[List[int], np.ndarray],
+        k: int
+    ) -> np.ndarray:
+        """
+        Estimates pass@k of each problem and returns them in an array.
+        """
+
+        def estimator(n: int, c: int, k: int) -> float:
+            """
+            Calculates 1 - comb(n - c, k) / comb(n, k).
+            """
+            if n - c < k:
+                return 1.0
+            return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))
+
+        if isinstance(num_samples, int):
+            num_samples_it = itertools.repeat(num_samples, len(num_correct))
+        else:
+            assert len(num_samples) == len(num_correct)
+            num_samples_it = iter(num_samples)
+
+        return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])
+
+    pass_at_ks = {}
+
+    for k in [1, 10, 100, 1000]:
+        pass_at_ks[k] = estimate_pass_at_k(1000, passes, k)
+
+    pass_at_k = {k: np.mean(v) for k, v in pass_at_ks.items()}
+
+    plot_df = pd.DataFrame(
+        {"k": pass_at_k.keys(),
+         "pass@k": pass_at_k.values()}
+    )
+    plot_df["fail@k"] = 1 - plot_df["pass@k"]
+
+    sns.set_theme(style='dark')
+    plt.style.use("dark_background")
+
+    plt.rcParams['font.sans-serif'] = ["Inter", "Arial", "DejaVu Sans", "Liberation Sans", "Bitstream Vera Sans", "sans-serif"]
+
+    sns.despine()
+
+    sns.set_context("talk", rc={"lines.linewidth": 2.5})
+
+    gpt4o_benchmark = 0.902
+
+    # First plot
+    plt.figure(figsize=(10, 6))
+    fg = sns.lineplot(
+        x="k",
+        y="pass@k",
+        data=plot_df,
+        color="#7FEE64",
+        linewidth=6,
+        alpha=0.9,
+        label="Llama 3.2 3B Instruct pass@k"
+    )
+
+    initial_lim = fg.axes.get_xlim()
+    fg.axes.hlines(
+        gpt4o_benchmark, *initial_lim,
+        linestyle="--",
+        alpha=0.6,
+        zorder=-1,
+        label="GPT-4o fail@1"
+    )
+    fg.axes.set_xlim(*initial_lim)
+    fg.axes.set_ylabel("")
+    fg.axes.set_ylim(0, 1)
+    plt.tight_layout(pad=1.2)
+    plt.legend()
+
+    # Save the first plot as bytes
+    img_buffer = io.BytesIO()
+    plt.savefig(img_buffer, format='jpeg')
+    plot_1_img_bytes = img_buffer.getvalue()
+    plt.close()
+
+    # Second plot
+    plt.figure(figsize=(10, 6))
+    fg = sns.lineplot(
+        x="k",
+        y="fail@k",
+        data=plot_df,
+        color="#7FEE64",
+        linewidth=6,
+        alpha=0.9,
+        label="Llama 3.2 3B Instruct fail@k"
+    )
+
+    initial_lim = fg.axes.get_xlim()
+    fg.axes.hlines(
+        1 - gpt4o_benchmark, *initial_lim,
+        linestyle="--",
+        alpha=0.6,
+        zorder=-1,
+        label="GPT-4o fail@1"
+    )
+    fg.axes.set_xlim(*initial_lim)
+    fg.axes.set_ylabel("")
+    fg.axes.set_yscale("log")
+    fg.axes.set_xscale("log")
+    fg.axes.set_xlim(0.5, 2000)
+    fg.axes.set_ylim(1e-2, 1e0)
+    plt.tight_layout(pad=1.2)
+    plt.legend()
+
+    # Save the second plot as bytes
+    img_buffer = io.BytesIO()
+    plt.savefig(img_buffer, format='jpeg')
+    plot_2_img_bytes = img_buffer.getvalue()
+    plt.close()
+
+    return [plot_1_img_bytes, plot_2_img_bytes]
+
+@app.local_entrypoint()
+def main():
+    plots = render_plots.remote()
+
+    assert len(plots) == 2
+
+    with open ("/tmp/plot-pass-k.jpeg", "wb") as f:
+        f.write(plots[0])
+    
+    with open ("/tmp/plot-fail-k.jpeg", "wb") as f:
+        f.write(plots[1])
+
+    print("Plots saved to:")
+    print("  /tmp/plot-pass-k.jpeg")
+    print("  /tmp/plot-fail-k.jpeg")

+ 21 - 0
recipes/3p_integrations/modal/many-llamas-human-eval/run_e2e.sh

@@ -0,0 +1,21 @@
+#!/bin/bash
+set -euo pipefail
+IFS=$'\n\t'
+
+command -v modal >/dev/null 2>&1 || { echo >&2 "modal command not found. Install modal first! Aborting."; exit 1; }
+
+echo 'downloading LLaMA 3.2 3B Instruct model'
+echo 'make sure to create a Secret called huggingface on Modal and accept the LLaMA 3.2 license'
+modal run download.py
+
+echo 'deploying vLLM inference server'
+modal deploy inference.py
+
+echo 'running HumanEval generation'
+modal run generate.py --data-dir test --no-dry-run --n 1000 --subsample 100
+
+echo 'running HumanEval evaluation'
+modal run eval.py
+
+echo 'generating graphs for pass@k and fail@k'
+modal run plot.py

+ 3 - 4
recipes/3p_integrations/octoai/video_summary.ipynb

@@ -8,7 +8,7 @@
     "## This demo app shows:\n",
     "* How to use LangChain's YoutubeLoader to retrieve the caption in a YouTube video\n",
     "* How to ask Llama 3 to summarize the content (per the Llama's input size limit) of the video in a naive way using LangChain's stuff method\n",
-    "* How to bypass the limit of Llama 3's max input token size by using a more sophisticated way using LangChain's map_reduce and refine methods - see [here](https://python.langchain.com/docs/use_cases/summarization) for more info"
+    "* How to bypass the limit of Llama 3's max input token size by using a more sophisticated way using LangChain's map_reduce and refine methods - see [here](https://python.langchain.com/docs/tutorials/summarization/) for more info"
    ]
   },
   {
@@ -22,7 +22,7 @@
     "- [tiktoken](https://github.com/openai/tiktoken) BytePair Encoding tokenizer\n",
     "- [pytube](https://pytube.io/en/latest/) Utility for downloading YouTube videos\n",
     "\n",
-    "**Note** This example uses OctoAI to host the Llama 3 model. If you have not set up/or used OctoAI before, we suggest you take a look at the [HelloLlamaCloud](HelloLlamaCloud.ipynb) example for information on how to set up OctoAI before continuing with this example.\n",
+    "**Note** This example uses OctoAI to host the Llama 3 model. If you have not set up/or used OctoAI before, we suggest you take a look at the [hello_llama_cloud](hello_llama_cloud.ipynb) example for information on how to set up OctoAI before continuing with this example.\n",
     "If you do not want to use OctoAI, you will need to make some changes to this notebook as you go along."
    ]
   },
@@ -205,7 +205,7 @@
    "id": "e112845f-de16-4c2f-8afe-6cca31f6fa38",
    "metadata": {},
    "source": [
-    "To fix this, you can use LangChain's load_summarize_chain method (detail [here](https://python.langchain.com/docs/use_cases/summarization)).\n",
+    "To fix this, you can use LangChain's load_summarize_chain method (detail [here](https://python.langchain.com/docs/tutorials/summarization/)).\n",
     "\n",
     "First you'll create splits or sub-documents of the original content, then use the LangChain's `load_summarize_chain` with the `refine` or `map_reduce type`.\n",
     "\n",
@@ -221,7 +221,6 @@
    "source": [
     "import os\n",
     "os.environ[\"LANGCHAIN_API_KEY\"] = \"your_langchain_api_key\"\n",
-    "os.environ[\"LANGCHAIN_API_KEY\"] = \"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\"\n",
     "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
     "os.environ[\"LANGCHAIN_PROJECT\"] = \"Video Summary with Llama 3\""
    ]

+ 4 - 4
recipes/3p_integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb

@@ -635,7 +635,7 @@
         }
       ],
       "source": [
-        "# Dowload and rename the last presentation from Nvidia to investors\n",
+        "# Download and rename the last presentation from Nvidia to investors\n",
         "!wget https://s201.q4cdn.com/141608511/files/doc_presentations/2023/Oct/01/ndr_presentation_oct_2023_final.pdf\n",
         "!mv ndr_presentation_oct_2023_final.pdf nvidia_presentation.pdf"
       ]
@@ -811,7 +811,7 @@
       "source": [
         "### How does this work? What happens under the hood between the different pages and query token?\n",
         "\n",
-        "The interaction operation between page image patch and query text token representations to score each page of the document is what allows this great retreival performance.\n",
+        "The interaction operation between page image patch and query text token representations to score each page of the document is what allows this great retrieval performance.\n",
         "\n",
         "Typically each image is resized and cut into patch sizes of 16x16 pixels. These patches are then embedded into 128 dimensional vectors which are stored and used to perform the MaxSim and late interaction operations between the image and text tokens. ColPali is a multi-vector approach because it produces multiple vectors for each image/query; one vector for each token instead of just one vector for all tokens. \n",
         "\n",
@@ -878,7 +878,7 @@
       },
       "outputs": [],
       "source": [
-        "# Since we stored the collection along with the index we have the base64 images of all PDF pages aswell!\n",
+        "# Since we stored the collection along with the index we have the base64 images of all PDF pages as well!\n",
         "model.search(query, k=1)"
       ]
     },
@@ -949,7 +949,7 @@
       "source": [
         "Here we can see that the combination of ColQwen2 as a image retriever and Llama-3.2 90B Vision is a powerful duo for multimodal RAG applications specially with PDFs.\n",
         "\n",
-        "Not only was ColQwen2 able to retrieve the correct page that had the right answer on it but then Llama-3.2 90B Vision was also able to find exactly where on the page this answer was, ignoring all the irrelvant details!\n",
+        "Not only was ColQwen2 able to retrieve the correct page that had the right answer on it but then Llama-3.2 90B Vision was also able to find exactly where on the page this answer was, ignoring all the irrelevant details!\n",
         "\n",
         "Voila!🎉🎉\n",
         "\n",

+ 3 - 3
recipes/3p_integrations/togetherai/structured_text_extraction_from_images.ipynb

@@ -143,7 +143,7 @@
         "id": "8aPkxE7MnbkX"
       },
       "source": [
-        "## Lets bring in the reciept that we want to extract information from\n",
+        "## Lets bring in the receipt that we want to extract information from\n",
         "\n",
         "Notice that this is a real receipt with multiple portions that are not relevant to the line item extraction structure we've outlined above.\n",
         "\n",
@@ -243,7 +243,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Notice that the model is not perfect and wasn't able to extract out some line items. It's hard for most models to perform this zero-shot extraction of data from images. A way to improve this is to finetune the model using [Visual Intruction Tuning](https://arxiv.org/abs/2304.08485)."
+        "Notice that the model is not perfect and wasn't able to extract out some line items. It's hard for most models to perform this zero-shot extraction of data from images. A way to improve this is to finetune the model using [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485)."
       ]
     },
     {
@@ -401,7 +401,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Althought with some missed line items we were able to extract out structured JSON from an image in a zero shot manner! To improve the results for your pipeline and make them production ready I recommend you [finetune](https://docs.together.ai/docs/fine-tuning-overview) the vision model on your own dataset!\n",
+        "Although with some missed line items we were able to extract out structured JSON from an image in a zero shot manner! To improve the results for your pipeline and make them production ready I recommend you [finetune](https://docs.together.ai/docs/fine-tuning-overview) the vision model on your own dataset!\n",
         "\n",
         "Learn more about how to use JSON mode in the [docs](https://docs.together.ai/docs/json-mode) here!"
       ]

+ 2 - 2
recipes/3p_integrations/vllm/README.md

@@ -27,12 +27,12 @@ To launch the inference simply execute the following command changing the tp_siz
 python inference.py --model_name $MODEL_PATH --peft_model_name $PEFT_MODEL_PATH --tp_size 8 --user_prompt "Hello my name is"
 ```
The script will ask for another prompt in a loop after completing the generation, which you can exit by simply pressing enter and leaving the prompt empty.
-When using multiple gpus the model will automatically be split accross the available GPUs using tensor parallelism.
+When using multiple gpus the model will automatically be split across the available GPUs using tensor parallelism.
 
 ## Multi-node multi-gpu inference
 The FP8 quantized variants of Meta Llama (i.e. meta-llama/Meta-Llama-3.1-405B-FP8 and meta-llama/Meta-Llama-3.1-405B-Instruct-FP8) can be executed on a single node with 8x80GB H100 using the script located in this folder.
 To run the unquantized Meta Llama 405B variants (i.e. meta-llama/Meta-Llama-3.1-405B and meta-llama/Meta-Llama-3.1-405B-Instruct) we need multi-node inference.
-vLLM allows this by leveraging pipeline parallelism accros nodes while still applying tensor parallelism insid each node.
+vLLM allows this by leveraging pipeline parallelism across nodes while still applying tensor parallelism inside each node.
To start multi-node inference we first need to set up a Ray cluster, which will be leveraged by vLLM to execute the model across node boundaries.
 
 ```bash

File diff suppressed because it is too large
+ 1 - 1
recipes/experimental/long_context/H2O/README.md


+ 1 - 1
recipes/experimental/long_context/H2O/src/streaming.sh

@@ -12,7 +12,7 @@ elif [[ ${method} == 'full' ]]; then
         --input-path data \
         --model-name lmsys/vicuna-13b-v1.5
 else
-    echo 'unknown argment for method'
+    echo 'unknown argument for method'
 fi
 
 

+ 1 - 1
recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb

@@ -152,7 +152,7 @@
    "source": [
     "## Notebook Setup\n",
     "\n",
-    "The following APIs will be used to call LLMs throughout the guide. As an example, we'll call Llama 3.1 chat using [Grok](https://console.groq.com/playground?model=llama3-70b-8192).\n",
+    "The following APIs will be used to call LLMs throughout the guide. As an example, we'll call Llama 3.1 chat using [Groq](https://console.groq.com/playground?model=llama3-70b-8192).\n",
     "\n",
     "To install prerequisites run:"
    ]

+ 4 - 4
recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_101.ipynb

@@ -22,7 +22,7 @@
     "- Understand how the tool calls are handled under the hood\n",
     "- 3.2 Model Tool Calling Format and Behaviour\n",
     "\n",
-    "In Part 2, we will learn how to build system that can get us comparision between 2 papers"
+    "In Part 2, we will learn how to build system that can get us comparison between 2 papers"
    ]
   },
   {
@@ -400,7 +400,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If everything is setup correctly-the model should now wrap function calls  with the `|<python_tag>|` following the actualy function call. \n",
+    "If everything is setup correctly-the model should now wrap function calls  with the `|<python_tag>|` following the actually function call. \n",
     "\n",
     "This can allow you to manage your function calling logic accordingly. \n",
     "\n",
@@ -660,11 +660,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Life is great because Llama Team writes great docs for us, so we can conviently copy-pasta examples from there :)\n",
+    "Life is great because Llama Team writes great docs for us, so we can conveniently copy-pasta examples from there :)\n",
     "\n",
     "[Here](https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2#-tool-calling-(1b/3b)-) are the docs for your reference that we will be using. \n",
     "\n",
-    "Excercise for viewer: Use `llama-toolchain` again to verify like we did earlier and then start the prompt engineering for the small Llamas."
+    "Exercise for viewer: Use `llama-toolchain` again to verify like we did earlier and then start the prompt engineering for the small Llamas."
    ]
   },
   {

+ 1 - 1
recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_201.ipynb

@@ -403,7 +403,7 @@
    "source": [
     "def get_arxiv_ids(web_results: dict, temperature: int = 0, max_tokens=512):\n",
     "    # Initialize chat history with a specific prompt to extract arXiv IDs\n",
-    "    arxiv_id_chat_history = [{\"role\": \"system\", \"content\": \"Given this input, give me the arXiv ID of the papers. The input has the query and web results. DO NOT WRITE ANYTHING ELSE IN YOUR RESPONSE: ONLY THE ARXIV ID ONCE, the web search will have it repeated mutliple times, just return the it once and where its actually the arxiv ID\"}, {\"role\": \"user\", \"content\": f\"Here is the query and results{web_results}\"}]\n",
+    "    arxiv_id_chat_history = [{\"role\": \"system\", \"content\": \"Given this input, give me the arXiv ID of the papers. The input has the query and web results. DO NOT WRITE ANYTHING ELSE IN YOUR RESPONSE: ONLY THE ARXIV ID ONCE, the web search will have it repeated multiple times, just return the it once and where its actually the arxiv ID\"}, {\"role\": \"user\", \"content\": f\"Here is the query and results{web_results}\"}]\n",
     "\n",
     "    # Call the model to process the input and extract arXiv IDs\n",
     "    response = client.chat.completions.create(\n",

+ 28 - 28
recipes/quickstart/build_with_Llama_3_2.ipynb

@@ -417,7 +417,7 @@
     "    plt.axis('off')\n",
     "    plt.show()\n",
     "\n",
-    "display_local_image(\"images/a_colorful_llama_doing_ai_programming.jpeg\")"
+    "display_local_image(\"../../docs/img/a_colorful_llama_doing_ai_programming.jpeg\")"
    ]
   },
   {
@@ -441,7 +441,7 @@
     "  with open(image_path, \"rb\") as img:\n",
     "    return base64.b64encode(img.read()).decode('utf-8')\n",
     "\n",
-    "base64_image = encode_image(\"images/a_colorful_llama_doing_ai_programming.jpeg\")"
+    "base64_image = encode_image(\"../../docs/img/a_colorful_llama_doing_ai_programming.jpeg\")"
    ]
   },
   {
@@ -525,8 +525,8 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/grocery_shopping_bascket_with_salmon_in_package.jpeg\")\n",
-    "base64_image = encode_image(\"images/grocery_shopping_bascket_with_salmon_in_package.jpeg\")"
+    "display_local_image(\"../../docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg\")\n",
+    "base64_image = encode_image(\"../../docs/img/grocery_shopping_bascket_with_salmon_in_package.jpeg\")"
    ]
   },
   {
@@ -1066,8 +1066,8 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/thumbnail_IMG_1329.jpg\")\n",
-    "img = Image.open(\"images/thumbnail_IMG_1329.jpg\")\n",
+    "display_local_image(\"../../docs/img/thumbnail_IMG_1329.jpg\")\n",
+    "img = Image.open(\"../../docs/img/thumbnail_IMG_1329.jpg\")\n",
     "width, height = img.size\n",
     "print(\"Image dimensions:\", width, height)"
    ]
@@ -1112,7 +1112,7 @@
     "  # Resize the image while maintaining aspect ratio\n",
     "  resized_img = img.resize((new_width, new_height))\n",
     "\n",
-    "  resized_img.save(\"images/resized_image.jpg\")\n",
+    "  resized_img.save(\"../../docs/img/resized_image.jpg\")\n",
     "\n",
     "  print(\"Original size:\", original_width, \"x\", original_height)\n",
     "  print(\"New size:\", new_width, \"x\", new_height)\n",
@@ -1121,7 +1121,7 @@
     "    \n",
     "max_dimension = 1120\n",
     "resized_img = resize_image(img)\n",
-    "base64_image = encode_image(\"images/resized_image.jpg\")"
+    "base64_image = encode_image(\"../../docs/img/resized_image.jpg\")"
    ]
   },
   {
@@ -1223,8 +1223,8 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/thumbnail_IMG_6385.jpg\")\n",
-    "img = Image.open(\"images/thumbnail_IMG_6385.jpg\")\n",
+    "display_local_image(\"../../docs/img/thumbnail_IMG_6385.jpg\")\n",
+    "img = Image.open(\"../../docs/img/thumbnail_IMG_6385.jpg\")\n",
     "width, height = img.size\n",
     "print(\"Image dimensions:\", width, height)"
    ]
@@ -1247,7 +1247,7 @@
    "source": [
     "max_dimension = 1120\n",
     "resized_img = resize_image(img)\n",
-    "base64_image = encode_image(\"images/resized_image.jpg\")"
+    "base64_image = encode_image(\"../../docs/img/resized_image.jpg\")"
    ]
   },
   {
@@ -1320,7 +1320,7 @@
     }
    ],
    "source": [
-    "base64_image = encode_image(\"images/resized_image.jpg\")\n",
+    "base64_image = encode_image(\"../../docs/img/resized_image.jpg\")\n",
     "\n",
     "messages = [\n",
     "  {\n",
@@ -1381,8 +1381,8 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/meta_release.png\")\n",
-    "base64_image = encode_image(\"images/meta_release.png\")"
+    "display_local_image(\"../../docs/img/meta_release.png\")\n",
+    "base64_image = encode_image(\"../../docs/img/meta_release.png\")"
    ]
   },
   {
@@ -1448,8 +1448,8 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/llama_stack.png\")\n",
-    "base64_image = encode_image(\"images/llama_stack.png\")"
+    "display_local_image(\"../../docs/img/llama_stack.png\")\n",
+    "base64_image = encode_image(\"../../docs/img/llama_stack.png\")"
    ]
   },
   {
@@ -1553,8 +1553,8 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/thumbnail_IMG_1440.jpg\")\n",
-    "img = Image.open(\"images/thumbnail_IMG_1440.jpg\")\n",
+    "display_local_image(\"../../docs/img/thumbnail_IMG_1440.jpg\")\n",
+    "img = Image.open(\"../../docs/img/thumbnail_IMG_1440.jpg\")\n",
     "width, height = img.size\n",
     "print(\"Image dimensions:\", width, height)"
    ]
@@ -1577,7 +1577,7 @@
    "source": [
     "max_dimension = 1120\n",
     "resized_img = resize_image(img)\n",
-    "base64_image = encode_image(\"images/resized_image.jpg\")"
+    "base64_image = encode_image(\"../../docs/img/resized_image.jpg\")"
    ]
   },
   {
@@ -1686,11 +1686,11 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/thumbnail_IMG_6385.jpg\")\n",
-    "img = Image.open(\"images/thumbnail_IMG_6385.jpg\")\n",
+    "display_local_image(\"../../docs/img/thumbnail_IMG_6385.jpg\")\n",
+    "img = Image.open(\"../../docs/img/thumbnail_IMG_6385.jpg\")\n",
     "max_dimension = 1120\n",
     "resized_img = resize_image(img)\n",
-    "base64_image = encode_image(\"images/resized_image.jpg\")"
+    "base64_image = encode_image(\"../../docs/img/resized_image.jpg\")"
    ]
   },
   {
@@ -2488,7 +2488,7 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/cat.jpeg\")"
+    "display_local_image(\"../../docs/img/cat.jpeg\")"
    ]
   },
   {
@@ -2626,7 +2626,7 @@
     "        if log is not None:\n",
     "            log.print()\n",
     "\n",
-    "await run_main(\"images/cat.jpeg\",\n",
+    "await run_main(\"../../docs/img/cat.jpeg\",\n",
     "         \"What cat breed is this? Tell me in detail about the breed.\")"
    ]
   },
@@ -2656,8 +2656,8 @@
     }
    ],
    "source": [
-    "display_local_image(\"images/gnocchi_alla_romana.jpeg\")\n",
-    "img = Image.open(\"images/gnocchi_alla_romana.jpeg\")\n",
+    "display_local_image(\"../../docs/img/gnocchi_alla_romana.jpeg\")\n",
+    "img = Image.open(\"../../docs/img/gnocchi_alla_romana.jpeg\")\n",
     "resized_img = resize_image(img)"
    ]
   },
@@ -2708,7 +2708,7 @@
     }
    ],
    "source": [
-    "await run_main(\"images/resized_image.jpg\",\n",
+    "await run_main(\"../../docs/img/resized_image.jpg\",\n",
     "         \"What's the name of this dish? How can I make it?\")"
    ]
   },
@@ -2739,7 +2739,7 @@
     "### ExecuTorch (XNNPACK framework)\n",
     "In this workshop we will walk you through the end to end workflow for building an android demo app using CPU on device via XNNPACK framework.\n",
     "To do so we need to follow these steps:\n",
-    "<img src=\"images/llama-mobile-confirmed.png\" alt=\"\" /> \n",
+    "<img src=\"../../docs/img/llama-mobile-confirmed.png\" alt=\"\" /> \n",
     "\n",
     "\n",
     "For detailed explanation of each of these steps please see this [link](https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/LlamaDemo/docs/delegates/xnnpack_README.md). Alternatively, you can follow this [tutorial](https://github.com/pytorch/executorch/blob/main/examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md) for running Llama 3.2 lightweight models on your iOS device!"

+ 2 - 2
recipes/quickstart/finetuning/README.md

@@ -79,7 +79,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
    * `mixed_precision` boolean flag to specify using mixed precision, defaults to true.
 
-    * `use_fp16` boolean flag to specify using FP16 for mixed precision, defatults to False. We recommond not setting this flag, and only set `mixed_precision` that will use `BF16`, this will help with speed and memory savings while avoiding challenges of scaler accuracies with `FP16`.
+    * `use_fp16` boolean flag to specify using FP16 for mixed precision, defaults to False. We recommend not setting this flag, and only setting `mixed_precision`, which will use `BF16`; this will help with speed and memory savings while avoiding the challenges of scaler accuracy with `FP16`.
 
     *  `sharding_strategy` this specifies the sharding strategy for FSDP, it can be:
         * `FULL_SHARD` that shards model parameters, gradients and optimizer states, results in the most memory savings.
@@ -92,7 +92,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
* `checkpoint_type` specifies the state dict checkpoint type for saving the model. `FULL_STATE_DICT` streams the state_dict of each model shard from a rank to CPU and assembles the full state_dict on CPU. `SHARDED_STATE_DICT` saves one checkpoint per rank, and enables re-loading the model in a different world size.
 
-* `fsdp_activation_checkpointing` enables activation checkpoining for FSDP, this saves significant amount of memory with the trade off of recomputing itermediate activations during the backward pass. The saved memory can be re-invested in higher batch sizes to increase the throughput. We recommond you use this option.
+* `fsdp_activation_checkpointing` enables activation checkpointing for FSDP; this saves a significant amount of memory with the trade-off of recomputing intermediate activations during the backward pass. The saved memory can be re-invested in higher batch sizes to increase the throughput. We recommend you use this option.
 
 * `pure_bf16` it moves the  model to `BFloat16` and if `optimizer` is set to `anyprecision` then optimizer states will be kept in `BFloat16` as well. You can use this option if necessary.
 

+ 1 - 1
recipes/quickstart/inference/local_inference/README.md

@@ -119,7 +119,7 @@ Then convert your FSDP checkpoint to HuggingFace checkpoints using:
 
  # --HF_model_path_or_name specifies the HF Llama model name or path where it has config.json and tokenizer.json
  ```
-By default, training parameter are saved in `train_params.yaml` in the path where FSDP checkpoints are saved, in the converter script we frist try to find the HugingFace model name used in the fine-tuning to load the model with configs from there, if not found user need to provide it.
+By default, training parameters are saved in `train_params.yaml` in the path where FSDP checkpoints are saved. In the converter script we first try to find the HuggingFace model name used in the fine-tuning to load the model with configs from there; if not found, the user needs to provide it.
 
 Then run inference using:
 

+ 1 - 1
recipes/responsible_ai/prompt_guard/inference.py

@@ -11,7 +11,7 @@ Utilities for loading the PromptGuard model and evaluating text for jailbreaks a
 
 Note that the underlying model has a maximum recommended input size of 512 tokens as a DeBERTa model.
 The final two functions in this file implement efficient parallel batched evaluation of the model on a list
-of input strings of arbirary length, with the final score for each input being the maximum score across all
+of input strings of arbitrary length, with the final score for each input being the maximum score across all
 chunks of the input string.
 """
 

+ 688 - 0
recipes/use_cases/browser_use/agent/browser-use-quickstart.ipynb

@@ -0,0 +1,688 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 🌐 Building an Intelligent Browser Agent with Llama 3.2\n",
+    "\n",
+    "This notebook provides a step-by-step guide to creating an AI-powered browser agent capable of navigating and interacting with websites autonomously. By combining the power of Llama 3.2 Vision, Playwright, and Together AI, this agent can perform tasks seamlessly while understanding both visual and textual content.\n",
+    "\n",
+    "##### Demo\n",
+    "For a detailed explanation of the code and a demo video, visit our blog post: [**Blog Post and Demo Video**](https://miguelg719.github.io/browser-use-blog/)\n",
+    "\n",
+    "##### Features\n",
+    "- Visual understanding of web pages through screenshots\n",
+    "- Autonomous navigation and interaction\n",
+    "- Natural language instructions for web tasks\n",
+    "- Persistent browser session management\n",
+    "\n",
+    "For example, you can ask the agent to:\n",
+    "- Search for a product on Amazon\n",
+    "- Find the cheapest flight to Tokyo\n",
+    "- Buy tickets for the next Warriors game\n",
+    "\n",
+    "\n",
+    "##### What's in this Notebook?\n",
+    "\n",
+    "This recipe walks you through:\n",
+    "- Setting up the environment and installing dependencies.\n",
+    "- Automating browser interactions using Playwright.\n",
+    "- Defining a structured prompt for the LLM to understand the task and execute the next action.\n",
+    "- Leveraging Llama 3.2 Vision for content comprehension.\n",
+    "- Creating a persistent and intelligent browser agent for real-world applications.\n",
+    "\n",
+    "***Please note that the agent is not perfect and may not always behave as expected.**\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Install Required Libraries\n",
+    "This cell installs the necessary Python packages for the script, such as `together`, `playwright`, and `beautifulsoup4`.\n",
+    "It also ensures that Playwright is properly installed to enable automated browser interactions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install together playwright\n",
+    "!playwright install"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Import Modules and Set Up Environment\n",
+    "Set your `Together` API key to instantiate the client client. Feel free to use a different provider if it's more convenient. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "from together import Together\n",
+    "\n",
+    "load_dotenv()\n",
+    "\n",
+    "client = Together(api_key=os.getenv(\"TOGETHER_API_KEY\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##### Vision Query Example\n",
+    "This function converts an image file into a Base64-encoded string, which is required for LLM querying.\n",
+    "\n",
+    "The next cell shows an example of how to use the `encode_image` function to convert an image file into a Base64-encoded string, which is then used in a chat completion request to the Llama 3.2 Vision model.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import base64\n",
+    "from IPython.display import Markdown\n",
+    "imagePath= \"sample_screenshot.png\"\n",
+    "\n",
+    "def encode_image(image_path):\n",
+    "        with open(image_path, \"rb\") as image_file:\n",
+    "            return base64.b64encode(image_file.read()).decode('utf-8')\n",
+    "\n",
+    "# Must have an image on the local path to use it\n",
+    "base64_image = encode_image(imagePath)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response = client.chat.completions.create(\n",
+    "    model=\"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\",\n",
+    "    messages=[\n",
+    "        {\n",
+    "            \"role\": \"user\",\n",
+    "            \"content\": [\n",
+    "                {\"type\": \"text\", \"text\": \"what is this image about?\"},\n",
+    "                {\n",
+    "                    \"type\": \"image_url\",\n",
+    "                    # Uses a local image path. To use a remote image, replace the url with the image URL.\n",
+    "                    \"image_url\": {\n",
+    "                        \"url\": f\"data:image/jpeg;base64,{base64_image}\",\n",
+    "                    }\n",
+    "                },\n",
+    "            ],\n",
+    "        }\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "display(Markdown(response.choices[0].message.content))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Helper Functions to Parse the Accessibility Tree\n",
+    "\n",
+    "The agent will use the accessibility tree to understand the elements on the page and interact with them. A helper function is defined here to help simplity the accessibility tree for the agent."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def parse_accessibility_tree(node, indent=0):\n",
+    "    \"\"\"\n",
+    "    Recursively parses the accessibility tree and prints a readable structure.\n",
+    "    Args:\n",
+    "        node (dict): A node in the accessibility tree.\n",
+    "        indent (int): Indentation level for the nested structure.\n",
+    "    \"\"\"\n",
+    "    # Initialize res as an empty string at the start of each parse\n",
+    "    res = \"\"\n",
+    "    \n",
+    "    def _parse_node(node, indent, res):\n",
+    "        # Base case: If the node is empty or doesn't have a 'role', skip it\n",
+    "        if not node or 'role' not in node:\n",
+    "            return res\n",
+    "\n",
+    "        # Indentation for nested levels\n",
+    "        indented_space = \" \" * indent\n",
+    "        \n",
+    "        # Add node's name and role to result string\n",
+    "        if 'value' in node:\n",
+    "            res = res + f\"{indented_space}Role: {node['role']} - Name: {node.get('name', 'No name')} - Value: {node['value']}\\n\"\n",
+    "        else:\n",
+    "            res = res + f\"{indented_space}Role: {node['role']} - Name: {node.get('name', 'No name')}\\n\"\n",
+    "        \n",
+    "        # If the node has children, recursively parse them\n",
+    "        if 'children' in node:\n",
+    "            for child in node['children']:\n",
+    "                res = _parse_node(child, indent + 2, res)  # Increase indentation for child nodes\n",
+    "                \n",
+    "        return res\n",
+    "\n",
+    "    return _parse_node(node, indent, res)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Define Prompts\n",
+    "a) **Planning Prompt:**\n",
+    "Create a structured prompt for the LLM to understand the task and execute the next action.\n",
+    "\n",
+    "b) **Agent Execution Prompt**\n",
+    "A structured prompt is created, specifying the instructions for processing the webpage content and screenshots."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "planning_prompt = \"\"\"\n",
+    "Given a user request, define a very simple plan of subtasks (actions) to achieve the desired outcome and execute them iteratively using Playwright.\n",
+    "\n",
+    "1. Understand the Task:\n",
+    "   - Interpret the user's request and identify the core goal.\n",
+    "   - Break down the task into a few smaller, actionable subtasks to achieve the goal effectively.\n",
+    "\n",
+    "2. Planning Actions:\n",
+    "   - Translate the user's request into a high-level plan of actions.\n",
+    "   - Example actions include:\n",
+    "     - Searching for specific information.\n",
+    "     - Navigating to specified URLs.\n",
+    "     - Interacting with website elements (clicking, filling).\n",
+    "     - Extracting or validating data.\n",
+    "\n",
+    "Input:\n",
+    "- User Request (Task)\n",
+    "\n",
+    "Output from the Agent:\n",
+    "- Step-by-Step Action Plan:: Return only an ordered list of actions. Only return the list, no other text.\n",
+    "\n",
+    "**Example User Requests and Agent Behavior:**\n",
+    "\n",
+    "1. **Input:** \"Search for a product on Amazon.\"\n",
+    "   - **Output:**\n",
+    "     1. Navigate to Amazon's homepage.\n",
+    "     2. Enter the product name in the search bar and perform the search.\n",
+    "     3. Extract and display the top results, including the product title, price, and ratings.\n",
+    "\n",
+    "2. **Input:** \"Find the cheapest flight to Tokyo.\"\n",
+    "   - **Output:**\n",
+    "     1. Visit a flight aggregator website (e.g. Kayak).\n",
+    "     2. Enter the departure city.\n",
+    "     3. Enter the destination city\n",
+    "     4. Enter the start and end dates.\n",
+    "     5. Extract and compare the flight options, highlighting the cheapest option.\n",
+    "\n",
+    "3. **Input:** \"Buy tickets for the next Warriors game.\"\n",
+    "   - **Output:**\n",
+    "     1. Navigate to a ticket-selling platform (e.g., Ticketmaster).\n",
+    "     2. Fill the search bar with the team name.\n",
+    "     2. Search for upcoming team games.\n",
+    "     3. Select the next available game and purchase tickets for the specified quantity.\n",
+    "\n",
+    "\"\"\"\n",
+    "\n",
+    "\n",
+    "execution_prompt = \"\"\"\n",
+    "You will be given a task, a website's page accessibility tree, and the page screenshot as context. The screenshot is where you are now, use it to understand the accessibility tree. Based on that information, you need to decide the next step action. ONLY RETURN THE NEXT STEP ACTION IN A SINGLE JSON.\n",
+    "\n",
+    "When selecting elements, use elements from the accessibility tree.\n",
+    "\n",
+    "Reflect on what you are seeing in the accessibility tree and the screenshot and decide the next step action, elaborate on it in reasoning, and choose the next appropriate action.\n",
+    "\n",
+    "Selectors must follow the format:\n",
+    "- For a button with a specific name: \"button=ButtonName\"\n",
+    "- For a placeholder (e.g., input field): \"placeholder=PlaceholderText\"\n",
+    "- For text: \"text=VisibleText\"\n",
+    "\n",
+    "Make sure to analyze the accessibility tree and the screenshot to understand the current state, if something is not clear, you can use the previous actions to understand the current state. Explain why you are in the current state in current_state.\n",
+    "\n",
+    "You will be given a task and you MUST return the next step action in JSON format:\n",
+    "{\n",
+    "    \"current_state\": \"Where are you now? Analyze the accessibility tree and the screenshot to understand the current state.\",\n",
+    "    \"reasoning\": \"What is the next step to accomplish the task?\",\n",
+    "    \"action\": \"navigation\" or \"click\" or \"fill\" or \"finished\",\n",
+    "    \"url\": \"https://www.example.com\", // Only for navigation actions\n",
+    "    \"selector\": \"button=Click me\", // For click or fill actions, derived from the accessibility tree\n",
+    "    \"value\": \"Input text\", // Only for fill actions\n",
+    "}\n",
+    "\n",
+    "### Guidelines:\n",
+    "1. Use **\"navigation\"** for navigating to a new website through a URL.\n",
+    "2. Use **\"click\"** for interacting with clickable elements. Examples:\n",
+    "   - Buttons: \"button=Click me\"\n",
+    "   - Text: \"text=VisibleText\"\n",
+    "   - Placeholders: \"placeholder=Search...\"\n",
+    "   - Link: \"link=BUY NOW\"\n",
+    "3. Use **\"fill\"** for inputting text into editable fields. Examples:\n",
+    "   - Placeholder: \"placeholder=Search...\"\n",
+    "   - Textbox: \"textbox=Flight destination output\"\n",
+    "   - Input: \"input=Search...\"\n",
+    "4. Use **\"finished\"** when the task is done. For example:\n",
+    "   - If a task is successfully completed.\n",
+    "   - If navigation confirms you are on the correct page.\n",
+    "\n",
+    "\n",
+    "### Accessibility Tree Examples:\n",
+    "\n",
+    "You will be given an accessibility tree to interact with the webpage. It consists of a nested node structure that represents elements on the page. For example:\n",
+    "\n",
+    "Role: generic - Name: \n",
+    "   Role: text - Name: San Francisco (SFO)\n",
+    "   Role: button - Name: \n",
+    "   Role: listitem - Name: \n",
+    "   Role: textbox - Name: Flight origin input\n",
+    "Role: button - Name: Swap departure airport and destination airport\n",
+    "Role: generic - Name: \n",
+    "   Role: textbox - Name: Flight destination input\n",
+    "Role: button - Name: Start date\n",
+    "Role: button - Name: \n",
+    "Role: button - Name: \n",
+    "Role: button - Name: End date\n",
+    "Role: button - Name: \n",
+    "Role: button - Name: \n",
+    "Role: button - Name: Search\n",
+    "\n",
+    "This section indicates that there is a textbox with a name \"Flight destination input\" filled with San Francisco (SFO). There is also a button with the name \"Swap departure airport and destination airport\". Another textbox with the name \"Flight destination input\" not filled with any text. There are also buttons with the names \"Start date\", \"End date\", which are not filled with any dates, and a button named \"Search\".\n",
+    "\n",
+    "Retry actions at most 2 times before trying a different action.\n",
+    "\n",
+    "### Examples:\n",
+    "1. To click on a button labeled \"Search\":\n",
+    "   {\n",
+    "       \"current_state\": \"On the homepage of a search engine.\",\n",
+    "       \"reasoning\": \"The accessibility tree shows a button named 'Search'. Clicking it is the appropriate next step to proceed with the task.\",\n",
+    "       \"action\": \"click\",\n",
+    "       \"selector\": \"button=Search\"\n",
+    "   }\n",
+    "\n",
+    "2. To fill a search bar with the text \"AI tools\":\n",
+    "   {\n",
+    "       \"current_state\": \"On the search page with a focused search bar.\",\n",
+    "       \"reasoning\": \"The accessibility tree shows an input field with placeholder 'Search...'. Entering the query 'AI tools' fulfills the next step of the task.\",\n",
+    "       \"action\": \"fill\",\n",
+    "       \"selector\": \"placeholder=Search...\",\n",
+    "       \"value\": \"AI tools\"\n",
+    "   }\n",
+    "\n",
+    "3. To navigate to a specific URL:\n",
+    "   {\n",
+    "       \"current_state\": \"Starting from a blank page.\",\n",
+    "       \"reasoning\": \"The task requires visiting a specific website to gather relevant information. Navigating to the URL is the first step.\",\n",
+    "       \"action\": \"navigation\",\n",
+    "       \"url\": \"https://example.com\"\n",
+    "   }\n",
+    "\n",
+    "4. To finish the task:\n",
+    "   {\n",
+    "       \"current_state\": \"Completed the search and extracted the necessary data.\",\n",
+    "       \"reasoning\": \"The task goal has been achieved, and no further actions are required.\",\n",
+    "       \"action\": \"finished\"\n",
+    "   }\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Few Shot Examples\n",
+    "\n",
+    "Performance improves drastically by adding a few shot examples."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "few_shot_example_1 = \"\"\"\n",
+    "User Input: \"What are the best tacos in San Francisco?\"\n",
+    "\n",
+    "Agent Step Sequence:\n",
+    "Step 1: \n",
+    "{\n",
+    "    \"current_state\": \"On a blank page.\",\n",
+    "    \"reasoning\": \"The task is to find the best tacos in San Francisco, so the first step is to navigate to Google to perform a search.\",\n",
+    "    \"action\": \"navigation\",\n",
+    "    \"url\": \"https://www.google.com\",\n",
+    "}\n",
+    "\n",
+    "Step 2: \n",
+    "{\n",
+    "    \"current_state\": \"On the Google homepage.\",\n",
+    "    \"reasoning\": \"To search for the best tacos in San Francisco, I need to fill the Google search bar with the query.\",\n",
+    "    \"action\": \"fill\",\n",
+    "    \"selector\": \"combobox=Search\",\n",
+    "    \"value\": \"Best tacos in San Francisco\"\n",
+    "}\n",
+    "\n",
+    "Step 3:\n",
+    "{\n",
+    "    \"current_state\": \"On Google search results page.\",\n",
+    "    \"reasoning\": \"After entering the query, I need to click the search button to retrieve the results.\",\n",
+    "    \"action\": \"click\",\n",
+    "    \"selector\": \"button=Google Search\"\n",
+    "}\n",
+    "\n",
+    "Step 4: \n",
+    "{\n",
+    "    \"current_state\": \"On the search results page with multiple links.\",\n",
+    "    \"reasoning\": \"From the search results, I need to click on a reliable food-review or blogwebsite link.\",\n",
+    "    \"action\": \"click\",\n",
+    "    \"selector\": \"text=Yelp\"\n",
+    "}\n",
+    "\n",
+    "Step 5:\n",
+    "{\n",
+    "    \"current_state\": \"On Yelp's best taqueria near San Francisco page.\",\n",
+    "    \"reasoning\": \"The task is complete as I have found the top taquerias in San Francisco.\",\n",
+    "    \"action\": \"finished\",\n",
+    "    \"summary\": \"I have successfully found the best tacos in San Francisco.\"\n",
+    "}\n",
+    "\"\"\"\n",
+    "\n",
+    "few_shot_example_2 = \"\"\"\n",
+    "User Input: Can you send an email to reschedule a meeting for Dmitry at gmail.com for tomorrow morning? I'm sick today.\n",
+    "\n",
+    "Agent Step Sequence:\n",
+    "Step 1:\n",
+    "{\n",
+    "    \"current_state\": \"On a blank page.\",\n",
+    "    \"reasoning\": \"To send an email, the first step is to navigate to Gmail.\",\n",
+    "    \"action\": \"navigation\",\n",
+    "    \"url\": \"https://mail.google.com\",\n",
+    "}\n",
+    "\n",
+    "Step 2:\n",
+    "{\n",
+    "    \"current_state\": \"On Gmail's homepage.\",\n",
+    "    \"reasoning\": \"Click the 'Compose' button to start drafting a new email.\",\n",
+    "    \"action\": \"click\",\n",
+    "    \"selector\": \"button=Compose\"\n",
+    "}\n",
+    "\n",
+    "Step 3:\n",
+    "{\n",
+    "    \"current_state\": \"In the new email draft window.\",\n",
+    "    \"reasoning\": \"Enter Dmitry's email address in the recipient field.\",\n",
+    "    \"action\": \"fill\",\n",
+    "    \"selector\": \"placeholder=Recipients\",\n",
+    "    \"value\": \"dmitry@gmail.com\"\n",
+    "}\n",
+    "\n",
+    "Step 4: \n",
+    "{\n",
+    "    \"current_state\": \"In the new email draft with the recipient filled.\",\n",
+    "    \"reasoning\": \"Set the subject line to indicate the purpose of the email.\",\n",
+    "    \"action\": \"fill\",\n",
+    "    \"selector\": \"placeholder=Subject\",\n",
+    "    \"value\": \"Rescheduling Meeting\"\n",
+    "}\n",
+    "\n",
+    "Step 5:\n",
+    "{\n",
+    "    \"current_state\": \"In the new email draft with the subject set.\",\n",
+    "    \"reasoning\": \"Compose the email body to politely inform Dmitry about rescheduling the meeting.\",\n",
+    "    \"action\": \"fill\",\n",
+    "    \"selector\": \"placeholder=Email body\",\n",
+    "    \"value\": \"Hi Dmitry,\\\\n\\\\nI'm feeling unwell today and would like to reschedule our meeting for tomorrow morning. Please let me know if this works for you.\\\\n\\\\nBest regards,\\\\n[Your Name]\"\n",
+    "}\n",
+    "\n",
+    "Step 6: \n",
+    "{\n",
+    "    \"current_state\": \"In the new email draft with the body composed.\",\n",
+    "    \"reasoning\": \"Click the 'Send' button to deliver the email to Dmitry.\",\n",
+    "    \"action\": \"click\",\n",
+    "    \"selector\": \"button=Send\"\n",
+    "}\n",
+    "\n",
+    "Step 7:\n",
+    "{\n",
+    "    \"current_state\": \"On Gmail's homepage after sending the email.\",\n",
+    "    \"reasoning\": \"The email has been drafted and sent, fulfilling the task of informing Dmitry about the reschedule.\",\n",
+    "    \"action\": \"finished\",\n",
+    "    \"summary\": \"Email sent to Dmitry to reschedule the meeting for tomorrow morning.\"\n",
+    "}\n",
+    "\"\"\"\n",
+    "\n",
+    "few_shot_examples = [few_shot_example_1, few_shot_example_2]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 4. Define a task and generate a plan of actions to execute\n",
+    "\n",
+    "You can define your own task or use one of the examples below"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define your task here:\n",
+    "# task = 'Find toys to buy for my 10 year old niece this Christmas'\n",
+    "# task = 'Find tickets for the next Warriors game'\n",
+    "task = 'Find the cheapest flight to Madrid'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Generate a plan of actions to execute\n",
+    "\n",
+    "The next cell queries the LLM using the planning prompt to generate a plan of actions to execute. This then becomes each of the individual subtasks for the execution agent to complete."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Generating plan...\")\n",
+    "planning_response = client.chat.completions.create(\n",
+    "    model=\"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\",\n",
+    "    temperature=0.0,\n",
+    "    messages=[\n",
+    "        {\"role\": \"system\", \"content\": planning_prompt},\n",
+    "        {\"role\": \"user\", \"content\": task},\n",
+    "    ],\n",
+    ")     \n",
+    "plan = planning_response.choices[0].message.content\n",
+    "print(plan)\n",
+    "steps = [line.strip()[3:] for line in plan.strip().split('\\n')]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 5. Create the Browser environment and Run the Agent\n",
+    "The necessary modules for web scraping are imported, and the setup for using Playwright asynchronously is initialized.\n",
+    "\n",
+    "The context is provided to the LLM to help it understand its current state and generate the next required action to complete the provided task. \n",
+    "\n",
+    "- At any step, you can press **enter** to continue or **'q'** to quit the agent loop. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from playwright.async_api import async_playwright\n",
+    "import asyncio \n",
+    "import json\n",
+    "import re\n",
+    "\n",
+    "previous_context = None\n",
+    "\n",
+    "async def run_browser():\n",
+    "    async with async_playwright() as playwright:\n",
+    "        # Launch Chromium browser\n",
+    "        browser = await playwright.chromium.launch(headless=False, channel=\"chrome\")\n",
+    "        page = await browser.new_page()\n",
+    "        await asyncio.sleep(1)\n",
+    "        await page.goto(\"https://google.com/\")\n",
+    "        previous_actions = []\n",
+    "        try:\n",
+    "            while True:  # Infinite loop to keep session alive, press enter to continue or 'q' to quit\n",
+    "                # Get Context from page\n",
+    "                accessibility_tree = await page.accessibility.snapshot()\n",
+    "                accessibility_tree = parse_accessibility_tree(accessibility_tree)\n",
+    "                await page.screenshot(path=\"screenshot.png\")\n",
+    "                base64_image = encode_image(imagePath)\n",
+    "                previous_context = accessibility_tree\n",
+    "                response = client.chat.completions.create(\n",
+    "                    model=\"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\",\n",
+    "                    temperature=0.0,\n",
+    "                    messages=[\n",
+    "                        {\"role\": \"system\", \"content\": execution_prompt},\n",
+    "                        {\"role\": \"system\", \"content\": f\"Few shot examples: {few_shot_examples}. Just a few examples, user will assign you VERY range set of tasks.\"},\n",
+    "                        {\"role\": \"system\", \"content\": f\"Plan to execute: {steps}\\n\\n Accessibility Tree: {previous_context}\\n\\n, previous actions: {previous_actions}\"},\n",
+    "                        {\"role\": \"user\", \"content\": \n",
+    "                         [\n",
+    "                            {\n",
+    "                                \"type\": \"text\",\n",
+    "                                \"text\": f'What should be the next action to accomplish the task: {task} based on the current state? Remember to review the plan and select the next action based on the current state. Provide the next action in JSON format strictly as specified above.',\n",
+    "                            },\n",
+    "                            {\n",
+    "                                \"type\": \"image_url\",\n",
+    "                                \"image_url\": {\n",
+    "                                    \"url\": f\"data:image/jpeg;base64,{base64_image}\",\n",
+    "                                }\n",
+    "                            },\n",
+    "                         ]\n",
+    "                        }\n",
+    "                    ],\n",
+    "                )\n",
+    "                res = response.choices[0].message.content\n",
+    "                print('Agent response:', res)\n",
+    "                try:\n",
+    "                    match = re.search(r'\\{.*\\}', res, re.DOTALL)\n",
+    "                    if match:\n",
+    "                        output = json.loads(match.group(0))\n",
+    "                except Exception as e:\n",
+    "                    print('Error parsing JSON:', e)\n",
+    "\n",
+    "                if output[\"action\"] == \"navigation\":\n",
+    "                    try:\n",
+    "                        await page.goto(output[\"url\"])\n",
+    "                        previous_actions.append(f\"navigated to {output['url']}, SUCCESS\")\n",
+    "                    except Exception as e:\n",
+    "                        previous_actions.append(f\"Error navigating to {output['url']}: {e}\")\n",
+    "\n",
+    "                elif output[\"action\"] == \"click\":\n",
+    "                    try:\n",
+    "                        selector_type, selector_name = output[\"selector\"].split(\"=\")[0], output[\"selector\"].split(\"=\")[1]\n",
+    "                        res = await page.get_by_role(selector_type, name=selector_name).first.click()\n",
+    "                        previous_actions.append(f\"clicked {output['selector']}, SUCCESS\")\n",
+    "                    except Exception as e:\n",
+    "                        previous_actions.append(f\"Error clicking on {output['selector']}: {e}\")\n",
+    "                        \n",
+    "                elif output[\"action\"] == \"fill\":\n",
+    "                    try:\n",
+    "                        selector_type, selector_name = output[\"selector\"].split(\"=\")[0], output[\"selector\"].split(\"=\")[1]\n",
+    "                        res = await page.get_by_role(selector_type, name=selector_name).fill(output[\"value\"])\n",
+    "                        await asyncio.sleep(1)\n",
+    "                        await page.keyboard.press(\"Enter\")\n",
+    "                        previous_actions.append(f\"filled {output['selector']} with {output['value']}, SUCCESS\")\n",
+    "                    except Exception as e:\n",
+    "                            previous_actions.append(f\"Error filling {output['selector']} with {output['value']}: {e}\")\n",
+    "\n",
+    "                elif output[\"action\"] == \"finished\":\n",
+    "                    print(output[\"summary\"])\n",
+    "                    break\n",
+    "\n",
+    "                await asyncio.sleep(1) \n",
+    "                \n",
+    "                # Or wait for user input\n",
+    "                user_input = input(\"Press 'q' to quit or Enter to continue: \")\n",
+    "                if user_input.lower() == 'q':\n",
+    "                    break\n",
+    "                \n",
+    "        except Exception as e:\n",
+    "            print(f\"An error occurred: {e}\")\n",
+    "        finally:\n",
+    "            # Only close the browser when explicitly requested\n",
+    "            await browser.close()\n",
+    "\n",
+    "# Run the async function\n",
+    "await run_browser()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## And that's it! Congratulations! 🎉🎉\n",
+    "\n",
+    "You've just created a browser agent that can navigate websites, understand page content through vision, plan and execute actions based on natural language commands, and maintain context across multiple interactions.\n",
+    "\n",
+    "\n",
+    "**Collaborators**\n",
+    "\n",
+    "Feel free to reach out with any questions or feedback!\n",
+    "\n",
+    "\n",
+    "**Miguel Gonzalez** on [X](https://x.com/miguel_gonzf) or [LinkedIn](https://www.linkedin.com/in/gonzalezfernandezmiguel/)\n",
+    "\n",
+    "**Dimitry Khorzov** on [X](https://x.com/korzhov_dm) or [LinkedIn](https://www.linkedin.com/in/korzhovdm)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

BIN
recipes/use_cases/browser_use/agent/sample_screenshot.png


+ 334 - 0
recipes/use_cases/coding/text2sql/quickstart.ipynb

@@ -0,0 +1,334 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "e8cba0b6",
+   "metadata": {},
+   "source": [
+    "<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/use_cases/coding/text2sql/quickstart.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>  \n",
+    "\n",
+    "## Quick Demo of Text2SQL Using Llama 3.3\n",
+    "\n",
+    "This demo shows how to use Llama 3.3 to answer questions about a SQLite DB. \n",
+    "\n",
+    "We'll use LangChain and the Llama cloud provider [Together.ai](https://api.together.ai/), where you can easily get a free API key (or you can use any other Llama cloud provider or even Ollama running Llama locally - see [here](https://github.com/meta-llama/llama-recipes/tree/main/recipes/quickstart) for examples)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "33fb3190-59fb-4edd-82dd-f20f6eab3e47",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -U langchain langchain-community langchain-together"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "fa4562d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from langchain_together import ChatTogether\n",
+    "\n",
+    "os.environ['TOGETHER_API_KEY'] = 'your_api_key'\n",
+    "\n",
+    "llm = ChatTogether(\n",
+    "    model=\"meta-llama/Llama-3.3-70B-Instruct-Turbo\",\n",
+    "    temperature=0,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6d421ae7",
+   "metadata": {},
+   "source": [
+    "To recreate the `nba_roster.db` file, run the two commands below:\n",
+    "- `python txt2csv.py` to convert the `nba.txt` file to `nba_roster.csv`. The `nba.txt` file was created by scraping the NBA roster info from the web.\n",
+    "- `python csv2db.py` to convert `nba_roster.csv` to `nba_roster.db`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "56f0360e-fca3-49a8-9a70-0416f84e15fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# uncomment if you don't want to create the db yourself\n",
+    "#! wget https://github.com/meta-llama/llama-recipes/raw/3649841b426999fdc61c30a9fc8721106bec769b/recipes/use_cases/coding/text2sql/nba_roster.db"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "3bb99f39-cd7a-4db6-91dd-02f3bf80347c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.utilities import SQLDatabase\n",
+    "\n",
+    "# Note: to run in Colab, you need to upload the nba_roster.db file in the repo to the Colab folder first.\n",
+    "db = SQLDatabase.from_uri(\"sqlite:///nba_roster.db\", sample_rows_in_table_info=0)\n",
+    "\n",
+    "def get_schema():\n",
+    "    return db.get_table_info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8d793ce7-324b-4861-926c-54973d7c9b43",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Based on the table schema below, write a SQL query that would answer the user's question; just return the SQL query and nothing else.\n",
+      "\n",
+      "Scheme:\n",
+      "\n",
+      "CREATE TABLE nba_roster (\n",
+      "\t\"Team\" TEXT, \n",
+      "\t\"NAME\" TEXT, \n",
+      "\t\"Jersey\" TEXT, \n",
+      "\t\"POS\" TEXT, \n",
+      "\t\"AGE\" INTEGER, \n",
+      "\t\"HT\" TEXT, \n",
+      "\t\"WT\" TEXT, \n",
+      "\t\"COLLEGE\" TEXT, \n",
+      "\t\"SALARY\" TEXT\n",
+      ")\n",
+      "\n",
+      "Question: What team is Stephen Curry on?\n",
+      "\n",
+      "SQL Query:\n"
+     ]
+    }
+   ],
+   "source": [
+    "question = \"What team is Stephen Curry on?\"\n",
+    "prompt = f\"\"\"Based on the table schema below, write a SQL query that would answer the user's question; just return the SQL query and nothing else.\n",
+    "\n",
+    "Scheme:\n",
+    "{get_schema()}\n",
+    "\n",
+    "Question: {question}\n",
+    "\n",
+    "SQL Query:\"\"\"\n",
+    "\n",
+    "print(prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "70776558",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SELECT Team FROM nba_roster WHERE NAME = 'Stephen Curry'\n"
+     ]
+    }
+   ],
+   "source": [
+    "answer = llm.invoke(prompt).content\n",
+    "print(answer)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "afcf423a",
+   "metadata": {},
+   "source": [
+    "***Note:*** If you don't have the \"just return the SQL query and nothing else\" in the prompt above, you'll likely get more text other than the SQL query back in the answer, making some extra post-processing necessary before running the db query below."
+   ]
+  },
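+  {
+   "cell_type": "markdown",
+   "id": "a1b2c3d4",
+   "metadata": {},
+   "source": [
+    "A minimal sketch of such post-processing (the `extract_sql` helper below is illustrative and not part of this demo): strip any markdown code fences and keep only the first SELECT statement."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5f6a7b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "\n",
+    "def extract_sql(text):\n",
+    "    # Remove any markdown code fences the model may wrap the query in\n",
+    "    text = re.sub(r\"```(sql)?\", \"\", text)\n",
+    "    # Keep only the first SELECT statement, if one is present\n",
+    "    match = re.search(r\"SELECT\\b.*?(?=;|$)\", text, re.IGNORECASE | re.DOTALL)\n",
+    "    return match.group(0).strip() if match else text.strip()\n",
+    "\n",
+    "# Example: extract_sql('Here is the query:\\n```sql\\nSELECT Team FROM nba_roster;\\n```')"
+   ]
+  },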
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "62472ce6-794b-4a61-b88c-a1e031e28e4e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"[('Golden State Warriors',)]\""
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# note this is a dangerous operation and for demo purpose only; in production app you'll need to safe-guard any DB operation\n",
+    "result = db.run(answer)\n",
+    "result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "39ed4bc3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "I don't have enough information to determine whose salary you are referring to. Could you please provide more context or specify the person you are asking about?\n"
+     ]
+    }
+   ],
+   "source": [
+    "# how about a follow up question\n",
+    "follow_up = \"What's his salary?\"\n",
+    "print(llm.invoke(follow_up).content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "98b2c523",
+   "metadata": {},
+   "source": [
+    "Since we did not pass any context along with the follow-up to Llama, it doesn't know the answer. Let's try to fix it by adding context to the follow-up prompt."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "0c305278-29d2-4e88-9b3d-ad67c94ce0f2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Based on the table schema, question, SQL query, and SQL response below, write a new SQL response; be concise, just output the SQL response.\n",
+      "\n",
+      "Scheme:\n",
+      "\n",
+      "CREATE TABLE nba_roster (\n",
+      "\t\"Team\" TEXT, \n",
+      "\t\"NAME\" TEXT, \n",
+      "\t\"Jersey\" TEXT, \n",
+      "\t\"POS\" TEXT, \n",
+      "\t\"AGE\" INTEGER, \n",
+      "\t\"HT\" TEXT, \n",
+      "\t\"WT\" TEXT, \n",
+      "\t\"COLLEGE\" TEXT, \n",
+      "\t\"SALARY\" TEXT\n",
+      ")\n",
+      "\n",
+      "Question: What's his salary?\n",
+      "SQL Query: What team is Stephen Curry on?\n",
+      "SQL Result: [('Golden State Warriors',)]\n",
+      "\n",
+      "New SQL Response:\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "prompt = f\"\"\"Based on the table schema, question, SQL query, and SQL response below, write a new SQL response; be concise, just output the SQL response.\n",
+    "\n",
+    "Scheme:\n",
+    "{get_schema()}\n",
+    "\n",
+    "Question: {follow_up}\n",
+    "SQL Query: {question}\n",
+    "SQL Result: {result}\n",
+    "\n",
+    "New SQL Response:\n",
+    "\"\"\"\n",
+    "print(prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "03739b96-e607-4fa9-bc5c-df118198dc7f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SELECT SALARY FROM nba_roster WHERE NAME = \"Stephen Curry\"\n"
+     ]
+    }
+   ],
+   "source": [
+    "new_answer = llm.invoke(prompt).content\n",
+    "print(new_answer)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c782abb6-3b44-45be-8694-70fc29b82523",
+   "metadata": {},
+   "source": [
+    "Because we have \"be concise, just output the SQL response\", Llama 3 is able to just generate the SQL statement; otherwise output parsing will be needed."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "6ecfca53-be7e-4668-bad1-5ca7571817d7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"[('$51,915,615',)]\""
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "db.run(new_answer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d79bbb1-e91d-4b56-b6ef-98c94ff414d0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.14"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

File diff suppressed because it is too large
+ 0 - 244
recipes/use_cases/coding/text2sql/structured_llama.ipynb


File diff suppressed because it is too large
+ 1 - 1
recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md


+ 1 - 1
recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md

@@ -6,7 +6,7 @@ If you're interested in a Llama 3 enabled Messenger chatbot, see [here](../messe
 
 ## Overview
 
-Businesses of all sizes can use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to connect their customers with human agents or Llama 3 powered chatbots. The benefits of an intelligent and knowledgable chatbot are obvious, including cost saving and better customer experience such as 24x7 availability. In this blog, we'll cover the details of integrating Llama 3 with the WhatsApp Business API to build a basic Llama 3 enabled chatbot.
+Businesses of all sizes can use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to connect their customers with human agents or Llama 3 powered chatbots. The benefits of an intelligent and knowledgeable chatbot are obvious, including cost saving and better customer experience such as 24x7 availability. In this blog, we'll cover the details of integrating Llama 3 with the WhatsApp Business API to build a basic Llama 3 enabled chatbot.
 
 The diagram below shows the components and overall data flow of the Llama 3 enabled WhatsApp chatbot demo we built, using Amazon EC2 instance as an example for running the web server.
 

BIN
recipes/use_cases/email_agent/1.png


BIN
recipes/use_cases/email_agent/2.png


BIN
recipes/use_cases/email_agent/3.png


File diff suppressed because it is too large
+ 335 - 0
recipes/use_cases/email_agent/README.md


BIN
recipes/use_cases/email_agent/email_agent.png


+ 624 - 0
recipes/use_cases/email_agent/email_agent.py

@@ -0,0 +1,624 @@
+from google.auth.transport.requests import Request
+from google_auth_oauthlib.flow import InstalledAppFlow
+from googleapiclient.discovery import build
+from email.mime.text import MIMEText
+from email.mime.multipart import MIMEMultipart
+from email.mime.base import MIMEBase
+from email import encoders
+
+from bs4 import BeautifulSoup
+import os
+import pytz
+import base64
+import pickle
+from datetime import datetime, timezone
+import json
+import ollama
+from pypdf import PdfReader
+from pathlib import Path
+
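+# OAuth scopes: read-only access for listing and reading mail, plus compose for
+# creating drafts and sending replies/forwards.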
+SCOPES = ['https://www.googleapis.com/auth/gmail.readonly', 'https://www.googleapis.com/auth/gmail.compose']
+
+def authenticate_gmail(user_email):
+    creds = None
+    token_file = f'token_{user_email}.pickle'  # Unique token file for each user
+    
+    # Load the user's token if it exists
+    if os.path.exists(token_file):
+        with open(token_file, 'rb') as token:
+            creds = pickle.load(token)
+    
+    # If no valid credentials, prompt the user to log in
+    if not creds or not creds.valid:
+        if creds and creds.expired and creds.refresh_token:
+            creds.refresh(Request())
+        else:
+            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
+            creds = flow.run_console()
+        
+        # Save the new credentials to a user-specific token file
+        with open(token_file, 'wb') as token:
+            pickle.dump(creds, token)
+    
+    # Build the Gmail API service
+    service = build('gmail', 'v1', credentials=creds)
+    return service
+
+
+def num_of_emails(query=''):
+    response = service.users().messages().list(
+        userId='me', 
+        q=query, 
+        maxResults=1).execute()
+    return response.get('resultSizeEstimate', 0)
+
+
+def list_emails(query='', max_results=100):
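+    # Page through the Gmail API until all messages matching the query are collected,
+    # returning the message id, sender, subject, and received time for each.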
+    emails = []
+    next_page_token = None
+
+    while True:
+        response = service.users().messages().list(
+            userId=user_id,
+            maxResults=max_results,
+            pageToken=next_page_token,
+            q=query
+        ).execute()
+        
+        if 'messages' in response:
+            for msg in response['messages']:
+                sender, subject, received_time = get_email_info(msg['id'])
+                emails.append(
+                    {
+                        "message_id": msg['id'],
+                        "sender": sender,
+                        "subject": subject,
+                        "received_time": received_time
+                    }
+                )
+        
+        next_page_token = response.get('nextPageToken')
+
+        if not next_page_token:
+            break
+    
+    return emails
+
+def get_email_detail(detail, which=''):
+    if detail == 'body':
+        return get_email_body(which)
+    elif detail == 'attachment':
+        return get_email_attachments(which)
+
+
+def get_email_body(message_id):
+    try:
+        message = service.users().messages().get(
+            userId=user_id, 
+            id=message_id, 
+            format='full').execute()
+
+        # Recursive function to extract the parts
+        def extract_parts(payload):
+            text_body = ""
+            if 'parts' in payload:
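+                # Multipart payload: recurse into the first sub-part (sufficient for simple messages)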
+                for part in payload['parts']:
+                    return extract_parts(part)
+            else:
+                mime_type = payload.get('mimeType')
+                body = payload.get('body', {}).get('data')
+                if mime_type == 'text/html':
+                    decoded_body = base64.urlsafe_b64decode(body).decode('utf-8')
+                    soup = BeautifulSoup(decoded_body, 'html.parser')
+                    text_body = soup.get_text().strip()
+                elif mime_type == 'text/plain':
+                    decoded_body = base64.urlsafe_b64decode(body).decode('utf-8')
+                    text_body = decoded_body
+
+                return text_body
+
+        return extract_parts(message['payload'])
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
+
+
+def parse_message(message):
+    payload = message['payload']
+    headers = payload.get("headers")
+
+    subject = None
+    sender = None
+    for header in headers:
+        if header['name'] == 'Subject':
+            subject = header['value']
+        elif header['name'] == 'From':
+            sender = header['value']    
+
+    internal_date = message.get('internalDate')  
+    utc_time = datetime.fromtimestamp(int(internal_date) / 1000, tz=timezone.utc)
+    
+    # Convert UTC to the specified timezone
+    local_timezone = pytz.timezone("America/Los_Angeles")
+    local_time = utc_time.astimezone(local_timezone)
+    
+    # Format the local time as a string
+    received_time = local_time.strftime('%Y-%m-%d %H:%M:%S %Z')
+
+    # Check if the email is plain text or multipart
+    if 'parts' in payload:
+        # Multipart message - find the text/plain or text/html part
+        for part in payload['parts']:
+            if part['mimeType'] == 'text/plain' or part['mimeType'] == 'text/html':
+                data = part['body']['data']
+                body = base64.urlsafe_b64decode(data).decode('utf-8')
+                return sender, subject, received_time, body
+            elif part['mimeType'] in ['multipart/related', 'multipart/mixed', 'multipart/alternative']:
+                return sender, subject, received_time, get_email_body(message.get('id'))
+    else:
+        # Single part message
+        data = payload['body']['data']
+        body = base64.urlsafe_b64decode(data).decode('utf-8')
+        return sender, subject, received_time, body    
+
+
+def get_email_info(msg_id):
+    message = service.users().messages().get(
+        userId=user_id, 
+        id=msg_id, 
+        format='full').execute()
+
+    sender, subject, received_time, body = parse_message(message)
+    
+    return sender, subject, received_time
+
+
+def reply_email(message_id, reply_text):
+    # Fetch the original message
+    original_message = service.users().messages().get(
+        userId=user_id, 
+        id=message_id, 
+        format='full').execute()
+    
+    # Get headers
+    headers = original_message['payload']['headers']
+    subject = None
+    to = None
+    for header in headers:
+        if header['name'] == 'Subject':
+            subject = header['value']
+        if header['name'] == 'From':
+            to = header['value']
+    
+    # Create the reply subject
+    if not subject.startswith("Re: "):
+        subject = "Re: " + subject
+
+    # Compose the reply message
+    reply_message = MIMEText(reply_text)
+    reply_message['to'] = to
+    reply_message['from'] = user_id
+    reply_message['subject'] = subject
+    reply_message['In-Reply-To'] = message_id
+    
+    # Encode and send the message
+    raw_message = base64.urlsafe_b64encode(reply_message.as_bytes()).decode("utf-8")
+    body = {'raw': raw_message, 
+            'threadId': original_message['threadId']}
+    sent_message = service.users().messages().send(
+        userId=user_id, 
+        body=body).execute()
+    print("Reply sent. Message ID:", sent_message['id'])
+
+
+def forward_email(message_id, forward_to, email_body=None):
+    """
+    Forwards an email, preserving the original MIME type, including multipart/related.
+    """
+    # Get the original message in 'full' format
+    original_message = service.users().messages().get(
+        userId=user_id,
+        id=message_id,
+        format='full').execute()
+
+    # Extract the payload and headers
+    payload = original_message.get('payload', {})
+    headers = payload.get('headers', [])
+    parts = payload.get('parts', [])
+    # Get the Subject
+    subject = next((header['value'] for header in headers if header['name'].lower() == 'subject'), 'No Subject')
+
+    # Create a new MIME message for forwarding
+    mime_message = MIMEMultipart(payload.get('mimeType', 'mixed').split('/')[-1])
+    mime_message['To'] = forward_to
+    mime_message['Subject'] = f"Fwd: {subject}"
+
+    # Add the optional custom email body
+    if email_body:
+        mime_message.attach(MIMEText(email_body, 'plain'))
+
+    # Function to fetch attachment data by attachmentId
+    def fetch_attachment_data(attachment_id, message_id):
+        attachment = service.users().messages().attachments().get(
+            userId=user_id, messageId=message_id, id=attachment_id
+        ).execute()
+        return base64.urlsafe_b64decode(attachment['data'])
+
+    # Rebuild MIME structure
+    def rebuild_parts(parts):
+        """
+        Recursively rebuild MIME parts.
+        """
+        if not parts:
+            return None
+
+        for part in parts:
+            part_mime_type = part.get('mimeType', 'text/plain')
+            part_body = part.get('body', {})
+            part_data = part_body.get('data', '')
+            part_parts = part.get('parts', [])  # Sub-parts for multipart types
+            filename = part.get('filename')
+            attachment_id = part_body.get('attachmentId')
+
+            if part_mime_type.startswith('multipart/'):
+                # Rebuild nested multipart
+                sub_multipart = MIMEMultipart(part_mime_type.split('/')[-1])
+                sub_parts = rebuild_parts(part_parts)
+                if sub_parts:
+                    for sub_part in sub_parts:
+                        sub_multipart.attach(sub_part)
+                yield sub_multipart
+            elif filename and attachment_id:
+                # Handle attachments
+                decoded_data = fetch_attachment_data(attachment_id, message_id)
+                attachment = MIMEBase(*part_mime_type.split('/'))
+                attachment.set_payload(decoded_data)
+                encoders.encode_base64(attachment)
+                attachment.add_header('Content-Disposition', f'attachment; filename="{filename}"')
+                yield attachment
+            else:
+                if part_data:
+                    # Decode and attach non-multipart parts
+                    decoded_data = base64.urlsafe_b64decode(part_data)
+
+                    if part_mime_type == 'text/plain':
+                        yield MIMEText(decoded_data.decode('utf-8'), 'plain')
+                    elif part_mime_type == 'text/html':
+                        yield MIMEText(decoded_data.decode('utf-8'), 'html')
+
+    # Rebuild the main MIME structure
+    rebuilt_parts = rebuild_parts(parts)
+    if rebuilt_parts:
+        for rebuilt_part in rebuilt_parts:
+            mime_message.attach(rebuilt_part)
+
+    # Encode the MIME message to base64
+    raw = base64.urlsafe_b64encode(mime_message.as_bytes()).decode('utf-8')
+
+    # Send the email
+    forward_body = {'raw': raw}
+    sent_message = service.users().messages().send(userId=user_id, body=forward_body).execute()
+
+    print(f"Message forwarded successfully! Message ID: {sent_message['id']}")
+
+
+def send_email(action, to, subject, body="", email_id=""):
+    if action == "compose":
+        message = MIMEText(body)
+        message['to'] = to
+        message['from'] = user_id
+        message['subject'] = subject
+        
+        # Encode and send the message
+        raw_message = base64.urlsafe_b64encode(message.as_bytes()).decode("utf-8")
+        body = {'raw': raw_message}
+        sent_message = service.users().messages().send(
+            userId=user_id, 
+            body=body).execute()
+        return sent_message['id']
+    elif action == "reply": # reply or forward; a message id is needed
+        reply_email(email_id, body)
+    elif action == "forward":
+        forward_email(email_id, to, body)
+
+
+def create_draft(action, to, subject, body="", email_id=""):
+    if action == "new":
+        message = MIMEText(body)
+        message['to'] = to
+        message['from'] = user_id
+        message['subject'] = subject
+        
+        encoded_message = base64.urlsafe_b64encode(message.as_bytes()).decode()
+        draft_body = {'message': {'raw': encoded_message}}
+        draft = service.users().drafts().create(
+            userId=user_id, 
+            body=draft_body).execute()
+        print(f"Draft created with ID: {draft['id']}")
+        return draft['id']
+    elif action == "reply":
+        return create_reply_draft(email_id, body)
+    elif action == "forward":
+        return create_forward_draft(email_id, to, body)
+    else:
+        return
+
+
+
+def create_reply_draft(message_id, reply_text):
+    # Fetch the original message
+    original_message = service.users().messages().get(
+        userId=user_id,
+        id=message_id,
+        format='full').execute()
+
+    # Get headers
+    headers = original_message['payload']['headers']
+    subject = None
+    to = None
+    for header in headers:
+        if header['name'] == 'Subject':
+            subject = header['value']
+        if header['name'] == 'From':
+            to = header['value']
+
+    # Create the reply subject
+    if not subject.startswith("Re: "):
+        subject = "Re: " + subject
+
+    # Compose the reply message
+    reply_message = MIMEText(reply_text)
+    reply_message['to'] = to
+    reply_message['from'] = user_id
+    reply_message['subject'] = subject
+    reply_message['In-Reply-To'] = message_id
+
+    encoded_message = base64.urlsafe_b64encode(reply_message.as_bytes()).decode()
+    draft_body = {'message': {'raw': encoded_message, 'threadId': original_message['threadId']}}
+    draft = service.users().drafts().create(userId=user_id, body=draft_body).execute()
+    return draft['id']
+
+
+def create_forward_draft(message_id, recipient_email, custom_message=None):
+    # Get the original message
+    original_message = service.users().messages().get(
+        userId=user_id,
+        id=message_id,
+        format='raw').execute()
+
+    # Decode the raw message
+    raw_message = base64.urlsafe_b64decode(original_message['raw'].encode('utf-8'))
+
+    # Prepare the forward header and optional custom message
+    forward_header = f"----- Forwarded message -----\nFrom: {recipient_email}\n\n"
+    if custom_message:
+        forward_header += f"{custom_message}\n\n"
+
+    # Combine the forward header with the original message
+    new_message = forward_header + raw_message.decode('utf-8')
+
+    # Encode the combined message into base64 format
+    encoded_message = base64.urlsafe_b64encode(new_message.encode('utf-8')).decode('utf-8')
+
+    draft_body = {'message': {'raw': encoded_message, 'threadId': original_message['threadId']}}
+    draft = service.users().drafts().create(userId=user_id, body=draft_body).execute()
+    print(f"Forward draft created with ID: {draft['id']}")
+    return draft['id']
+
+
+def send_draft(id):
+    sent_message = service.users().drafts().send(
+        userId=user_id, 
+        body={'id': id}
+        ).execute()
+    return f"Draft sent with email ID: {sent_message['id']}"
+    
+
+def get_pdf_summary(file_name):
+    text = pdf2text(file_name)
+    print("Calling Llama to generate a summary...")
+    response = llama31(text, "Generate a summary of the input text in 5 sentences.")
+    return response
+
+    
+def get_email_attachments(message_id, mime_type='application/pdf'):
+    attachments = []
+
+    # Helper function to process email parts
+    def process_parts(parts):
+        for part in parts:
+            if part['mimeType'] in ['multipart/related', 'multipart/mixed', 'multipart/alternative']:
+                # Recursively process nested parts
+                if 'parts' in part:
+                    process_parts(part['parts'])
+            elif 'filename' in part and part['filename']:
+                if part['mimeType'] == mime_type:  # Check for the desired MIME type
+                    attachment_id = part['body'].get('attachmentId')
+                    if attachment_id:
+                        # Get the attachment data
+                        attachment = service.users().messages().attachments().get(
+                            userId=user_id, 
+                            messageId=message_id, 
+                            id=attachment_id
+                        ).execute()
+                        
+                        # Decode the attachment content
+                        file_data = base64.urlsafe_b64decode(attachment['data'].encode('UTF-8'))
+
+                        with open(part['filename'], "wb") as f:
+                            f.write(file_data)
+                        
+                        # Save the attachment information
+                        attachments.append(
+                            {'filename': part['filename'], 
+                            'data': file_data,
+                            'size': attachment.get('size', 0)
+                            })
+
+    # Retrieve the email message
+    message = service.users().messages().get(
+        userId=user_id,
+        id=message_id,
+        format='full').execute()
+    payload = message['payload']
+
+    # Start processing the parts
+    if 'parts' in payload:
+        process_parts(payload['parts'])
+    
+    rslt = ""
+    for a in attachments:        
+        rslt += f"{a['filename']} - {a['size']} bytes\n"
+    return rslt #attachments
+
+
+def pdf2text(file):
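+    # Extract plain text from every page of the PDF using pypdf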
+    text = ''
+    try:
+        with Path(file).open("rb") as f:
+            reader = PdfReader(f)
+            text = "\n\n".join([page.extract_text() for page in reader.pages])
+    except Exception as e:
+        raise f"Error reading the PDF file: {str(e)}"
+
+    print(f"\nPDF text length: {len(text)}\n")
+
+    return text
+
+
+user_email = None
+service = None
+user_id = 'me'
+
+def set_email_service(gmail):
+    global user_email
+    global service
+
+    user_email = gmail
+    service = authenticate_gmail(user_email)
+
+class Agent:
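+    # A minimal tool-calling agent: it keeps the conversation history, asks Llama for the
+    # next step, runs the matching Gmail helper when a tool call is returned, and turns the
+    # result into a short, user-friendly summary.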
+    def __init__(self, system_prompt=""):
+        self.system_prompt = system_prompt
+        self.messages = []
+
+        # agent-specific short term memory, used to answer follow up questions AFTER a list of emails is found matching user's query
+        self.emails = []
+        self.draft_id = None
+
+        if self.system_prompt:
+            self.messages.append({"role": "system", "content": system_prompt})
+
+    def __call__(self, user_prompt_or_tool_result, is_tool_call=False):
+        # if it's tool call result, use "ipython" instead of "user" for the role
+        self.messages.append({"role": ("ipython" if is_tool_call else "user"), "content": user_prompt_or_tool_result})
+        result = self.llama()
+        print(f"\nLlama returned: {result}.")
+        if isinstance(result, dict):  # result is a dict only if it's a tool call spec
+            function_name = result["function_name"]
+            func = globals()[function_name]
+            parameters = result["parameters"]
+            if function_name == "get_email_detail":
+                # TODO: parse which - valid values are first, second,
+                # third, fourth, last, from xxx
+                if 'id' in parameters.keys():
+                    parameters['which'] = parameters['id']
+                    del parameters['id'] # per the function spec
+                elif 'which' in parameters.keys():
+                    if 'from ' in parameters['which']:
+                        sender = parameters['which'].split('from ')[-1]
+                        for email in self.emails:
+                            if email['sender'].find(sender) != -1:
+                                parameters['which'] = email['message_id']
+                                break
+                    elif 'subject ' in parameters['which']:
+                        subject = parameters['which'].split('subject ')[-1]
+                        # exact match beats substring
+                        for email in self.emails:
+                            if email['subject'].upper() == subject.upper():
+                                parameters['which'] = email['message_id']
+                                break
+                            elif email['subject'].upper().find(subject.upper()) != -1:
+                                parameters['which'] = email['message_id']
+
+                    elif 'id_' in parameters['which']:
+                        parameters['which'] = parameters['which'].split('id_')[-1]
+                    else:
+                        parameters['which'] = self.emails[-1]['message_id']
+            elif function_name == "send_draft":
+                parameters['id'] = self.draft_id
+
+            print(f"\nCalling tool to access Gmail API: {function_name}, {parameters}...")
+            result = func(**parameters)
+            print(f"\nTool calling returned: {result}")
+
+            # convert function calling result to concise summary, offering interactive follow ups,
+            # for smooth and user friendly experience
+            if function_name == 'list_emails':
+                self.emails = result
+                num = len(result)
+                if num == 0:
+                    output = "I couldn't find any such emails. What else would you like to do?"
+                elif num <= 5:
+                    output = f"I found {num} email{'s' if num > 1 else ''} matching your query:\n"
+                    for i, email in enumerate(result, start=1):
+                        output += f"{i}. From: {email['sender']}, Subject: {email['subject']}, Received on: {email['received_time']}\n"
+                else:
+                    output = f"I found {num} emails matching your query. Here are the first 5 emails:\n"
+                    for i in range(1, 6):
+                        output += f"{i}. From: {result[i-1]['sender']}, Subject: {result[i-1]['subject']}, Received on: {result[i-1]['received_time']}\n"
+            elif function_name == "get_email_detail":
+                output = result
+            elif function_name == "get_pdf_summary":
+                output = result
+            elif function_name == "send_email":
+                output = "Email sent."
+            elif function_name == "create_draft":
+                output = "Draft created."
+                self.draft_id = result
+            elif function_name == "send_draft":
+                output = result
+
+            print(f"\n-------------------------\n\nAgent: {output}\n")
+        else:
+            output = result # direct text, not JSON, response by Llama
+
+        # adding this may cause Llama to hallucinate when answering
+        # follow up questions. e.g. "do i have emails with attachments
+        # larger than 20mb" got right tool calling response, then
+        # follow up "larger than 10mb" got hallucinated response.
+        # self.messages.append({"role": "assistant", "content": output})
+
+        # this mitigates the hallucination
+        self.messages.append({"role": "assistant", "content": str(result)})
+
+        return output
+
+    def llama(self):
+        response = ollama.chat(model='llama3.1',
+            messages = self.messages,
+            options = {
+                "temperature": 0.0
+            }
+        )
+        result = response['message']['content']
+
+        try:
+            res = json.loads(result.split("<|python_tag|>")[-1])
+            function_name = res['name']
+            parameters = res['parameters']
+            return {"function_name": function_name,
+                    "parameters": parameters}
+        except Exception:
+            return result
+
+
+def llama31(user_prompt: str, system_prompt = ""):
+    response = ollama.chat(model='llama3.1',
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+    )
+    return response['message']['content']

+ 241 - 0
recipes/use_cases/email_agent/functions_prompt.py

@@ -0,0 +1,241 @@
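+# Tool/function specifications for the email agent, kept as JSON-style strings; they are
+# meant to be included in Llama's system prompt (along with the examples below) so the
+# model can emit matching tool calls.
+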
+list_emails_function = """
+{
+    "type": "function",
+    "function": {
+        "name": "list_emails",
+        "description": "Return a list of emails matching an optionally specified query.",
+        "parameters": {
+            "type": "dic",
+            "properties": [
+                {
+                    "maxResults": {
+                        "type": "integer",
+                        "description": "The default maximum number of emails to return is 100; the maximum allowed value for this field is 500."
+                    }
+                },              
+                {
+                    "query": {
+                        "type": "string",
+                        "description": "One or more keywords in the email subject and body, or one or more filters. There can be 6 types of filters: 1) Field-specific Filters: from, to, cc, bcc, subject; 2) Date Filters: before, after, older than, newer than); 3) Status Filters: read, unread, starred, importatant; 4) Attachment Filters: has, filename or type; 5) Size Filters: larger, smaller; 6) logical operators (or, and, not)."
+                    }
+                }
+            ],
+            "required": []
+        }
+    }
+}
+"""
+
+get_email_function = """
+{
+    "type": "function",
+    "function": {
+        "name": "get_email_detail",
+        "description": "Get detailed info about a specific email",
+        "parameters": {
+            "type": "dict",
+            "properties": [
+                {
+                    "detail": {
+                        "type": "string",
+                        "description": "what detail the user wants to know about - two possible values: body or attachment"
+                    }
+                },
+                {
+                    "which": {
+                        "type": "string",
+                        "description": "which email to get detail about - possible values include: 'first', 'second', ..., 'last', 'from ...', and 'subject ...'"
+                    }
+                },
+            ],
+            "required": ["detail", "which"]
+        }
+    }
+}
+"""
+
+send_email_function = """
+{
+    "type": "function",
+    "function": {
+        "name": "send_email",
+        "description": "Compose, reply, or forward email",
+        "parameters": {
+            "type": "dict",
+            "properties": [
+                {
+                    "action": {
+                        "type": "string",
+                        "description": "Whether to compose, reply, or forward an email"
+                    }
+                },
+                {
+                    "to": {
+                        "type": "string",
+                        "description": "The recipient of the email"
+                    }
+                },
+                {
+                    "subject": {
+                        "type": "string",
+                        "description": "The email subject"
+                    }
+                },
+                {
+                    "body": {
+                        "type": "string",
+                        "description": "The email content"
+                    }
+                },                                
+                {
+                    "email_id": {
+                        "type": "string",
+                        "description": "the email id to reply or forward to"
+                    }
+                }
+            ],
+            "required": ["action", "to", "subject", "body"]
+        }
+    }
+}
+"""
+
+get_pdf_summary_function = """
+{
+    "type": "function",
+    "function": {
+        "name": "get_pdf_summary",
+        "description": "get a summary of a PDF attachment",
+        "parameters": {
+            "type": "dict",
+            "properties": [
+                {
+                    "file_name": {
+                        "type": "string",
+                        "description": "The name of the PDF file"
+                    }
+                }
+            ],
+            "required": ["file_name"]
+        }
+    }
+}
+"""
+
+create_draft_function = """
+{
+    "type": "function",
+    "function": {
+        "name": "create_draft",
+        "description": "Create a new, reply, or forward email draft",
+        "parameters": {
+            "type": "dict",
+            "properties": [
+                {
+                    "action": {
+                        "type": "string",
+                        "description": "Whether to draft a new, reply, or forward an email"
+                    }
+                },
+                {
+                    "to": {
+                        "type": "string",
+                        "description": "The recipient of the email"
+                    }
+                },
+                {
+                    "subject": {
+                        "type": "string",
+                        "description": "The email subject"
+                    }
+                },
+                {
+                    "body": {
+                        "type": "string",
+                        "description": "The email content"
+                    }
+                },                                
+                {
+                    "email_id": {
+                        "type": "string",
+                        "description": "the email id to reply or forward to, or empty if draft a new email."
+                    }
+                }
+            ],
+            "required": ["action", "to", "subject", "body", "email_id"]
+        }
+    }
+}
+"""
+
+# For now, only one draft email can be saved in a session.
+# To support multiple drafts, see how get_email_detail after list_emails is implemented.
+send_draft_function = """
+{
+    "type": "function",
+    "function": {
+        "name": "send_draft",
+        "description": "Send a draft email",
+        "parameters": {
+            "type": "dict",
+            "properties": [
+                {
+                    "id": {
+                        "type": "string",
+                        "description": "draft id"
+                    }
+                }
+            ],
+            "required": ["id"]        
+        }
+    }
+}
+"""
+
+examples = """
+{"name": "list_emails", "parameters": {"query": "has:attachment larger:5mb"}}
+{"name": "list_emails", "parameters": {"query": "has:attachment"}}
+{"name": "list_emails", "parameters": {"query": "newer_than:1d"}}
+{"name": "list_emails", "parameters": {"query": "older_than:1d"}}
+{"name": "list_emails", "parameters": {"query": "is:unread"}}
+{"name": "list_emails", "parameters":  {"query": "<query> is:unread"}}
+{"name": "list_emails", "parameters":  {"query": "<query> is:read"}}
+{"name": "get_email_detail", "parameters": {"detail": "body", "which": "first"}}
+{"name": "get_email_detail", "parameters": {"detail": "body", "which": "last"}}
+{"name": "get_email_detail", "parameters": {"detail": "body", "which": "second"}}
+{"name": "get_email_detail", "parameters": {"detail": "body", "which": "subject <subject info>"}}
+{"name": "get_email_detail", "parameters": {"detail": "attachment", "which": "from <sender info>"}}
+{"name": "get_email_detail", "parameters": {"detail": "attachment", "which": "first"}}
+{"name": "get_email_detail", "parameters": {"detail": "attachment", "which": "last"}}
+{"name": "get_email_detail", "parameters": {"detail": "attachment", "which": "<email id>"}}
+{"name": "send_email", "parameters": {"action": "compose", "to": "jeffxtang@meta.com", "subject": "xxxxx", "body": "xxxxx"}}
+{"name": "send_email", "parameters": {"action": "reply", "to": "", "subject": "xxxxx", "body": "xxxxx", "email_id": "xxxxx"}}
+{"name": "send_email", "parameters": {"action": "forward", "to": "jeffxtang@meta.com", "subject": "xxxxx", "body": "xxxxx", "email_id": "xxxxx"}}
+{"name": "create_draft", "parameters": {"action": "new", "to": "jeffxtang@meta.com", "subject": "xxxxx", "body": "xxxxx", "email_id": ""}}
+{"name": "create_draft", "parameters": {"action": "reply", "to": "", "subject": "xxxxx", "body": "xxxxx", "email_id": "xxxxx"}}
+{"name": "create_draft", "parameters": {"action": "forward", "to": "jeffxtang@meta.com", "subject": "xxxxx", "body": "xxxxx", "email_id": "xxxxx"}}
+{"name": "send_draft", "parameters": {"id": "..."}}
+{"name": "get_pdf_summary", "parameters": {"file_name": "..."}}
+"""
+
+system_prompt = f"""
+Environment: ipython
+Cutting Knowledge Date: December 2023
+Today Date: 1 December 2024
+
+Your name is Email Agent, an assistant that can perform all email related tasks for your user.
+Respond to the user's ask by making use of the following functions if needed.
+If no available functions can be used, just say "I don't know" and don't make up facts.
+Here is a list of available functions in JSON format:
+
+{list_emails_function}
+{get_email_function}
+{send_email_function}
+{get_pdf_summary_function}
+{create_draft_function}
+{send_draft_function}
+
+Example responses:
+{examples}
+
+"""

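For context on how these definitions are consumed: the system prompt above is sent together with each user request to a locally served Llama, which is expected to reply with one of the JSON tool calls shown in the examples. Below is a minimal sketch of that round trip, assuming the ollama package from the recipe's requirements and a "llama3.1" model tag; the ask_llama helper is hypothetical and for illustration only (the recipe's actual dispatch logic lives in email_agent.py, which is not shown in this hunk).

import json
import ollama

from functions_prompt import system_prompt

def ask_llama(user_ask):
    # Send the function specs and example calls (system) plus the user's request.
    # The model tag "llama3.1" is an assumption, not taken from the recipe.
    response = ollama.chat(
        model="llama3.1",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_ask},
        ],
    )
    # The model is expected to answer with a single JSON tool call, e.g.
    # {"name": "list_emails", "parameters": {"query": "is:unread"}}
    return json.loads(response["message"]["content"])

if __name__ == "__main__":
    call = ask_llama("do I have any unread emails with attachments?")
    print(call["name"], call["parameters"])
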
+ 32 - 0
recipes/use_cases/email_agent/main.py

@@ -0,0 +1,32 @@
+import argparse
+import email_agent
+from email_agent import *
+from functions_prompt import system_prompt
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Set email address")
+    parser.add_argument("--email", type=str, required=True, help="Your Gmail address")
+    args = parser.parse_args()
+
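+    # Initialize the Gmail API service for the given address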
+    email_agent.set_email_service(args.email)
+
+    greeting = llama31("hello", "Your name is Email Agent, an assistant that can perform all email related tasks for your user.")
+    agent_response = f"{greeting}\n\nYour ask: "
+    agent = Agent(system_prompt)
+
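+    # Interactive loop: keep taking user requests until "bye" is entered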
+    while True:
+        ask = input(agent_response)
+        if ask == "bye":
+            print(llama31("bye"))
+            break
+        print("\n-------------------------\nCalling Llama...")
+        agent(ask)
+        agent_response = "Your ask: "
+
+
+if __name__ == "__main__":
+    main()
+
+
+

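With the pieces above in place, the agent would typically be started as python main.py --email your_address@gmail.com (a Gmail address for which OAuth credentials have been set up); it then greets the user and keeps accepting requests at the "Your ask:" prompt until "bye" is entered.
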
+ 9 - 0
recipes/use_cases/email_agent/requirements.txt

@@ -0,0 +1,9 @@
+
+google-auth==2.27.0
+google-auth-oauthlib==0.4.6
+google-auth-httplib2==0.1.0
+google-api-python-client==2.34.0
+pytz
+beautifulsoup4
+ollama
+pypdf

+ 3 - 3
recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval.py

@@ -82,7 +82,7 @@ def generate_answers_with_RAG(model_name, question_list,api_config,retriever,api
         )
     all_tasks = []
     for q in question_list:
-        # retrive the top K documents
+        # retrieve the top K documents
         retrieved_docs = retriever.invoke(q)        
         # format the documents into a string
         documents = format_docs_raft(retrieved_docs)
@@ -200,7 +200,7 @@ def main(api_config):
                     questions.append(item["question"])
                     groud_truth.append(item["answer"])
         generated_answers = {}            
-        # build retriver
+        # build retriever
         retriever = build_retriever(api_config,"sentence-transformers/multi-qa-mpnet-base-cos-v1",api_config["rag_topk"])
         # Generate answers for 8B models
         model_name = api_config["model_name"]
@@ -312,7 +312,7 @@ def parse_arguments():
         "-r", "--rag_topk",
         default=5,
         type=int,
-        help="set the number of top k documents the RAG needs to retrive."
+        help="set the number of top k documents the RAG needs to retrieve."
     )
     parser.add_argument("--chunk_size", type=int, default=1000, help="The character size of each chunk used in RAG")
     return parser.parse_args()

+ 2 - 2
recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval_config.yaml

@@ -9,8 +9,8 @@ judge_prompt_template: >
     <|begin_of_text|><|start_header_id|>system<|end_header_id|>You have been provided with a question, a teacher's answer and a student's answer below.
     Given that question, you need to score the how good the student answer is compare to
     the teacher's answer. If the student's answer is correct based on the teacher's answer, then return YES, else return NO.
-    Here are the grade criterias to follow:
-    1. Review it carefully to make sure that the keywords and numerical vaules are exactly the same.
+    Here are the grade criteria to follow:
+    1. Review it carefully to make sure that the keywords and numerical values are exactly the same.
     2. Ensure that the student answer does not contain any conflicting statements.
     3. It is OK if the student answer contains more information than the ground truth answer, as long as it is factually accurate relative to the  ground truth answer.
     YES means that the student's answer meets all of the criteria.

+ 3 - 3
recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_utils.py

@@ -112,7 +112,7 @@ def generate_questions(api_config):
     if len(documents) == 0:
         logging.info(f"Error reading files, document_text is {len(documents)}")
     document_batches = get_chunks(documents,api_config["chunk_size"],api_config)
-    # use OpenAI API protocol to hanlde the chat request, including local VLLM openai compatible server
+    # use OpenAI API protocol to handle the chat request, including local VLLM openai compatible server
     llm = ChatOpenAI(
         openai_api_key=key,
         openai_api_base=api_url,
@@ -132,7 +132,7 @@ def generate_questions(api_config):
         queries = [strip_str(q) for q in queries]
         queries = [q for q in queries if any(c.isalpha() for c in q)]
         if len(queries) > int(api_config['questions_per_chunk']):
-            # As the model may have unrelated question at the begining of the result
+            # As the model may have unrelated question at the beginning of the result
             # if queries is more than questions_per_chunk, then we need to truncate it and only keep last questions_per_chunk lines
             queries = queries[-int(api_config['questions_per_chunk']):]
         final_result.append(queries)
@@ -152,7 +152,7 @@ def generate_COT(chunk_questions_zip,api_config) -> dict:
                 prompt = api_config['COT_prompt_template'].format(question=question,context=str(document_content))
                 all_tasks.append(prompt)
                 chunk_questions.append((document_content,question))
-    # use OpenAI API protocol to hanlde the chat request, including local VLLM openai compatible server
+    # use OpenAI API protocol to handle the chat request, including local VLLM openai compatible server
     llm = ChatOpenAI(
         openai_api_key=api_config["api_key"],
         openai_api_base=api_config["endpoint_url"],

+ 1 - 1
src/llama_recipes/inference/prompt_format_utils.py

@@ -218,7 +218,7 @@ LLAMA_GUARD_3_CATEGORY = [
     ),
     SafetyCategory(
         "Indiscriminate Weapons. ",
-        """I models should not create content that enables the creation of weapons of mass destruction. Examples of such weapons include, but are not limited to:
+        """AI models should not create content that enables the creation of weapons of mass destruction. Examples of such weapons include, but are not limited to:
                 - Chemical weapons (ex: nerve gas)
                 - Biological weapons (ex: anthrax)
                 - Radiological weapons (ex: salted bombs)

+ 1 - 1
src/llama_recipes/inference/safety_utils.py

@@ -152,7 +152,7 @@ class AzureSaftyChecker(object):
             report = "|" + "|".join(f"{c.name:^10}" for c in categories) + "|\n"
             report += "|" + "|".join(f"{levels[s]:^10}" for s in severities) + "|\n"
 
-        return "Azure Content Saftey API", is_safe, report
+        return "Azure Content Safety API", is_safe, report
 
 class LlamaGuardSafetyChecker(object):
 

+ 1 - 1
src/llama_recipes/policies/anyprecision_optimizer.py

@@ -7,7 +7,7 @@
 # buffer dtypes.
 # Optional Kahan summation is used to offset precision reduction for
 # the weight updates. This allows full training in BFloat16 (equal or
-# better than FP32 results in many cases) due to high precision weight upates.
+# better than FP32 results in many cases) due to high precision weight updates.
 
 import torch
 from torch.optim.optimizer import Optimizer

+ 2 - 2
src/llama_recipes/utils/train_utils.py

@@ -81,7 +81,7 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
         local_rank: The rank of the current node in a distributed setting
         train_config: The training configuration
         eval_dataloader: The dataloader containing the eval data
-        tokenizer: tokenizer used in the eval for decoding the predicitons
+        tokenizer: tokenizer used in the eval for decoding the predictions
 
     Returns: results dictionary containing average training and validation perplexity and loss
     """
@@ -579,7 +579,7 @@ def save_train_params(train_config, fsdp_config, rank):
     fsdp_config_dict = {k: str(v) for k, v in vars(fsdp_config).items() if not k.startswith('__')}
     # Merge the two dictionaries into one
     train_params_dict = {**train_config_dict, **fsdp_config_dict}
-    # Construct the folder name (follwoing FSDP checkpointing style) using properties of the train_config object
+    # Construct the folder name (following FSDP checkpointing style) using properties of the train_config object
     folder_name = (
     train_config.dist_checkpoint_root_folder
     + "/"