Browse Source

fix/fix few typos (#834)

multiple typos fixes
Igor Kasianenko 3 months ago
parent
commit
1521c5b550
33 changed files with 71 additions and 71 deletions
  1. 3 3
      docs/FAQ.md
  2. 2 2
      docs/multi_gpu.md
  3. 3 3
      recipes/3p_integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb
  4. 1 1
      recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md
  5. 1 1
      recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py
  6. 2 2
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl
  7. 2 2
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl
  8. 2 2
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl
  9. 2 2
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl
  10. 4 4
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl
  11. 4 4
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl
  12. 2 2
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl
  13. 4 4
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl
  14. 4 4
      recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl
  15. 1 1
      recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md
  16. 4 4
      recipes/3p_integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb
  17. 3 3
      recipes/3p_integrations/togetherai/structured_text_extraction_from_images.ipynb
  18. 2 2
      recipes/3p_integrations/vllm/README.md
  19. 1 1
      recipes/experimental/long_context/H2O/README.md
  20. 1 1
      recipes/experimental/long_context/H2O/src/streaming.sh
  21. 4 4
      recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_101.ipynb
  22. 1 1
      recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_201.ipynb
  23. 2 2
      recipes/quickstart/finetuning/README.md
  24. 1 1
      recipes/quickstart/inference/local_inference/README.md
  25. 1 1
      recipes/responsible_ai/prompt_guard/inference.py
  26. 1 1
      recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md
  27. 1 1
      recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md
  28. 3 3
      recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval.py
  29. 2 2
      recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval_config.yaml
  30. 3 3
      recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_utils.py
  31. 1 1
      src/llama_recipes/inference/safety_utils.py
  32. 1 1
      src/llama_recipes/policies/anyprecision_optimizer.py
  33. 2 2
      src/llama_recipes/utils/train_utils.py

+ 3 - 3
docs/FAQ.md

@@ -29,20 +29,20 @@ Here we discuss frequently asked questions that may occur and we found useful al
 
 7. How to handle CUDA memory fragmentations during fine-tuning that may lead into an OOM?
 
-    In some cases you may experience that after model checkpointing specially with FSDP (this usually does not happen with PEFT methods), the reserved and allocated CUDA memory has increased. This might be due to CUDA memory fragmentations. PyTorch recenly added an enviroment variable that helps to better manage memory fragmentation (this feature in available on PyTorch nightlies at the time of writing this doc July 30 2023). You can set this in your main training script as follows:
+    In some cases you may experience that after model checkpointing specially with FSDP (this usually does not happen with PEFT methods), the reserved and allocated CUDA memory has increased. This might be due to CUDA memory fragmentations. PyTorch recently added an environment variable that helps to better manage memory fragmentation (this feature in available on PyTorch nightlies at the time of writing this doc July 30 2023). You can set this in your main training script as follows:
 
     ```bash
 
     os.environ['PYTORCH_CUDA_ALLOC_CONF']='expandable_segments:True'
 
     ```
-    We also added this enviroment variable in `setup_environ_flags` of the [train_utils.py](../src/llama_recipes/utils/train_utils.py), feel free to uncomment it if required.
+    We also added this environment variable in `setup_environ_flags` of the [train_utils.py](../src/llama_recipes/utils/train_utils.py), feel free to uncomment it if required.
 
 8. Additional debugging flags?
 
     The environment variable `TORCH_DISTRIBUTED_DEBUG` can be used to trigger additional useful logging and collective synchronization checks to ensure all ranks are synchronized appropriately. `TORCH_DISTRIBUTED_DEBUG` can be set to either OFF (default), INFO, or DETAIL depending on the debugging level required. Please note that the most verbose option, DETAIL may impact the application performance and thus should only be used when debugging issues.
 
-    We also added this enviroment variable in `setup_environ_flags` of the [train_utils.py](../src/llama_recipes/utils/train_utils.py), feel free to uncomment it if required.
+    We also added this environment variable in `setup_environ_flags` of the [train_utils.py](../src/llama_recipes/utils/train_utils.py), feel free to uncomment it if required.
 
 9. I am getting import errors when running inference.
 

+ 2 - 2
docs/multi_gpu.md

@@ -174,7 +174,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
     * `mixed_precision` boolean flag to specify using mixed precision, defatults to true.
 
-    * `use_fp16` boolean flag to specify using FP16 for mixed precision, defatults to False. We recommond not setting this flag, and only set `mixed_precision` that will use `BF16`, this will help with speed and memory savings while avoiding challenges of scaler accuracies with `FP16`.
+    * `use_fp16` boolean flag to specify using FP16 for mixed precision, defatults to False. We recommend not setting this flag, and only set `mixed_precision` that will use `BF16`, this will help with speed and memory savings while avoiding challenges of scaler accuracies with `FP16`.
 
     *  `sharding_strategy` this specifies the sharding strategy for FSDP, it can be:
         * `FULL_SHARD` that shards model parameters, gradients and optimizer states, results in the most memory savings.
@@ -187,7 +187,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
 * `checkpoint_type` specifies the state dict checkpoint type for saving the model. `FULL_STATE_DICT` streams state_dict of each model shard from a rank to CPU and assembels the full state_dict on CPU. `SHARDED_STATE_DICT` saves one checkpoint per rank, and enables the re-loading the model in a different world size.
 
-* `fsdp_activation_checkpointing` enables activation checkpoining for FSDP, this saves significant amount of memory with the trade off of recomputing itermediate activations during the backward pass. The saved memory can be re-invested in higher batch sizes to increase the throughput. We recommond you use this option.
+* `fsdp_activation_checkpointing` enables activation checkpoining for FSDP, this saves significant amount of memory with the trade off of recomputing itermediate activations during the backward pass. The saved memory can be re-invested in higher batch sizes to increase the throughput. We recommend you use this option.
 
 * `fsdp_config.pure_bf16` it moves the  model to `BFloat16` and if `optimizer` is set to `anyprecision` then optimizer states will be kept in `BFloat16` as well. You can use this option if necessary.
 

+ 3 - 3
recipes/3p_integrations/aws/prompt_engineering_with_llama_2_on_amazon_bedrock.ipynb

@@ -758,7 +758,7 @@
     "\n",
     "Adding specific examples of your desired output generally results in more accurate, consistent output. This technique is called \"few-shot prompting\".\n",
     "\n",
-    "In this example, the generated response follows our desired format that offers a more nuanced sentiment classifer that gives a positive, neutral, and negative response confidence percentage.\n",
+    "In this example, the generated response follows our desired format that offers a more nuanced sentiment classifier that gives a positive, neutral, and negative response confidence percentage.\n",
     "\n",
     "See also: [Zhao et al. (2021)](https://arxiv.org/abs/2102.09690), [Liu et al. (2021)](https://arxiv.org/abs/2101.06804), [Su et al. (2022)](https://arxiv.org/abs/2209.01975), [Rubin et al. (2022)](https://arxiv.org/abs/2112.08633).\n",
     "\n"
@@ -1045,7 +1045,7 @@
    "source": [
     "### Self-Consistency\n",
     "\n",
-    "LLMs are probablistic, so even with Chain-of-Thought, a single generation might produce incorrect results. Self-Consistency ([Wang et al. (2022)](https://arxiv.org/abs/2203.11171)) introduces enhanced accuracy by selecting the most frequent answer from multiple generations (at the cost of higher compute):"
+    "LLMs are probabilistic, so even with Chain-of-Thought, a single generation might produce incorrect results. Self-Consistency ([Wang et al. (2022)](https://arxiv.org/abs/2203.11171)) introduces enhanced accuracy by selecting the most frequent answer from multiple generations (at the cost of higher compute):"
    ]
   },
   {
@@ -1179,7 +1179,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Retrieval-Augmented Generation, or RAG, describes the practice of including information in the prompt you've retrived from an external database ([Lewis et al. (2020)](https://arxiv.org/abs/2005.11401v4)). It's an effective way to incorporate facts into your LLM application and is more affordable than fine-tuning which may be costly and negatively impact the foundational model's capabilities.\n",
+    "Retrieval-Augmented Generation, or RAG, describes the practice of including information in the prompt you've retrieved from an external database ([Lewis et al. (2020)](https://arxiv.org/abs/2005.11401v4)). It's an effective way to incorporate facts into your LLM application and is more affordable than fine-tuning which may be costly and negatively impact the foundational model's capabilities.\n",
     "\n",
     "This could be as simple as a lookup table or as sophisticated as a [vector database]([FAISS](https://github.com/facebookresearch/faiss)) containing all of your company's knowledge:"
    ]

+ 1 - 1
recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/README.md

@@ -1,6 +1,6 @@
 # Presidential Speeches RAG with Pinecone
 
-This repository contains a command line application that allows users to ask questions about US presidental speeches by applying Retrieval-Augmented Generation (RAG) over a Pinecone vector database. The application uses RAG to answer the user's question by retrieving the most relevant presidential speeches and using them to supplant the LLM response.
+This repository contains a command line application that allows users to ask questions about US presidential speeches by applying Retrieval-Augmented Generation (RAG) over a Pinecone vector database. The application uses RAG to answer the user's question by retrieving the most relevant presidential speeches and using them to supplant the LLM response.
 
 ## Features
 

+ 1 - 1
recipes/3p_integrations/groq/groq-example-templates/presidential-speeches-rag-with-pinecone/main.py

@@ -55,7 +55,7 @@ def presidential_speech_chat_completion(client, model, user_question, relevant_e
             },
             {
                 "role": "user",
-                "content": "User Question: " + user_question + "\n\nRelevant Speech Exerpt(s):\n\n" + relevant_excerpts,
+                "content": "User Question: " + user_question + "\n\nRelevant Speech Excerpt(s):\n\n" + relevant_excerpts,
             }
         ],
         model = model

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set-v2.jsonl

@@ -29,8 +29,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql" : "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql" : "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql" : "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql" : "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql" : "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/gold-test-set.jsonl

@@ -9,8 +9,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql" : "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql" : "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql" : "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql" : "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql" : "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_large_filtered_cleaned.jsonl

@@ -209,8 +209,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/archive/generated_queries_v2_large_filtered_cleaned.jsonl

@@ -117,8 +117,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "sql" : "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "sql" : "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "sql" : "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "sql" : "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "sql" : "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "sql" : "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "sql" : "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 4 - 4
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries.jsonl

@@ -118,8 +118,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}
@@ -148,8 +148,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 4 - 4
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large.jsonl

@@ -1108,8 +1108,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}
@@ -1138,8 +1138,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 2 - 2
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_large_filtered.jsonl

@@ -321,8 +321,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 4 - 4
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2.jsonl

@@ -160,8 +160,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}
@@ -215,8 +215,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 4 - 4
recipes/3p_integrations/lamini/text2sql_memory_tuning/data/training_data/generated_queries_v2_large.jsonl

@@ -1188,8 +1188,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}
@@ -1243,8 +1243,8 @@
 {"question": "Would you please let me know what the highest paid players are for each position?", "answer": "The highest paid players are Nikola Jokic (C), Paul George (F), Norman Powell (G), Kevin Durant (PF), Stephen Curry (PG), LeBron James (SF), Bradley Beal (SG).", "sql": "SELECT name, pos, MAX(CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER)) as max_salary FROM nba_roster WHERE SALARY!= '--' GROUP BY POS;"}
 {"question": "Is Jalen Johnson 23 years old?", "answer": "No, Jalen Johnson is 21 years old", "sql": "Select name, age from nba_roster where name='Jalen Johnson';"}
 {"question": "Who is the oldest player on the Brooklyn Nets?", "answer": "Spencer Dinwiddie, Dorian Finney-Smith, Royce O'Neale", "sql": "SELECT NAME FROM nba_roster WHERE TEAM = 'Brooklyn Nets' AND AGE = (SELECT MAX(AGE) FROM nba_roster WHERE TEAM = 'Brooklyn Nets');"}
-{"question": "Who has the higest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
-{"question": "Which player has the higest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Who has the highest salary on the Memphis Grizzlies?", "answer": "Ja Morant", "sql": "select salary, name from nba_roster where team='Memphis Grizzlies' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
+{"question": "Which player has the highest salary on the Cleveland Cavaliers?", "answer": "Darius Garland", "sql": "select salary, name from nba_roster where team='Cleveland Cavaliers' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "Who is the highest paid center on the Dallas Mavericks?", "answer": "Dereck Lively II", "sql": "select salary, name from nba_roster where team='Dallas Mavericks' and POS='C' and SALARY!= '--' ORDER BY CAST(REPLACE(REPLACE(SALARY, '$', ''), ',','') AS INTEGER) DESC LIMIT 1;"}
 {"question": "How much is Marcus Smart getting paid?", "answer": "$18,833,712", "sql": "select salary from nba_roster where name='Marcus Smart';"}
 {"question": "What's the average age of the Trail Blazers?", "answer": "24", "sql": "select avg(age) from nba_roster where team='Portland Trail Blazers';"}

+ 1 - 1
recipes/3p_integrations/llamaindex/dlai_agentic_rag/README.md

@@ -1,6 +1,6 @@
 # Building Agentic RAG with Llamaindex
 
-The folder here containts the Llama 3 ported notebooks of the DLAI short course [Building Agentic RAG with Llamaindex](https://www.deeplearning.ai/short-courses/building-agentic-rag-with-llamaindex/).
+The folder here contains the Llama 3 ported notebooks of the DLAI short course [Building Agentic RAG with Llamaindex](https://www.deeplearning.ai/short-courses/building-agentic-rag-with-llamaindex/).
 
 1. [Building Agentic RAG with Llamaindex L1 Router Engine](../../../quickstart/agents/DeepLearningai_Course_Notebooks/Building_Agentic_RAG_with_Llamaindex_L1_Router_Engine.ipynb) shows how to implement a simple agentic RAG, a router that will pick up one of several query tools (question answering or summarization) to execute a query on a single document. Note this notebook is located in the `quickstart` folder.
 

+ 4 - 4
recipes/3p_integrations/togetherai/multimodal_RAG_with_nvidia_investor_slide_deck.ipynb

@@ -635,7 +635,7 @@
         }
       ],
       "source": [
-        "# Dowload and rename the last presentation from Nvidia to investors\n",
+        "# Download and rename the last presentation from Nvidia to investors\n",
         "!wget https://s201.q4cdn.com/141608511/files/doc_presentations/2023/Oct/01/ndr_presentation_oct_2023_final.pdf\n",
         "!mv ndr_presentation_oct_2023_final.pdf nvidia_presentation.pdf"
       ]
@@ -811,7 +811,7 @@
       "source": [
         "### How does this work? What happens under the hood between the different pages and query token?\n",
         "\n",
-        "The interaction operation between page image patch and query text token representations to score each page of the document is what allows this great retreival performance.\n",
+        "The interaction operation between page image patch and query text token representations to score each page of the document is what allows this great retrieval performance.\n",
         "\n",
         "Typically each image is resized and cut into patch sizes of 16x16 pixels. These patches are then embedded into 128 dimensional vectors which are stored and used to perform the MaxSim and late interaction operations between the image and text tokens. ColPali is a multi-vector approach because it produces multiple vectors for each image/query; one vector for each token instead of just one vector for all tokens. \n",
         "\n",
@@ -878,7 +878,7 @@
       },
       "outputs": [],
       "source": [
-        "# Since we stored the collection along with the index we have the base64 images of all PDF pages aswell!\n",
+        "# Since we stored the collection along with the index we have the base64 images of all PDF pages as well!\n",
         "model.search(query, k=1)"
       ]
     },
@@ -949,7 +949,7 @@
       "source": [
         "Here we can see that the combination of ColQwen2 as a image retriever and Llama-3.2 90B Vision is a powerful duo for multimodal RAG applications specially with PDFs.\n",
         "\n",
-        "Not only was ColQwen2 able to retrieve the correct page that had the right answer on it but then Llama-3.2 90B Vision was also able to find exactly where on the page this answer was, ignoring all the irrelvant details!\n",
+        "Not only was ColQwen2 able to retrieve the correct page that had the right answer on it but then Llama-3.2 90B Vision was also able to find exactly where on the page this answer was, ignoring all the irrelevant details!\n",
         "\n",
         "Voila!🎉🎉\n",
         "\n",

+ 3 - 3
recipes/3p_integrations/togetherai/structured_text_extraction_from_images.ipynb

@@ -143,7 +143,7 @@
         "id": "8aPkxE7MnbkX"
       },
       "source": [
-        "## Lets bring in the reciept that we want to extract information from\n",
+        "## Lets bring in the receipt that we want to extract information from\n",
         "\n",
         "Notice that this is a real receipt with multiple portions that are not relevant to the line item extraction structure we've outlined above.\n",
         "\n",
@@ -243,7 +243,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Notice that the model is not perfect and wasn't able to extract out some line items. It's hard for most models to perform this zero-shot extraction of data from images. A way to improve this is to finetune the model using [Visual Intruction Tuning](https://arxiv.org/abs/2304.08485)."
+        "Notice that the model is not perfect and wasn't able to extract out some line items. It's hard for most models to perform this zero-shot extraction of data from images. A way to improve this is to finetune the model using [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485)."
       ]
     },
     {
@@ -401,7 +401,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Althought with some missed line items we were able to extract out structured JSON from an image in a zero shot manner! To improve the results for your pipeline and make them production ready I recommend you [finetune](https://docs.together.ai/docs/fine-tuning-overview) the vision model on your own dataset!\n",
+        "Although with some missed line items we were able to extract out structured JSON from an image in a zero shot manner! To improve the results for your pipeline and make them production ready I recommend you [finetune](https://docs.together.ai/docs/fine-tuning-overview) the vision model on your own dataset!\n",
         "\n",
         "Learn more about how to use JSON mode in the [docs](https://docs.together.ai/docs/json-mode) here!"
       ]

+ 2 - 2
recipes/3p_integrations/vllm/README.md

@@ -27,12 +27,12 @@ To launch the inference simply execute the following command changing the tp_siz
 python inference.py --model_name $MODEL_PATH --peft_model_name $PEFT_MODEL_PATH --tp_size 8 --user_prompt "Hello my name is"
 ```
 The script will ask for another prompt ina loop after completing the generation which you can exit by simply pressing enter and leaving the prompt empty.
-When using multiple gpus the model will automatically be split accross the available GPUs using tensor parallelism.
+When using multiple gpus the model will automatically be split across the available GPUs using tensor parallelism.
 
 ## Multi-node multi-gpu inference
 The FP8 quantized variants of Meta Llama (i.e. meta-llama/Meta-Llama-3.1-405B-FP8 and meta-llama/Meta-Llama-3.1-405B-Instruct-FP8) can be executed on a single node with 8x80GB H100 using the script located in this folder.
 To run the unquantized Meta Llama 405B variants (i.e. meta-llama/Meta-Llama-3.1-405B and meta-llama/Meta-Llama-3.1-405B-Instruct) we need multi-node inference.
-vLLM allows this by leveraging pipeline parallelism accros nodes while still applying tensor parallelism insid each node.
+vLLM allows this by leveraging pipeline parallelism across nodes while still applying tensor parallelism inside each node.
 To start a multi-node inference we first need to set up a ray serves which well be leveraged by vLLM to execute the model across node boundaries.
 
 ```bash

File diff suppressed because it is too large
+ 1 - 1
recipes/experimental/long_context/H2O/README.md


+ 1 - 1
recipes/experimental/long_context/H2O/src/streaming.sh

@@ -12,7 +12,7 @@ elif [[ ${method} == 'full' ]]; then
         --input-path data \
         --model-name lmsys/vicuna-13b-v1.5
 else
-    echo 'unknown argment for method'
+    echo 'unknown argument for method'
 fi
 
 

+ 4 - 4
recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_101.ipynb

@@ -22,7 +22,7 @@
     "- Understand how the tool calls are handled under the hood\n",
     "- 3.2 Model Tool Calling Format and Behaviour\n",
     "\n",
-    "In Part 2, we will learn how to build system that can get us comparision between 2 papers"
+    "In Part 2, we will learn how to build system that can get us comparison between 2 papers"
    ]
   },
   {
@@ -400,7 +400,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "If everything is setup correctly-the model should now wrap function calls  with the `|<python_tag>|` following the actualy function call. \n",
+    "If everything is setup correctly-the model should now wrap function calls  with the `|<python_tag>|` following the actually function call. \n",
     "\n",
     "This can allow you to manage your function calling logic accordingly. \n",
     "\n",
@@ -660,11 +660,11 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Life is great because Llama Team writes great docs for us, so we can conviently copy-pasta examples from there :)\n",
+    "Life is great because Llama Team writes great docs for us, so we can conveniently copy-pasta examples from there :)\n",
     "\n",
     "[Here](https://www.llama.com/docs/model-cards-and-prompt-formats/llama3_2#-tool-calling-(1b/3b)-) are the docs for your reference that we will be using. \n",
     "\n",
-    "Excercise for viewer: Use `llama-toolchain` again to verify like we did earlier and then start the prompt engineering for the small Llamas."
+    "Exercise for viewer: Use `llama-toolchain` again to verify like we did earlier and then start the prompt engineering for the small Llamas."
    ]
   },
   {

+ 1 - 1
recipes/quickstart/agents/Agents_Tutorial/Tool_Calling_201.ipynb

@@ -403,7 +403,7 @@
    "source": [
     "def get_arxiv_ids(web_results: dict, temperature: int = 0, max_tokens=512):\n",
     "    # Initialize chat history with a specific prompt to extract arXiv IDs\n",
-    "    arxiv_id_chat_history = [{\"role\": \"system\", \"content\": \"Given this input, give me the arXiv ID of the papers. The input has the query and web results. DO NOT WRITE ANYTHING ELSE IN YOUR RESPONSE: ONLY THE ARXIV ID ONCE, the web search will have it repeated mutliple times, just return the it once and where its actually the arxiv ID\"}, {\"role\": \"user\", \"content\": f\"Here is the query and results{web_results}\"}]\n",
+    "    arxiv_id_chat_history = [{\"role\": \"system\", \"content\": \"Given this input, give me the arXiv ID of the papers. The input has the query and web results. DO NOT WRITE ANYTHING ELSE IN YOUR RESPONSE: ONLY THE ARXIV ID ONCE, the web search will have it repeated multiple times, just return the it once and where its actually the arxiv ID\"}, {\"role\": \"user\", \"content\": f\"Here is the query and results{web_results}\"}]\n",
     "\n",
     "    # Call the model to process the input and extract arXiv IDs\n",
     "    response = client.chat.completions.create(\n",

+ 2 - 2
recipes/quickstart/finetuning/README.md

@@ -79,7 +79,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
     * `mixed_precision` boolean flag to specify using mixed precision, defatults to true.
 
-    * `use_fp16` boolean flag to specify using FP16 for mixed precision, defatults to False. We recommond not setting this flag, and only set `mixed_precision` that will use `BF16`, this will help with speed and memory savings while avoiding challenges of scaler accuracies with `FP16`.
+    * `use_fp16` boolean flag to specify using FP16 for mixed precision, defatults to False. We recommend not setting this flag, and only set `mixed_precision` that will use `BF16`, this will help with speed and memory savings while avoiding challenges of scaler accuracies with `FP16`.
 
     *  `sharding_strategy` this specifies the sharding strategy for FSDP, it can be:
         * `FULL_SHARD` that shards model parameters, gradients and optimizer states, results in the most memory savings.
@@ -92,7 +92,7 @@ It lets us specify the training settings for everything from `model_name` to `da
 
 * `checkpoint_type` specifies the state dict checkpoint type for saving the model. `FULL_STATE_DICT` streams state_dict of each model shard from a rank to CPU and assembels the full state_dict on CPU. `SHARDED_STATE_DICT` saves one checkpoint per rank, and enables the re-loading the model in a different world size.
 
-* `fsdp_activation_checkpointing` enables activation checkpoining for FSDP, this saves significant amount of memory with the trade off of recomputing itermediate activations during the backward pass. The saved memory can be re-invested in higher batch sizes to increase the throughput. We recommond you use this option.
+* `fsdp_activation_checkpointing` enables activation checkpoining for FSDP, this saves significant amount of memory with the trade off of recomputing itermediate activations during the backward pass. The saved memory can be re-invested in higher batch sizes to increase the throughput. We recommend you use this option.
 
 * `pure_bf16` it moves the  model to `BFloat16` and if `optimizer` is set to `anyprecision` then optimizer states will be kept in `BFloat16` as well. You can use this option if necessary.
 

+ 1 - 1
recipes/quickstart/inference/local_inference/README.md

@@ -119,7 +119,7 @@ Then convert your FSDP checkpoint to HuggingFace checkpoints using:
 
  # --HF_model_path_or_name specifies the HF Llama model name or path where it has config.json and tokenizer.json
  ```
-By default, training parameter are saved in `train_params.yaml` in the path where FSDP checkpoints are saved, in the converter script we frist try to find the HugingFace model name used in the fine-tuning to load the model with configs from there, if not found user need to provide it.
+By default, training parameter are saved in `train_params.yaml` in the path where FSDP checkpoints are saved, in the converter script we first try to find the HugingFace model name used in the fine-tuning to load the model with configs from there, if not found user need to provide it.
 
 Then run inference using:
 

+ 1 - 1
recipes/responsible_ai/prompt_guard/inference.py

@@ -11,7 +11,7 @@ Utilities for loading the PromptGuard model and evaluating text for jailbreaks a
 
 Note that the underlying model has a maximum recommended input size of 512 tokens as a DeBERTa model.
 The final two functions in this file implement efficient parallel batched evaluation of the model on a list
-of input strings of arbirary length, with the final score for each input being the maximum score across all
+of input strings of arbitrary length, with the final score for each input being the maximum score across all
 chunks of the input string.
 """
 

File diff suppressed because it is too large
+ 1 - 1
recipes/use_cases/customerservice_chatbots/messenger_chatbot/messenger_llama3.md


+ 1 - 1
recipes/use_cases/customerservice_chatbots/whatsapp_chatbot/whatsapp_llama3.md

@@ -6,7 +6,7 @@ If you're interested in a Llama 3 enabled Messenger chatbot, see [here](../messe
 
 ## Overview
 
-Businesses of all sizes can use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to connect their customers with human agents or Llama 3 powered chatbots. The benefits of an intelligent and knowledgable chatbot are obvious, including cost saving and better customer experience such as 24x7 availability. In this blog, we'll cover the details of integrating Llama 3 with the WhatsApp Business API to build a basic Llama 3 enabled chatbot.
+Businesses of all sizes can use the [WhatsApp Business API](https://developers.facebook.com/docs/whatsapp/cloud-api/overview) to connect their customers with human agents or Llama 3 powered chatbots. The benefits of an intelligent and knowledgeable chatbot are obvious, including cost saving and better customer experience such as 24x7 availability. In this blog, we'll cover the details of integrating Llama 3 with the WhatsApp Business API to build a basic Llama 3 enabled chatbot.
 
 The diagram below shows the components and overall data flow of the Llama 3 enabled WhatsApp chatbot demo we built, using Amazon EC2 instance as an example for running the web server.
 

+ 3 - 3
recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval.py

@@ -82,7 +82,7 @@ def generate_answers_with_RAG(model_name, question_list,api_config,retriever,api
         )
     all_tasks = []
     for q in question_list:
-        # retrive the top K documents
+        # retrieve the top K documents
         retrieved_docs = retriever.invoke(q)        
         # format the documents into a string
         documents = format_docs_raft(retrieved_docs)
@@ -200,7 +200,7 @@ def main(api_config):
                     questions.append(item["question"])
                     groud_truth.append(item["answer"])
         generated_answers = {}            
-        # build retriver
+        # build retriever
         retriever = build_retriever(api_config,"sentence-transformers/multi-qa-mpnet-base-cos-v1",api_config["rag_topk"])
         # Generate answers for 8B models
         model_name = api_config["model_name"]
@@ -312,7 +312,7 @@ def parse_arguments():
         "-r", "--rag_topk",
         default=5,
         type=int,
-        help="set the number of top k documents the RAG needs to retrive."
+        help="set the number of top k documents the RAG needs to retrieve."
     )
     parser.add_argument("--chunk_size", type=int, default=1000, help="The character size of each chunk used in RAG")
     return parser.parse_args()

+ 2 - 2
recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_eval_config.yaml

@@ -9,8 +9,8 @@ judge_prompt_template: >
     <|begin_of_text|><|start_header_id|>system<|end_header_id|>You have been provided with a question, a teacher's answer and a student's answer below.
     Given that question, you need to score the how good the student answer is compare to
     the teacher's answer. If the student's answer is correct based on the teacher's answer, then return YES, else return NO.
-    Here are the grade criterias to follow:
-    1. Review it carefully to make sure that the keywords and numerical vaules are exactly the same.
+    Here are the grade criteria to follow:
+    1. Review it carefully to make sure that the keywords and numerical values are exactly the same.
     2. Ensure that the student answer does not contain any conflicting statements.
     3. It is OK if the student answer contains more information than the ground truth answer, as long as it is factually accurate relative to the  ground truth answer.
     YES means that the student's answer meets all of the criteria.

+ 3 - 3
recipes/use_cases/end2end-recipes/RAFT-Chatbot/raft_utils.py

@@ -112,7 +112,7 @@ def generate_questions(api_config):
     if len(documents) == 0:
         logging.info(f"Error reading files, document_text is {len(documents)}")
     document_batches = get_chunks(documents,api_config["chunk_size"],api_config)
-    # use OpenAI API protocol to hanlde the chat request, including local VLLM openai compatible server
+    # use OpenAI API protocol to handle the chat request, including local VLLM openai compatible server
     llm = ChatOpenAI(
         openai_api_key=key,
         openai_api_base=api_url,
@@ -132,7 +132,7 @@ def generate_questions(api_config):
         queries = [strip_str(q) for q in queries]
         queries = [q for q in queries if any(c.isalpha() for c in q)]
         if len(queries) > int(api_config['questions_per_chunk']):
-            # As the model may have unrelated question at the begining of the result
+            # As the model may have unrelated question at the beginning of the result
             # if queries is more than questions_per_chunk, then we need to truncate it and only keep last questions_per_chunk lines
             queries = queries[-int(api_config['questions_per_chunk']):]
         final_result.append(queries)
@@ -152,7 +152,7 @@ def generate_COT(chunk_questions_zip,api_config) -> dict:
                 prompt = api_config['COT_prompt_template'].format(question=question,context=str(document_content))
                 all_tasks.append(prompt)
                 chunk_questions.append((document_content,question))
-    # use OpenAI API protocol to hanlde the chat request, including local VLLM openai compatible server
+    # use OpenAI API protocol to handle the chat request, including local VLLM openai compatible server
     llm = ChatOpenAI(
         openai_api_key=api_config["api_key"],
         openai_api_base=api_config["endpoint_url"],

+ 1 - 1
src/llama_recipes/inference/safety_utils.py

@@ -152,7 +152,7 @@ class AzureSaftyChecker(object):
             report = "|" + "|".join(f"{c.name:^10}" for c in categories) + "|\n"
             report += "|" + "|".join(f"{levels[s]:^10}" for s in severities) + "|\n"
 
-        return "Azure Content Saftey API", is_safe, report
+        return "Azure Content Safety API", is_safe, report
 
 class LlamaGuardSafetyChecker(object):
 

+ 1 - 1
src/llama_recipes/policies/anyprecision_optimizer.py

@@ -7,7 +7,7 @@
 # buffer dtypes.
 # Optional Kahan summation is used to offset precision reduction for
 # the weight updates. This allows full training in BFloat16 (equal or
-# better than FP32 results in many cases) due to high precision weight upates.
+# better than FP32 results in many cases) due to high precision weight updates.
 
 import torch
 from torch.optim.optimizer import Optimizer

+ 2 - 2
src/llama_recipes/utils/train_utils.py

@@ -81,7 +81,7 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
         local_rank: The rank of the current node in a distributed setting
         train_config: The training configuration
         eval_dataloader: The dataloader containing the eval data
-        tokenizer: tokenizer used in the eval for decoding the predicitons
+        tokenizer: tokenizer used in the eval for decoding the predictions
 
     Returns: results dictionary containing average training and validation perplexity and loss
     """
@@ -579,7 +579,7 @@ def save_train_params(train_config, fsdp_config, rank):
     fsdp_config_dict = {k: str(v) for k, v in vars(fsdp_config).items() if not k.startswith('__')}
     # Merge the two dictionaries into one
     train_params_dict = {**train_config_dict, **fsdp_config_dict}
-    # Construct the folder name (follwoing FSDP checkpointing style) using properties of the train_config object
+    # Construct the folder name (following FSDP checkpointing style) using properties of the train_config object
     folder_name = (
     train_config.dist_checkpoint_root_folder
     + "/"