| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 | 
							- import os
 
- from groq import Groq
 
- import duckdb
 
- import yaml
 
- import glob
 
- import json
 
- def get_verified_queries(directory_path):
 
-     """
 
-     Reads YAML files from the specified directory, loads the verified SQL queries and their descriptions,
 
-     and stores them in a dictionary.
 
-     Parameters:
 
-         directory_path (str): The path to the directory containing the YAML files with verified queries.
 
-     Returns:
 
-         dict: A dictionary where the keys are the names of the YAML files (without the directory path and file extension)
 
-               and the values are the parsed content of the YAML files.
 
-     """
 
-     verified_queries_yaml_files = glob.glob(os.path.join(directory_path, '*.yaml'))
 
-     verified_queries_dict = {}
 
-     for file in verified_queries_yaml_files:
 
-         with open(file, 'r') as stream:
 
-             try:
 
-                 file_name = file[len(directory_path):-5]
 
-                 verified_queries_dict[file_name] = yaml.safe_load(stream)
 
-             except yaml.YAMLError as exc:
 
-                 continue
 
-         
 
-     return verified_queries_dict
 
- def execute_duckdb_query_function_calling(query_name,verified_queries_dict):
 
-     """
 
-     Executes a SQL query from the verified queries dictionary using DuckDB and returns the result as a DataFrame.
 
-     Parameters:
 
-         query_name (str): The name of the query to be executed, corresponding to a key in the verified queries dictionary.
 
-         verified_queries_dict (dict): A dictionary containing verified queries, where the keys are query names and the values
 
-                                       are dictionaries with query details including the SQL statement.
 
-     Returns:
 
-         pandas.DataFrame: The result of the executed query as a DataFrame.
 
-     """
 
-     
 
-     original_cwd = os.getcwd()
 
-     os.chdir('data')
 
-     query = verified_queries_dict[query_name]['sql']
 
-     
 
-     try:
 
-         conn = duckdb.connect(database=':memory:', read_only=False)
 
-         query_result = conn.execute(query).fetchdf().reset_index(drop=True)
 
-     finally:
 
-         os.chdir(original_cwd)
 
-     return query_result
 
- model = "llama3-8b-8192"
 
- # Initialize the Groq client
 
- groq_api_key = os.getenv('GROQ_API_KEY')
 
- client = Groq(
 
-     api_key=groq_api_key
 
- )
 
- directory_path = 'verified-queries/'
 
- verified_queries_dict = get_verified_queries(directory_path)
 
- # Display the title and introduction of the application
 
- multiline_text = """
 
- Welcome! Ask questions about employee data or purchase details, like "Show the 5 most recent purchases" or "What was the most expensive purchase?". The app matches your question to pre-verified SQL queries for accurate results.
 
- """
 
- print(multiline_text)
 
-     
 
- while True:
 
-     # Get user input from the console
 
-     user_input = input("You: ")
 
-     
 
-     #Simplify verified_queries_dict to just show query name and description
 
-     query_description_mapping = {key: subdict['description'] for key, subdict in verified_queries_dict.items()}
 
-     
 
-     # Step 1: send the conversation and available functions to the model
 
-     # Define the messages to be sent to the Groq API
 
-     messages = [
 
-         {
 
-             "role": "system",
 
-             "content": '''You are a function calling LLM that uses the data extracted from the execute_duckdb_query_function_calling function to answer questions around a DuckDB dataset.
 
-             Extract the query_name parameter from this mapping by finding the one whose description best matches the user's question: 
 
-             {query_description_mapping}
 
-             '''.format(query_description_mapping=query_description_mapping)
 
-         },
 
-         {
 
-             "role": "user",
 
-             "content": user_input,
 
-         }
 
-     ]
 
-     # Define the tool (function) to be used by the Groq API
 
-     tools = [
 
-         {
 
-             "type": "function",
 
-             "function": {
 
-                 "name": "execute_duckdb_query_function_calling",
 
-                 "description": "Executes a verified DuckDB SQL Query",
 
-                 "parameters": {
 
-                     "type": "object",
 
-                     "properties": {
 
-                         "query_name": {
 
-                             "type": "string",
 
-                             "description": "The name of the verified query (i.e. 'most-recent-purchases')",
 
-                         }
 
-                     },
 
-                     "required": ["query_name"],
 
-                 },
 
-             },
 
-         }
 
-     ]
 
-     # Send the conversation and available functions to the Groq API
 
-     response = client.chat.completions.create(
 
-         model=model,
 
-         messages=messages,
 
-         tools=tools,
 
-         tool_choice="auto",  
 
-         max_tokens=4096
 
-     )
 
-     # Extract the response message and any tool calls from the response
 
-     response_message = response.choices[0].message
 
-     tool_calls = response_message.tool_calls
 
-     # Define a dictionary of available functions
 
-     available_functions = {
 
-         "execute_duckdb_query_function_calling": execute_duckdb_query_function_calling,
 
-     }
 
-     # Iterate over the tool calls in the response
 
-     for tool_call in tool_calls:
 
-         function_name = tool_call.function.name  # Get the function name
 
-         function_to_call = available_functions[function_name]  # Get the function to call
 
-         function_args = json.loads(tool_call.function.arguments)  # Parse the function arguments
 
-         print('Query found: ', function_args.get("query_name"))
 
-         
 
-         # Call the function with the provided arguments
 
-         function_response = function_to_call(
 
-             query_name=function_args.get("query_name"),
 
-             verified_queries_dict=verified_queries_dict
 
-         )
 
-     # Print the function response (query result)
 
-     print(function_response)
 
 
  |