123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- import os
- from groq import Groq
- import duckdb
- import yaml
- import glob
- import json
- def get_verified_queries(directory_path):
- """
- Reads YAML files from the specified directory, loads the verified SQL queries and their descriptions,
- and stores them in a dictionary.
- Parameters:
- directory_path (str): The path to the directory containing the YAML files with verified queries.
- Returns:
- dict: A dictionary where the keys are the names of the YAML files (without the directory path and file extension)
- and the values are the parsed content of the YAML files.
- """
- verified_queries_yaml_files = glob.glob(os.path.join(directory_path, '*.yaml'))
- verified_queries_dict = {}
- for file in verified_queries_yaml_files:
- with open(file, 'r') as stream:
- try:
- file_name = file[len(directory_path):-5]
- verified_queries_dict[file_name] = yaml.safe_load(stream)
- except yaml.YAMLError as exc:
- continue
-
- return verified_queries_dict
- def execute_duckdb_query_function_calling(query_name,verified_queries_dict):
- """
- Executes a SQL query from the verified queries dictionary using DuckDB and returns the result as a DataFrame.
- Parameters:
- query_name (str): The name of the query to be executed, corresponding to a key in the verified queries dictionary.
- verified_queries_dict (dict): A dictionary containing verified queries, where the keys are query names and the values
- are dictionaries with query details including the SQL statement.
- Returns:
- pandas.DataFrame: The result of the executed query as a DataFrame.
- """
-
- original_cwd = os.getcwd()
- os.chdir('data')
- query = verified_queries_dict[query_name]['sql']
-
- try:
- conn = duckdb.connect(database=':memory:', read_only=False)
- query_result = conn.execute(query).fetchdf().reset_index(drop=True)
- finally:
- os.chdir(original_cwd)
- return query_result
- model = "llama3-8b-8192"
- # Initialize the Groq client
- groq_api_key = os.getenv('GROQ_API_KEY')
- client = Groq(
- api_key=groq_api_key
- )
- directory_path = 'verified-queries/'
- verified_queries_dict = get_verified_queries(directory_path)
- # Display the title and introduction of the application
- multiline_text = """
- Welcome! Ask questions about employee data or purchase details, like "Show the 5 most recent purchases" or "What was the most expensive purchase?". The app matches your question to pre-verified SQL queries for accurate results.
- """
- print(multiline_text)
-
- while True:
- # Get user input from the console
- user_input = input("You: ")
-
- #Simplify verified_queries_dict to just show query name and description
- query_description_mapping = {key: subdict['description'] for key, subdict in verified_queries_dict.items()}
-
- # Step 1: send the conversation and available functions to the model
- # Define the messages to be sent to the Groq API
- messages = [
- {
- "role": "system",
- "content": '''You are a function calling LLM that uses the data extracted from the execute_duckdb_query_function_calling function to answer questions around a DuckDB dataset.
- Extract the query_name parameter from this mapping by finding the one whose description best matches the user's question:
- {query_description_mapping}
- '''.format(query_description_mapping=query_description_mapping)
- },
- {
- "role": "user",
- "content": user_input,
- }
- ]
- # Define the tool (function) to be used by the Groq API
- tools = [
- {
- "type": "function",
- "function": {
- "name": "execute_duckdb_query_function_calling",
- "description": "Executes a verified DuckDB SQL Query",
- "parameters": {
- "type": "object",
- "properties": {
- "query_name": {
- "type": "string",
- "description": "The name of the verified query (i.e. 'most-recent-purchases')",
- }
- },
- "required": ["query_name"],
- },
- },
- }
- ]
- # Send the conversation and available functions to the Groq API
- response = client.chat.completions.create(
- model=model,
- messages=messages,
- tools=tools,
- tool_choice="auto",
- max_tokens=4096
- )
- # Extract the response message and any tool calls from the response
- response_message = response.choices[0].message
- tool_calls = response_message.tool_calls
- # Define a dictionary of available functions
- available_functions = {
- "execute_duckdb_query_function_calling": execute_duckdb_query_function_calling,
- }
- # Iterate over the tool calls in the response
- for tool_call in tool_calls:
- function_name = tool_call.function.name # Get the function name
- function_to_call = available_functions[function_name] # Get the function to call
- function_args = json.loads(tool_call.function.arguments) # Parse the function arguments
- print('Query found: ', function_args.get("query_name"))
-
- # Call the function with the provided arguments
- function_response = function_to_call(
- query_name=function_args.get("query_name"),
- verified_queries_dict=verified_queries_dict
- )
- # Print the function response (query result)
- print(function_response)
|