radu
/
LLamaRecipes
spegling av https://github.com/facebookresearch/llama-recipes.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
							import os
from groq import Groq
import duckdb
import yaml
import glob
import json

def get_verified_queries(directory_path):
    """
    Reads YAML files from the specified directory, loads the verified SQL queries and their descriptions,
    and stores them in a dictionary.

    Parameters:
        directory_path (str): The path to the directory containing the YAML files with verified queries.

    Returns:
        dict: A dictionary where the keys are the names of the YAML files (without the directory path and file extension)
              and the values are the parsed content of the YAML files.
    """
    verified_queries_yaml_files = glob.glob(os.path.join(directory_path, '*.yaml'))
    verified_queries_dict = {}
    for file in verified_queries_yaml_files:
        with open(file, 'r') as stream:
            try:
                file_name = file[len(directory_path):-5]
                verified_queries_dict[file_name] = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                continue
        
    return verified_queries_dict


def execute_duckdb_query_function_calling(query_name,verified_queries_dict):
    """
    Executes a SQL query from the verified queries dictionary using DuckDB and returns the result as a DataFrame.

    Parameters:
        query_name (str): The name of the query to be executed, corresponding to a key in the verified queries dictionary.
        verified_queries_dict (dict): A dictionary containing verified queries, where the keys are query names and the values
                                      are dictionaries with query details including the SQL statement.

    Returns:
        pandas.DataFrame: The result of the executed query as a DataFrame.
    """
    
    original_cwd = os.getcwd()
    os.chdir('data')

    query = verified_queries_dict[query_name]['sql']
    
    try:
        conn = duckdb.connect(database=':memory:', read_only=False)
        query_result = conn.execute(query).fetchdf().reset_index(drop=True)
    finally:
        os.chdir(original_cwd)

    return query_result


model = "llama3-8b-8192"

# Initialize the Groq client
groq_api_key = os.getenv('GROQ_API_KEY')
client = Groq(
    api_key=groq_api_key
)

directory_path = 'verified-queries/'
verified_queries_dict = get_verified_queries(directory_path)

# Display the title and introduction of the application
multiline_text = """
Welcome! Ask questions about employee data or purchase details, like "Show the 5 most recent purchases" or "What was the most expensive purchase?". The app matches your question to pre-verified SQL queries for accurate results.
"""

print(multiline_text)

    
while True:
    # Get user input from the console
    user_input = input("You: ")

    
    #Simplify verified_queries_dict to just show query name and description
    query_description_mapping = {key: subdict['description'] for key, subdict in verified_queries_dict.items()}
    
    # Step 1: send the conversation and available functions to the model
    # Define the messages to be sent to the Groq API
    messages = [
        {
            "role": "system",
            "content": '''You are a function calling LLM that uses the data extracted from the execute_duckdb_query_function_calling function to answer questions around a DuckDB dataset.

            Extract the query_name parameter from this mapping by finding the one whose description best matches the user's question: 
            {query_description_mapping}
            '''.format(query_description_mapping=query_description_mapping)
        },
        {
            "role": "user",
            "content": user_input,
        }
    ]

    # Define the tool (function) to be used by the Groq API
    tools = [
        {
            "type": "function",
            "function": {
                "name": "execute_duckdb_query_function_calling",
                "description": "Executes a verified DuckDB SQL Query",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query_name": {
                            "type": "string",
                            "description": "The name of the verified query (i.e. 'most-recent-purchases')",
                        }
                    },
                    "required": ["query_name"],
                },
            },
        }
    ]

    # Send the conversation and available functions to the Groq API
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        tool_choice="auto",  
        max_tokens=4096
    )

    # Extract the response message and any tool calls from the response
    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls

    # Define a dictionary of available functions
    available_functions = {
        "execute_duckdb_query_function_calling": execute_duckdb_query_function_calling,
    }

    # Iterate over the tool calls in the response
    for tool_call in tool_calls:
        function_name = tool_call.function.name  # Get the function name
        function_to_call = available_functions[function_name]  # Get the function to call
        function_args = json.loads(tool_call.function.arguments)  # Parse the function arguments
        print('Query found: ', function_args.get("query_name"))
        
        # Call the function with the provided arguments
        function_response = function_to_call(
            query_name=function_args.get("query_name"),
            verified_queries_dict=verified_queries_dict
        )

    # Print the function response (query result)
    print(function_response)