# main.py
import os

import numpy as np
import pandas as pd
from groq import Groq
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
  8. def get_relevant_excerpts(user_question, docsearch):
  9. """
  10. This function retrieves the most relevant excerpts from presidential speeches based on the user's question.
  11. Parameters:
  12. user_question (str): The question asked by the user.
  13. docsearch (PineconeVectorStore): The Pinecone vector store containing the presidential speeches.
  14. Returns:
  15. str: A string containing the most relevant excerpts from presidential speeches.
  16. """
  17. # Perform a similarity search on the Pinecone vector store using the user's question
  18. relevent_docs = docsearch.similarity_search(user_question)
  19. # Extract the page content from the top 3 most relevant documents and join them into a single string
  20. relevant_excerpts = '\n\n------------------------------------------------------\n\n'.join([doc.page_content for doc in relevent_docs[:3]])
  21. return relevant_excerpts
  22. def presidential_speech_chat_completion(client, model, user_question, relevant_excerpts):
  23. """
  24. This function generates a response to the user's question using a pre-trained model.
  25. Parameters:
  26. client (Groq): The Groq client used to interact with the pre-trained model.
  27. model (str): The name of the pre-trained model.
  28. user_question (str): The question asked by the user.
  29. relevant_excerpts (str): A string containing the most relevant excerpts from presidential speeches.
  30. Returns:
  31. str: A string containing the response to the user's question.
  32. """
  33. # Define the system prompt
  34. system_prompt = '''
  35. You are a presidential historian. Given the user's question and relevant excerpts from
  36. presidential speeches, answer the question by including direct quotes from presidential speeches.
  37. When using a quote, site the speech that it was from (ignoring the chunk).
  38. '''
  39. # Generate a response to the user's question using the pre-trained model
  40. chat_completion = client.chat.completions.create(
  41. messages = [
  42. {
  43. "role": "system",
  44. "content": system_prompt
  45. },
  46. {
  47. "role": "user",
  48. "content": "User Question: " + user_question + "\n\nRelevant Speech Exerpt(s):\n\n" + relevant_excerpts,
  49. }
  50. ],
  51. model = model
  52. )
  53. # Extract the response from the chat completion
  54. response = chat_completion.choices[0].message.content
  55. return response
  56. def main():
  57. """
  58. This is the main function that runs the application. It initializes the Groq client and the SentenceTransformer model,
  59. gets user input from the Streamlit interface, retrieves relevant excerpts from presidential speeches based on the user's question,
  60. generates a response to the user's question using a pre-trained model, and displays the response.
  61. """
  62. model = 'llama3-8b-8192'
  63. embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
  64. # Initialize the Groq client
  65. groq_api_key = os.getenv('GROQ_API_KEY')
  66. pinecone_api_key=os.getenv('PINECONE_API_KEY')
  67. pinecone_index_name = "presidential-speeches"
  68. client = Groq(
  69. api_key=groq_api_key
  70. )
  71. pc = Pinecone(api_key = pinecone_api_key)
  72. docsearch = PineconeVectorStore(index_name=pinecone_index_name, embedding=embedding_function)
  73. # Display the title and introduction of the application
  74. print("Presidential Speeches RAG")
  75. multiline_text = """
  76. Welcome! Ask questions about U.S. presidents, like "What were George Washington's views on democracy?" or "What did Abraham Lincoln say about national unity?". The app matches your question to relevant excerpts from presidential speeches and generates a response using a pre-trained model.
  77. """
  78. print(multiline_text)
  79. while True:
  80. # Get the user's question
  81. user_question = input("Ask a question about a US president: ")
  82. if user_question:
  83. pinecone_index_name = "presidential-speeches"
  84. relevant_excerpts = get_relevant_excerpts(user_question, docsearch)
  85. response = presidential_speech_chat_completion(client, model, user_question, relevant_excerpts)
  86. print(response)
  87. if __name__ == "__main__":
  88. main()