# ec2_services.py — WhatsApp bot helpers: TTS/STT (Groq), LLM replies (Llama), media fetch/send (Meta Graph API)
import asyncio
import base64
import os
import tempfile
from io import BytesIO
from pathlib import Path

import httpx
import requests
from dotenv import load_dotenv
from groq import Groq
from openai import OpenAI
from PIL import Image
from together import Together
# Pull credentials/configuration from a local .env file into the environment.
load_dotenv()
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")  # Together AI key (client usage currently commented out below)
LLAMA_API_KEY = os.getenv("LLAMA_API_KEY")  # key for the Llama OpenAI-compatible endpoint
#LLAMA_API_URL = os.getenv("API_URL")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # Groq key, used for both TTS and transcription
META_ACCESS_TOKEN = os.getenv("META_ACCESS_TOKEN")  # Meta Graph API bearer token
PHONE_NUMBER_ID = os.getenv("PHONE_NUMBER_ID")  # WhatsApp Business phone-number ID (media uploads)
WHATSAPP_API_URL = os.getenv("WHATSAPP_API_URL")  # endpoint used to send outbound WhatsApp messages
  21. def text_to_speech(text: str, output_path: str = "reply.mp3") -> str:
  22. """
  23. Synthesizes a given text into an audio file using Groq's TTS service.
  24. Args:
  25. text (str): The text to be synthesized.
  26. output_path (str): The path where the output audio file will be saved. Defaults to "reply.mp3".
  27. Returns:
  28. str: The path to the output audio file, or None if the synthesis failed.
  29. """
  30. try:
  31. client = Groq(api_key=GROQ_API_KEY)
  32. response = client.audio.speech.create(
  33. model="playai-tts",
  34. voice="Aaliyah-PlayAI",
  35. response_format="mp3",
  36. input=text
  37. )
  38. # Convert string path to Path object and stream the response to a file
  39. path_obj = Path(output_path)
  40. response.write_to_file(path_obj)
  41. return str(path_obj)
  42. except Exception as e:
  43. print(f"TTS failed: {e}")
  44. return None
  45. def speech_to_text(input_path: str) -> str:
  46. """
  47. Transcribe an audio file using Groq.
  48. Args:
  49. input_path (str): Path to the audio file to be transcribed.
  50. output_path (str, optional): Path to the output file where the transcription will be saved. Defaults to "transcription.txt".
  51. Returns:
  52. str: The transcribed text.
  53. """
  54. client = Groq(api_key=GROQ_API_KEY)
  55. with open(input_path, "rb") as file:
  56. transcription = client.audio.transcriptions.create(
  57. model="distil-whisper-large-v3-en",
  58. response_format="verbose_json",
  59. file=(input_path, file.read())
  60. )
  61. transcription.text
  62. return transcription.text
  63. def get_llm_response(text_input: str, image_input : str = None) -> str:
  64. """
  65. Get the response from the Together AI LLM given a text input and an optional image input.
  66. Args:
  67. text_input (str): The text to be sent to the LLM.
  68. image_input (str, optional): The base64 encoded image to be sent to the LLM. Defaults to None.
  69. Returns:
  70. str: The response from the LLM.
  71. """
  72. messages = []
  73. # print(bool(image_input))
  74. if image_input:
  75. messages.append({
  76. "type": "image_url",
  77. "image_url": {"url": f"data:image/jpeg;base64,{image_input}"}
  78. })
  79. messages.append({
  80. "type": "text",
  81. "text": text_input
  82. })
  83. try:
  84. #client = Together(api_key=TOGETHER_API_KEY)
  85. client = OpenAI(base_url= "https://api.llama.com/compat/v1/")
  86. completion = client.chat.completions.create(
  87. model="Llama-4-Maverick-17B-128E-Instruct-FP8",
  88. messages=[
  89. {
  90. "role": "user",
  91. "content": messages
  92. }
  93. ]
  94. )
  95. if completion.choices and len(completion.choices) > 0:
  96. return completion.choices[0].message.content
  97. else:
  98. print("Empty response from Together API")
  99. return None
  100. except Exception as e:
  101. print(f"LLM error: {e}")
  102. return None
  103. async def fetch_media(media_id: str) -> str:
  104. """
  105. Fetches the URL of a media given its ID.
  106. Args:
  107. media_id (str): The ID of the media to fetch.
  108. Returns:
  109. str: The URL of the media.
  110. """
  111. url = "https://graph.facebook.com/v22.0/{media_id}"
  112. async with httpx.AsyncClient() as client:
  113. try:
  114. response = await client.get(
  115. url.format(media_id=media_id),
  116. headers={"Authorization": f"Bearer {META_ACCESS_TOKEN}"}
  117. )
  118. if response.status_code == 200:
  119. return response.json().get("url")
  120. else:
  121. print(f"Failed to fetch media: {response.text}")
  122. except Exception as e:
  123. print(f"Exception during media fetch: {e}")
  124. return None
  125. async def handle_image_message(media_id: str) -> str:
  126. """
  127. Handle an image message by fetching the image media, converting it to base64,
  128. and returning the base64 string.
  129. Args:
  130. media_id (str): The ID of the image media to fetch.
  131. Returns:
  132. str: The base64 string of the image.
  133. """
  134. media_url = await fetch_media(media_id)
  135. # print(media_url)
  136. async with httpx.AsyncClient() as client:
  137. headers = {"Authorization": f"Bearer {META_ACCESS_TOKEN}"}
  138. response = await client.get(media_url, headers=headers)
  139. response.raise_for_status()
  140. # Convert image to base64
  141. image = Image.open(BytesIO(response.content))
  142. buffered = BytesIO()
  143. image.save(buffered, format="JPEG") # Save as JPEG
  144. # image.save("./test.jpeg", format="JPEG") # Optional save
  145. base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
  146. return base64_image
  147. async def handle_audio_message(media_id: str):
  148. """
  149. Handle an audio message by fetching the audio media, writing it to a temporary file,
  150. and then using Groq to transcribe the audio to text.
  151. Args:
  152. media_id (str): The ID of the audio media to fetch.
  153. Returns:
  154. str: The transcribed text.
  155. """
  156. media_url = await fetch_media(media_id)
  157. # print(media_url)
  158. async with httpx.AsyncClient() as client:
  159. headers = {"Authorization": f"Bearer {META_ACCESS_TOKEN}"}
  160. response = await client.get(media_url, headers=headers)
  161. response.raise_for_status()
  162. audio_bytes = response.content
  163. temp_audio_path = "temp_audio.m4a"
  164. with open(temp_audio_path, "wb") as f:
  165. f.write(audio_bytes)
  166. return speech_to_text(temp_audio_path)
  167. async def send_audio_message(to: str, file_path: str):
  168. """
  169. Send an audio message to a WhatsApp user.
  170. Args:
  171. to (str): The phone number of the recipient.
  172. file_path (str): The path to the audio file to be sent.
  173. Returns:
  174. None
  175. Raises:
  176. None
  177. """
  178. url = f"https://graph.facebook.com/v20.0/{PHONE_NUMBER_ID}/media"
  179. with open(file_path, "rb") as f:
  180. files = { "file": ("reply.mp3", open(file_path, "rb"), "audio/mpeg")}
  181. params = {
  182. "messaging_product": "whatsapp",
  183. "type": "audio",
  184. "access_token": META_ACCESS_TOKEN
  185. }
  186. response = requests.post(url, params=params, files=files)
  187. if response.status_code == 200:
  188. media_id = response.json().get("id")
  189. payload = {
  190. "messaging_product": "whatsapp",
  191. "to": to,
  192. "type": "audio",
  193. "audio": {"id": media_id}
  194. }
  195. headers = {
  196. "Authorization": f"Bearer {META_ACCESS_TOKEN}",
  197. "Content-Type": "application/json"
  198. }
  199. requests.post(WHATSAPP_API_URL, headers=headers, json=payload)
  200. else:
  201. print("Audio upload failed:", response.text)