# ec2_endpoints.py — FastAPI endpoints for LLM text / image / audio requests.
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from pydantic import BaseModel
from typing import Optional
# Project-local service layer: LLM calls, text-to-speech, and media handlers.
from service import text_to_speech, get_llm_response, handle_image_message, handle_audio_message, send_audio_message
from enum import Enum

# Application instance exposing the endpoints defined below.
app = FastAPI()
class TextToSpeechRequest(BaseModel):
    """Request body for a text-to-speech conversion."""

    # Text to synthesize into speech.
    text: str
    # Destination path for the generated audio file.
    output_path: Optional[str] = "reply.mp3"
class TextToSpeechResponse(BaseModel):
    """Response body for a text-to-speech conversion."""

    # Path of the generated audio file; None when synthesis failed.
    file_path: Optional[str]
    # Human-readable error description; None on success.
    error: Optional[str] = None
class KindEnum(str, Enum):
    """Kind of inbound media attached to an LLM request."""

    audio = "audio"
    image = "image"
class LLMRequest(BaseModel):
    """Request body for the /llm-response endpoint."""

    # Text prompt from the user.
    user_input: str
    # Identifier used to fetch attached media; expected when `kind` is set.
    media_id: Optional[str] = None
    # Type of attached media (audio/image); None for a plain text request.
    kind: Optional[KindEnum] = None
class LLMResponse(BaseModel):
    """Response body for the /llm-response endpoint."""

    # Generated LLM reply; None when generation failed.
    response: Optional[str]
    # Human-readable error description; None on success.
    error: Optional[str] = None
  24. @app.post("/llm-response", response_model=LLMResponse)
  25. async def api_llm_response(req: LLMRequest):
  26. text_message = req.user_input
  27. image_base64 = None
  28. if req.kind == KindEnum.image:
  29. image_base64 = await handle_image_message(req.media_id)
  30. result = get_llm_response(text_message, image_input=image_base64)
  31. # print(result)
  32. elif req.kind == KindEnum.audio:
  33. text_message = await handle_audio_message(req.media_id)
  34. result = get_llm_response(text_message)
  35. audio_path = text_to_speech(text=result, output_path="reply.mp3")
  36. return FileResponse(audio_path, media_type="audio/mpeg", filename="reply.mp3")
  37. else:
  38. result = get_llm_response(text_message)
  39. if result is None:
  40. return LLMResponse(response=None, error="LLM response generation failed.")
  41. return LLMResponse(response=result)