from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from pydantic import BaseModel
from typing import Optional
from enum import Enum

from service import (
    text_to_speech,
    get_llm_response,
    handle_image_message,
    handle_audio_message,
    send_audio_message,
)

app = FastAPI()


class TextToSpeechRequest(BaseModel):
    text: str
    output_path: Optional[str] = "reply.mp3"


class TextToSpeechResponse(BaseModel):
    file_path: Optional[str]
    error: Optional[str] = None


class KindEnum(str, Enum):
    audio = "audio"
    image = "image"


class LLMRequest(BaseModel):
    user_input: str
    media_id: Optional[str] = None
    kind: Optional[KindEnum] = None


class LLMResponse(BaseModel):
    response: Optional[str]
    error: Optional[str] = None


@app.post("/llm-response", response_model=LLMResponse)
async def api_llm_response(req: LLMRequest):
    text_message = req.user_input
    image_base64 = None

    if req.kind == KindEnum.image:
        # Resolve the media id to a base64-encoded image and send it to the LLM
        # together with the user's text.
        image_base64 = await handle_image_message(req.media_id)
        result = get_llm_response(text_message, image_input=image_base64)
    elif req.kind == KindEnum.audio:
        # Transcribe the audio message, generate a text reply, then return the
        # reply as synthesized speech instead of JSON.
        text_message = await handle_audio_message(req.media_id)
        result = get_llm_response(text_message)
        audio_path = text_to_speech(text=result, output_path="reply.mp3")
        return FileResponse(audio_path, media_type="audio/mpeg", filename="reply.mp3")
    else:
        # Plain text: pass the user's input straight to the LLM.
        result = get_llm_response(text_message)

    if result is None:
        return LLMResponse(response=None, error="LLM response generation failed.")
    return LLMResponse(response=result)
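The TextToSpeechRequest and TextToSpeechResponse models above are declared but not used by any route in this excerpt. A minimal sketch of a matching endpoint, assuming text_to_speech returns the written file path on success and None on failure (the None-on-failure behavior is an assumption; the excerpt only shows that it returns a path):

@app.post("/text-to-speech", response_model=TextToSpeechResponse)
async def api_text_to_speech(req: TextToSpeechRequest):
    # Synthesize the given text; output_path defaults to "reply.mp3" via the model.
    file_path = text_to_speech(text=req.text, output_path=req.output_path)
    if file_path is None:
        return TextToSpeechResponse(file_path=None, error="Text-to-speech generation failed.")
    return TextToSpeechResponse(file_path=file_path)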
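To exercise the /llm-response endpoint end to end, one option is to run the app with uvicorn and POST an LLMRequest payload; the module name main, the local host/port, and the MEDIA_ID value below are assumptions for illustration, not part of the excerpt:

# Assumes the code above is saved as main.py and the server is started with:
#   uvicorn main:app --reload
import requests

# Text-only request: `kind` is omitted, so the endpoint takes the plain-text branch.
resp = requests.post(
    "http://127.0.0.1:8000/llm-response",
    json={"user_input": "Summarize my last message."},
)
print(resp.json())  # e.g. {"response": "...", "error": null}

# Image request: `media_id` is whatever identifier handle_image_message() expects
# on the server side (hypothetical value shown here).
resp = requests.post(
    "http://127.0.0.1:8000/llm-response",
    json={"user_input": "What is in this picture?", "media_id": "MEDIA_ID", "kind": "image"},
)
print(resp.json())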