import logging
from collections.abc import Iterator
from contextlib import contextmanager

from flask import request
from flask_restx import fields, marshal_with, reqparse
from werkzeug.exceptions import InternalServerError

import services
from controllers.web import api
from controllers.web.error import (
    AppUnavailableError,
    AudioTooLargeError,
    CompletionRequestError,
    NoAudioUploadedError,
    ProviderModelCurrentlyNotSupportError,
    ProviderNotInitializeError,
    ProviderNotSupportSpeechToTextError,
    ProviderQuotaExceededError,
    UnsupportedAudioTypeError,
)
from controllers.web.wraps import WebApiResource
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from models.model import App
from services.audio_service import AudioService
from services.errors.audio import (
    AudioTooLargeServiceError,
    NoAudioUploadedServiceError,
    ProviderNotSupportSpeechToTextServiceError,
    UnsupportedAudioTypeServiceError,
)

logger = logging.getLogger(__name__)


@contextmanager
def _translate_service_errors(failure_log_message: str) -> Iterator[None]:
    """Translate service-layer exceptions raised in the ``with`` body into web API errors.

    Both audio endpoints share exactly the same mapping, so it lives here once
    instead of being duplicated in every handler.

    Args:
        failure_log_message: Message logged (with traceback) when an exception
            that has no dedicated mapping is converted to ``InternalServerError``.

    Raises:
        AppUnavailableError: The app model config is broken (also logged).
        NoAudioUploadedError: The service reported that no audio was uploaded.
        AudioTooLargeError: The service rejected the audio as too large; the
            service error's message is forwarded.
        UnsupportedAudioTypeError: The service rejected the audio type.
        ProviderNotSupportSpeechToTextError: The provider has no speech-to-text support.
        ProviderNotInitializeError: The provider token is not initialised.
        ProviderQuotaExceededError: The provider quota is exhausted.
        ProviderModelCurrentlyNotSupportError: The model is currently not supported.
        CompletionRequestError: The model invocation failed.
        ValueError: Re-raised unchanged.
        InternalServerError: Any other exception (logged with traceback).
    """
    try:
        yield
    except services.errors.app_model_config.AppModelConfigBrokenError as exc:
        logger.exception("App model config broken.")
        raise AppUnavailableError() from exc
    except NoAudioUploadedServiceError as exc:
        raise NoAudioUploadedError() from exc
    except AudioTooLargeServiceError as exc:
        raise AudioTooLargeError(str(exc)) from exc
    except UnsupportedAudioTypeServiceError as exc:
        raise UnsupportedAudioTypeError() from exc
    except ProviderNotSupportSpeechToTextServiceError as exc:
        raise ProviderNotSupportSpeechToTextError() from exc
    except ProviderTokenNotInitError as exc:
        raise ProviderNotInitializeError(exc.description) from exc
    except QuotaExceededError as exc:
        raise ProviderQuotaExceededError() from exc
    except ModelCurrentlyNotSupportError as exc:
        raise ProviderModelCurrentlyNotSupportError() from exc
    except InvokeError as exc:
        raise CompletionRequestError(exc.description) from exc
    except ValueError:
        # Deliberately passed through untouched; must stay ahead of the
        # catch-all below so it is not turned into a 500.
        raise
    except Exception as exc:
        logger.exception(failure_log_message)
        raise InternalServerError() from exc


class AudioApi(WebApiResource):
    """Web API resource that converts an uploaded audio file to text."""

    # Response schema applied by ``marshal_with`` to the value returned by ``post``.
    audio_to_text_response_fields = {
        "text": fields.String,
    }

    @marshal_with(audio_to_text_response_fields)
    @api.doc("Audio to Text")
    @api.doc(description="Convert audio file to text using speech-to-text service.")
    @api.doc(
        responses={
            200: "Success",
            400: "Bad Request",
            401: "Unauthorized",
            403: "Forbidden",
            413: "Audio file too large",
            415: "Unsupported audio type",
            500: "Internal Server Error",
        }
    )
    def post(self, app_model: App, end_user):
        """Convert audio to text

        Reads the multipart upload named ``file`` from the current request and
        passes it to ``AudioService.transcript_asr``.

        Args:
            app_model: The app the request is addressed to.
            end_user: The end user making the request; forwarded to the service as-is.

        Returns:
            Whatever ``AudioService.transcript_asr`` returns, marshalled with
            ``audio_to_text_response_fields``.

        Raises:
            NoAudioUploadedError: The request carries no ``file`` upload.
            Other web errors: See ``_translate_service_errors``.
        """
        # ``request.files["file"]`` would raise a generic bad-request key error
        # on a missing upload; report the dedicated "no audio uploaded" error.
        file = request.files.get("file")
        if file is None:
            raise NoAudioUploadedError()

        with _translate_service_errors("Failed to handle post request to AudioApi"):
            return AudioService.transcript_asr(app_model=app_model, file=file, end_user=end_user)


class TextApi(WebApiResource):
    """Web API resource that converts text to audio."""

    # Response schema applied by ``marshal_with`` to the value returned by ``post``.
    text_to_audio_response_fields = {
        "audio_url": fields.String,
        "duration": fields.Float,
    }

    @marshal_with(text_to_audio_response_fields)
    @api.doc("Text to Audio")
    @api.doc(description="Convert text to audio using text-to-speech service.")
    @api.doc(
        responses={
            200: "Success",
            400: "Bad Request",
            401: "Unauthorized",
            403: "Forbidden",
            500: "Internal Server Error",
        }
    )
    def post(self, app_model: App, end_user):
        """Convert text to audio

        Parses the JSON body and passes ``text``, ``voice`` and ``message_id``
        to ``AudioService.transcript_tts`` together with the end user's
        ``external_user_id``.

        Args:
            app_model: The app the request is addressed to.
            end_user: The end user making the request; only its
                ``external_user_id`` is forwarded to the service.

        Returns:
            Whatever ``AudioService.transcript_tts`` returns, marshalled with
            ``text_to_audio_response_fields``.

        Raises:
            Web errors: See ``_translate_service_errors``.
        """
        # Parse outside the guarded region so that an HTTP error raised while
        # parsing the request is not caught by the catch-all handler and
        # reported as a 500.
        parser = reqparse.RequestParser()
        parser.add_argument("message_id", type=str, required=False, location="json")
        parser.add_argument("voice", type=str, location="json")
        parser.add_argument("text", type=str, location="json")
        # Accepted in the request body but not forwarded to the service below.
        parser.add_argument("streaming", type=bool, location="json")
        args = parser.parse_args()

        message_id = args.get("message_id")
        text = args.get("text")
        voice = args.get("voice")

        with _translate_service_errors("Failed to handle post request to TextApi"):
            return AudioService.transcript_tts(
                app_model=app_model,
                text=text,
                voice=voice,
                end_user=end_user.external_user_id,
                message_id=message_id,
            )


api.add_resource(AudioApi, "/audio-to-text")
api.add_resource(TextApi, "/text-to-audio")