diff --git a/requirements.txt b/requirements.txt index 5170bd0..9ba5862 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ uvicorn loguru piper-tts coqui-tts[languages] +langdetect # Creating an environment where deepspeed works is complex, for now it will be disabled by default. #deepspeed @@ -12,4 +13,4 @@ torchaudio; sys_platform != "darwin" torch; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin" torchaudio; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin" -# ROCM (Linux only) - use requirements.amd.txt \ No newline at end of file +# ROCM (Linux only) - use requirements.amd.txt diff --git a/speech.py b/speech.py index 6cbeeb5..0cb5cd3 100755 --- a/speech.py +++ b/speech.py @@ -16,7 +16,7 @@ from openedai import OpenAIStub, BadRequestError, ServiceUnavailableError from pydantic import BaseModel import uvicorn - +from langdetect import detect @contextlib.asynccontextmanager async def lifespan(app): @@ -270,7 +270,23 @@ async def generate_speech(request: GenerateSpeechRequest): # Pipe the output from piper/xtts to the input of ffmpeg ffmpeg_args.extend(["-"]) - language = voice_map.pop('language', 'en') + language = voice_map.pop('language', 'auto') + # 'auto' (the default when the voice map sets no language) means: guess the language from the input text. + if language == 'auto': + try: + language = detect(input_text) + if language not in [ + 'en', 'es', 'fr', 'de', 'it', 'pt', 'pl', 'tr', + 'ru', 'nl', 'cs', 'ar', 'zh-cn', 'hu', 'ko', 'ja', 'hi' + ]: + logger.debug(f"Detected language {language} not supported, defaulting to en") + language = 'en' + else: + logger.debug(f"Detected language: {language}") + # Catch Exception rather than using a bare except so KeyboardInterrupt/SystemExit still propagate. + except Exception: + language = 'en' + logger.debug("Failed to detect language, defaulting to en") comment = voice_map.pop('comment', None) # ignored.