azure: stop specifying english for multilingual (#302)
* azure: stop specifying english for multilingual

* - adds a language_code parameter to the synthesizer config
- splits voice_name to determine the top-level <speak> language, then wraps the message in a <lang> element set from language_code

* move reading audio data into an ephemeral thread

---------

Co-authored-by: Ajay Raj <[email protected]>
zaptrem and ajar98 authored Aug 18, 2023
1 parent 5642e02 commit df9cfbb
Showing 2 changed files with 13 additions and 5 deletions.
1 change: 1 addition & 0 deletions vocode/streaming/models/synthesizer.py
@@ -81,6 +81,7 @@ class AzureSynthesizerConfig(SynthesizerConfig, type=SynthesizerType.AZURE.value
     voice_name: str = AZURE_SYNTHESIZER_DEFAULT_VOICE_NAME
     pitch: int = AZURE_SYNTHESIZER_DEFAULT_PITCH
     rate: int = AZURE_SYNTHESIZER_DEFAULT_RATE
+    language_code: str = "en-US"


 DEFAULT_GOOGLE_LANGUAGE_CODE = "en-US"
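As a rough usage sketch of the new field (not part of this commit's diff): only voice_name and language_code below relate to this change; the base audio settings, the example voice name, and the target language are illustrative assumptions about the surrounding setup.

# Hypothetical configuration: a multilingual Azure voice delivering Spanish text.
from vocode.streaming.models.audio_encoding import AudioEncoding
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig

synthesizer_config = AzureSynthesizerConfig(
    sampling_rate=8000,                          # assumed base SynthesizerConfig field
    audio_encoding=AudioEncoding.LINEAR16,       # assumed base SynthesizerConfig field
    voice_name="en-US-JennyMultilingualNeural",  # example multilingual voice
    language_code="es-ES",                       # new field: language of the message text
)

With language_code left at its "en-US" default, behavior is unchanged; any other value makes create_ssml (next file) wrap the message in a <lang> element.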
17 changes: 12 additions & 5 deletions vocode/streaming/synthesizer/azure_synthesizer.py
@@ -169,12 +169,18 @@ def word_boundary_cb(self, evt, pool):
     def create_ssml(
         self, message: str, bot_sentiment: Optional[BotSentiment] = None
     ) -> str:
+        voice_language_code = self.synthesizer_config.voice_name[:5]
         ssml_root = ElementTree.fromstring(
-            '<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US"></speak>'
+            f'<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="{voice_language_code}"></speak>'
         )
         voice = ElementTree.SubElement(ssml_root, "voice")
         voice.set("name", self.voice_name)
-        voice_root = voice
+        if self.synthesizer_config.language_code != "en-US":
+            lang = ElementTree.SubElement(voice, "{%s}lang" % NAMESPACES.get(""))
+            lang.set("xml:lang", self.synthesizer_config.language_code)
+            voice_root = lang
+        else:
+            voice_root = voice
         if bot_sentiment and bot_sentiment.emotion:
             styled = ElementTree.SubElement(
                 voice, "{%s}express-as" % NAMESPACES.get("mstts")
@@ -247,9 +253,10 @@ async def chunk_generator(
             audio_data_stream: speechsdk.AudioDataStream, chunk_transform=lambda x: x
         ):
             audio_buffer = bytes(chunk_size)
-            while not audio_data_stream.can_read_data(chunk_size):
-                await asyncio.sleep(0)
-            filled_size = audio_data_stream.read_data(audio_buffer)
+            filled_size = await asyncio.get_event_loop().run_in_executor(
+                self.thread_pool_executor,
+                lambda: audio_data_stream.read_data(audio_buffer),
+            )
             if filled_size != chunk_size:
                 yield SynthesisResult.ChunkResult(
                     chunk_transform(audio_buffer[offset:]), True
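The second hunk replaces the busy-wait on can_read_data with a blocking read_data call dispatched to a worker thread, so the event loop keeps serving other coroutines while the Azure SDK fills the buffer. A generic, self-contained sketch of that pattern (blocking_read and the pool here are stand-ins, not the Speech SDK):

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor

thread_pool_executor = ThreadPoolExecutor(max_workers=1)

def blocking_read(buffer: bytearray) -> int:
    # Stand-in for speechsdk.AudioDataStream.read_data: blocks, fills the
    # buffer, and returns the number of bytes written.
    time.sleep(0.05)
    data = b"\x00" * len(buffer)
    buffer[: len(data)] = data
    return len(data)

async def read_chunk(chunk_size: int = 1024) -> bytes:
    audio_buffer = bytearray(chunk_size)
    loop = asyncio.get_event_loop()
    # The blocking call runs on the pool thread; the event loop stays free.
    filled_size = await loop.run_in_executor(
        thread_pool_executor, lambda: blocking_read(audio_buffer)
    )
    return bytes(audio_buffer[:filled_size])

asyncio.run(read_chunk())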
