Fix/azure tts no device output (#79)

* enable audio logging if env AZURE_AUDIO_LOGGING is set * wip * per discussion with microsoft, add nullptr to creation of speechSynthesizer to ensure it knows we do not want it to play to device Signed-off-by: Dave Horton <daveh@beachdognet.com> * logging * fix bug in creation of config string * fix ticket 230 - Microsoft TTS having configuration data as part of audio generation * azure transcribe, resuse existing cap_cb if azure configuration is changed Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * clean up azure code for how to re-create gsstream when configuration is changed Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * fix review comments Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * fix review comment Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * fix review comment Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * wrap function in try catch --------- Signed-off-by: Dave Horton <daveh@beachdognet.com> Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> Co-authored-by: Hoan HL <quan.luuhoang8@gmail.com>
2026-01-25 02:08:27 +00:00 · 2024-06-23 14:54:36 -04:00
parent 2e553631dc
commit 8bd20703b8
3 changed files with 165 additions and 87 deletions
--- a/mod_azure_tts/azure_glue.cpp
+++ b/mod_azure_tts/azure_glue.cpp
@@ -15,11 +15,13 @@ typedef boost::circular_buffer<uint16_t> CircularBuffer_t;

 using namespace Microsoft::CognitiveServices::Speech;

+static const char* audioLogFile= std::getenv("AZURE_AUDIO_LOGGING");
+
 static std::string fullDirPath;

 static void start_synthesis(std::shared_ptr<SpeechSynthesizer> speechSynthesizer, const char* text, azure_t* a) {
    try {
-      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis calling \n");
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis calling, text %s\n", text);
      auto result = std::strncmp(text, "<speak", 6) == 0 ?
        speechSynthesizer->SpeakSsmlAsync(text).get() :
        speechSynthesizer->SpeakTextAsync(text).get();
@@ -43,6 +45,8 @@ static void start_synthesis(std::shared_ptr<SpeechSynthesizer> speechSynthesizer
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "mod_azure_tts: Exception in start_synthesis %s\n",  e.what());
    }
    a->draining = 1;
+
+    free((void*) text);
 }

 extern "C" {
@@ -87,14 +91,7 @@ extern "C" {

  switch_status_t azure_speech_feed_tts(azure_t* a, char* text, switch_speech_flag_t *flags) {
    const int MAX_CHARS = 20;
-    char tempText[MAX_CHARS + 4]; // +4 for the ellipsis and null terminator
-
-    if (strlen(text) > MAX_CHARS) {
-        strncpy(tempText, text, MAX_CHARS);
-        strcpy(tempText + MAX_CHARS, "...");
-    } else {
-        strcpy(tempText, text);
-    }
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts %s\n", text);

    /* open cache file */
    if (a->cache_audio && fullDirPath.length() > 0) {
@@ -168,8 +165,14 @@ extern "C" {
 			speechConfig->SetEndpointId(a->endpointId);
 		}

+    if (audioLogFile) {
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts enabling audio logging to %s\n", audioLogFile);
+      speechConfig->SetProperty(PropertyId::Speech_LogFilename, audioLogFile);
+      speechConfig->EnableAudioLogging();
+    }
+
    try {
-      auto speechSynthesizer = SpeechSynthesizer::FromConfig(speechConfig);
+      auto speechSynthesizer = SpeechSynthesizer::FromConfig(speechConfig, nullptr);

      speechSynthesizer->SynthesisStarted += [a](const SpeechSynthesisEventArgs& e) {
          switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts SynthesisStarted\n");
@@ -266,7 +269,8 @@ extern "C" {
        }
      };

-      std::thread(start_synthesis, speechSynthesizer, text, a).detach();
+      const char* dupText = strdup(text); // text will be freed in the thread
+      std::thread(start_synthesis, speechSynthesizer, dupText, a).detach();
      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts sent synthesize request\n");
    } catch (const std::exception& e) {
      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mod_azure_tts: Exception: %s\n", e.what());