From e431c5d159ed7933a4458b2da8b504a21bf6e89f Mon Sep 17 00:00:00 2001
From: Dave Horton <daveh@beachdognet.com>
Date: Sun, 7 Apr 2024 11:39:09 -0400
Subject: [PATCH] changes to mod_azure_tts for event handling, resolve session
 locking issue in mod_whisper_tts and mod_deepgram_tts (#37)

---
 mod_azure_tts/Makefile.am          |  2 +-
 mod_azure_tts/azure_glue.cpp       | 33 ++++++++++--------------------
 mod_deepgram_tts/Makefile.am       |  2 +-
 mod_deepgram_tts/deepgram_glue.cpp |  4 ++--
 mod_whisper_tts/Makefile.am        |  2 +-
 mod_whisper_tts/mod_whisper_tts.c  | 11 +++++-----
 mod_whisper_tts/whisper_glue.cpp   | 12 +++++------
 7 files changed, 26 insertions(+), 40 deletions(-)
diff --git a/mod_azure_tts/Makefile.am b/mod_azure_tts/Makefile.am
index 27c5f63..d955b8a 100644
--- a/mod_azure_tts/Makefile.am
+++ b/mod_azure_tts/Makefile.am
@@ -7,4 +7,4 @@ mod_azure_tts_la_CFLAGS   = $(AM_CFLAGS)
 mod_azure_tts_la_CXXFLAGS = $(AM_CXXFLAGS) -std=c++14 -I/usr/local/include/MicrosoftSpeechSDK/cxx_api -I/usr/local/include/MicrosoftSpeechSDK/c_api
 
 mod_azure_tts_la_LIBADD   = $(switch_builddir)/libfreeswitch.la
-mod_azure_tts_la_LDFLAGS  = -avoid-version -module -no-undefined -L/usr/local/lib/MicrosoftSpeechSDK/x64 -lMicrosoft.CognitiveServices.Speech.core -shared `pkg-config --libs boost` -lstdc++
\ No newline at end of file
+mod_azure_tts_la_LDFLAGS  = -avoid-version -module -no-undefined -L/usr/local/lib/MicrosoftSpeechSDK/x64 -lMicrosoft.CognitiveServices.Speech.core -shared -lstdc++ -lboost_system -lboost_thread
\ No newline at end of file
diff --git a/mod_azure_tts/azure_glue.cpp b/mod_azure_tts/azure_glue.cpp
index 8ead3c0..94093cc 100644
--- a/mod_azure_tts/azure_glue.cpp
+++ b/mod_azure_tts/azure_glue.cpp
@@ -17,7 +17,7 @@ using namespace Microsoft::CognitiveServices::Speech;
 
 static std::string fullDirPath;
 
-static void start_synthesis(std::shared_ptr<SpeechSynthesizer> speechSynthesizer, const char* text) {
+static void start_synthesis(std::shared_ptr<SpeechSynthesizer> speechSynthesizer, const char* text, azure_t* a) {
     try {
       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis calling \n");
       auto result = std::strncmp(text, "<speak", 6) == 0 ?
@@ -25,20 +25,24 @@ static void start_synthesis(std::shared_ptr<SpeechSynthesizer> speechSynthesizer
         speechSynthesizer->SpeakTextAsync(text).get();
 
       if (result->Reason == ResultReason::SynthesizingAudioCompleted) {
-        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis completed id %s, audio data - bytes: %ld, milliseconds: %ld milliseconds\n", 
+        a->response_code = 200;
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis completed id %s, audio data - bytes: %u, duration: %ldms\n", // %u: GetAudioLength() returns uint32_t
           result->ResultId.c_str(), result->GetAudioLength(), result->AudioDuration.count());
       } else if (result->Reason == ResultReason::Canceled) {
         auto cancellation = SpeechSynthesisCancellationDetails::FromResult(result);
-        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, 
-          "Error synthesizing text %s: (%d) %s.\n", text, static_cast<int>(cancellation->ErrorCode), cancellation->ErrorDetails.c_str());
+        a->response_code = static_cast<long int>(cancellation->ErrorCode);
+        a->err_msg = strdup(cancellation->ErrorDetails.c_str());
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error synthesizing text %d with error string: %s.\n",
+          static_cast<int>(cancellation->ErrorCode), cancellation->ErrorDetails.c_str());
       } else {
+        a->response_code = 500;
         switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error synthsize text %s (%d).\n", text, static_cast<int>(result->Reason));
       }
-
-      
     } catch (const std::exception& e) {
+        a->response_code = 500;
         switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "mod_azure_tts: Exception in start_synthesis %s\n",  e.what());
     }
+    a->draining = 1;
 }
 
 extern "C" {
@@ -179,7 +183,6 @@ extern "C" {
 
     speechSynthesizer->SynthesisStarted += [a](const SpeechSynthesisEventArgs& e) {
         switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts SynthesisStarted\n");
-        a->response_code = 200;
     };
 
     speechSynthesizer->Synthesizing += [a](const SpeechSynthesisEventArgs& e) {
@@ -240,21 +243,7 @@ extern "C" {
       }
     };
 
-    speechSynthesizer->SynthesisCompleted += [a](const SpeechSynthesisEventArgs& e) {
-       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts SynthesisCompleted\n");
-       a->draining = 1;
-    };
-
-    speechSynthesizer->SynthesisCanceled += [a](const SpeechSynthesisEventArgs& e) {
-      if (e.Result->Reason == ResultReason::Canceled) {
-        auto cancellation = SpeechSynthesisCancellationDetails::FromResult(e.Result);
-        a->response_code = static_cast<long int>(cancellation->ErrorCode);
-        a->err_msg = strdup(cancellation->ErrorDetails.c_str());
-        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error synthesizing text %d with error string: %s.\n", static_cast<int>(cancellation->ErrorCode), cancellation->ErrorDetails.c_str());
-      }
-       a->draining = 1;
-    };
-    std::thread(start_synthesis, speechSynthesizer, text).detach();
+    std::thread(start_synthesis, speechSynthesizer, text, a).detach();
     switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts sent synthesize request\n");
     return SWITCH_STATUS_SUCCESS;
   }
diff --git a/mod_deepgram_tts/Makefile.am b/mod_deepgram_tts/Makefile.am
index fd8bb8e..beef7fe 100644
--- a/mod_deepgram_tts/Makefile.am
+++ b/mod_deepgram_tts/Makefile.am
@@ -5,4 +5,4 @@ mod_LTLIBRARIES = mod_deepgram_tts.la
 mod_deepgram_tts_la_SOURCES  = mod_deepgram_tts.c deepgram_glue.cpp
 mod_deepgram_tts_la_CFLAGS   = $(AM_CFLAGS)
 mod_deepgram_tts_la_LIBADD   = $(switch_builddir)/libfreeswitch.la
-mod_deepgram_tts_la_LDFLAGS  = -avoid-version -module -no-undefined -shared `pkg-config --libs boost` -lstdc++
+mod_deepgram_tts_la_LDFLAGS  = -avoid-version -module -no-undefined -shared -lstdc++ -lboost_system -lboost_thread
diff --git a/mod_deepgram_tts/deepgram_glue.cpp b/mod_deepgram_tts/deepgram_glue.cpp
index d244de5..0158739 100644
--- a/mod_deepgram_tts/deepgram_glue.cpp
+++ b/mod_deepgram_tts/deepgram_glue.cpp
@@ -461,6 +461,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
     switch_core_session_t* session = switch_core_session_locate(d->session_id);
     if (session) {
       switch_channel_t *channel = switch_core_session_get_channel(session);
+      switch_core_session_rwunlock(session);
       if (channel) {
         switch_event_t *event;
         if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_START) == SWITCH_STATUS_SUCCESS) {
@@ -501,7 +502,6 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
       else {
         switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: channel not found\n");
       }
-      switch_core_session_rwunlock(session);
     }
     else {
       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: session %s not found\n", d->session_id);
@@ -888,6 +888,7 @@ extern "C" {
       switch_core_session_t* session = switch_core_session_locate(d->session_id);
       if (session) {
         switch_channel_t *channel = switch_core_session_get_channel(session);
+        switch_core_session_rwunlock(session);
         if (channel) {
           switch_event_t *event;
           if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_STOP) == SWITCH_STATUS_SUCCESS) {
@@ -909,7 +910,6 @@ extern "C" {
         else {
           switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: channel not found\n");
         }
-        switch_core_session_rwunlock(session);
       }
     }
     return SWITCH_STATUS_SUCCESS;
diff --git a/mod_whisper_tts/Makefile.am b/mod_whisper_tts/Makefile.am
index 753f28b..494a517 100644
--- a/mod_whisper_tts/Makefile.am
+++ b/mod_whisper_tts/Makefile.am
@@ -5,4 +5,4 @@ mod_LTLIBRARIES = mod_whisper_tts.la
 mod_whisper_tts_la_SOURCES  = mod_whisper_tts.c whisper_glue.cpp
 mod_whisper_tts_la_CFLAGS   = $(AM_CFLAGS)
 mod_whisper_tts_la_LIBADD   = $(switch_builddir)/libfreeswitch.la
-mod_whisper_tts_la_LDFLAGS  = -avoid-version -module -no-undefined -shared `pkg-config --libs boost` -lstdc++ -lmpg123
+mod_whisper_tts_la_LDFLAGS  = -avoid-version -module -no-undefined -shared -lstdc++ -lboost_system -lboost_thread -lmpg123
diff --git a/mod_whisper_tts/mod_whisper_tts.c b/mod_whisper_tts/mod_whisper_tts.c
index cbef2ce..609a616 100644
--- a/mod_whisper_tts/mod_whisper_tts.c
+++ b/mod_whisper_tts/mod_whisper_tts.c
@@ -6,7 +6,7 @@ SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_whisper_tts_shutdown);
 SWITCH_MODULE_DEFINITION(mod_whisper_tts, mod_whisper_tts_load, mod_whisper_tts_shutdown, NULL);
 
 static void clearWhisper(whisper_t* w, int freeAll) {
-  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "clearWhisper\n");
+  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "clearWhisper\n");
   if (w->api_key) free(w->api_key);
   if (w->model_id) free(w->model_id);
   if (w->speed) free(w->speed);
@@ -65,7 +65,7 @@ static switch_status_t w_speech_close(switch_speech_handle_t *sh, switch_speech_
 {
   switch_status_t rc;
   whisper_t *w = createOrRetrievePrivateData(sh);
-  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_close\n");
+  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_close\n");
 
   switch_mutex_destroy(w->mutex);
 
@@ -83,7 +83,7 @@ static switch_status_t w_speech_feed_tts(switch_speech_handle_t *sh, char *text,
   w->draining = 0;
   w->reads = 0;
 
-  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_feed_tts\n");
+  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_feed_tts\n");
 
   return whisper_speech_feed_tts(w, text, flags);
 }
@@ -94,7 +94,6 @@ static switch_status_t w_speech_feed_tts(switch_speech_handle_t *sh, char *text,
 static switch_status_t w_speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *datalen, switch_speech_flag_t *flags)
 {
   whisper_t *w = createOrRetrievePrivateData(sh);
-  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_read_tts\n");
   return whisper_speech_read_tts(w, data, datalen, flags);
 }
 
@@ -104,7 +103,7 @@ static switch_status_t w_speech_read_tts(switch_speech_handle_t *sh, void *data,
 static void w_speech_flush_tts(switch_speech_handle_t *sh)
 {
   whisper_t *w = createOrRetrievePrivateData(sh);
-  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_flush_tts\n");
+  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_flush_tts\n");
   whisper_speech_flush_tts(w);
 
   clearWhisper(w, 0);
@@ -113,7 +112,7 @@ static void w_speech_flush_tts(switch_speech_handle_t *sh)
 static void w_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val)
 {
   whisper_t *w = createOrRetrievePrivateData(sh);
-  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_text_param_tts: %s=%s\n", param, val);
+  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_text_param_tts: %s=%s\n", param, val);
   if (0 == strcmp(param, "api_key")) {
     if (w->api_key) free(w->api_key);
     w->api_key = strdup(val);
diff --git a/mod_whisper_tts/whisper_glue.cpp b/mod_whisper_tts/whisper_glue.cpp
index b6a8a27..b9128d2 100644
--- a/mod_whisper_tts/whisper_glue.cpp
+++ b/mod_whisper_tts/whisper_glue.cpp
@@ -442,8 +442,6 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
   }
   {
     switch_mutex_lock(w->mutex);
-    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "write_cb: received data, response %ld\n", 
-      w->response_code);
 
     if (w->response_code > 0 && w->response_code != 200) {
       std::string body((char *) ptr, bytes_received);
@@ -482,6 +480,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
     switch_core_session_t* session = switch_core_session_locate(w->session_id);
     if (session) {
       switch_channel_t *channel = switch_core_session_get_channel(session);
+      switch_core_session_rwunlock(session);
       if (channel) {
         switch_event_t *event;
         if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_START) == SWITCH_STATUS_SUCCESS) {
@@ -525,7 +524,6 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
       else {
         switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: channel not found\n");
       }
-      switch_core_session_rwunlock(session);
     }
     else {
       switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: session %s not found\n", w->session_id);
@@ -810,6 +808,7 @@ extern "C" {
       switch_codec_implementation_t read_impl;
       switch_core_session_t *psession = switch_core_session_locate(w->session_id);
       switch_core_session_get_read_impl(psession, &read_impl);
+      switch_core_session_rwunlock(psession);
       uint32_t samples_per_second = !strcasecmp(read_impl.iananame, "g722") ? read_impl.actual_samples_per_second : read_impl.samples_per_second;
       if (mpg123_param(mh, MPG123_FORCE_RATE, samples_per_second /*Hz*/, 0) != MPG123_OK) {
         switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error mpg123_param!\n");
@@ -897,8 +896,7 @@ extern "C" {
 
   switch_status_t whisper_speech_flush_tts(whisper_t* w) {
     bool download_complete = w->response_code == 200;
-    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "whisper_speech_flush_tts, download complete? %s\n", download_complete ? "yes" : "no") ;  
-
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "whisper_speech_flush_tts, download complete? %s\n", download_complete ? "yes" : "no") ;  
     ConnInfo_t *conn = (ConnInfo_t *) w->conn;
     CircularBuffer_t *cBuffer = (CircularBuffer_t *) w->circularBuffer;
     delete cBuffer;
@@ -930,6 +928,7 @@ extern "C" {
       switch_core_session_t* session = switch_core_session_locate(w->session_id);
       if (session) {
         switch_channel_t *channel = switch_core_session_get_channel(session);
+        switch_core_session_rwunlock(session);
         if (channel) {
           switch_event_t *event;
           if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_STOP) == SWITCH_STATUS_SUCCESS) {
@@ -951,14 +950,13 @@ extern "C" {
         else {
           switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: channel not found\n");
         }
-        switch_core_session_rwunlock(session);
       }
     }
     return SWITCH_STATUS_SUCCESS;
   }
 
 	switch_status_t whisper_speech_close(whisper_t* w) {
-    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "whisper_speech_close\n") ;
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "whisper_speech_close\n") ;
 		return SWITCH_STATUS_SUCCESS;
 	}
 }