From e431c5d159ed7933a4458b2da8b504a21bf6e89f Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Sun, 7 Apr 2024 11:39:09 -0400 Subject: [PATCH] changes to mod_azure_tts for event handling, resolve session locking issue in mod_whisper (#37) --- mod_azure_tts/Makefile.am | 2 +- mod_azure_tts/azure_glue.cpp | 33 ++++++++++-------------------- mod_deepgram_tts/Makefile.am | 2 +- mod_deepgram_tts/deepgram_glue.cpp | 4 ++-- mod_whisper_tts/Makefile.am | 2 +- mod_whisper_tts/mod_whisper_tts.c | 11 +++++----- mod_whisper_tts/whisper_glue.cpp | 12 +++++------ 7 files changed, 26 insertions(+), 40 deletions(-) diff --git a/mod_azure_tts/Makefile.am b/mod_azure_tts/Makefile.am index 27c5f63..d955b8a 100644 --- a/mod_azure_tts/Makefile.am +++ b/mod_azure_tts/Makefile.am @@ -7,4 +7,4 @@ mod_azure_tts_la_CFLAGS = $(AM_CFLAGS) mod_azure_tts_la_CXXFLAGS = $(AM_CXXFLAGS) -std=c++14 -I/usr/local/include/MicrosoftSpeechSDK/cxx_api -I/usr/local/include/MicrosoftSpeechSDK/c_api mod_azure_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la -mod_azure_tts_la_LDFLAGS = -avoid-version -module -no-undefined -L/usr/local/lib/MicrosoftSpeechSDK/x64 -lMicrosoft.CognitiveServices.Speech.core -shared `pkg-config --libs boost` -lstdc++ \ No newline at end of file +mod_azure_tts_la_LDFLAGS = -avoid-version -module -no-undefined -L/usr/local/lib/MicrosoftSpeechSDK/x64 -lMicrosoft.CognitiveServices.Speech.core -shared -lstdc++ -lboost_system -lboost_thread \ No newline at end of file diff --git a/mod_azure_tts/azure_glue.cpp b/mod_azure_tts/azure_glue.cpp index 8ead3c0..94093cc 100644 --- a/mod_azure_tts/azure_glue.cpp +++ b/mod_azure_tts/azure_glue.cpp @@ -17,7 +17,7 @@ using namespace Microsoft::CognitiveServices::Speech; static std::string fullDirPath; -static void start_synthesis(std::shared_ptr speechSynthesizer, const char* text) { +static void start_synthesis(std::shared_ptr speechSynthesizer, const char* text, azure_t* a) { try { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis calling \n"); auto result = std::strncmp(text, " speechSynthesizer speechSynthesizer->SpeakTextAsync(text).get(); if (result->Reason == ResultReason::SynthesizingAudioCompleted) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis completed id %s, audio data - bytes: %ld, milliseconds: %ld milliseconds\n", + a->response_code = 200; + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis completed id %s, audio data - bytes: %ld, duration: %ldms\n", result->ResultId.c_str(), result->GetAudioLength(), result->AudioDuration.count()); } else if (result->Reason == ResultReason::Canceled) { auto cancellation = SpeechSynthesisCancellationDetails::FromResult(result); - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, - "Error synthesizing text %s: (%d) %s.\n", text, static_cast(cancellation->ErrorCode), cancellation->ErrorDetails.c_str()); + a->response_code = static_cast(cancellation->ErrorCode); + a->err_msg = strdup(cancellation->ErrorDetails.c_str()); + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error synthesizing text %d with error string: %s.\n", + static_cast(cancellation->ErrorCode), cancellation->ErrorDetails.c_str()); } else { + a->response_code = 500; switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error synthsize text %s (%d).\n", text, static_cast(result->Reason)); } - - } catch (const std::exception& e) { + a->response_code = 500; switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "mod_azure_tts: Exception in start_synthesis %s\n", e.what()); } + a->draining = 1; } extern "C" { @@ -179,7 +183,6 @@ extern "C" { speechSynthesizer->SynthesisStarted += [a](const SpeechSynthesisEventArgs& e) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts SynthesisStarted\n"); - a->response_code = 200; }; speechSynthesizer->Synthesizing += [a](const SpeechSynthesisEventArgs& e) { @@ -240,21 +243,7 @@ extern "C" { } }; - speechSynthesizer->SynthesisCompleted += [a](const SpeechSynthesisEventArgs& e) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts SynthesisCompleted\n"); - a->draining = 1; - }; - - speechSynthesizer->SynthesisCanceled += [a](const SpeechSynthesisEventArgs& e) { - if (e.Result->Reason == ResultReason::Canceled) { - auto cancellation = SpeechSynthesisCancellationDetails::FromResult(e.Result); - a->response_code = static_cast(cancellation->ErrorCode); - a->err_msg = strdup(cancellation->ErrorDetails.c_str()); - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error synthesizing text %d with error string: %s.\n", static_cast(cancellation->ErrorCode), cancellation->ErrorDetails.c_str()); - } - a->draining = 1; - }; - std::thread(start_synthesis, speechSynthesizer, text).detach(); + std::thread(start_synthesis, speechSynthesizer, text, a).detach(); switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts sent synthesize request\n"); return SWITCH_STATUS_SUCCESS; } diff --git a/mod_deepgram_tts/Makefile.am b/mod_deepgram_tts/Makefile.am index fd8bb8e..beef7fe 100644 --- a/mod_deepgram_tts/Makefile.am +++ b/mod_deepgram_tts/Makefile.am @@ -5,4 +5,4 @@ mod_LTLIBRARIES = mod_deepgram_tts.la mod_deepgram_tts_la_SOURCES = mod_deepgram_tts.c deepgram_glue.cpp mod_deepgram_tts_la_CFLAGS = $(AM_CFLAGS) mod_deepgram_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la -mod_deepgram_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared `pkg-config --libs boost` -lstdc++ +mod_deepgram_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared -lstdc++ -lboost_system -lboost_thread diff --git a/mod_deepgram_tts/deepgram_glue.cpp b/mod_deepgram_tts/deepgram_glue.cpp index d244de5..0158739 100644 --- a/mod_deepgram_tts/deepgram_glue.cpp +++ b/mod_deepgram_tts/deepgram_glue.cpp @@ -461,6 +461,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) { switch_core_session_t* session = switch_core_session_locate(d->session_id); if (session) { switch_channel_t *channel = switch_core_session_get_channel(session); + switch_core_session_rwunlock(session); if (channel) { switch_event_t *event; if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_START) == SWITCH_STATUS_SUCCESS) { @@ -501,7 +502,6 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) { else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: channel not found\n"); } - switch_core_session_rwunlock(session); } else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: session %s not found\n", d->session_id); @@ -888,6 +888,7 @@ extern "C" { switch_core_session_t* session = switch_core_session_locate(d->session_id); if (session) { switch_channel_t *channel = switch_core_session_get_channel(session); + switch_core_session_rwunlock(session); if (channel) { switch_event_t *event; if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_STOP) == SWITCH_STATUS_SUCCESS) { @@ -909,7 +910,6 @@ extern "C" { else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: channel not found\n"); } - switch_core_session_rwunlock(session); } } return SWITCH_STATUS_SUCCESS; diff --git a/mod_whisper_tts/Makefile.am b/mod_whisper_tts/Makefile.am index 753f28b..494a517 100644 --- a/mod_whisper_tts/Makefile.am +++ b/mod_whisper_tts/Makefile.am @@ -5,4 +5,4 @@ mod_LTLIBRARIES = mod_whisper_tts.la mod_whisper_tts_la_SOURCES = mod_whisper_tts.c whisper_glue.cpp mod_whisper_tts_la_CFLAGS = $(AM_CFLAGS) mod_whisper_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la -mod_whisper_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared `pkg-config --libs boost` -lstdc++ -lmpg123 +mod_whisper_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared -lstdc++ -lboost_system -lboost_thread -lmpg123 diff --git a/mod_whisper_tts/mod_whisper_tts.c b/mod_whisper_tts/mod_whisper_tts.c index cbef2ce..609a616 100644 --- a/mod_whisper_tts/mod_whisper_tts.c +++ b/mod_whisper_tts/mod_whisper_tts.c @@ -6,7 +6,7 @@ SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_whisper_tts_shutdown); SWITCH_MODULE_DEFINITION(mod_whisper_tts, mod_whisper_tts_load, mod_whisper_tts_shutdown, NULL); static void clearWhisper(whisper_t* w, int freeAll) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "clearWhisper\n"); + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "clearWhisper\n"); if (w->api_key) free(w->api_key); if (w->model_id) free(w->model_id); if (w->speed) free(w->speed); @@ -65,7 +65,7 @@ static switch_status_t w_speech_close(switch_speech_handle_t *sh, switch_speech_ { switch_status_t rc; whisper_t *w = createOrRetrievePrivateData(sh); - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_close\n"); + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_close\n"); switch_mutex_destroy(w->mutex); @@ -83,7 +83,7 @@ static switch_status_t w_speech_feed_tts(switch_speech_handle_t *sh, char *text, w->draining = 0; w->reads = 0; - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_feed_tts\n"); + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_feed_tts\n"); return whisper_speech_feed_tts(w, text, flags); } @@ -94,7 +94,6 @@ static switch_status_t w_speech_feed_tts(switch_speech_handle_t *sh, char *text, static switch_status_t w_speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *datalen, switch_speech_flag_t *flags) { whisper_t *w = createOrRetrievePrivateData(sh); - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_read_tts\n"); return whisper_speech_read_tts(w, data, datalen, flags); } @@ -104,7 +103,7 @@ static switch_status_t w_speech_read_tts(switch_speech_handle_t *sh, void *data, static void w_speech_flush_tts(switch_speech_handle_t *sh) { whisper_t *w = createOrRetrievePrivateData(sh); - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_flush_tts\n"); + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_flush_tts\n"); whisper_speech_flush_tts(w); clearWhisper(w, 0); @@ -113,7 +112,7 @@ static void w_speech_flush_tts(switch_speech_handle_t *sh) static void w_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { whisper_t *w = createOrRetrievePrivateData(sh); - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_text_param_tts: %s=%s\n", param, val); + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_text_param_tts: %s=%s\n", param, val); if (0 == strcmp(param, "api_key")) { if (w->api_key) free(w->api_key); w->api_key = strdup(val); diff --git a/mod_whisper_tts/whisper_glue.cpp b/mod_whisper_tts/whisper_glue.cpp index b6a8a27..b9128d2 100644 --- a/mod_whisper_tts/whisper_glue.cpp +++ b/mod_whisper_tts/whisper_glue.cpp @@ -442,8 +442,6 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) { } { switch_mutex_lock(w->mutex); - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "write_cb: received data, response %ld\n", - w->response_code); if (w->response_code > 0 && w->response_code != 200) { std::string body((char *) ptr, bytes_received); @@ -482,6 +480,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) { switch_core_session_t* session = switch_core_session_locate(w->session_id); if (session) { switch_channel_t *channel = switch_core_session_get_channel(session); + switch_core_session_rwunlock(session); if (channel) { switch_event_t *event; if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_START) == SWITCH_STATUS_SUCCESS) { @@ -525,7 +524,6 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) { else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: channel not found\n"); } - switch_core_session_rwunlock(session); } else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: session %s not found\n", w->session_id); @@ -810,6 +808,7 @@ extern "C" { switch_codec_implementation_t read_impl; switch_core_session_t *psession = switch_core_session_locate(w->session_id); switch_core_session_get_read_impl(psession, &read_impl); + switch_core_session_rwunlock(psession); uint32_t samples_per_second = !strcasecmp(read_impl.iananame, "g722") ? read_impl.actual_samples_per_second : read_impl.samples_per_second; if (mpg123_param(mh, MPG123_FORCE_RATE, samples_per_second /*Hz*/, 0) != MPG123_OK) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error mpg123_param!\n"); @@ -897,8 +896,7 @@ extern "C" { switch_status_t whisper_speech_flush_tts(whisper_t* w) { bool download_complete = w->response_code == 200; - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "whisper_speech_flush_tts, download complete? %s\n", download_complete ? "yes" : "no") ; - + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "whisper_speech_flush_tts, download complete? %s\n", download_complete ? "yes" : "no") ; ConnInfo_t *conn = (ConnInfo_t *) w->conn; CircularBuffer_t *cBuffer = (CircularBuffer_t *) w->circularBuffer; delete cBuffer; @@ -930,6 +928,7 @@ extern "C" { switch_core_session_t* session = switch_core_session_locate(w->session_id); if (session) { switch_channel_t *channel = switch_core_session_get_channel(session); + switch_core_session_rwunlock(session); if (channel) { switch_event_t *event; if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_STOP) == SWITCH_STATUS_SUCCESS) { @@ -951,14 +950,13 @@ extern "C" { else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "write_cb: channel not found\n"); } - switch_core_session_rwunlock(session); } } return SWITCH_STATUS_SUCCESS; } switch_status_t whisper_speech_close(whisper_t* w) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "whisper_speech_close\n") ; + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "whisper_speech_close\n") ; return SWITCH_STATUS_SUCCESS; } }