From 8bd20703b8d9a32bbbd93068f2c17bfafc11cbfd Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Sun, 23 Jun 2024 14:54:36 -0400 Subject: [PATCH] Fix/azure tts no device output (#79) * enable audio logging if env AZURE_AUDIO_LOGGING is set * wip * per discussion with microsoft, add nullptr to creation of speechSynthesizer to ensure it knows we do not want it to play to device Signed-off-by: Dave Horton * logging * fix bug in creation of config string * fix ticket 230 - Microsoft TTS having configuration data as part of audio generation * azure transcribe, resuse existing cap_cb if azure configuration is changed Signed-off-by: Hoan HL * clean up azure code for how to re-create gsstream when configuration is changed Signed-off-by: Hoan HL * fix review comments Signed-off-by: Hoan HL * fix review comment Signed-off-by: Hoan HL * fix review comment Signed-off-by: Hoan HL * wrap function in try catch --------- Signed-off-by: Dave Horton Signed-off-by: Hoan HL Co-authored-by: Hoan HL --- mod_audio_fork/lws_glue.cpp | 61 ++++--- .../azure_transcribe_glue.cpp | 165 ++++++++++++------ mod_azure_tts/azure_glue.cpp | 26 +-- 3 files changed, 165 insertions(+), 87 deletions(-) diff --git a/mod_audio_fork/lws_glue.cpp b/mod_audio_fork/lws_glue.cpp index 0dc488d..926e10a 100644 --- a/mod_audio_fork/lws_glue.cpp +++ b/mod_audio_fork/lws_glue.cpp @@ -43,36 +43,55 @@ namespace { uint16_t* data_uint16 = reinterpret_cast(data); std::vector pcm_data(data_uint16, data_uint16 + dataLength / sizeof(uint16_t)); + // resample if necessary + try { + if (tech_pvt->bidirectional_audio_resampler) { + std::vector in(pcm_data.begin(), pcm_data.end()); - if (tech_pvt->bidirectional_audio_resampler) { - std::vector in(pcm_data.begin(), pcm_data.end()); + std::vector out(dataLength); + spx_uint32_t in_len = pcm_data.size(); + spx_uint32_t out_len = out.size(); + speex_resampler_process_interleaved_int(tech_pvt->bidirectional_audio_resampler, in.data(), &in_len, out.data(), &out_len); - std::vector out(dataLength); - spx_uint32_t in_len = pcm_data.size(); - spx_uint32_t out_len = out.size(); - speex_resampler_process_interleaved_int(tech_pvt->bidirectional_audio_resampler, in.data(), &in_len, out.data(), &out_len); + if (out_len > out.size()) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Resampler output exceeded maximum buffer size!\n"); + return SWITCH_STATUS_FALSE; + } - if (out_len > out.size()) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Resampler output exceeded maximum buffer size!\n"); - return SWITCH_STATUS_FALSE; + // Resize the pcm_data to match the output length from resampler, and then copy the resampled data into it. + pcm_data.resize(out_len); + memcpy(pcm_data.data(), out.data(), out_len * sizeof(int16_t)); } - - // Resize the pcm_data to match the output length from resampler, and then copy the resampled data into it. - pcm_data.resize(out_len); - memcpy(pcm_data.data(), out.data(), out_len * sizeof(int16_t)); + } catch (const std::exception& e) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error resampling incoming binary message: %s\n", e.what()); + return SWITCH_STATUS_FALSE; + } catch (...) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error resampling incoming binary message\n"); + return SWITCH_STATUS_FALSE; } + switch_mutex_lock(tech_pvt->mutex); - // Resize the buffer if necessary - size_t bytesResampled = pcm_data.size() * sizeof(uint16_t); - if (cBuffer->capacity() - cBuffer->size() < bytesResampled / sizeof(uint16_t)) { - // If buffer exceeds some max size, you could return SWITCH_STATUS_FALSE to abort the transfer - // if (cBuffer->size() + std::max(bytesResampled / sizeof(uint16_t), (size_t)BUFFER_GROW_SIZE) > MAX_BUFFER_SIZE) return SWITCH_STATUS_FALSE; + try { + // Resize the buffer if necessary + size_t bytesResampled = pcm_data.size() * sizeof(uint16_t); + if (cBuffer->capacity() - cBuffer->size() < bytesResampled / sizeof(uint16_t)) { + // If buffer exceeds some max size, you could return SWITCH_STATUS_FALSE to abort the transfer + // if (cBuffer->size() + std::max(bytesResampled / sizeof(uint16_t), (size_t)BUFFER_GROW_SIZE) > MAX_BUFFER_SIZE) return SWITCH_STATUS_FALSE; - cBuffer->set_capacity(cBuffer->size() + std::max(bytesResampled / sizeof(uint16_t), (size_t)BUFFER_GROW_SIZE)); + cBuffer->set_capacity(cBuffer->size() + std::max(bytesResampled / sizeof(uint16_t), (size_t)BUFFER_GROW_SIZE)); + } + // Push the data into the buffer. + cBuffer->insert(cBuffer->end(), pcm_data.begin(), pcm_data.end()); + } catch (const std::exception& e) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error processing incoming binary message: %s\n", e.what()); + switch_mutex_unlock(tech_pvt->mutex); + return SWITCH_STATUS_FALSE; + } catch (...) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error processing incoming binary message\n"); + switch_mutex_unlock(tech_pvt->mutex); + return SWITCH_STATUS_FALSE; } - // Push the data into the buffer. - cBuffer->insert(cBuffer->end(), pcm_data.begin(), pcm_data.end()); switch_mutex_unlock(tech_pvt->mutex); diff --git a/mod_azure_transcribe/azure_transcribe_glue.cpp b/mod_azure_transcribe/azure_transcribe_glue.cpp index 37caddf..d8439ed 100644 --- a/mod_azure_transcribe/azure_transcribe_glue.cpp +++ b/mod_azure_transcribe/azure_transcribe_glue.cpp @@ -32,6 +32,7 @@ static const char* proxyIP = std::getenv("JAMBONES_HTTP_PROXY_IP"); static const char* proxyPort = std::getenv("JAMBONES_HTTP_PROXY_PORT"); static const char* proxyUsername = std::getenv("JAMBONES_HTTP_PROXY_USERNAME"); static const char* proxyPassword = std::getenv("JAMBONES_HTTP_PROXY_PASSWORD"); +static const bool use_single_connection = switch_true(std::getenv("AZURE_SPEECH_USE_SINGLE_CONNECTION")); class GStreamer { public: @@ -51,15 +52,6 @@ public: switch_core_session_t* psession = switch_core_session_locate(sessionId); if (!psession) throw std::invalid_argument( "session id no longer active" ); - //Due to use_single_connection, each GStreamer need to identify itself by configuration, if there is changes in configuration, - // the GStreamer will be closed and replaced by new object with new configuration. - m_configuration_stream << - channels << ";" << - lang << ";" << - interim << ";" << - samples_per_second << ";" << - region << ";" << - subscriptionKey << ";"; switch_channel_t *channel = switch_core_session_get_channel(psession); switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::GStreamer(%p) region %s, language %s\n", @@ -68,9 +60,6 @@ public: const char* endpoint = switch_channel_get_variable(channel, "AZURE_SERVICE_ENDPOINT"); const char* endpointId = switch_channel_get_variable(channel, "AZURE_SERVICE_ENDPOINT_ID"); - m_configuration_stream << - endpoint << ";" << - endpointId << ";"; auto sourceLanguageConfig = SourceLanguageConfig::FromLanguage(lang); auto format = AudioStreamFormat::GetWaveFormatPCM(8000, 16, channels); @@ -81,7 +70,6 @@ public: SpeechConfig::FromEndpoint(endpoint)) : SpeechConfig::FromSubscription(subscriptionKey, region); if (switch_true(switch_channel_get_variable(channel, "AZURE_USE_OUTPUT_FORMAT_DETAILED"))) { - m_configuration_stream << "output_format_detailed;"; speechConfig->SetOutputFormat(OutputFormat::Detailed); } if (nullptr != endpointId) { @@ -93,18 +81,12 @@ public: speechConfig->SetProperty(PropertyId::Speech_LogFilename, sdkLog); } if (switch_true(switch_channel_get_variable(channel, "AZURE_AUDIO_LOGGING"))) { - m_configuration_stream << "audio_logging;"; speechConfig->EnableAudioLogging(); } if (nullptr != proxyIP && nullptr != proxyPort) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(psession), SWITCH_LOG_DEBUG, "setting proxy: %s:%s\n", proxyIP, proxyPort); speechConfig->SetProxy(proxyIP, atoi(proxyPort), proxyUsername, proxyPassword); - m_configuration_stream << - proxyIP << ";" << - proxyPort << ";" << - proxyUsername << ";" << - proxyPassword << ";"; } m_pushStream = AudioInputStream::CreatePushStream(format); @@ -113,7 +95,6 @@ public: // alternative language const char* var; if (var = switch_channel_get_variable(channel, "AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES")) { - m_configuration_stream << var << ";"; std::vector languages; char *alt_langs[3] = { 0 }; int argc = switch_separate_string((char *) var, ',', alt_langs, 3); @@ -138,22 +119,18 @@ public: // profanity options: Allowed values are "masked", "removed", and "raw". const char* profanity = switch_channel_get_variable(channel, "AZURE_PROFANITY_OPTION"); if (profanity) { - m_configuration_stream << profanity << ";"; properties.SetProperty(PropertyId::SpeechServiceResponse_ProfanityOption, profanity); } // report signal-to-noise ratio if (switch_true(switch_channel_get_variable(channel, "AZURE_REQUEST_SNR"))) { - m_configuration_stream << "request_snr;"; properties.SetProperty(PropertyId::SpeechServiceResponse_RequestSnr, TrueString); } // initial speech timeout in milliseconds const char* timeout = switch_channel_get_variable(channel, "AZURE_INITIAL_SPEECH_TIMEOUT_MS"); - m_configuration_stream << timeout << ";"; if (timeout) properties.SetProperty(PropertyId::SpeechServiceConnection_InitialSilenceTimeoutMs, timeout); else properties.SetProperty(PropertyId::SpeechServiceConnection_InitialSilenceTimeoutMs, DEFAULT_SPEECH_TIMEOUT); const char* segmentationInterval = switch_channel_get_variable(channel, "AZURE_SPEECH_SEGMENTATION_SILENCE_TIMEOUT_MS"); - m_configuration_stream << segmentationInterval << ";"; if (segmentationInterval) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(psession), SWITCH_LOG_DEBUG, "setting segmentation interval to %s ms\n", segmentationInterval); properties.SetProperty(PropertyId::Speech_SegmentationSilenceTimeoutMs, segmentationInterval); @@ -161,7 +138,6 @@ public: //https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-identification?tabs=once&pivots=programming-language-cpp#at-start-and-continuous-language-identification const char* languageIdMode = switch_channel_get_variable(channel, "AZURE_LANGUAGE_ID_MODE"); - m_configuration_stream << languageIdMode << ";"; if (languageIdMode) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(psession), SWITCH_LOG_DEBUG, "setting SpeechServiceConnection_LanguageIdMode to %s \n", languageIdMode); properties.SetProperty(PropertyId::SpeechServiceConnection_LanguageIdMode, languageIdMode); @@ -178,7 +154,6 @@ public: // hints const char* hints = switch_channel_get_variable(channel, "AZURE_SPEECH_HINTS"); - m_configuration_stream << hints << ";"; if (hints) { auto grammar = PhraseListGrammar::FromRecognizer(m_recognizer); char *phrases[500] = { 0 }; @@ -272,6 +247,9 @@ public: m_recognizer->Recognized += onRecognitionEvent; m_recognizer->Canceled += onCanceled; + // Store the final configuration string + m_configuration_string = createConfigurationStr(channels, lang, interim, samples_per_second, region, subscriptionKey, psession); + switch_core_session_rwunlock(psession); } @@ -280,7 +258,7 @@ public: } const char* configuration() { - return m_configuration_stream.str().c_str(); + return m_configuration_string.c_str(); } void connect() { @@ -348,14 +326,33 @@ public: return m_connecting; } + bool hasConfigurationChanged( + u_int16_t channels, + char *lang, + int interim, + uint32_t samples_per_second, + const char* region, + const char* subscriptionKey) { + switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str()); + if (!psession) throw std::invalid_argument( "session id no longer active" ); + + std::string newConfiguration = createConfigurationStr(channels, lang, interim, samples_per_second, region, subscriptionKey, psession); + + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(psession), SWITCH_LOG_DEBUG, + "hasConfigurationChanged: old configurattion: %s, new configuration: %s\n", configuration(), newConfiguration.c_str()); + + switch_core_session_rwunlock(psession); + + return strcmp(newConfiguration.c_str(), configuration()); + } + private: std::string m_sessionId; std::string m_bugname; std::string m_region; std::shared_ptr m_recognizer; std::shared_ptr m_pushStream; - std::ostringstream m_configuration_stream; - + std::string m_configuration_string; responseHandler_t m_responseHandler; bool m_interim; bool m_finished; @@ -363,6 +360,68 @@ private: bool m_connecting; bool m_stopped; SimpleBuffer m_audioBuffer; + + std::string createConfigurationStr( + u_int16_t channels, + char *lang, + int interim, + uint32_t samples_per_second, + const char* region, + const char* subscriptionKey, + switch_core_session_t* psession + ) { + switch_channel_t *channel = switch_core_session_get_channel(psession); + std::ostringstream configuration_stream; + configuration_stream << + channels << ";" << + lang << ";" << + interim << ";" << + samples_per_second << ";" << + region << ";" << + subscriptionKey << ";"; + + const char* endpoint = switch_channel_get_variable(channel, "AZURE_SERVICE_ENDPOINT"); + const char* endpointId = switch_channel_get_variable(channel, "AZURE_SERVICE_ENDPOINT_ID"); + configuration_stream << + endpoint << ";" << + endpointId << ";"; + if (switch_true(switch_channel_get_variable(channel, "AZURE_USE_OUTPUT_FORMAT_DETAILED"))) { + configuration_stream << "output_format_detailed;"; + } + if (switch_true(switch_channel_get_variable(channel, "AZURE_AUDIO_LOGGING"))) { + configuration_stream << "audio_logging;"; + } + if (nullptr != proxyIP && nullptr != proxyPort) { + configuration_stream << + proxyIP << ";" << + proxyPort << ";" << + proxyUsername << ";" << + proxyPassword << ";"; + } + const char* var; + if (var = switch_channel_get_variable(channel, "AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES")) { + configuration_stream << var << ";"; + } + if (var = switch_channel_get_variable(channel, "AZURE_PROFANITY_OPTION")) { + configuration_stream << var << ";"; + } + if (var = switch_channel_get_variable(channel, "AZURE_REQUEST_SNR")) { + configuration_stream << var << ";"; + } + if (var = switch_channel_get_variable(channel, "AZURE_INITIAL_SPEECH_TIMEOUT_MS")) { + configuration_stream << var << ";"; + } + if (var = switch_channel_get_variable(channel, "AZURE_SPEECH_SEGMENTATION_SILENCE_TIMEOUT_MS")) { + configuration_stream << var << ";"; + } + if (var = switch_channel_get_variable(channel, "AZURE_LANGUAGE_ID_MODE")) { + configuration_stream << var << ";"; + } + if (var = switch_channel_get_variable(channel, "AZURE_SPEECH_HINTS")) { + configuration_stream << var << ";"; + } + return configuration_stream.str(); + } }; static void reaper(struct cap_cb *cb) { @@ -420,16 +479,32 @@ extern "C" { switch_status_t status = SWITCH_STATUS_SUCCESS; switch_channel_t *channel = switch_core_session_get_channel(session); switch_media_bug_t *bug = (switch_media_bug_t*) switch_channel_get_private(channel, bugname); + const char* subscriptionKey = switch_channel_get_variable(channel, "AZURE_SUBSCRIPTION_KEY"); + const char* region = switch_channel_get_variable(channel, "AZURE_REGION"); + const char* sessionId = switch_core_session_get_uuid(session); + auto read_codec = switch_core_session_get_read_codec(session); + uint32_t sampleRate = read_codec->implementation->actual_samples_per_second; + if (bug) { + struct cap_cb* existing_cb = (struct cap_cb*) switch_core_media_bug_get_user_data(bug); + GStreamer* existing_streamer = (GStreamer*) existing_cb->streamer; + existing_cb->is_keep_alive = 0; + if (!existing_streamer->hasConfigurationChanged(channels, lang, interim, sampleRate, region, subscriptionKey)) { + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Reuse active azure connection.\n"); + return SWITCH_STATUS_SUCCESS; + } + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Azure configuration is changed, destroy old and create new azure connection\n"); + reaper(existing_cb); + streamer = new GStreamer(sessionId, bugname, channels, lang, interim, sampleRate, region, subscriptionKey, responseHandler); + if (!existing_cb->vad) streamer->connect(); + existing_cb->streamer = streamer; + *ppUserData = existing_cb; + return SWITCH_STATUS_SUCCESS; + } int err; switch_threadattr_t *thd_attr = NULL; switch_memory_pool_t *pool = switch_core_session_get_pool(session); - auto read_codec = switch_core_session_get_read_codec(session); - uint32_t sampleRate = read_codec->implementation->actual_samples_per_second; - const char* sessionId = switch_core_session_get_uuid(session); struct cap_cb* cb = (struct cap_cb *) switch_core_session_alloc(session, sizeof(*cb)); memset(cb, sizeof(cb), 0); - const char* subscriptionKey = switch_channel_get_variable(channel, "AZURE_SUBSCRIPTION_KEY"); - const char* region = switch_channel_get_variable(channel, "AZURE_REGION"); cb->channels = channels; strncpy(cb->sessionId, sessionId, MAX_SESSION_ID); strncpy(cb->bugname, bugname, MAX_BUG_LEN); @@ -458,24 +533,7 @@ extern "C" { switch_channel_get_name(channel), bugname); streamer = new GStreamer(sessionId, bugname, channels, lang, interim, sampleRate, cb->region, subscriptionKey, responseHandler); cb->streamer = streamer; - - if (bug) { - struct cap_cb* existing_cb = (struct cap_cb*) switch_core_media_bug_get_user_data(bug); - GStreamer* existing_streamer = (GStreamer*) existing_cb->streamer; - if (0 != strcmp(existing_streamer->configuration(), streamer->configuration())) { - switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "azure_transcribe_session_init: stop existing azure connection, old configuration %s, new configuration %s\n", - existing_streamer->configuration(), streamer->configuration()); - if (existing_streamer) reaper(existing_cb); - killcb(existing_cb); - switch_mutex_destroy(existing_cb->mutex); - } else { - switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "azure_transcribe_session_init: enable existing azure connection\n"); - killcb(cb); - cb = existing_cb; - status = SWITCH_STATUS_SUCCESS; - goto done; - } - } + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "azure_transcribe_session_init: config: %s\n", streamer->configuration()); } catch (std::exception& e) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error initializing gstreamer: %s.\n", switch_channel_get_name(channel), e.what()); @@ -539,7 +597,6 @@ extern "C" { switch_status_t azure_transcribe_session_stop(switch_core_session_t *session, int channelIsClosing, char* bugname) { switch_channel_t *channel = switch_core_session_get_channel(session); switch_media_bug_t *bug = (switch_media_bug_t*) switch_channel_get_private(channel, bugname); - const bool use_single_connection = switch_true(std::getenv("AZURE_SPEECH_USE_SINGLE_CONNECTION")); if (bug) { struct cap_cb *cb = (struct cap_cb *) switch_core_media_bug_get_user_data(bug); @@ -581,7 +638,6 @@ extern "C" { frame.data = data; frame.buflen = SWITCH_RECOMMENDED_BUFFER_SIZE; if (cb->is_keep_alive) { - // remove media bug buffered data while (true) { unsigned char data[SWITCH_RECOMMENDED_BUFFER_SIZE] = {0}; @@ -593,7 +649,6 @@ extern "C" { } return SWITCH_TRUE; } - if (switch_mutex_trylock(cb->mutex) == SWITCH_STATUS_SUCCESS) { GStreamer* streamer = (GStreamer *) cb->streamer; if (streamer) { diff --git a/mod_azure_tts/azure_glue.cpp b/mod_azure_tts/azure_glue.cpp index 29a5317..f7bc9e1 100644 --- a/mod_azure_tts/azure_glue.cpp +++ b/mod_azure_tts/azure_glue.cpp @@ -15,11 +15,13 @@ typedef boost::circular_buffer CircularBuffer_t; using namespace Microsoft::CognitiveServices::Speech; +static const char* audioLogFile= std::getenv("AZURE_AUDIO_LOGGING"); + static std::string fullDirPath; static void start_synthesis(std::shared_ptr speechSynthesizer, const char* text, azure_t* a) { try { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis calling \n"); + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "start_synthesis calling, text %s\n", text); auto result = std::strncmp(text, "SpeakSsmlAsync(text).get() : speechSynthesizer->SpeakTextAsync(text).get(); @@ -43,6 +45,8 @@ static void start_synthesis(std::shared_ptr speechSynthesizer switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "mod_azure_tts: Exception in start_synthesis %s\n", e.what()); } a->draining = 1; + + free((void*) text); } extern "C" { @@ -87,14 +91,7 @@ extern "C" { switch_status_t azure_speech_feed_tts(azure_t* a, char* text, switch_speech_flag_t *flags) { const int MAX_CHARS = 20; - char tempText[MAX_CHARS + 4]; // +4 for the ellipsis and null terminator - - if (strlen(text) > MAX_CHARS) { - strncpy(tempText, text, MAX_CHARS); - strcpy(tempText + MAX_CHARS, "..."); - } else { - strcpy(tempText, text); - } + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts %s\n", text); /* open cache file */ if (a->cache_audio && fullDirPath.length() > 0) { @@ -168,8 +165,14 @@ extern "C" { speechConfig->SetEndpointId(a->endpointId); } + if (audioLogFile) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts enabling audio logging to %s\n", audioLogFile); + speechConfig->SetProperty(PropertyId::Speech_LogFilename, audioLogFile); + speechConfig->EnableAudioLogging(); + } + try { - auto speechSynthesizer = SpeechSynthesizer::FromConfig(speechConfig); + auto speechSynthesizer = SpeechSynthesizer::FromConfig(speechConfig, nullptr); speechSynthesizer->SynthesisStarted += [a](const SpeechSynthesisEventArgs& e) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts SynthesisStarted\n"); @@ -266,7 +269,8 @@ extern "C" { } }; - std::thread(start_synthesis, speechSynthesizer, text, a).detach(); + const char* dupText = strdup(text); // text will be freed in the thread + std::thread(start_synthesis, speechSynthesizer, dupText, a).detach(); switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts sent synthesize request\n"); } catch (const std::exception& e) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mod_azure_tts: Exception: %s\n", e.what());