modify cache folder name and various fixes from testing tts streaming (#50)

This commit is contained in:
Dave Horton
2024-04-18 11:28:17 -04:00
committed by GitHub
parent 3f642467eb
commit 83a2d1d730
6 changed files with 70 additions and 65 deletions

View File

@@ -59,7 +59,7 @@ extern "C" {
return SWITCH_STATUS_FALSE; return SWITCH_STATUS_FALSE;
} }
fullDirPath = std::string(baseDir) + "jambonz-tts-cache-files"; fullDirPath = std::string(baseDir) + "tts-cache-files";
// Create the directory with read, write, and execute permissions for everyone // Create the directory with read, write, and execute permissions for everyone
mode_t oldMask = umask(0); mode_t oldMask = umask(0);
@@ -179,72 +179,77 @@ extern "C" {
speechConfig->SetEndpointId(a->endpointId); speechConfig->SetEndpointId(a->endpointId);
} }
auto speechSynthesizer = SpeechSynthesizer::FromConfig(speechConfig); try {
auto speechSynthesizer = SpeechSynthesizer::FromConfig(speechConfig);
speechSynthesizer->SynthesisStarted += [a](const SpeechSynthesisEventArgs& e) { speechSynthesizer->SynthesisStarted += [a](const SpeechSynthesisEventArgs& e) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts SynthesisStarted\n"); switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts SynthesisStarted\n");
}; };
speechSynthesizer->Synthesizing += [a](const SpeechSynthesisEventArgs& e) { speechSynthesizer->Synthesizing += [a](const SpeechSynthesisEventArgs& e) {
if (a->flushed) return; if (a->flushed) return;
bool fireEvent = false; bool fireEvent = false;
CircularBuffer_t *cBuffer = (CircularBuffer_t *) a->circularBuffer; CircularBuffer_t *cBuffer = (CircularBuffer_t *) a->circularBuffer;
auto audioData = e.Result->GetAudioData(); auto audioData = e.Result->GetAudioData();
if (a->file) { if (a->file) {
fwrite(audioData->data(), 1, audioData->size(), a->file); fwrite(audioData->data(), 1, audioData->size(), a->file);
} }
/** /**
* this sort of reinterpretation can be dangerous as a general rule, but in this case we know that the data * this sort of reinterpretation can be dangerous as a general rule, but in this case we know that the data
* is 16-bit PCM, so it's safe to do this and its much faster than copying the data byte by byte * is 16-bit PCM, so it's safe to do this and its much faster than copying the data byte by byte
*/ */
const uint16_t* begin = reinterpret_cast<const uint16_t*>(audioData->data()); const uint16_t* begin = reinterpret_cast<const uint16_t*>(audioData->data());
const uint16_t* end = reinterpret_cast<const uint16_t*>(audioData->data() + audioData->size()); const uint16_t* end = reinterpret_cast<const uint16_t*>(audioData->data() + audioData->size());
/* lock as briefly as possible */ /* lock as briefly as possible */
switch_mutex_lock(a->mutex); switch_mutex_lock(a->mutex);
if (cBuffer->capacity() - cBuffer->size() < audioData->size()) { if (cBuffer->capacity() - cBuffer->size() < audioData->size()) {
cBuffer->set_capacity(cBuffer->size() + std::max( audioData->size(), (size_t)BUFFER_SIZE)); cBuffer->set_capacity(cBuffer->size() + std::max( audioData->size(), (size_t)BUFFER_SIZE));
} }
cBuffer->insert(cBuffer->end(), begin, end); cBuffer->insert(cBuffer->end(), begin, end);
switch_mutex_unlock(a->mutex); switch_mutex_unlock(a->mutex);
if (0 == a->reads++) { if (0 == a->reads++) {
fireEvent = true; fireEvent = true;
} }
if (fireEvent && a->session_id) { if (fireEvent && a->session_id) {
auto endTime = std::chrono::high_resolution_clock::now(); auto endTime = std::chrono::high_resolution_clock::now();
auto startTime = *static_cast<std::chrono::time_point<std::chrono::high_resolution_clock>*>(a->startTime); auto startTime = *static_cast<std::chrono::time_point<std::chrono::high_resolution_clock>*>(a->startTime);
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime); auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime);
auto time_to_first_byte_ms = std::to_string(duration.count()); auto time_to_first_byte_ms = std::to_string(duration.count());
switch_core_session_t* session = switch_core_session_locate(a->session_id); switch_core_session_t* session = switch_core_session_locate(a->session_id);
if (session) { if (session) {
switch_channel_t *channel = switch_core_session_get_channel(session); switch_channel_t *channel = switch_core_session_get_channel(session);
switch_core_session_rwunlock(session); switch_core_session_rwunlock(session);
if (channel) { if (channel) {
switch_event_t *event; switch_event_t *event;
if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_START) == SWITCH_STATUS_SUCCESS) { if (switch_event_create(&event, SWITCH_EVENT_PLAYBACK_START) == SWITCH_STATUS_SUCCESS) {
switch_channel_event_set_data(channel, event); switch_channel_event_set_data(channel, event);
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Playback-File-Type", "tts_stream"); switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Playback-File-Type", "tts_stream");
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "variable_tts_time_to_first_byte_ms", time_to_first_byte_ms.c_str()); switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "variable_tts_time_to_first_byte_ms", time_to_first_byte_ms.c_str());
if (a->cache_filename) { if (a->cache_filename) {
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "variable_tts_cache_filename", a->cache_filename); switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "variable_tts_cache_filename", a->cache_filename);
}
switch_event_fire(&event);
} else {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "speechSynthesizer->Synthesizing: failed to create event\n");
} }
switch_event_fire(&event); }else {
} else { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "speechSynthesizer->Synthesizing: channel not found\n");
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "speechSynthesizer->Synthesizing: failed to create event\n");
} }
}else {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "speechSynthesizer->Synthesizing: channel not found\n");
} }
} }
} };
};
std::thread(start_synthesis, speechSynthesizer, text, a).detach(); std::thread(start_synthesis, speechSynthesizer, text, a).detach();
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts sent synthesize request\n"); switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "azure_speech_feed_tts sent synthesize request\n");
} catch (const std::exception& e) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "mod_azure_tts: Exception: %s\n", e.what());
return SWITCH_STATUS_FALSE;
}
return SWITCH_STATUS_SUCCESS; return SWITCH_STATUS_SUCCESS;
} }

View File

@@ -385,7 +385,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
auto d = conn->deepgram; auto d = conn->deepgram;
CircularBuffer_t *cBuffer = (CircularBuffer_t *) d->circularBuffer; CircularBuffer_t *cBuffer = (CircularBuffer_t *) d->circularBuffer;
if (conn->flushed) { if (conn->flushed || cBuffer == nullptr) {
/* this will abort the transfer */ /* this will abort the transfer */
return 0; return 0;
} }
@@ -641,7 +641,7 @@ extern "C" {
return SWITCH_STATUS_FALSE; return SWITCH_STATUS_FALSE;
} }
fullDirPath = std::string(baseDir) + "jambonz-tts-cache-files"; fullDirPath = std::string(baseDir) + "tts-cache-files";
// Create the directory with read, write, and execute permissions for everyone // Create the directory with read, write, and execute permissions for everyone
mode_t oldMask = umask(0); mode_t oldMask = umask(0);

View File

@@ -445,7 +445,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
CircularBuffer_t *cBuffer = (CircularBuffer_t *) el->circularBuffer; CircularBuffer_t *cBuffer = (CircularBuffer_t *) el->circularBuffer;
std::vector<uint16_t> pcm_data; std::vector<uint16_t> pcm_data;
if (conn->flushed) { if (conn->flushed || cBuffer == nullptr) {
/* this will abort the transfer */ /* this will abort the transfer */
return 0; return 0;
} }
@@ -688,7 +688,7 @@ extern "C" {
return SWITCH_STATUS_FALSE; return SWITCH_STATUS_FALSE;
} }
fullDirPath = std::string(baseDir) + "jambonz-tts-cache-files"; fullDirPath = std::string(baseDir) + "tts-cache-files";
// Create the directory with read, write, and execute permissions for everyone // Create the directory with read, write, and execute permissions for everyone
mode_t oldMask = umask(0); mode_t oldMask = umask(0);

View File

@@ -436,7 +436,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
CircularBuffer_t *cBuffer = (CircularBuffer_t *) p->circularBuffer; CircularBuffer_t *cBuffer = (CircularBuffer_t *) p->circularBuffer;
std::vector<uint16_t> pcm_data; std::vector<uint16_t> pcm_data;
if (conn->flushed) { if (conn->flushed || cBuffer == nullptr) {
/* this will abort the transfer */ /* this will abort the transfer */
return 0; return 0;
} }
@@ -648,7 +648,7 @@ extern "C" {
return SWITCH_STATUS_FALSE; return SWITCH_STATUS_FALSE;
} }
fullDirPath = std::string(baseDir) + "jambonz-tts-cache-files"; fullDirPath = std::string(baseDir) + "tts-cache-files";
// Create the directory with read, write, and execute permissions for everyone // Create the directory with read, write, and execute permissions for everyone
mode_t oldMask = umask(0); mode_t oldMask = umask(0);

View File

@@ -386,7 +386,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
auto d = conn->rimelabs; auto d = conn->rimelabs;
CircularBuffer_t *cBuffer = (CircularBuffer_t *) d->circularBuffer; CircularBuffer_t *cBuffer = (CircularBuffer_t *) d->circularBuffer;
if (conn->flushed) { if (conn->flushed || cBuffer == nullptr) {
/* this will abort the transfer */ /* this will abort the transfer */
return 0; return 0;
} }
@@ -626,7 +626,7 @@ extern "C" {
return SWITCH_STATUS_FALSE; return SWITCH_STATUS_FALSE;
} }
fullDirPath = std::string(baseDir) + "jambonz-tts-cache-files"; fullDirPath = std::string(baseDir) + "tts-cache-files";
// Create the directory with read, write, and execute permissions for everyone // Create the directory with read, write, and execute permissions for everyone
mode_t oldMask = umask(0); mode_t oldMask = umask(0);

View File

@@ -436,7 +436,7 @@ static size_t write_cb(void *ptr, size_t size, size_t nmemb, ConnInfo_t *conn) {
CircularBuffer_t *cBuffer = (CircularBuffer_t *) w->circularBuffer; CircularBuffer_t *cBuffer = (CircularBuffer_t *) w->circularBuffer;
std::vector<uint16_t> pcm_data; std::vector<uint16_t> pcm_data;
if (conn->flushed) { if (conn->flushed || cBuffer == nullptr) {
/* this will abort the transfer */ /* this will abort the transfer */
return 0; return 0;
} }
@@ -671,7 +671,7 @@ extern "C" {
return SWITCH_STATUS_FALSE; return SWITCH_STATUS_FALSE;
} }
fullDirPath = std::string(baseDir) + "jambonz-tts-cache-files"; fullDirPath = std::string(baseDir) + "tts-cache-files";
// Create the directory with read, write, and execute permissions for everyone // Create the directory with read, write, and execute permissions for everyone
mode_t oldMask = umask(0); mode_t oldMask = umask(0);