From f828171b3bf90312470a17f266a8d00d4aa2b87a Mon Sep 17 00:00:00 2001 From: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com> Date: Thu, 8 Aug 2024 01:40:22 +0700 Subject: [PATCH] support jambonz transcribe with multiple sampling rate (#98) * support jambonz transcribe with multiple sampling rate * wip Signed-off-by: Hoan HL --------- Signed-off-by: Hoan HL --- mod_jambonz_transcribe/jb_transcribe_glue.cpp | 6 +++--- mod_jambonz_transcribe/jb_transcribe_glue.h | 2 +- mod_jambonz_transcribe/mod_jambonz_transcribe.c | 9 +++++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/mod_jambonz_transcribe/jb_transcribe_glue.cpp b/mod_jambonz_transcribe/jb_transcribe_glue.cpp index 037e886..de87b4c 100644 --- a/mod_jambonz_transcribe/jb_transcribe_glue.cpp +++ b/mod_jambonz_transcribe/jb_transcribe_glue.cpp @@ -145,7 +145,7 @@ namespace { cJSON_AddStringToObject(json, "format", "raw"); cJSON_AddStringToObject(json, "encoding", "LINEAR16"); cJSON_AddBoolToObject(json, "interimResults", tech_pvt->interim); - cJSON_AddNumberToObject(json, "sampleRateHz", 8000); + cJSON_AddNumberToObject(json, "sampleRateHz", tech_pvt->sampling); if (var = switch_channel_get_variable(channel, "JAMBONZ_STT_OPTIONS")) { cJSON* jOptions = cJSON_Parse(var); if (jOptions) { @@ -353,7 +353,7 @@ extern "C" { } switch_status_t jb_transcribe_session_init(switch_core_session_t *session, - responseHandler_t responseHandler, uint32_t samples_per_second, uint32_t channels, + responseHandler_t responseHandler, uint32_t samples_per_second, int desiredSampling, uint32_t channels, char* lang, int interim, char* bugname, void **ppUserData) { int err; @@ -365,7 +365,7 @@ extern "C" { return SWITCH_STATUS_FALSE; } - if (SWITCH_STATUS_SUCCESS != fork_data_init(tech_pvt, session, samples_per_second, 8000, channels, lang, interim, bugname, responseHandler)) { + if (SWITCH_STATUS_SUCCESS != fork_data_init(tech_pvt, session, samples_per_second, desiredSampling, channels, lang, interim, bugname, responseHandler)) { destroy_tech_pvt(tech_pvt); return SWITCH_STATUS_FALSE; } diff --git a/mod_jambonz_transcribe/jb_transcribe_glue.h b/mod_jambonz_transcribe/jb_transcribe_glue.h index bdaab98..eefc53f 100644 --- a/mod_jambonz_transcribe/jb_transcribe_glue.h +++ b/mod_jambonz_transcribe/jb_transcribe_glue.h @@ -6,7 +6,7 @@ int parse_ws_uri(switch_channel_t *channel, const char* szServerUri, char* host, switch_status_t jb_transcribe_init(); switch_status_t jb_transcribe_cleanup(); switch_status_t jb_transcribe_session_init(switch_core_session_t *session, responseHandler_t responseHandler, - uint32_t samples_per_second, uint32_t channels, char* lang, int interim, char* bugname, void **ppUserData); + uint32_t samples_per_second, int desiredSampling, uint32_t channels, char* lang, int interim, char* bugname, void **ppUserData); switch_status_t jb_transcribe_session_stop(switch_core_session_t *session, int channelIsClosing, char* bugname); switch_bool_t jb_transcribe_frame(switch_core_session_t *session, switch_media_bug_t *bug); diff --git a/mod_jambonz_transcribe/mod_jambonz_transcribe.c b/mod_jambonz_transcribe/mod_jambonz_transcribe.c index 230e32b..f6399bd 100644 --- a/mod_jambonz_transcribe/mod_jambonz_transcribe.c +++ b/mod_jambonz_transcribe/mod_jambonz_transcribe.c @@ -73,6 +73,8 @@ static switch_status_t start_capture(switch_core_session_t *session, switch_medi switch_codec_implementation_t read_impl = { 0 }; void *pUserData; uint32_t samples_per_second; + uint32_t desiredSampling = 8000; + const char* var; if (!switch_channel_get_variable(channel, "JAMBONZ_STT_URL")) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing JAMBONZ_STT_URL channel var\n"); @@ -91,8 +93,11 @@ static switch_status_t start_capture(switch_core_session_t *session, switch_medi } samples_per_second = !strcasecmp(read_impl.iananame, "g722") ? read_impl.actual_samples_per_second : read_impl.samples_per_second; - - if (SWITCH_STATUS_FALSE == jb_transcribe_session_init(session, responseHandler, samples_per_second, flags & SMBF_STEREO ? 2 : 1, lang, interim, bugname, &pUserData)) { + var = switch_channel_get_variable(channel, "JAMBONZ_STT_SAMPLING"); + if (var != NULL) { + desiredSampling = atoi(var); + } + if (SWITCH_STATUS_FALSE == jb_transcribe_session_init(session, responseHandler, samples_per_second, desiredSampling, flags & SMBF_STEREO ? 2 : 1, lang, interim, bugname, &pUserData)) { switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error initializing jb speech session.\n"); return SWITCH_STATUS_FALSE; }