mirror of
https://github.com/jambonz/freeswitch-modules.git
synced 2025-12-19 08:27:44 +00:00
support new parameters for google v2 (#31)
* support new parameters for google v2
  Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com>
* add enable_voice_activity_events
  Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com>
* changes to start and end timeout

---------

Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com>
Co-authored-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
@@ -18,6 +18,8 @@ using google::cloud::speech::v2::SpeechRecognitionAlternative;
|
|||||||
using google::cloud::speech::v2::PhraseSet;
|
using google::cloud::speech::v2::PhraseSet;
|
||||||
using google::cloud::speech::v2::PhraseSet_Phrase;
|
using google::cloud::speech::v2::PhraseSet_Phrase;
|
||||||
using google::cloud::speech::v2::StreamingRecognizeResponse_SpeechEventType_END_OF_SINGLE_UTTERANCE;
|
using google::cloud::speech::v2::StreamingRecognizeResponse_SpeechEventType_END_OF_SINGLE_UTTERANCE;
|
||||||
|
using google::cloud::speech::v2::StreamingRecognizeResponse_SpeechEventType_SPEECH_ACTIVITY_BEGIN;
|
||||||
|
using google::cloud::speech::v2::StreamingRecognizeResponse_SpeechEventType_SPEECH_ACTIVITY_END;
|
||||||
using google::cloud::speech::v2::ExplicitDecodingConfig_AudioEncoding_LINEAR16;
|
using google::cloud::speech::v2::ExplicitDecodingConfig_AudioEncoding_LINEAR16;
|
||||||
using google::cloud::speech::v2::RecognitionFeatures_MultiChannelMode_SEPARATE_RECOGNITION_PER_CHANNEL;
|
using google::cloud::speech::v2::RecognitionFeatures_MultiChannelMode_SEPARATE_RECOGNITION_PER_CHANNEL;
|
||||||
using google::cloud::speech::v2::SpeechAdaptation_AdaptationPhraseSet;
|
using google::cloud::speech::v2::SpeechAdaptation_AdaptationPhraseSet;
|
||||||
@@ -158,12 +160,54 @@ GStreamer<StreamingRecognizeRequest, StreamingRecognizeResponse, Speech::Stub>::
|
|||||||
diarization_config->set_max_speaker_count(count);
|
diarization_config->set_max_speaker_count(count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (var = switch_channel_get_variable(channel, "GOOGLE_SPEECH_TRANSCRIPTION_NORMALIZATION")) {
|
||||||
|
// parse JSON string
|
||||||
|
cJSON *json_array = cJSON_Parse(var);
|
||||||
|
|
||||||
|
int array_size = cJSON_GetArraySize(json_array);
|
||||||
|
|
||||||
|
for(int i=0; i<array_size; i++) {
|
||||||
|
cJSON* json_item = cJSON_GetArrayItem(json_array, i);
|
||||||
|
|
||||||
|
auto entry = config->mutable_transcript_normalization()->add_entries();
|
||||||
|
|
||||||
|
std::string search_string = cJSON_GetObjectItem(json_item, "search")->valuestring;
|
||||||
|
std::string replacement_string = cJSON_GetObjectItem(json_item, "replace")->valuestring;
|
||||||
|
bool case_sensitive = cJSON_GetObjectItem(json_item, "case_sensitive")->valueint != 0;
|
||||||
|
|
||||||
|
entry->set_search(search_string);
|
||||||
|
entry->set_replace(replacement_string);
|
||||||
|
entry->set_case_sensitive(case_sensitive);
|
||||||
|
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(m_session), SWITCH_LOG_DEBUG,
|
||||||
|
"TRANSCRIPTION_NORMALIZATION search %s, replace %s, set_case_sensitive %d\n", search_string.c_str(), replacement_string.c_str(), case_sensitive);
|
||||||
|
}
|
||||||
|
// clean json
|
||||||
|
cJSON_Delete(json_array);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (var = switch_channel_get_variable(channel, "GOOGLE_SPEECH_START_TIMEOUT_MS")) {
|
||||||
|
auto ms = atoi(var);
|
||||||
|
streaming_config->mutable_streaming_features()->mutable_voice_activity_timeout()->mutable_speech_start_timeout()->set_nanos(ms * 1000000);
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(m_session), SWITCH_LOG_DEBUG, "setting speech_start_timeout to %d milliseconds\n", ms);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (var = switch_channel_get_variable(channel, "GOOGLE_SPEECH_END_TIMEOUT_MS")) {
|
||||||
|
auto ms = atoi(var);
|
||||||
|
streaming_config->mutable_streaming_features()->mutable_voice_activity_timeout()->mutable_speech_end_timeout()->set_nanos(ms * 1000000);
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(m_session), SWITCH_LOG_DEBUG, "setting speech_end_timeout to %d milliseconds\n", ms);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (var = switch_channel_get_variable(channel, "GOOGLE_SPEECH_ENABLE_VOICE_ACTIVITY_EVENTS")) {
|
||||||
|
bool enabled = !strcmp(var, "true") ? 1 : 0;
|
||||||
|
streaming_config->mutable_streaming_features()->set_enable_voice_activity_events(enabled);
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(m_session), SWITCH_LOG_DEBUG, "setting enable_voice_activity_events to %d \n", enabled);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_request.set_recognizer(recognizer);
|
m_request.set_recognizer(recognizer);
|
||||||
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(m_session), SWITCH_LOG_DEBUG, "using recognizer: %s\n", recognizer.c_str());
|
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(m_session), SWITCH_LOG_DEBUG, "using recognizer: %s\n", recognizer.c_str());
|
||||||
|
|
||||||
// This must be set whether a recognizer id is provided orr not, because it cannot be configured as part of a recognizer.
|
// This must be set whether a recognizer id is provided or not, because it cannot be configured as part of a recognizer.
|
||||||
if (interim > 0) {
|
if (interim > 0) {
|
||||||
streaming_config->mutable_streaming_features()->set_interim_results(interim > 0);
|
streaming_config->mutable_streaming_features()->set_interim_results(interim > 0);
|
||||||
}
|
}
|
||||||
@@ -277,6 +321,12 @@ static void *SWITCH_THREAD_FUNC grpc_read_thread(switch_thread_t *thread, void *
|
|||||||
streamer->writesDone();
|
streamer->writesDone();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (speech_event_type == StreamingRecognizeResponse_SpeechEventType_SPEECH_ACTIVITY_BEGIN) {
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "grpc_read_thread: got SPEECH_ACTIVITY_BEGIN\n") ;
|
||||||
|
}
|
||||||
|
else if (speech_event_type == StreamingRecognizeResponse_SpeechEventType_SPEECH_ACTIVITY_END) {
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "grpc_read_thread: got SPEECH_ACTIVITY_END\n") ;
|
||||||
|
}
|
||||||
switch_core_session_rwunlock(session);
|
switch_core_session_rwunlock(session);
|
||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "grpc_read_thread: got %d responses\n", response.results_size());
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "grpc_read_thread: got %d responses\n", response.results_size());
|
||||||
}
|
}
|
||||||
@@ -296,7 +346,7 @@ static void *SWITCH_THREAD_FUNC grpc_read_thread(switch_thread_t *thread, void *
|
|||||||
cb->responseHandler(session, "no_audio", cb->bugname);
|
cb->responseHandler(session, "no_audio", cb->bugname);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else if (status.error_code() != 0) {
|
||||||
cJSON* json = cJSON_CreateObject();
|
cJSON* json = cJSON_CreateObject();
|
||||||
cJSON_AddStringToObject(json, "type", "error");
|
cJSON_AddStringToObject(json, "type", "error");
|
||||||
cJSON_AddItemToObject(json, "error_code", cJSON_CreateNumber(status.error_code()));
|
cJSON_AddItemToObject(json, "error_code", cJSON_CreateNumber(status.error_code()));
|
||||||
|
|||||||
@@ -76,6 +76,16 @@ static void responseHandler(switch_core_session_t* session, const char * json, c
|
|||||||
switch_channel_event_set_data(channel, event);
|
switch_channel_event_set_data(channel, event);
|
||||||
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "google");
|
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "google");
|
||||||
}
|
}
|
||||||
|
else if (0 == strcmp("start_of_speech", json)) {
|
||||||
|
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_START_OF_SPEECH);
|
||||||
|
switch_channel_event_set_data(channel, event);
|
||||||
|
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "google");
|
||||||
|
}
|
||||||
|
else if (0 == strcmp("end_of_speech", json)) {
|
||||||
|
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_END_OF_SPEECH);
|
||||||
|
switch_channel_event_set_data(channel, event);
|
||||||
|
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "google");
|
||||||
|
}
|
||||||
else if (0 == strcmp("end_of_transcript", json)) {
|
else if (0 == strcmp("end_of_transcript", json)) {
|
||||||
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_END_OF_TRANSCRIPT);
|
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_END_OF_TRANSCRIPT);
|
||||||
switch_channel_event_set_data(channel, event);
|
switch_channel_event_set_data(channel, event);
|
||||||
@@ -506,6 +516,14 @@ SWITCH_MODULE_LOAD_FUNCTION(mod_transcribe_load)
|
|||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", TRANSCRIBE_EVENT_END_OF_UTTERANCE);
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", TRANSCRIBE_EVENT_END_OF_UTTERANCE);
|
||||||
return SWITCH_STATUS_TERM;
|
return SWITCH_STATUS_TERM;
|
||||||
}
|
}
|
||||||
|
if (switch_event_reserve_subclass(TRANSCRIBE_EVENT_START_OF_SPEECH) != SWITCH_STATUS_SUCCESS) {
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", TRANSCRIBE_EVENT_START_OF_SPEECH);
|
||||||
|
return SWITCH_STATUS_TERM;
|
||||||
|
}
|
||||||
|
if (switch_event_reserve_subclass(TRANSCRIBE_EVENT_END_OF_SPEECH) != SWITCH_STATUS_SUCCESS) {
|
||||||
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", TRANSCRIBE_EVENT_END_OF_SPEECH);
|
||||||
|
return SWITCH_STATUS_TERM;
|
||||||
|
}
|
||||||
if (switch_event_reserve_subclass(TRANSCRIBE_EVENT_START_OF_TRANSCRIPT) != SWITCH_STATUS_SUCCESS) {
|
if (switch_event_reserve_subclass(TRANSCRIBE_EVENT_START_OF_TRANSCRIPT) != SWITCH_STATUS_SUCCESS) {
|
||||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", TRANSCRIBE_EVENT_START_OF_TRANSCRIPT);
|
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", TRANSCRIBE_EVENT_START_OF_TRANSCRIPT);
|
||||||
return SWITCH_STATUS_TERM;
|
return SWITCH_STATUS_TERM;
|
||||||
@@ -556,6 +574,8 @@ SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_transcribe_shutdown)
|
|||||||
google_speech_cleanup();
|
google_speech_cleanup();
|
||||||
switch_event_free_subclass(TRANSCRIBE_EVENT_RESULTS);
|
switch_event_free_subclass(TRANSCRIBE_EVENT_RESULTS);
|
||||||
switch_event_free_subclass(TRANSCRIBE_EVENT_END_OF_UTTERANCE);
|
switch_event_free_subclass(TRANSCRIBE_EVENT_END_OF_UTTERANCE);
|
||||||
|
switch_event_free_subclass(TRANSCRIBE_EVENT_START_OF_SPEECH);
|
||||||
|
switch_event_free_subclass(TRANSCRIBE_EVENT_END_OF_SPEECH);
|
||||||
switch_event_free_subclass(TRANSCRIBE_EVENT_START_OF_TRANSCRIPT);
|
switch_event_free_subclass(TRANSCRIBE_EVENT_START_OF_TRANSCRIPT);
|
||||||
switch_event_free_subclass(TRANSCRIBE_EVENT_END_OF_TRANSCRIPT);
|
switch_event_free_subclass(TRANSCRIBE_EVENT_END_OF_TRANSCRIPT);
|
||||||
switch_event_free_subclass(TRANSCRIBE_EVENT_NO_AUDIO_DETECTED);
|
switch_event_free_subclass(TRANSCRIBE_EVENT_NO_AUDIO_DETECTED);
|
||||||
|
|||||||
@@ -11,6 +11,8 @@
|
|||||||
#define MY_BUG_NAME "google_transcribe"
|
#define MY_BUG_NAME "google_transcribe"
|
||||||
#define TRANSCRIBE_EVENT_RESULTS "google_transcribe::transcription"
|
#define TRANSCRIBE_EVENT_RESULTS "google_transcribe::transcription"
|
||||||
#define TRANSCRIBE_EVENT_END_OF_UTTERANCE "google_transcribe::end_of_utterance"
|
#define TRANSCRIBE_EVENT_END_OF_UTTERANCE "google_transcribe::end_of_utterance"
|
||||||
|
#define TRANSCRIBE_EVENT_START_OF_SPEECH "google_transcribe::start_of_speech"
|
||||||
|
#define TRANSCRIBE_EVENT_END_OF_SPEECH "google_transcribe::end_of_speech"
|
||||||
#define TRANSCRIBE_EVENT_START_OF_TRANSCRIPT "google_transcribe::start_of_transcript"
|
#define TRANSCRIBE_EVENT_START_OF_TRANSCRIPT "google_transcribe::start_of_transcript"
|
||||||
#define TRANSCRIBE_EVENT_END_OF_TRANSCRIPT "google_transcribe::end_of_transcript"
|
#define TRANSCRIBE_EVENT_END_OF_TRANSCRIPT "google_transcribe::end_of_transcript"
|
||||||
#define TRANSCRIBE_EVENT_NO_AUDIO_DETECTED "google_transcribe::no_audio_detected"
|
#define TRANSCRIBE_EVENT_NO_AUDIO_DETECTED "google_transcribe::no_audio_detected"
|
||||||
|
|||||||
Reference in New Issue
Block a user