From ebd518eaf8163b4789deba2f6df32c688ead6f09 Mon Sep 17 00:00:00 2001
From: Dave Horton
Date: Wed, 28 Aug 2024 15:09:23 -0400
Subject: [PATCH] wip: replace event/text query inputs with an optional intent

---
 mod_dialogflow_cx/google_glue.cpp     | 153 +++++++++++++-------------
 mod_dialogflow_cx/google_glue.h       |   3 +-
 mod_dialogflow_cx/mod_dialogflow_cx.c |  24 ++--
 3 files changed, 87 insertions(+), 93 deletions(-)

diff --git a/mod_dialogflow_cx/google_glue.cpp b/mod_dialogflow_cx/google_glue.cpp
index 94ba42c..5fefa31 100644
--- a/mod_dialogflow_cx/google_glue.cpp
+++ b/mod_dialogflow_cx/google_glue.cpp
@@ -115,7 +115,7 @@ void tokenize(std::string const &str, const char delim, std::vector
 class GStreamer {
 public:
   GStreamer(switch_core_session_t *session, const char* lang, char* region, char* projectId, char* agentId,
-    char* environmentId, char* event, char* text) :
+    char* environmentId, char* intent) :
     m_lang(lang), m_sessionId(switch_core_session_get_uuid(session)), m_agent(agentId), m_projectId(projectId),
     m_environment( nullptr != environmentId ? environmentId : "draft"), m_regionId(nullptr != region ? region : "us"),
     m_speakingRate(), m_pitch(), m_volume(), m_voiceName(""), m_voiceGender(""), m_effects(""),
@@ -123,8 +123,6 @@ public:
     const char* var;
     switch_channel_t* channel = switch_core_session_get_channel(session);

-    // TOODO: handle via channel vars
-    /*
     std::vector<std::string> tokens;
     const char delim = ':';
     tokenize(projectId, delim, tokens);
@@ -143,7 +141,6 @@ public:
       else if (9 == idx && s.length() > 0) m_sentimentAnalysis = (s == "true");
       idx++;
     }
-    */

     std::string endpoint = "dialogflow.googleapis.com";
     if (0 != m_regionId.compare("us")) {
@@ -166,14 +163,14 @@ public:
       auto creds = grpc::GoogleDefaultCredentials();
       m_channel = grpc::CreateChannel(endpoint, creds);
     }
-    startStream(session, event, text);
+    startStream(session, intent);
   }
   ~GStreamer() {
     switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::~GStreamer wrote %ld packets %p\n", m_packets, this);
   }

-  void startStream(switch_core_session_t *session, const char* event, const char* text) {
+  void startStream(switch_core_session_t *session, const char* intent) {
     char szSession[256];
     m_request = std::make_shared<StreamingDetectIntentRequest>();

@@ -183,38 +180,73 @@ public:
     if (0 == m_environment.compare("draft")) {
       snprintf(szSession, 256, "projects/%s/locations/%s/agents/%s/sessions/%s",
         m_projectId.c_str(), m_regionId.c_str(), m_agent.c_str(), m_sessionId.c_str());
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream session %s, %p\n", szSession, this);
     }
     else {
       snprintf(szSession, 256, "projects/%s/locations/%s/agents/%s/environments/%s/sessions/%s",
         m_projectId.c_str(), m_regionId.c_str(), m_environment.c_str(), m_sessionId.c_str());
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream session %s, intent %s, %p\n", szSession, intent, this);
     }
-    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream session %s, event %s, text %s %p\n", szSession, event, text, this);
-
     m_request->set_session(szSession);
-    auto* queryInput = m_request->mutable_query_input();
-    if (event) {
-      auto* eventInput = queryInput->mutable_event();
-      eventInput->set_event(event);
-      queryInput->set_language_code(m_lang.c_str());
-    }
-    else if (text) {
-      auto* textInput = queryInput->mutable_text();
-      textInput->set_text(text);
-      queryInput->set_language_code(m_lang.c_str());
-    }
-    else {
+
+    auto* queryInput = m_request->mutable_query_input();
+    queryInput->set_language_code(m_lang.c_str());
+    if (intent) {
+      char szIntent[256];
       auto* intentInput = queryInput->mutable_intent();
-      intentInput->set_intent(DEFAULT_INTENT);
+      bool isDefault = 0 == strcasecmp(intent, "default");
+
+      snprintf(szIntent, 256, "projects/%s/locations/%s/agents/%s/intents/%s",
+        m_projectId.c_str(), m_regionId.c_str(), m_agent.c_str(), isDefault ? DEFAULT_INTENT : intent);
+
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::startStream writing initial query input w/intent %s, %p\n",
+        szIntent, this);
+
+      intentInput->set_intent(szIntent);
+    }
+    else {
+      auto* audio_config = queryInput->mutable_audio()->mutable_config();
+      audio_config->set_sample_rate_hertz(16000);
+      audio_config->set_enable_word_info(false);
+      audio_config->set_audio_encoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
+      audio_config->set_single_utterance(true);
+    }
+
+    auto* outputAudioConfig = m_request->mutable_output_audio_config();
+    outputAudioConfig->set_sample_rate_hertz(8000);
+    outputAudioConfig->set_audio_encoding(OutputAudioEncoding::OUTPUT_AUDIO_ENCODING_LINEAR_16);
+
+    if (isAnyOutputAudioConfigChanged()) {
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream adding a custom OutputAudioConfig to the request since at"
+        " least one parameter was received.");
+
+      auto* synthesizeSpeechConfig = outputAudioConfig->mutable_synthesize_speech_config();
+      if (m_speakingRate) synthesizeSpeechConfig->set_speaking_rate(m_speakingRate);
+      if (m_pitch) synthesizeSpeechConfig->set_pitch(m_pitch);
+      if (m_volume) synthesizeSpeechConfig->set_volume_gain_db(m_volume);
+      if (!m_effects.empty()) synthesizeSpeechConfig->add_effects_profile_id(m_effects);
+
+      auto* voice = synthesizeSpeechConfig->mutable_voice();
+      if (!m_voiceName.empty()) voice->set_name(m_voiceName);
+      if (!m_voiceGender.empty()) {
+        SsmlVoiceGender gender = SsmlVoiceGender::SSML_VOICE_GENDER_UNSPECIFIED;
+        switch (toupper(m_voiceGender[0]))
+        {
+          case 'F': gender = SsmlVoiceGender::SSML_VOICE_GENDER_FEMALE; break;
+          case 'M': gender = SsmlVoiceGender::SSML_VOICE_GENDER_MALE; break;
+          case 'N': gender = SsmlVoiceGender::SSML_VOICE_GENDER_NEUTRAL; break;
+        }
+        voice->set_ssml_gender(gender);
+      } else {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream no custom parameters for OutputAudioConfig, keeping default");
+      }
+      if (m_sentimentAnalysis) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream received sentiment analysis flag as true, adding as query param");
+        auto* queryParameters = m_request->mutable_query_params();
+        queryParameters->set_analyze_query_text_sentiment(m_sentimentAnalysis);
+      }
     }

-    /*
-    auto* audio_input = queryInput->mutable_audio();
-    auto* audio_config = audio_input->mutable_config();
-    audio_config->set_sample_rate_hertz(16000);
-    audio_config->set_enable_word_info(false);
-    audio_config->set_audio_encoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
-    audio_config->set_single_utterance(false);
-    */
@@ -222,48 +254,12 @@ public:
     /**
      * Note: there are other parameters that can be set in the audio config, such as:
      *
      */
-    queryInput->set_language_code(m_lang.c_str());
-
     switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::startStream checking OutputAudioConfig custom parameters: speaking rate %f,"
       " pitch %f, volume %f, voice name '%s' gender '%s', effects '%s'\n",
       m_speakingRate, m_pitch, m_volume, m_voiceName.c_str(), m_voiceGender.c_str(), m_effects.c_str());

-    if (isAnyOutputAudioConfigChanged()) {
-      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream adding a custom OutputAudioConfig to the request since at"
-        " least one parameter was received.");
-      auto* outputAudioConfig = m_request->mutable_output_audio_config();
-      outputAudioConfig->set_sample_rate_hertz(16000);
-      outputAudioConfig->set_audio_encoding(OutputAudioEncoding::OUTPUT_AUDIO_ENCODING_LINEAR_16);
-
-      auto* synthesizeSpeechConfig = outputAudioConfig->mutable_synthesize_speech_config();
-      if (m_speakingRate) synthesizeSpeechConfig->set_speaking_rate(m_speakingRate);
-      if (m_pitch) synthesizeSpeechConfig->set_pitch(m_pitch);
-      if (m_volume) synthesizeSpeechConfig->set_volume_gain_db(m_volume);
-      if (!m_effects.empty()) synthesizeSpeechConfig->add_effects_profile_id(m_effects);
-
-      auto* voice = synthesizeSpeechConfig->mutable_voice();
-      if (!m_voiceName.empty()) voice->set_name(m_voiceName);
-      if (!m_voiceGender.empty()) {
-        SsmlVoiceGender gender = SsmlVoiceGender::SSML_VOICE_GENDER_UNSPECIFIED;
-        switch (toupper(m_voiceGender[0]))
-        {
-          case 'F': gender = SsmlVoiceGender::SSML_VOICE_GENDER_MALE; break;
-          case 'M': gender = SsmlVoiceGender::SSML_VOICE_GENDER_FEMALE; break;
-          case 'N': gender = SsmlVoiceGender::SSML_VOICE_GENDER_NEUTRAL; break;
-        }
-        voice->set_ssml_gender(gender);
-      }
-    } else {
-      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream no custom parameters for OutputAudioConfig, keeping default");
-    }
-
-    if (m_sentimentAnalysis) {
-      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream received sentiment analysis flag as true, adding as query param");
-      auto* queryParameters = m_request->mutable_query_params();
-      queryParameters->set_analyze_query_text_sentiment(m_sentimentAnalysis);
-    }

     m_streamer = m_stub->StreamingDetectIntent(m_context.get());
-    m_streamer->Write(*m_request);
+    m_streamer->Write(*m_request);
   }
   bool write(void* data, uint32_t datalen) {
     if (m_finished) {
@@ -271,20 +267,17 @@
       return false;
     }
+    auto* queryInput = m_request->mutable_query_input();
     m_request->clear_query_input();
     m_request->clear_query_params();
+
+    queryInput->set_language_code(m_lang.c_str());
+    queryInput->mutable_audio()->set_audio(data, datalen);

-    auto* audio_config = m_request->mutable_query_input()->mutable_audio()->mutable_config();
-    audio_config->set_sample_rate_hertz(16000);
-    audio_config->set_enable_word_info(false);
-    audio_config->set_audio_encoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
-    audio_config->set_single_utterance(false);
-
-    m_request->mutable_query_input()->mutable_audio()->set_audio(data, datalen);
+    //switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write writing packet %d\n", m_packets);
     m_packets++;
     return m_streamer->Write(*m_request);
-
   }
   bool read(StreamingDetectIntentResponse* response) {
     return m_streamer->Read(response);
@@ -330,6 +323,8 @@ private:
   std::string m_voiceGender;
   bool m_sentimentAnalysis;
   bool m_finished;
+  bool m_ready;
+  uint32_t m_packets;
 };

@@ -363,7 +358,12 @@ static void *SWITCH_THREAD_FUNC grpc_read_thread(switch_thread_t *thread, void *
       switch_channel_t* channel = switch_core_session_get_channel(psession);
       GRPCParser parser(psession);

+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "grpc_read_thread: read something %p\n", (void *) cb);
+      // TODO: handle has_debugging_info()
+      if (response.has_debugging_info()) {
+        auto di = response.debugging_info();
+      }
       bool hasAudio = false;
       if (response.has_detect_intent_response() || response.has_recognition_result()) {
@@ -485,8 +485,7 @@ extern "C" {
     char* projectId,
     char* agentId,
    char* environmentId,
-    char* event,
-    char* text,
+    char* intent,
     struct cap_cb **ppUserData
   ) {
     switch_status_t status = SWITCH_STATUS_SUCCESS;
@@ -518,7 +517,7 @@ extern "C" {
     strncpy(cb->agentId, agentId, MAX_PROJECT_ID);
     if (nullptr != environmentId) strncpy(cb->environmentId, environmentId, MAX_PROJECT_ID);
     if (nullptr != region) strncpy(cb->region, region, MAX_REGION);
-    cb->streamer = new GStreamer(session, lang, region, projectId, agentId, environmentId, event, text);
+    cb->streamer = new GStreamer(session, lang, region, projectId, agentId, environmentId, intent);
     cb->resampler = speex_resampler_init(1, 8000, 16000, SWITCH_RESAMPLE_QUALITY, &err);
     if (0 != err) {
       switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error initializing resampler: %s.\n",
diff --git a/mod_dialogflow_cx/google_glue.h b/mod_dialogflow_cx/google_glue.h
index 6ba3c7e..afaded6 100644
--- a/mod_dialogflow_cx/google_glue.h
+++ b/mod_dialogflow_cx/google_glue.h
@@ -13,8 +13,7 @@ switch_status_t google_dialogflow_cx_session_init(
   char* projectId,
   char* agentId,
   char* environmentId,
-  char* event,
-  char* text,
+  char* intent,
   struct cap_cb **ppUserData);
 switch_status_t google_dialogflow_cx_session_stop(switch_core_session_t *session, int channelIsClosing);
 switch_bool_t google_dialogflow_cx_frame(switch_media_bug_t *bug, void* user_data);
diff --git a/mod_dialogflow_cx/mod_dialogflow_cx.c b/mod_dialogflow_cx/mod_dialogflow_cx.c
index 5075e18..713e09e 100644
--- a/mod_dialogflow_cx/mod_dialogflow_cx.c
+++ b/mod_dialogflow_cx/mod_dialogflow_cx.c
@@ -71,7 +71,7 @@ static switch_bool_t capture_callback(switch_media_bug_t *bug, void *user_data,
 }

 static switch_status_t start_capture(switch_core_session_t *session, switch_media_bug_flag_t flags, char* lang, char* region, char* projectId,
-char *agentId, char *environmentId, char* event, char* text)
+char *agentId, char *environmentId, char* intent)
 {
 	switch_channel_t *channel = switch_core_session_get_channel(session);
 	switch_media_bug_t *bug;
@@ -90,12 +90,12 @@ char *agentId, char *environmentId, char* event, char* text)
 		goto done;
 	}

-	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "starting dialogflow_cx with project %s, language %s, event %s, text %s.\n",
-		projectId, lang, event, text);
+	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "starting dialogflow_cx with project %s, language %s, intent %s\n",
+		projectId, lang, intent);
 	switch_core_session_get_read_impl(session, &read_impl);

 	if (SWITCH_STATUS_FALSE == google_dialogflow_cx_session_init(session, responseHandler, errorHandler,
-		read_impl.samples_per_second, lang, region, projectId, agentId, environmentId, event, text, &cb)) {
+		read_impl.samples_per_second, lang, region, projectId, agentId, environmentId, intent, &cb)) {
 		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error initializing google dialogflow_cx session.\n");
 		status = SWITCH_STATUS_FALSE;
 		goto done;
@@ -132,7 +132,7 @@ static switch_status_t do_stop(switch_core_session_t *session)
 	return status;
 }

-#define DIALOGFLOW_API_START_SYNTAX "<uuid> region project-id agent-id environment-id lang-code [event] [text]"
+#define DIALOGFLOW_API_START_SYNTAX "<uuid> region project-id agent-id environment-id lang-code [intent]"
 SWITCH_STANDARD_API(dialogflow_cx_api_start_function)
 {
 	char *mycmd = NULL, *argv[10] = { 0 };
@@ -153,8 +153,7 @@ SWITCH_STANDARD_API(dialogflow_cx_api_start_function)
 		switch_core_session_t *lsession = NULL;

 		if ((lsession = switch_core_session_locate(argv[0]))) {
-			char *event = NULL;
-			char *text = NULL;
+			char *intent = NULL;
 			char *region = argv[1];
 			char *projectId = argv[2];
 			char *agentId = argv[3];
@@ -167,15 +166,12 @@ SWITCH_STANDARD_API(dialogflow_cx_api_start_function)
 				region = NULL;
 			}
 			if (argc > 6) {
-				event = argv[6];
-				if (0 == strcmp("none", event)) {
-					event = NULL;
+				intent = argv[6];
+				if (0 == strcmp("none", intent)) {
+					intent = NULL;
 				}
 			}
-			if (argc > 7) {
-				text = argv[6];
-			}
-			status = start_capture(lsession, flags, lang, region, projectId, agentId, environmentId, event, text);
+			status = start_capture(lsession, flags, lang, region, projectId, agentId, environmentId, intent);
 			switch_core_session_rwunlock(lsession);
 		}
 	}
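
Usage sketch (not part of the patch): with this change the start API takes a single optional [intent] argument where [event] and [text] used to be. The command name below assumes the module registers the API as dialogflow_cx_start (the registration call is outside this diff); the uuid, project, agent and intent values are placeholders.

    dialogflow_cx_start <session-uuid> us <project-id> <agent-id> draft en-US none
    dialogflow_cx_start <session-uuid> us <project-id> <agent-id> draft en-US <intent-id>

Passing "none" (or omitting the argument) leaves intent NULL, so startStream sends a streaming audio config (LINEAR16, 16 kHz, single_utterance) in the initial request; "default" resolves to DEFAULT_INTENT; any other value is treated as an intent ID and expanded to projects/<project>/locations/<region>/agents/<agent>/intents/<id>.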