Author: Dave Horton
Date: 2024-08-28 15:09:23 -04:00
parent 71a61c068d
commit ebd518eaf8
3 changed files with 87 additions and 93 deletions

File 1 of 3

@@ -115,7 +115,7 @@ void tokenize(std::string const &str, const char delim, std::vector<std::string>
 class GStreamer {
 public:
   GStreamer(switch_core_session_t *session, const char* lang, char* region, char* projectId, char* agentId,
-    char* environmentId, char* event, char* text) :
+    char* environmentId, char* intent) :
     m_lang(lang), m_sessionId(switch_core_session_get_uuid(session)), m_agent(agentId), m_projectId(projectId),
     m_environment( nullptr != environmentId ? environmentId : "draft"), m_regionId(nullptr != region ? region : "us"),
     m_speakingRate(), m_pitch(), m_volume(), m_voiceName(""), m_voiceGender(""), m_effects(""),
@@ -123,8 +123,6 @@ public:
     const char* var;
     switch_channel_t* channel = switch_core_session_get_channel(session);
-    // TOODO: handle via channel vars
-    /*
     std::vector<std::string> tokens;
     const char delim = ':';
     tokenize(projectId, delim, tokens);
@@ -143,7 +141,6 @@ public:
       else if (9 == idx && s.length() > 0) m_sentimentAnalysis = (s == "true");
       idx++;
     }
-    */
     std::string endpoint = "dialogflow.googleapis.com";
     if (0 != m_regionId.compare("us")) {
@@ -166,14 +163,14 @@ public:
       auto creds = grpc::GoogleDefaultCredentials();
       m_channel = grpc::CreateChannel(endpoint, creds);
     }
-    startStream(session, event, text);
+    startStream(session, intent);
   }
   ~GStreamer() {
     switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::~GStreamer wrote %ld packets %p\n", m_packets, this);
   }
-  void startStream(switch_core_session_t *session, const char* event, const char* text) {
+  void startStream(switch_core_session_t *session, const char* intent) {
     char szSession[256];
     m_request = std::make_shared<StreamingDetectIntentRequest>();
@@ -183,38 +180,73 @@ public:
     if (0 == m_environment.compare("draft")) {
       snprintf(szSession, 256, "projects/%s/locations/%s/agents/%s/sessions/%s",
         m_projectId.c_str(), m_regionId.c_str(), m_agent.c_str(), m_sessionId.c_str());
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream session %s, %p\n", szSession, this);
     }
     else {
       snprintf(szSession, 256, "projects/%s/locations/%s/agents/%s/environments/%s/sessions/%s",
         m_projectId.c_str(), m_regionId.c_str(), m_environment.c_str(), m_sessionId.c_str());
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream session %s, intent %s,%p\n", szSession, intent, this);
     }
-    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream session %s, event %s, text %s %p\n", szSession, event, text, this);
     m_request->set_session(szSession);
-    auto* queryInput = m_request->mutable_query_input();
-    if (event) {
-      auto* eventInput = queryInput->mutable_event();
-      eventInput->set_event(event);
-      queryInput->set_language_code(m_lang.c_str());
-    }
-    else if (text) {
-      auto* textInput = queryInput->mutable_text();
-      textInput->set_text(text);
-      queryInput->set_language_code(m_lang.c_str());
-    }
-    else {
-      auto* intentInput = queryInput->mutable_intent();
-      intentInput->set_intent(DEFAULT_INTENT);
-    }
-    /*
-    auto* audio_input = queryInput->mutable_audio();
-    auto* audio_config = audio_input->mutable_config();
-    audio_config->set_sample_rate_hertz(16000);
-    audio_config->set_enable_word_info(false);
-    audio_config->set_audio_encoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
-    audio_config->set_single_utterance(false);
-    */
+    auto* queryInput = m_request->mutable_query_input();
+    queryInput->set_language_code(m_lang.c_str());
+    if (intent) {
+      char szIntent[256];
+      auto* intentInput = queryInput->mutable_intent();
+      bool isDefault = 0 == strcasecmp(intent, "default");
+      snprintf(szIntent, 256, "projects/%s/locations/%s/agents/%s/intents/%s",
+        m_projectId.c_str(), m_regionId.c_str(), m_agent.c_str(), isDefault ? DEFAULT_INTENT : intent);
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write writing initial query input w/intent %s, %p\n",
+        szIntent, this);
+      intentInput->set_intent(szIntent);
+    }
+    else {
+      auto* audio_config = queryInput->mutable_audio()->mutable_config();
+      audio_config->set_sample_rate_hertz(16000);
+      audio_config->set_enable_word_info(false);
+      audio_config->set_audio_encoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
+      audio_config->set_single_utterance(true);
+    }
+    auto* outputAudioConfig = m_request->mutable_output_audio_config();
+    outputAudioConfig->set_sample_rate_hertz(8000);
+    outputAudioConfig->set_audio_encoding(OutputAudioEncoding::OUTPUT_AUDIO_ENCODING_LINEAR_16);
+    if (isAnyOutputAudioConfigChanged()) {
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream adding a custom OutputAudioConfig to the request since at"
+        " least one parameter was received.");
+      auto* synthesizeSpeechConfig = outputAudioConfig->mutable_synthesize_speech_config();
+      if (m_speakingRate) synthesizeSpeechConfig->set_speaking_rate(m_speakingRate);
+      if (m_pitch) synthesizeSpeechConfig->set_pitch(m_pitch);
+      if (m_volume) synthesizeSpeechConfig->set_volume_gain_db(m_volume);
+      if (!m_effects.empty()) synthesizeSpeechConfig->add_effects_profile_id(m_effects);
+      auto* voice = synthesizeSpeechConfig->mutable_voice();
+      if (!m_voiceName.empty()) voice->set_name(m_voiceName);
+      if (!m_voiceGender.empty()) {
+        SsmlVoiceGender gender = SsmlVoiceGender::SSML_VOICE_GENDER_UNSPECIFIED;
+        switch (toupper(m_voiceGender[0]))
+        {
+          case 'F': gender = SsmlVoiceGender::SSML_VOICE_GENDER_MALE; break;
+          case 'M': gender = SsmlVoiceGender::SSML_VOICE_GENDER_FEMALE; break;
+          case 'N': gender = SsmlVoiceGender::SSML_VOICE_GENDER_NEUTRAL; break;
+        }
+        voice->set_ssml_gender(gender);
+      } else {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream no custom parameters for OutputAudioConfig, keeping default");
+      }
+      if (m_sentimentAnalysis) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream received sentiment analysis flag as true, adding as query param");
+        auto* queryParameters = m_request->mutable_query_params();
+        queryParameters->set_analyze_query_text_sentiment(m_sentimentAnalysis);
+      }
+    }
     /**
      * Note: there are other parameters that can be set in the audio config, such as:
@@ -222,48 +254,12 @@ public:
      *
      */
-    queryInput->set_language_code(m_lang.c_str());
     switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::startStream checking OutputAudioConfig custom parameters: speaking rate %f,"
       " pitch %f, volume %f, voice name '%s' gender '%s', effects '%s'\n", m_speakingRate,
       m_pitch, m_volume, m_voiceName.c_str(), m_voiceGender.c_str(), m_effects.c_str());
-    if (isAnyOutputAudioConfigChanged()) {
-      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream adding a custom OutputAudioConfig to the request since at"
-        " least one parameter was received.");
-      auto* outputAudioConfig = m_request->mutable_output_audio_config();
-      outputAudioConfig->set_sample_rate_hertz(16000);
-      outputAudioConfig->set_audio_encoding(OutputAudioEncoding::OUTPUT_AUDIO_ENCODING_LINEAR_16);
-      auto* synthesizeSpeechConfig = outputAudioConfig->mutable_synthesize_speech_config();
-      if (m_speakingRate) synthesizeSpeechConfig->set_speaking_rate(m_speakingRate);
-      if (m_pitch) synthesizeSpeechConfig->set_pitch(m_pitch);
-      if (m_volume) synthesizeSpeechConfig->set_volume_gain_db(m_volume);
-      if (!m_effects.empty()) synthesizeSpeechConfig->add_effects_profile_id(m_effects);
-      auto* voice = synthesizeSpeechConfig->mutable_voice();
-      if (!m_voiceName.empty()) voice->set_name(m_voiceName);
-      if (!m_voiceGender.empty()) {
-        SsmlVoiceGender gender = SsmlVoiceGender::SSML_VOICE_GENDER_UNSPECIFIED;
-        switch (toupper(m_voiceGender[0]))
-        {
-          case 'F': gender = SsmlVoiceGender::SSML_VOICE_GENDER_MALE; break;
-          case 'M': gender = SsmlVoiceGender::SSML_VOICE_GENDER_FEMALE; break;
-          case 'N': gender = SsmlVoiceGender::SSML_VOICE_GENDER_NEUTRAL; break;
-        }
-        voice->set_ssml_gender(gender);
-      }
-    } else {
-      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream no custom parameters for OutputAudioConfig, keeping default");
-    }
-    if (m_sentimentAnalysis) {
-      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "GStreamer::startStream received sentiment analysis flag as true, adding as query param");
-      auto* queryParameters = m_request->mutable_query_params();
-      queryParameters->set_analyze_query_text_sentiment(m_sentimentAnalysis);
-    }
     m_streamer = m_stub->StreamingDetectIntent(m_context.get());
     m_streamer->Write(*m_request);
   }
   bool write(void* data, uint32_t datalen) {
     if (m_finished) {
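
(Aside, not part of the diff: the snprintf calls in startStream above build standard Dialogflow CX resource names. A small self-contained illustration with placeholder IDs follows; the project, region, agent, and intent values are invented, not taken from this commit.)

#include <cstdio>

int main() {
  char szSession[256], szIntent[256];
  // session path for the "draft" environment (no environments/ segment)
  snprintf(szSession, sizeof(szSession), "projects/%s/locations/%s/agents/%s/sessions/%s",
    "my-gcp-project", "us-central1", "11111111-2222-3333-4444-555555555555", "66666666-7777-8888-9999-000000000000");
  // fully qualified intent name used when a trigger intent is requested
  snprintf(szIntent, sizeof(szIntent), "projects/%s/locations/%s/agents/%s/intents/%s",
    "my-gcp-project", "us-central1", "11111111-2222-3333-4444-555555555555", "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee");
  printf("session: %s\nintent:  %s\n", szSession, szIntent);
  return 0;
}
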
@@ -271,20 +267,17 @@ public:
       return false;
     }
-    auto* queryInput = m_request->mutable_query_input();
     m_request->clear_query_input();
     m_request->clear_query_params();
-    queryInput->set_language_code(m_lang.c_str());
-    queryInput->mutable_audio()->set_audio(data, datalen);
-    auto* audio_config = m_request->mutable_query_input()->mutable_audio()->mutable_config();
-    audio_config->set_sample_rate_hertz(16000);
-    audio_config->set_enable_word_info(false);
-    audio_config->set_audio_encoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
-    audio_config->set_single_utterance(false);
+    //switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write writing packet %d\n", m_packets);
+    m_request->mutable_query_input()->mutable_audio()->set_audio(data, datalen);
     m_packets++;
     return m_streamer->Write(*m_request);
   }
   bool read(StreamingDetectIntentResponse* response) {
     return m_streamer->Read(response);
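
(Aside, not part of the diff: taken together, the new startStream and write follow the Dialogflow CX streaming contract. The first StreamingDetectIntentRequest carries the session name, language, and either a trigger intent or the input audio config; every later request carries only raw audio bytes. The sketch below is a minimal stand-alone version of that sequence; the header path, the Sessions stub name, and the hard-coded "en-US" are assumptions based on the generated google::cloud::dialogflow::cx::v3 classes this module appears to use.)

#include <memory>
#include <string>
#include <vector>
#include <grpcpp/grpcpp.h>
#include "google/cloud/dialogflow/cx/v3/session.grpc.pb.h"  // assumed header path

using google::cloud::dialogflow::cx::v3::Sessions;
using google::cloud::dialogflow::cx::v3::StreamingDetectIntentRequest;
using google::cloud::dialogflow::cx::v3::StreamingDetectIntentResponse;
using google::cloud::dialogflow::cx::v3::AudioEncoding;

void streamAudioOnce(std::shared_ptr<grpc::Channel> channel,
                     const std::string& sessionPath,
                     const std::vector<std::string>& packets) {
  auto stub = Sessions::NewStub(channel);
  grpc::ClientContext context;
  auto streamer = stub->StreamingDetectIntent(&context);

  StreamingDetectIntentRequest request;
  request.set_session(sessionPath);
  auto* queryInput = request.mutable_query_input();
  queryInput->set_language_code("en-US");
  auto* audioConfig = queryInput->mutable_audio()->mutable_config();
  audioConfig->set_sample_rate_hertz(16000);
  audioConfig->set_audio_encoding(AudioEncoding::AUDIO_ENCODING_LINEAR_16);
  audioConfig->set_single_utterance(true);
  streamer->Write(request);                      // initial configuration request

  for (const auto& pkt : packets) {              // follow-ups: audio bytes only
    request.clear_query_input();
    request.clear_query_params();
    request.mutable_query_input()->mutable_audio()->set_audio(pkt.data(), pkt.size());
    streamer->Write(request);
  }
  streamer->WritesDone();

  StreamingDetectIntentResponse response;
  while (streamer->Read(&response)) { /* handled by the module's read thread */ }
  grpc::Status status = streamer->Finish();
  (void) status;
}
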
@@ -330,6 +323,8 @@ private:
   std::string m_voiceGender;
   bool m_sentimentAnalysis;
   bool m_finished;
+  bool m_ready;
   uint32_t m_packets;
 };
@@ -363,7 +358,12 @@ static void *SWITCH_THREAD_FUNC grpc_read_thread(switch_thread_t *thread, void *
   switch_channel_t* channel = switch_core_session_get_channel(psession);
   GRPCParser parser(psession);
+  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "grpc_read_thread: read something %p\n", (void *) cb);
   // TODO: handle has_debugging_info()
+  if (response.has_debugging_info()) {
+    auto di = response.debugging_info();
+  }
   bool hasAudio = false;
   if (response.has_detect_intent_response() || response.has_recognition_result()) {
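
(Aside, not part of the diff: on the consumer side, each StreamingDetectIntentResponse read off the stream carries either an interim recognition_result or the final detect_intent_response, which may include synthesized output audio when an OutputAudioConfig was supplied. A hedged sketch of the drain loop, using only the accessors visible in this diff plus the module's read() wrapper; the function name is illustrative.)

// Illustrative only; error handling and audio playback are the read thread's job.
// "GStreamer" is this module's wrapper class, unrelated to the GStreamer media framework.
void drainResponses(GStreamer* streamer) {
  StreamingDetectIntentResponse response;
  while (streamer->read(&response)) {            // returns false once the server closes the stream
    if (response.has_debugging_info()) {
      auto di = response.debugging_info();       // currently unused by the module (see TODO above)
      (void) di;
    }
    if (response.has_recognition_result()) {
      // interim transcription of the caller's audio
    }
    if (response.has_detect_intent_response()) {
      // final agent match; may include output_audio for playback
    }
  }
}
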
@@ -485,8 +485,7 @@ extern "C" {
     char* projectId,
     char* agentId,
     char* environmentId,
-    char* event,
-    char* text,
+    char* intent,
     struct cap_cb **ppUserData
   ) {
     switch_status_t status = SWITCH_STATUS_SUCCESS;
@@ -518,7 +517,7 @@ extern "C" {
     strncpy(cb->agentId, agentId, MAX_PROJECT_ID);
     if (nullptr != environmentId) strncpy(cb->environmentId, environmentId, MAX_PROJECT_ID);
     if (nullptr != region) strncpy(cb->region, region, MAX_REGION);
-    cb->streamer = new GStreamer(session, lang, region, projectId, agentId, environmentId, event, text);
+    cb->streamer = new GStreamer(session, lang, region, projectId, agentId, environmentId, intent);
     cb->resampler = speex_resampler_init(1, 8000, 16000, SWITCH_RESAMPLE_QUALITY, &err);
     if (0 != err) {
       switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error initializing resampler: %s.\n",

File 2 of 3

@@ -13,8 +13,7 @@ switch_status_t google_dialogflow_cx_session_init(
   char* projectId,
   char* agentId,
   char* environmentId,
-  char* event,
-  char* text,
+  char* intent,
   struct cap_cb **ppUserData);
 switch_status_t google_dialogflow_cx_session_stop(switch_core_session_t *session, int channelIsClosing);
 switch_bool_t google_dialogflow_cx_frame(switch_media_bug_t *bug, void* user_data);

File 3 of 3

@@ -71,7 +71,7 @@ static switch_bool_t capture_callback(switch_media_bug_t *bug, void *user_data,
 }
 static switch_status_t start_capture(switch_core_session_t *session, switch_media_bug_flag_t flags, char* lang, char* region, char* projectId,
-  char *agentId, char *environmentId, char* event, char* text)
+  char *agentId, char *environmentId, char* intent)
 {
   switch_channel_t *channel = switch_core_session_get_channel(session);
   switch_media_bug_t *bug;
@@ -90,12 +90,12 @@ char *agentId, char *environmentId, char* event, char* text)
     goto done;
   }
-  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "starting dialogflow_cx with project %s, language %s, event %s, text %s.\n",
-    projectId, lang, event, text);
+  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "starting dialogflow_cx with project %s, language %s, intent %s\n",
+    projectId, lang, intent);
   switch_core_session_get_read_impl(session, &read_impl);
   if (SWITCH_STATUS_FALSE == google_dialogflow_cx_session_init(session, responseHandler, errorHandler,
-    read_impl.samples_per_second, lang, region, projectId, agentId, environmentId, event, text, &cb)) {
+    read_impl.samples_per_second, lang, region, projectId, agentId, environmentId, intent, &cb)) {
     switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error initializing google dialogflow_cx session.\n");
     status = SWITCH_STATUS_FALSE;
     goto done;
@@ -132,7 +132,7 @@ static switch_status_t do_stop(switch_core_session_t *session)
   return status;
 }
-#define DIALOGFLOW_API_START_SYNTAX "<uuid> region project-id agent-id environment-id lang-code [event] [text]"
+#define DIALOGFLOW_API_START_SYNTAX "<uuid> region project-id agent-id environment-id lang-code [intent]"
 SWITCH_STANDARD_API(dialogflow_cx_api_start_function)
 {
   char *mycmd = NULL, *argv[10] = { 0 };
@@ -153,8 +153,7 @@ SWITCH_STANDARD_API(dialogflow_cx_api_start_function)
   switch_core_session_t *lsession = NULL;
   if ((lsession = switch_core_session_locate(argv[0]))) {
-    char *event = NULL;
-    char *text = NULL;
+    char *intent = NULL;
     char *region = argv[1];
     char *projectId = argv[2];
     char *agentId = argv[3];
@@ -167,15 +166,12 @@ SWITCH_STANDARD_API(dialogflow_cx_api_start_function)
       region = NULL;
     }
     if (argc > 6) {
-      event = argv[6];
-      if (0 == strcmp("none", event)) {
-        event = NULL;
+      intent = argv[6];
+      if (0 == strcmp("none", intent)) {
+        intent = NULL;
       }
     }
-    if (argc > 7) {
-      text = argv[6];
-    }
-    status = start_capture(lsession, flags, lang, region, projectId, agentId, environmentId, event, text);
+    status = start_capture(lsession, flags, lang, region, projectId, agentId, environmentId, intent);
     switch_core_session_rwunlock(lsession);
   }
 }
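
(Aside, not part of the diff: with [event] and [text] gone, the start command takes one optional trailing [intent]. The FreeSWITCH API name under which dialogflow_cx_api_start_function is registered is not shown here, so no command name is assumed; the snippet below simply illustrates the new positional layout with placeholder values. "none" clears the intent and "default" maps to DEFAULT_INTENT, per the parsing code above.)

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // New syntax: <uuid> region project-id agent-id environment-id lang-code [intent]
  // All values are placeholders.
  std::string args = "f3b1c9d2-0000-0000-0000-000000000001 us my-gcp-project "
                     "11111111-2222-3333-4444-555555555555 draft en-US default";
  std::vector<std::string> argv;
  std::istringstream iss(args);
  for (std::string tok; iss >> tok; ) argv.push_back(tok);

  const char* names[] = { "uuid", "region", "project-id", "agent-id",
                          "environment-id", "lang-code", "intent (optional)" };
  for (size_t i = 0; i < argv.size() && i < 7; ++i)
    std::cout << names[i] << " = " << argv[i] << "\n";
  return 0;
}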