// File: freeswitch-modules/mod_aws_transcribe/aws_transcribe_glue.cpp
// Last modified: 2023-12-26 10:57:15 -05:00
// Size: 594 lines, 22 KiB (C++)

#include <cstdlib>
#include <switch.h>
#include <switch_json.h>
#include <string.h>
#include <atomic>
#include <condition_variable>
#include <deque>
#include <exception>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <aws/core/Aws.h>
#include <aws/core/auth/AWSCredentialsProvider.h>
#include <aws/core/client/ClientConfiguration.h>
#include <aws/core/utils/logging/DefaultLogSystem.h>
#include <aws/core/utils/logging/AWSLogging.h>
#include <aws/transcribestreaming/TranscribeStreamingServiceClient.h>
#include <aws/transcribestreaming/model/StartStreamTranscriptionHandler.h>
#include <aws/transcribestreaming/model/StartStreamTranscriptionRequest.h>
#include "mod_aws_transcribe.h"
#include "simple_buffer.h"
// Not referenced anywhere in the visible part of this file.
#define BUFFER_SECS (3)
// Byte count used by GStreamer: pre-connect audio is only buffered when its length is a
// multiple of this value, and buffered chunks are replayed with this length once connected.
#define CHUNKSIZE (320)
using namespace Aws;
using namespace Aws::Utils;
using namespace Aws::Auth;
using namespace Aws::TranscribeStreamingService;
using namespace Aws::TranscribeStreamingService::Model;
// Allocation tag handed to Aws::MakeUnique when the transcribe client is created.
const char ALLOC_TAG[] = "drachtio";
// Set by aws_transcribe_init() from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
// variables; it is not read anywhere else in the visible part of this file.
static bool hasDefaultCredentials = false;
class GStreamer {
public:
GStreamer(
const char *sessionId,
const char *bugname,
u_int16_t channels,
char *lang,
int interim,
uint32_t samples_per_second,
const char* region,
const char* awsAccessKeyId,
const char* awsSecretAccessKey,
responseHandler_t responseHandler
) : m_sessionId(sessionId), m_bugname(bugname), m_finished(false), m_interim(interim), m_finishing(false), m_connected(false), m_connecting(false),
m_packets(0), m_responseHandler(responseHandler), m_pStream(nullptr),
m_audioBuffer(320 * (samples_per_second == 8000 ? 1 : 2), 15) {
Aws::String key(awsAccessKeyId);
Aws::String secret(awsSecretAccessKey);
Aws::Client::ClientConfiguration config;
if (region != nullptr && strlen(region) > 0) config.region = region;
char keySnippet[20];
strncpy(keySnippet, awsAccessKeyId, 4);
for (int i = 4; i < 20; i++) keySnippet[i] = 'x';
keySnippet[19] = '\0';
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p ACCESS_KEY_ID %s, region %s\n", this, keySnippet, region);
if (*awsAccessKeyId && *awsSecretAccessKey) {
m_client = Aws::MakeUnique<TranscribeStreamingServiceClient>(ALLOC_TAG, AWSCredentials(awsAccessKeyId, awsSecretAccessKey), config);
}
else {
m_client = Aws::MakeUnique<TranscribeStreamingServiceClient>(ALLOC_TAG, config);
}
m_handler.SetTranscriptEventCallback([this](const TranscriptEvent& ev)
{
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
if (psession) {
switch_channel_t* channel = switch_core_session_get_channel(psession);
std::lock_guard<std::mutex> lk(m_mutex);
m_transcript = ev;
m_cond.notify_one();
switch_core_session_rwunlock(psession);
}
});
// not worth resampling to 16k if we get 8k ulaw or alaw in..
m_request.SetMediaSampleRateHertz(samples_per_second > 8000 ? 16000 : 8000);
m_request.SetLanguageCode(LanguageCodeMapper::GetLanguageCodeForName(lang));
m_request.SetMediaEncoding(MediaEncoding::pcm);
m_request.SetEventStreamHandler(m_handler);
if (channels > 1) m_request.SetNumberOfChannels(channels);
const char* var;
switch_core_session_t* session = switch_core_session_locate(sessionId);
switch_channel_t *channel = switch_core_session_get_channel(session);
if (var = switch_channel_get_variable(channel, "AWS_SHOW_SPEAKER_LABEL")) {
m_request.SetShowSpeakerLabel(true);
}
if (var = switch_channel_get_variable(channel, "AWS_ENABLE_CHANNEL_IDENTIFICATION")) {
m_request.SetEnableChannelIdentification(true);
}
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_NAME")) {
m_request.SetVocabularyName(var);
}
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_FILTER_NAME")) {
m_request.SetVocabularyFilterName(var);
}
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_FILTER_METHOD")) {
m_request.SetVocabularyFilterMethod(VocabularyFilterMethodMapper::GetVocabularyFilterMethodForName(var));
}
switch_core_session_rwunlock(session);
}
void connect() {
if (m_connecting) return;
m_connecting = true;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer:connect %p connecting to aws speech..\n", this);
auto OnStreamReady = [this](Model::AudioStream& stream)
{
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
if (psession) {
switch_channel_t* channel = switch_core_session_get_channel(psession);
m_pStream = &stream;
m_connected = true;
// send any buffered audio
int nFrames = m_audioBuffer.getNumItems();
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p got stream ready, %d buffered frames\n", this, nFrames);
if (nFrames) {
char *p;
do {
p = m_audioBuffer.getNextChunk();
if (p) {
write(p, CHUNKSIZE);
}
} while (p);
}
switch_core_session_rwunlock(psession);
}
};
auto OnResponseCallback = [this](const TranscribeStreamingServiceClient* pClient,
const Model::StartStreamTranscriptionRequest& request,
const Model::StartStreamTranscriptionOutcome& outcome,
const std::shared_ptr<const Aws::Client::AsyncCallerContext>& context)
{
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p stream got final response\n", this);
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
if (psession) {
if (!outcome.IsSuccess()) {
const TranscribeStreamingServiceError& err = outcome.GetError();
auto message = err.GetMessage();
auto exception = err.GetExceptionName();
cJSON* json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "type", "error");
cJSON_AddStringToObject(json, "error", message.c_str());
char* jsonString = cJSON_PrintUnformatted(json);
m_responseHandler(psession, jsonString, m_bugname.c_str());
free(jsonString);
cJSON_Delete(json);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p stream got error response %s : %s\n", this, message.c_str(), exception.c_str());
}
std::lock_guard<std::mutex> lk(m_mutex);
m_finished = true;
m_cond.notify_one();
switch_core_session_rwunlock(psession);
} else {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p session is closed/hungup. Need to unblock thread.\n", this);
std::lock_guard<std::mutex> lk(m_mutex);
m_finished = true;
m_cond.notify_one();
}
};
m_client->StartStreamTranscriptionAsync(m_request, OnStreamReady, OnResponseCallback, nullptr);
}
~GStreamer() {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::~GStreamer wrote %u packets %p\n", m_packets, this);
}
bool write(void* data, uint32_t datalen) {
if (m_finishing || m_finished) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write not writing because we are finished, %p\n", this);
return false;
}
if (!m_connected) {
if (datalen % CHUNKSIZE == 0) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write queuing %d bytes\n", datalen);
m_audioBuffer.add(data, datalen);
}
return true;
}
std::lock_guard<std::mutex> lk(m_mutex);
const auto beg = static_cast<const unsigned char*>(data);
const auto end = beg + datalen;
Aws::Vector<unsigned char> bits { beg, end };
m_deqAudio.push_back(bits);
m_packets++;
m_cond.notify_one();
return true;
}
void finish() {
if (m_finishing) return;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::finish %p\n", this);
std::lock_guard<std::mutex> lk(m_mutex);
m_finishing = true;
m_cond.notify_one();
}
void processData() {
bool shutdownInitiated = false;
while (true) {
std::unique_lock<std::mutex> lk(m_mutex);
m_cond.wait(lk, [&, this] {
return (!m_deqAudio.empty() && !m_finishing) || m_transcript.TranscriptHasBeenSet() || m_finished || (m_finishing && !shutdownInitiated);
});
// we have data to process or have been told we're done
if (m_finished || !m_connected) return;
if (m_transcript.TranscriptHasBeenSet()) {
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
if (psession) {
//switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::got a transcript to send out %p\n", this);
bool isFinal = false;
std::ostringstream s;
s << "[";
for (auto&& r : m_transcript.GetTranscript().GetResults()) {
int count = 0;
std::ostringstream t1;
if (!isFinal && !r.GetIsPartial()) isFinal = true;
t1 << "{\"is_final\": " << (r.GetIsPartial() ? "false" : "true") << ", \"alternatives\": [";
for (auto&& alt : r.GetAlternatives()) {
std::ostringstream t2;
if (count++ == 0) t2 << "{\"transcript\": \"" << alt.GetTranscript() << "\"}";
else t2 << ", {\"transcript\": \"" << alt.GetTranscript() << "\"}";
t1 << t2.str();
}
t1 << "]}";
s << t1.str();
}
s << "]";
if (0 != s.str().compare("[]") && (isFinal || m_interim)) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::writing transcript %p: %s\n", this, s.str().c_str() );
m_responseHandler(psession, s.str().c_str(), m_bugname.c_str());
}
TranscriptEvent empty;
m_transcript = empty;
switch_core_session_rwunlock(psession);
}
}
if (m_finishing) {
shutdownInitiated = true;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::writing disconnect event %p\n", this);
if (m_pStream) {
m_pStream->flush();
m_pStream->Close();
m_pStream = nullptr;
}
}
else {
// send out any queued speech packets
while (!m_deqAudio.empty()) {
Aws::Vector<unsigned char>& bits = m_deqAudio.front();
Aws::TranscribeStreamingService::Model::AudioEvent event(std::move(bits));
m_pStream->WriteAudioEvent(event);
m_deqAudio.pop_front();
}
}
}
}
bool isConnecting() {
return m_connecting;
}
private:
std::string m_sessionId;
std::string m_bugname;
std::string m_region;
Aws::UniquePtr<TranscribeStreamingServiceClient> m_client;
AudioStream* m_pStream;
StartStreamTranscriptionRequest m_request;
StartStreamTranscriptionHandler m_handler;
TranscriptEvent m_transcript;
responseHandler_t m_responseHandler;
bool m_finishing;
bool m_interim;
bool m_finished;
bool m_connected;
bool m_connecting;
uint32_t m_packets;
std::mutex m_mutex;
std::condition_variable m_cond;
std::deque< Aws::Vector<unsigned char> > m_deqAudio;
SimpleBuffer m_audioBuffer;
};
/**
 * Worker thread entry point: owns the GStreamer for one capture callback.
 *
 * Creates the streamer, connects immediately unless a VAD is configured (then the media
 * callback connects on first speech), publishes it via cb->streamer and blocks in
 * processData() until the stream ends. The streamer is destroyed before returning.
 *
 * @param thread unused
 * @param obj    the struct cap_cb for this media bug
 * @return always nullptr
 */
static void *SWITCH_THREAD_FUNC aws_transcribe_thread(switch_thread_t *thread, void *obj) {
  struct cap_cb *cb = (struct cap_cb *) obj;
  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "transcribe_thread: starting cb %p\n", (void *) cb);

  // operator new reports failure by throwing, never by returning null, and the constructor
  // itself allocates; catch here so no exception escapes the thread entry point.
  GStreamer* pStreamer = nullptr;
  try {
    pStreamer = new GStreamer(cb->sessionId, cb->bugname, cb->channels, cb->lang, cb->interim, cb->samples_per_second, cb->region, cb->awsAccessKeyId, cb->awsSecretAccessKey,
      cb->responseHandler);
  } catch (const std::exception& e) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "transcribe_thread: Error allocating streamer: %s\n", e.what());
    return nullptr;
  }

  if (!cb->vad) pStreamer->connect();
  cb->streamer = pStreamer;
  pStreamer->processData(); //blocks until done

  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "transcribe_thread: stopping cb %p\n", (void *) cb);
  // Unpublish before destroying so later readers of cb->streamer see null rather than a
  // dangling pointer.
  // NOTE(review): cb->mutex is not taken here (aws_transcribe_session_stop holds it while
  // joining this thread), so a frame callback already using the pointer is not excluded.
  cb->streamer = nullptr;
  delete pStreamer;
  return nullptr;
}
static void killcb(struct cap_cb* cb) {
if (cb) {
if (cb->streamer) {
GStreamer* p = (GStreamer *) cb->streamer;
delete p;
cb->streamer = nullptr;
}
if (cb->resampler) {
speex_resampler_destroy(cb->resampler);
cb->resampler = nullptr;
}
if (cb->vad) {
switch_vad_destroy(&cb->vad);
cb->vad = nullptr;
}
}
}
extern "C" {
/**
 * Module start-up: records whether AWS credentials are present in the process environment
 * and initialises the AWS SDK.
 *
 * @return always SWITCH_STATUS_SUCCESS
 */
switch_status_t aws_transcribe_init() {
  const char* accessKeyId = std::getenv("AWS_ACCESS_KEY_ID");
  const char* secretAccessKey = std::getenv("AWS_SECRET_ACCESS_KEY");

  // A usable default needs both variables; one without the other is the same as having none.
  hasDefaultCredentials = (accessKeyId != nullptr && secretAccessKey != nullptr);
  if (!hasDefaultCredentials) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE,
      "\"AWS_ACCESS_KEY_ID\" and/or \"AWS_SECRET_ACCESS_KEY\" env var not set; authentication will expect channel variables of same names to be set\n");
  }

  Aws::SDKOptions options;
  /*
  To enable SDK trace logging:
  options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
  Aws::Utils::Logging::InitializeAWSLogging(
    Aws::MakeShared<Aws::Utils::Logging::DefaultLogSystem>(
      ALLOC_TAG, Aws::Utils::Logging::LogLevel::Trace, "aws_sdk_transcribe"));
  */
  Aws::InitAPI(options);

  return SWITCH_STATUS_SUCCESS;
}
/**
 * Module shutdown: tears down the AWS SDK using default SDK options.
 *
 * @return always SWITCH_STATUS_SUCCESS
 */
switch_status_t aws_transcribe_cleanup() {
  Aws::SDKOptions sdkOptions;
  // If trace logging was switched on in aws_transcribe_init(), shut it down here as well:
  //   sdkOptions.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
  //   Aws::Utils::Logging::ShutdownAWSLogging();
  Aws::ShutdownAPI(sdkOptions);
  return SWITCH_STATUS_SUCCESS;
}
/**
 * Starts transcription on a channel.
 *
 * Allocates the capture callback from the session pool, resolves AWS credentials (channel
 * variables first, then environment variables, otherwise the SDK default profile), sets up
 * an optional resampler and VAD, and launches the worker thread that talks to AWS.
 *
 * @param session            session to transcribe
 * @param responseHandler    callback receiving transcript / error payloads
 * @param samples_per_second only logged here; the read codec's actual rate is what is used
 * @param channels           number of audio channels
 * @param lang               language code name; rejected if the AWS SDK does not know it
 * @param interim            non-zero to receive partial transcripts
 * @param bugname            media bug name
 * @param ppUserData         receives the capture callback on success
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE if the language is invalid or the
 *         mutex, resampler or worker thread could not be created
 */
switch_status_t aws_transcribe_session_init(switch_core_session_t *session, responseHandler_t responseHandler,
  uint32_t samples_per_second, uint32_t channels, char* lang, int interim, char* bugname, void **ppUserData
) {
  switch_channel_t *channel = switch_core_session_get_channel(session);
  switch_memory_pool_t *pool = switch_core_session_get_pool(session);
  auto read_codec = switch_core_session_get_read_codec(session);
  uint32_t sampleRate = read_codec->implementation->actual_samples_per_second;

  // Validate the language first so nothing is allocated for a request that cannot succeed.
  if (LanguageCode::NOT_SET == LanguageCodeMapper::GetLanguageCodeForName(lang)) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "invalid language code %s\n", lang);
    return SWITCH_STATUS_FALSE;
  }

  struct cap_cb* cb = (struct cap_cb *) switch_core_session_alloc(session, sizeof(*cb));
  memset(cb, 0, sizeof(*cb));

  // strncpy() leaves the destination unterminated when the source fills it, so copy at most
  // size-1 bytes and terminate explicitly. The destinations are assumed to be char arrays
  // inside cap_cb (sizeof yields their capacity) - confirm against mod_aws_transcribe.h.
  auto copyField = [](char* dst, size_t dstSize, const char* src) {
    strncpy(dst, src, dstSize - 1);
    dst[dstSize - 1] = '\0';
  };

  const char* awsAccessKeyId = switch_channel_get_variable(channel, "AWS_ACCESS_KEY_ID");
  const char* awsSecretAccessKey = switch_channel_get_variable(channel, "AWS_SECRET_ACCESS_KEY");
  const char* awsRegion = switch_channel_get_variable(channel, "AWS_REGION");
  const char* envAccessKeyId = std::getenv("AWS_ACCESS_KEY_ID");
  const char* envSecretAccessKey = std::getenv("AWS_SECRET_ACCESS_KEY");
  const char* envRegion = std::getenv("AWS_REGION");

  cb->channels = channels;
  copyField(cb->sessionId, sizeof(cb->sessionId), switch_core_session_get_uuid(session));
  copyField(cb->bugname, sizeof(cb->bugname), bugname);

  if (awsAccessKeyId && awsSecretAccessKey && awsRegion) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Using channel vars for aws authentication\n");
    copyField(cb->awsAccessKeyId, sizeof(cb->awsAccessKeyId), awsAccessKeyId);
    copyField(cb->awsSecretAccessKey, sizeof(cb->awsSecretAccessKey), awsSecretAccessKey);
    copyField(cb->region, sizeof(cb->region), awsRegion);
  }
  else if (envAccessKeyId && envSecretAccessKey && envRegion) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Using env vars for aws authentication\n");
    copyField(cb->awsAccessKeyId, sizeof(cb->awsAccessKeyId), envAccessKeyId);
    copyField(cb->awsSecretAccessKey, sizeof(cb->awsSecretAccessKey), envSecretAccessKey);
    copyField(cb->region, sizeof(cb->region), envRegion);
  }
  else {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "No channel vars or env vars for aws authentication..will use default profile if found\n");
  }

  cb->responseHandler = responseHandler;

  if (switch_mutex_init(&cb->mutex, SWITCH_MUTEX_NESTED, pool) != SWITCH_STATUS_SUCCESS) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error initializing mutex\n");
    return SWITCH_STATUS_FALSE;
  }

  cb->interim = interim;
  copyField(cb->lang, sizeof(cb->lang), lang);
  cb->samples_per_second = sampleRate;
  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "sample rate of rtp stream is %d\n", samples_per_second);

  // 8 kHz audio is sent as-is; every other rate is resampled to 16 kHz.
  if (sampleRate != 8000) {
    int err = 0;
    cb->resampler = speex_resampler_init(1, sampleRate, 16000, SWITCH_RESAMPLE_QUALITY, &err);
    if (0 != err) {
      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error initializing resampler: %s.\n",
        switch_channel_get_name(channel), speex_resampler_strerror(err));
      return SWITCH_STATUS_FALSE;
    }
  }

  // allocate vad if we are delaying connecting to the recognizer until we detect speech
  if (switch_channel_var_true(channel, "START_RECOGNIZING_ON_VAD")) {
    cb->vad = switch_vad_init(sampleRate, 1);
    if (cb->vad) {
      const char* var;
      int mode = 2;
      int silence_ms = 150;
      int voice_ms = 250;
      int debug = 0;

      if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_MODE"))) {
        mode = atoi(var);
      }
      if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_SILENCE_MS"))) {
        silence_ms = atoi(var);
      }
      if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_VOICE_MS"))) {
        voice_ms = atoi(var);
      }
      if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_DEBUG"))) {
        debug = atoi(var);
      }
      switch_vad_set_mode(cb->vad, mode);
      switch_vad_set_param(cb->vad, "silence_ms", silence_ms);
      switch_vad_set_param(cb->vad, "voice_ms", voice_ms);
      switch_vad_set_param(cb->vad, "debug", debug);
      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "%s: delaying connection until vad, voice_ms %d, mode %d\n",
        switch_channel_get_name(channel), voice_ms, mode);
    }
  }

  // create a thread to service the http/2 connection to aws
  switch_threadattr_t *thd_attr = NULL;
  switch_threadattr_create(&thd_attr, pool);
  switch_threadattr_stacksize_set(thd_attr, SWITCH_THREAD_STACKSIZE);
  if (switch_thread_create(&cb->thread, thd_attr, aws_transcribe_thread, cb, pool) != SWITCH_STATUS_SUCCESS) {
    // Without the worker nothing would ever create the streamer; fail instead of
    // reporting success for a transcription that cannot run.
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error creating transcribe thread\n",
      switch_channel_get_name(channel));
    cb->thread = NULL;
    killcb(cb);
    return SWITCH_STATUS_FALSE;
  }

  *ppUserData = cb;
  return SWITCH_STATUS_SUCCESS;
}
/**
 * Stops transcription for the media bug registered under bugname.
 *
 * With the callback mutex held: asks the streamer to finish, joins the worker thread,
 * releases the callback's resources, clears the channel's private slot for the bug and,
 * unless the channel is closing, removes the media bug.
 *
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when no such bug is attached
 */
switch_status_t aws_transcribe_session_stop(switch_core_session_t *session, int channelIsClosing, char* bugname) {
  switch_channel_t *channel = switch_core_session_get_channel(session);
  switch_media_bug_t *bug = (switch_media_bug_t*) switch_channel_get_private(channel, bugname);

  if (!bug) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "%s Bug is not attached.\n", switch_channel_get_name(channel));
    return SWITCH_STATUS_FALSE;
  }

  struct cap_cb *cb = (struct cap_cb *) switch_core_media_bug_get_user_data(bug);

  // close connection and get final responses
  switch_mutex_lock(cb->mutex);

  if (GStreamer* activeStreamer = (GStreamer *) cb->streamer) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "aws_transcribe_session_stop: finish..%s\n", bugname);
    activeStreamer->finish();
  }

  if (cb->thread) {
    switch_status_t joinStatus;
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: waiting for read thread to complete %s\n", bugname);
    switch_thread_join(&joinStatus, cb->thread);
    cb->thread = NULL;
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: read thread completed %s, %d\n", bugname, joinStatus);
  }

  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; going to kill callback\n", bugname);
  killcb(cb);
  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; killed callback\n", bugname);

  switch_channel_set_private(channel, bugname, NULL);

  // When the channel is closing the bug is left in place; otherwise detach it now.
  if (!channelIsClosing) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: removing bug %s\n", bugname);
    switch_core_media_bug_remove(session, &bug);
  }

  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; unlocking callback mutex\n", bugname);
  switch_mutex_unlock(cb->mutex);

  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: Closed aws session\n");
  return SWITCH_STATUS_SUCCESS;
}
/**
 * Media bug callback: drains the frames available on the bug and hands them to the streamer.
 *
 * Does nothing when the callback mutex cannot be taken without blocking. When a VAD is
 * configured and the streamer has not started connecting, each frame is run through the
 * VAD and the connection is started on the first "start talking" state. Audio goes through
 * the resampler when one is configured, otherwise it is forwarded unchanged.
 *
 * @return always SWITCH_TRUE
 */
switch_bool_t aws_transcribe_frame(switch_media_bug_t *bug, void* user_data) {
  switch_core_session_t *session = switch_core_media_bug_get_session(bug);
  struct cap_cb *cb = (struct cap_cb *) user_data;

  uint8_t rawAudio[SWITCH_RECOMMENDED_BUFFER_SIZE];
  switch_frame_t frame = {};
  frame.data = rawAudio;
  frame.buflen = SWITCH_RECOMMENDED_BUFFER_SIZE;

  // Never block the media path: skip this round if someone else holds the mutex.
  if (switch_mutex_trylock(cb->mutex) != SWITCH_STATUS_SUCCESS) {
    return SWITCH_TRUE;
  }

  GStreamer* streamer = (GStreamer *) cb->streamer;
  if (!streamer) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG,
      "aws_transcribe_frame: not sending audio because aws channel has been closed\n");
    switch_mutex_unlock(cb->mutex);
    return SWITCH_TRUE;
  }

  // Stop at the first failed read or comfort-noise frame.
  while (switch_core_media_bug_read(bug, &frame, SWITCH_TRUE) == SWITCH_STATUS_SUCCESS && !switch_test_flag((&frame), SFF_CNG)) {
    if (!frame.datalen) continue;

    if (cb->vad && !streamer->isConnecting()) {
      const switch_vad_state_t vadState = switch_vad_process(cb->vad, (int16_t*) frame.data, frame.samples);
      if (vadState == SWITCH_VAD_STATE_START_TALKING) {
        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "detected speech, connect to aws speech now\n");
        streamer->connect();
        cb->responseHandler(session, "vad_detected", cb->bugname);
      }
    }

    if (!cb->resampler) {
      streamer->write( frame.data, sizeof(spx_int16_t) * frame.samples);
      continue;
    }

    spx_int16_t resampled[SWITCH_RECOMMENDED_BUFFER_SIZE];
    spx_uint32_t resampledLen = SWITCH_RECOMMENDED_BUFFER_SIZE;
    spx_uint32_t inputLen = frame.samples;
    speex_resampler_process_interleaved_int(cb->resampler, (const spx_int16_t *) frame.data, (spx_uint32_t *) &inputLen, &resampled[0], &resampledLen);
    streamer->write( &resampled[0], sizeof(spx_int16_t) * resampledLen);
  }

  switch_mutex_unlock(cb->mutex);
  return SWITCH_TRUE;
}
}