eliminate support for multiple lws threads as part of fixing valgrind errors

Signed-off-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
Dave Horton
2023-12-26 10:57:15 -05:00
parent a2324972eb
commit 420e51eac7
140 changed files with 19851 additions and 0 deletions

View File

@@ -0,0 +1,8 @@
Copyright 2023, Drachtio Communications Services, LLC
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,10 @@
include $(top_srcdir)/build/modmake.rulesam
MODNAME=mod_aws_transcribe
mod_LTLIBRARIES = mod_aws_transcribe.la
mod_aws_transcribe_la_SOURCES = mod_aws_transcribe.c aws_transcribe_glue.cpp
mod_aws_transcribe_la_CFLAGS = $(AM_CFLAGS)
mod_aws_transcribe_la_CXXFLAGS = $(AM_CXXFLAGS) -std=c++11 -I${switch_srcdir}/libs/aws-sdk-cpp/aws-cpp-sdk-core/include -I${switch_srcdir}/libs/aws-sdk-cpp/aws-cpp-sdk-transcribestreaming/include -I${switch_srcdir}/libs/aws-sdk-cpp/build/.deps/install/include
mod_aws_transcribe_la_LIBADD = $(switch_builddir)/libfreeswitch.la
mod_aws_transcribe_la_LDFLAGS = -avoid-version -module -no-undefined -L${switch_srcdir}/libs/aws-sdk-cpp/build/.deps/install/lib -L${switch_srcdir}/libs/aws-sdk-cpp/build/aws-cpp-sdk-core -L${switch_srcdir}/libs/aws-sdk-cpp/build/aws-cpp-sdk-transcribestreaming -laws-cpp-sdk-transcribestreaming -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common -lpthread -lcurl -lcrypto -lssl -lz

View File

@@ -0,0 +1,58 @@
# mod_aws_transcribe
A Freeswitch module that generates real-time transcriptions on a Freeswitch channel by using AWS streaming transcription API
## API
### Commands
The freeswitch module exposes the following API commands:
```
aws_transcribe <uuid> start <lang-code> [interim]
```
Attaches media bug to channel and performs streaming recognize request.
- `uuid` - unique identifier of Freeswitch channel
- `lang-code` - a valid AWS [language code](https://docs.aws.amazon.com/transcribe/latest/dg/what-is-transcribe.html) that is supported for streaming transcription
- `interim` - If the 'interim' keyword is present then both interim and final transcription results will be returned; otherwise only final transcriptions will be returned
```
aws_transcribe <uuid> stop
```
Stop transcription on the channel.
### Authentication
The plugin will first look for channel variables, then environment variables. If neither are found, then the default AWS profile on the server will be used.
The names of the channel variables and environment variables are:
| variable | Description |
| --- | ----------- |
| AWS_ACCESS_KEY_ID | The Aws access key ID |
| AWS_SECRET_ACCESS_KEY | The Aws secret access key |
| AWS_REGION | The Aws region |
### Events
`aws_transcribe::transcription` - returns an interim or final transcription. The event contains a JSON body describing the transcription result:
```js
[
{
"is_final": true,
"alternatives": [{
"transcript": "Hello. Can you hear me?"
}]
}
]
```
## Usage
When using [drachtio-fsrmf](https://www.npmjs.com/package/drachtio-fsmrf), you can access this API command via the api method on the 'endpoint' object.
```js
ep.api('aws_transcribe', `${ep.uuid} start en-US interim`);
```
## Building
You will need to build the AWS C++ SDK. You can use [this ansible role](https://github.com/davehorton/ansible-role-fsmrf), or refer to the specific steps [here](https://github.com/davehorton/ansible-role-fsmrf/blob/a1947cc24e89dee7d6b42053c53295f9198340c1/tasks/grpc.yml#L28).
## Examples
[aws_transcribe.js](../../examples/aws_transcribe.js)

View File

@@ -0,0 +1,594 @@
#include <cstdlib>
#include <switch.h>
#include <switch_json.h>
#include <string.h>
#include <mutex>
#include <thread>
#include <condition_variable>
#include <string>
#include <sstream>
#include <deque>
#include <aws/core/Aws.h>
#include <aws/core/auth/AWSCredentialsProvider.h>
#include <aws/core/client/ClientConfiguration.h>
#include <aws/core/utils/logging/DefaultLogSystem.h>
#include <aws/core/utils/logging/AWSLogging.h>
#include <aws/transcribestreaming/TranscribeStreamingServiceClient.h>
#include <aws/transcribestreaming/model/StartStreamTranscriptionHandler.h>
#include <aws/transcribestreaming/model/StartStreamTranscriptionRequest.h>
#include "mod_aws_transcribe.h"
#include "simple_buffer.h"
#define BUFFER_SECS (3)
#define CHUNKSIZE (320)
using namespace Aws;
using namespace Aws::Utils;
using namespace Aws::Auth;
using namespace Aws::TranscribeStreamingService;
using namespace Aws::TranscribeStreamingService::Model;
const char ALLOC_TAG[] = "drachtio";
static bool hasDefaultCredentials = false;
class GStreamer {
public:
GStreamer(
const char *sessionId,
const char *bugname,
u_int16_t channels,
char *lang,
int interim,
uint32_t samples_per_second,
const char* region,
const char* awsAccessKeyId,
const char* awsSecretAccessKey,
responseHandler_t responseHandler
) : m_sessionId(sessionId), m_bugname(bugname), m_finished(false), m_interim(interim), m_finishing(false), m_connected(false), m_connecting(false),
m_packets(0), m_responseHandler(responseHandler), m_pStream(nullptr),
m_audioBuffer(320 * (samples_per_second == 8000 ? 1 : 2), 15) {
Aws::String key(awsAccessKeyId);
Aws::String secret(awsSecretAccessKey);
Aws::Client::ClientConfiguration config;
if (region != nullptr && strlen(region) > 0) config.region = region;
char keySnippet[20];
strncpy(keySnippet, awsAccessKeyId, 4);
for (int i = 4; i < 20; i++) keySnippet[i] = 'x';
keySnippet[19] = '\0';
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p ACCESS_KEY_ID %s, region %s\n", this, keySnippet, region);
if (*awsAccessKeyId && *awsSecretAccessKey) {
m_client = Aws::MakeUnique<TranscribeStreamingServiceClient>(ALLOC_TAG, AWSCredentials(awsAccessKeyId, awsSecretAccessKey), config);
}
else {
m_client = Aws::MakeUnique<TranscribeStreamingServiceClient>(ALLOC_TAG, config);
}
m_handler.SetTranscriptEventCallback([this](const TranscriptEvent& ev)
{
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
if (psession) {
switch_channel_t* channel = switch_core_session_get_channel(psession);
std::lock_guard<std::mutex> lk(m_mutex);
m_transcript = ev;
m_cond.notify_one();
switch_core_session_rwunlock(psession);
}
});
// not worth resampling to 16k if we get 8k ulaw or alaw in..
m_request.SetMediaSampleRateHertz(samples_per_second > 8000 ? 16000 : 8000);
m_request.SetLanguageCode(LanguageCodeMapper::GetLanguageCodeForName(lang));
m_request.SetMediaEncoding(MediaEncoding::pcm);
m_request.SetEventStreamHandler(m_handler);
if (channels > 1) m_request.SetNumberOfChannels(channels);
const char* var;
switch_core_session_t* session = switch_core_session_locate(sessionId);
switch_channel_t *channel = switch_core_session_get_channel(session);
if (var = switch_channel_get_variable(channel, "AWS_SHOW_SPEAKER_LABEL")) {
m_request.SetShowSpeakerLabel(true);
}
if (var = switch_channel_get_variable(channel, "AWS_ENABLE_CHANNEL_IDENTIFICATION")) {
m_request.SetEnableChannelIdentification(true);
}
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_NAME")) {
m_request.SetVocabularyName(var);
}
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_FILTER_NAME")) {
m_request.SetVocabularyFilterName(var);
}
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_FILTER_METHOD")) {
m_request.SetVocabularyFilterMethod(VocabularyFilterMethodMapper::GetVocabularyFilterMethodForName(var));
}
switch_core_session_rwunlock(session);
}
void connect() {
if (m_connecting) return;
m_connecting = true;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer:connect %p connecting to aws speech..\n", this);
auto OnStreamReady = [this](Model::AudioStream& stream)
{
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
if (psession) {
switch_channel_t* channel = switch_core_session_get_channel(psession);
m_pStream = &stream;
m_connected = true;
// send any buffered audio
int nFrames = m_audioBuffer.getNumItems();
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p got stream ready, %d buffered frames\n", this, nFrames);
if (nFrames) {
char *p;
do {
p = m_audioBuffer.getNextChunk();
if (p) {
write(p, CHUNKSIZE);
}
} while (p);
}
switch_core_session_rwunlock(psession);
}
};
auto OnResponseCallback = [this](const TranscribeStreamingServiceClient* pClient,
const Model::StartStreamTranscriptionRequest& request,
const Model::StartStreamTranscriptionOutcome& outcome,
const std::shared_ptr<const Aws::Client::AsyncCallerContext>& context)
{
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p stream got final response\n", this);
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
if (psession) {
if (!outcome.IsSuccess()) {
const TranscribeStreamingServiceError& err = outcome.GetError();
auto message = err.GetMessage();
auto exception = err.GetExceptionName();
cJSON* json = cJSON_CreateObject();
cJSON_AddStringToObject(json, "type", "error");
cJSON_AddStringToObject(json, "error", message.c_str());
char* jsonString = cJSON_PrintUnformatted(json);
m_responseHandler(psession, jsonString, m_bugname.c_str());
free(jsonString);
cJSON_Delete(json);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p stream got error response %s : %s\n", this, message.c_str(), exception.c_str());
}
std::lock_guard<std::mutex> lk(m_mutex);
m_finished = true;
m_cond.notify_one();
switch_core_session_rwunlock(psession);
} else {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p session is closed/hungup. Need to unblock thread.\n", this);
std::lock_guard<std::mutex> lk(m_mutex);
m_finished = true;
m_cond.notify_one();
}
};
m_client->StartStreamTranscriptionAsync(m_request, OnStreamReady, OnResponseCallback, nullptr);
}
~GStreamer() {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::~GStreamer wrote %u packets %p\n", m_packets, this);
}
bool write(void* data, uint32_t datalen) {
if (m_finishing || m_finished) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write not writing because we are finished, %p\n", this);
return false;
}
if (!m_connected) {
if (datalen % CHUNKSIZE == 0) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write queuing %d bytes\n", datalen);
m_audioBuffer.add(data, datalen);
}
return true;
}
std::lock_guard<std::mutex> lk(m_mutex);
const auto beg = static_cast<const unsigned char*>(data);
const auto end = beg + datalen;
Aws::Vector<unsigned char> bits { beg, end };
m_deqAudio.push_back(bits);
m_packets++;
m_cond.notify_one();
return true;
}
void finish() {
if (m_finishing) return;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::finish %p\n", this);
std::lock_guard<std::mutex> lk(m_mutex);
m_finishing = true;
m_cond.notify_one();
}
void processData() {
bool shutdownInitiated = false;
while (true) {
std::unique_lock<std::mutex> lk(m_mutex);
m_cond.wait(lk, [&, this] {
return (!m_deqAudio.empty() && !m_finishing) || m_transcript.TranscriptHasBeenSet() || m_finished || (m_finishing && !shutdownInitiated);
});
// we have data to process or have been told we're done
if (m_finished || !m_connected) return;
if (m_transcript.TranscriptHasBeenSet()) {
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
if (psession) {
//switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::got a transcript to send out %p\n", this);
bool isFinal = false;
std::ostringstream s;
s << "[";
for (auto&& r : m_transcript.GetTranscript().GetResults()) {
int count = 0;
std::ostringstream t1;
if (!isFinal && !r.GetIsPartial()) isFinal = true;
t1 << "{\"is_final\": " << (r.GetIsPartial() ? "false" : "true") << ", \"alternatives\": [";
for (auto&& alt : r.GetAlternatives()) {
std::ostringstream t2;
if (count++ == 0) t2 << "{\"transcript\": \"" << alt.GetTranscript() << "\"}";
else t2 << ", {\"transcript\": \"" << alt.GetTranscript() << "\"}";
t1 << t2.str();
}
t1 << "]}";
s << t1.str();
}
s << "]";
if (0 != s.str().compare("[]") && (isFinal || m_interim)) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::writing transcript %p: %s\n", this, s.str().c_str() );
m_responseHandler(psession, s.str().c_str(), m_bugname.c_str());
}
TranscriptEvent empty;
m_transcript = empty;
switch_core_session_rwunlock(psession);
}
}
if (m_finishing) {
shutdownInitiated = true;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::writing disconnect event %p\n", this);
if (m_pStream) {
m_pStream->flush();
m_pStream->Close();
m_pStream = nullptr;
}
}
else {
// send out any queued speech packets
while (!m_deqAudio.empty()) {
Aws::Vector<unsigned char>& bits = m_deqAudio.front();
Aws::TranscribeStreamingService::Model::AudioEvent event(std::move(bits));
m_pStream->WriteAudioEvent(event);
m_deqAudio.pop_front();
}
}
}
}
bool isConnecting() {
return m_connecting;
}
private:
std::string m_sessionId;
std::string m_bugname;
std::string m_region;
Aws::UniquePtr<TranscribeStreamingServiceClient> m_client;
AudioStream* m_pStream;
StartStreamTranscriptionRequest m_request;
StartStreamTranscriptionHandler m_handler;
TranscriptEvent m_transcript;
responseHandler_t m_responseHandler;
bool m_finishing;
bool m_interim;
bool m_finished;
bool m_connected;
bool m_connecting;
uint32_t m_packets;
std::mutex m_mutex;
std::condition_variable m_cond;
std::deque< Aws::Vector<unsigned char> > m_deqAudio;
SimpleBuffer m_audioBuffer;
};
static void *SWITCH_THREAD_FUNC aws_transcribe_thread(switch_thread_t *thread, void *obj) {
struct cap_cb *cb = (struct cap_cb *) obj;
bool ok = true;
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "transcribe_thread: starting cb %p\n", (void *) cb);
GStreamer* pStreamer = new GStreamer(cb->sessionId, cb->bugname, cb->channels, cb->lang, cb->interim, cb->samples_per_second, cb->region, cb->awsAccessKeyId, cb->awsSecretAccessKey,
cb->responseHandler);
if (!pStreamer) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "transcribe_thread: Error allocating streamer\n");
return nullptr;
}
if (!cb->vad) pStreamer->connect();
cb->streamer = pStreamer;
pStreamer->processData(); //blocks until done
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "transcribe_thread: stopping cb %p\n", (void *) cb);
delete pStreamer;
cb->streamer = nullptr;
return nullptr;
}
static void killcb(struct cap_cb* cb) {
if (cb) {
if (cb->streamer) {
GStreamer* p = (GStreamer *) cb->streamer;
delete p;
cb->streamer = nullptr;
}
if (cb->resampler) {
speex_resampler_destroy(cb->resampler);
cb->resampler = nullptr;
}
if (cb->vad) {
switch_vad_destroy(&cb->vad);
cb->vad = nullptr;
}
}
}
extern "C" {
switch_status_t aws_transcribe_init() {
const char* accessKeyId = std::getenv("AWS_ACCESS_KEY_ID");
const char* secretAccessKey = std::getenv("AWS_SECRET_ACCESS_KEY");
const char* region = std::getenv("AWS_REGION");
if (NULL == accessKeyId && NULL == secretAccessKey) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE,
"\"AWS_ACCESS_KEY_ID\" and/or \"AWS_SECRET_ACCESS_KEY\" env var not set; authentication will expect channel variables of same names to be set\n");
}
else {
hasDefaultCredentials = true;
}
Aws::SDKOptions options;
/*
options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
Aws::Utils::Logging::InitializeAWSLogging(
Aws::MakeShared<Aws::Utils::Logging::DefaultLogSystem>(
ALLOC_TAG, Aws::Utils::Logging::LogLevel::Trace, "aws_sdk_transcribe"));
*/
Aws::InitAPI(options);
return SWITCH_STATUS_SUCCESS;
}
switch_status_t aws_transcribe_cleanup() {
Aws::SDKOptions options;
/*
options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
Aws::Utils::Logging::ShutdownAWSLogging();
*/
Aws::ShutdownAPI(options);
return SWITCH_STATUS_SUCCESS;
}
// start transcribe on a channel
switch_status_t aws_transcribe_session_init(switch_core_session_t *session, responseHandler_t responseHandler,
uint32_t samples_per_second, uint32_t channels, char* lang, int interim, char* bugname, void **ppUserData
) {
switch_status_t status = SWITCH_STATUS_SUCCESS;
switch_channel_t *channel = switch_core_session_get_channel(session);
int err;
switch_threadattr_t *thd_attr = NULL;
switch_memory_pool_t *pool = switch_core_session_get_pool(session);
auto read_codec = switch_core_session_get_read_codec(session);
uint32_t sampleRate = read_codec->implementation->actual_samples_per_second;
struct cap_cb* cb = (struct cap_cb *) switch_core_session_alloc(session, sizeof(*cb));
memset(cb, sizeof(cb), 0);
const char* awsAccessKeyId = switch_channel_get_variable(channel, "AWS_ACCESS_KEY_ID");
const char* awsSecretAccessKey = switch_channel_get_variable(channel, "AWS_SECRET_ACCESS_KEY");
const char* awsRegion = switch_channel_get_variable(channel, "AWS_REGION");
cb->channels = channels;
LanguageCode code = LanguageCodeMapper::GetLanguageCodeForName(lang);
if(LanguageCode::NOT_SET == code) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "invalid language code %s\n", lang);
status = SWITCH_STATUS_FALSE;
goto done;
}
strncpy(cb->sessionId, switch_core_session_get_uuid(session), MAX_SESSION_ID);
strncpy(cb->bugname, bugname, MAX_BUG_LEN);
if (awsAccessKeyId && awsSecretAccessKey && awsRegion) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Using channel vars for aws authentication\n");
strncpy(cb->awsAccessKeyId, awsAccessKeyId, 128);
strncpy(cb->awsSecretAccessKey, awsSecretAccessKey, 128);
strncpy(cb->region, awsRegion, MAX_REGION);
}
else if (std::getenv("AWS_ACCESS_KEY_ID") &&
std::getenv("AWS_SECRET_ACCESS_KEY") &&
std::getenv("AWS_REGION")) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Using env vars for aws authentication\n");
strncpy(cb->awsAccessKeyId, std::getenv("AWS_ACCESS_KEY_ID"), 128);
strncpy(cb->awsSecretAccessKey, std::getenv("AWS_SECRET_ACCESS_KEY"), 128);
strncpy(cb->region, std::getenv("AWS_REGION"), MAX_REGION);
}
else {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "No channel vars or env vars for aws authentication..will use default profile if found\n");
}
cb->responseHandler = responseHandler;
if (switch_mutex_init(&cb->mutex, SWITCH_MUTEX_NESTED, pool) != SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error initializing mutex\n");
status = SWITCH_STATUS_FALSE;
goto done;
}
cb->interim = interim;
strncpy(cb->lang, lang, MAX_LANG);
cb->samples_per_second = sampleRate;
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "sample rate of rtp stream is %d\n", samples_per_second);
if (sampleRate != 8000) {
cb->resampler = speex_resampler_init(1, sampleRate, 16000, SWITCH_RESAMPLE_QUALITY, &err);
if (0 != err) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error initializing resampler: %s.\n",
switch_channel_get_name(channel), speex_resampler_strerror(err));
status = SWITCH_STATUS_FALSE;
goto done;
}
}
// allocate vad if we are delaying connecting to the recognizer until we detect speech
if (switch_channel_var_true(channel, "START_RECOGNIZING_ON_VAD")) {
cb->vad = switch_vad_init(sampleRate, 1);
if (cb->vad) {
const char* var;
int mode = 2;
int silence_ms = 150;
int voice_ms = 250;
int debug = 0;
if (var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_MODE")) {
mode = atoi(var);
}
if (var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_SILENCE_MS")) {
silence_ms = atoi(var);
}
if (var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_VOICE_MS")) {
voice_ms = atoi(var);
}
if (var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_DEBUG")) {
debug = atoi(var);
}
switch_vad_set_mode(cb->vad, mode);
switch_vad_set_param(cb->vad, "silence_ms", silence_ms);
switch_vad_set_param(cb->vad, "voice_ms", voice_ms);
switch_vad_set_param(cb->vad, "debug", debug);
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "%s: delaying connection until vad, voice_ms %d, mode %d\n",
switch_channel_get_name(channel), voice_ms, mode);
}
}
// create a thread to service the http/2 connection to aws
switch_threadattr_create(&thd_attr, pool);
switch_threadattr_stacksize_set(thd_attr, SWITCH_THREAD_STACKSIZE);
switch_thread_create(&cb->thread, thd_attr, aws_transcribe_thread, cb, pool);
*ppUserData = cb;
done:
return status;
}
switch_status_t aws_transcribe_session_stop(switch_core_session_t *session, int channelIsClosing, char* bugname) {
switch_channel_t *channel = switch_core_session_get_channel(session);
switch_media_bug_t *bug = (switch_media_bug_t*) switch_channel_get_private(channel, bugname);
if (bug) {
struct cap_cb *cb = (struct cap_cb *) switch_core_media_bug_get_user_data(bug);
switch_status_t st;
// close connection and get final responses
switch_mutex_lock(cb->mutex);
GStreamer* streamer = (GStreamer *) cb->streamer;
if (streamer) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "aws_transcribe_session_stop: finish..%s\n", bugname);
streamer->finish();
}
if (cb->thread) {
switch_status_t retval;
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: waiting for read thread to complete %s\n", bugname);
switch_thread_join(&retval, cb->thread);
cb->thread = NULL;
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: read thread completed %s, %d\n", bugname, retval);
}
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; going to kill callback\n", bugname);
killcb(cb);
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; killed callback\n", bugname);
switch_channel_set_private(channel, bugname, NULL);
if (!channelIsClosing) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: removing bug %s\n", bugname);
switch_core_media_bug_remove(session, &bug);
}
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; unlocking callback mutex\n", bugname);
switch_mutex_unlock(cb->mutex);
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: Closed aws session\n");
return SWITCH_STATUS_SUCCESS;
}
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "%s Bug is not attached.\n", switch_channel_get_name(channel));
return SWITCH_STATUS_FALSE;
}
switch_bool_t aws_transcribe_frame(switch_media_bug_t *bug, void* user_data) {
switch_core_session_t *session = switch_core_media_bug_get_session(bug);
uint8_t data[SWITCH_RECOMMENDED_BUFFER_SIZE];
switch_frame_t frame = {};
struct cap_cb *cb = (struct cap_cb *) user_data;
frame.data = data;
frame.buflen = SWITCH_RECOMMENDED_BUFFER_SIZE;
if (switch_mutex_trylock(cb->mutex) == SWITCH_STATUS_SUCCESS) {
GStreamer* streamer = (GStreamer *) cb->streamer;
if (streamer) {
while (switch_core_media_bug_read(bug, &frame, SWITCH_TRUE) == SWITCH_STATUS_SUCCESS && !switch_test_flag((&frame), SFF_CNG)) {
if (frame.datalen) {
spx_int16_t out[SWITCH_RECOMMENDED_BUFFER_SIZE];
spx_uint32_t out_len = SWITCH_RECOMMENDED_BUFFER_SIZE;
spx_uint32_t in_len = frame.samples;
size_t written;
if (cb->vad && !streamer->isConnecting()) {
switch_vad_state_t state = switch_vad_process(cb->vad, (int16_t*) frame.data, frame.samples);
if (state == SWITCH_VAD_STATE_START_TALKING) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "detected speech, connect to aws speech now\n");
streamer->connect();
cb->responseHandler(session, "vad_detected", cb->bugname);
}
}
if (cb->resampler) {
speex_resampler_process_interleaved_int(cb->resampler, (const spx_int16_t *) frame.data, (spx_uint32_t *) &in_len, &out[0], &out_len);
streamer->write( &out[0], sizeof(spx_int16_t) * out_len);
}
else {
streamer->write( frame.data, sizeof(spx_int16_t) * frame.samples);
}
}
}
}
else {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG,
"aws_transcribe_frame: not sending audio because aws channel has been closed\n");
}
switch_mutex_unlock(cb->mutex);
}
return SWITCH_TRUE;
}
}

View File

@@ -0,0 +1,11 @@
#ifndef __AWS_GLUE_H__
#define __AWS_GLUE_H__
switch_status_t aws_transcribe_init();
switch_status_t aws_transcribe_cleanup();
switch_status_t aws_transcribe_session_init(switch_core_session_t *session, responseHandler_t responseHandler,
uint32_t samples_per_second, uint32_t channels, char* lang, int interim, char *bugname, void **ppUserData);
switch_status_t aws_transcribe_session_stop(switch_core_session_t *session, int channelIsClosing, char* bugname);
switch_bool_t aws_transcribe_frame(switch_media_bug_t *bug, void* user_data);
#endif

View File

@@ -0,0 +1,240 @@
/*
*
* mod_aws_transcribe.c -- Freeswitch module for using aws streaming transcribe api
*
*/
#include "mod_aws_transcribe.h"
#include "aws_transcribe_glue.h"
/* Prototypes */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_aws_transcribe_shutdown);
SWITCH_MODULE_LOAD_FUNCTION(mod_aws_transcribe_load);
SWITCH_MODULE_DEFINITION(mod_aws_transcribe, mod_aws_transcribe_load, mod_aws_transcribe_shutdown, NULL);
static switch_status_t do_stop(switch_core_session_t *session, char* bugname);
static void responseHandler(switch_core_session_t* session, const char * json, const char* bugname) {
switch_event_t *event;
switch_channel_t *channel = switch_core_session_get_channel(session);
if (0 == strcmp("vad_detected", json)) {
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_VAD_DETECTED);
switch_channel_event_set_data(channel, event);
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "aws");
}
else if (0 == strcmp("end_of_transcript", json)) {
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_END_OF_TRANSCRIPT);
switch_channel_event_set_data(channel, event);
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "aws");
}
else if (0 == strcmp("max_duration_exceeded", json)) {
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_MAX_DURATION_EXCEEDED);
switch_channel_event_set_data(channel, event);
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "aws");
}
else if (0 == strcmp("no_audio", json)) {
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_NO_AUDIO_DETECTED);
switch_channel_event_set_data(channel, event);
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "aws");
}
else {
int error = 0;
cJSON* jMessage = cJSON_Parse(json);
if (jMessage) {
const char* type = cJSON_GetStringValue(cJSON_GetObjectItem(jMessage, "type"));
if (type && 0 == strcmp(type, "error")) {
error = 1;
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_ERROR);
}
cJSON_Delete(jMessage);
}
if (!error) {
switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_RESULTS);
}
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "json payload: %s.\n", json);
switch_channel_event_set_data(channel, event);
switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "aws");
switch_event_add_body(event, "%s", json);
}
if (bugname) switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "media-bugname", bugname);
switch_event_fire(&event);
}
static switch_bool_t capture_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type)
{
switch_core_session_t *session = switch_core_media_bug_get_session(bug);
switch (type) {
case SWITCH_ABC_TYPE_INIT:
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Got SWITCH_ABC_TYPE_INIT.\n");
break;
case SWITCH_ABC_TYPE_CLOSE:
{
struct cap_cb* cb = (struct cap_cb*) switch_core_media_bug_get_user_data(bug);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Got SWITCH_ABC_TYPE_CLOSE.\n");
aws_transcribe_session_stop(session, 1, cb->bugname);
//switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Finished SWITCH_ABC_TYPE_CLOSE.\n");
}
break;
case SWITCH_ABC_TYPE_READ:
return aws_transcribe_frame(bug, user_data);
break;
case SWITCH_ABC_TYPE_WRITE:
default:
break;
}
return SWITCH_TRUE;
}
static switch_status_t start_capture(switch_core_session_t *session, switch_media_bug_flag_t flags,
char* lang, int interim, char* bugname)
{
switch_channel_t *channel = switch_core_session_get_channel(session);
switch_media_bug_t *bug;
switch_status_t status;
switch_codec_implementation_t read_impl = { 0 };
void *pUserData;
uint32_t samples_per_second;
if (switch_channel_get_private(channel, bugname)) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "removing bug from previous transcribe\n");
do_stop(session, bugname);
}
switch_core_session_get_read_impl(session, &read_impl);
if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
return SWITCH_STATUS_FALSE;
}
samples_per_second = !strcasecmp(read_impl.iananame, "g722") ? read_impl.actual_samples_per_second : read_impl.samples_per_second;
if (SWITCH_STATUS_FALSE == aws_transcribe_session_init(session, responseHandler, samples_per_second, flags & SMBF_STEREO ? 2 : 1, lang, interim, bugname, &pUserData)) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error initializing aws speech session.\n");
return SWITCH_STATUS_FALSE;
}
if ((status = switch_core_media_bug_add(session, bugname, NULL, capture_callback, pUserData, 0, flags, &bug)) != SWITCH_STATUS_SUCCESS) {
return status;
}
switch_channel_set_private(channel, bugname, bug);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "added media bug for aws transcribe\n");
return SWITCH_STATUS_SUCCESS;
}
static switch_status_t do_stop(switch_core_session_t *session, char* bugname)
{
switch_status_t status = SWITCH_STATUS_SUCCESS;
switch_channel_t *channel = switch_core_session_get_channel(session);
switch_media_bug_t *bug = switch_channel_get_private(channel, bugname);
if (bug) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Received user command command to stop transcribe on %s.\n", bugname);
status = aws_transcribe_session_stop(session, 0, bugname);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "stopped transcribe.\n");
}
return status;
}
#define TRANSCRIBE_API_SYNTAX "<uuid> [start|stop] lang-code [interim] [stereo|mono] [bugname]"
SWITCH_STANDARD_API(aws_transcribe_function)
{
char *mycmd = NULL, *argv[6] = { 0 };
int argc = 0;
switch_status_t status = SWITCH_STATUS_FALSE;
switch_media_bug_flag_t flags = SMBF_READ_STREAM /* | SMBF_WRITE_STREAM | SMBF_READ_PING */;
if (!zstr(cmd) && (mycmd = strdup(cmd))) {
argc = switch_separate_string(mycmd, ' ', argv, (sizeof(argv) / sizeof(argv[0])));
}
if (zstr(cmd) ||
(!strcasecmp(argv[1], "stop") && argc < 2) ||
(!strcasecmp(argv[1], "start") && argc < 3) ||
zstr(argv[0])) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s %s %s.\n", cmd, argv[0], argv[1]);
stream->write_function(stream, "-USAGE: %s\n", TRANSCRIBE_API_SYNTAX);
goto done;
} else {
switch_core_session_t *lsession = NULL;
if ((lsession = switch_core_session_locate(argv[0]))) {
if (!strcasecmp(argv[1], "stop")) {
char *bugname = argc > 2 ? argv[2] : MY_BUG_NAME;
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "stop transcribing\n");
status = do_stop(lsession, bugname);
} else if (!strcasecmp(argv[1], "start")) {
char* lang = argv[2];
int interim = argc > 3 && !strcmp(argv[3], "interim");
char *bugname = argc > 5 ? argv[5] : MY_BUG_NAME;
if (argc > 4 && !strcmp(argv[4], "stereo")) {
flags |= SMBF_WRITE_STREAM ;
flags |= SMBF_STEREO;
}
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "start transcribing %s %s %s\n", lang, interim ? "interim": "complete", bugname);
status = start_capture(lsession, flags, lang, interim, bugname);
}
switch_core_session_rwunlock(lsession);
}
}
if (status == SWITCH_STATUS_SUCCESS) {
stream->write_function(stream, "+OK Success\n");
} else {
stream->write_function(stream, "-ERR Operation Failed\n");
}
done:
switch_safe_free(mycmd);
return SWITCH_STATUS_SUCCESS;
}
SWITCH_MODULE_LOAD_FUNCTION(mod_aws_transcribe_load)
{
switch_api_interface_t *api_interface;
/* create/register custom event message type */
if (switch_event_reserve_subclass(TRANSCRIBE_EVENT_RESULTS) != SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", TRANSCRIBE_EVENT_RESULTS);
return SWITCH_STATUS_TERM;
}
/* connect my internal structure to the blank pointer passed to me */
*module_interface = switch_loadable_module_create_module_interface(pool, modname);
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "AWS Speech Transcription API loading..\n");
if (SWITCH_STATUS_FALSE == aws_transcribe_init()) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed initializing aws speech interface\n");
}
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "AWS Speech Transcription API successfully loaded\n");
SWITCH_ADD_API(api_interface, "uuid_aws_transcribe", "AWS Speech Transcription API", aws_transcribe_function, TRANSCRIBE_API_SYNTAX);
switch_console_set_complete("add uuid_aws_transcribe start lang-code [interim|final] [stereo|mono]");
switch_console_set_complete("add uuid_aws_transcribe stop ");
/* indicate that the module should continue to be loaded */
return SWITCH_STATUS_SUCCESS;
}
/*
Called when the system shuts down
Macro expands to: switch_status_t mod_aws_transcribe_shutdown() */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_aws_transcribe_shutdown)
{
aws_transcribe_cleanup();
switch_event_free_subclass(TRANSCRIBE_EVENT_RESULTS);
return SWITCH_STATUS_SUCCESS;
}

View File

@@ -0,0 +1,45 @@
#ifndef __MOD_AWS_TRANSCRIBE_H__
#define __MOD_AWS_TRANSCRIBE_H__
#include <switch.h>
#include <speex/speex_resampler.h>
#include <unistd.h>
#define MY_BUG_NAME "aws_transcribe"
#define MAX_BUG_LEN (64)
#define MAX_SESSION_ID (256)
#define TRANSCRIBE_EVENT_RESULTS "aws_transcribe::transcription"
#define TRANSCRIBE_EVENT_END_OF_TRANSCRIPT "aws_transcribe::end_of_transcript"
#define TRANSCRIBE_EVENT_NO_AUDIO_DETECTED "aws_transcribe::no_audio_detected"
#define TRANSCRIBE_EVENT_MAX_DURATION_EXCEEDED "aws_transcribe::max_duration_exceeded"
#define TRANSCRIBE_EVENT_VAD_DETECTED "aws_transcribe::vad_detected"
#define TRANSCRIBE_EVENT_ERROR "jambonz_transcribe::error"
#define MAX_LANG (12)
#define MAX_REGION (32)
/* per-channel data */
typedef void (*responseHandler_t)(switch_core_session_t* session, const char * json, const char* bugname);
struct cap_cb {
switch_mutex_t *mutex;
char bugname[MAX_BUG_LEN+1];
char sessionId[MAX_SESSION_ID+1];
char awsAccessKeyId[128];
char awsSecretAccessKey[128];
uint32_t channels;
SpeexResamplerState *resampler;
void* streamer;
responseHandler_t responseHandler;
switch_thread_t* thread;
int interim;
char lang[MAX_LANG];
char region[MAX_REGION];
switch_vad_t * vad;
uint32_t samples_per_second;
};
#endif

View File

@@ -0,0 +1,51 @@
/**
* (very) simple and limited circular buffer,
* supporting only the use case of doing all of the adds
* and then subsquently retrieves.
*
*/
class SimpleBuffer {
public:
SimpleBuffer(uint32_t chunkSize, uint32_t numChunks) : numItems(0),
m_numChunks(numChunks), m_chunkSize(chunkSize) {
m_pData = new char[chunkSize * numChunks];
m_pNextWrite = m_pData;
}
~SimpleBuffer() {
delete [] m_pData;
}
void add(void *data, uint32_t datalen) {
if (datalen % m_chunkSize != 0) return;
int numChunks = datalen / m_chunkSize;
for (int i = 0; i < numChunks; i++) {
memcpy(m_pNextWrite, data, m_chunkSize);
data = static_cast<char*>(data) + m_chunkSize;
if (numItems < m_numChunks) numItems++;
uint32_t offset = (m_pNextWrite - m_pData) / m_chunkSize;
if (offset >= m_numChunks - 1) m_pNextWrite = m_pData;
else m_pNextWrite += m_chunkSize;
}
}
char* getNextChunk() {
if (numItems--) {
char *p = m_pNextWrite;
uint32_t offset = (m_pNextWrite - m_pData) / m_chunkSize;
if (offset >= m_numChunks - 1) m_pNextWrite = m_pData;
else m_pNextWrite += m_chunkSize;
return p;
}
return nullptr;
}
uint32_t getNumItems() { return numItems;}
private:
char *m_pData;
uint32_t numItems;
uint32_t m_chunkSize;
uint32_t m_numChunks;
char* m_pNextWrite;
};