mirror of
https://github.com/jambonz/freeswitch-modules.git
synced 2025-12-19 08:27:44 +00:00
eliminate support for multiple lws threads as part of fixing valgrind errors
Signed-off-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
8
mod_aws_transcribe/LICENSE
Normal file
8
mod_aws_transcribe/LICENSE
Normal file
@@ -0,0 +1,8 @@
|
||||
Copyright 2023, Drachtio Communications Services, LLC
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
10
mod_aws_transcribe/Makefile.am
Normal file
10
mod_aws_transcribe/Makefile.am
Normal file
@@ -0,0 +1,10 @@
|
||||
include $(top_srcdir)/build/modmake.rulesam
|
||||
MODNAME=mod_aws_transcribe
|
||||
|
||||
mod_LTLIBRARIES = mod_aws_transcribe.la
|
||||
mod_aws_transcribe_la_SOURCES = mod_aws_transcribe.c aws_transcribe_glue.cpp
|
||||
mod_aws_transcribe_la_CFLAGS = $(AM_CFLAGS)
|
||||
mod_aws_transcribe_la_CXXFLAGS = $(AM_CXXFLAGS) -std=c++11 -I${switch_srcdir}/libs/aws-sdk-cpp/aws-cpp-sdk-core/include -I${switch_srcdir}/libs/aws-sdk-cpp/aws-cpp-sdk-transcribestreaming/include -I${switch_srcdir}/libs/aws-sdk-cpp/build/.deps/install/include
|
||||
|
||||
mod_aws_transcribe_la_LIBADD = $(switch_builddir)/libfreeswitch.la
|
||||
mod_aws_transcribe_la_LDFLAGS = -avoid-version -module -no-undefined -L${switch_srcdir}/libs/aws-sdk-cpp/build/.deps/install/lib -L${switch_srcdir}/libs/aws-sdk-cpp/build/aws-cpp-sdk-core -L${switch_srcdir}/libs/aws-sdk-cpp/build/aws-cpp-sdk-transcribestreaming -laws-cpp-sdk-transcribestreaming -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common -lpthread -lcurl -lcrypto -lssl -lz
|
||||
58
mod_aws_transcribe/README.md
Normal file
58
mod_aws_transcribe/README.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# mod_aws_transcribe
|
||||
|
||||
A Freeswitch module that generates real-time transcriptions on a Freeswitch channel by using the AWS streaming transcription API.
|
||||
|
||||
## API
|
||||
|
||||
### Commands
|
||||
The freeswitch module exposes the following API commands:
|
||||
|
||||
```
|
||||
aws_transcribe <uuid> start <lang-code> [interim]
|
||||
```
|
||||
Attaches media bug to channel and performs streaming recognize request.
|
||||
- `uuid` - unique identifier of Freeswitch channel
|
||||
- `lang-code` - a valid AWS [language code](https://docs.aws.amazon.com/transcribe/latest/dg/what-is-transcribe.html) that is supported for streaming transcription
|
||||
- `interim` - If the 'interim' keyword is present then both interim and final transcription results will be returned; otherwise only final transcriptions will be returned
|
||||
|
||||
```
|
||||
aws_transcribe <uuid> stop
|
||||
```
|
||||
Stop transcription on the channel.
|
||||
|
||||
### Authentication
|
||||
The plugin will first look for channel variables, then environment variables. If neither is found, then the default AWS profile on the server will be used.
|
||||
|
||||
The names of the channel variables and environment variables are:
|
||||
|
||||
| variable | Description |
|
||||
| --- | ----------- |
|
||||
| AWS_ACCESS_KEY_ID | The Aws access key ID |
|
||||
| AWS_SECRET_ACCESS_KEY | The Aws secret access key |
|
||||
| AWS_REGION | The Aws region |
|
||||
|
||||
|
||||
### Events
|
||||
`aws_transcribe::transcription` - returns an interim or final transcription. The event contains a JSON body describing the transcription result:
|
||||
```js
|
||||
[
|
||||
{
|
||||
"is_final": true,
|
||||
"alternatives": [{
|
||||
"transcript": "Hello. Can you hear me?"
|
||||
}]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
## Usage
|
||||
When using [drachtio-fsmrf](https://www.npmjs.com/package/drachtio-fsmrf), you can access this API command via the api method on the 'endpoint' object.
|
||||
```js
|
||||
ep.api('aws_transcribe', `${ep.uuid} start en-US interim`);
|
||||
```
|
||||
|
||||
## Building
|
||||
You will need to build the AWS C++ SDK. You can use [this ansible role](https://github.com/davehorton/ansible-role-fsmrf), or refer to the specific steps [here](https://github.com/davehorton/ansible-role-fsmrf/blob/a1947cc24e89dee7d6b42053c53295f9198340c1/tasks/grpc.yml#L28).
|
||||
|
||||
## Examples
|
||||
[aws_transcribe.js](../../examples/aws_transcribe.js)
|
||||
594
mod_aws_transcribe/aws_transcribe_glue.cpp
Normal file
594
mod_aws_transcribe/aws_transcribe_glue.cpp
Normal file
@@ -0,0 +1,594 @@
|
||||
#include <cstdlib>
|
||||
|
||||
#include <switch.h>
|
||||
#include <switch_json.h>
|
||||
|
||||
#include <string.h>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <condition_variable>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <deque>
|
||||
|
||||
#include <aws/core/Aws.h>
|
||||
#include <aws/core/auth/AWSCredentialsProvider.h>
|
||||
#include <aws/core/client/ClientConfiguration.h>
|
||||
#include <aws/core/utils/logging/DefaultLogSystem.h>
|
||||
#include <aws/core/utils/logging/AWSLogging.h>
|
||||
#include <aws/transcribestreaming/TranscribeStreamingServiceClient.h>
|
||||
#include <aws/transcribestreaming/model/StartStreamTranscriptionHandler.h>
|
||||
#include <aws/transcribestreaming/model/StartStreamTranscriptionRequest.h>
|
||||
|
||||
#include "mod_aws_transcribe.h"
|
||||
#include "simple_buffer.h"
|
||||
|
||||
#define BUFFER_SECS (3)
|
||||
#define CHUNKSIZE (320)
|
||||
|
||||
using namespace Aws;
|
||||
using namespace Aws::Utils;
|
||||
using namespace Aws::Auth;
|
||||
using namespace Aws::TranscribeStreamingService;
|
||||
using namespace Aws::TranscribeStreamingService::Model;
|
||||
|
||||
|
||||
const char ALLOC_TAG[] = "drachtio";
|
||||
|
||||
static bool hasDefaultCredentials = false;
|
||||
|
||||
class GStreamer {
|
||||
public:
|
||||
GStreamer(
|
||||
const char *sessionId,
|
||||
const char *bugname,
|
||||
u_int16_t channels,
|
||||
char *lang,
|
||||
int interim,
|
||||
uint32_t samples_per_second,
|
||||
const char* region,
|
||||
const char* awsAccessKeyId,
|
||||
const char* awsSecretAccessKey,
|
||||
responseHandler_t responseHandler
|
||||
) : m_sessionId(sessionId), m_bugname(bugname), m_finished(false), m_interim(interim), m_finishing(false), m_connected(false), m_connecting(false),
|
||||
m_packets(0), m_responseHandler(responseHandler), m_pStream(nullptr),
|
||||
m_audioBuffer(320 * (samples_per_second == 8000 ? 1 : 2), 15) {
|
||||
Aws::String key(awsAccessKeyId);
|
||||
Aws::String secret(awsSecretAccessKey);
|
||||
Aws::Client::ClientConfiguration config;
|
||||
if (region != nullptr && strlen(region) > 0) config.region = region;
|
||||
char keySnippet[20];
|
||||
|
||||
strncpy(keySnippet, awsAccessKeyId, 4);
|
||||
for (int i = 4; i < 20; i++) keySnippet[i] = 'x';
|
||||
keySnippet[19] = '\0';
|
||||
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p ACCESS_KEY_ID %s, region %s\n", this, keySnippet, region);
|
||||
if (*awsAccessKeyId && *awsSecretAccessKey) {
|
||||
m_client = Aws::MakeUnique<TranscribeStreamingServiceClient>(ALLOC_TAG, AWSCredentials(awsAccessKeyId, awsSecretAccessKey), config);
|
||||
}
|
||||
else {
|
||||
m_client = Aws::MakeUnique<TranscribeStreamingServiceClient>(ALLOC_TAG, config);
|
||||
}
|
||||
|
||||
m_handler.SetTranscriptEventCallback([this](const TranscriptEvent& ev)
|
||||
{
|
||||
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
|
||||
if (psession) {
|
||||
switch_channel_t* channel = switch_core_session_get_channel(psession);
|
||||
std::lock_guard<std::mutex> lk(m_mutex);
|
||||
m_transcript = ev;
|
||||
m_cond.notify_one();
|
||||
|
||||
switch_core_session_rwunlock(psession);
|
||||
}
|
||||
});
|
||||
|
||||
// not worth resampling to 16k if we get 8k ulaw or alaw in..
|
||||
m_request.SetMediaSampleRateHertz(samples_per_second > 8000 ? 16000 : 8000);
|
||||
m_request.SetLanguageCode(LanguageCodeMapper::GetLanguageCodeForName(lang));
|
||||
m_request.SetMediaEncoding(MediaEncoding::pcm);
|
||||
m_request.SetEventStreamHandler(m_handler);
|
||||
if (channels > 1) m_request.SetNumberOfChannels(channels);
|
||||
|
||||
const char* var;
|
||||
switch_core_session_t* session = switch_core_session_locate(sessionId);
|
||||
switch_channel_t *channel = switch_core_session_get_channel(session);
|
||||
|
||||
if (var = switch_channel_get_variable(channel, "AWS_SHOW_SPEAKER_LABEL")) {
|
||||
m_request.SetShowSpeakerLabel(true);
|
||||
}
|
||||
if (var = switch_channel_get_variable(channel, "AWS_ENABLE_CHANNEL_IDENTIFICATION")) {
|
||||
m_request.SetEnableChannelIdentification(true);
|
||||
}
|
||||
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_NAME")) {
|
||||
m_request.SetVocabularyName(var);
|
||||
}
|
||||
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_FILTER_NAME")) {
|
||||
m_request.SetVocabularyFilterName(var);
|
||||
}
|
||||
if (var = switch_channel_get_variable(channel, "AWS_VOCABULARY_FILTER_METHOD")) {
|
||||
m_request.SetVocabularyFilterMethod(VocabularyFilterMethodMapper::GetVocabularyFilterMethodForName(var));
|
||||
}
|
||||
switch_core_session_rwunlock(session);
|
||||
}
|
||||
|
||||
void connect() {
|
||||
if (m_connecting) return;
|
||||
m_connecting = true;
|
||||
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer:connect %p connecting to aws speech..\n", this);
|
||||
|
||||
auto OnStreamReady = [this](Model::AudioStream& stream)
|
||||
{
|
||||
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
|
||||
if (psession) {
|
||||
switch_channel_t* channel = switch_core_session_get_channel(psession);
|
||||
|
||||
m_pStream = &stream;
|
||||
m_connected = true;
|
||||
|
||||
|
||||
// send any buffered audio
|
||||
int nFrames = m_audioBuffer.getNumItems();
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p got stream ready, %d buffered frames\n", this, nFrames);
|
||||
if (nFrames) {
|
||||
char *p;
|
||||
do {
|
||||
p = m_audioBuffer.getNextChunk();
|
||||
if (p) {
|
||||
write(p, CHUNKSIZE);
|
||||
}
|
||||
} while (p);
|
||||
}
|
||||
|
||||
switch_core_session_rwunlock(psession);
|
||||
}
|
||||
};
|
||||
auto OnResponseCallback = [this](const TranscribeStreamingServiceClient* pClient,
|
||||
const Model::StartStreamTranscriptionRequest& request,
|
||||
const Model::StartStreamTranscriptionOutcome& outcome,
|
||||
const std::shared_ptr<const Aws::Client::AsyncCallerContext>& context)
|
||||
{
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p stream got final response\n", this);
|
||||
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
|
||||
if (psession) {
|
||||
if (!outcome.IsSuccess()) {
|
||||
const TranscribeStreamingServiceError& err = outcome.GetError();
|
||||
auto message = err.GetMessage();
|
||||
auto exception = err.GetExceptionName();
|
||||
cJSON* json = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(json, "type", "error");
|
||||
cJSON_AddStringToObject(json, "error", message.c_str());
|
||||
char* jsonString = cJSON_PrintUnformatted(json);
|
||||
m_responseHandler(psession, jsonString, m_bugname.c_str());
|
||||
free(jsonString);
|
||||
cJSON_Delete(json);
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p stream got error response %s : %s\n", this, message.c_str(), exception.c_str());
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lk(m_mutex);
|
||||
m_finished = true;
|
||||
m_cond.notify_one();
|
||||
|
||||
switch_core_session_rwunlock(psession);
|
||||
} else {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p session is closed/hungup. Need to unblock thread.\n", this);
|
||||
std::lock_guard<std::mutex> lk(m_mutex);
|
||||
m_finished = true;
|
||||
m_cond.notify_one();
|
||||
}
|
||||
};
|
||||
|
||||
m_client->StartStreamTranscriptionAsync(m_request, OnStreamReady, OnResponseCallback, nullptr);
|
||||
}
|
||||
|
||||
|
||||
~GStreamer() {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::~GStreamer wrote %u packets %p\n", m_packets, this);
|
||||
}
|
||||
|
||||
bool write(void* data, uint32_t datalen) {
|
||||
if (m_finishing || m_finished) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write not writing because we are finished, %p\n", this);
|
||||
return false;
|
||||
}
|
||||
if (!m_connected) {
|
||||
if (datalen % CHUNKSIZE == 0) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::write queuing %d bytes\n", datalen);
|
||||
m_audioBuffer.add(data, datalen);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lk(m_mutex);
|
||||
|
||||
const auto beg = static_cast<const unsigned char*>(data);
|
||||
const auto end = beg + datalen;
|
||||
Aws::Vector<unsigned char> bits { beg, end };
|
||||
m_deqAudio.push_back(bits);
|
||||
m_packets++;
|
||||
|
||||
m_cond.notify_one();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void finish() {
|
||||
if (m_finishing) return;
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::finish %p\n", this);
|
||||
std::lock_guard<std::mutex> lk(m_mutex);
|
||||
|
||||
m_finishing = true;
|
||||
m_cond.notify_one();
|
||||
}
|
||||
|
||||
void processData() {
|
||||
bool shutdownInitiated = false;
|
||||
while (true) {
|
||||
std::unique_lock<std::mutex> lk(m_mutex);
|
||||
m_cond.wait(lk, [&, this] {
|
||||
return (!m_deqAudio.empty() && !m_finishing) || m_transcript.TranscriptHasBeenSet() || m_finished || (m_finishing && !shutdownInitiated);
|
||||
});
|
||||
|
||||
|
||||
// we have data to process or have been told we're done
|
||||
if (m_finished || !m_connected) return;
|
||||
|
||||
if (m_transcript.TranscriptHasBeenSet()) {
|
||||
switch_core_session_t* psession = switch_core_session_locate(m_sessionId.c_str());
|
||||
if (psession) {
|
||||
|
||||
//switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::got a transcript to send out %p\n", this);
|
||||
bool isFinal = false;
|
||||
std::ostringstream s;
|
||||
s << "[";
|
||||
for (auto&& r : m_transcript.GetTranscript().GetResults()) {
|
||||
int count = 0;
|
||||
std::ostringstream t1;
|
||||
if (!isFinal && !r.GetIsPartial()) isFinal = true;
|
||||
t1 << "{\"is_final\": " << (r.GetIsPartial() ? "false" : "true") << ", \"alternatives\": [";
|
||||
for (auto&& alt : r.GetAlternatives()) {
|
||||
std::ostringstream t2;
|
||||
if (count++ == 0) t2 << "{\"transcript\": \"" << alt.GetTranscript() << "\"}";
|
||||
else t2 << ", {\"transcript\": \"" << alt.GetTranscript() << "\"}";
|
||||
t1 << t2.str();
|
||||
}
|
||||
t1 << "]}";
|
||||
s << t1.str();
|
||||
}
|
||||
s << "]";
|
||||
if (0 != s.str().compare("[]") && (isFinal || m_interim)) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::writing transcript %p: %s\n", this, s.str().c_str() );
|
||||
m_responseHandler(psession, s.str().c_str(), m_bugname.c_str());
|
||||
}
|
||||
TranscriptEvent empty;
|
||||
m_transcript = empty;
|
||||
|
||||
switch_core_session_rwunlock(psession);
|
||||
}
|
||||
}
|
||||
if (m_finishing) {
|
||||
shutdownInitiated = true;
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer::writing disconnect event %p\n", this);
|
||||
|
||||
if (m_pStream) {
|
||||
m_pStream->flush();
|
||||
m_pStream->Close();
|
||||
m_pStream = nullptr;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// send out any queued speech packets
|
||||
while (!m_deqAudio.empty()) {
|
||||
Aws::Vector<unsigned char>& bits = m_deqAudio.front();
|
||||
Aws::TranscribeStreamingService::Model::AudioEvent event(std::move(bits));
|
||||
m_pStream->WriteAudioEvent(event);
|
||||
m_deqAudio.pop_front();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool isConnecting() {
|
||||
return m_connecting;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string m_sessionId;
|
||||
std::string m_bugname;
|
||||
std::string m_region;
|
||||
Aws::UniquePtr<TranscribeStreamingServiceClient> m_client;
|
||||
AudioStream* m_pStream;
|
||||
StartStreamTranscriptionRequest m_request;
|
||||
StartStreamTranscriptionHandler m_handler;
|
||||
TranscriptEvent m_transcript;
|
||||
responseHandler_t m_responseHandler;
|
||||
bool m_finishing;
|
||||
bool m_interim;
|
||||
bool m_finished;
|
||||
bool m_connected;
|
||||
bool m_connecting;
|
||||
uint32_t m_packets;
|
||||
std::mutex m_mutex;
|
||||
std::condition_variable m_cond;
|
||||
std::deque< Aws::Vector<unsigned char> > m_deqAudio;
|
||||
SimpleBuffer m_audioBuffer;
|
||||
};
|
||||
|
||||
/**
 * Worker thread entry point: creates the GStreamer for this capture callback,
 * optionally connects immediately (when no VAD is configured), and runs the
 * processing loop until the request is done.
 *
 * @param thread  FreeSWITCH thread handle (unused)
 * @param obj     the struct cap_cb for this media bug
 * @return always nullptr
 */
static void *SWITCH_THREAD_FUNC aws_transcribe_thread(switch_thread_t *thread, void *obj) {
  struct cap_cb *cb = (struct cap_cb *) obj;
  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "transcribe_thread: starting cb %p\n", (void *) cb);

  GStreamer* pStreamer = new GStreamer(cb->sessionId, cb->bugname, cb->channels, cb->lang, cb->interim, cb->samples_per_second, cb->region, cb->awsAccessKeyId, cb->awsSecretAccessKey,
    cb->responseHandler);
  if (!pStreamer) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "transcribe_thread: Error allocating streamer\n");
    return nullptr;
  }

  // With VAD enabled the connection is deferred until speech is detected.
  if (!cb->vad) pStreamer->connect();
  cb->streamer = pStreamer;
  pStreamer->processData(); //blocks until done

  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "transcribe_thread: stopping cb %p\n", (void *) cb);

  // Unpublish the pointer before destroying the object so cb->streamer never
  // refers to freed memory.
  cb->streamer = nullptr;
  delete pStreamer;
  return nullptr;
}
|
||||
|
||||
static void killcb(struct cap_cb* cb) {
|
||||
if (cb) {
|
||||
if (cb->streamer) {
|
||||
GStreamer* p = (GStreamer *) cb->streamer;
|
||||
delete p;
|
||||
cb->streamer = nullptr;
|
||||
}
|
||||
if (cb->resampler) {
|
||||
speex_resampler_destroy(cb->resampler);
|
||||
cb->resampler = nullptr;
|
||||
}
|
||||
if (cb->vad) {
|
||||
switch_vad_destroy(&cb->vad);
|
||||
cb->vad = nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
/**
 * Module-load initialisation: records whether both credential environment
 * variables are present and initialises the AWS SDK.
 *
 * @return always SWITCH_STATUS_SUCCESS
 */
switch_status_t aws_transcribe_init() {
  const char* accessKeyId = std::getenv("AWS_ACCESS_KEY_ID");
  const char* secretAccessKey = std::getenv("AWS_SECRET_ACCESS_KEY");

  // Default credentials are only usable when BOTH variables are set.
  if (NULL == accessKeyId || NULL == secretAccessKey) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE,
      "\"AWS_ACCESS_KEY_ID\" and/or \"AWS_SECRET_ACCESS_KEY\" env var not set; authentication will expect channel variables of same names to be set\n");
  }
  else {
    hasDefaultCredentials = true;
  }

  Aws::SDKOptions options;
  /*
  options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;

  Aws::Utils::Logging::InitializeAWSLogging(
    Aws::MakeShared<Aws::Utils::Logging::DefaultLogSystem>(
      ALLOC_TAG, Aws::Utils::Logging::LogLevel::Trace, "aws_sdk_transcribe"));
  */
  Aws::InitAPI(options);

  return SWITCH_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Module-unload counterpart of aws_transcribe_init(): shuts the AWS SDK down
 * using default-constructed options.
 *
 * @return always SWITCH_STATUS_SUCCESS
 */
switch_status_t aws_transcribe_cleanup() {
  Aws::SDKOptions sdkOptions;
  // SDK trace logging is not enabled, so there is no logging system to shut down here.
  Aws::ShutdownAPI(sdkOptions);

  return SWITCH_STATUS_SUCCESS;
}
|
||||
|
||||
// start transcribe on a channel
|
||||
/**
 * Prepares transcription for a session: allocates and fills the capture
 * callback, sets up the optional resampler and VAD, and starts the worker
 * thread that talks to AWS.
 *
 * Credentials are taken from channel variables when all three
 * (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION) are set, otherwise
 * from the environment when all three are set there, otherwise they are left
 * empty so the default AWS profile is used.
 *
 * @param session            session to transcribe
 * @param responseHandler    callback that receives results
 * @param samples_per_second sample rate reported by the caller (logged only)
 * @param channels           number of audio channels
 * @param lang               AWS language code name; rejected when unknown
 * @param interim            non-zero to report interim results
 * @param bugname            media bug name
 * @param ppUserData         receives the capture callback on success
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE on any failure
 */
switch_status_t aws_transcribe_session_init(switch_core_session_t *session, responseHandler_t responseHandler,
  uint32_t samples_per_second, uint32_t channels, char* lang, int interim, char* bugname, void **ppUserData
) {
  switch_status_t status = SWITCH_STATUS_SUCCESS;
  switch_channel_t *channel = switch_core_session_get_channel(session);
  int err;
  switch_threadattr_t *thd_attr = NULL;
  switch_memory_pool_t *pool = switch_core_session_get_pool(session);
  auto read_codec = switch_core_session_get_read_codec(session);
  uint32_t sampleRate = read_codec->implementation->actual_samples_per_second;

  struct cap_cb* cb = (struct cap_cb *) switch_core_session_alloc(session, sizeof(*cb));
  // Zero the whole structure (value and size arguments were previously swapped).
  memset(cb, 0, sizeof(*cb));

  // All initialised locals are declared before the first `goto done`.
  const char* awsAccessKeyId = switch_channel_get_variable(channel, "AWS_ACCESS_KEY_ID");
  const char* awsSecretAccessKey = switch_channel_get_variable(channel, "AWS_SECRET_ACCESS_KEY");
  const char* awsRegion = switch_channel_get_variable(channel, "AWS_REGION");
  const char* envAccessKeyId = std::getenv("AWS_ACCESS_KEY_ID");
  const char* envSecretAccessKey = std::getenv("AWS_SECRET_ACCESS_KEY");
  const char* envRegion = std::getenv("AWS_REGION");
  LanguageCode code = LanguageCodeMapper::GetLanguageCodeForName(lang);

  cb->channels = channels;
  if (LanguageCode::NOT_SET == code) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "invalid language code %s\n", lang);
    status = SWITCH_STATUS_FALSE;
    goto done;
  }

  // switch_copy_string always NUL-terminates, unlike strncpy.
  switch_copy_string(cb->sessionId, switch_core_session_get_uuid(session), sizeof(cb->sessionId));
  switch_copy_string(cb->bugname, bugname, sizeof(cb->bugname));

  if (awsAccessKeyId && awsSecretAccessKey && awsRegion) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Using channel vars for aws authentication\n");
    switch_copy_string(cb->awsAccessKeyId, awsAccessKeyId, sizeof(cb->awsAccessKeyId));
    switch_copy_string(cb->awsSecretAccessKey, awsSecretAccessKey, sizeof(cb->awsSecretAccessKey));
    switch_copy_string(cb->region, awsRegion, sizeof(cb->region));
  }
  else if (envAccessKeyId && envSecretAccessKey && envRegion) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Using env vars for aws authentication\n");
    switch_copy_string(cb->awsAccessKeyId, envAccessKeyId, sizeof(cb->awsAccessKeyId));
    switch_copy_string(cb->awsSecretAccessKey, envSecretAccessKey, sizeof(cb->awsSecretAccessKey));
    switch_copy_string(cb->region, envRegion, sizeof(cb->region));
  }
  else {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "No channel vars or env vars for aws authentication..will use default profile if found\n");
  }

  cb->responseHandler = responseHandler;

  if (switch_mutex_init(&cb->mutex, SWITCH_MUTEX_NESTED, pool) != SWITCH_STATUS_SUCCESS) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error initializing mutex\n");
    status = SWITCH_STATUS_FALSE;
    goto done;
  }

  cb->interim = interim;
  switch_copy_string(cb->lang, lang, sizeof(cb->lang));
  cb->samples_per_second = sampleRate;
  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "sample rate of rtp stream is %d\n", samples_per_second);

  // Anything other than 8k is resampled to 16k before being sent.
  if (sampleRate != 8000) {
    cb->resampler = speex_resampler_init(1, sampleRate, 16000, SWITCH_RESAMPLE_QUALITY, &err);
    if (0 != err) {
      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error initializing resampler: %s.\n",
        switch_channel_get_name(channel), speex_resampler_strerror(err));
      status = SWITCH_STATUS_FALSE;
      goto done;
    }
  }

  // allocate vad if we are delaying connecting to the recognizer until we detect speech
  if (switch_channel_var_true(channel, "START_RECOGNIZING_ON_VAD")) {
    cb->vad = switch_vad_init(sampleRate, 1);
    if (cb->vad) {
      const char* var;
      int mode = 2;
      int silence_ms = 150;
      int voice_ms = 250;
      int debug = 0;

      if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_MODE"))) {
        mode = atoi(var);
      }
      if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_SILENCE_MS"))) {
        silence_ms = atoi(var);
      }
      if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_VOICE_MS"))) {
        voice_ms = atoi(var);
      }
      if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_DEBUG"))) {
        debug = atoi(var);
      }
      switch_vad_set_mode(cb->vad, mode);
      switch_vad_set_param(cb->vad, "silence_ms", silence_ms);
      switch_vad_set_param(cb->vad, "voice_ms", voice_ms);
      switch_vad_set_param(cb->vad, "debug", debug);
      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "%s: delaying connection until vad, voice_ms %d, mode %d\n",
        switch_channel_get_name(channel), voice_ms, mode);
    }
  }

  // create a thread to service the http/2 connection to aws
  switch_threadattr_create(&thd_attr, pool);
  switch_threadattr_stacksize_set(thd_attr, SWITCH_THREAD_STACKSIZE);
  if (switch_thread_create(&cb->thread, thd_attr, aws_transcribe_thread, cb, pool) != SWITCH_STATUS_SUCCESS) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error creating transcribe thread\n");
    cb->thread = NULL;
    killcb(cb);   // release the resampler / vad created above
    status = SWITCH_STATUS_FALSE;
    goto done;
  }

  *ppUserData = cb;

done:
  return status;
}
|
||||
|
||||
/**
 * Stops transcription for the named media bug: asks the streamer to finish,
 * waits for the worker thread, releases the callback's resources and detaches
 * the bug (unless the channel is already closing).
 *
 * @param session          owning session
 * @param channelIsClosing non-zero when called during channel teardown; the bug is then not removed here
 * @param bugname          name under which the bug was stored as channel private data
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when no bug is attached
 */
switch_status_t aws_transcribe_session_stop(switch_core_session_t *session, int channelIsClosing, char* bugname) {
  switch_channel_t *channel = switch_core_session_get_channel(session);
  switch_media_bug_t *bug = (switch_media_bug_t*) switch_channel_get_private(channel, bugname);

  if (!bug) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "%s Bug is not attached.\n", switch_channel_get_name(channel));
    return SWITCH_STATUS_FALSE;
  }

  struct cap_cb *cb = (struct cap_cb *) switch_core_media_bug_get_user_data(bug);

  // close connection and get final responses
  switch_mutex_lock(cb->mutex);

  GStreamer* pStreamer = (GStreamer *) cb->streamer;
  if (pStreamer) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "aws_transcribe_session_stop: finish..%s\n", bugname);
    pStreamer->finish();
  }

  if (cb->thread) {
    switch_status_t retval;
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: waiting for read thread to complete %s\n", bugname);
    switch_thread_join(&retval, cb->thread);
    cb->thread = NULL;
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: read thread completed %s, %d\n", bugname, retval);
  }

  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; going to kill callback\n", bugname);
  killcb(cb);
  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; killed callback\n", bugname);

  switch_channel_set_private(channel, bugname, NULL);
  if (!channelIsClosing) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: removing bug %s\n", bugname);
    switch_core_media_bug_remove(session, &bug);
  }

  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: bugname - %s; unlocking callback mutex\n", bugname);
  switch_mutex_unlock(cb->mutex);
  switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "aws_transcribe_session_stop: Closed aws session\n");

  return SWITCH_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Media bug read callback: drains the frames currently available on the bug
 * and hands them to the streamer, resampling first when a resampler is
 * configured. When VAD is enabled, the first detected speech triggers the
 * connection to AWS and a "vad_detected" notification.
 *
 * The callback mutex is only try-locked; if it is busy the call does nothing.
 *
 * @return always SWITCH_TRUE
 */
switch_bool_t aws_transcribe_frame(switch_media_bug_t *bug, void* user_data) {
  struct cap_cb *cb = (struct cap_cb *) user_data;
  switch_core_session_t *session = switch_core_media_bug_get_session(bug);
  uint8_t data[SWITCH_RECOMMENDED_BUFFER_SIZE];
  switch_frame_t frame = {};

  frame.data = data;
  frame.buflen = SWITCH_RECOMMENDED_BUFFER_SIZE;

  if (switch_mutex_trylock(cb->mutex) != SWITCH_STATUS_SUCCESS) {
    return SWITCH_TRUE;
  }

  GStreamer* pStreamer = (GStreamer *) cb->streamer;
  if (!pStreamer) {
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG,
      "aws_transcribe_frame: not sending audio because aws channel has been closed\n");
  }
  else {
    // Stop at the first read failure or comfort-noise frame.
    while (switch_core_media_bug_read(bug, &frame, SWITCH_TRUE) == SWITCH_STATUS_SUCCESS && !switch_test_flag((&frame), SFF_CNG)) {
      if (!frame.datalen) continue;

      // VAD is consulted only until the connection has been started.
      if (cb->vad && !pStreamer->isConnecting()) {
        switch_vad_state_t state = switch_vad_process(cb->vad, (int16_t*) frame.data, frame.samples);
        if (state == SWITCH_VAD_STATE_START_TALKING) {
          switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "detected speech, connect to aws speech now\n");
          pStreamer->connect();
          cb->responseHandler(session, "vad_detected", cb->bugname);
        }
      }

      if (cb->resampler) {
        spx_int16_t resampled[SWITCH_RECOMMENDED_BUFFER_SIZE];
        spx_uint32_t resampledLen = SWITCH_RECOMMENDED_BUFFER_SIZE;
        spx_uint32_t inputLen = frame.samples;

        speex_resampler_process_interleaved_int(cb->resampler, (const spx_int16_t *) frame.data, (spx_uint32_t *) &inputLen, &resampled[0], &resampledLen);
        pStreamer->write( &resampled[0], sizeof(spx_int16_t) * resampledLen);
      }
      else {
        pStreamer->write( frame.data, sizeof(spx_int16_t) * frame.samples);
      }
    }
  }

  switch_mutex_unlock(cb->mutex);
  return SWITCH_TRUE;
}
|
||||
}
|
||||
11
mod_aws_transcribe/aws_transcribe_glue.h
Normal file
11
mod_aws_transcribe/aws_transcribe_glue.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#ifndef __AWS_GLUE_H__
|
||||
#define __AWS_GLUE_H__
|
||||
|
||||
switch_status_t aws_transcribe_init();
|
||||
switch_status_t aws_transcribe_cleanup();
|
||||
switch_status_t aws_transcribe_session_init(switch_core_session_t *session, responseHandler_t responseHandler,
|
||||
uint32_t samples_per_second, uint32_t channels, char* lang, int interim, char *bugname, void **ppUserData);
|
||||
switch_status_t aws_transcribe_session_stop(switch_core_session_t *session, int channelIsClosing, char* bugname);
|
||||
switch_bool_t aws_transcribe_frame(switch_media_bug_t *bug, void* user_data);
|
||||
|
||||
#endif
|
||||
240
mod_aws_transcribe/mod_aws_transcribe.c
Normal file
240
mod_aws_transcribe/mod_aws_transcribe.c
Normal file
@@ -0,0 +1,240 @@
|
||||
/*
|
||||
*
|
||||
* mod_aws_transcribe.c -- Freeswitch module for using aws streaming transcribe api
|
||||
*
|
||||
*/
|
||||
#include "mod_aws_transcribe.h"
|
||||
#include "aws_transcribe_glue.h"
|
||||
|
||||
/* Prototypes */
|
||||
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_aws_transcribe_shutdown);
|
||||
SWITCH_MODULE_LOAD_FUNCTION(mod_aws_transcribe_load);
|
||||
|
||||
SWITCH_MODULE_DEFINITION(mod_aws_transcribe, mod_aws_transcribe_load, mod_aws_transcribe_shutdown, NULL);
|
||||
|
||||
static switch_status_t do_stop(switch_core_session_t *session, char* bugname);
|
||||
|
||||
/**
 * Turn a message coming from the transcription glue into a FreeSWITCH custom event and fire it.
 *
 * @param session  session whose channel data is attached to the event
 * @param json     either one of the literal markers "vad_detected", "end_of_transcript",
 *                 "max_duration_exceeded" or "no_audio", or an arbitrary payload that is
 *                 carried as the event body
 * @param bugname  optional media bug name; added as the "media-bugname" header when non-NULL
 *
 * A payload that parses as JSON with "type" == "error" is sent as TRANSCRIBE_EVENT_ERROR;
 * every other payload is sent as TRANSCRIBE_EVENT_RESULTS.
 */
static void responseHandler(switch_core_session_t* session, const char * json, const char* bugname) {
	/* marker string -> event subclass for the body-less notifications */
	static const struct {
		const char *marker;
		const char *subclass;
	} markers[] = {
		{ "vad_detected", TRANSCRIBE_EVENT_VAD_DETECTED },
		{ "end_of_transcript", TRANSCRIBE_EVENT_END_OF_TRANSCRIPT },
		{ "max_duration_exceeded", TRANSCRIBE_EVENT_MAX_DURATION_EXCEEDED },
		{ "no_audio", TRANSCRIBE_EVENT_NO_AUDIO_DETECTED }
	};
	switch_event_t *event = NULL;
	switch_channel_t *channel = switch_core_session_get_channel(session);
	const char *subclass = NULL;
	int has_body = 0;
	size_t i;

	if (!json) {
		/* strcmp() on NULL is undefined behaviour; nothing sensible can be reported */
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "responseHandler: ignoring NULL message\n");
		return;
	}

	for (i = 0; i < sizeof(markers) / sizeof(markers[0]); i++) {
		if (0 == strcmp(markers[i].marker, json)) {
			subclass = markers[i].subclass;
			break;
		}
	}

	if (!subclass) {
		/* not a marker: treat it as a payload and classify it as error or result */
		cJSON* jMessage = cJSON_Parse(json);

		subclass = TRANSCRIBE_EVENT_RESULTS;
		if (jMessage) {
			const char* type = cJSON_GetStringValue(cJSON_GetObjectItem(jMessage, "type"));
			if (type && 0 == strcmp(type, "error")) {
				subclass = TRANSCRIBE_EVENT_ERROR;
			}
			cJSON_Delete(jMessage);
		}
		has_body = 1;
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "json payload: %s.\n", json);
	}

	/* without this check a failed allocation left 'event' pointing at garbage */
	if (switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, subclass) != SWITCH_STATUS_SUCCESS || !event) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "responseHandler: unable to create event %s\n", subclass);
		return;
	}

	switch_channel_event_set_data(channel, event);
	switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "aws");
	if (has_body) {
		switch_event_add_body(event, "%s", json);
	}
	if (bugname) switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "media-bugname", bugname);
	switch_event_fire(&event);
}
|
||||
|
||||
|
||||
/**
 * Media bug callback.
 *
 * READ frames are forwarded to aws_transcribe_frame() and its result is returned as-is.
 * CLOSE stops the transcribe session for this bug (flagging that the channel is closing).
 * INIT is only logged; every other event type is ignored.  All non-READ paths return
 * SWITCH_TRUE.
 */
static switch_bool_t capture_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type)
{
	switch_core_session_t *session = switch_core_media_bug_get_session(bug);

	if (type == SWITCH_ABC_TYPE_READ) {
		return aws_transcribe_frame(bug, user_data);
	}

	if (type == SWITCH_ABC_TYPE_INIT) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Got SWITCH_ABC_TYPE_INIT.\n");
	}
	else if (type == SWITCH_ABC_TYPE_CLOSE) {
		/* the bug's user data carries the bug name needed to stop the right session */
		struct cap_cb* cb = (struct cap_cb*) switch_core_media_bug_get_user_data(bug);

		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Got SWITCH_ABC_TYPE_CLOSE.\n");
		aws_transcribe_session_stop(session, 1, cb->bugname);
	}

	return SWITCH_TRUE;
}
|
||||
|
||||
/**
 * Start transcribing a session: initialise the AWS side and attach a media bug.
 *
 * @param session  session to transcribe
 * @param flags    media bug flags; SMBF_STEREO selects two channels, otherwise one
 * @param lang     language code handed to aws_transcribe_session_init()
 * @param interim  non-zero to request interim results
 * @param bugname  media bug name; also the channel-private key under which the bug is stored
 * @return SWITCH_STATUS_SUCCESS when the bug was added, SWITCH_STATUS_FALSE when pre-answer or
 *         session initialisation failed, otherwise the status of switch_core_media_bug_add()
 *
 * If a bug is already registered under bugname it is stopped first.
 */
static switch_status_t start_capture(switch_core_session_t *session, switch_media_bug_flag_t flags,
	char* lang, int interim, char* bugname)
{
	switch_channel_t *channel = switch_core_session_get_channel(session);
	switch_media_bug_t *bug = NULL;
	switch_status_t status;
	switch_codec_implementation_t read_impl = { 0 };
	void *pUserData = NULL;
	uint32_t samples_per_second;

	if (switch_channel_get_private(channel, bugname)) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "removing bug from previous transcribe\n");
		do_stop(session, bugname);
	}

	switch_core_session_get_read_impl(session, &read_impl);

	if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
		return SWITCH_STATUS_FALSE;
	}

	/* g722 is special-cased: use its actual sample rate rather than the nominal one */
	samples_per_second = !strcasecmp(read_impl.iananame, "g722") ? read_impl.actual_samples_per_second : read_impl.samples_per_second;

	/* anything other than SUCCESS is a failure; pUserData is not usable in that case */
	if (aws_transcribe_session_init(session, responseHandler, samples_per_second, flags & SMBF_STEREO ? 2 : 1, lang, interim, bugname, &pUserData) != SWITCH_STATUS_SUCCESS) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error initializing aws speech session.\n");
		return SWITCH_STATUS_FALSE;
	}

	if ((status = switch_core_media_bug_add(session, bugname, NULL, capture_callback, pUserData, 0, flags, &bug)) != SWITCH_STATUS_SUCCESS) {
		/* NOTE(review): state created by aws_transcribe_session_init() is not released on this
		 * path; the release mechanism is not visible from this file - confirm and clean up. */
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error adding media bug %s for aws transcribe.\n", bugname);
		return status;
	}

	switch_channel_set_private(channel, bugname, bug);
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "added media bug for aws transcribe\n");

	return SWITCH_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Stop transcription on a session in response to a user command.
 *
 * Looks up the channel-private entry stored under bugname; when none exists there is
 * nothing to stop and SWITCH_STATUS_SUCCESS is returned.  Otherwise the result of
 * aws_transcribe_session_stop() (called with channelIsClosing = 0) is returned.
 */
static switch_status_t do_stop(switch_core_session_t *session, char* bugname)
{
	switch_channel_t *channel = switch_core_session_get_channel(session);
	switch_status_t result;

	if (!switch_channel_get_private(channel, bugname)) {
		return SWITCH_STATUS_SUCCESS;
	}

	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Received user command command to stop transcribe on %s.\n", bugname);
	result = aws_transcribe_session_stop(session, 0, bugname);
	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "stopped transcribe.\n");

	return result;
}
|
||||
|
||||
#define TRANSCRIBE_API_SYNTAX "<uuid> [start|stop] lang-code [interim] [stereo|mono] [bugname]"
|
||||
SWITCH_STANDARD_API(aws_transcribe_function)
|
||||
{
|
||||
char *mycmd = NULL, *argv[6] = { 0 };
|
||||
int argc = 0;
|
||||
switch_status_t status = SWITCH_STATUS_FALSE;
|
||||
switch_media_bug_flag_t flags = SMBF_READ_STREAM /* | SMBF_WRITE_STREAM | SMBF_READ_PING */;
|
||||
|
||||
if (!zstr(cmd) && (mycmd = strdup(cmd))) {
|
||||
argc = switch_separate_string(mycmd, ' ', argv, (sizeof(argv) / sizeof(argv[0])));
|
||||
}
|
||||
|
||||
if (zstr(cmd) ||
|
||||
(!strcasecmp(argv[1], "stop") && argc < 2) ||
|
||||
(!strcasecmp(argv[1], "start") && argc < 3) ||
|
||||
zstr(argv[0])) {
|
||||
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s %s %s.\n", cmd, argv[0], argv[1]);
|
||||
stream->write_function(stream, "-USAGE: %s\n", TRANSCRIBE_API_SYNTAX);
|
||||
goto done;
|
||||
} else {
|
||||
switch_core_session_t *lsession = NULL;
|
||||
|
||||
if ((lsession = switch_core_session_locate(argv[0]))) {
|
||||
if (!strcasecmp(argv[1], "stop")) {
|
||||
char *bugname = argc > 2 ? argv[2] : MY_BUG_NAME;
|
||||
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "stop transcribing\n");
|
||||
status = do_stop(lsession, bugname);
|
||||
} else if (!strcasecmp(argv[1], "start")) {
|
||||
char* lang = argv[2];
|
||||
int interim = argc > 3 && !strcmp(argv[3], "interim");
|
||||
char *bugname = argc > 5 ? argv[5] : MY_BUG_NAME;
|
||||
if (argc > 4 && !strcmp(argv[4], "stereo")) {
|
||||
flags |= SMBF_WRITE_STREAM ;
|
||||
flags |= SMBF_STEREO;
|
||||
}
|
||||
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "start transcribing %s %s %s\n", lang, interim ? "interim": "complete", bugname);
|
||||
status = start_capture(lsession, flags, lang, interim, bugname);
|
||||
}
|
||||
switch_core_session_rwunlock(lsession);
|
||||
}
|
||||
}
|
||||
|
||||
if (status == SWITCH_STATUS_SUCCESS) {
|
||||
stream->write_function(stream, "+OK Success\n");
|
||||
} else {
|
||||
stream->write_function(stream, "-ERR Operation Failed\n");
|
||||
}
|
||||
|
||||
done:
|
||||
|
||||
switch_safe_free(mycmd);
|
||||
return SWITCH_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
SWITCH_MODULE_LOAD_FUNCTION(mod_aws_transcribe_load)
|
||||
{
|
||||
switch_api_interface_t *api_interface;
|
||||
|
||||
/* create/register custom event message type */
|
||||
if (switch_event_reserve_subclass(TRANSCRIBE_EVENT_RESULTS) != SWITCH_STATUS_SUCCESS) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", TRANSCRIBE_EVENT_RESULTS);
|
||||
return SWITCH_STATUS_TERM;
|
||||
}
|
||||
|
||||
/* connect my internal structure to the blank pointer passed to me */
|
||||
*module_interface = switch_loadable_module_create_module_interface(pool, modname);
|
||||
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "AWS Speech Transcription API loading..\n");
|
||||
|
||||
if (SWITCH_STATUS_FALSE == aws_transcribe_init()) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed initializing aws speech interface\n");
|
||||
}
|
||||
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "AWS Speech Transcription API successfully loaded\n");
|
||||
|
||||
SWITCH_ADD_API(api_interface, "uuid_aws_transcribe", "AWS Speech Transcription API", aws_transcribe_function, TRANSCRIBE_API_SYNTAX);
|
||||
switch_console_set_complete("add uuid_aws_transcribe start lang-code [interim|final] [stereo|mono]");
|
||||
switch_console_set_complete("add uuid_aws_transcribe stop ");
|
||||
|
||||
/* indicate that the module should continue to be loaded */
|
||||
return SWITCH_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
Called when the system shuts down
|
||||
Macro expands to: switch_status_t mod_aws_transcribe_shutdown() */
|
||||
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_aws_transcribe_shutdown)
|
||||
{
|
||||
aws_transcribe_cleanup();
|
||||
switch_event_free_subclass(TRANSCRIBE_EVENT_RESULTS);
|
||||
return SWITCH_STATUS_SUCCESS;
|
||||
}
|
||||
45
mod_aws_transcribe/mod_aws_transcribe.h
Normal file
45
mod_aws_transcribe/mod_aws_transcribe.h
Normal file
@@ -0,0 +1,45 @@
|
||||
#ifndef __MOD_AWS_TRANSCRIBE_H__
|
||||
#define __MOD_AWS_TRANSCRIBE_H__
|
||||
|
||||
#include <switch.h>
|
||||
#include <speex/speex_resampler.h>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#define MY_BUG_NAME "aws_transcribe"
|
||||
#define MAX_BUG_LEN (64)
|
||||
#define MAX_SESSION_ID (256)
|
||||
#define TRANSCRIBE_EVENT_RESULTS "aws_transcribe::transcription"
|
||||
#define TRANSCRIBE_EVENT_END_OF_TRANSCRIPT "aws_transcribe::end_of_transcript"
|
||||
#define TRANSCRIBE_EVENT_NO_AUDIO_DETECTED "aws_transcribe::no_audio_detected"
|
||||
#define TRANSCRIBE_EVENT_MAX_DURATION_EXCEEDED "aws_transcribe::max_duration_exceeded"
|
||||
#define TRANSCRIBE_EVENT_VAD_DETECTED "aws_transcribe::vad_detected"
|
||||
#define TRANSCRIBE_EVENT_ERROR "jambonz_transcribe::error"
|
||||
|
||||
#define MAX_LANG (12)
|
||||
#define MAX_REGION (32)
|
||||
|
||||
/* per-channel data */
|
||||
typedef void (*responseHandler_t)(switch_core_session_t* session, const char * json, const char* bugname);
|
||||
|
||||
struct cap_cb {
|
||||
switch_mutex_t *mutex;
|
||||
char bugname[MAX_BUG_LEN+1];
|
||||
char sessionId[MAX_SESSION_ID+1];
|
||||
char awsAccessKeyId[128];
|
||||
char awsSecretAccessKey[128];
|
||||
uint32_t channels;
|
||||
SpeexResamplerState *resampler;
|
||||
void* streamer;
|
||||
responseHandler_t responseHandler;
|
||||
switch_thread_t* thread;
|
||||
int interim;
|
||||
|
||||
char lang[MAX_LANG];
|
||||
char region[MAX_REGION];
|
||||
|
||||
switch_vad_t * vad;
|
||||
uint32_t samples_per_second;
|
||||
};
|
||||
|
||||
#endif
|
||||
51
mod_aws_transcribe/simple_buffer.h
Normal file
51
mod_aws_transcribe/simple_buffer.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/**
 * (Very) simple and limited circular buffer of fixed-size chunks.
 *
 * Intended use: perform all of the adds, then subsequently retrieve the stored chunks.
 * Once the buffer is full, each further add overwrites the oldest chunk.  Retrieval hands
 * back the stored chunks oldest-first and returns nullptr when nothing is left.
 */
class SimpleBuffer {
  public:
    /**
     * @param chunkSize size in bytes of one chunk
     * @param numChunks maximum number of chunks retained
     */
    SimpleBuffer(uint32_t chunkSize, uint32_t numChunks) :
      m_pData(nullptr), numItems(0), m_chunkSize(chunkSize), m_numChunks(numChunks), m_pNextWrite(nullptr) {
      // widen before multiplying so a large chunkSize * numChunks cannot wrap in 32 bits
      m_pData = new char[static_cast<size_t>(chunkSize) * numChunks];
      m_pNextWrite = m_pData;
    }
    ~SimpleBuffer() {
      delete [] m_pData;
    }

    // The buffer owns raw storage; a memberwise copy would free it twice.
    SimpleBuffer(const SimpleBuffer&) = delete;
    SimpleBuffer& operator=(const SimpleBuffer&) = delete;

    /**
     * Append datalen bytes as whole chunks.  Input whose length is not a multiple of the
     * chunk size is ignored, as is any input when the buffer has zero capacity.
     */
    void add(void *data, uint32_t datalen) {
      if (!data || m_chunkSize == 0 || m_numChunks == 0) return;
      if (datalen % m_chunkSize != 0) return;

      const char *src = static_cast<const char*>(data);
      const uint32_t count = datalen / m_chunkSize;
      for (uint32_t i = 0; i < count; i++) {
        memcpy(m_pNextWrite, src, m_chunkSize);
        src += m_chunkSize;
        if (numItems < m_numChunks) numItems++;

        // advance the write cursor, wrapping after the last slot
        uint32_t offset = (m_pNextWrite - m_pData) / m_chunkSize;
        if (offset >= m_numChunks - 1) m_pNextWrite = m_pData;
        else m_pNextWrite += m_chunkSize;
      }
    }

    /**
     * @return pointer to the oldest unread chunk (m_chunkSize bytes inside the buffer's
     *         own storage), or nullptr when no chunks remain
     */
    char* getNextChunk() {
      if (numItems == 0) return nullptr;

      // The oldest chunk sits numItems slots behind the write cursor.  This is also correct
      // for a partially filled buffer, where the write cursor points at an unwritten slot.
      const uint32_t writeIdx = (m_pNextWrite - m_pData) / m_chunkSize;
      const uint32_t readIdx = writeIdx >= numItems ? writeIdx - numItems
                                                    : writeIdx + (m_numChunks - numItems);
      numItems--;
      return m_pData + static_cast<size_t>(readIdx) * m_chunkSize;
    }

    /** @return number of chunks currently available for retrieval */
    uint32_t getNumItems() { return numItems;}

  private:
    char *m_pData;
    uint32_t numItems;
    uint32_t m_chunkSize;
    uint32_t m_numChunks;
    char* m_pNextWrite;
};
|
||||
Reference in New Issue
Block a user