mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
Feature/deepgram stt (#190)
* initial changes to support deepgram stt * fixes for normalizing vendor-specific transcriptions * update to latest drachtio-fsmrf with support for deepgram stt * deepgram parsing error * hints support for deepgram * handling deepgram errors * ignore late arriving transcripts for deepgram * handling of empty transcripts * transcribe changes * allow deepgram stt credentials to be provided at run time * bind channel in transcription handler * fixes for transcribe when handling empty transcripts * more empty transcript fixes * update tests to latest modules * add test cases for deepgram speech recognition
This commit is contained in:
@@ -569,6 +569,12 @@ class CallSession extends Emitter {
|
||||
secret: credential.secret
|
||||
};
|
||||
}
|
||||
else if ('deepgram' === vendor) {
|
||||
return {
|
||||
speech_credential_sid: credential.speech_credential_sid,
|
||||
api_key: credential.api_key
|
||||
};
|
||||
}
|
||||
}
|
||||
else {
|
||||
writeAlerts({
|
||||
|
||||
@@ -5,7 +5,8 @@ const {
|
||||
GoogleTranscriptionEvents,
|
||||
NuanceTranscriptionEvents,
|
||||
AwsTranscriptionEvents,
|
||||
AzureTranscriptionEvents
|
||||
AzureTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents
|
||||
} = require('../utils/constants');
|
||||
|
||||
const makeTask = require('./make_task');
|
||||
@@ -54,11 +55,14 @@ class TaskGather extends Task {
|
||||
this.vendor = recognizer.vendor;
|
||||
this.language = recognizer.language;
|
||||
|
||||
/* let credentials be supplied in the recognizer object at runtime */
|
||||
if (recognizer.vendor === 'nuance') {
|
||||
const {clientId, secret} = recognizer.nuanceOptions;
|
||||
if (clientId && secret) {
|
||||
this.sttCredentials = {client_id: clientId, secret};
|
||||
}
|
||||
if (clientId && secret) this.sttCredentials = {client_id: clientId, secret};
|
||||
}
|
||||
else if (recognizer.vendor === 'deepgram') {
|
||||
const {apiKey} = recognizer.deepgramOptions;
|
||||
if (apiKey) this.sttCredentials = {api_key: apiKey};
|
||||
}
|
||||
|
||||
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
|
||||
@@ -338,8 +342,16 @@ class TaskGather extends Task {
|
||||
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
|
||||
opts.NUANCE_STALL_TIMERS = 1;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case 'deepgram':
|
||||
this.bugname = 'deepgram_transcribe';
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, this._onDeepgramConnect.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
|
||||
this._onDeepGramConnectFailure.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new Error(`Invalid vendor ${this.vendor}`);
|
||||
}
|
||||
@@ -441,30 +453,31 @@ class TaskGather extends Task {
|
||||
|
||||
_onTranscription(cs, ep, evt, fsEvent) {
|
||||
// make sure this is not a transcript from answering machine detection
|
||||
this.logger.debug({evt}, 'Gather:_onTranscription');
|
||||
const bugname = fsEvent.getHeader('media-bugname');
|
||||
const finished = fsEvent.getHeader('transcription-session-finished');
|
||||
this.logger.debug({evt, bugname, finished}, 'Gather:_onTranscription');
|
||||
if (bugname && this.bugname !== bugname) return;
|
||||
|
||||
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language);
|
||||
|
||||
/* count words for bargein feature */
|
||||
const words = evt.alternatives[0].transcript.split(' ').length;
|
||||
const words = evt.alternatives[0]?.transcript.split(' ').length;
|
||||
const bufferedWords = this._bufferedTranscripts.reduce((count, e) => {
|
||||
return count + e.alternatives[0].transcript.split(' ').length;
|
||||
return count + e.alternatives[0]?.transcript.split(' ').length;
|
||||
}, 0);
|
||||
|
||||
if (evt.is_final) {
|
||||
if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) {
|
||||
if ('microsoft' === this.vendor && finished === 'true') {
|
||||
if (finished === 'true' && ['microsoft', 'deepgram'].includes(this.vendor)) {
|
||||
this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding');
|
||||
}
|
||||
else {
|
||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again');
|
||||
this._startTranscribing(ep);
|
||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
|
||||
//this._startTranscribing(ep);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.isContinuousAsr) {
|
||||
/* append the transcript and start listening again for asrTimeout */
|
||||
const t = evt.alternatives[0].transcript;
|
||||
@@ -548,6 +561,23 @@ class TaskGather extends Task {
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
}
|
||||
_onDeepgramConnect(_cs, _ep) {
|
||||
this.logger.debug('TaskGather:_onDeepgramConnect');
|
||||
}
|
||||
|
||||
_onDeepGramConnectFailure(cs, _ep, evt) {
|
||||
const {reason} = evt;
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
this.logger.info({evt}, 'TaskGather:_onDeepgramConnectFailure');
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_FAILURE,
|
||||
message: `Failed connecting to Deepgram speech recognizer: ${reason}`,
|
||||
vendor: 'deepgram',
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for deepgram connection failure'));
|
||||
this.notifyError(`Failed connecting to speech vendor deepgram: ${reason}`);
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
|
||||
_onVadDetected(cs, ep) {
|
||||
if (this.bargein && this.minBargeinWordCount === 0) {
|
||||
|
||||
@@ -445,7 +445,7 @@
|
||||
"properties": {
|
||||
"vendor": {
|
||||
"type": "string",
|
||||
"enum": ["google", "aws", "microsoft", "nuance", "default"]
|
||||
"enum": ["google", "aws", "microsoft", "nuance", "deepgram", "default"]
|
||||
},
|
||||
"language": "string",
|
||||
"vad": "#vad",
|
||||
@@ -510,12 +510,63 @@
|
||||
"azureSttEndpointId": "string",
|
||||
"asrDtmfTerminationDigit": "string",
|
||||
"asrTimeout": "number",
|
||||
"nuanceOptions": "#nuanceOptions"
|
||||
"nuanceOptions": "#nuanceOptions",
|
||||
"deepgramOptions": "#deepgramOptions"
|
||||
},
|
||||
"required": [
|
||||
"vendor"
|
||||
]
|
||||
},
|
||||
"deepgramOptions": {
|
||||
"properties": {
|
||||
"apiKey": "string",
|
||||
"tier": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"enhanced",
|
||||
"base"
|
||||
]
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"general",
|
||||
"meeting",
|
||||
"phonecall",
|
||||
"voicemail",
|
||||
"finance",
|
||||
"conversationalai",
|
||||
"video",
|
||||
"custom"
|
||||
]
|
||||
},
|
||||
"customModel": "string",
|
||||
"version": "string",
|
||||
"punctuate": "boolean",
|
||||
"profanityFilter": "boolean",
|
||||
"redact": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"pci",
|
||||
"numbers",
|
||||
"true",
|
||||
"ssn"
|
||||
]
|
||||
},
|
||||
"diarize": "boolean",
|
||||
"diarizeVersion": "string",
|
||||
"ner": "boolean",
|
||||
"multichannel": "boolean",
|
||||
"alternatives": "number",
|
||||
"numerals": "boolean",
|
||||
"search": "array",
|
||||
"replace": "array",
|
||||
"keywords": "array",
|
||||
"endpointing": "boolean",
|
||||
"vadTurnoff": "number",
|
||||
"tag": "string"
|
||||
}
|
||||
},
|
||||
"nuanceOptions": {
|
||||
"properties": {
|
||||
"clientId": "string",
|
||||
|
||||
@@ -5,7 +5,8 @@ const {
|
||||
GoogleTranscriptionEvents,
|
||||
AzureTranscriptionEvents,
|
||||
AwsTranscriptionEvents,
|
||||
NuanceTranscriptionEvents
|
||||
NuanceTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents
|
||||
} = require('../utils/constants');
|
||||
const normalizeJambones = require('../utils/normalize-jambones');
|
||||
|
||||
@@ -15,9 +16,14 @@ class TaskTranscribe extends Task {
|
||||
this.preconditions = TaskPreconditions.Endpoint;
|
||||
this.parentTask = parentTask;
|
||||
|
||||
const {setChannelVarsForStt, normalizeTranscription} = require('../utils/transcription-utils')(logger);
|
||||
const {
|
||||
setChannelVarsForStt,
|
||||
normalizeTranscription,
|
||||
removeSpeechListeners
|
||||
} = require('../utils/transcription-utils')(logger);
|
||||
this.setChannelVarsForStt = setChannelVarsForStt;
|
||||
this.normalizeTranscription = normalizeTranscription;
|
||||
this.removeSpeechListeners = removeSpeechListeners;
|
||||
|
||||
this.transcriptionHook = this.data.transcriptionHook;
|
||||
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
|
||||
@@ -28,12 +34,17 @@ class TaskTranscribe extends Task {
|
||||
this.interim = !!recognizer.interim;
|
||||
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
|
||||
|
||||
/* let credentials be supplied in the recognizer object at runtime */
|
||||
if (recognizer.vendor === 'nuance') {
|
||||
const {clientId, secret} = recognizer.nuanceOptions;
|
||||
if (clientId && secret) {
|
||||
this.sttCredentials = {client_id: clientId, secret};
|
||||
}
|
||||
}
|
||||
else if (recognizer.vendor === 'deepgram') {
|
||||
const {apiKey} = recognizer.deepgramOptions;
|
||||
if (apiKey) this.sttCredentials = {api_key: apiKey};
|
||||
}
|
||||
|
||||
recognizer.hints = recognizer.hints || [];
|
||||
recognizer.altLanguages = recognizer.altLanguages || [];
|
||||
@@ -69,7 +80,7 @@ class TaskTranscribe extends Task {
|
||||
if (!this.data.recognizer.vendor) {
|
||||
this.data.recognizer.vendor = this.vendor;
|
||||
}
|
||||
this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
|
||||
if (!this.sttCredentials) this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
|
||||
|
||||
try {
|
||||
if (!this.sttCredentials) {
|
||||
@@ -105,22 +116,7 @@ class TaskTranscribe extends Task {
|
||||
this.logger.info(err, 'TaskTranscribe:exec - error');
|
||||
this.parentTask && this.parentTask.emit('error', err);
|
||||
}
|
||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
|
||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.VadDetected);
|
||||
|
||||
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(AwsTranscriptionEvents.VadDetected);
|
||||
|
||||
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
|
||||
ep.removeCustomEventListener(AzureTranscriptionEvents.VadDetected);
|
||||
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);
|
||||
this.removeSpeechListeners(ep);
|
||||
}
|
||||
|
||||
async kill(cs) {
|
||||
@@ -184,6 +180,15 @@ class TaskTranscribe extends Task {
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.Error,
|
||||
this._onNuanceError.bind(this, cs, ep, channel));
|
||||
break;
|
||||
case 'deepgram':
|
||||
this.bugname = 'deepgram_transcribe';
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect,
|
||||
this._onDeepgramConnect.bind(this, cs, ep, channel));
|
||||
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
|
||||
this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
|
||||
break;
|
||||
default:
|
||||
throw new Error(`Invalid vendor ${this.vendor}`);
|
||||
}
|
||||
@@ -215,9 +220,15 @@ class TaskTranscribe extends Task {
|
||||
|
||||
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
|
||||
|
||||
if (evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) {
|
||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again');
|
||||
return this._transcribe(ep);
|
||||
if (evt.alternatives[0]?.transcript === '' && !cs.callGone && !this.killed) {
|
||||
if (['microsoft', 'deepgram'].includes(this.vendor)) {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening');
|
||||
}
|
||||
else {
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, listen again');
|
||||
this._transcribe(ep);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.transcriptionHook) {
|
||||
@@ -268,6 +279,34 @@ class TaskTranscribe extends Task {
|
||||
this._timer = null;
|
||||
}
|
||||
}
|
||||
_onNuanceError(_cs, _ep, evt) {
|
||||
const {code, error, details} = evt;
|
||||
if (code === 404 && error === 'No speech') {
|
||||
this.logger.debug({code, error, details}, 'TaskTranscribe:_onNuanceError');
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
this.logger.info({code, error, details}, 'TaskTranscribe:_onNuanceError');
|
||||
if (code === 413 && error === 'Too much speech') {
|
||||
return this._resolve('timeout');
|
||||
}
|
||||
}
|
||||
_onDeepgramConnect(_cs, _ep) {
|
||||
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
|
||||
}
|
||||
|
||||
_onDeepGramConnectFailure(cs, _ep, evt) {
|
||||
const {reason} = evt;
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
this.logger.info({evt}, 'TaskTranscribe:_onDeepgramConnectFailure');
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_FAILURE,
|
||||
message: `Failed connecting to Deepgram speech recognizer: ${reason}`,
|
||||
vendor: 'deepgram',
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for deepgram connection failure'));
|
||||
this.notifyError(`Failed connecting to speech vendor deepgram: ${reason}`);
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = TaskTranscribe;
|
||||
|
||||
@@ -74,6 +74,11 @@
|
||||
"Error": "nuance_transcribe::error",
|
||||
"VadDetected": "nuance_transcribe::vad_detected"
|
||||
},
|
||||
"DeepgramTranscriptionEvents": {
|
||||
"Transcription": "deepgram_transcribe::transcription",
|
||||
"ConnectFailure": "deepgram_transcribe::connect_failed",
|
||||
"Connect": "deepgram_transcribe::connect"
|
||||
},
|
||||
"AwsTranscriptionEvents": {
|
||||
"Transcription": "aws_transcribe::transcription",
|
||||
"EndOfTranscript": "aws_transcribe::end_of_transcript",
|
||||
|
||||
@@ -51,6 +51,10 @@ const speechMapper = (cred) => {
|
||||
obj.client_id = o.client_id;
|
||||
obj.secret = o.secret;
|
||||
}
|
||||
else if ('deepgram' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
}
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
}
|
||||
@@ -74,6 +78,7 @@ module.exports = (logger, srf) => {
|
||||
const haveMicrosoft = speech.find((s) => s.vendor === 'microsoft');
|
||||
const haveWellsaid = speech.find((s) => s.vendor === 'wellsaid');
|
||||
const haveNuance = speech.find((s) => s.vendor === 'nuance');
|
||||
const haveDeepgram = speech.find((s) => s.vendor === 'deepgram');
|
||||
if (!haveGoogle || !haveAws || !haveMicrosoft || !haveWellsaid || !haveNuance) {
|
||||
const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
|
||||
if (r3.length) {
|
||||
@@ -97,6 +102,10 @@ module.exports = (logger, srf) => {
|
||||
const nuance = r3.find((s) => s.vendor === 'nuance');
|
||||
if (nuance) speech.push(speechMapper(nuance));
|
||||
}
|
||||
if (!haveDeepgram) {
|
||||
const deepgram = r3.find((s) => s.vendor === 'deepgram');
|
||||
if (deepgram) speech.push(speechMapper(deepgram));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,52 +3,120 @@ const {
|
||||
AzureTranscriptionEvents,
|
||||
GoogleTranscriptionEvents,
|
||||
AwsTranscriptionEvents,
|
||||
NuanceTranscriptionEvents
|
||||
NuanceTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents,
|
||||
} = require('./constants');
|
||||
|
||||
/**
 * Convert a Deepgram transcription event into the common transcript shape.
 *
 * @param {object} evt - raw Deepgram event; alternatives are read from
 *   evt.channel.alternatives when present
 * @param {*} channel - value reported as channel_tag
 * @param {string} language - value reported as language_code
 * @returns {object} {language_code, channel_tag, is_final, alternatives, vendor}
 *   where each alternative keeps only confidence and transcript, and
 *   vendor.evt is a JSON deep copy of the raw event
 */
const normalizeDeepgram = (evt, channel, language) => {
  const rawEvent = JSON.parse(JSON.stringify(evt));

  /* events without a channel (or without alternatives) yield an empty list */
  const candidates = evt.channel?.alternatives || [];
  const alternatives = candidates.map(({confidence, transcript}) => ({confidence, transcript}));

  const vendor = {name: 'deepgram', evt: rawEvent};
  return {
    language_code: language,
    channel_tag: channel,
    is_final: evt.is_final,
    alternatives,
    vendor
  };
};
|
||||
|
||||
/**
 * Convert a Google transcription event into the common transcript shape.
 *
 * @param {object} evt - raw Google event; is_final and alternatives are
 *   passed through unchanged
 * @param {*} channel - value reported as channel_tag
 * @param {string} language - value reported as language_code
 * @returns {object} {language_code, channel_tag, is_final, alternatives, vendor}
 *   with vendor.evt holding a JSON deep copy of the raw event
 */
const normalizeGoogle = (evt, channel, language) => {
  const rawEvent = JSON.parse(JSON.stringify(evt));
  const {is_final, alternatives} = evt;
  const vendor = {name: 'google', evt: rawEvent};

  return {language_code: language, channel_tag: channel, is_final, alternatives, vendor};
};
|
||||
|
||||
/**
 * Convert a Nuance transcription event into the common transcript shape.
 *
 * @param {object} evt - raw Nuance event; is_final and alternatives are
 *   passed through unchanged
 * @param {*} channel - value reported as channel_tag
 * @param {string} language - value reported as language_code
 * @returns {object} {language_code, channel_tag, is_final, alternatives, vendor}
 *   with vendor.evt holding a JSON deep copy of the raw event
 */
const normalizeNuance = (evt, channel, language) => {
  const rawEvent = JSON.parse(JSON.stringify(evt));
  const {is_final, alternatives} = evt;
  const vendor = {name: 'nuance', evt: rawEvent};

  return {language_code: language, channel_tag: channel, is_final, alternatives, vendor};
};
|
||||
|
||||
/**
 * Convert a Microsoft transcription event into the common transcript shape.
 *
 * @param {object} evt - raw Microsoft event
 * @param {*} channel - value reported as channel_tag
 * @param {string} language - fallback language_code, used when the event
 *   carries no PrimaryLanguage.Language
 * @returns {object} {language_code, channel_tag, is_final, alternatives, vendor}
 *   where is_final is true only for RecognitionStatus 'Success' and
 *   vendor.evt is a JSON deep copy of the raw event
 */
const normalizeMicrosoft = (evt, channel, language) => {
  const rawEvent = JSON.parse(JSON.stringify(evt));
  const {NBest: nbest} = evt;

  let alternatives;
  if (nbest) {
    /* detailed output: one alternative per NBest entry */
    alternatives = nbest.map(({Confidence, Display}) => ({confidence: Confidence, transcript: Display}));
  }
  else {
    /* simple output: a single alternative with no confidence */
    alternatives = [{transcript: evt.DisplayText || evt.Text}];
  }

  return {
    language_code: evt.PrimaryLanguage?.Language || language,
    channel_tag: channel,
    is_final: evt.RecognitionStatus === 'Success',
    alternatives,
    vendor: {name: 'microsoft', evt: rawEvent}
  };
};
|
||||
|
||||
/**
 * Convert an AWS transcription event into the common transcript shape.
 *
 * AWS events are read as an array whose first element carries the result.
 * An empty array previously threw a TypeError on `evt[0].is_final`; it now
 * yields a non-final event with no alternatives. A bare (non-array) result
 * object is accepted as well.
 *
 * @param {Array<object>|object} evt - raw AWS event
 * @param {*} channel - value reported as channel_tag
 * @param {string} language - value reported as language_code
 * @returns {object} {language_code, channel_tag, is_final, alternatives, vendor}
 *   with vendor.evt holding a JSON deep copy of the raw event
 */
const normalizeAws = (evt, channel, language) => {
  const copy = JSON.parse(JSON.stringify(evt));
  const first = Array.isArray(evt) ? evt[0] : evt;

  return {
    language_code: language,
    channel_tag: channel,
    is_final: first?.is_final ?? false,
    alternatives: first?.alternatives ?? [],
    vendor: {
      name: 'aws',
      evt: copy
    }
  };
};
|
||||
|
||||
|
||||
module.exports = (logger) => {
|
||||
const normalizeTranscription = (evt, vendor, channel, language) => {
|
||||
let newEvent = JSON.parse(JSON.stringify(evt));
|
||||
|
||||
/* add in channel_tag and provide the full vendor-specific event */
|
||||
newEvent = {
|
||||
...(vendor === 'aws' ? newEvent[0] : newEvent),
|
||||
language_code: language,
|
||||
channel_tag: channel
|
||||
};
|
||||
|
||||
|
||||
if ('aws' === vendor && Array.isArray(evt) && evt.length > 0) {
|
||||
newEvent = {
|
||||
...newEvent,
|
||||
vendor: {event: evt, name: vendor}
|
||||
};
|
||||
logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription');
|
||||
switch (vendor) {
|
||||
case 'deepgram':
|
||||
return normalizeDeepgram(evt, channel, language);
|
||||
case 'microsoft':
|
||||
return normalizeMicrosoft(evt, channel, language);
|
||||
case 'google':
|
||||
return normalizeGoogle(evt, channel, language);
|
||||
case 'aws':
|
||||
return normalizeAws(evt, channel, language);
|
||||
case 'nuance':
|
||||
return normalizeNuance(evt, channel, language);
|
||||
default:
|
||||
logger.error(`Unknown vendor ${vendor}`);
|
||||
return evt;
|
||||
}
|
||||
else if ('microsoft' === vendor) {
|
||||
const nbest = evt.NBest;
|
||||
const language_code = evt.PrimaryLanguage?.Language || language;
|
||||
const alternatives = nbest ? nbest.map((n) => {
|
||||
return {
|
||||
confidence: n.Confidence,
|
||||
transcript: n.Display
|
||||
};
|
||||
}) :
|
||||
[
|
||||
{
|
||||
transcript: evt.DisplayText || evt.Text
|
||||
}
|
||||
];
|
||||
|
||||
newEvent = {
|
||||
...newEvent,
|
||||
is_final: evt.RecognitionStatus === 'Success',
|
||||
channel,
|
||||
language_code,
|
||||
alternatives,
|
||||
vendor: {event: evt, name: vendor}
|
||||
};
|
||||
}
|
||||
return newEvent;
|
||||
};
|
||||
|
||||
const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => {
|
||||
@@ -201,6 +269,48 @@ module.exports = (logger) => {
|
||||
{NUANCE_RESOURCES: JSON.stringify(nuanceOptions.resources)},
|
||||
};
|
||||
}
|
||||
else if ('deepgram' === rOpts.vendor) {
|
||||
const {deepgramOptions = {}} = rOpts;
|
||||
opts = {
|
||||
...opts,
|
||||
...(sttCredentials.api_key) &&
|
||||
{DEEPGRAM_API_KEY: sttCredentials.api_key},
|
||||
...(deepgramOptions.tier) &&
|
||||
{DEEPGRAM_SPEECH_TIER: deepgramOptions.tier},
|
||||
...(deepgramOptions.model) &&
|
||||
{DEEPGRAM_SPEECH_MODEL: deepgramOptions.model},
|
||||
...(deepgramOptions.punctuate) &&
|
||||
{DEEPGRAM_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1},
|
||||
...(deepgramOptions.profanityFilter) &&
|
||||
{DEEPGRAM_SPEECH_PROFANITY_FILTER: 1},
|
||||
...(deepgramOptions.redact) &&
|
||||
{DEEPGRAM_SPEECH_REDACT: 1},
|
||||
...(deepgramOptions.diarize) &&
|
||||
{DEEPGRAM_SPEECH_DIARIZE: 1},
|
||||
...(deepgramOptions.diarizeVersion) &&
|
||||
{DEEPGRAM_SPEECH_DIARIZE_VERSION: deepgramOptions.diarizeVersion},
|
||||
...(deepgramOptions.ner) &&
|
||||
{DEEPGRAM_SPEECH_NER: 1},
|
||||
...(deepgramOptions.alternatives) &&
|
||||
{DEEPGRAM_SPEECH_ALTERNATIVES: deepgramOptions.alternatives},
|
||||
...(deepgramOptions.numerals) &&
|
||||
{DEEPGRAM_SPEECH_NUMERALS: deepgramOptions.numerals},
|
||||
...(deepgramOptions.search) &&
|
||||
{DEEPGRAM_SPEECH_SEARCH: deepgramOptions.search.join(',')},
|
||||
...(deepgramOptions.replace) &&
|
||||
{DEEPGRAM_SPEECH_REPLACE: deepgramOptions.replace.join(',')},
|
||||
...(rOpts.hints.length > 0 &&
|
||||
{DEEPGRAM_SPEECH_KEYWORDS: rOpts.hints.join(',')}),
|
||||
...(deepgramOptions.keywords) &&
|
||||
{DEEPGRAM_SPEECH_KEYWORDS: deepgramOptions.keywords.join(',')},
|
||||
...('endpointing' in deepgramOptions) &&
|
||||
{DEEPGRAM_SPEECH_ENDPOINTING: deepgramOptions.endpointing},
|
||||
...(deepgramOptions.vadTurnoff) &&
|
||||
{DEEPGRAM_SPEECH_VAD_TURNOFF: deepgramOptions.vadTurnoff},
|
||||
...(deepgramOptions.tag) &&
|
||||
{DEEPGRAM_SPEECH_VAD_TURNOFF: deepgramOptions.tag}
|
||||
};
|
||||
}
|
||||
logger.debug({opts}, 'recognizer channel vars');
|
||||
return opts;
|
||||
};
|
||||
@@ -223,6 +333,11 @@ module.exports = (logger) => {
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);
|
||||
|
||||
ep.removeCustomEventListener(DeepgramTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(DeepgramTranscriptionEvents.Connect);
|
||||
ep.removeCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure);
|
||||
|
||||
|
||||
};
|
||||
return {
|
||||
normalizeTranscription,
|
||||
|
||||
1786
package-lock.json
generated
1786
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -26,7 +26,7 @@
|
||||
"dependencies": {
|
||||
"@jambonz/db-helpers": "^0.7.3",
|
||||
"@jambonz/http-health-check": "^0.0.1",
|
||||
"@jambonz/realtimedb-helpers": "^0.5.7",
|
||||
"@jambonz/realtimedb-helpers": "^0.6.0",
|
||||
"@jambonz/stats-collector": "^0.1.6",
|
||||
"@jambonz/time-series": "^0.2.5",
|
||||
"@opentelemetry/api": "^1.2.0",
|
||||
@@ -42,7 +42,7 @@
|
||||
"bent": "^7.3.12",
|
||||
"debug": "^4.3.4",
|
||||
"deepcopy": "^2.1.0",
|
||||
"drachtio-fsmrf": "^3.0.5",
|
||||
"drachtio-fsmrf": "^3.0.6",
|
||||
"drachtio-srf": "^4.5.18",
|
||||
"express": "^4.18.2",
|
||||
"helmet": "^5.1.1",
|
||||
|
||||
@@ -57,7 +57,7 @@ services:
|
||||
condition: service_healthy
|
||||
|
||||
freeswitch:
|
||||
image: drachtio/drachtio-freeswitch-mrf:0.4.15
|
||||
image: drachtio/drachtio-freeswitch-mrf:0.4.18
|
||||
restart: always
|
||||
command: freeswitch --rtp-range-start 20000 --rtp-range-end 20100
|
||||
environment:
|
||||
|
||||
@@ -45,7 +45,8 @@ test('\'gather\' test - google', async(t) => {
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
||||
//console.log(JSON.stringify(obj));
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
|
||||
'gather: succeeds when using google credentials');
|
||||
|
||||
disconnect();
|
||||
@@ -80,7 +81,8 @@ test('\'gather\' test - default (google)', async(t) => {
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
||||
//console.log(JSON.stringify(obj));
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase() === 'i\'d like to speak to customer support',
|
||||
'gather: succeeds when using default (google) credentials');
|
||||
|
||||
disconnect();
|
||||
@@ -119,7 +121,8 @@ test('\'gather\' test - microsoft', async(t) => {
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
||||
//console.log(JSON.stringify(obj));
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
|
||||
'gather: succeeds when using microsoft credentials');
|
||||
|
||||
disconnect();
|
||||
@@ -158,7 +161,8 @@ test('\'gather\' test - aws', async(t) => {
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
||||
//console.log(JSON.stringify(obj));
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
|
||||
'gather: succeeds when using aws credentials');
|
||||
|
||||
disconnect();
|
||||
@@ -168,3 +172,46 @@ test('\'gather\' test - aws', async(t) => {
|
||||
t.error(err);
|
||||
}
|
||||
});
|
||||
|
||||
/*
 * Integration test: a 'gather' verb using the deepgram recognizer, with the
 * api key supplied at run time through deepgramOptions. Skipped (as a pass)
 * when DEEPGRAM_API_KEY is not set in the environment.
 */
test('\'gather\' test - deepgram', async(t) => {
  const apiKey = process.env.DEEPGRAM_API_KEY;
  if (!apiKey) {
    t.pass('skipping deepgram tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);

    // GIVEN
    const from = 'gather_success';
    const recognizer = {
      vendor: 'deepgram',
      hints: ['customer support', 'sales', 'human resources', 'HR'],
      deepgramOptions: {apiKey}
    };
    const verbs = [
      {
        verb: 'gather',
        input: ['speech'],
        recognizer,
        timeout: 10,
        actionHook: '/actionHook'
      }
    ];
    provisionCallHook(from, verbs);

    // THEN
    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
    const obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    const transcript = obj.body.speech.alternatives[0].transcript.toLowerCase();
    t.ok(transcript.startsWith('i\'d like to speak to customer support'),
      'gather: succeeds when using deepgram credentials');

    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});
|
||||
|
||||
@@ -43,7 +43,7 @@ test('\'transcribe\' test - google', async(t) => {
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
|
||||
'transcribe: succeeds when using google credentials');
|
||||
|
||||
disconnect();
|
||||
@@ -80,7 +80,7 @@ test('\'transcribe\' test - microsoft', async(t) => {
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
|
||||
'transcribe: succeeds when using microsoft credentials');
|
||||
|
||||
disconnect();
|
||||
@@ -117,7 +117,7 @@ test('\'transcribe\' test - aws', async(t) => {
|
||||
// THEN
|
||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
||||
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
|
||||
'transcribe: succeeds when using aws credentials');
|
||||
|
||||
disconnect();
|
||||
@@ -127,3 +127,43 @@ test('\'transcribe\' test - aws', async(t) => {
|
||||
t.error(err);
|
||||
}
|
||||
});
|
||||
|
||||
/*
 * Integration test: a 'transcribe' verb using the deepgram recognizer, with
 * the api key supplied at run time through deepgramOptions. Skipped (as a
 * pass) when DEEPGRAM_API_KEY is not set in the environment.
 */
test('\'transcribe\' test - deepgram', async(t) => {
  if (!process.env.DEEPGRAM_API_KEY ) {
    t.pass('skipping deepgram tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);
    // GIVEN
    let verbs = [
      {
        "verb": "transcribe",
        "recognizer": {
          /* was "aws", which meant this test never exercised the deepgram recognizer */
          "vendor": "deepgram",
          "hints": ["customer support", "sales", "human resources", "HR"],
          "deepgramOptions": {
            "apiKey": process.env.DEEPGRAM_API_KEY
          }
        },
        "transcriptionHook": "/transcriptionHook"
      }
    ];
    let from = "gather_success";
    provisionCallHook(from, verbs);
    // THEN
    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
    let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'transcribe: succeeds when using deepgram credentials');

    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});
|
||||
Reference in New Issue
Block a user