Feature/deepgram stt (#190)

* initial changes to support deepgram stt

* fixes for normalizing vendor-specific transcriptions

* update to latest drachtio-fsmrf with support for deepgram stt

* deepgram parsing error

* hints support for deepgram

* handling deepgram errors

* ignore late arriving transcripts for deepgram

* handling of empty transcripts

* transcribe changes

* allow deepgram stt credentials to be provided at run time

* bind channel in transcription handler

* fixes for transcribe when handling empty transcripts

* more empty transcript fixes

* update tests to latest modules

* add test cases for deepgram speech recognition
This commit is contained in:
Dave Horton
2022-11-12 19:48:59 -05:00
committed by GitHub
parent f511e6ab6b
commit 8686348454
12 changed files with 2148 additions and 152 deletions

View File

@@ -569,6 +569,12 @@ class CallSession extends Emitter {
secret: credential.secret secret: credential.secret
}; };
} }
else if ('deepgram' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
api_key: credential.api_key
};
}
} }
else { else {
writeAlerts({ writeAlerts({

View File

@@ -5,7 +5,8 @@ const {
GoogleTranscriptionEvents, GoogleTranscriptionEvents,
NuanceTranscriptionEvents, NuanceTranscriptionEvents,
AwsTranscriptionEvents, AwsTranscriptionEvents,
AzureTranscriptionEvents AzureTranscriptionEvents,
DeepgramTranscriptionEvents
} = require('../utils/constants'); } = require('../utils/constants');
const makeTask = require('./make_task'); const makeTask = require('./make_task');
@@ -54,11 +55,14 @@ class TaskGather extends Task {
this.vendor = recognizer.vendor; this.vendor = recognizer.vendor;
this.language = recognizer.language; this.language = recognizer.language;
/* let credentials be supplied in the recognizer object at runtime */
if (recognizer.vendor === 'nuance') { if (recognizer.vendor === 'nuance') {
const {clientId, secret} = recognizer.nuanceOptions; const {clientId, secret} = recognizer.nuanceOptions;
if (clientId && secret) { if (clientId && secret) this.sttCredentials = {client_id: clientId, secret};
this.sttCredentials = {client_id: clientId, secret}; }
} else if (recognizer.vendor === 'deepgram') {
const {apiKey} = recognizer.deepgramOptions;
if (apiKey) this.sttCredentials = {api_key: apiKey};
} }
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */ /* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
@@ -338,8 +342,16 @@ class TaskGather extends Task {
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) { if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
opts.NUANCE_STALL_TIMERS = 1; opts.NUANCE_STALL_TIMERS = 1;
} }
break; break;
case 'deepgram':
this.bugname = 'deepgram_transcribe';
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, this._onDeepgramConnect.bind(this, cs, ep));
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
this._onDeepGramConnectFailure.bind(this, cs, ep));
break;
default: default:
throw new Error(`Invalid vendor ${this.vendor}`); throw new Error(`Invalid vendor ${this.vendor}`);
} }
@@ -441,30 +453,31 @@ class TaskGather extends Task {
_onTranscription(cs, ep, evt, fsEvent) { _onTranscription(cs, ep, evt, fsEvent) {
// make sure this is not a transcript from answering machine detection // make sure this is not a transcript from answering machine detection
this.logger.debug({evt}, 'Gather:_onTranscription');
const bugname = fsEvent.getHeader('media-bugname'); const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished'); const finished = fsEvent.getHeader('transcription-session-finished');
this.logger.debug({evt, bugname, finished}, 'Gather:_onTranscription');
if (bugname && this.bugname !== bugname) return; if (bugname && this.bugname !== bugname) return;
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language); evt = this.normalizeTranscription(evt, this.vendor, 1, this.language);
/* count words for bargein feature */ /* count words for bargein feature */
const words = evt.alternatives[0].transcript.split(' ').length; const words = evt.alternatives[0]?.transcript.split(' ').length;
const bufferedWords = this._bufferedTranscripts.reduce((count, e) => { const bufferedWords = this._bufferedTranscripts.reduce((count, e) => {
return count + e.alternatives[0].transcript.split(' ').length; return count + e.alternatives[0]?.transcript.split(' ').length;
}, 0); }, 0);
if (evt.is_final) { if (evt.is_final) {
if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) { if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) {
if ('microsoft' === this.vendor && finished === 'true') { if (finished === 'true' && ['microsoft', 'deepgram'].includes(this.vendor)) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding'); this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding');
} }
else { else {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again'); this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
this._startTranscribing(ep); //this._startTranscribing(ep);
} }
return; return;
} }
if (this.isContinuousAsr) { if (this.isContinuousAsr) {
/* append the transcript and start listening again for asrTimeout */ /* append the transcript and start listening again for asrTimeout */
const t = evt.alternatives[0].transcript; const t = evt.alternatives[0].transcript;
@@ -548,6 +561,23 @@ class TaskGather extends Task {
return this._resolve('timeout'); return this._resolve('timeout');
} }
} }
/**
 * Handler for the Deepgram "connect" custom event.
 * Only emits a debug log line; neither argument is used.
 *
 * @param {object} _cs - call session (unused)
 * @param {object} _ep - endpoint (unused)
 */
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onDeepgramConnect');
}
_onDeepGramConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskGather:_onDeepgramConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to Deepgram speech recognizer: ${reason}`,
vendor: 'deepgram',
}).catch((err) => this.logger.info({err}, 'Error generating alert for deepgram connection failure'));
this.notifyError(`Failed connecting to speech vendor deepgram: ${reason}`);
this.notifyTaskDone();
}
_onVadDetected(cs, ep) { _onVadDetected(cs, ep) {
if (this.bargein && this.minBargeinWordCount === 0) { if (this.bargein && this.minBargeinWordCount === 0) {

View File

@@ -445,7 +445,7 @@
"properties": { "properties": {
"vendor": { "vendor": {
"type": "string", "type": "string",
"enum": ["google", "aws", "microsoft", "nuance", "default"] "enum": ["google", "aws", "microsoft", "nuance", "deepgram", "default"]
}, },
"language": "string", "language": "string",
"vad": "#vad", "vad": "#vad",
@@ -510,12 +510,63 @@
"azureSttEndpointId": "string", "azureSttEndpointId": "string",
"asrDtmfTerminationDigit": "string", "asrDtmfTerminationDigit": "string",
"asrTimeout": "number", "asrTimeout": "number",
"nuanceOptions": "#nuanceOptions" "nuanceOptions": "#nuanceOptions",
"deepgramOptions": "#deepgramOptions"
}, },
"required": [ "required": [
"vendor" "vendor"
] ]
}, },
"deepgramOptions": {
"properties": {
"apiKey": "string",
"tier": {
"type": "string",
"enum": [
"enhanced",
"base"
]
},
"model": {
"type": "string",
"enum": [
"general",
"meeting",
"phonecall",
"voicemail",
"finance",
"conversationalai",
"video",
"custom"
]
},
"customModel": "string",
"version": "string",
"punctuate": "boolean",
"profanityFilter": "boolean",
"redact": {
"type": "string",
"enum": [
"pci",
"numbers",
"true",
"ssn"
]
},
"diarize": "boolean",
"diarizeVersion": "string",
"ner": "boolean",
"multichannel": "boolean",
"alternatives": "number",
"numerals": "boolean",
"search": "array",
"replace": "array",
"keywords": "array",
"endpointing": "boolean",
"vadTurnoff": "number",
"tag": "string"
}
},
"nuanceOptions": { "nuanceOptions": {
"properties": { "properties": {
"clientId": "string", "clientId": "string",

View File

@@ -5,7 +5,8 @@ const {
GoogleTranscriptionEvents, GoogleTranscriptionEvents,
AzureTranscriptionEvents, AzureTranscriptionEvents,
AwsTranscriptionEvents, AwsTranscriptionEvents,
NuanceTranscriptionEvents NuanceTranscriptionEvents,
DeepgramTranscriptionEvents
} = require('../utils/constants'); } = require('../utils/constants');
const normalizeJambones = require('../utils/normalize-jambones'); const normalizeJambones = require('../utils/normalize-jambones');
@@ -15,9 +16,14 @@ class TaskTranscribe extends Task {
this.preconditions = TaskPreconditions.Endpoint; this.preconditions = TaskPreconditions.Endpoint;
this.parentTask = parentTask; this.parentTask = parentTask;
const {setChannelVarsForStt, normalizeTranscription} = require('../utils/transcription-utils')(logger); const {
setChannelVarsForStt,
normalizeTranscription,
removeSpeechListeners
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt; this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription; this.normalizeTranscription = normalizeTranscription;
this.removeSpeechListeners = removeSpeechListeners;
this.transcriptionHook = this.data.transcriptionHook; this.transcriptionHook = this.data.transcriptionHook;
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia); this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
@@ -28,12 +34,17 @@ class TaskTranscribe extends Task {
this.interim = !!recognizer.interim; this.interim = !!recognizer.interim;
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel; this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
/* let credentials be supplied in the recognizer object at runtime */
if (recognizer.vendor === 'nuance') { if (recognizer.vendor === 'nuance') {
const {clientId, secret} = recognizer.nuanceOptions; const {clientId, secret} = recognizer.nuanceOptions;
if (clientId && secret) { if (clientId && secret) {
this.sttCredentials = {client_id: clientId, secret}; this.sttCredentials = {client_id: clientId, secret};
} }
} }
else if (recognizer.vendor === 'deepgram') {
const {apiKey} = recognizer.deepgramOptions;
if (apiKey) this.sttCredentials = {api_key: apiKey};
}
recognizer.hints = recognizer.hints || []; recognizer.hints = recognizer.hints || [];
recognizer.altLanguages = recognizer.altLanguages || []; recognizer.altLanguages = recognizer.altLanguages || [];
@@ -69,7 +80,7 @@ class TaskTranscribe extends Task {
if (!this.data.recognizer.vendor) { if (!this.data.recognizer.vendor) {
this.data.recognizer.vendor = this.vendor; this.data.recognizer.vendor = this.vendor;
} }
this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt'); if (!this.sttCredentials) this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
try { try {
if (!this.sttCredentials) { if (!this.sttCredentials) {
@@ -105,22 +116,7 @@ class TaskTranscribe extends Task {
this.logger.info(err, 'TaskTranscribe:exec - error'); this.logger.info(err, 'TaskTranscribe:exec - error');
this.parentTask && this.parentTask.emit('error', err); this.parentTask && this.parentTask.emit('error', err);
} }
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription); this.removeSpeechListeners(ep);
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
ep.removeCustomEventListener(GoogleTranscriptionEvents.VadDetected);
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
ep.removeCustomEventListener(AwsTranscriptionEvents.VadDetected);
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
ep.removeCustomEventListener(AzureTranscriptionEvents.VadDetected);
ep.removeCustomEventListener(NuanceTranscriptionEvents.Transcription);
ep.removeCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete);
ep.removeCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech);
ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);
} }
async kill(cs) { async kill(cs) {
@@ -184,6 +180,15 @@ class TaskTranscribe extends Task {
ep.addCustomEventListener(AzureTranscriptionEvents.Error, ep.addCustomEventListener(AzureTranscriptionEvents.Error,
this._onNuanceError.bind(this, cs, ep, channel)); this._onNuanceError.bind(this, cs, ep, channel));
break; break;
case 'deepgram':
this.bugname = 'deepgram_transcribe';
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect,
this._onDeepgramConnect.bind(this, cs, ep, channel));
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
break;
default: default:
throw new Error(`Invalid vendor ${this.vendor}`); throw new Error(`Invalid vendor ${this.vendor}`);
} }
@@ -215,9 +220,15 @@ class TaskTranscribe extends Task {
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription'); this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
if (evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) { if (evt.alternatives[0]?.transcript === '' && !cs.callGone && !this.killed) {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again'); if (['microsoft', 'deepgram'].includes(this.vendor)) {
return this._transcribe(ep); this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening');
}
else {
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, listen again');
this._transcribe(ep);
}
return;
} }
if (this.transcriptionHook) { if (this.transcriptionHook) {
@@ -268,6 +279,34 @@ class TaskTranscribe extends Task {
this._timer = null; this._timer = null;
} }
} }
/**
 * Handler for recognizer error events carrying `code`, `error` and `details`.
 * A 404 "No speech" is logged at debug level and resolved as 'timeout'; any other
 * error is logged at info level, and a 413 "Too much speech" is also resolved
 * as 'timeout'. All remaining errors are logged only.
 *
 * NOTE(review): the listener registration visible in this file binds
 * (cs, ep, channel) ahead of the event, which would place the channel in the
 * `evt` position here - confirm the intended argument order.
 * NOTE(review): `_resolve` is not defined in the visible part of this class -
 * confirm it exists before relying on the 'timeout' paths.
 *
 * @param {object} _cs - call session (unused)
 * @param {object} _ep - endpoint (unused)
 * @param {object} evt - error event with `code`, `error`, `details`
 */
_onNuanceError(_cs, _ep, evt) {
const {code, error, details} = evt;
/* "no speech" is an expected outcome, hence debug rather than info */
if (code === 404 && error === 'No speech') {
this.logger.debug({code, error, details}, 'TaskTranscribe:_onNuanceError');
return this._resolve('timeout');
}
this.logger.info({code, error, details}, 'TaskTranscribe:_onNuanceError');
if (code === 413 && error === 'Too much speech') {
return this._resolve('timeout');
}
}
/**
 * Handler for the Deepgram "connect" custom event.
 * Only emits a debug log line; neither argument is used.
 *
 * @param {object} _cs - call session (unused)
 * @param {object} _ep - endpoint (unused)
 */
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
}
_onDeepGramConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskTranscribe:_onDeepgramConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to Deepgram speech recognizer: ${reason}`,
vendor: 'deepgram',
}).catch((err) => this.logger.info({err}, 'Error generating alert for deepgram connection failure'));
this.notifyError(`Failed connecting to speech vendor deepgram: ${reason}`);
this.notifyTaskDone();
}
} }
module.exports = TaskTranscribe; module.exports = TaskTranscribe;

View File

@@ -74,6 +74,11 @@
"Error": "nuance_transcribe::error", "Error": "nuance_transcribe::error",
"VadDetected": "nuance_transcribe::vad_detected" "VadDetected": "nuance_transcribe::vad_detected"
}, },
"DeepgramTranscriptionEvents": {
"Transcription": "deepgram_transcribe::transcription",
"ConnectFailure": "deepgram_transcribe::connect_failed",
"Connect": "deepgram_transcribe::connect"
},
"AwsTranscriptionEvents": { "AwsTranscriptionEvents": {
"Transcription": "aws_transcribe::transcription", "Transcription": "aws_transcribe::transcription",
"EndOfTranscript": "aws_transcribe::end_of_transcript", "EndOfTranscript": "aws_transcribe::end_of_transcript",

View File

@@ -51,6 +51,10 @@ const speechMapper = (cred) => {
obj.client_id = o.client_id; obj.client_id = o.client_id;
obj.secret = o.secret; obj.secret = o.secret;
} }
else if ('deepgram' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
}
} catch (err) { } catch (err) {
console.log(err); console.log(err);
} }
@@ -74,6 +78,7 @@ module.exports = (logger, srf) => {
const haveMicrosoft = speech.find((s) => s.vendor === 'microsoft'); const haveMicrosoft = speech.find((s) => s.vendor === 'microsoft');
const haveWellsaid = speech.find((s) => s.vendor === 'wellsaid'); const haveWellsaid = speech.find((s) => s.vendor === 'wellsaid');
const haveNuance = speech.find((s) => s.vendor === 'nuance'); const haveNuance = speech.find((s) => s.vendor === 'nuance');
const haveDeepgram = speech.find((s) => s.vendor === 'deepgram');
if (!haveGoogle || !haveAws || !haveMicrosoft || !haveWellsaid || !haveNuance) { if (!haveGoogle || !haveAws || !haveMicrosoft || !haveWellsaid || !haveNuance) {
const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid); const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
if (r3.length) { if (r3.length) {
@@ -97,6 +102,10 @@ module.exports = (logger, srf) => {
const nuance = r3.find((s) => s.vendor === 'nuance'); const nuance = r3.find((s) => s.vendor === 'nuance');
if (nuance) speech.push(speechMapper(nuance)); if (nuance) speech.push(speechMapper(nuance));
} }
if (!haveDeepgram) {
const deepgram = r3.find((s) => s.vendor === 'deepgram');
if (deepgram) speech.push(speechMapper(deepgram));
}
} }
} }

View File

@@ -3,52 +3,120 @@ const {
AzureTranscriptionEvents, AzureTranscriptionEvents,
GoogleTranscriptionEvents, GoogleTranscriptionEvents,
AwsTranscriptionEvents, AwsTranscriptionEvents,
NuanceTranscriptionEvents NuanceTranscriptionEvents,
DeepgramTranscriptionEvents,
} = require('./constants'); } = require('./constants');
/**
 * Convert a Deepgram transcription event into the vendor-neutral shape.
 *
 * @param {object} evt - Deepgram event; `channel.alternatives` and `is_final` are read
 * @param {number} channel - value reported as `channel_tag`
 * @param {string} language - value reported as `language_code`
 * @returns {object} normalized event; a JSON deep copy of `evt` is kept under `vendor.evt`
 */
const normalizeDeepgram = (evt, channel, language) => {
  const vendorEvt = JSON.parse(JSON.stringify(evt));

  /* keep only confidence and transcript; a missing channel/alternatives yields an empty list */
  const rawAlternatives = evt.channel?.alternatives || [];
  const alternatives = rawAlternatives.map(({confidence, transcript}) => {
    return {confidence, transcript};
  });

  const vendor = {name: 'deepgram', evt: vendorEvt};
  return {
    language_code: language,
    channel_tag: channel,
    is_final: evt.is_final,
    alternatives,
    vendor
  };
};
/**
 * Convert a Google transcription event into the vendor-neutral shape.
 * `is_final` and `alternatives` are passed through unchanged.
 *
 * @param {object} evt - Google event
 * @param {number} channel - value reported as `channel_tag`
 * @param {string} language - value reported as `language_code`
 * @returns {object} normalized event; a JSON deep copy of `evt` is kept under `vendor.evt`
 */
const normalizeGoogle = (evt, channel, language) => {
  const vendor = {name: 'google', evt: JSON.parse(JSON.stringify(evt))};
  const {is_final, alternatives} = evt;
  return {
    language_code: language,
    channel_tag: channel,
    is_final,
    alternatives,
    vendor
  };
};
/**
 * Convert a Nuance transcription event into the vendor-neutral shape.
 * `is_final` and `alternatives` are passed through unchanged.
 *
 * @param {object} evt - Nuance event
 * @param {number} channel - value reported as `channel_tag`
 * @param {string} language - value reported as `language_code`
 * @returns {object} normalized event; a JSON deep copy of `evt` is kept under `vendor.evt`
 */
const normalizeNuance = (evt, channel, language) => {
  const vendor = {name: 'nuance', evt: JSON.parse(JSON.stringify(evt))};
  const {is_final, alternatives} = evt;
  return {
    language_code: language,
    channel_tag: channel,
    is_final,
    alternatives,
    vendor
  };
};
/**
 * Convert a Microsoft transcription event into the vendor-neutral shape.
 *
 * - `language_code` prefers `evt.PrimaryLanguage.Language`, else `language`.
 * - `alternatives` come from `evt.NBest` when present, otherwise a single
 *   entry built from `DisplayText` (or `Text`).
 * - `is_final` is true only when `RecognitionStatus` is 'Success'.
 *
 * @param {object} evt - Microsoft event
 * @param {number} channel - value reported as `channel_tag`
 * @param {string} language - fallback for `language_code`
 * @returns {object} normalized event; a JSON deep copy of `evt` is kept under `vendor.evt`
 */
const normalizeMicrosoft = (evt, channel, language) => {
  const vendorEvt = JSON.parse(JSON.stringify(evt));
  const {NBest: candidates} = evt;
  const language_code = evt.PrimaryLanguage?.Language || language;

  let alternatives;
  if (candidates) {
    alternatives = candidates.map(({Confidence, Display}) => ({
      confidence: Confidence,
      transcript: Display
    }));
  }
  else {
    alternatives = [{transcript: evt.DisplayText || evt.Text}];
  }

  const is_final = evt.RecognitionStatus === 'Success';
  return {
    language_code,
    channel_tag: channel,
    is_final,
    alternatives,
    vendor: {
      name: 'microsoft',
      evt: vendorEvt
    }
  };
};
/**
 * Convert an AWS transcription event into the vendor-neutral shape.
 * AWS delivers an array of results; only the first entry is used.
 *
 * An event with no first entry (e.g. an empty array) is normalized to a
 * non-final result with no alternatives instead of throwing, so callers that
 * read `alternatives[0]?.transcript` keep working.
 *
 * @param {Array<object>} evt - AWS event (array of results)
 * @param {number} channel - value reported as `channel_tag`
 * @param {string} language - value reported as `language_code`
 * @returns {object} normalized event; a JSON deep copy of `evt` is kept under `vendor.evt`
 */
const normalizeAws = (evt, channel, language) => {
  const copy = JSON.parse(JSON.stringify(evt));
  const first = evt[0];
  const haveResult = first !== undefined && first !== null;
  return {
    language_code: language,
    channel_tag: channel,
    is_final: haveResult ? first.is_final : false,
    alternatives: haveResult ? first.alternatives : [],
    vendor: {
      name: 'aws',
      evt: copy
    }
  };
};
module.exports = (logger) => { module.exports = (logger) => {
const normalizeTranscription = (evt, vendor, channel, language) => { const normalizeTranscription = (evt, vendor, channel, language) => {
let newEvent = JSON.parse(JSON.stringify(evt));
/* add in channel_tag and provide the full vendor-specific event */ logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription');
newEvent = { switch (vendor) {
...(vendor === 'aws' ? newEvent[0] : newEvent), case 'deepgram':
language_code: language, return normalizeDeepgram(evt, channel, language);
channel_tag: channel case 'microsoft':
}; return normalizeMicrosoft(evt, channel, language);
case 'google':
return normalizeGoogle(evt, channel, language);
if ('aws' === vendor && Array.isArray(evt) && evt.length > 0) { case 'aws':
newEvent = { return normalizeAws(evt, channel, language);
...newEvent, case 'nuance':
vendor: {event: evt, name: vendor} return normalizeNuance(evt, channel, language);
}; default:
logger.error(`Unknown vendor ${vendor}`);
return evt;
} }
else if ('microsoft' === vendor) {
const nbest = evt.NBest;
const language_code = evt.PrimaryLanguage?.Language || language;
const alternatives = nbest ? nbest.map((n) => {
return {
confidence: n.Confidence,
transcript: n.Display
};
}) :
[
{
transcript: evt.DisplayText || evt.Text
}
];
newEvent = {
...newEvent,
is_final: evt.RecognitionStatus === 'Success',
channel,
language_code,
alternatives,
vendor: {event: evt, name: vendor}
};
}
return newEvent;
}; };
const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => { const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => {
@@ -201,6 +269,48 @@ module.exports = (logger) => {
{NUANCE_RESOURCES: JSON.stringify(nuanceOptions.resources)}, {NUANCE_RESOURCES: JSON.stringify(nuanceOptions.resources)},
}; };
} }
else if ('deepgram' === rOpts.vendor) {
const {deepgramOptions = {}} = rOpts;
opts = {
...opts,
...(sttCredentials.api_key) &&
{DEEPGRAM_API_KEY: sttCredentials.api_key},
...(deepgramOptions.tier) &&
{DEEPGRAM_SPEECH_TIER: deepgramOptions.tier},
...(deepgramOptions.model) &&
{DEEPGRAM_SPEECH_MODEL: deepgramOptions.model},
...(deepgramOptions.punctuate) &&
{DEEPGRAM_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1},
...(deepgramOptions.profanityFilter) &&
{DEEPGRAM_SPEECH_PROFANITY_FILTER: 1},
...(deepgramOptions.redact) &&
{DEEPGRAM_SPEECH_REDACT: 1},
...(deepgramOptions.diarize) &&
{DEEPGRAM_SPEECH_DIARIZE: 1},
...(deepgramOptions.diarizeVersion) &&
{DEEPGRAM_SPEECH_DIARIZE_VERSION: deepgramOptions.diarizeVersion},
...(deepgramOptions.ner) &&
{DEEPGRAM_SPEECH_NER: 1},
...(deepgramOptions.alternatives) &&
{DEEPGRAM_SPEECH_ALTERNATIVES: deepgramOptions.alternatives},
...(deepgramOptions.numerals) &&
{DEEPGRAM_SPEECH_NUMERALS: deepgramOptions.numerals},
...(deepgramOptions.search) &&
{DEEPGRAM_SPEECH_SEARCH: deepgramOptions.search.join(',')},
...(deepgramOptions.replace) &&
{DEEPGRAM_SPEECH_REPLACE: deepgramOptions.replace.join(',')},
...(rOpts.hints.length > 0 &&
{DEEPGRAM_SPEECH_KEYWORDS: rOpts.hints.join(',')}),
...(deepgramOptions.keywords) &&
{DEEPGRAM_SPEECH_KEYWORDS: deepgramOptions.keywords.join(',')},
...('endpointing' in deepgramOptions) &&
{DEEPGRAM_SPEECH_ENDPOINTING: deepgramOptions.endpointing},
...(deepgramOptions.vadTurnoff) &&
{DEEPGRAM_SPEECH_VAD_TURNOFF: deepgramOptions.vadTurnoff},
...(deepgramOptions.tag) &&
{DEEPGRAM_SPEECH_VAD_TURNOFF: deepgramOptions.tag}
};
}
logger.debug({opts}, 'recognizer channel vars'); logger.debug({opts}, 'recognizer channel vars');
return opts; return opts;
}; };
@@ -223,6 +333,11 @@ module.exports = (logger) => {
ep.removeCustomEventListener(NuanceTranscriptionEvents.Error); ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected); ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);
ep.removeCustomEventListener(DeepgramTranscriptionEvents.Transcription);
ep.removeCustomEventListener(DeepgramTranscriptionEvents.Connect);
ep.removeCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure);
}; };
return { return {
normalizeTranscription, normalizeTranscription,

1786
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -26,7 +26,7 @@
"dependencies": { "dependencies": {
"@jambonz/db-helpers": "^0.7.3", "@jambonz/db-helpers": "^0.7.3",
"@jambonz/http-health-check": "^0.0.1", "@jambonz/http-health-check": "^0.0.1",
"@jambonz/realtimedb-helpers": "^0.5.7", "@jambonz/realtimedb-helpers": "^0.6.0",
"@jambonz/stats-collector": "^0.1.6", "@jambonz/stats-collector": "^0.1.6",
"@jambonz/time-series": "^0.2.5", "@jambonz/time-series": "^0.2.5",
"@opentelemetry/api": "^1.2.0", "@opentelemetry/api": "^1.2.0",
@@ -42,7 +42,7 @@
"bent": "^7.3.12", "bent": "^7.3.12",
"debug": "^4.3.4", "debug": "^4.3.4",
"deepcopy": "^2.1.0", "deepcopy": "^2.1.0",
"drachtio-fsmrf": "^3.0.5", "drachtio-fsmrf": "^3.0.6",
"drachtio-srf": "^4.5.18", "drachtio-srf": "^4.5.18",
"express": "^4.18.2", "express": "^4.18.2",
"helmet": "^5.1.1", "helmet": "^5.1.1",

View File

@@ -57,7 +57,7 @@ services:
condition: service_healthy condition: service_healthy
freeswitch: freeswitch:
image: drachtio/drachtio-freeswitch-mrf:0.4.15 image: drachtio/drachtio-freeswitch-mrf:0.4.18
restart: always restart: always
command: freeswitch --rtp-range-start 20000 --rtp-range-end 20100 command: freeswitch --rtp-range-start 20000 --rtp-range-end 20100
environment: environment:

View File

@@ -45,7 +45,8 @@ test('\'gather\' test - google', async(t) => {
// THEN // THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from); await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`); let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support', //console.log(JSON.stringify(obj));
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'gather: succeeds when using google credentials'); 'gather: succeeds when using google credentials');
disconnect(); disconnect();
@@ -80,7 +81,8 @@ test('\'gather\' test - default (google)', async(t) => {
// THEN // THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from); await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`); let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support', //console.log(JSON.stringify(obj));
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase() === 'i\'d like to speak to customer support',
'gather: succeeds when using default (google) credentials'); 'gather: succeeds when using default (google) credentials');
disconnect(); disconnect();
@@ -119,7 +121,8 @@ test('\'gather\' test - microsoft', async(t) => {
// THEN // THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from); await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`); let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support', //console.log(JSON.stringify(obj));
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'gather: succeeds when using microsoft credentials'); 'gather: succeeds when using microsoft credentials');
disconnect(); disconnect();
@@ -158,7 +161,8 @@ test('\'gather\' test - aws', async(t) => {
// THEN // THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from); await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`); let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support', //console.log(JSON.stringify(obj));
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'gather: succeeds when using aws credentials'); 'gather: succeeds when using aws credentials');
disconnect(); disconnect();
@@ -167,4 +171,47 @@ test('\'gather\' test - aws', async(t) => {
disconnect(); disconnect();
t.error(err); t.error(err);
} }
}); });
/* gather with Deepgram speech recognition; skipped unless DEEPGRAM_API_KEY is set */
test('\'gather\' test - deepgram', async(t) => {
  if (!process.env.DEEPGRAM_API_KEY ) {
    t.pass('skipping deepgram tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');
  try {
    await connect(srf);

    // GIVEN: a gather verb whose recognizer carries the Deepgram api key at run time
    const recognizer = {
      vendor: 'deepgram',
      hints: ['customer support', 'sales', 'human resources', 'HR'],
      deepgramOptions: {
        apiKey: process.env.DEEPGRAM_API_KEY
      }
    };
    const verbs = [
      {
        verb: 'gather',
        input: ['speech'],
        recognizer,
        timeout: 10,
        actionHook: '/actionHook'
      }
    ];
    const from = 'gather_success';
    provisionCallHook(from, verbs);

    // THEN: the action hook receives the expected transcript
    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
    const obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    const transcript = obj.body.speech.alternatives[0].transcript.toLowerCase();
    t.ok(transcript.startsWith('i\'d like to speak to customer support'),
      'gather: succeeds when using deepgram credentials');
    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});

View File

@@ -43,7 +43,7 @@ test('\'transcribe\' test - google', async(t) => {
// THEN // THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from); await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`); let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support', t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'transcribe: succeeds when using google credentials'); 'transcribe: succeeds when using google credentials');
disconnect(); disconnect();
@@ -80,7 +80,7 @@ test('\'transcribe\' test - microsoft', async(t) => {
// THEN // THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from); await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`); let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support', t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'transcribe: succeeds when using microsoft credentials'); 'transcribe: succeeds when using microsoft credentials');
disconnect(); disconnect();
@@ -117,9 +117,49 @@ test('\'transcribe\' test - aws', async(t) => {
// THEN // THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from); await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`); let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support', t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'transcribe: succeeds when using aws credentials'); 'transcribe: succeeds when using aws credentials');
disconnect();
} catch (err) {
console.log(`error received: ${err}`);
disconnect();
t.error(err);
}
});
test('\'transcribe\' test - deepgram', async(t) => {
if (!process.env.DEEPGRAM_API_KEY ) {
t.pass('skipping deepgram tests');
return t.end();
}
clearModule.all();
const {srf, disconnect} = require('../app');
try {
await connect(srf);
// GIVEN
/* the recognizer must name deepgram so that the deepgramOptions credentials
   below are the ones actually exercised by this test */
const verbs = [
  {
    "verb": "transcribe",
    "recognizer": {
      "vendor": "deepgram",
      "hints": ["customer support", "sales", "human resources", "HR"],
      "deepgramOptions": {
        "apiKey": process.env.DEEPGRAM_API_KEY
      }
    },
    "transcriptionHook": "/transcriptionHook"
  }
];
let from = "gather_success";
provisionCallHook(from, verbs);
// THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
'transcribe: succeeds when using deepgram credentials');
disconnect(); disconnect();
} catch (err) { } catch (err) {
console.log(`error received: ${err}`); console.log(`error received: ${err}`);