mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
Feature/nuance stt (#185)
* initial changes to gather to support nuance stt
* updateSpeechCredentialLastUsed could be called without a speech_credential_sid if credentials are passed in the flow
* fix bugname
* typo
* added handlers for nuance
* logging
* major refactor of parsing transcriptions
* initial support for nuance in transcribe verb
* updates from testing
* cleanup some tests
* update action
* typo
* gather: start nuance timers after say/play completes
* update drachtio-fsrmf
* refactor some code
* typo
* log nuance error detail
* timeout handling
* typo
* handle nuance 413 response when recognition times out
* typo in specs.json
* add support for nuance resources
* fixes and tests for transcribe
* remove logging from test
* initial support for kryptonEndpoint
* try getting access token even when using krypton
* typo in kryptonEndpoint property
* add support for Nuance tts
* parse nuance voice and model for tts
* use nuance credentials from db
* update to db-helpers@0.7.0 with caching option
* add support for azure audio logging in gather/transcribe
* sync package-lock.json
This commit is contained in:
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@@ -20,3 +20,5 @@ jobs:
|
|||||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||||
AWS_REGION: ${{ secrets.AWS_REGION }}
|
AWS_REGION: ${{ secrets.AWS_REGION }}
|
||||||
|
MICROSOFT_REGION: ${{ secrets.MICROSOFT_REGION }}
|
||||||
|
MICROSOFT_API_KEY: ${{ secrets.MICROSOFT_API_KEY }}
|
||||||
@@ -560,6 +560,13 @@ class CallSession extends Emitter {
|
|||||||
api_key: credential.api_key
|
api_key: credential.api_key
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
else if ('nuance' === vendor) {
|
||||||
|
return {
|
||||||
|
speech_credential_sid: credential.speech_credential_sid,
|
||||||
|
client_id: credential.client_id,
|
||||||
|
secret: credential.secret
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
writeAlerts({
|
writeAlerts({
|
||||||
|
|||||||
@@ -3,25 +3,22 @@ const {
|
|||||||
TaskName,
|
TaskName,
|
||||||
TaskPreconditions,
|
TaskPreconditions,
|
||||||
GoogleTranscriptionEvents,
|
GoogleTranscriptionEvents,
|
||||||
|
NuanceTranscriptionEvents,
|
||||||
AwsTranscriptionEvents,
|
AwsTranscriptionEvents,
|
||||||
AzureTranscriptionEvents
|
AzureTranscriptionEvents
|
||||||
} = require('../utils/constants');
|
} = require('../utils/constants');
|
||||||
|
|
||||||
const makeTask = require('./make_task');
|
const makeTask = require('./make_task');
|
||||||
const assert = require('assert');
|
const assert = require('assert');
|
||||||
//const GATHER_STABILITY_THRESHOLD = Number(process.env.JAMBONZ_GATHER_STABILITY_THRESHOLD || 0.7);
|
|
||||||
|
|
||||||
const compileTranscripts = (logger, evt, arr) => {
|
const compileTranscripts = (logger, evt, arr) => {
|
||||||
//logger.debug({arr, evt}, 'compile transcripts');
|
|
||||||
if (!Array.isArray(arr) || arr.length === 0) return;
|
if (!Array.isArray(arr) || arr.length === 0) return;
|
||||||
let t = '';
|
let t = '';
|
||||||
for (const a of arr) {
|
for (const a of arr) {
|
||||||
//logger.debug(`adding ${a.alternatives[0].transcript}`);
|
|
||||||
t += ` ${a.alternatives[0].transcript}`;
|
t += ` ${a.alternatives[0].transcript}`;
|
||||||
}
|
}
|
||||||
t += ` ${evt.alternatives[0].transcript}`;
|
t += ` ${evt.alternatives[0].transcript}`;
|
||||||
evt.alternatives[0].transcript = t.trim();
|
evt.alternatives[0].transcript = t.trim();
|
||||||
//logger.debug(`compiled transcript: ${evt.alternatives[0].transcript}`);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class TaskGather extends Task {
|
class TaskGather extends Task {
|
||||||
@@ -29,6 +26,15 @@ class TaskGather extends Task {
|
|||||||
super(logger, opts);
|
super(logger, opts);
|
||||||
this.preconditions = TaskPreconditions.Endpoint;
|
this.preconditions = TaskPreconditions.Endpoint;
|
||||||
|
|
||||||
|
const {
|
||||||
|
setChannelVarsForStt,
|
||||||
|
normalizeTranscription,
|
||||||
|
removeSpeechListeners
|
||||||
|
} = require('../utils/transcription-utils')(logger);
|
||||||
|
this.setChannelVarsForStt = setChannelVarsForStt;
|
||||||
|
this.normalizeTranscription = normalizeTranscription;
|
||||||
|
this.removeSpeechListeners = removeSpeechListeners;
|
||||||
|
|
||||||
[
|
[
|
||||||
'finishOnKey', 'hints', 'input', 'numDigits', 'minDigits', 'maxDigits',
|
'finishOnKey', 'hints', 'input', 'numDigits', 'minDigits', 'maxDigits',
|
||||||
'interDigitTimeout', 'partialResultHook', 'bargein', 'dtmfBargein',
|
'interDigitTimeout', 'partialResultHook', 'bargein', 'dtmfBargein',
|
||||||
@@ -47,47 +53,23 @@ class TaskGather extends Task {
|
|||||||
const recognizer = this.data.recognizer;
|
const recognizer = this.data.recognizer;
|
||||||
this.vendor = recognizer.vendor;
|
this.vendor = recognizer.vendor;
|
||||||
this.language = recognizer.language;
|
this.language = recognizer.language;
|
||||||
this.hints = recognizer.hints || [];
|
|
||||||
this.hintsBoost = recognizer.hintsBoost;
|
if (recognizer.vendor === 'nuance') {
|
||||||
this.profanityFilter = recognizer.profanityFilter;
|
const {clientId, secret} = recognizer.nuanceOptions;
|
||||||
this.punctuation = !!recognizer.punctuation;
|
if (clientId && secret) {
|
||||||
this.enhancedModel = !!recognizer.enhancedModel;
|
this.sttCredentials = {client_id: clientId, secret};
|
||||||
this.model = recognizer.model || 'command_and_search';
|
}
|
||||||
this.words = !!recognizer.words;
|
}
|
||||||
this.singleUtterance = recognizer.singleUtterance || true;
|
|
||||||
this.diarization = !!recognizer.diarization;
|
|
||||||
this.diarizationMinSpeakers = recognizer.diarizationMinSpeakers || 0;
|
|
||||||
this.diarizationMaxSpeakers = recognizer.diarizationMaxSpeakers || 0;
|
|
||||||
this.interactionType = recognizer.interactionType || 'unspecified';
|
|
||||||
this.naicsCode = recognizer.naicsCode || 0;
|
|
||||||
this.altLanguages = recognizer.altLanguages || [];
|
|
||||||
|
|
||||||
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
|
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
|
||||||
this.asrTimeout = typeof recognizer.asrTimeout === 'number' ? recognizer.asrTimeout * 1000 : 0;
|
this.asrTimeout = typeof recognizer.asrTimeout === 'number' ? recognizer.asrTimeout * 1000 : 0;
|
||||||
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
|
if (this.asrTimeout > 0) this.asrDtmfTerminationDigit = recognizer.asrDtmfTerminationDigit;
|
||||||
this.isContinuousAsr = this.asrTimeout > 0;
|
this.isContinuousAsr = this.asrTimeout > 0;
|
||||||
|
|
||||||
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
this.data.recognizer.hints = this.data.recognizer.hints || [];
|
||||||
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
this.data.recognizer.altLanguages = this.data.recognizer.altLanguages || [];
|
||||||
this.vad = {enable, voiceMs, mode};
|
|
||||||
|
|
||||||
/* aws options */
|
|
||||||
this.vocabularyName = recognizer.vocabularyName;
|
|
||||||
this.vocabularyFilterName = recognizer.vocabularyFilterName;
|
|
||||||
this.filterMethod = recognizer.filterMethod;
|
|
||||||
|
|
||||||
/* microsoft options */
|
|
||||||
this.outputFormat = recognizer.outputFormat || 'simple';
|
|
||||||
this.profanityOption = recognizer.profanityOption || 'raw';
|
|
||||||
this.requestSnr = recognizer.requestSnr || false;
|
|
||||||
this.initialSpeechTimeoutMs = recognizer.initialSpeechTimeoutMs || 0;
|
|
||||||
this.azureServiceEndpoint = recognizer.azureServiceEndpoint;
|
|
||||||
this.azureSttEndpointId = recognizer.azureSttEndpointId;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
this.hints = [];
|
|
||||||
this.altLanguages = [];
|
|
||||||
}
|
}
|
||||||
|
else this.data.recognizer = {hints: [], altLanguages: []};
|
||||||
|
|
||||||
this.digitBuffer = '';
|
this.digitBuffer = '';
|
||||||
this._earlyMedia = this.data.earlyMedia === true;
|
this._earlyMedia = this.data.earlyMedia === true;
|
||||||
@@ -134,21 +116,22 @@ class TaskGather extends Task {
|
|||||||
this.logger.debug('Gather:exec');
|
this.logger.debug('Gather:exec');
|
||||||
await super.exec(cs);
|
await super.exec(cs);
|
||||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||||
|
const {getNuanceAccessToken} = cs.srf.locals.dbHelpers;
|
||||||
|
|
||||||
if (cs.hasGlobalSttHints) {
|
if (cs.hasGlobalSttHints) {
|
||||||
const {hints, hintsBoost} = cs.globalSttHints;
|
const {hints, hintsBoost} = cs.globalSttHints;
|
||||||
this.hints = this.hints.concat(hints);
|
this.data.recognizer.hints = this.data.recognizer.hints.concat(hints);
|
||||||
if (!this.hintsBoost && hintsBoost) this.hintsBoost = hintsBoost;
|
if (!this.data.recognizer.hintsBoost && hintsBoost) this.data.recognizer.hintsBoost = hintsBoost;
|
||||||
this.logger.debug({hints: this.hints, hintsBoost: this.hintsBoost},
|
this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost},
|
||||||
'Gather:exec - applying global sttHints');
|
'Gather:exec - applying global sttHints');
|
||||||
}
|
}
|
||||||
if (cs.hasAltLanguages) {
|
if (cs.hasAltLanguages) {
|
||||||
this.altLanguages = this.altLanguages.concat(cs.altLanguages);
|
this.data.recognizer.altLanguages = this.data.recognizer.altLanguages.concat(cs.altLanguages);
|
||||||
this.logger.debug({altLanguages: this.altLanguages},
|
this.logger.debug({altLanguages: this.altLanguages},
|
||||||
'Gather:exec - applying altLanguages');
|
'Gather:exec - applying altLanguages');
|
||||||
}
|
}
|
||||||
if (cs.hasGlobalSttPunctuation) {
|
if (cs.hasGlobalSttPunctuation && !this.data.recognizer.punctuation) {
|
||||||
this.punctuation = cs.globalSttPunctuation;
|
this.data.recognizer.punctuation = cs.globalSttPunctuation;
|
||||||
}
|
}
|
||||||
if (!this.isContinuousAsr && cs.isContinuousAsr) {
|
if (!this.isContinuousAsr && cs.isContinuousAsr) {
|
||||||
this.isContinuousAsr = true;
|
this.isContinuousAsr = true;
|
||||||
@@ -162,7 +145,8 @@ class TaskGather extends Task {
|
|||||||
this.ep = ep;
|
this.ep = ep;
|
||||||
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
|
if ('default' === this.vendor || !this.vendor) this.vendor = cs.speechRecognizerVendor;
|
||||||
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
|
if ('default' === this.language || !this.language) this.language = cs.speechRecognizerLanguage;
|
||||||
this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
|
|
||||||
|
if (this.needsStt && !this.sttCredentials) this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
|
||||||
if (this.needsStt && !this.sttCredentials) {
|
if (this.needsStt && !this.sttCredentials) {
|
||||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||||
this.logger.info(`TaskGather:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
|
this.logger.info(`TaskGather:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
|
||||||
@@ -175,16 +159,27 @@ class TaskGather extends Task {
|
|||||||
throw new Error(`no speech-to-text service credentials for ${this.vendor} have been configured`);
|
throw new Error(`no speech-to-text service credentials for ${this.vendor} have been configured`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.logger.info({sttCredentials: this.sttCredentials}, 'Gather:exec - sttCredentials');
|
||||||
|
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
|
||||||
|
/* get nuance access token */
|
||||||
|
const {client_id, secret} = this.sttCredentials;
|
||||||
|
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
||||||
|
this.logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
||||||
|
this.sttCredentials = {...this.sttCredentials, access_token};
|
||||||
|
}
|
||||||
const startListening = (cs, ep) => {
|
const startListening = (cs, ep) => {
|
||||||
this._startTimer();
|
this._startTimer();
|
||||||
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
|
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
|
||||||
if (this.input.includes('speech') && !this.listenDuringPrompt) {
|
if (this.input.includes('speech') && !this.listenDuringPrompt) {
|
||||||
|
this.logger.debug('Gather:exec - calling _initSpeech');
|
||||||
this._initSpeech(cs, ep)
|
this._initSpeech(cs, ep)
|
||||||
.then(() => {
|
.then(() => {
|
||||||
this._startTranscribing(ep);
|
this._startTranscribing(ep);
|
||||||
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
|
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
|
||||||
})
|
})
|
||||||
.catch(() => {});
|
.catch((err) => {
|
||||||
|
this.logger.error({err}, 'error in initSpeech');
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -198,7 +193,15 @@ class TaskGather extends Task {
|
|||||||
span.end();
|
span.end();
|
||||||
if (err) this.logger.error({err}, 'Gather:exec Error playing tts');
|
if (err) this.logger.error({err}, 'Gather:exec Error playing tts');
|
||||||
this.logger.debug('Gather: nested say task completed');
|
this.logger.debug('Gather: nested say task completed');
|
||||||
if (!this.killed) startListening(cs, ep);
|
if (!this.killed) {
|
||||||
|
startListening(cs, ep);
|
||||||
|
if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
|
||||||
|
this.logger.debug('Gather:exec - starting transcription timers after say completes');
|
||||||
|
ep.startTranscriptionTimers((err) => {
|
||||||
|
if (err) this.logger.error({err}, 'Gather:exec - error starting transcription timers');
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else if (this.playTask) {
|
else if (this.playTask) {
|
||||||
@@ -210,7 +213,15 @@ class TaskGather extends Task {
|
|||||||
span.end();
|
span.end();
|
||||||
if (err) this.logger.error({err}, 'Gather:exec Error playing url');
|
if (err) this.logger.error({err}, 'Gather:exec Error playing url');
|
||||||
this.logger.debug('Gather: nested play task completed');
|
this.logger.debug('Gather: nested play task completed');
|
||||||
if (!this.killed) startListening(cs, ep);
|
if (!this.killed) {
|
||||||
|
startListening(cs, ep);
|
||||||
|
if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
|
||||||
|
this.logger.debug('Gather:exec - starting transcription timers after play completes');
|
||||||
|
ep.startTranscriptionTimers((err) => {
|
||||||
|
if (err) this.logger.error({err}, 'Gather:exec - error starting transcription timers');
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else startListening(cs, ep);
|
else startListening(cs, ep);
|
||||||
@@ -230,14 +241,7 @@ class TaskGather extends Task {
|
|||||||
} catch (err) {
|
} catch (err) {
|
||||||
this.logger.error(err, 'TaskGather:exec error');
|
this.logger.error(err, 'TaskGather:exec error');
|
||||||
}
|
}
|
||||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
|
this.removeSpeechListeners(ep);
|
||||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
|
|
||||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.VadDetected);
|
|
||||||
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
|
|
||||||
ep.removeCustomEventListener(AwsTranscriptionEvents.VadDetected);
|
|
||||||
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
|
|
||||||
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
|
|
||||||
ep.removeCustomEventListener(AzureTranscriptionEvents.VadDetected);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
kill(cs) {
|
kill(cs) {
|
||||||
@@ -292,106 +296,52 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async _initSpeech(cs, ep) {
|
async _initSpeech(cs, ep) {
|
||||||
const opts = {};
|
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
||||||
|
this.logger.debug(opts, 'TaskGather:_initSpeech - channel vars');
|
||||||
|
switch (this.vendor) {
|
||||||
|
case 'google':
|
||||||
|
this.bugname = 'google_transcribe';
|
||||||
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||||
|
break;
|
||||||
|
|
||||||
if (this.vad?.enable) {
|
case 'aws':
|
||||||
opts.START_RECOGNIZING_ON_VAD = 1;
|
case 'polly':
|
||||||
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
|
this.bugname = 'aws_transcribe';
|
||||||
else opts.RECOGNIZER_VAD_VOICE_MS = 125;
|
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
|
ep.addCustomEventListener(AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||||
}
|
break;
|
||||||
|
case 'microsoft':
|
||||||
|
this.bugname = 'azure_transcribe';
|
||||||
|
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected,
|
||||||
|
this._onNoSpeechDetected.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||||
|
break;
|
||||||
|
case 'nuance':
|
||||||
|
this.bugname = 'nuance_transcribe';
|
||||||
|
ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription,
|
||||||
|
this._onTranscription.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech,
|
||||||
|
this._onStartOfSpeech.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete,
|
||||||
|
this._onTranscriptionComplete.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(NuanceTranscriptionEvents.VadDetected,
|
||||||
|
this._onVadDetected.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(NuanceTranscriptionEvents.Error,
|
||||||
|
this._onNuanceError.bind(this, cs, ep));
|
||||||
|
|
||||||
if ('google' === this.vendor) {
|
/* stall timers until prompt finishes playing */
|
||||||
this.bugname = 'google_transcribe';
|
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
|
||||||
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
opts.NUANCE_STALL_TIMERS = 1;
|
||||||
[
|
|
||||||
['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
|
|
||||||
['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
|
|
||||||
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
|
|
||||||
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
|
|
||||||
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
|
|
||||||
['singleUtterance', 'GOOGLE_SPEECH_SINGLE_UTTERANCE'],
|
|
||||||
['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
|
|
||||||
].forEach((arr) => {
|
|
||||||
if (this[arr[0]]) opts[arr[1]] = true;
|
|
||||||
else if (this[arr[0]] === false) opts[arr[1]] = false;
|
|
||||||
});
|
|
||||||
if (this.hints.length > 0) {
|
|
||||||
opts.GOOGLE_SPEECH_HINTS = this.hints.join(',');
|
|
||||||
if (typeof this.hintsBoost === 'number') {
|
|
||||||
opts.GOOGLE_SPEECH_HINTS_BOOST = this.hintsBoost;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (this.altLanguages.length > 0) opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
|
||||||
else opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = '';
|
|
||||||
if ('unspecified' !== this.interactionType) {
|
|
||||||
opts.GOOGLE_SPEECH_METADATA_INTERACTION_TYPE = this.interactionType;
|
|
||||||
}
|
|
||||||
opts.GOOGLE_SPEECH_MODEL = this.model;
|
|
||||||
if (this.diarization && this.diarizationMinSpeakers > 0) {
|
|
||||||
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT = this.diarizationMinSpeakers;
|
|
||||||
}
|
|
||||||
if (this.diarization && this.diarizationMaxSpeakers > 0) {
|
|
||||||
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT = this.diarizationMaxSpeakers;
|
|
||||||
}
|
|
||||||
if (this.naicsCode > 0) opts.GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE = this.naicsCode;
|
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
|
||||||
}
|
|
||||||
else if (['aws', 'polly'].includes(this.vendor)) {
|
|
||||||
this.bugname = 'aws_transcribe';
|
|
||||||
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
|
|
||||||
if (this.vocabularyFilterName) {
|
|
||||||
opts.AWS_VOCABULARY_NAME = this.vocabularyFilterName;
|
|
||||||
opts.AWS_VOCABULARY_FILTER_METHOD = this.filterMethod || 'mask';
|
|
||||||
}
|
|
||||||
if (this.sttCredentials) {
|
|
||||||
Object.assign(opts, {
|
|
||||||
AWS_ACCESS_KEY_ID: this.sttCredentials.accessKeyId,
|
|
||||||
AWS_SECRET_ACCESS_KEY: this.sttCredentials.secretAccessKey,
|
|
||||||
AWS_REGION: this.sttCredentials.region
|
|
||||||
});
|
|
||||||
}
|
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
|
||||||
}
|
|
||||||
else if ('microsoft' === this.vendor) {
|
|
||||||
this.bugname = 'azure_transcribe';
|
|
||||||
if (this.sttCredentials) {
|
|
||||||
const {api_key, region, use_custom_stt, custom_stt_endpoint} = this.sttCredentials;
|
|
||||||
|
|
||||||
Object.assign(opts, {
|
break;
|
||||||
'AZURE_SUBSCRIPTION_KEY': api_key,
|
default:
|
||||||
'AZURE_REGION': region
|
throw new Error(`Invalid vendor ${this.vendor}`);
|
||||||
});
|
|
||||||
if (this.azureSttEndpointId) {
|
|
||||||
Object.assign(opts, {'AZURE_SERVICE_ENDPOINT_ID': this.azureSttEndpointId});
|
|
||||||
}
|
|
||||||
else if (use_custom_stt && custom_stt_endpoint) {
|
|
||||||
Object.assign(opts, {'AZURE_SERVICE_ENDPOINT_ID': custom_stt_endpoint});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (this.hints && this.hints.length > 0) {
|
|
||||||
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
|
||||||
}
|
|
||||||
if (this.altLanguages && this.altLanguages.length > 0) {
|
|
||||||
opts.AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
opts.AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = '';
|
|
||||||
}
|
|
||||||
if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
|
|
||||||
if (this.profanityOption && this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
|
|
||||||
if (this.azureServiceEndpoint) opts.AZURE_SERVICE_ENDPOINT = this.azureServiceEndpoint;
|
|
||||||
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
|
|
||||||
else if (this.timeout === 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = 120000; // lengthy
|
|
||||||
opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
|
|
||||||
|
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoSpeechDetected.bind(this, cs, ep));
|
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await ep.set(opts)
|
await ep.set(opts)
|
||||||
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
||||||
}
|
}
|
||||||
@@ -489,40 +439,12 @@ class TaskGather extends Task {
|
|||||||
|
|
||||||
_onTranscription(cs, ep, evt, fsEvent) {
|
_onTranscription(cs, ep, evt, fsEvent) {
|
||||||
// make sure this is not a transcript from answering machine detection
|
// make sure this is not a transcript from answering machine detection
|
||||||
|
this.logger.debug({evt}, 'Gather:_onTranscription');
|
||||||
const bugname = fsEvent.getHeader('media-bugname');
|
const bugname = fsEvent.getHeader('media-bugname');
|
||||||
const finished = fsEvent.getHeader('transcription-session-finished');
|
const finished = fsEvent.getHeader('transcription-session-finished');
|
||||||
if (bugname && this.bugname !== bugname) return;
|
if (bugname && this.bugname !== bugname) return;
|
||||||
|
|
||||||
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
|
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language);
|
||||||
if ('microsoft' === this.vendor) {
|
|
||||||
const final = evt.RecognitionStatus === 'Success';
|
|
||||||
if (final) {
|
|
||||||
// don't sort based on confidence: https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues/1463
|
|
||||||
//const nbest = evt.NBest.sort((a, b) => b.Confidence - a.Confidence);
|
|
||||||
const nbest = evt.NBest;
|
|
||||||
const language_code = evt.PrimaryLanguage?.Language || this.language;
|
|
||||||
evt = {
|
|
||||||
is_final: true,
|
|
||||||
language_code,
|
|
||||||
alternatives: [
|
|
||||||
{
|
|
||||||
confidence: nbest[0].Confidence,
|
|
||||||
transcript: nbest[0].Display
|
|
||||||
}
|
|
||||||
]
|
|
||||||
};
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
evt = {
|
|
||||||
is_final: false,
|
|
||||||
alternatives: [
|
|
||||||
{
|
|
||||||
transcript: evt.Text
|
|
||||||
}
|
|
||||||
]
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* count words for bargein feature */
|
/* count words for bargein feature */
|
||||||
const words = evt.alternatives[0].transcript.split(' ').length;
|
const words = evt.alternatives[0].transcript.split(' ').length;
|
||||||
@@ -607,6 +529,24 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_onStartOfSpeech(cs, ep) {
|
||||||
|
this.logger.debug('TaskGather:_onStartOfSpeech');
|
||||||
|
}
|
||||||
|
_onTranscriptionComplete(cs, ep) {
|
||||||
|
this.logger.debug('TaskGather:_onTranscriptionComplete');
|
||||||
|
}
|
||||||
|
_onNuanceError(cs, ep, evt) {
|
||||||
|
const {code, error, details} = evt;
|
||||||
|
if (code === 404 && error === 'No speech') {
|
||||||
|
this.logger.debug({code, error, details}, 'TaskGather:_onNuanceError');
|
||||||
|
return this._resolve('timeout');
|
||||||
|
}
|
||||||
|
this.logger.info({code, error, details}, 'TaskGather:_onNuanceError');
|
||||||
|
if (code === 413 && error === 'Too much speech') {
|
||||||
|
return this._resolve('timeout');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_onVadDetected(cs, ep) {
|
_onVadDetected(cs, ep) {
|
||||||
if (this.bargein && this.minBargeinWordCount === 0) {
|
if (this.bargein && this.minBargeinWordCount === 0) {
|
||||||
this.logger.debug('TaskGather:_onVadDetected');
|
this.logger.debug('TaskGather:_onVadDetected');
|
||||||
|
|||||||
@@ -130,14 +130,24 @@ class TaskSay extends Task {
|
|||||||
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
|
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
|
||||||
this.synthesizer.language :
|
this.synthesizer.language :
|
||||||
cs.speechSynthesisLanguage ;
|
cs.speechSynthesisLanguage ;
|
||||||
const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
|
let voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
|
||||||
this.synthesizer.voice :
|
this.synthesizer.voice :
|
||||||
cs.speechSynthesisVoice;
|
cs.speechSynthesisVoice;
|
||||||
const engine = this.synthesizer.engine || 'standard';
|
const engine = this.synthesizer.engine || 'standard';
|
||||||
const salt = cs.callSid;
|
const salt = cs.callSid;
|
||||||
const credentials = cs.getSpeechCredentials(vendor, 'tts');
|
const credentials = cs.getSpeechCredentials(vendor, 'tts');
|
||||||
|
|
||||||
this.logger.info({vendor, language, voice}, 'TaskSay:exec');
|
/* parse Nuance voices into name and model */
|
||||||
|
let model;
|
||||||
|
if (vendor === 'nuance' && voice) {
|
||||||
|
const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
|
||||||
|
if (arr) {
|
||||||
|
voice = arr[1];
|
||||||
|
model = arr[2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
|
||||||
this.ep = ep;
|
this.ep = ep;
|
||||||
try {
|
try {
|
||||||
if (!credentials) {
|
if (!credentials) {
|
||||||
@@ -170,6 +180,7 @@ class TaskSay extends Task {
|
|||||||
language,
|
language,
|
||||||
voice,
|
voice,
|
||||||
engine,
|
engine,
|
||||||
|
model,
|
||||||
salt,
|
salt,
|
||||||
credentials
|
credentials
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -445,7 +445,7 @@
|
|||||||
"properties": {
|
"properties": {
|
||||||
"vendor": {
|
"vendor": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": ["google", "aws", "microsoft", "default"]
|
"enum": ["google", "aws", "microsoft", "nuance", "default"]
|
||||||
},
|
},
|
||||||
"language": "string",
|
"language": "string",
|
||||||
"vad": "#vad",
|
"vad": "#vad",
|
||||||
@@ -509,12 +509,121 @@
|
|||||||
"azureServiceEndpoint": "string",
|
"azureServiceEndpoint": "string",
|
||||||
"azureSttEndpointId": "string",
|
"azureSttEndpointId": "string",
|
||||||
"asrDtmfTerminationDigit": "string",
|
"asrDtmfTerminationDigit": "string",
|
||||||
"asrTimeout": "number"
|
"asrTimeout": "number",
|
||||||
|
"nuanceOptions": "#nuanceOptions"
|
||||||
},
|
},
|
||||||
"required": [
|
"required": [
|
||||||
"vendor"
|
"vendor"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"nuanceOptions": {
|
||||||
|
"properties": {
|
||||||
|
"clientId": "string",
|
||||||
|
"secret": "string",
|
||||||
|
"kryptonEndpoint": "string",
|
||||||
|
"topic": "string",
|
||||||
|
"utteranceDetectionMode": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"single",
|
||||||
|
"multiple",
|
||||||
|
"disabled"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"punctuation": "boolean",
|
||||||
|
"profanityFilter": "boolean",
|
||||||
|
"includeTokenization": "boolean",
|
||||||
|
"discardSpeakerAdaptation": "boolean",
|
||||||
|
"suppressCallRecording": "boolean",
|
||||||
|
"maskLoadFailures": "boolean",
|
||||||
|
"suppressInitialCapitalization": "boolean",
|
||||||
|
"allowZeroBaseLmWeight": "boolean",
|
||||||
|
"filterWakeupWord": "boolean",
|
||||||
|
"resultType": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"final",
|
||||||
|
"partial",
|
||||||
|
"immutable_partial"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"noInputTimeoutMs": "number",
|
||||||
|
"recognitionTimeoutMs": "number",
|
||||||
|
"utteranceEndSilenceMs": "number",
|
||||||
|
"maxHypotheses": "number",
|
||||||
|
"speechDomain": "string",
|
||||||
|
"formatting": "#formatting",
|
||||||
|
"clientData": "object",
|
||||||
|
"userId": "string",
|
||||||
|
"speechDetectionSensitivity": "number",
|
||||||
|
"resources": ["#resource"]
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"resource": {
|
||||||
|
"properties": {
|
||||||
|
"externalReference": "#resourceReference",
|
||||||
|
"inlineWordset": "string",
|
||||||
|
"builtin": "string",
|
||||||
|
"inlineGrammar": "string",
|
||||||
|
"wakeupWord": "[string]",
|
||||||
|
"weightName": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"defaultWeight",
|
||||||
|
"lowest",
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"highest"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"weightValue": "number",
|
||||||
|
"reuse": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"undefined_reuse",
|
||||||
|
"low_reuse",
|
||||||
|
"high_reuse"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"resourceReference": {
|
||||||
|
"properties": {
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"undefined_resource_type",
|
||||||
|
"wordset",
|
||||||
|
"compiled_wordset",
|
||||||
|
"domain_lm",
|
||||||
|
"speaker_profile",
|
||||||
|
"grammar",
|
||||||
|
"settings"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"uri": "string",
|
||||||
|
"maxLoadFailures": "boolean",
|
||||||
|
"requestTimeoutMs": "number",
|
||||||
|
"headers": "object"
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"formatting": {
|
||||||
|
"properties": {
|
||||||
|
"scheme": "string",
|
||||||
|
"options": "object"
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"scheme",
|
||||||
|
"options"
|
||||||
|
]
|
||||||
|
},
|
||||||
"lexIntent": {
|
"lexIntent": {
|
||||||
"properties": {
|
"properties": {
|
||||||
"name": "string",
|
"name": "string",
|
||||||
|
|||||||
@@ -4,8 +4,10 @@ const {
|
|||||||
TaskPreconditions,
|
TaskPreconditions,
|
||||||
GoogleTranscriptionEvents,
|
GoogleTranscriptionEvents,
|
||||||
AzureTranscriptionEvents,
|
AzureTranscriptionEvents,
|
||||||
AwsTranscriptionEvents
|
AwsTranscriptionEvents,
|
||||||
|
NuanceTranscriptionEvents
|
||||||
} = require('../utils/constants');
|
} = require('../utils/constants');
|
||||||
|
const normalizeJambones = require('../utils/normalize-jambones');
|
||||||
|
|
||||||
class TaskTranscribe extends Task {
|
class TaskTranscribe extends Task {
|
||||||
constructor(logger, opts, parentTask) {
|
constructor(logger, opts, parentTask) {
|
||||||
@@ -13,6 +15,10 @@ class TaskTranscribe extends Task {
|
|||||||
this.preconditions = TaskPreconditions.Endpoint;
|
this.preconditions = TaskPreconditions.Endpoint;
|
||||||
this.parentTask = parentTask;
|
this.parentTask = parentTask;
|
||||||
|
|
||||||
|
const {setChannelVarsForStt, normalizeTranscription} = require('../utils/transcription-utils')(logger);
|
||||||
|
this.setChannelVarsForStt = setChannelVarsForStt;
|
||||||
|
this.normalizeTranscription = normalizeTranscription;
|
||||||
|
|
||||||
this.transcriptionHook = this.data.transcriptionHook;
|
this.transcriptionHook = this.data.transcriptionHook;
|
||||||
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
|
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
|
||||||
|
|
||||||
@@ -22,39 +28,15 @@ class TaskTranscribe extends Task {
|
|||||||
this.interim = !!recognizer.interim;
|
this.interim = !!recognizer.interim;
|
||||||
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
|
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
|
||||||
|
|
||||||
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
if (recognizer.vendor === 'nuance') {
|
||||||
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
const {clientId, secret} = recognizer.nuanceOptions;
|
||||||
this.vad = {enable, voiceMs, mode};
|
if (clientId && secret) {
|
||||||
|
this.sttCredentials = {client_id: clientId, secret};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* google-specific options */
|
recognizer.hints = recognizer.hints || [];
|
||||||
this.hints = recognizer.hints || [];
|
recognizer.altLanguages = recognizer.altLanguages || [];
|
||||||
this.hintsBoost = recognizer.hintsBoost;
|
|
||||||
this.profanityFilter = recognizer.profanityFilter;
|
|
||||||
this.punctuation = !!recognizer.punctuation;
|
|
||||||
this.enhancedModel = !!recognizer.enhancedModel;
|
|
||||||
this.model = recognizer.model || 'phone_call';
|
|
||||||
this.words = !!recognizer.words;
|
|
||||||
this.singleUtterance = recognizer.singleUtterance || false;
|
|
||||||
this.diarization = !!recognizer.diarization;
|
|
||||||
this.diarizationMinSpeakers = recognizer.diarizationMinSpeakers || 0;
|
|
||||||
this.diarizationMaxSpeakers = recognizer.diarizationMaxSpeakers || 0;
|
|
||||||
this.interactionType = recognizer.interactionType || 'unspecified';
|
|
||||||
this.naicsCode = recognizer.naicsCode || 0;
|
|
||||||
this.altLanguages = recognizer.altLanguages || [];
|
|
||||||
|
|
||||||
/* aws-specific options */
|
|
||||||
this.identifyChannels = !!recognizer.identifyChannels;
|
|
||||||
this.vocabularyName = recognizer.vocabularyName;
|
|
||||||
this.vocabularyFilterName = recognizer.vocabularyFilterName;
|
|
||||||
this.filterMethod = recognizer.filterMethod;
|
|
||||||
|
|
||||||
/* microsoft options */
|
|
||||||
this.outputFormat = recognizer.outputFormat || 'simple';
|
|
||||||
this.profanityOption = recognizer.profanityOption || 'raw';
|
|
||||||
this.requestSnr = recognizer.requestSnr || false;
|
|
||||||
this.initialSpeechTimeoutMs = recognizer.initialSpeechTimeoutMs || 0;
|
|
||||||
this.azureServiceEndpoint = recognizer.azureServiceEndpoint;
|
|
||||||
this.azureSttEndpointId = recognizer.azureSttEndpointId;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
get name() { return TaskName.Transcribe; }
|
get name() { return TaskName.Transcribe; }
|
||||||
@@ -62,21 +44,22 @@ class TaskTranscribe extends Task {
|
|||||||
async exec(cs, {ep, ep2}) {
|
async exec(cs, {ep, ep2}) {
|
||||||
super.exec(cs);
|
super.exec(cs);
|
||||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||||
|
const {getNuanceAccessToken} = cs.srf.locals.dbHelpers;
|
||||||
|
|
||||||
if (cs.hasGlobalSttHints) {
|
if (cs.hasGlobalSttHints) {
|
||||||
const {hints, hintsBoost} = cs.globalSttHints;
|
const {hints, hintsBoost} = cs.globalSttHints;
|
||||||
this.hints = this.hints.concat(hints);
|
this.data.recognizer.hints = this.data.recognizer.hints.concat(hints);
|
||||||
if (!this.hintsBoost && hintsBoost) this.hintsBoost = hintsBoost;
|
if (!this.data.recognizer.hintsBoost && hintsBoost) this.data.recognizer.hintsBoost = hintsBoost;
|
||||||
this.logger.debug({hints: this.hints, hintsBoost: this.hintsBoost},
|
this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost},
|
||||||
'Transcribe:exec - applying global `sttHints');
|
'Transcribe:exec - applying global sttHints');
|
||||||
}
|
}
|
||||||
if (cs.hasAltLanguages) {
|
if (cs.hasAltLanguages) {
|
||||||
this.altLanguages = this.altLanguages.concat(cs.altLanguages);
|
this.data.recognizer.altLanguages = this.data.recognizer.altLanguages.concat(cs.altLanguages);
|
||||||
this.logger.debug({altLanguages: this.altLanguages},
|
this.logger.debug({altLanguages: this.altLanguages},
|
||||||
'Gather:exec - applying altLanguages');
|
'Transcribe:exec - applying altLanguages');
|
||||||
}
|
}
|
||||||
if (cs.hasGlobalSttPunctuation) {
|
if (cs.hasGlobalSttPunctuation && !this.data.recognizer.punctuation) {
|
||||||
this.punctuation = cs.globalSttPunctuation;
|
this.data.recognizer.punctuation = cs.globalSttPunctuation;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.ep = ep;
|
this.ep = ep;
|
||||||
@@ -96,6 +79,16 @@ class TaskTranscribe extends Task {
|
|||||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
||||||
throw new Error('no provisioned speech credentials for TTS');
|
throw new Error('no provisioned speech credentials for TTS');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
|
||||||
|
/* get nuance access token */
|
||||||
|
const {client_id, secret} = this.sttCredentials;
|
||||||
|
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
||||||
|
this.logger.debug({client_id},
|
||||||
|
`Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
||||||
|
this.sttCredentials = {...this.sttCredentials, access_token};
|
||||||
|
}
|
||||||
|
|
||||||
await this._startTranscribing(cs, ep, 1);
|
await this._startTranscribing(cs, ep, 1);
|
||||||
if (this.separateRecognitionPerChannel && ep2) {
|
if (this.separateRecognitionPerChannel && ep2) {
|
||||||
await this._startTranscribing(cs, ep2, 2);
|
await this._startTranscribing(cs, ep2, 2);
|
||||||
@@ -110,13 +103,21 @@ class TaskTranscribe extends Task {
|
|||||||
this.parentTask && this.parentTask.emit('error', err);
|
this.parentTask && this.parentTask.emit('error', err);
|
||||||
}
|
}
|
||||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
|
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
|
||||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected);
|
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
|
||||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded);
|
ep.removeCustomEventListener(GoogleTranscriptionEvents.VadDetected);
|
||||||
|
|
||||||
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
|
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
|
||||||
ep.removeCustomEventListener(AwsTranscriptionEvents.NoAudioDetected);
|
ep.removeCustomEventListener(AwsTranscriptionEvents.VadDetected);
|
||||||
ep.removeCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded);
|
|
||||||
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
|
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
|
||||||
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
|
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
|
||||||
|
ep.removeCustomEventListener(AzureTranscriptionEvents.VadDetected);
|
||||||
|
|
||||||
|
ep.removeCustomEventListener(NuanceTranscriptionEvents.Transcription);
|
||||||
|
ep.removeCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete);
|
||||||
|
ep.removeCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech);
|
||||||
|
ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
|
||||||
|
ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);
|
||||||
}
|
}
|
||||||
|
|
||||||
async kill(cs) {
|
async kill(cs) {
|
||||||
@@ -140,124 +141,53 @@ class TaskTranscribe extends Task {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async _startTranscribing(cs, ep, channel) {
|
async _startTranscribing(cs, ep, channel) {
|
||||||
const opts = {};
|
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
||||||
|
switch (this.vendor) {
|
||||||
|
case 'google':
|
||||||
|
this.bugname = 'google_transcribe';
|
||||||
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription,
|
||||||
|
this._onTranscription.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected,
|
||||||
|
this._onNoAudio.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
|
||||||
|
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
||||||
|
break;
|
||||||
|
|
||||||
if (this.vad.enable) {
|
case 'aws':
|
||||||
opts.START_RECOGNIZING_ON_VAD = 1;
|
case 'polly':
|
||||||
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
|
this.bugname = 'aws_transcribe';
|
||||||
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
|
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription,
|
||||||
|
this._onTranscription.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected,
|
||||||
|
this._onNoAudio.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded,
|
||||||
|
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
||||||
|
break;
|
||||||
|
case 'microsoft':
|
||||||
|
this.bugname = 'azure_transcribe';
|
||||||
|
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription,
|
||||||
|
this._onTranscription.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected,
|
||||||
|
this._onNoAudio.bind(this, cs, ep, channel));
|
||||||
|
break;
|
||||||
|
case 'nuance':
|
||||||
|
this.bugname = 'nuance_transcribe';
|
||||||
|
ep.addCustomEventListener(NuanceTranscriptionEvents.Transcription,
|
||||||
|
this._onTranscription.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech,
|
||||||
|
this._onStartOfSpeech.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete,
|
||||||
|
this._onTranscriptionComplete.bind(this, cs, ep, channel));
|
||||||
|
ep.addCustomEventListener(AzureTranscriptionEvents.Error,
|
||||||
|
this._onNuanceError.bind(this, cs, ep, channel));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new Error(`Invalid vendor ${this.vendor}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription,
|
await ep.set(opts)
|
||||||
this._onTranscription.bind(this, cs, ep, channel));
|
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
|
|
||||||
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel));
|
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.MaxDurationExceeded,
|
|
||||||
this._onMaxDurationExceeded.bind(this, cs, ep, channel));
|
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription,
|
|
||||||
this._onTranscription.bind(this, cs, ep, channel));
|
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoAudio.bind(this, cs, ep, channel));
|
|
||||||
|
|
||||||
if (this.vendor === 'google') {
|
|
||||||
this.bugname = 'google_transcribe';
|
|
||||||
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
|
||||||
[
|
|
||||||
['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
|
|
||||||
//['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
|
|
||||||
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
|
|
||||||
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
|
|
||||||
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
|
|
||||||
['singleUtterance', 'GOOGLE_SPEECH_SINGLE_UTTERANCE'],
|
|
||||||
['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
|
|
||||||
].forEach((arr) => {
|
|
||||||
if (this[arr[0]]) opts[arr[1]] = true;
|
|
||||||
else if (this[arr[0]] === false) opts[arr[1]] = false;
|
|
||||||
});
|
|
||||||
if (this.hints.length > 0) {
|
|
||||||
opts.GOOGLE_SPEECH_HINTS = this.hints.join(',');
|
|
||||||
if (typeof this.hintsBoost === 'number') {
|
|
||||||
opts.GOOGLE_SPEECH_HINTS_BOOST = this.hintsBoost;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (this.altLanguages.length > 0) opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
|
||||||
else opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = '';
|
|
||||||
if ('unspecified' !== this.interactionType) {
|
|
||||||
opts.GOOGLE_SPEECH_METADATA_INTERACTION_TYPE = this.interactionType;
|
|
||||||
}
|
|
||||||
opts.GOOGLE_SPEECH_MODEL = this.model;
|
|
||||||
if (this.diarization && this.diarizationMinSpeakers > 0) {
|
|
||||||
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT = this.diarizationMinSpeakers;
|
|
||||||
}
|
|
||||||
if (this.diarization && this.diarizationMaxSpeakers > 0) {
|
|
||||||
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT = this.diarizationMaxSpeakers;
|
|
||||||
}
|
|
||||||
if (this.naicsCode > 0) opts.GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE = this.naicsCode;
|
|
||||||
|
|
||||||
await ep.set(opts)
|
|
||||||
.catch((err) => this.logger.info(err, 'TaskTranscribe:_startTranscribing with google'));
|
|
||||||
}
|
|
||||||
else if (this.vendor === 'aws') {
|
|
||||||
this.bugname = 'aws_transcribe';
|
|
||||||
[
|
|
||||||
['diarization', 'AWS_SHOW_SPEAKER_LABEL'],
|
|
||||||
['identifyChannels', 'AWS_ENABLE_CHANNEL_IDENTIFICATION']
|
|
||||||
].forEach((arr) => {
|
|
||||||
if (this[arr[0]]) opts[arr[1]] = true;
|
|
||||||
});
|
|
||||||
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
|
|
||||||
if (this.vocabularyFilterName) {
|
|
||||||
opts.AWS_VOCABULARY_NAME = this.vocabularyFilterName;
|
|
||||||
opts.AWS_VOCABULARY_FILTER_METHOD = this.filterMethod || 'mask';
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.sttCredentials) {
|
|
||||||
Object.assign(opts, {
|
|
||||||
AWS_ACCESS_KEY_ID: this.sttCredentials.accessKeyId,
|
|
||||||
AWS_SECRET_ACCESS_KEY: this.sttCredentials.secretAccessKey,
|
|
||||||
AWS_REGION: this.sttCredentials.region
|
|
||||||
});
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Object.assign(opts, {
|
|
||||||
AWS_ACCESS_KEY_ID: process.env.AWS_ACCESS_KEY_ID,
|
|
||||||
AWS_SECRET_ACCESS_KEY: process.env.AWS_SECRET_ACCESS_KEY,
|
|
||||||
AWS_REGION: process.env.AWS_REGION
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
await ep.set(opts)
|
|
||||||
.catch((err) => this.logger.info(err, 'TaskTranscribe:_startTranscribing with aws'));
|
|
||||||
}
|
|
||||||
else if (this.vendor === 'microsoft') {
|
|
||||||
this.bugname = 'azure_transcribe';
|
|
||||||
const {api_key, region, use_custom_stt, custom_stt_endpoint} = this.sttCredentials;
|
|
||||||
Object.assign(opts, {
|
|
||||||
'AZURE_SUBSCRIPTION_KEY': api_key,
|
|
||||||
'AZURE_REGION': region
|
|
||||||
});
|
|
||||||
if (this.azureSttEndpointId) {
|
|
||||||
Object.assign(opts, {'AZURE_SERVICE_ENDPOINT_ID': this.azureSttEndpointId});
|
|
||||||
}
|
|
||||||
else if (use_custom_stt && custom_stt_endpoint) {
|
|
||||||
Object.assign(opts, {'AZURE_SERVICE_ENDPOINT_ID': custom_stt_endpoint});
|
|
||||||
}
|
|
||||||
if (this.hints && this.hints.length > 0) {
|
|
||||||
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
|
||||||
}
|
|
||||||
if (this.altLanguages.length > 0) opts.AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
|
||||||
else opts.AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = '';
|
|
||||||
if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
|
|
||||||
if (this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
|
|
||||||
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
|
|
||||||
if (this.outputFormat !== 'simple') opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
|
|
||||||
if (this.azureServiceEndpoint) opts.AZURE_SERVICE_ENDPOINT = this.azureServiceEndpoint;
|
|
||||||
|
|
||||||
await ep.set(opts)
|
|
||||||
.catch((err) => this.logger.info(err, 'TaskTranscribe:_startTranscribing with azure'));
|
|
||||||
}
|
|
||||||
await this._transcribe(ep);
|
await this._transcribe(ep);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -271,50 +201,43 @@ class TaskTranscribe extends Task {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
_onTranscription(cs, ep, channel, evt, fsEvent) {
|
async _onTranscription(cs, ep, channel, evt, fsEvent) {
|
||||||
// make sure this is not a transcript from answering machine detection
|
// make sure this is not a transcript from answering machine detection
|
||||||
const bugname = fsEvent.getHeader('media-bugname');
|
const bugname = fsEvent.getHeader('media-bugname');
|
||||||
if (bugname && this.bugname !== bugname) return;
|
if (bugname && this.bugname !== bugname) return;
|
||||||
|
|
||||||
this.logger.debug({evt, channel}, 'TaskTranscribe:_onTranscription');
|
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - before normalization');
|
||||||
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
|
|
||||||
if ('microsoft' === this.vendor) {
|
|
||||||
const nbest = evt.NBest;
|
|
||||||
const language_code = evt.PrimaryLanguage?.Language || this.language;
|
|
||||||
const alternatives = nbest ? nbest.map((n) => {
|
|
||||||
return {
|
|
||||||
confidence: n.Confidence,
|
|
||||||
transcript: n.Display
|
|
||||||
};
|
|
||||||
}) :
|
|
||||||
[
|
|
||||||
{
|
|
||||||
transcript: evt.DisplayText || evt.Text
|
|
||||||
}
|
|
||||||
];
|
|
||||||
|
|
||||||
const newEvent = {
|
evt = this.normalizeTranscription(evt, this.vendor, channel, this.language);
|
||||||
is_final: evt.RecognitionStatus === 'Success',
|
|
||||||
channel,
|
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription');
|
||||||
language_code,
|
|
||||||
alternatives
|
|
||||||
};
|
|
||||||
evt = newEvent;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) {
|
if (evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) {
|
||||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again');
|
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, listen again');
|
||||||
return this._transcribe(ep);
|
return this._transcribe(ep);
|
||||||
}
|
}
|
||||||
|
|
||||||
evt.channel_tag = channel;
|
|
||||||
|
|
||||||
if (this.transcriptionHook) {
|
if (this.transcriptionHook) {
|
||||||
const b3 = this.getTracingPropagation();
|
const b3 = this.getTracingPropagation();
|
||||||
const httpHeaders = b3 && {b3};
|
const httpHeaders = b3 && {b3};
|
||||||
this.cs.requestor.request('verb:hook', this.transcriptionHook,
|
try {
|
||||||
Object.assign({speech: evt}, this.cs.callInfo), httpHeaders)
|
const json = await this.cs.requestor.request('verb:hook', this.transcriptionHook, {
|
||||||
.catch((err) => this.logger.info(err, 'TranscribeTask:_onTranscription error'));
|
...this.cs.callInfo,
|
||||||
|
...httpHeaders,
|
||||||
|
speech: evt
|
||||||
|
});
|
||||||
|
this.logger.info({json}, 'sent transcriptionHook');
|
||||||
|
if (json && Array.isArray(json) && !this.parentTask) {
|
||||||
|
const makeTask = require('./make_task');
|
||||||
|
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
|
||||||
|
if (tasks && tasks.length > 0) {
|
||||||
|
this.logger.info({tasks: tasks}, `${this.name} replacing application with ${tasks.length} tasks`);
|
||||||
|
this.cs.replaceApplication(tasks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.info(err, 'TranscribeTask:_onTranscription error');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (this.parentTask) {
|
if (this.parentTask) {
|
||||||
this.parentTask.emit('transcription', evt);
|
this.parentTask.emit('transcription', evt);
|
||||||
|
|||||||
@@ -67,6 +67,13 @@
|
|||||||
"MaxDurationExceeded": "google_transcribe::max_duration_exceeded",
|
"MaxDurationExceeded": "google_transcribe::max_duration_exceeded",
|
||||||
"VadDetected": "google_transcribe::vad_detected"
|
"VadDetected": "google_transcribe::vad_detected"
|
||||||
},
|
},
|
||||||
|
"NuanceTranscriptionEvents": {
|
||||||
|
"Transcription": "nuance_transcribe::transcription",
|
||||||
|
"StartOfSpeech": "nuance_transcribe::start_of_speech",
|
||||||
|
"TranscriptionComplete": "nuance_transcribe::end_of_transcription",
|
||||||
|
"Error": "nuance_transcribe::error",
|
||||||
|
"VadDetected": "nuance_transcribe::vad_detected"
|
||||||
|
},
|
||||||
"AwsTranscriptionEvents": {
|
"AwsTranscriptionEvents": {
|
||||||
"Transcription": "aws_transcribe::transcription",
|
"Transcription": "aws_transcribe::transcription",
|
||||||
"EndOfTranscript": "aws_transcribe::end_of_transcript",
|
"EndOfTranscript": "aws_transcribe::end_of_transcript",
|
||||||
|
|||||||
@@ -44,7 +44,13 @@ const speechMapper = (cred) => {
|
|||||||
const o = JSON.parse(decrypt(credential));
|
const o = JSON.parse(decrypt(credential));
|
||||||
obj.api_key = o.api_key;
|
obj.api_key = o.api_key;
|
||||||
}
|
}
|
||||||
|
else if ('nuance' === obj.vendor) {
|
||||||
|
const o = JSON.parse(decrypt(credential));
|
||||||
|
obj.client_id = o.client_id;
|
||||||
|
obj.secret = o.secret;
|
||||||
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
console.log(err);
|
||||||
}
|
}
|
||||||
return obj;
|
return obj;
|
||||||
};
|
};
|
||||||
@@ -65,7 +71,8 @@ module.exports = (logger, srf) => {
|
|||||||
const haveAws = speech.find((s) => s.vendor === 'aws');
|
const haveAws = speech.find((s) => s.vendor === 'aws');
|
||||||
const haveMicrosoft = speech.find((s) => s.vendor === 'microsoft');
|
const haveMicrosoft = speech.find((s) => s.vendor === 'microsoft');
|
||||||
const haveWellsaid = speech.find((s) => s.vendor === 'wellsaid');
|
const haveWellsaid = speech.find((s) => s.vendor === 'wellsaid');
|
||||||
if (!haveGoogle || !haveAws || !haveMicrosoft) {
|
const haveNuance = speech.find((s) => s.vendor === 'nuance');
|
||||||
|
if (!haveGoogle || !haveAws || !haveMicrosoft || !haveWellsaid || !haveNuance) {
|
||||||
const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
|
const [r3] = await pp.query(sqlSpeechCredentialsForSP, account_sid);
|
||||||
if (r3.length) {
|
if (r3.length) {
|
||||||
if (!haveGoogle) {
|
if (!haveGoogle) {
|
||||||
@@ -84,6 +91,10 @@ module.exports = (logger, srf) => {
|
|||||||
const wellsaid = r3.find((s) => s.vendor === 'wellsaid');
|
const wellsaid = r3.find((s) => s.vendor === 'wellsaid');
|
||||||
if (wellsaid) speech.push(speechMapper(wellsaid));
|
if (wellsaid) speech.push(speechMapper(wellsaid));
|
||||||
}
|
}
|
||||||
|
if (!haveNuance) {
|
||||||
|
const nuance = r3.find((s) => s.vendor === 'nuance');
|
||||||
|
if (nuance) speech.push(speechMapper(nuance));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -94,6 +105,7 @@ module.exports = (logger, srf) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const updateSpeechCredentialLastUsed = async(speech_credential_sid) => {
|
const updateSpeechCredentialLastUsed = async(speech_credential_sid) => {
|
||||||
|
if (!speech_credential_sid) return;
|
||||||
const pp = pool.promise();
|
const pp = pool.promise();
|
||||||
const sql = 'UPDATE speech_credentials SET last_used = NOW() WHERE speech_credential_sid = ?';
|
const sql = 'UPDATE speech_credentials SET last_used = NOW() WHERE speech_credential_sid = ?';
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -152,7 +152,8 @@ function installSrfLocals(srf, logger) {
|
|||||||
popFront,
|
popFront,
|
||||||
removeFromList,
|
removeFromList,
|
||||||
lengthOfList,
|
lengthOfList,
|
||||||
getListPosition
|
getListPosition,
|
||||||
|
getNuanceAccessToken
|
||||||
} = require('@jambonz/realtimedb-helpers')({
|
} = require('@jambonz/realtimedb-helpers')({
|
||||||
host: process.env.JAMBONES_REDIS_HOST,
|
host: process.env.JAMBONES_REDIS_HOST,
|
||||||
port: process.env.JAMBONES_REDIS_PORT || 6379
|
port: process.env.JAMBONES_REDIS_PORT || 6379
|
||||||
@@ -204,7 +205,8 @@ function installSrfLocals(srf, logger) {
|
|||||||
popFront,
|
popFront,
|
||||||
removeFromList,
|
removeFromList,
|
||||||
lengthOfList,
|
lengthOfList,
|
||||||
getListPosition
|
getListPosition,
|
||||||
|
getNuanceAccessToken
|
||||||
},
|
},
|
||||||
parentLogger: logger,
|
parentLogger: logger,
|
||||||
getSBC,
|
getSBC,
|
||||||
|
|||||||
@@ -1,9 +1,32 @@
|
|||||||
|
const {
|
||||||
|
TaskName,
|
||||||
|
AzureTranscriptionEvents,
|
||||||
|
GoogleTranscriptionEvents,
|
||||||
|
AwsTranscriptionEvents,
|
||||||
|
NuanceTranscriptionEvents
|
||||||
|
} = require('./constants');
|
||||||
|
|
||||||
module.exports = (logger) => {
|
module.exports = (logger) => {
|
||||||
const normalizeTranscription = (evt, vendor, channel) => {
|
const normalizeTranscription = (evt, vendor, channel, language) => {
|
||||||
if ('aws' === vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
|
let newEvent = JSON.parse(JSON.stringify(evt));
|
||||||
if ('microsoft' === vendor) {
|
|
||||||
|
/* add in channel_tag and provide the full vendor-specific event */
|
||||||
|
newEvent = {
|
||||||
|
...(vendor === 'aws' ? newEvent[0] : newEvent),
|
||||||
|
language_code: language,
|
||||||
|
channel_tag: channel
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
if ('aws' === vendor && Array.isArray(evt) && evt.length > 0) {
|
||||||
|
newEvent = {
|
||||||
|
...newEvent,
|
||||||
|
vendor: {event: evt, name: vendor}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
else if ('microsoft' === vendor) {
|
||||||
const nbest = evt.NBest;
|
const nbest = evt.NBest;
|
||||||
const language_code = evt.PrimaryLanguage?.Language || this.language;
|
const language_code = evt.PrimaryLanguage?.Language || language;
|
||||||
const alternatives = nbest ? nbest.map((n) => {
|
const alternatives = nbest ? nbest.map((n) => {
|
||||||
return {
|
return {
|
||||||
confidence: n.Confidence,
|
confidence: n.Confidence,
|
||||||
@@ -16,18 +39,194 @@ module.exports = (logger) => {
|
|||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
const newEvent = {
|
newEvent = {
|
||||||
|
...newEvent,
|
||||||
is_final: evt.RecognitionStatus === 'Success',
|
is_final: evt.RecognitionStatus === 'Success',
|
||||||
channel,
|
channel,
|
||||||
language_code,
|
language_code,
|
||||||
alternatives
|
alternatives,
|
||||||
|
vendor: {event: evt, name: vendor}
|
||||||
};
|
};
|
||||||
evt = newEvent;
|
|
||||||
}
|
}
|
||||||
evt.channel_tag = channel;
|
return newEvent;
|
||||||
//logger.debug({evt}, 'normalized transcription');
|
|
||||||
return evt;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return {normalizeTranscription};
|
/**
 * Build the map of channel variables that configure the speech recognizer
 * for the given task and vendor.
 *
 * @param {object} task - task being executed; only `task.name` is read, and it
 *   is compared against TaskName.Gather / TaskName.Transcribe
 * @param {object|null|undefined} sttCredentials - vendor credentials; may be absent
 * @param {object} [rOpts] - recognizer options (vendor, hints, vad, nuanceOptions, ...)
 * @returns {object} channel variable name => value
 */
const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => {
  const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
  /* hints / altLanguages are optional: normalize once so no branch dereferences undefined */
  const hints = rOpts.hints || [];
  const altLanguages = rOpts.altLanguages || [];

  /* voice activity detection works across vendors */
  let opts = {
    ...(enable && {START_RECOGNIZING_ON_VAD: 1}),
    ...(enable && voiceMs && {RECOGNIZER_VAD_VOICE_MS: voiceMs}),
    ...(enable && typeof mode === 'number' && {RECOGNIZER_VAD_MODE: mode}),
  };

  if ('google' === rOpts.vendor) {
    opts = {
      ...opts,
      ...(sttCredentials &&
        {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
      /* truthy settings first ... */
      ...(rOpts.enhancedModel && {GOOGLE_SPEECH_USE_ENHANCED: 1}),
      ...(rOpts.separateRecognitionPerChannel && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}),
      ...(rOpts.profanityFilter && {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
      ...(rOpts.punctuation && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
      ...(rOpts.words && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
      ...((rOpts.singleUtterance || task.name === TaskName.Gather) &&
        {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
      ...(rOpts.diarization && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
      ...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 &&
        {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}),
      ...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 &&
        {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}),
      /* ... then explicit `false` values, which must come later so they win */
      ...(rOpts.enhancedModel === false && {GOOGLE_SPEECH_USE_ENHANCED: 0}),
      ...(rOpts.separateRecognitionPerChannel === false &&
        {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}),
      ...(rOpts.profanityFilter === false && {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
      ...(rOpts.punctuation === false && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
      ...(rOpts.words === false && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
      ...((rOpts.singleUtterance === false || task.name === TaskName.Transcribe) &&
        {GOOGLE_SPEECH_SINGLE_UTTERANCE: 0}),
      ...(rOpts.diarization === false && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
      ...(hints.length > 0 && {GOOGLE_SPEECH_HINTS: hints.join(',')}),
      ...(typeof rOpts.hintsBoost === 'number' && {GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
      ...(altLanguages.length > 0 &&
        {GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: altLanguages.join(',')}),
      ...(rOpts.interactionType &&
        {GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}),
      GOOGLE_SPEECH_MODEL: rOpts.model || (task.name === TaskName.Gather ? 'command_and_search' : 'phone_call'),
      ...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
    };
  }
  else if (['aws', 'polly'].includes(rOpts.vendor)) {
    opts = {
      ...opts,
      ...(rOpts.vocabularyName && {AWS_VOCABULARY_NAME: rOpts.vocabularyName}),
      ...(rOpts.vocabularyFilterName && {AWS_VOCABULARY_FILTER_NAME: rOpts.vocabularyFilterName}),
      ...(rOpts.filterMethod && {AWS_VOCABULARY_FILTER_METHOD: rOpts.filterMethod}),
      ...(sttCredentials && {
        AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId,
        AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey,
        AWS_REGION: sttCredentials.region
      }),
    };
  }
  else if ('microsoft' === rOpts.vendor) {
    opts = {
      ...opts,
      ...(hints.length > 0 && {AZURE_SPEECH_HINTS: hints.map((h) => h.trim()).join(',')}),
      /**
       * Alternative languages used to be written to AZURE_SERVICE_ENDPOINT_ID with an
       * undefined value (rOpts.sttCredentials); pass the language list instead.
       * NOTE(review): confirm the variable name against the azure transcribe module.
       */
      ...(altLanguages.length > 0 &&
        {AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: altLanguages.join(',')}),
      ...(rOpts.requestSnr && {AZURE_REQUEST_SNR: 1}),
      ...(rOpts.profanityOption && {AZURE_PROFANITY_OPTION: rOpts.profanityOption}),
      ...(rOpts.azureServiceEndpoint && {AZURE_SERVICE_ENDPOINT: rOpts.azureServiceEndpoint}),
      ...(rOpts.initialSpeechTimeoutMs > 0 &&
        {AZURE_INITIAL_SPEECH_TIMEOUT_MS: rOpts.initialSpeechTimeoutMs}),
      ...(rOpts.audioLogging && {AZURE_AUDIO_LOGGING: 1}),
      AZURE_USE_OUTPUT_FORMAT_DETAILED: 1,
      ...(sttCredentials && {
        AZURE_SUBSCRIPTION_KEY: sttCredentials.api_key,
        AZURE_REGION: sttCredentials.region,
      }),
      /* credentials are optional here as well, so guard before reading them */
      ...(sttCredentials?.use_custom_stt && sttCredentials.custom_stt_endpoint &&
        {AZURE_SERVICE_ENDPOINT_ID: sttCredentials.custom_stt_endpoint})
    };
  }
  else if ('nuance' === rOpts.vendor) {
    /**
     * Note: all nuance options are in recognizer.nuanceOptions, should migrate
     * other vendor settings to similar nested structure
     */
    const {nuanceOptions = {}} = rOpts;

    /* nuanceOptions property => channel variable; a value is copied as-is when truthy */
    const passThrough = [
      ['topic', 'NUANCE_TOPIC'],
      ['utteranceDetectionMode', 'NUANCE_UTTERANCE_DETECTION_MODE'],
      ['punctuation', 'NUANCE_PUNCTUATION'],
      ['profanityFilter', 'NUANCE_FILTER_PROFANITY'],
      ['includeTokenization', 'NUANCE_INCLUDE_TOKENIZATION'],
      ['discardSpeakerAdaptation', 'NUANCE_DISCARD_SPEAKER_ADAPTATION'],
      ['suppressCallRecording', 'NUANCE_SUPPRESS_CALL_RECORDING'],
      ['maskLoadFailures', 'NUANCE_MASK_LOAD_FAILURES'],
      ['suppressInitialCapitalization', 'NUANCE_SUPPRESS_INITIAL_CAPITALIZATION'],
      ['allowZeroBaseLmWeight', 'NUANCE_ALLOW_ZERO_BASE_LM_WEIGHT'],
      ['filterWakeupWord', 'NUANCE_FILTER_WAKEUP_WORD'],
      /* previously `resultType || interim ? 'partial' : 'final'`, which always yielded 'partial' */
      ['resultType', 'NUANCE_RESULT_TYPE'],
      ['noInputTimeoutMs', 'NUANCE_NO_INPUT_TIMEOUT_MS'],
      ['recognitionTimeoutMs', 'NUANCE_RECOGNITION_TIMEOUT_MS'],
      ['utteranceEndSilenceMs', 'NUANCE_UTTERANCE_END_SILENCE_MS'],
      ['maxHypotheses', 'NUANCE_MAX_HYPOTHESES'],
      ['speechDomain', 'NUANCE_SPEECH_DOMAIN'],
      ['formatting', 'NUANCE_FORMATTING'],
    ];

    opts = {
      ...opts,
      ...(sttCredentials?.access_token && {NUANCE_ACCESS_TOKEN: sttCredentials.access_token}),
      ...(sttCredentials?.krypton_endpoint &&
        {NUANCE_KRYPTON_ENDPOINT: sttCredentials.krypton_endpoint}),
    };
    for (const [prop, channelVar] of passThrough) {
      if (nuanceOptions[prop]) opts[channelVar] = nuanceOptions[prop];
    }
    /* resources is a structured value, so it is serialized rather than passed through */
    if (nuanceOptions.resources) opts.NUANCE_RESOURCES = JSON.stringify(nuanceOptions.resources);
  }
  logger.debug({opts}, 'recognizer channel vars');
  return opts;
};
|
||||||
|
|
||||||
|
/**
 * Detach every vendor transcription custom-event listener from an endpoint.
 *
 * @param {object} ep - endpoint exposing removeCustomEventListener(eventName)
 */
const removeSpeechListeners = (ep) => {
  /* same events, in the same order, for google, aws, azure and nuance */
  const eventNames = [
    GoogleTranscriptionEvents.Transcription,
    GoogleTranscriptionEvents.EndOfUtterance,
    GoogleTranscriptionEvents.VadDetected,

    AwsTranscriptionEvents.Transcription,
    AwsTranscriptionEvents.VadDetected,

    AzureTranscriptionEvents.Transcription,
    AzureTranscriptionEvents.NoSpeechDetected,
    AzureTranscriptionEvents.VadDetected,

    NuanceTranscriptionEvents.Transcription,
    NuanceTranscriptionEvents.TranscriptionComplete,
    NuanceTranscriptionEvents.StartOfSpeech,
    NuanceTranscriptionEvents.Error,
    NuanceTranscriptionEvents.VadDetected,
  ];
  for (const eventName of eventNames) {
    ep.removeCustomEventListener(eventName);
  }
};
|
||||||
|
return {
|
||||||
|
normalizeTranscription,
|
||||||
|
setChannelVarsForStt,
|
||||||
|
removeSpeechListeners
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
804
package-lock.json
generated
804
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
36
package.json
36
package.json
@@ -19,33 +19,33 @@
|
|||||||
"bugs": {},
|
"bugs": {},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "node app",
|
"start": "node app",
|
||||||
"test": "NODE_ENV=test JAMBONES_HOSTING=1 HTTP_POOL=1 DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=error ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:ClueCon:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ",
|
"test": "NODE_ENV=test JAMBONES_HOSTING=1 HTTP_POOL=1 DRACHTIO_HOST=127.0.0.1 DRACHTIO_PORT=9060 DRACHTIO_SECRET=cymru JAMBONES_MYSQL_HOST=127.0.0.1 JAMBONES_MYSQL_PORT=3360 JAMBONES_MYSQL_USER=jambones_test JAMBONES_MYSQL_PASSWORD=jambones_test JAMBONES_MYSQL_DATABASE=jambones_test JAMBONES_REDIS_HOST=127.0.0.1 JAMBONES_REDIS_PORT=16379 JAMBONES_LOGLEVEL=error ENABLE_METRICS=0 HTTP_PORT=3000 JAMBONES_SBCS=172.38.0.10 JAMBONES_FREESWITCH=127.0.0.1:8022:JambonzR0ck$:docker-host JAMBONES_TIME_SERIES_HOST=127.0.0.1 JAMBONES_NETWORK_CIDR=172.38.0.0/16 node test/ ",
|
||||||
"coverage": "./node_modules/.bin/nyc --reporter html --report-dir ./coverage npm run test",
|
"coverage": "./node_modules/.bin/nyc --reporter html --report-dir ./coverage npm run test",
|
||||||
"jslint": "eslint app.js lib"
|
"jslint": "eslint app.js lib"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@jambonz/http-health-check": "^0.0.1",
|
|
||||||
"@jambonz/db-helpers": "^0.7.0",
|
"@jambonz/db-helpers": "^0.7.0",
|
||||||
"@jambonz/realtimedb-helpers": "^0.4.35",
|
"@jambonz/realtimedb-helpers": "^0.5.1",
|
||||||
|
"@jambonz/http-health-check": "^0.0.1",
|
||||||
"@jambonz/stats-collector": "^0.1.6",
|
"@jambonz/stats-collector": "^0.1.6",
|
||||||
"@jambonz/time-series": "^0.2.5",
|
"@jambonz/time-series": "^0.2.5",
|
||||||
"@opentelemetry/api": "^1.1.0",
|
"@opentelemetry/api": "^1.2.0",
|
||||||
"@opentelemetry/exporter-jaeger": "^1.3.1",
|
"@opentelemetry/exporter-jaeger": "^1.7.0",
|
||||||
"@opentelemetry/exporter-trace-otlp-http": "^0.27.0",
|
"@opentelemetry/exporter-trace-otlp-http": "^0.27.0",
|
||||||
"@opentelemetry/exporter-zipkin": "^1.3.1",
|
"@opentelemetry/exporter-zipkin": "^1.7.0",
|
||||||
"@opentelemetry/instrumentation": "^0.27.0",
|
"@opentelemetry/instrumentation": "^0.27.0",
|
||||||
"@opentelemetry/resources": "^1.3.1",
|
"@opentelemetry/resources": "^1.7.0",
|
||||||
"@opentelemetry/sdk-trace-base": "^1.3.1",
|
"@opentelemetry/sdk-trace-base": "^1.7.0",
|
||||||
"@opentelemetry/sdk-trace-node": "^1.3.1",
|
"@opentelemetry/sdk-trace-node": "^1.7.0",
|
||||||
"@opentelemetry/semantic-conventions": "^1.3.1",
|
"@opentelemetry/semantic-conventions": "^1.7.0",
|
||||||
"aws-sdk": "^2.1152.0",
|
"aws-sdk": "^2.1233.0",
|
||||||
"bent": "^7.3.12",
|
"bent": "^7.3.12",
|
||||||
"debug": "^4.3.4",
|
"debug": "^4.3.4",
|
||||||
"deepcopy": "^2.1.0",
|
"deepcopy": "^2.1.0",
|
||||||
"drachtio-fsmrf": "^3.0.3",
|
"drachtio-fsmrf": "^3.0.5",
|
||||||
"drachtio-srf": "^4.5.1",
|
"drachtio-srf": "^4.5.18",
|
||||||
"express": "^4.18.1",
|
"express": "^4.18.2",
|
||||||
"helmet": "^5.1.0",
|
"helmet": "^5.1.1",
|
||||||
"ip": "^1.1.8",
|
"ip": "^1.1.8",
|
||||||
"moment": "^2.29.4",
|
"moment": "^2.29.4",
|
||||||
"parse-url": "^8.1.0",
|
"parse-url": "^8.1.0",
|
||||||
@@ -53,10 +53,10 @@
|
|||||||
"sdp-transform": "^2.14.1",
|
"sdp-transform": "^2.14.1",
|
||||||
"short-uuid": "^4.2.0",
|
"short-uuid": "^4.2.0",
|
||||||
"to-snake-case": "^1.0.0",
|
"to-snake-case": "^1.0.0",
|
||||||
"undici": "^5.8.2",
|
"undici": "^5.11.0",
|
||||||
"uuid": "^8.3.2",
|
"uuid": "^8.3.2",
|
||||||
"verify-aws-sns-signature": "^0.1.0",
|
"verify-aws-sns-signature": "^0.1.0",
|
||||||
"ws": "^8.8.0",
|
"ws": "^8.9.0",
|
||||||
"xml2js": "^0.4.23"
|
"xml2js": "^0.4.23"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
@@ -64,7 +64,7 @@
|
|||||||
"eslint": "^7.32.0",
|
"eslint": "^7.32.0",
|
||||||
"eslint-plugin-promise": "^4.3.1",
|
"eslint-plugin-promise": "^4.3.1",
|
||||||
"nyc": "^15.1.0",
|
"nyc": "^15.1.0",
|
||||||
"tape": "^5.5.3"
|
"tape": "^5.6.1"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"bufferutil": "^4.0.6",
|
"bufferutil": "^4.0.6",
|
||||||
|
|||||||
@@ -22,11 +22,17 @@ test('creating schema', (t) => {
|
|||||||
const google_credential = encrypt(process.env.GCP_JSON_KEY);
|
const google_credential = encrypt(process.env.GCP_JSON_KEY);
|
||||||
const aws_credential = encrypt(JSON.stringify({
|
const aws_credential = encrypt(JSON.stringify({
|
||||||
access_key_id: process.env.AWS_ACCESS_KEY_ID,
|
access_key_id: process.env.AWS_ACCESS_KEY_ID,
|
||||||
secret_access_key: process.env.AWS_SECRET_ACCESS_KEY
|
secret_access_key: process.env.AWS_SECRET_ACCESS_KEY,
|
||||||
|
aws_region: process.env.AWS_REGION
|
||||||
|
}));
|
||||||
|
const microsoft_credential = encrypt(JSON.stringify({
|
||||||
|
region: process.env.MICROSOFT_REGION || 'useast',
|
||||||
|
api_key: process.env.MICROSOFT_API_KEY || '1234567890'
|
||||||
}));
|
}));
|
||||||
const cmd = `
|
const cmd = `
|
||||||
UPDATE speech_credentials SET credential='${google_credential}' WHERE vendor='google';
|
UPDATE speech_credentials SET credential='${google_credential}' WHERE vendor='google';
|
||||||
UPDATE speech_credentials SET credential='${aws_credential}' WHERE vendor='aws';
|
UPDATE speech_credentials SET credential='${aws_credential}' WHERE vendor='aws';
|
||||||
|
UPDATE speech_credentials SET credential='${microsoft_credential}' WHERE vendor='microsoft';
|
||||||
`;
|
`;
|
||||||
const path = `${__dirname}/.creds.sql`;
|
const path = `${__dirname}/.creds.sql`;
|
||||||
fs.writeFileSync(path, cmd);
|
fs.writeFileSync(path, cmd);
|
||||||
|
|||||||
@@ -614,7 +614,10 @@ CREATE TABLE `speech_credentials` (
|
|||||||
|
|
||||||
LOCK TABLES `speech_credentials` WRITE;
|
LOCK TABLES `speech_credentials` WRITE;
|
||||||
/*!40000 ALTER TABLE `speech_credentials` DISABLE KEYS */;
|
/*!40000 ALTER TABLE `speech_credentials` DISABLE KEYS */;
|
||||||
INSERT INTO `speech_credentials` VALUES ('2add163c-34f2-45c6-a016-f955d218ffb6',NULL,'bb845d4b-83a9-4cde-a6e9-50f3743bab3f','google','credential-goes-here',1,1,NULL,'2021-04-03 15:42:10',1,1),('84154212-5c99-4c94-8993-bc2a46288daa',NULL,'bb845d4b-83a9-4cde-a6e9-50f3743bab3f','aws','credential-goes-here',0,0,NULL,NULL,NULL,NULL);
|
INSERT INTO `speech_credentials` VALUES
|
||||||
|
('2add163c-34f2-45c6-a016-f955d218ffb6',NULL,'bb845d4b-83a9-4cde-a6e9-50f3743bab3f','google','credential-goes-here',1,1,NULL,'2021-04-03 15:42:10',1,1),
|
||||||
|
('2add347f-34f2-45c6-a016-f955d218ffb6',NULL,'bb845d4b-83a9-4cde-a6e9-50f3743bab3f','microsoft','credential-goes-here',1,1,NULL,'2021-04-03 15:42:10',1,1),
|
||||||
|
('84154212-5c99-4c94-8993-bc2a46288daa',NULL,'bb845d4b-83a9-4cde-a6e9-50f3743bab3f','aws','credential-goes-here',1,1,NULL,NULL,NULL,NULL);
|
||||||
/*!40000 ALTER TABLE `speech_credentials` ENABLE KEYS */;
|
/*!40000 ALTER TABLE `speech_credentials` ENABLE KEYS */;
|
||||||
UNLOCK TABLES;
|
UNLOCK TABLES;
|
||||||
|
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ services:
|
|||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
||||||
freeswitch:
|
freeswitch:
|
||||||
image: drachtio/drachtio-freeswitch-mrf:v1.10.1-full
|
image: drachtio/drachtio-freeswitch-mrf:0.4.15
|
||||||
restart: always
|
restart: always
|
||||||
command: freeswitch --rtp-range-start 20000 --rtp-range-end 20100
|
command: freeswitch --rtp-range-start 20000 --rtp-range-end 20100
|
||||||
environment:
|
environment:
|
||||||
@@ -68,7 +68,7 @@ services:
|
|||||||
- /tmp:/tmp
|
- /tmp:/tmp
|
||||||
- ./credentials:/opt/credentials
|
- ./credentials:/opt/credentials
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ['CMD', 'fs_cli' ,'-x', '"sofia status"']
|
test: ['CMD', 'fs_cli' ,'-p', 'JambonzR0ck$$', '-x', '"sofia status"']
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 15
|
retries: 15
|
||||||
networks:
|
networks:
|
||||||
|
|||||||
@@ -17,7 +17,11 @@ function connect(connectable) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
test('\'gather\' and \'transcribe\' tests', async(t) => {
|
test('\'gather\' test - google', async(t) => {
|
||||||
|
if (!process.env.GCP_JSON_KEY) {
|
||||||
|
t.pass('skipping google tests');
|
||||||
|
return t.end();
|
||||||
|
}
|
||||||
clearModule.all();
|
clearModule.all();
|
||||||
const {srf, disconnect} = require('../app');
|
const {srf, disconnect} = require('../app');
|
||||||
|
|
||||||
@@ -42,7 +46,7 @@ test('\'gather\' and \'transcribe\' tests', async(t) => {
|
|||||||
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
|
||||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
|
||||||
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
t.ok(obj.body.speech.alternatives[0].transcript = 'I\'d like to speak to customer support',
|
||||||
'gather: succeeds when using account credentials');
|
'gather: succeeds when using google credentials');
|
||||||
|
|
||||||
disconnect();
|
disconnect();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -51,3 +55,81 @@ test('\'gather\' and \'transcribe\' tests', async(t) => {
|
|||||||
t.error(err);
|
t.error(err);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/* gather using microsoft speech credentials; skipped when no Azure credentials are in the environment */
test('\'gather\' test - microsoft', async(t) => {
  if (!process.env.MICROSOFT_REGION || !process.env.MICROSOFT_API_KEY) {
    t.pass('skipping microsoft tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);
    // GIVEN
    const verbs = [
      {
        verb: 'gather',
        input: ['speech'],
        recognizer: {
          vendor: 'microsoft',
          hints: ['customer support', 'sales', 'human resources', 'HR']
        },
        timeout: 10,
        actionHook: '/actionHook'
      }
    ];
    const from = 'gather_success';
    provisionCallHook(from, verbs);
    // THEN
    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
    const obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    /* compare (the original assigned with `=`, which always passed); tolerate case/punctuation differences */
    const transcript = obj.body.speech.alternatives[0].transcript;
    t.ok(transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'gather: succeeds when using microsoft credentials');

    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});
|
||||||
|
|
||||||
|
/* gather using aws speech credentials; skipped when no AWS credentials are in the environment */
test('\'gather\' test - aws', async(t) => {
  if (!process.env.AWS_ACCESS_KEY_ID || !process.env.AWS_SECRET_ACCESS_KEY) {
    t.pass('skipping aws tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);
    // GIVEN
    const verbs = [
      {
        verb: 'gather',
        input: ['speech'],
        recognizer: {
          vendor: 'aws',
          hints: ['customer support', 'sales', 'human resources', 'HR']
        },
        timeout: 10,
        actionHook: '/actionHook'
      }
    ];
    const from = 'gather_success';
    provisionCallHook(from, verbs);
    // THEN
    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
    const obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    /* compare (the original assigned with `=`, which always passed); tolerate case/punctuation differences */
    const transcript = obj.body.speech.alternatives[0].transcript;
    t.ok(transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'gather: succeeds when using aws credentials');

    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});
|
||||||
@@ -5,6 +5,7 @@ require('./account-validation-tests');
|
|||||||
require('./webhooks-tests');
|
require('./webhooks-tests');
|
||||||
require('./say-tests');
|
require('./say-tests');
|
||||||
require('./gather-tests');
|
require('./gather-tests');
|
||||||
|
require('./transcribe-tests');
|
||||||
require('./sip-request-tests');
|
require('./sip-request-tests');
|
||||||
require('./create-call-test');
|
require('./create-call-test');
|
||||||
require('./play-tests');
|
require('./play-tests');
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ obj.sippUac = (file, bindAddress, from='sipp', to='16174000000') => {
|
|||||||
|
|
||||||
if (bindAddress) args.splice(5, 0, '--ip', bindAddress);
|
if (bindAddress) args.splice(5, 0, '--ip', bindAddress);
|
||||||
|
|
||||||
console.log(args.join(' '));
|
//console.log(args.join(' '));
|
||||||
clearOutput();
|
clearOutput();
|
||||||
|
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
|
|||||||
129
test/transcribe-tests.js
Normal file
129
test/transcribe-tests.js
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
const test = require('tape');
|
||||||
|
const { sippUac } = require('./sipp')('test_fs');
|
||||||
|
const bent = require('bent');
|
||||||
|
const getJSON = bent('json')
|
||||||
|
const clearModule = require('clear-module');
|
||||||
|
const {provisionCallHook} = require('./utils')
|
||||||
|
|
||||||
|
/* log any promise rejection that no test handled */
process.on('unhandledRejection', (why, promise) => {
  console.log('Unhandled Rejection at: Promise', promise, 'reason:', why);
});
|
||||||
|
|
||||||
|
/**
 * Resolve once the given emitter fires its 'connect' event.
 *
 * Uses `once` so the listener is removed after the first event rather than
 * staying registered for the life of the emitter.
 *
 * @param {object} connectable - event emitter that emits 'connect'
 * @returns {Promise<void>}
 */
function connect(connectable) {
  return new Promise((resolve) => {
    connectable.once('connect', () => resolve());
  });
}
|
||||||
|
|
||||||
|
/* transcribe using google speech credentials; skipped when GCP_JSON_KEY is not in the environment */
test('\'transcribe\' test - google', async(t) => {
  if (!process.env.GCP_JSON_KEY) {
    t.pass('skipping google tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);
    // GIVEN
    const verbs = [
      {
        verb: 'transcribe',
        recognizer: {
          vendor: 'google',
          hints: ['customer support', 'sales', 'human resources', 'HR']
        },
        transcriptionHook: '/transcriptionHook'
      }
    ];
    const from = 'gather_success';
    provisionCallHook(from, verbs);
    // THEN
    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
    const obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    /* compare (the original assigned with `=`, which always passed); tolerate case/punctuation differences */
    const transcript = obj.body.speech.alternatives[0].transcript;
    t.ok(transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'transcribe: succeeds when using google credentials');

    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});
|
||||||
|
|
||||||
|
/* transcribe using microsoft speech credentials; skipped when no Azure credentials are in the environment */
test('\'transcribe\' test - microsoft', async(t) => {
  if (!process.env.MICROSOFT_REGION || !process.env.MICROSOFT_API_KEY) {
    t.pass('skipping microsoft tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);
    // GIVEN
    const verbs = [
      {
        verb: 'transcribe',
        recognizer: {
          vendor: 'microsoft',
          hints: ['customer support', 'sales', 'human resources', 'HR']
        },
        transcriptionHook: '/transcriptionHook'
      }
    ];
    const from = 'gather_success';
    provisionCallHook(from, verbs);
    // THEN
    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
    const obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    /* compare (the original assigned with `=`, which always passed); tolerate case/punctuation differences */
    const transcript = obj.body.speech.alternatives[0].transcript;
    t.ok(transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'transcribe: succeeds when using microsoft credentials');

    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});
|
||||||
|
|
||||||
|
/* transcribe using aws speech credentials; skipped when no AWS credentials are in the environment */
test('\'transcribe\' test - aws', async(t) => {
  if (!process.env.AWS_ACCESS_KEY_ID || !process.env.AWS_SECRET_ACCESS_KEY) {
    t.pass('skipping aws tests');
    return t.end();
  }
  clearModule.all();
  const {srf, disconnect} = require('../app');

  try {
    await connect(srf);
    // GIVEN
    const verbs = [
      {
        verb: 'transcribe',
        recognizer: {
          vendor: 'aws',
          hints: ['customer support', 'sales', 'human resources', 'HR']
        },
        transcriptionHook: '/transcriptionHook'
      }
    ];
    const from = 'gather_success';
    provisionCallHook(from, verbs);
    // THEN
    await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
    const obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
    /* compare (the original assigned with `=`, which always passed); tolerate case/punctuation differences */
    const transcript = obj.body.speech.alternatives[0].transcript;
    t.ok(transcript.toLowerCase().startsWith('i\'d like to speak to customer support'),
      'transcribe: succeeds when using aws credentials');

    disconnect();
  } catch (err) {
    console.log(`error received: ${err}`);
    disconnect();
    t.error(err);
  }
});
|
||||||
@@ -39,7 +39,16 @@ app.post('/callStatus', (req, res) => {
|
|||||||
return res.sendStatus(200);
|
return res.sendStatus(200);
|
||||||
});
|
});
|
||||||
/*
|
/*
|
||||||
* action Hook
|
* transcriptionHook
|
||||||
|
*/
|
||||||
|
app.post('/transcriptionHook', (req, res) => {
  const {body} = req;
  console.log({payload: body}, 'POST /transcriptionHook');
  /* recorded under the "<from>_actionHook" key, then the call is hung up */
  const mapKey = `${body.from}_actionHook`;
  addRequestToMap(mapKey, req, hook_mapping);
  return res.json([{verb: 'hangup'}]);
});
|
||||||
|
/*
|
||||||
|
* actionHook
|
||||||
*/
|
*/
|
||||||
app.post('/actionHook', (req, res) => {
|
app.post('/actionHook', (req, res) => {
|
||||||
console.log({payload: req.body}, 'POST /actionHook');
|
console.log({payload: req.body}, 'POST /actionHook');
|
||||||
|
|||||||
Reference in New Issue
Block a user