mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2026-02-13 17:59:42 +00:00
Compare commits
12 Commits
v0.9.6-rc3
...
feat/fallb
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8ad047b605 | ||
|
|
b6c307db70 | ||
|
|
aa161290c7 | ||
|
|
4322159a41 | ||
|
|
848aa43dcb | ||
|
|
18d7ea3e37 | ||
|
|
09961f564a | ||
|
|
e9f2837370 | ||
|
|
a97d99650c | ||
|
|
541cb1458d | ||
|
|
5754c386d3 | ||
|
|
b1c0478051 |
@@ -180,6 +180,13 @@ class CallSession extends Emitter {
|
|||||||
this.application.speech_synthesis_vendor = vendor;
|
this.application.speech_synthesis_vendor = vendor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * fallback vendor for speech synthesis, as stored on the application;
 * yields null unless application.use_for_fallback_speech is truthy
 */
get fallbackSpeechSynthesisVendor() {
|
||||||
|
return this.application.use_for_fallback_speech ? this.application.fallback_speech_synthesis_vendor : null;
|
||||||
|
}
|
||||||
|
/** store the fallback speech synthesis vendor on the application */
set fallbackSpeechSynthesisVendor(vendor) {
|
||||||
|
this.application.fallback_speech_synthesis_vendor = vendor;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* default label to use for speech synthesis if not provided in the app
|
* default label to use for speech synthesis if not provided in the app
|
||||||
*/
|
*/
|
||||||
@@ -189,6 +196,13 @@ class CallSession extends Emitter {
|
|||||||
set speechSynthesisLabel(label) {
|
set speechSynthesisLabel(label) {
|
||||||
this.application.speech_synthesis_label = label;
|
this.application.speech_synthesis_label = label;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** fallback label for speech synthesis, as stored on the application */
get fallbackSpeechSynthesisLabel() {
|
||||||
|
return this.application.fallback_speech_synthesis_label;
|
||||||
|
}
|
||||||
|
/** store the fallback speech synthesis label on the application */
set fallbackSpeechSynthesisLabel(label) {
|
||||||
|
this.application.fallback_speech_synthesis_label = label;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* default voice to use for speech synthesis if not provided in the app
|
* default voice to use for speech synthesis if not provided in the app
|
||||||
*/
|
*/
|
||||||
@@ -198,6 +212,13 @@ class CallSession extends Emitter {
|
|||||||
set speechSynthesisVoice(voice) {
|
set speechSynthesisVoice(voice) {
|
||||||
this.application.speech_synthesis_voice = voice;
|
this.application.speech_synthesis_voice = voice;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** fallback voice for speech synthesis, as stored on the application */
get fallbackSpeechSynthesisVoice() {
|
||||||
|
return this.application.fallback_speech_synthesis_voice;
|
||||||
|
}
|
||||||
|
/** store the fallback speech synthesis voice on the application */
set fallbackSpeechSynthesisVoice(voice) {
|
||||||
|
this.application.fallback_speech_synthesis_voice = voice;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* default language to use for speech synthesis if not provided in the app
|
* default language to use for speech synthesis if not provided in the app
|
||||||
*/
|
*/
|
||||||
@@ -208,6 +229,13 @@ class CallSession extends Emitter {
|
|||||||
this.application.speech_synthesis_language = language;
|
this.application.speech_synthesis_language = language;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** fallback language for speech synthesis, as stored on the application */
get fallbackSpeechSynthesisLanguage() {
|
||||||
|
return this.application.fallback_speech_synthesis_language;
|
||||||
|
}
|
||||||
|
/** store the fallback speech synthesis language on the application */
set fallbackSpeechSynthesisLanguage(language) {
|
||||||
|
this.application.fallback_speech_synthesis_language = language;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* default vendor to use for speech recognition if not provided in the app
|
* default vendor to use for speech recognition if not provided in the app
|
||||||
*/
|
*/
|
||||||
@@ -217,6 +245,13 @@ class CallSession extends Emitter {
|
|||||||
set speechRecognizerVendor(vendor) {
|
set speechRecognizerVendor(vendor) {
|
||||||
this.application.speech_recognizer_vendor = vendor;
|
this.application.speech_recognizer_vendor = vendor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** fallback vendor for speech recognition, as stored on the application */
get fallbackSpeechRecognizerVendor() {
|
||||||
|
return this.application.fallback_speech_recognizer_vendor;
|
||||||
|
}
|
||||||
|
/** store the fallback speech recognition vendor on the application */
set fallbackSpeechRecognizerVendor(vendor) {
|
||||||
|
this.application.fallback_speech_recognizer_vendor = vendor;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* default label to use for speech recognition if not provided in the app
|
* default label to use for speech recognition if not provided in the app
|
||||||
*/
|
*/
|
||||||
@@ -226,6 +261,13 @@ class CallSession extends Emitter {
|
|||||||
set speechRecognizerLabel(label) {
|
set speechRecognizerLabel(label) {
|
||||||
this.application.speech_recognizer_label = label;
|
this.application.speech_recognizer_label = label;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** fallback label for speech recognition, as stored on the application */
get fallbackSpeechRecognizerLabel() {
|
||||||
|
return this.application.fallback_speech_recognizer_label;
|
||||||
|
}
|
||||||
|
/** store the fallback speech recognition label on the application */
set fallbackSpeechRecognizerLabel(label) {
|
||||||
|
this.application.fallback_speech_recognizer_label = label;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* default language to use for speech recognition if not provided in the app
|
* default language to use for speech recognition if not provided in the app
|
||||||
*/
|
*/
|
||||||
@@ -236,6 +278,13 @@ class CallSession extends Emitter {
|
|||||||
this.application.speech_recognizer_language = language;
|
this.application.speech_recognizer_language = language;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** fallback language for speech recognition, as stored on the application */
get fallbackSpeechRecognizerLanguage() {
|
||||||
|
return this.application.fallback_speech_recognizer_language;
|
||||||
|
}
|
||||||
|
/** store the fallback speech recognition language on the application */
set fallbackSpeechRecognizerLanguage(language) {
|
||||||
|
this.application.fallback_speech_recognizer_language = language;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* indicates whether the call currently in progress
|
* indicates whether the call currently in progress
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -114,6 +114,19 @@ class TaskConfig extends Task {
|
|||||||
cs.speechSynthesisVoice = this.synthesizer.voice !== 'default'
|
cs.speechSynthesisVoice = this.synthesizer.voice !== 'default'
|
||||||
? this.synthesizer.voice
|
? this.synthesizer.voice
|
||||||
: cs.speechSynthesisVoice;
|
: cs.speechSynthesisVoice;
|
||||||
|
// fallback vendor
|
||||||
|
cs.fallbackSpeechSynthesisVendor = this.synthesizer.fallbackVendor !== 'default'
|
||||||
|
? this.synthesizer.fallbackVendor
|
||||||
|
: cs.fallbackSpeechSynthesisVendor;
|
||||||
|
cs.fallbackSpeechSynthesisLabel = this.synthesizer.fallbackLabel !== 'default'
|
||||||
|
? this.synthesizer.fallbackLabel
|
||||||
|
: cs.fallbackSpeechSynthesisLabel;
|
||||||
|
cs.fallbackSpeechSynthesisLanguage = this.synthesizer.fallbackLanguage !== 'default'
|
||||||
|
? this.synthesizer.fallbackLanguage
|
||||||
|
: cs.fallbackSpeechSynthesisLanguage;
|
||||||
|
cs.fallbackSpeechSynthesisVoice = this.synthesizer.fallbackVoice !== 'default'
|
||||||
|
? this.synthesizer.fallbackVoice
|
||||||
|
: cs.fallbackSpeechSynthesisVoice;
|
||||||
this.logger.info({synthesizer: this.synthesizer}, 'Config: updated synthesizer');
|
this.logger.info({synthesizer: this.synthesizer}, 'Config: updated synthesizer');
|
||||||
}
|
}
|
||||||
if (this.hasRecognizer) {
|
if (this.hasRecognizer) {
|
||||||
@@ -126,6 +139,17 @@ class TaskConfig extends Task {
|
|||||||
cs.speechRecognizerLanguage = this.recognizer.language !== 'default'
|
cs.speechRecognizerLanguage = this.recognizer.language !== 'default'
|
||||||
? this.recognizer.language
|
? this.recognizer.language
|
||||||
: cs.speechRecognizerLanguage;
|
: cs.speechRecognizerLanguage;
|
||||||
|
|
||||||
|
//fallback
|
||||||
|
cs.fallbackSpeechRecognizerVendor = this.recognizer.fallbackVendor !== 'default'
|
||||||
|
? this.recognizer.fallbackVendor
|
||||||
|
: cs.fallbackSpeechRecognizerVendor;
|
||||||
|
cs.fallbackSpeechRecognizerLabel = this.recognizer.fallbackLabel !== 'default'
|
||||||
|
? this.recognizer.fallbackLabel
|
||||||
|
: cs.fallbackSpeechRecognizerLabel;
|
||||||
|
cs.fallbackSpeechRecognizerLanguage = this.recognizer.fallbackLanguage !== 'default'
|
||||||
|
? this.recognizer.fallbackLanguage
|
||||||
|
: cs.fallbackSpeechRecognizerLanguage;
|
||||||
cs.isContinuousAsr = typeof this.recognizer.asrTimeout === 'number' ? true : false;
|
cs.isContinuousAsr = typeof this.recognizer.asrTimeout === 'number' ? true : false;
|
||||||
if (cs.isContinuousAsr) {
|
if (cs.isContinuousAsr) {
|
||||||
cs.asrTimeout = this.recognizer.asrTimeout;
|
cs.asrTimeout = this.recognizer.asrTimeout;
|
||||||
|
|||||||
@@ -59,6 +59,12 @@ class Dialogflow extends Task {
|
|||||||
this.language = this.data.tts.language || 'default';
|
this.language = this.data.tts.language || 'default';
|
||||||
this.voice = this.data.tts.voice || 'default';
|
this.voice = this.data.tts.voice || 'default';
|
||||||
this.speechSynthesisLabel = this.data.tts.label || null;
|
this.speechSynthesisLabel = this.data.tts.label || null;
|
||||||
|
|
||||||
|
// fallback tts
|
||||||
|
this.fallbackVendor = this.data.tts.fallbackVendor || 'default';
|
||||||
|
this.fallbackLanguage = this.data.tts.fallbackLanguage || 'default';
|
||||||
|
// the fallback voice comes from tts.fallbackVoice (it was mistakenly read from tts.fallbackLanguage)
this.fallbackVoice = this.data.tts.fallbackVoice || 'default';
|
||||||
|
this.fallbackLabel = this.data.tts.fallbackLabel || 'default';
|
||||||
}
|
}
|
||||||
this.bargein = this.data.bargein;
|
this.bargein = this.data.bargein;
|
||||||
}
|
}
|
||||||
@@ -119,9 +125,15 @@ class Dialogflow extends Task {
|
|||||||
this.vendor = cs.speechSynthesisVendor;
|
this.vendor = cs.speechSynthesisVendor;
|
||||||
this.language = cs.speechSynthesisLanguage;
|
this.language = cs.speechSynthesisLanguage;
|
||||||
this.voice = cs.speechSynthesisVoice;
|
this.voice = cs.speechSynthesisVoice;
|
||||||
|
this.speechSynthesisLabel = cs.speechSynthesisLabel;
|
||||||
}
|
}
|
||||||
this.ttsCredentials = cs.getSpeechCredentials(this.vendor, 'tts',
|
if (this.fallbackVendor === 'default') {
|
||||||
this.speechSynthesisLabel || cs.speechSynthesisLabel);
|
this.fallbackVendor = cs.fallbackSpeechSynthesisVendor;
|
||||||
|
this.fallbackLanguage = cs.fallbackSpeechSynthesisLanguage;
|
||||||
|
this.fallbackVoice = cs.fallbackSpeechSynthesisVoice;
|
||||||
|
this.fallbackLabel = cs.fallbackSpeechSynthesisLabel;
|
||||||
|
}
|
||||||
|
this.ttsCredentials = cs.getSpeechCredentials(this.vendor, 'tts', this.speechSynthesisLabel);
|
||||||
|
|
||||||
this.ep.addCustomEventListener('dialogflow::intent', this._onIntent.bind(this, ep, cs));
|
this.ep.addCustomEventListener('dialogflow::intent', this._onIntent.bind(this, ep, cs));
|
||||||
this.ep.addCustomEventListener('dialogflow::transcription', this._onTranscription.bind(this, ep, cs));
|
this.ep.addCustomEventListener('dialogflow::transcription', this._onTranscription.bind(this, ep, cs));
|
||||||
@@ -223,17 +235,7 @@ class Dialogflow extends Task {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const obj = {
|
const {filePath, servedFromCache} = await this.fallbackSynthAudio(cs, intent, stats, synthAudio);
|
||||||
account_sid: cs.accountSid,
|
|
||||||
text: intent.fulfillmentText,
|
|
||||||
vendor: this.vendor,
|
|
||||||
language: this.language,
|
|
||||||
voice: this.voice,
|
|
||||||
salt: cs.callSid,
|
|
||||||
credentials: this.ttsCredentials
|
|
||||||
};
|
|
||||||
this.logger.debug({obj}, 'Dialogflow:_onIntent - playing message via tts');
|
|
||||||
const {filePath, servedFromCache} = await synthAudio(stats, obj);
|
|
||||||
if (filePath) cs.trackTmpFile(filePath);
|
if (filePath) cs.trackTmpFile(filePath);
|
||||||
if (!this.ttsCredentials && !servedFromCache) cs.billForTts(intent.fulfillmentText.length);
|
if (!this.ttsCredentials && !servedFromCache) cs.billForTts(intent.fulfillmentText.length);
|
||||||
|
|
||||||
@@ -279,6 +281,46 @@ class Dialogflow extends Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fallbackSynthAudio(cs, intent, stats, synthAudio) {
|
||||||
|
try {
|
||||||
|
const obj = {
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
text: intent.fulfillmentText,
|
||||||
|
vendor: this.vendor,
|
||||||
|
language: this.language,
|
||||||
|
voice: this.voice,
|
||||||
|
salt: cs.callSid,
|
||||||
|
credentials: this.ttsCredentials
|
||||||
|
};
|
||||||
|
this.logger.debug({obj}, 'Dialogflow:_onIntent - playing message via tts');
|
||||||
|
|
||||||
|
return await synthAudio(stats, obj);
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.info({error}, 'Failed to synthesize audio from primary vendor');
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (this.fallbackVendor) {
|
||||||
|
const credentials = cs.getSpeechCredentials(this.fallbackVendor, 'tts', this.fallbackLabel);
|
||||||
|
const obj = {
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
text: intent.fulfillmentText,
|
||||||
|
vendor: this.fallbackVendor,
|
||||||
|
language: this.fallbackLanguage,
|
||||||
|
voice: this.fallbackVoice,
|
||||||
|
salt: cs.callSid,
|
||||||
|
credentials
|
||||||
|
};
|
||||||
|
this.logger.debug({obj}, 'Dialogflow:_onIntent - playing message via fallback tts');
|
||||||
|
return await synthAudio(stats, obj);
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.info({err}, 'Failed to synthesize audio from falllback vendor');
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A transcription - either interim or final - has been returned.
|
* A transcription - either interim or final - has been returned.
|
||||||
* If we are doing barge-in based on hotword detection, check for the hotword or phrase.
|
* If we are doing barge-in based on hotword detection, check for the hotword or phrase.
|
||||||
|
|||||||
@@ -65,6 +65,11 @@ class TaskGather extends Task {
|
|||||||
const recognizer = this.data.recognizer;
|
const recognizer = this.data.recognizer;
|
||||||
this.vendor = recognizer.vendor;
|
this.vendor = recognizer.vendor;
|
||||||
this.language = recognizer.language;
|
this.language = recognizer.language;
|
||||||
|
this.label = recognizer.label;
|
||||||
|
|
||||||
|
this.fallbackVendor = recognizer.fallbackVendor || 'default';
|
||||||
|
this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
|
||||||
|
this.fallbackLabel = recognizer.fallbackLabel || 'default';
|
||||||
|
|
||||||
/* let credentials be supplied in the recognizer object at runtime */
|
/* let credentials be supplied in the recognizer object at runtime */
|
||||||
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
|
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
|
||||||
@@ -133,11 +138,60 @@ class TaskGather extends Task {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async _initSpeechCredentials(cs, vendor, label) {
|
||||||
|
const {getNuanceAccessToken, getIbmAccessToken} = this.cs.srf.locals.dbHelpers;
|
||||||
|
let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
|
||||||
|
|
||||||
|
if (!credentials) {
|
||||||
|
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||||
|
this.logger.info(`TaskGather:exec - ERROR stt using ${vendor} requested but creds not supplied`);
|
||||||
|
writeAlerts({
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
alert_type: AlertType.STT_NOT_PROVISIONED,
|
||||||
|
vendor
|
||||||
|
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
||||||
|
// Notify application that STT vender is wrong.
|
||||||
|
this.notifyError({
|
||||||
|
msg: 'ASR error',
|
||||||
|
details: `No speech-to-text service credentials for ${vendor} have been configured`
|
||||||
|
});
|
||||||
|
this.notifyTaskDone();
|
||||||
|
throw new Error(`No speech-to-text service credentials for ${vendor} have been configured`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vendor === 'nuance' && credentials.client_id) {
|
||||||
|
/* get nuance access token */
|
||||||
|
const {client_id, secret} = credentials;
|
||||||
|
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
||||||
|
this.logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
||||||
|
credentials = {...credentials, access_token};
|
||||||
|
}
|
||||||
|
else if (vendor == 'ibm' && credentials.stt_api_key) {
|
||||||
|
/* get ibm access token */
|
||||||
|
const {stt_api_key, stt_region} = credentials;
|
||||||
|
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
|
||||||
|
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
|
||||||
|
credentials = {...credentials, access_token, stt_region};
|
||||||
|
}
|
||||||
|
return credentials;
|
||||||
|
}
|
||||||
|
|
||||||
|
async _startTranscribeForSpeech(cs, ep, vendor, language, credentials) {
|
||||||
|
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||||
|
await this._initSpeech(cs, ep, vendor, credentials);
|
||||||
|
if (this.killed) {
|
||||||
|
this.logger.info(`Gather:exec - task was quickly killed so do not transcribe for vendor: ${vendor}`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.execVendor = vendor;
|
||||||
|
this.execLanguage = language;
|
||||||
|
this._startTranscribing(ep);
|
||||||
|
return updateSpeechCredentialLastUsed(credentials.speech_credential_sid);
|
||||||
|
}
|
||||||
|
|
||||||
async exec(cs, {ep}) {
|
async exec(cs, {ep}) {
|
||||||
this.logger.debug({options: this.data}, 'Gather:exec');
|
this.logger.debug({options: this.data}, 'Gather:exec');
|
||||||
await super.exec(cs);
|
await super.exec(cs);
|
||||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
|
||||||
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
|
|
||||||
|
|
||||||
if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
|
if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
|
||||||
const {hints, hintsBoost} = cs.globalSttHints;
|
const {hints, hintsBoost} = cs.globalSttHints;
|
||||||
@@ -184,58 +238,55 @@ class TaskGather extends Task {
|
|||||||
this.language = cs.speechRecognizerLanguage;
|
this.language = cs.speechRecognizerLanguage;
|
||||||
if (this.data.recognizer) this.data.recognizer.language = this.language;
|
if (this.data.recognizer) this.data.recognizer.language = this.language;
|
||||||
}
|
}
|
||||||
|
if ('default' === this.label || !this.label) {
|
||||||
|
this.label = cs.speechRecognizerLabel;
|
||||||
|
if (this.data.recognizer) this.data.recognizer.label = this.label;
|
||||||
|
}
|
||||||
|
// Fallback options
|
||||||
|
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
|
||||||
|
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
|
||||||
|
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
|
||||||
|
}
|
||||||
|
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
|
||||||
|
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
|
||||||
|
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
|
||||||
|
}
|
||||||
|
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
|
||||||
|
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
|
||||||
|
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
|
||||||
|
}
|
||||||
|
|
||||||
if (!this.data.recognizer.vendor) {
|
if (!this.data.recognizer.vendor) {
|
||||||
this.data.recognizer.vendor = this.vendor;
|
this.data.recognizer.vendor = this.vendor;
|
||||||
}
|
}
|
||||||
if (this.needsStt && !this.sttCredentials) this.sttCredentials =
|
|
||||||
cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
|
|
||||||
if (this.needsStt && !this.sttCredentials) {
|
if (this.needsStt && !this.sttCredentials) {
|
||||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
this.sttCredentials = await this._initSpeechCredentials(cs, this.vendor, this.label);
|
||||||
this.logger.info(`TaskGather:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
|
|
||||||
writeAlerts({
|
|
||||||
account_sid: cs.accountSid,
|
|
||||||
alert_type: AlertType.STT_NOT_PROVISIONED,
|
|
||||||
vendor: this.vendor
|
|
||||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
|
||||||
// Notify application that STT vender is wrong.
|
|
||||||
this.notifyError({
|
|
||||||
msg: 'ASR error',
|
|
||||||
details: `No speech-to-text service credentials for ${this.vendor} have been configured`
|
|
||||||
});
|
|
||||||
this.notifyTaskDone();
|
|
||||||
throw new Error(`No speech-to-text service credentials for ${this.vendor} have been configured`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
|
// Fetch credential for fallback recognizer
|
||||||
/* get nuance access token */
|
if (this.needsStt && !this.fallbackSttCredentials && this.fallbackVendor) {
|
||||||
const {client_id, secret} = this.sttCredentials;
|
this.fallbackSttCredentials = await this._initSpeechCredentials(
|
||||||
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
cs, this.fallbackSttCredentials, this.fallbackLabel);
|
||||||
this.logger.debug({client_id}, `Gather:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
|
||||||
this.sttCredentials = {...this.sttCredentials, access_token};
|
|
||||||
}
|
}
|
||||||
else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
|
|
||||||
/* get ibm access token */
|
const startListening = async(cs, ep) => {
|
||||||
const {stt_api_key, stt_region} = this.sttCredentials;
|
|
||||||
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
|
|
||||||
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
|
|
||||||
this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
|
|
||||||
}
|
|
||||||
const startListening = (cs, ep) => {
|
|
||||||
this._startTimer();
|
this._startTimer();
|
||||||
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
|
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
|
||||||
if (this.input.includes('speech') && !this.listenDuringPrompt) {
|
if (this.input.includes('speech') && !this.listenDuringPrompt) {
|
||||||
this._initSpeech(cs, ep)
|
try {
|
||||||
.then(() => {
|
return await this._startTranscribeForSpeech(cs, ep, this.vendor, this.language, this.sttCredentials);
|
||||||
if (this.killed) {
|
} catch (error) {
|
||||||
this.logger.info('Gather:exec - task was quickly killed so do not transcribe');
|
this.logger.error({error}, 'error in initSpeech');
|
||||||
return;
|
if (this.fallbackSttCredentials) {
|
||||||
|
try {
|
||||||
|
return await this._startTranscribeForSpeech(cs, ep, this.fallbackVendor,
|
||||||
|
this.fallbackLanguage, this.fallbackSttCredentials);
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.error({err}, `error in initSpeech for fallback STT provider ${this.fallbackVendor}`);
|
||||||
}
|
}
|
||||||
this._startTranscribing(ep);
|
}
|
||||||
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
|
}
|
||||||
})
|
|
||||||
.catch((err) => {
|
|
||||||
this.logger.error({err}, 'error in initSpeech');
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -289,10 +340,19 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (this.input.includes('speech') && this.listenDuringPrompt) {
|
if (this.input.includes('speech') && this.listenDuringPrompt) {
|
||||||
await this._initSpeech(cs, ep);
|
try {
|
||||||
this._startTranscribing(ep);
|
await this._startTranscribeForSpeech(cs, ep, this.vendor, this.language, this.sttCredentials);
|
||||||
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
|
} catch (error) {
|
||||||
.catch(() => {/*already logged error */});
|
this.logger.error({error}, 'error in initSpeech');
|
||||||
|
if (this.fallbackSttCredentials) {
|
||||||
|
try {
|
||||||
|
await this._startTranscribeForSpeech(cs, ep, this.fallbackVendor,
|
||||||
|
this.fallbackLanguage, this.fallbackSttCredentials);
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.error({err}, `error in initSpeech for fallback STT provider ${this.fallbackVendor}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
|
if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
|
||||||
@@ -363,9 +423,9 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async _initSpeech(cs, ep) {
|
async _initSpeech(cs, ep, vendor, credentials) {
|
||||||
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
const opts = this.setChannelVarsForStt(this, credentials, this.data.recognizer);
|
||||||
switch (this.vendor) {
|
switch (vendor) {
|
||||||
case 'google':
|
case 'google':
|
||||||
this.bugname = 'google_transcribe';
|
this.bugname = 'google_transcribe';
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
@@ -451,9 +511,9 @@ class TaskGather extends Task {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${this.vendor}`});
|
// report the vendor value the switch above was evaluated on
this.notifyError({ msg: 'ASR error', details:`Invalid vendor ${vendor}`});
|
||||||
this.notifyTaskDone();
|
this.notifyTaskDone();
|
||||||
throw new Error(`Invalid vendor ${this.vendor}`);
|
// report the vendor value the switch above was evaluated on
throw new Error(`Invalid vendor ${vendor}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -465,14 +525,14 @@ class TaskGather extends Task {
|
|||||||
|
|
||||||
_startTranscribing(ep) {
|
_startTranscribing(ep) {
|
||||||
this.logger.debug({
|
this.logger.debug({
|
||||||
vendor: this.vendor,
|
vendor: this.execVendor,
|
||||||
locale: this.language,
|
locale: this.execLanguage,
|
||||||
interim: this.interim,
|
interim: this.interim,
|
||||||
bugname: this.bugname
|
bugname: this.bugname
|
||||||
}, 'Gather:_startTranscribing');
|
}, 'Gather:_startTranscribing');
|
||||||
ep.startTranscription({
|
ep.startTranscription({
|
||||||
vendor: this.vendor,
|
vendor: this.execVendor,
|
||||||
locale: this.language,
|
locale: this.execLanguage,
|
||||||
interim: this.interim,
|
interim: this.interim,
|
||||||
bugname: this.bugname,
|
bugname: this.bugname,
|
||||||
}).catch((err) => {
|
}).catch((err) => {
|
||||||
@@ -481,7 +541,7 @@ class TaskGather extends Task {
|
|||||||
writeAlerts({
|
writeAlerts({
|
||||||
account_sid: this.cs.accountSid,
|
account_sid: this.cs.accountSid,
|
||||||
alert_type: AlertType.STT_FAILURE,
|
alert_type: AlertType.STT_FAILURE,
|
||||||
vendor: this.vendor,
|
vendor: this.execVendor,
|
||||||
detail: err.message
|
detail: err.message
|
||||||
});
|
});
|
||||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
|
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
|
||||||
|
|||||||
@@ -26,6 +26,12 @@ class Lex extends Task {
|
|||||||
this.language = this.data.tts.language || 'default';
|
this.language = this.data.tts.language || 'default';
|
||||||
this.voice = this.data.tts.voice || 'default';
|
this.voice = this.data.tts.voice || 'default';
|
||||||
this.speechCredentialLabel = this.data.tts.label || null;
|
this.speechCredentialLabel = this.data.tts.label || null;
|
||||||
|
|
||||||
|
// fallback tts
|
||||||
|
this.fallbackVendor = this.data.tts.fallbackVendor || 'default';
|
||||||
|
this.fallbackLanguage = this.data.tts.fallbackLanguage || 'default';
|
||||||
|
// the fallback voice comes from tts.fallbackVoice (it was mistakenly read from tts.fallbackLanguage)
this.fallbackVoice = this.data.tts.fallbackVoice || 'default';
|
||||||
|
this.fallbackLabel = this.data.tts.fallbackLabel || 'default';
|
||||||
}
|
}
|
||||||
|
|
||||||
this.botName = `${this.bot}:${this.alias}:${this.region}`;
|
this.botName = `${this.bot}:${this.alias}:${this.region}`;
|
||||||
@@ -103,9 +109,15 @@ class Lex extends Task {
|
|||||||
this.vendor = cs.speechSynthesisVendor;
|
this.vendor = cs.speechSynthesisVendor;
|
||||||
this.language = cs.speechSynthesisLanguage;
|
this.language = cs.speechSynthesisLanguage;
|
||||||
this.voice = cs.speechSynthesisVoice;
|
this.voice = cs.speechSynthesisVoice;
|
||||||
|
this.speechCredentialLabel = cs.speechSynthesisLabel;
|
||||||
}
|
}
|
||||||
this.ttsCredentials = cs.getSpeechCredentials(this.vendor, 'tts',
|
if (this.fallbackVendor === 'default') {
|
||||||
this.speechCredentialLabel || cs.speechSynthesisVendor);
|
this.fallbackVendor = cs.fallbackSpeechSynthesisVendor;
|
||||||
|
this.fallbackLanguage = cs.fallbackSpeechSynthesisLanguage;
|
||||||
|
this.fallbackVoice = cs.fallbackSpeechSynthesisVoice;
|
||||||
|
this.fallbackLabel = cs.fallbackSpeechSynthesisLabel;
|
||||||
|
}
|
||||||
|
this.ttsCredentials = cs.getSpeechCredentials(this.vendor, 'tts', this.speechCredentialLabel);
|
||||||
|
|
||||||
this.ep.addCustomEventListener('lex::intent', this._onIntent.bind(this, ep, cs));
|
this.ep.addCustomEventListener('lex::intent', this._onIntent.bind(this, ep, cs));
|
||||||
this.ep.addCustomEventListener('lex::transcription', this._onTranscription.bind(this, ep, cs));
|
this.ep.addCustomEventListener('lex::transcription', this._onTranscription.bind(this, ep, cs));
|
||||||
@@ -170,6 +182,41 @@ class Lex extends Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fallbackSynthAudio(cs, msg, stats, synthAudio) {
|
||||||
|
try {
|
||||||
|
const {filePath} = await synthAudio(stats, {
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
text: msg,
|
||||||
|
vendor: this.vendor,
|
||||||
|
language: this.language,
|
||||||
|
voice: this.voice,
|
||||||
|
salt: cs.callSid,
|
||||||
|
credentials: this.ttsCredentials
|
||||||
|
});
|
||||||
|
|
||||||
|
return filePath;
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.info({error}, 'failed to synth audio from primary vendor');
|
||||||
|
if (this.fallbackVendor) {
|
||||||
|
try {
|
||||||
|
const credential = cs.getSpeechCredentials(this.fallbackVendor, 'tts', this.fallbackLabel);
|
||||||
|
const {filePath} = await synthAudio(stats, {
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
text: msg,
|
||||||
|
vendor: this.fallbackVendor,
|
||||||
|
language: this.fallbackLanguage,
|
||||||
|
voice: this.fallbackVoice,
|
||||||
|
salt: cs.callSid,
|
||||||
|
credentials: credential
|
||||||
|
});
|
||||||
|
return filePath;
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.info({err}, 'failed to synth audio from fallback vendor');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {*} evt - event data
|
* @param {*} evt - event data
|
||||||
*/
|
*/
|
||||||
@@ -189,16 +236,7 @@ class Lex extends Task {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
this.logger.debug(`tts with ${this.vendor} ${this.voice}`);
|
this.logger.debug(`tts with ${this.vendor} ${this.voice}`);
|
||||||
// eslint-disable-next-line no-unused-vars
|
const filePath = await this.fallbackSynthAudio(cs, msg, stats, synthAudio);
|
||||||
const {filePath, servedFromCache} = await synthAudio(stats, {
|
|
||||||
account_sid: cs.accountSid,
|
|
||||||
text: msg,
|
|
||||||
vendor: this.vendor,
|
|
||||||
language: this.language,
|
|
||||||
voice: this.voice,
|
|
||||||
salt: cs.callSid,
|
|
||||||
credentials: this.ttsCredentials
|
|
||||||
});
|
|
||||||
if (filePath) cs.trackTmpFile(filePath);
|
if (filePath) cs.trackTmpFile(filePath);
|
||||||
|
|
||||||
if (this.events.includes('start-play')) {
|
if (this.events.includes('start-play')) {
|
||||||
|
|||||||
129
lib/tasks/say.js
129
lib/tasks/say.js
@@ -59,15 +59,30 @@ class TaskSay extends Task {
|
|||||||
const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
|
const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
|
||||||
this.synthesizer.vendor :
|
this.synthesizer.vendor :
|
||||||
cs.speechSynthesisVendor;
|
cs.speechSynthesisVendor;
|
||||||
|
const fallbackVendor = this.synthesizer.fallbackVendor && this.synthesizer.fallbackVendor !== 'default' ?
|
||||||
|
this.synthesizer.fallbackVendor :
|
||||||
|
cs.fallbackSpeechSynthesisVendor;
|
||||||
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
|
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
|
||||||
this.synthesizer.language :
|
this.synthesizer.language :
|
||||||
cs.speechSynthesisLanguage ;
|
cs.speechSynthesisLanguage ;
|
||||||
|
const fallbackLanguage = this.synthesizer.fallbackLanguage && this.synthesizer.fallbackLanguage !== 'default' ?
|
||||||
|
this.synthesizer.fallbackLanguage :
|
||||||
|
cs.fallbackSpeechSynthesisLanguage ;
|
||||||
let voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
|
let voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
|
||||||
this.synthesizer.voice :
|
this.synthesizer.voice :
|
||||||
cs.speechSynthesisVoice;
|
cs.speechSynthesisVoice;
|
||||||
|
const fallbackVoice = this.synthesizer.fallbackVoice && this.synthesizer.fallbackVoice !== 'default' ?
|
||||||
|
this.synthesizer.fallbackVoice :
|
||||||
|
cs.fallbackSpeechSynthesisVoice;
|
||||||
|
const label = this.synthesizer.label && this.synthesizer.label !== 'default' ?
|
||||||
|
this.synthesizer.label :
|
||||||
|
cs.speechSynthesisLabel;
|
||||||
|
const fallbackLabel = this.synthesizer.fallbackLabel && this.synthesizer.fallbackLabel !== 'default' ?
|
||||||
|
this.synthesizer.fallbackLabel :
|
||||||
|
cs.fallbackSpeechSynthesisLabel;
|
||||||
const engine = this.synthesizer.engine || 'standard';
|
const engine = this.synthesizer.engine || 'standard';
|
||||||
const salt = cs.callSid;
|
const salt = cs.callSid;
|
||||||
let credentials = cs.getSpeechCredentials(vendor, 'tts', this.data.synthesizer?.label || cs.speechSynthesisLabel);
|
let credentials = cs.getSpeechCredentials(vendor, 'tts', label);
|
||||||
|
|
||||||
/* parse Nuance voices into name and model */
|
/* parse Nuance voices into name and model */
|
||||||
let model;
|
let model;
|
||||||
@@ -118,6 +133,8 @@ class TaskSay extends Task {
|
|||||||
'tts.language': language,
|
'tts.language': language,
|
||||||
'tts.voice': voice
|
'tts.voice': voice
|
||||||
});
|
});
|
||||||
|
let filePathUrl, isFromCache, roundTripTime;
|
||||||
|
let executedVendor, executedLanguage;
|
||||||
try {
|
try {
|
||||||
const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
|
const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
|
||||||
account_sid: cs.accountSid,
|
account_sid: cs.accountSid,
|
||||||
@@ -131,37 +148,101 @@ class TaskSay extends Task {
|
|||||||
credentials,
|
credentials,
|
||||||
disableTtsCache : this.disableTtsCache
|
disableTtsCache : this.disableTtsCache
|
||||||
});
|
});
|
||||||
this.logger.debug(`file ${filePath}, served from cache ${servedFromCache}`);
|
|
||||||
if (filePath) cs.trackTmpFile(filePath);
|
span.setAttributes({'tts.cached': servedFromCache});
|
||||||
|
span.end();
|
||||||
|
|
||||||
if (!servedFromCache && !lastUpdated) {
|
if (!servedFromCache && !lastUpdated) {
|
||||||
lastUpdated = true;
|
lastUpdated = true;
|
||||||
updateSpeechCredentialLastUsed(credentials.speech_credential_sid)
|
updateSpeechCredentialLastUsed(credentials.speech_credential_sid)
|
||||||
.catch(() => {/*already logged error */});
|
.catch(() => {/*already logged error */});
|
||||||
}
|
}
|
||||||
span.setAttributes({'tts.cached': servedFromCache});
|
|
||||||
span.end();
|
filePathUrl = filePath;
|
||||||
if (!servedFromCache && rtt) {
|
isFromCache = servedFromCache;
|
||||||
this.notifyStatus({
|
roundTripTime = rtt;
|
||||||
event: 'synthesized-audio',
|
executedVendor = vendor;
|
||||||
vendor,
|
executedLanguage = language;
|
||||||
language,
|
|
||||||
characters: text.length,
|
} catch (error) {
|
||||||
elapsedTime: rtt
|
let isFallbackSuccess = false;
|
||||||
|
if (fallbackVendor) {
|
||||||
|
const fallbackcredentials = cs.getSpeechCredentials(fallbackVendor, 'tts', fallbackLabel);
|
||||||
|
const {span: fallbackSpan} = this.startChildSpan('fallback-tts-generation', {
|
||||||
|
'tts.vendor': fallbackVendor,
|
||||||
|
'tts.language': fallbackLanguage,
|
||||||
|
'tts.voice': fallbackVoice
|
||||||
});
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
text,
|
||||||
|
vendor: fallbackVendor,
|
||||||
|
language: fallbackLanguage,
|
||||||
|
voice: fallbackVoice,
|
||||||
|
engine,
|
||||||
|
model,
|
||||||
|
salt,
|
||||||
|
credentials: fallbackcredentials,
|
||||||
|
disableTtsCache : this.disableTtsCache
|
||||||
|
});
|
||||||
|
isFallbackSuccess = true;
|
||||||
|
|
||||||
|
fallbackSpan.setAttributes({'tts.cached': servedFromCache});
|
||||||
|
fallbackSpan.end();
|
||||||
|
|
||||||
|
if (!servedFromCache && !lastUpdated) {
  lastUpdated = true;
  /* The audio was produced by the fallback vendor, so the credential to stamp
   * as "last used" is the fallback one, not the primary `credentials`. */
  updateSpeechCredentialLastUsed(fallbackcredentials.speech_credential_sid)
    .catch(() => {/*already logged error */});
}
|
||||||
|
|
||||||
|
filePathUrl = filePath;
|
||||||
|
isFromCache = servedFromCache;
|
||||||
|
roundTripTime = rtt;
|
||||||
|
executedVendor = fallbackVendor;
|
||||||
|
executedLanguage = fallbackLanguage;
|
||||||
|
|
||||||
|
} catch (err) {
|
||||||
|
this.logger.info({err}, 'fallback Speech failed to synthesize audio');
|
||||||
|
fallbackSpan.end();
|
||||||
|
writeAlerts({
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
alert_type: AlertType.TTS_FAILURE,
|
||||||
|
vendor: fallbackVendor,
|
||||||
|
detail: err.message
|
||||||
|
}).catch((err) => this.logger.info({err}, 'Error generating alert for fallback tts failure'));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!isFallbackSuccess) {
|
||||||
|
this.logger.info({error}, 'Error synthesizing tts');
|
||||||
|
span.end();
|
||||||
|
writeAlerts({
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
alert_type: AlertType.TTS_FAILURE,
|
||||||
|
vendor,
|
||||||
|
detail: error.message
|
||||||
|
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
|
||||||
|
this.notifyError({msg: 'TTS error', details: error.message || error});
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
return filePath;
|
|
||||||
} catch (err) {
|
|
||||||
this.logger.info({err}, 'Error synthesizing tts');
|
|
||||||
span.end();
|
|
||||||
writeAlerts({
|
|
||||||
account_sid: cs.accountSid,
|
|
||||||
alert_type: AlertType.TTS_FAILURE,
|
|
||||||
vendor,
|
|
||||||
detail: err.message
|
|
||||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
|
|
||||||
this.notifyError({msg: 'TTS error', details: err.message || err});
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.logger.debug(`file ${filePathUrl}, served from cache ${isFromCache}`);
|
||||||
|
if (filePathUrl) cs.trackTmpFile(filePathUrl);
|
||||||
|
|
||||||
|
if (!isFromCache && roundTripTime) {
|
||||||
|
this.notifyStatus({
|
||||||
|
event: 'synthesized-audio',
|
||||||
|
vendor: executedVendor,
|
||||||
|
language: executedLanguage,
|
||||||
|
characters: text.length,
|
||||||
|
elapsedTime: roundTripTime
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return filePathUrl;
|
||||||
};
|
};
|
||||||
|
|
||||||
const arr = this.text.map((t) => generateAudio(t));
|
const arr = this.text.map((t) => generateAudio(t));
|
||||||
|
|||||||
@@ -42,6 +42,11 @@ class TaskTranscribe extends Task {
|
|||||||
const recognizer = this.data.recognizer;
|
const recognizer = this.data.recognizer;
|
||||||
this.vendor = recognizer.vendor;
|
this.vendor = recognizer.vendor;
|
||||||
this.language = recognizer.language;
|
this.language = recognizer.language;
|
||||||
|
this.label = recognizer.label;
|
||||||
|
|
||||||
|
this.fallbackVendor = recognizer.fallbackVendor || 'default';
|
||||||
|
this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
|
||||||
|
this.fallbackLabel = recognizer.fallbackLabel || 'default';
|
||||||
/* let credentials be supplied in the recognizer object at runtime */
|
/* let credentials be supplied in the recognizer object at runtime */
|
||||||
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
|
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
|
||||||
|
|
||||||
@@ -67,10 +72,43 @@ class TaskTranscribe extends Task {
|
|||||||
|
|
||||||
get name() { return TaskName.Transcribe; }
|
get name() { return TaskName.Transcribe; }
|
||||||
|
|
||||||
|
async _initSpeechCredential(cs, vendor, label) {
|
||||||
|
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
|
||||||
|
let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
|
||||||
|
|
||||||
|
if (!credentials) {
|
||||||
|
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||||
|
this.logger.info(`TaskTranscribe:exec - ERROR stt using ${vendor} requested but creds not supplied`);
|
||||||
|
writeAlerts({
|
||||||
|
account_sid: cs.accountSid,
|
||||||
|
alert_type: AlertType.STT_NOT_PROVISIONED,
|
||||||
|
vendor: vendor
|
||||||
|
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
||||||
|
throw new Error('no provisioned speech credentials for TTS');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vendor === 'nuance' && credentials.client_id) {
|
||||||
|
/* get nuance access token */
|
||||||
|
const {client_id, secret} = credentials;
|
||||||
|
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
||||||
|
this.logger.debug({client_id},
|
||||||
|
`Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
||||||
|
credentials = {...credentials, access_token};
|
||||||
|
}
|
||||||
|
else if (vendor == 'ibm' && credentials.stt_api_key) {
|
||||||
|
/* get ibm access token */
|
||||||
|
const {stt_api_key, stt_region} = credentials;
|
||||||
|
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
|
||||||
|
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
|
||||||
|
credentials = {...credentials, access_token, stt_region};
|
||||||
|
}
|
||||||
|
|
||||||
|
return credentials;
|
||||||
|
}
|
||||||
|
|
||||||
async exec(cs, {ep, ep2}) {
|
async exec(cs, {ep, ep2}) {
|
||||||
super.exec(cs);
|
super.exec(cs);
|
||||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||||
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
|
|
||||||
|
|
||||||
if (cs.hasGlobalSttHints) {
|
if (cs.hasGlobalSttHints) {
|
||||||
const {hints, hintsBoost} = cs.globalSttHints;
|
const {hints, hintsBoost} = cs.globalSttHints;
|
||||||
@@ -98,51 +136,60 @@ class TaskTranscribe extends Task {
|
|||||||
this.language = cs.speechRecognizerLanguage;
|
this.language = cs.speechRecognizerLanguage;
|
||||||
if (this.data.recognizer) this.data.recognizer.language = this.language;
|
if (this.data.recognizer) this.data.recognizer.language = this.language;
|
||||||
}
|
}
|
||||||
|
if ('default' === this.label || !this.label) {
|
||||||
|
this.label = cs.speechRecognizerLabel;
|
||||||
|
if (this.data.recognizer) this.data.recognizer.label = this.label;
|
||||||
|
}
|
||||||
|
// fallback options
|
||||||
|
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
|
||||||
|
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
|
||||||
|
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
|
||||||
|
}
|
||||||
|
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
|
||||||
|
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
|
||||||
|
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
|
||||||
|
}
|
||||||
|
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
  /* Resolve the *fallback* label from the call session. Assigning to this.label
   * here would overwrite the primary label and leave fallbackLabel as 'default'. */
  this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
  if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
}
|
||||||
if (!this.data.recognizer.vendor) {
|
if (!this.data.recognizer.vendor) {
|
||||||
this.data.recognizer.vendor = this.vendor;
|
this.data.recognizer.vendor = this.vendor;
|
||||||
}
|
}
|
||||||
if (!this.sttCredentials) this.sttCredentials =
|
if (!this.sttCredentials) {
|
||||||
cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
|
this.sttCredentials = await this._initSpeechCredential(cs, this.vendor, this.label);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The fallback provider is optional: only look up its credentials when a
 * fallback vendor is actually configured, and never let a missing or broken
 * fallback prevent the primary recognizer from starting. */
if (!this.fallbackSttCredentials && this.fallbackVendor) {
  try {
    this.fallbackSttCredentials = await this._initSpeechCredential(cs, this.fallbackVendor, this.fallbackLabel);
  } catch (err) {
    this.logger.info({err}, 'TaskTranscribe:exec - fallback stt credentials unavailable, continuing without fallback');
  }
}
|
||||||
try {
|
try {
|
||||||
if (!this.sttCredentials) {
|
await this._startTranscribing(cs, ep, 1, this.sttCredentials);
|
||||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
|
||||||
this.logger.info(`TaskTranscribe:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
|
|
||||||
writeAlerts({
|
|
||||||
account_sid: cs.accountSid,
|
|
||||||
alert_type: AlertType.STT_NOT_PROVISIONED,
|
|
||||||
vendor: this.vendor
|
|
||||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
|
||||||
throw new Error('no provisioned speech credentials for TTS');
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
|
|
||||||
/* get nuance access token */
|
|
||||||
const {client_id, secret} = this.sttCredentials;
|
|
||||||
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
|
||||||
this.logger.debug({client_id},
|
|
||||||
`Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
|
||||||
this.sttCredentials = {...this.sttCredentials, access_token};
|
|
||||||
}
|
|
||||||
else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
|
|
||||||
/* get ibm access token */
|
|
||||||
const {stt_api_key, stt_region} = this.sttCredentials;
|
|
||||||
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
|
|
||||||
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
|
|
||||||
this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
|
|
||||||
}
|
|
||||||
await this._startTranscribing(cs, ep, 1);
|
|
||||||
if (this.separateRecognitionPerChannel && ep2) {
|
if (this.separateRecognitionPerChannel && ep2) {
|
||||||
await this._startTranscribing(cs, ep2, 2);
|
await this._startTranscribing(cs, ep2, 2, this.sttCredentials);
|
||||||
}
|
}
|
||||||
|
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
|
||||||
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
|
|
||||||
.catch(() => {/*already logged error */});
|
|
||||||
|
|
||||||
await this.awaitTaskDone();
|
await this.awaitTaskDone();
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
this.logger.info(err, 'TaskTranscribe:exec - error');
|
this.logger.info(err, 'TaskTranscribe:exec - error');
|
||||||
this.parentTask && this.parentTask.emit('error', err);
|
let isFallbackSuccess = false;
|
||||||
|
if (this.fallbackSttCredentials) {
|
||||||
|
this.logger.info(err, 'TaskTranscribe:exec - fallback to 2nd speech provider');
|
||||||
|
try {
  /* retry the transcription on the same endpoint(s) using the fallback credentials */
  await this._startTranscribing(cs, ep, 1, this.fallbackSttCredentials);
  if (this.separateRecognitionPerChannel && ep2) {
    await this._startTranscribing(cs, ep2, 2, this.fallbackSttCredentials);
  }
  /* fire-and-forget bookkeeping: swallow the rejection so it cannot become an unhandled rejection */
  updateSpeechCredentialLastUsed(this.fallbackSttCredentials.speech_credential_sid)
    .catch(() => {/*already logged error */});
  await this.awaitTaskDone();
  isFallbackSuccess = true;
} catch (error) {
  /* log the fallback failure itself, not the primary error that triggered the fallback */
  this.logger.info(error, 'TaskTranscribe:exec - fallback error');
}
|
||||||
|
}
|
||||||
|
if (!isFallbackSuccess) {
|
||||||
|
this.parentTask && this.parentTask.emit('error', err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this.removeSpeechListeners(ep);
|
this.removeSpeechListeners(ep);
|
||||||
}
|
}
|
||||||
@@ -167,8 +214,8 @@ class TaskTranscribe extends Task {
|
|||||||
await this.awaitTaskDone();
|
await this.awaitTaskDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
async _startTranscribing(cs, ep, channel) {
|
async _startTranscribing(cs, ep, channel, credentials) {
|
||||||
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
const opts = this.setChannelVarsForStt(this, credentials, this.data.recognizer);
|
||||||
switch (this.vendor) {
|
switch (this.vendor) {
|
||||||
case 'google':
|
case 'google':
|
||||||
this.bugname = 'google_transcribe';
|
this.bugname = 'google_transcribe';
|
||||||
|
|||||||
14
package-lock.json
generated
14
package-lock.json
generated
@@ -17,7 +17,7 @@
|
|||||||
"@jambonz/speech-utils": "^0.0.19",
|
"@jambonz/speech-utils": "^0.0.19",
|
||||||
"@jambonz/stats-collector": "^0.1.9",
|
"@jambonz/stats-collector": "^0.1.9",
|
||||||
"@jambonz/time-series": "^0.2.8",
|
"@jambonz/time-series": "^0.2.8",
|
||||||
"@jambonz/verb-specifications": "^0.0.27",
|
"@jambonz/verb-specifications": "^0.0.29",
|
||||||
"@opentelemetry/api": "^1.4.0",
|
"@opentelemetry/api": "^1.4.0",
|
||||||
"@opentelemetry/exporter-jaeger": "^1.9.0",
|
"@opentelemetry/exporter-jaeger": "^1.9.0",
|
||||||
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
|
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
|
||||||
@@ -3019,9 +3019,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@jambonz/verb-specifications": {
|
"node_modules/@jambonz/verb-specifications": {
|
||||||
"version": "0.0.27",
|
"version": "0.0.29",
|
||||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.27.tgz",
|
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.29.tgz",
|
||||||
"integrity": "sha512-DIcxhCNrgr2RTE3YrGNP15RqKyV+P8f97SPBlKd2zTM5aN2oV5xv+pRDx5gLzmrUZ5TIEaBXQN3vTmM2Zx5Q6g==",
|
"integrity": "sha512-jeYI+GN7Y5nXhdFG3SXvXaBlhCjIC+l5AcBywDDGxxyuuKRTukPS0MSvCtWPZP6H3wYYGqfJ4DR/vgtBF3pvyQ==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"debug": "^4.3.4",
|
"debug": "^4.3.4",
|
||||||
"pino": "^8.8.0"
|
"pino": "^8.8.0"
|
||||||
@@ -12985,9 +12985,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"@jambonz/verb-specifications": {
|
"@jambonz/verb-specifications": {
|
||||||
"version": "0.0.27",
|
"version": "0.0.29",
|
||||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.27.tgz",
|
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.29.tgz",
|
||||||
"integrity": "sha512-DIcxhCNrgr2RTE3YrGNP15RqKyV+P8f97SPBlKd2zTM5aN2oV5xv+pRDx5gLzmrUZ5TIEaBXQN3vTmM2Zx5Q6g==",
|
"integrity": "sha512-jeYI+GN7Y5nXhdFG3SXvXaBlhCjIC+l5AcBywDDGxxyuuKRTukPS0MSvCtWPZP6H3wYYGqfJ4DR/vgtBF3pvyQ==",
|
||||||
"requires": {
|
"requires": {
|
||||||
"debug": "^4.3.4",
|
"debug": "^4.3.4",
|
||||||
"pino": "^8.8.0"
|
"pino": "^8.8.0"
|
||||||
|
|||||||
@@ -31,7 +31,7 @@
|
|||||||
"@jambonz/speech-utils": "^0.0.19",
|
"@jambonz/speech-utils": "^0.0.19",
|
||||||
"@jambonz/stats-collector": "^0.1.9",
|
"@jambonz/stats-collector": "^0.1.9",
|
||||||
"@jambonz/time-series": "^0.2.8",
|
"@jambonz/time-series": "^0.2.8",
|
||||||
"@jambonz/verb-specifications": "^0.0.27",
|
"@jambonz/verb-specifications": "^0.0.29",
|
||||||
"@opentelemetry/api": "^1.4.0",
|
"@opentelemetry/api": "^1.4.0",
|
||||||
"@opentelemetry/exporter-jaeger": "^1.9.0",
|
"@opentelemetry/exporter-jaeger": "^1.9.0",
|
||||||
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
|
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ require('./dial-tests');
|
|||||||
require('./webhooks-tests');
|
require('./webhooks-tests');
|
||||||
require('./say-tests');
|
require('./say-tests');
|
||||||
require('./gather-tests');
|
require('./gather-tests');
|
||||||
// require('./transcribe-tests');
|
require('./transcribe-tests');
|
||||||
require('./sip-request-tests');
|
require('./sip-request-tests');
|
||||||
require('./create-call-test');
|
require('./create-call-test');
|
||||||
require('./play-tests');
|
require('./play-tests');
|
||||||
|
|||||||
Reference in New Issue
Block a user