Files
jambonz-feature-server/lib/tasks/stt-task.js
Dave Horton 2c48083c26 fix to be more precise about removing custom event handlers so that w… (#580)
* fix to be more precise about removing custom event handlers so that when we stop a gather we dont also inadvertently stop a background transcribe as well

* test fixes

* fix: endpointing=false was being ignored for Deepgram
2023-12-28 11:00:27 -05:00

279 lines
11 KiB
JavaScript

const Task = require('./task');
const assert = require('assert');
const crypto = require('crypto');
const { TaskPreconditions, CobaltTranscriptionEvents } = require('../utils/constants');
class SttTask extends Task {
constructor(logger, data, parentTask) {
super(logger, data);
this.parentTask = parentTask;
this.preconditions = TaskPreconditions.Endpoint;
const {
setChannelVarsForStt,
normalizeTranscription,
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts,
consolidateTranscripts
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
this.compileSonioxTranscripts = compileSonioxTranscripts;
this.consolidateTranscripts = consolidateTranscripts;
this.eventHandlers = [];
this.isHandledByPrimaryProvider = true;
if (this.data.recognizer) {
const recognizer = this.data.recognizer;
this.vendor = recognizer.vendor;
this.language = recognizer.language;
this.label = recognizer.label;
//fallback
this.fallbackVendor = recognizer.fallbackVendor || 'default';
this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
this.fallbackLabel = recognizer.fallbackLabel || 'default';
/* let credentials be supplied in the recognizer object at runtime */
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
if (!Array.isArray(this.data.recognizer.altLanguages)) {
this.data.recognizer.altLanguages = [];
}
} else {
this.data.recognizer = {hints: [], altLanguages: []};
}
/* buffer for soniox transcripts */
this._sonioxTranscripts = [];
/*bug name prefix */
this.bugname_prefix = '';
}
async exec(cs, {ep, ep2}) {
super.exec(cs);
this.ep = ep;
this.ep2 = ep2;
if ('default' === this.vendor || !this.vendor) {
this.vendor = cs.speechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.vendor = this.vendor;
}
if ('default' === this.language || !this.language) {
this.language = cs.speechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.language = this.language;
}
if ('default' === this.label || !this.label) {
this.label = cs.speechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.label = this.label;
}
// Fallback options
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
}
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
}
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
}
if (!this.data.recognizer.vendor) {
this.data.recognizer.vendor = this.vendor;
}
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
// By default, application saves cobalt model in language
this.data.recognizer.model = cs.speechRecognizerLanguage;
}
if (cs.recognizer) {
for (const k in cs.recognizer) {
if (Array.isArray(this.data.recognizer[k]) ||
Array.isArray(cs.recognizer[k])) {
this.data.recognizer[k] = [
...this.data.recognizer[k],
...cs.recognizer[k]
];
} else if (typeof this.data.recognizer[k] === 'object' ||
typeof cs.recognizer[k] === 'object'
) {
this.data.recognizer[k] = {
...this.data.recognizer[k],
...cs.recognizer[k]
};
} else {
this.data.recognizer[k] = cs.recognizer[k] || this.data.recognizer[k];
}
}
}
if (!this.sttCredentials) {
try {
this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
} catch (error) {
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
await this._fallback();
} else {
throw error;
}
}
}
/* when using cobalt model is required */
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
this.notifyError({ msg: 'ASR error', details:'Cobalt requires a model to be specified'});
throw new Error('Cobalt requires a model to be specified');
}
if (cs.hasAltLanguages) {
this.data.recognizer.altLanguages = this.data.recognizer.altLanguages.concat(cs.altLanguages);
this.logger.debug({altLanguages: this.altLanguages},
'STT:exec - applying altLanguages');
}
if (cs.hasGlobalSttPunctuation && !this.data.recognizer.punctuation) {
this.data.recognizer.punctuation = cs.globalSttPunctuation;
}
}
addCustomEventListener(ep, event, handler) {
this.eventHandlers.push({ep, event, handler});
ep.addCustomEventListener(event, handler);
}
removeCustomEventListeners() {
this.eventHandlers.forEach((h) => h.ep.removeCustomEventListener(h.event, h.handler));
}
async _initSpeechCredentials(cs, vendor, label) {
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
if (!credentials) {
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info(`ERROR stt using ${vendor} requested but creds not supplied`);
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_NOT_PROVISIONED,
vendor
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
// Notify application that STT vender is wrong.
this.notifyError({
msg: 'ASR error',
details: `No speech-to-text service credentials for ${vendor} have been configured`
});
this.notifyTaskDone();
throw new Error(`No speech-to-text service credentials for ${vendor} have been configured`);
}
if (vendor === 'nuance' && credentials.client_id) {
/* get nuance access token */
const {client_id, secret} = credentials;
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
this.logger.debug({client_id}, `got nuance access token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...credentials, access_token};
}
else if (vendor == 'ibm' && credentials.stt_api_key) {
/* get ibm access token */
const {stt_api_key, stt_region} = credentials;
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
this.logger.debug({stt_api_key}, `got ibm access token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...credentials, access_token, stt_region};
}
return credentials;
}
async _fallback() {
assert(this.fallbackVendor, 'fallback failed without fallbackVendor configuration');
this.isHandledByPrimaryProvider = false;
this.logger.info(`Failed to use primary STT provider, fallback to ${this.fallbackVendor}`);
this.vendor = this.fallbackVendor;
this.language = this.fallbackLanguage;
this.label = this.fallbackLabel;
this.data.recognizer.vendor = this.vendor;
this.data.recognizer.language = this.language;
this.data.recognizer.label = this.label;
this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
}
async compileHintsForCobalt(ep, hostport, model, token, hints) {
const {retrieveKey} = this.cs.srf.locals.dbHelpers;
const hash = crypto.createHash('sha1');
hash.update(`${model}:${hints}`);
const key = `cobalt:${hash.digest('hex')}`;
this.context = await retrieveKey(key);
if (this.context) {
this.logger.debug({model, hints}, 'found cached cobalt context for supplied hints');
return this.context;
}
this.logger.debug({model, hints}, 'compiling cobalt context for supplied hints');
return new Promise((resolve, reject) => {
this.cobaltCompileResolver = resolve;
ep.addCustomEventListener(CobaltTranscriptionEvents.CompileContext, this._onCompileContext.bind(this, ep, key));
ep.api('uuid_cobalt_compile_context', [ep.uuid, hostport, model, token, hints], (err, evt) => {
if (err || 0 !== evt.getBody().indexOf('+OK')) {
ep.removeCustomEventListener(CobaltTranscriptionEvents.CompileContext);
return reject(err);
}
});
});
}
_onCompileContext(ep, key, evt) {
const {addKey} = this.cs.srf.locals.dbHelpers;
this.logger.debug({evt}, `received cobalt compile context event, will cache under ${key}`);
this.cobaltCompileResolver(evt.compiled_context);
ep.removeCustomEventListener(CobaltTranscriptionEvents.CompileContext);
this.cobaltCompileResolver = null;
//cache the compiled context
addKey(key, evt.compiled_context, 3600 * 12)
.catch((err) => this.logger.info({err}, `Error caching cobalt context for ${key}`));
}
_doContinuousAsrWithDeepgram(asrTimeout) {
/* deepgram has an utterance_end_ms property that simplifies things */
assert(this.vendor === 'deepgram');
this.logger.debug(`_doContinuousAsrWithDeepgram - setting utterance_end_ms to ${asrTimeout}`);
const dgOptions = this.data.recognizer.deepgramOptions = this.data.recognizer.deepgramOptions || {};
dgOptions.utteranceEndMs = dgOptions.utteranceEndMs || asrTimeout;
}
_onVendorConnect(_cs, _ep) {
this.logger.debug(`TaskGather:_on${this.vendor}Connect`);
}
_onVendorError(cs, _ep, evt) {
this.logger.info({evt}, `${this.name}:_on${this.vendor}Error`);
const {writeAlerts, AlertType} = cs.srf.locals;
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: 'STT failure reported by vendor',
detail: evt.error,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, `Error generating alert for ${this.vendor} connection failure`));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${evt.error}`});
}
_onVendorConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, `${this.name}:_on${this.vendor}ConnectFailure`);
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, `Error generating alert for ${this.vendor} connection failure`));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`});
}
}
module.exports = SttTask;