mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-21 09:08:02 +00:00
fix transcribe
This commit is contained in:
@@ -226,17 +226,23 @@ class TaskGather extends Task {
|
||||
this.language = cs.speechRecognizerLanguage;
|
||||
if (this.data.recognizer) this.data.recognizer.language = this.language;
|
||||
}
|
||||
this.fallbackVendor = this.data.recognizer && this.data.recognizer.fallbackVendor !== 'default' ?
|
||||
this.data.recognizer.fallbackVendor :
|
||||
cs.fallbackSpeechRecognizerVendor;
|
||||
|
||||
this.fallbackLanguage = this.data.recognizer && this.data.recognizer.fallbackLanguage !== 'default' ?
|
||||
this.data.recognizer.fallbackLanguage :
|
||||
cs.fallbackSpeechRecognizerLanguage;
|
||||
|
||||
this.fallbackLabel = this.data.recognizer && this.data.recognizer.fallbackLabel !== 'default' ?
|
||||
this.data.recognizer.fallbackLabel :
|
||||
cs.fallbackSpeechRecognizerLabel;
|
||||
if ('default' === this.label || !this.label) {
|
||||
this.label = cs.speechRecognizerLabel;
|
||||
if (this.data.recognizer) this.data.recognizer.label = this.label;
|
||||
}
|
||||
// Fallback options
|
||||
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
|
||||
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
|
||||
}
|
||||
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
|
||||
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
|
||||
}
|
||||
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
|
||||
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
|
||||
}
|
||||
|
||||
if (!this.data.recognizer.vendor) {
|
||||
this.data.recognizer.vendor = this.vendor;
|
||||
|
||||
@@ -42,6 +42,11 @@ class TaskTranscribe extends Task {
|
||||
const recognizer = this.data.recognizer;
|
||||
this.vendor = recognizer.vendor;
|
||||
this.language = recognizer.language;
|
||||
this.label = recognizer.label;
|
||||
|
||||
this.fallbackVendor = recognizer.fallbackVendor || 'default';
|
||||
this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
|
||||
this.fallbackLabel = recognizer.fallbackLabel || 'default';
|
||||
/* let credentials be supplied in the recognizer object at runtime */
|
||||
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
|
||||
|
||||
@@ -67,6 +72,39 @@ class TaskTranscribe extends Task {
|
||||
|
||||
get name() { return TaskName.Transcribe; }
|
||||
|
||||
async _initSpeechCredential(cs, vendor, label) {
|
||||
let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
|
||||
|
||||
if (!credentials) {
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
this.logger.info(`TaskTranscribe:exec - ERROR stt using ${vendor} requested but creds not supplied`);
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_NOT_PROVISIONED,
|
||||
vendor: vendor
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
||||
throw new Error('no provisioned speech credentials for TTS');
|
||||
}
|
||||
|
||||
if (vendor === 'nuance' && credentials.client_id) {
|
||||
/* get nuance access token */
|
||||
const {client_id, secret} = credentials;
|
||||
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
||||
this.logger.debug({client_id},
|
||||
`Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
||||
credentials = {...credentials, access_token};
|
||||
}
|
||||
else if (vendor == 'ibm' && credentials.stt_api_key) {
|
||||
/* get ibm access token */
|
||||
const {stt_api_key, stt_region} = credentials;
|
||||
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
|
||||
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
|
||||
credentials = {...credentials, access_token, stt_region};
|
||||
}
|
||||
|
||||
return credentials;
|
||||
}
|
||||
|
||||
async exec(cs, {ep, ep2}) {
|
||||
super.exec(cs);
|
||||
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
|
||||
@@ -98,52 +136,61 @@ class TaskTranscribe extends Task {
|
||||
this.language = cs.speechRecognizerLanguage;
|
||||
if (this.data.recognizer) this.data.recognizer.language = this.language;
|
||||
}
|
||||
if ('default' === this.label || !this.label) {
|
||||
this.label = cs.speechRecognizerLabel;
|
||||
if (this.data.recognizer) this.data.recognizer.label = this.label;
|
||||
}
|
||||
// fallback options
|
||||
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
|
||||
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
|
||||
}
|
||||
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
|
||||
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
|
||||
}
|
||||
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
|
||||
this.label = cs.fallbackSpeechRecognizerLabel;
|
||||
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
|
||||
}
|
||||
if (!this.data.recognizer.vendor) {
|
||||
this.data.recognizer.vendor = this.vendor;
|
||||
}
|
||||
if (!this.sttCredentials) this.sttCredentials =
|
||||
cs.getSpeechCredentials(this.vendor, 'stt', this.data.recognizer?.label || cs.speechRecognizerLabel);
|
||||
|
||||
try {
|
||||
if (!this.sttCredentials) {
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
this.logger.info(`TaskTranscribe:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_NOT_PROVISIONED,
|
||||
vendor: this.vendor
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
|
||||
throw new Error('no provisioned speech credentials for TTS');
|
||||
this.sttCredentials = await this._initSpeechCredential(cs, this.vendor, this.label);
|
||||
}
|
||||
|
||||
if (this.vendor === 'nuance' && this.sttCredentials.client_id) {
|
||||
/* get nuance access token */
|
||||
const {client_id, secret} = this.sttCredentials;
|
||||
const {access_token, servedFromCache} = await getNuanceAccessToken(client_id, secret, 'asr tts');
|
||||
this.logger.debug({client_id},
|
||||
`Transcribe:exec - got nuance access token ${servedFromCache ? 'from cache' : ''}`);
|
||||
this.sttCredentials = {...this.sttCredentials, access_token};
|
||||
if (!this.fallbackSttCredentials) {
|
||||
this.fallbackSttCredentials = await this._initSpeechCredential(cs, this.fallbackVendor, this.fallbackLabel);
|
||||
}
|
||||
else if (this.vendor == 'ibm' && this.sttCredentials.stt_api_key) {
|
||||
/* get ibm access token */
|
||||
const {stt_api_key, stt_region} = this.sttCredentials;
|
||||
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
|
||||
this.logger.debug({stt_api_key}, `Gather:exec - got ibm access token ${servedFromCache ? 'from cache' : ''}`);
|
||||
this.sttCredentials = {...this.sttCredentials, access_token, stt_region};
|
||||
}
|
||||
await this._startTranscribing(cs, ep, 1);
|
||||
try {
|
||||
await this._startTranscribing(cs, ep, 1, this.sttCredentials);
|
||||
if (this.separateRecognitionPerChannel && ep2) {
|
||||
await this._startTranscribing(cs, ep2, 2);
|
||||
await this._startTranscribing(cs, ep2, 2, this.sttCredentials);
|
||||
}
|
||||
|
||||
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
|
||||
.catch(() => {/*already logged error */});
|
||||
|
||||
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
|
||||
await this.awaitTaskDone();
|
||||
} catch (err) {
|
||||
this.logger.info(err, 'TaskTranscribe:exec - error');
|
||||
let isFallbackSuccess = false;
|
||||
if (this.fallbackSttCredentials) {
|
||||
this.logger.info(err, 'TaskTranscribe:exec - fallback to 2nd speech provider');
|
||||
try {
|
||||
await this._startTranscribing(cs, ep, 1, this.fallbackSttCredentials);
|
||||
if (this.separateRecognitionPerChannel && ep2) {
|
||||
await this._startTranscribing(cs, ep2, 2, this.fallbackSttCredentials);
|
||||
}
|
||||
updateSpeechCredentialLastUsed(this.fallbackSttCredentials.speech_credential_sid);
|
||||
await this.awaitTaskDone();
|
||||
isFallbackSuccess = true;
|
||||
} catch (error) {
|
||||
this.logger.info(err, 'TaskTranscribe:exec - fallback error');
|
||||
}
|
||||
}
|
||||
if (!isFallbackSuccess) {
|
||||
this.parentTask && this.parentTask.emit('error', err);
|
||||
}
|
||||
}
|
||||
this.removeSpeechListeners(ep);
|
||||
}
|
||||
|
||||
@@ -167,8 +214,8 @@ class TaskTranscribe extends Task {
|
||||
await this.awaitTaskDone();
|
||||
}
|
||||
|
||||
async _startTranscribing(cs, ep, channel) {
|
||||
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
||||
async _startTranscribing(cs, ep, channel, credentials) {
|
||||
const opts = this.setChannelVarsForStt(this, credentials, this.data.recognizer);
|
||||
switch (this.vendor) {
|
||||
case 'google':
|
||||
this.bugname = 'google_transcribe';
|
||||
|
||||
Reference in New Issue
Block a user