feat assembly (#515)

* fix

* wip

* wip

* wip

* wip

* fix review comments
This commit is contained in:
Hoan Luu Huu
2023-11-02 20:25:04 +07:00
committed by GitHub
parent 4b3234f4e4
commit 7fee2ba2dc
8 changed files with 110 additions and 137 deletions

View File

@@ -835,6 +835,11 @@ class CallSession extends Emitter {
api_key: credential.api_key,
model_id: credential.model_id
};
} else if ('assemblyai' === vendor) {
return {
speech_credential_sid: credential.speech_credential_sid,
api_key: credential.api_key
};
} else if (vendor.startsWith('custom:')) {
return {
speech_credential_sid: credential.speech_credential_sid,

View File

@@ -9,8 +9,9 @@ const {
CobaltTranscriptionEvents,
IbmTranscriptionEvents,
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents
} = require('../utils/constants');
JambonzTranscriptionEvents,
AssemblyAiTranscriptionEvents
} = require('../utils/constants.json');
const {
JAMBONES_GATHER_EARLY_HINTS_MATCH,
JAMBONZ_GATHER_EARLY_HINTS_MATCH,
@@ -392,9 +393,9 @@ class TaskGather extends SttTask {
case 'deepgram':
this.bugname = 'deepgram_transcribe';
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, this._onDeepgramConnect.bind(this, cs, ep));
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
this._onDeepGramConnectFailure.bind(this, cs, ep));
this._onVendorConnectFailure.bind(this, cs, ep));
/* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */
if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
@@ -438,9 +439,9 @@ class TaskGather extends SttTask {
case 'ibm':
this.bugname = 'ibm_transcribe';
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onIbmConnect.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
this._onIbmConnectFailure.bind(this, cs, ep));
this._onVendorConnectFailure.bind(this, cs, ep));
break;
case 'nvidia':
@@ -460,13 +461,21 @@ class TaskGather extends SttTask {
}
break;
case 'assemblyai':
this.bugname = 'assemblyai_transcribe';
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
break;
default:
if (this.vendor.startsWith('custom:')) {
this.bugname = `${this.vendor}_transcribe`;
ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onJambonzConnect.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(JambonzTranscriptionEvents.ConnectFailure,
this._onJambonzConnectFailure.bind(this, cs, ep));
this._onVendorConnectFailure.bind(this, cs, ep));
break;
}
else {
@@ -788,12 +797,10 @@ class TaskGather extends SttTask {
_onTranscriptionComplete(cs, ep) {
this.logger.debug('TaskGather:_onTranscriptionComplete');
}
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onDeepgramConnect');
}
_onJambonzConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onJambonzConnect');
_onVendorConnect(_cs, _ep) {
this.logger.debug(`TaskGather:_on${this.vendor}Connect`);
}
async _onJambonzError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onJambonzError');
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
@@ -827,54 +834,20 @@ class TaskGather extends SttTask {
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
}
_onDeepGramConnectFailure(cs, _ep, evt) {
_onVendorConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskGather:_onDeepgramConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to Deepgram speech recognizer: ${reason}`,
vendor: 'deepgram',
}).catch((err) => this.logger.info({err}, 'Error generating alert for deepgram connection failure'));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor deepgram: ${reason}`});
this.notifyTaskDone();
}
_onJambonzConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskGather:_onJambonzConnectFailure');
this.logger.info({evt}, `TaskGather:_on${this.vendor}ConnectFailure`);
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
}).catch((err) => this.logger.info({err}, `Error generating alert for ${this.vendor} connection failure`));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`});
this.notifyTaskDone();
}
_onIbmConnect(_cs, _ep) {
this.logger.debug('TaskGather:_onIbmConnect');
}
_onIbmConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskGather:_onIbmConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to IBM watson speech recognizer: ${reason}`,
vendor: 'ibm',
}).catch((err) => this.logger.info({err}, 'Error generating alert for IBM connection failure'));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor IBM: ${reason}`});
this.notifyTaskDone();
}
_onIbmError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskGather:_onIbmError'); }
_onVadDetected(cs, ep) {
if (this.bargein && this.minBargeinWordCount === 0) {
this.logger.debug('TaskGather:_onVadDetected');

View File

@@ -11,8 +11,9 @@ const {
IbmTranscriptionEvents,
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents,
TranscribeStatus
} = require('../utils/constants');
TranscribeStatus,
AssemblyAiTranscriptionEvents
} = require('../utils/constants.json');
const { normalizeJambones } = require('@jambonz/verb-specifications');
const SttTask = require('./stt-task');
@@ -228,9 +229,9 @@ class TaskTranscribe extends SttTask {
ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect,
this._onDeepgramConnect.bind(this, cs, ep, channel));
this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure,
this._onDeepGramConnectFailure.bind(this, cs, ep, channel));
this._onVendorConnectFailure.bind(this, cs, ep, channel));
/* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */
if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
@@ -276,9 +277,9 @@ class TaskTranscribe extends SttTask {
ep.addCustomEventListener(IbmTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(IbmTranscriptionEvents.Connect,
this._onIbmConnect.bind(this, cs, ep, channel));
this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure,
this._onIbmConnectFailure.bind(this, cs, ep, channel));
this._onVendorConnectFailure.bind(this, cs, ep, channel));
break;
case 'nvidia':
@@ -293,6 +294,16 @@ class TaskTranscribe extends SttTask {
this._onVadDetected.bind(this, cs, ep));
break;
case 'assemblyai':
this.bugname = 'assemblyai_transcribe';
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
ep.addCustomEventListener(AssemblyAiTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep, channel));
break;
default:
if (this.vendor.startsWith('custom:')) {
this.bugname = `${this.vendor}_transcribe`;
@@ -480,78 +491,7 @@ class TaskTranscribe extends SttTask {
this._timer = null;
}
}
_onDeepgramConnect(_cs, _ep) {
this.logger.debug('TaskTranscribe:_onDeepgramConnect');
}
_onDeepGramConnectFailure(cs, _ep, channel, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskTranscribe:_onDeepgramConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to Deepgram speech recognizer: ${reason}`,
vendor: 'deepgram',
}).catch((err) => this.logger.info({err}, 'Error generating alert for deepgram connection failure'));
this.notifyError(`Failed connecting to speech vendor deepgram: ${reason}`);
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'connection failure'
});
this.childSpan[channel - 1].span.end();
}
this.notifyTaskDone();
}
_onJambonzConnect(_cs, _ep) {
this.logger.debug('TaskTranscribe:_onJambonzConnect');
}
_onJambonzConnectFailure(cs, _ep, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskTranscribe:_onJambonzConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`});
this.notifyTaskDone();
}
_onIbmConnect(_cs, _ep) {
this.logger.debug('TaskTranscribe:_onIbmConnect');
}
_onIbmConnectFailure(cs, _ep, channel, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, 'TaskTranscribe:_onIbmConnectFailure');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to IBM watson speech recognizer: ${reason}`,
vendor: 'ibm',
}).catch((err) => this.logger.info({err}, 'Error generating alert for IBM connection failure'));
this.notifyError(`Failed connecting to speech vendor IBM: ${reason}`);
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'connection failure'
});
this.childSpan[channel - 1].span.end();
}
this.notifyTaskDone();
}
_onIbmError(cs, _ep, _channel, evt) {
this.logger.info({evt}, 'TaskTranscribe:_onIbmError');
}
async _onJambonzError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
@@ -589,6 +529,36 @@ class TaskTranscribe extends SttTask {
}
}
_onVendorConnect(_cs, _ep) {
this.logger.debug(`TaskTranscribe:_on${this.vendor}Connect`);
}
_onVendorConnectFailure(cs, _ep, channel, evt) {
const {reason} = evt;
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.info({evt}, `TaskTranscribe:_on${this.vendor}ConnectFailure`);
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, `Error generating alert for ${this.vendor} connection failure`));
this.notifyError(`Failed connecting to speech vendor ${this.vendor}: ${reason}`);
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'connection failure'
});
this.childSpan[channel - 1].span.end();
}
this.notifyTaskDone();
}
_onVendorError(cs, _ep, _channel, vendor, evt) {
this.logger.info({evt}, `TaskTranscribe:_on${vendor}Error`);
}
_startAsrTimer(channel) {
if (this.vendor === 'deepgram') return; // no need
assert(this.isContinuousAsr);

View File

@@ -126,6 +126,11 @@
"Connect": "jambonz_transcribe::connect",
"Error": "jambonz_transcribe::error"
},
"AssemblyAiTranscriptionEvents": {
"Transcription": "assemblyai_transcribe::transcription",
"ConnectFailure": "assemblyai_transcribe::connect_failed",
"Connect": "assemblyai_transcribe::connect"
},
"ListenEvents": {
"Connect": "mod_audio_fork::connect",
"ConnectFailure": "mod_audio_fork::connect_failed",

View File

@@ -91,6 +91,9 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
} else if ('assemblyai' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
} else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = o.auth_token;

View File

@@ -8,8 +8,9 @@ const {
SonioxTranscriptionEvents,
NvidiaTranscriptionEvents,
CobaltTranscriptionEvents,
JambonzTranscriptionEvents
} = require('./constants');
JambonzTranscriptionEvents,
AssemblyAiTranscriptionEvents
} = require('./constants.json');
const stickyVars = {
google: [
@@ -104,6 +105,10 @@ const stickyVars = {
soniox: [
'SONIOX_PROFANITY_FILTER',
'SONIOX_MODEL'
],
assemblyai: [
'ASSEMBLYAI_API_KEY',
'ASSEMBLYAI_WORD_BOOST'
]
};
@@ -692,6 +697,14 @@ module.exports = (logger) => {
...(cobaltOptions.enableConfusionNetwork && {COBALT_ENABLE_CONFUSION_NETWORK: 1}),
...(cobaltOptions.compiledContextData && {COBALT_COMPILED_CONTEXT_DATA: cobaltOptions.compiledContextData}),
};
} else if ('assemblyai' === vendor) {
opts = {
...opts,
...(sttCredentials.api_key) &&
{ASSEMBLYAI_API_KEY: sttCredentials.api_key},
...(rOpts.hints?.length > 0 &&
{ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)})
};
}
else if (vendor.startsWith('custom:')) {
let {options = {}} = rOpts;
@@ -755,6 +768,10 @@ module.exports = (logger) => {
ep.removeCustomEventListener(JambonzTranscriptionEvents.ConnectFailure);
ep.removeCustomEventListener(JambonzTranscriptionEvents.Error);
ep.removeCustomEventListener(AssemblyAiTranscriptionEvents.Transcription);
ep.removeCustomEventListener(AssemblyAiTranscriptionEvents.Connect);
ep.removeCustomEventListener(AssemblyAiTranscriptionEvents.ConnectFailure);
};
const setSpeechCredentialsAtRuntime = (recognizer) => {

14
package-lock.json generated
View File

@@ -18,7 +18,7 @@
"@jambonz/speech-utils": "^0.0.24",
"@jambonz/stats-collector": "^0.1.9",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.44",
"@jambonz/verb-specifications": "^0.0.45",
"@opentelemetry/api": "^1.4.0",
"@opentelemetry/exporter-jaeger": "^1.9.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
@@ -3181,9 +3181,9 @@
}
},
"node_modules/@jambonz/verb-specifications": {
"version": "0.0.44",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.44.tgz",
"integrity": "sha512-mXTbZlJ3AprxooSNvEHYt/9wsky4wHT4mJmL2XrkZGQY6fG/LzVNFVy0Tvx0xZzAVJMY9SmNcDiM0HBNnAufIg==",
"version": "0.0.45",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.45.tgz",
"integrity": "sha512-0cC7cfyXuOlqjfrtA9GC7A84efInj4z+ZSsibONqHMw3FVJE5IvcvabRojarDHooIn9Uw6AEX/zZ7BZqfgVmJw==",
"dependencies": {
"debug": "^4.3.4",
"pino": "^8.8.0"
@@ -13275,9 +13275,9 @@
}
},
"@jambonz/verb-specifications": {
"version": "0.0.44",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.44.tgz",
"integrity": "sha512-mXTbZlJ3AprxooSNvEHYt/9wsky4wHT4mJmL2XrkZGQY6fG/LzVNFVy0Tvx0xZzAVJMY9SmNcDiM0HBNnAufIg==",
"version": "0.0.45",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.45.tgz",
"integrity": "sha512-0cC7cfyXuOlqjfrtA9GC7A84efInj4z+ZSsibONqHMw3FVJE5IvcvabRojarDHooIn9Uw6AEX/zZ7BZqfgVmJw==",
"requires": {
"debug": "^4.3.4",
"pino": "^8.8.0"

View File

@@ -34,7 +34,7 @@
"@jambonz/speech-utils": "^0.0.24",
"@jambonz/stats-collector": "^0.1.9",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.44",
"@jambonz/verb-specifications": "^0.0.45",
"@opentelemetry/api": "^1.4.0",
"@opentelemetry/exporter-jaeger": "^1.9.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",