diff --git a/lib/session/call-session.js b/lib/session/call-session.js index c4ad54a9..080c88c6 100644 --- a/lib/session/call-session.js +++ b/lib/session/call-session.js @@ -835,6 +835,11 @@ class CallSession extends Emitter { api_key: credential.api_key, model_id: credential.model_id }; + } else if ('assemblyai' === vendor) { + return { + speech_credential_sid: credential.speech_credential_sid, + api_key: credential.api_key + }; } else if (vendor.startsWith('custom:')) { return { speech_credential_sid: credential.speech_credential_sid, diff --git a/lib/tasks/gather.js b/lib/tasks/gather.js index 1d70220b..bf9e7e21 100644 --- a/lib/tasks/gather.js +++ b/lib/tasks/gather.js @@ -9,8 +9,9 @@ const { CobaltTranscriptionEvents, IbmTranscriptionEvents, NvidiaTranscriptionEvents, - JambonzTranscriptionEvents -} = require('../utils/constants'); + JambonzTranscriptionEvents, + AssemblyAiTranscriptionEvents +} = require('../utils/constants.json'); const { JAMBONES_GATHER_EARLY_HINTS_MATCH, JAMBONZ_GATHER_EARLY_HINTS_MATCH, @@ -392,9 +393,9 @@ class TaskGather extends SttTask { case 'deepgram': this.bugname = 'deepgram_transcribe'; ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); - ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, this._onDeepgramConnect.bind(this, cs, ep)); + ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep)); ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure, - this._onDeepGramConnectFailure.bind(this, cs, ep)); + this._onVendorConnectFailure.bind(this, cs, ep)); /* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */ if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true; @@ -438,9 +439,9 @@ class TaskGather extends SttTask { case 'ibm': this.bugname = 'ibm_transcribe'; ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); - ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onIbmConnect.bind(this, cs, ep)); + ep.addCustomEventListener(IbmTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep)); ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure, - this._onIbmConnectFailure.bind(this, cs, ep)); + this._onVendorConnectFailure.bind(this, cs, ep)); break; case 'nvidia': @@ -460,13 +461,21 @@ class TaskGather extends SttTask { } break; + case 'assemblyai': + this.bugname = 'assemblyai_transcribe'; + ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Transcription, + this._onTranscription.bind(this, cs, ep)); + ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep)); + ep.addCustomEventListener(AssemblyAiTranscriptionEvents.ConnectFailure, + this._onVendorConnectFailure.bind(this, cs, ep)); + break; default: if (this.vendor.startsWith('custom:')) { this.bugname = `${this.vendor}_transcribe`; ep.addCustomEventListener(JambonzTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); - ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onJambonzConnect.bind(this, cs, ep)); + ep.addCustomEventListener(JambonzTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep)); ep.addCustomEventListener(JambonzTranscriptionEvents.ConnectFailure, - this._onJambonzConnectFailure.bind(this, cs, ep)); + this._onVendorConnectFailure.bind(this, cs, ep)); break; } else { @@ -788,12 +797,10 @@ class TaskGather extends SttTask { _onTranscriptionComplete(cs, ep) { this.logger.debug('TaskGather:_onTranscriptionComplete'); } - _onDeepgramConnect(_cs, _ep) { - this.logger.debug('TaskGather:_onDeepgramConnect'); - } - _onJambonzConnect(_cs, _ep) { - this.logger.debug('TaskGather:_onJambonzConnect'); + _onVendorConnect(_cs, _ep) { + this.logger.debug(`TaskGather:_on${this.vendor}Connect`); } + async _onJambonzError(cs, ep, evt) { this.logger.info({evt}, 'TaskGather:_onJambonzError'); if (this.isHandledByPrimaryProvider && this.fallbackVendor) { @@ -827,54 +834,20 @@ class TaskGather extends SttTask { this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`}); } - _onDeepGramConnectFailure(cs, _ep, evt) { + _onVendorConnectFailure(cs, _ep, evt) { const {reason} = evt; const {writeAlerts, AlertType} = cs.srf.locals; - this.logger.info({evt}, 'TaskGather:_onDeepgramConnectFailure'); - writeAlerts({ - account_sid: cs.accountSid, - alert_type: AlertType.STT_FAILURE, - message: `Failed connecting to Deepgram speech recognizer: ${reason}`, - vendor: 'deepgram', - }).catch((err) => this.logger.info({err}, 'Error generating alert for deepgram connection failure')); - this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor deepgram: ${reason}`}); - this.notifyTaskDone(); - } - _onJambonzConnectFailure(cs, _ep, evt) { - const {reason} = evt; - const {writeAlerts, AlertType} = cs.srf.locals; - this.logger.info({evt}, 'TaskGather:_onJambonzConnectFailure'); + this.logger.info({evt}, `TaskGather:_on${this.vendor}ConnectFailure`); writeAlerts({ account_sid: cs.accountSid, alert_type: AlertType.STT_FAILURE, message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`, vendor: this.vendor, - }).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure')); + }).catch((err) => this.logger.info({err}, `Error generating alert for ${this.vendor} connection failure`)); this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`}); this.notifyTaskDone(); } - _onIbmConnect(_cs, _ep) { - this.logger.debug('TaskGather:_onIbmConnect'); - } - - _onIbmConnectFailure(cs, _ep, evt) { - const {reason} = evt; - const {writeAlerts, AlertType} = cs.srf.locals; - this.logger.info({evt}, 'TaskGather:_onIbmConnectFailure'); - writeAlerts({ - account_sid: cs.accountSid, - alert_type: AlertType.STT_FAILURE, - message: `Failed connecting to IBM watson speech recognizer: ${reason}`, - vendor: 'ibm', - }).catch((err) => this.logger.info({err}, 'Error generating alert for IBM connection failure')); - this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor IBM: ${reason}`}); - this.notifyTaskDone(); - } - - _onIbmError(cs, _ep, evt) { - this.logger.info({evt}, 'TaskGather:_onIbmError'); } - _onVadDetected(cs, ep) { if (this.bargein && this.minBargeinWordCount === 0) { this.logger.debug('TaskGather:_onVadDetected'); diff --git a/lib/tasks/transcribe.js b/lib/tasks/transcribe.js index 098e3d90..fc6fce14 100644 --- a/lib/tasks/transcribe.js +++ b/lib/tasks/transcribe.js @@ -11,8 +11,9 @@ const { IbmTranscriptionEvents, NvidiaTranscriptionEvents, JambonzTranscriptionEvents, - TranscribeStatus -} = require('../utils/constants'); + TranscribeStatus, + AssemblyAiTranscriptionEvents +} = require('../utils/constants.json'); const { normalizeJambones } = require('@jambonz/verb-specifications'); const SttTask = require('./stt-task'); @@ -228,9 +229,9 @@ class TaskTranscribe extends SttTask { ep.addCustomEventListener(DeepgramTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel)); ep.addCustomEventListener(DeepgramTranscriptionEvents.Connect, - this._onDeepgramConnect.bind(this, cs, ep, channel)); + this._onVendorConnect.bind(this, cs, ep)); ep.addCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure, - this._onDeepGramConnectFailure.bind(this, cs, ep, channel)); + this._onVendorConnectFailure.bind(this, cs, ep, channel)); /* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */ if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true; @@ -276,9 +277,9 @@ class TaskTranscribe extends SttTask { ep.addCustomEventListener(IbmTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel)); ep.addCustomEventListener(IbmTranscriptionEvents.Connect, - this._onIbmConnect.bind(this, cs, ep, channel)); + this._onVendorConnect.bind(this, cs, ep)); ep.addCustomEventListener(IbmTranscriptionEvents.ConnectFailure, - this._onIbmConnectFailure.bind(this, cs, ep, channel)); + this._onVendorConnectFailure.bind(this, cs, ep, channel)); break; case 'nvidia': @@ -293,6 +294,16 @@ class TaskTranscribe extends SttTask { this._onVadDetected.bind(this, cs, ep)); break; + case 'assemblyai': + this.bugname = 'assemblyai_transcribe'; + ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Transcription, + this._onTranscription.bind(this, cs, ep, channel)); + ep.addCustomEventListener(AssemblyAiTranscriptionEvents.Connect, + this._onVendorConnect.bind(this, cs, ep)); + ep.addCustomEventListener(AssemblyAiTranscriptionEvents.ConnectFailure, + this._onVendorConnectFailure.bind(this, cs, ep, channel)); + break; + default: if (this.vendor.startsWith('custom:')) { this.bugname = `${this.vendor}_transcribe`; @@ -480,78 +491,7 @@ class TaskTranscribe extends SttTask { this._timer = null; } } - _onDeepgramConnect(_cs, _ep) { - this.logger.debug('TaskTranscribe:_onDeepgramConnect'); - } - _onDeepGramConnectFailure(cs, _ep, channel, evt) { - const {reason} = evt; - const {writeAlerts, AlertType} = cs.srf.locals; - this.logger.info({evt}, 'TaskTranscribe:_onDeepgramConnectFailure'); - writeAlerts({ - account_sid: cs.accountSid, - alert_type: AlertType.STT_FAILURE, - message: `Failed connecting to Deepgram speech recognizer: ${reason}`, - vendor: 'deepgram', - }).catch((err) => this.logger.info({err}, 'Error generating alert for deepgram connection failure')); - this.notifyError(`Failed connecting to speech vendor deepgram: ${reason}`); - - if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) { - this.childSpan[channel - 1].span.setAttributes({ - channel, - 'stt.resolve': 'connection failure' - }); - this.childSpan[channel - 1].span.end(); - } - this.notifyTaskDone(); - } - - _onJambonzConnect(_cs, _ep) { - this.logger.debug('TaskTranscribe:_onJambonzConnect'); - } - - _onJambonzConnectFailure(cs, _ep, evt) { - const {reason} = evt; - const {writeAlerts, AlertType} = cs.srf.locals; - this.logger.info({evt}, 'TaskTranscribe:_onJambonzConnectFailure'); - writeAlerts({ - account_sid: cs.accountSid, - alert_type: AlertType.STT_FAILURE, - message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`, - vendor: this.vendor, - }).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure')); - this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`}); - this.notifyTaskDone(); - } - - _onIbmConnect(_cs, _ep) { - this.logger.debug('TaskTranscribe:_onIbmConnect'); - } - - _onIbmConnectFailure(cs, _ep, channel, evt) { - const {reason} = evt; - const {writeAlerts, AlertType} = cs.srf.locals; - this.logger.info({evt}, 'TaskTranscribe:_onIbmConnectFailure'); - writeAlerts({ - account_sid: cs.accountSid, - alert_type: AlertType.STT_FAILURE, - message: `Failed connecting to IBM watson speech recognizer: ${reason}`, - vendor: 'ibm', - }).catch((err) => this.logger.info({err}, 'Error generating alert for IBM connection failure')); - this.notifyError(`Failed connecting to speech vendor IBM: ${reason}`); - - if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) { - this.childSpan[channel - 1].span.setAttributes({ - channel, - 'stt.resolve': 'connection failure' - }); - this.childSpan[channel - 1].span.end(); - } - this.notifyTaskDone(); - } - _onIbmError(cs, _ep, _channel, evt) { - this.logger.info({evt}, 'TaskTranscribe:_onIbmError'); - } async _onJambonzError(cs, _ep, evt) { this.logger.info({evt}, 'TaskTranscribe:_onJambonzError'); if (this.isHandledByPrimaryProvider && this.fallbackVendor) { @@ -589,6 +529,36 @@ class TaskTranscribe extends SttTask { } } + _onVendorConnect(_cs, _ep) { + this.logger.debug(`TaskTranscribe:_on${this.vendor}Connect`); + } + + _onVendorConnectFailure(cs, _ep, channel, evt) { + const {reason} = evt; + const {writeAlerts, AlertType} = cs.srf.locals; + this.logger.info({evt}, `TaskTranscribe:_on${this.vendor}ConnectFailure`); + writeAlerts({ + account_sid: cs.accountSid, + alert_type: AlertType.STT_FAILURE, + message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`, + vendor: this.vendor, + }).catch((err) => this.logger.info({err}, `Error generating alert for ${this.vendor} connection failure`)); + this.notifyError(`Failed connecting to speech vendor ${this.vendor}: ${reason}`); + + if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) { + this.childSpan[channel - 1].span.setAttributes({ + channel, + 'stt.resolve': 'connection failure' + }); + this.childSpan[channel - 1].span.end(); + } + this.notifyTaskDone(); + } + + _onVendorError(cs, _ep, _channel, vendor, evt) { + this.logger.info({evt}, `TaskTranscribe:_on${vendor}Error`); + } + _startAsrTimer(channel) { if (this.vendor === 'deepgram') return; // no need assert(this.isContinuousAsr); diff --git a/lib/utils/constants.json b/lib/utils/constants.json index 059bc53b..e3b2c6da 100644 --- a/lib/utils/constants.json +++ b/lib/utils/constants.json @@ -126,6 +126,11 @@ "Connect": "jambonz_transcribe::connect", "Error": "jambonz_transcribe::error" }, + "AssemblyAiTranscriptionEvents": { + "Transcription": "assemblyai_transcribe::transcription", + "ConnectFailure": "assemblyai_transcribe::connect_failed", + "Connect": "assemblyai_transcribe::connect" + }, "ListenEvents": { "Connect": "mod_audio_fork::connect", "ConnectFailure": "mod_audio_fork::connect_failed", diff --git a/lib/utils/db-utils.js b/lib/utils/db-utils.js index a1396ce0..8519a798 100644 --- a/lib/utils/db-utils.js +++ b/lib/utils/db-utils.js @@ -91,6 +91,9 @@ const speechMapper = (cred) => { const o = JSON.parse(decrypt(credential)); obj.api_key = o.api_key; obj.model_id = o.model_id; + } else if ('assemblyai' === obj.vendor) { + const o = JSON.parse(decrypt(credential)); + obj.api_key = o.api_key; } else if (obj.vendor.startsWith('custom:')) { const o = JSON.parse(decrypt(credential)); obj.auth_token = o.auth_token; diff --git a/lib/utils/transcription-utils.js b/lib/utils/transcription-utils.js index 111fc741..e3fdd0aa 100644 --- a/lib/utils/transcription-utils.js +++ b/lib/utils/transcription-utils.js @@ -8,8 +8,9 @@ const { SonioxTranscriptionEvents, NvidiaTranscriptionEvents, CobaltTranscriptionEvents, - JambonzTranscriptionEvents -} = require('./constants'); + JambonzTranscriptionEvents, + AssemblyAiTranscriptionEvents +} = require('./constants.json'); const stickyVars = { google: [ @@ -104,6 +105,10 @@ const stickyVars = { soniox: [ 'SONIOX_PROFANITY_FILTER', 'SONIOX_MODEL' + ], + assemblyai: [ + 'ASSEMBLYAI_API_KEY', + 'ASSEMBLYAI_WORD_BOOST' ] }; @@ -692,6 +697,14 @@ module.exports = (logger) => { ...(cobaltOptions.enableConfusionNetwork && {COBALT_ENABLE_CONFUSION_NETWORK: 1}), ...(cobaltOptions.compiledContextData && {COBALT_COMPILED_CONTEXT_DATA: cobaltOptions.compiledContextData}), }; + } else if ('assemblyai' === vendor) { + opts = { + ...opts, + ...(sttCredentials.api_key) && + {ASSEMBLYAI_API_KEY: sttCredentials.api_key}, + ...(rOpts.hints?.length > 0 && + {ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)}) + }; } else if (vendor.startsWith('custom:')) { let {options = {}} = rOpts; @@ -755,6 +768,10 @@ module.exports = (logger) => { ep.removeCustomEventListener(JambonzTranscriptionEvents.ConnectFailure); ep.removeCustomEventListener(JambonzTranscriptionEvents.Error); + + ep.removeCustomEventListener(AssemblyAiTranscriptionEvents.Transcription); + ep.removeCustomEventListener(AssemblyAiTranscriptionEvents.Connect); + ep.removeCustomEventListener(AssemblyAiTranscriptionEvents.ConnectFailure); }; const setSpeechCredentialsAtRuntime = (recognizer) => { diff --git a/package-lock.json b/package-lock.json index a8dc5432..592910b3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18,7 +18,7 @@ "@jambonz/speech-utils": "^0.0.24", "@jambonz/stats-collector": "^0.1.9", "@jambonz/time-series": "^0.2.8", - "@jambonz/verb-specifications": "^0.0.44", + "@jambonz/verb-specifications": "^0.0.45", "@opentelemetry/api": "^1.4.0", "@opentelemetry/exporter-jaeger": "^1.9.0", "@opentelemetry/exporter-trace-otlp-http": "^0.35.0", @@ -3181,9 +3181,9 @@ } }, "node_modules/@jambonz/verb-specifications": { - "version": "0.0.44", - "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.44.tgz", - "integrity": "sha512-mXTbZlJ3AprxooSNvEHYt/9wsky4wHT4mJmL2XrkZGQY6fG/LzVNFVy0Tvx0xZzAVJMY9SmNcDiM0HBNnAufIg==", + "version": "0.0.45", + "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.45.tgz", + "integrity": "sha512-0cC7cfyXuOlqjfrtA9GC7A84efInj4z+ZSsibONqHMw3FVJE5IvcvabRojarDHooIn9Uw6AEX/zZ7BZqfgVmJw==", "dependencies": { "debug": "^4.3.4", "pino": "^8.8.0" @@ -13275,9 +13275,9 @@ } }, "@jambonz/verb-specifications": { - "version": "0.0.44", - "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.44.tgz", - "integrity": "sha512-mXTbZlJ3AprxooSNvEHYt/9wsky4wHT4mJmL2XrkZGQY6fG/LzVNFVy0Tvx0xZzAVJMY9SmNcDiM0HBNnAufIg==", + "version": "0.0.45", + "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.45.tgz", + "integrity": "sha512-0cC7cfyXuOlqjfrtA9GC7A84efInj4z+ZSsibONqHMw3FVJE5IvcvabRojarDHooIn9Uw6AEX/zZ7BZqfgVmJw==", "requires": { "debug": "^4.3.4", "pino": "^8.8.0" diff --git a/package.json b/package.json index 5a4df70b..527bb2ac 100644 --- a/package.json +++ b/package.json @@ -34,7 +34,7 @@ "@jambonz/speech-utils": "^0.0.24", "@jambonz/stats-collector": "^0.1.9", "@jambonz/time-series": "^0.2.8", - "@jambonz/verb-specifications": "^0.0.44", + "@jambonz/verb-specifications": "^0.0.45", "@opentelemetry/api": "^1.4.0", "@opentelemetry/exporter-jaeger": "^1.9.0", "@opentelemetry/exporter-trace-otlp-http": "^0.35.0",