From dbdc1cd43d1522483239245967232c86357032ec Mon Sep 17 00:00:00 2001 From: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:33:35 +0700 Subject: [PATCH] support voxist stt (#1066) * support voxist stt * wip --- lib/session/call-session.js | 6 ++++++ lib/tasks/gather.js | 12 ++++++++++++ lib/tasks/transcribe.js | 12 ++++++++++++ lib/utils/constants.json | 6 ++++++ lib/utils/db-utils.js | 4 ++++ lib/utils/transcription-utils.js | 31 +++++++++++++++++++++++++++++++ 6 files changed, 71 insertions(+) diff --git a/lib/session/call-session.js b/lib/session/call-session.js index 43a73bfe..885aa672 100644 --- a/lib/session/call-session.js +++ b/lib/session/call-session.js @@ -1084,6 +1084,12 @@ class CallSession extends Emitter { api_key: credential.api_key }; } + else if ('voxist' === vendor) { + return { + speech_credential_sid: credential.speech_credential_sid, + api_key: credential.api_key + }; + } else if ('whisper' === vendor) { return { api_key: credential.api_key, diff --git a/lib/tasks/gather.js b/lib/tasks/gather.js index 8f1ecc47..7013805d 100644 --- a/lib/tasks/gather.js +++ b/lib/tasks/gather.js @@ -11,6 +11,7 @@ const { NvidiaTranscriptionEvents, JambonzTranscriptionEvents, AssemblyAiTranscriptionEvents, + VoxistTranscriptionEvents, VadDetection, VerbioTranscriptionEvents, SpeechmaticsTranscriptionEvents @@ -524,6 +525,17 @@ class TaskGather extends SttTask { this._onVendorConnectFailure.bind(this, cs, ep)); break; + case 'voxist': + this.bugname = `${this.bugname_prefix}voxist_transcribe`; + this.addCustomEventListener(ep, VoxistTranscriptionEvents.Transcription, + this._onTranscription.bind(this, cs, ep)); + this.addCustomEventListener( + ep, VoxistTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep)); + this.addCustomEventListener(ep, VoxistTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep)); + this.addCustomEventListener(ep, VoxistTranscriptionEvents.ConnectFailure, + this._onVendorConnectFailure.bind(this, cs, ep)); + break; + case 'speechmatics': this.bugname = `${this.bugname_prefix}speechmatics_transcribe`; this.addCustomEventListener( diff --git a/lib/tasks/transcribe.js b/lib/tasks/transcribe.js index 4933152a..051c0816 100644 --- a/lib/tasks/transcribe.js +++ b/lib/tasks/transcribe.js @@ -13,6 +13,7 @@ const { JambonzTranscriptionEvents, TranscribeStatus, AssemblyAiTranscriptionEvents, + VoxistTranscriptionEvents, VerbioTranscriptionEvents, SpeechmaticsTranscriptionEvents } = require('../utils/constants.json'); @@ -300,6 +301,17 @@ class TaskTranscribe extends SttTask { this._onVendorConnectFailure.bind(this, cs, ep, channel)); break; + case 'voxist': + this.bugname = `${this.bugname_prefix}voxist_transcribe`; + this.addCustomEventListener(ep, VoxistTranscriptionEvents.Transcription, + this._onTranscription.bind(this, cs, ep, channel)); + this.addCustomEventListener(ep, + VoxistTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep)); + this.addCustomEventListener(ep, VoxistTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep)); + this.addCustomEventListener(ep, VoxistTranscriptionEvents.ConnectFailure, + this._onVendorConnectFailure.bind(this, cs, ep, channel)); + break; + case 'speechmatics': this.bugname = `${this.bugname_prefix}speechmatics_transcribe`; this.addCustomEventListener( diff --git a/lib/utils/constants.json b/lib/utils/constants.json index 2d292f3e..cdaa26db 100644 --- a/lib/utils/constants.json +++ b/lib/utils/constants.json @@ -149,6 +149,12 @@ "ConnectFailure": "assemblyai_transcribe::connect_failed", "Connect": "assemblyai_transcribe::connect" }, + "VoxistTranscriptionEvents": { + "Transcription": "voxist_transcribe::transcription", + "Error": "voxist_transcribe::error", + "ConnectFailure": "voxist_transcribe::connect_failed", + "Connect": "voxist_transcribe::connect" + }, "VadDetection": { "Detection": "vad_detect:detection" }, diff --git a/lib/utils/db-utils.js b/lib/utils/db-utils.js index 9884f53c..2b4b78c9 100644 --- a/lib/utils/db-utils.js +++ b/lib/utils/db-utils.js @@ -122,6 +122,10 @@ const speechMapper = (cred) => { const o = JSON.parse(decrypt(credential)); obj.api_key = o.api_key; } + else if ('voxist' === obj.vendor) { + const o = JSON.parse(decrypt(credential)); + obj.api_key = o.api_key; + } else if ('whisper' === obj.vendor) { const o = JSON.parse(decrypt(credential)); obj.api_key = o.api_key; diff --git a/lib/utils/transcription-utils.js b/lib/utils/transcription-utils.js index f2b083c5..cca1b561 100644 --- a/lib/utils/transcription-utils.js +++ b/lib/utils/transcription-utils.js @@ -105,6 +105,9 @@ const stickyVars = { 'ASSEMBLYAI_API_KEY', 'ASSEMBLYAI_WORD_BOOST' ], + voxist: [ + 'VOXIST_API_KEY', + ], speechmatics: [ 'SPEECHMATICS_API_KEY', 'SPEECHMATICS_HOST', @@ -517,6 +520,25 @@ const normalizeAssemblyAi = (evt, channel, language) => { }; }; +const normalizeVoxist = (evt, channel, language) => { + const copy = JSON.parse(JSON.stringify(evt)); + return { + language_code: language, + channel_tag: channel, + is_final: evt.type === 'final', + alternatives: [ + { + confidence: 1.00, + transcript: evt.text, + } + ], + vendor: { + name: 'voxist', + evt: copy + } + }; +}; + const normalizeSpeechmatics = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); const is_final = evt.message === 'AddTranscript'; @@ -567,6 +589,8 @@ module.exports = (logger) => { return normalizeCobalt(evt, channel, language); case 'assemblyai': return normalizeAssemblyAi(evt, channel, language, shortUtterance); + case 'voxist': + return normalizeVoxist(evt, channel, language); case 'verbio': return normalizeVerbio(evt, channel, language); case 'speechmatics': @@ -926,6 +950,13 @@ module.exports = (logger) => { {ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)}) }; } + else if ('voxist' === vendor) { + opts = { + ...opts, + ...(sttCredentials.api_key) && + {VOXIST_API_KEY: sttCredentials.api_key}, + }; + } else if ('verbio' === vendor) { const {verbioOptions = {}} = rOpts; opts = {