diff --git a/lib/tasks/gather.js b/lib/tasks/gather.js
index b5ef89eb..a7e1d7e2 100644
--- a/lib/tasks/gather.js
+++ b/lib/tasks/gather.js
@@ -301,7 +301,7 @@ class TaskGather extends SttTask {
       if (this.data.recognizer?.deepgramOptions?.shortUtterance) this.shortUtterance = true;
     }
 
-    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
+    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
     switch (this.vendor) {
       case 'google':
         this.bugname = `${this.bugname_prefix}google_transcribe`;
diff --git a/lib/tasks/transcribe.js b/lib/tasks/transcribe.js
index c6a387c4..455b3010 100644
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -138,7 +138,7 @@ class TaskTranscribe extends SttTask {
       if (this.isContinuousAsr) this._doContinuousAsrWithDeepgram(this.asrTimeout);
     }
 
-    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
+    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
     switch (this.vendor) {
       case 'google':
         this.bugname = `${this.bugname_prefix}google_transcribe`;
diff --git a/lib/utils/amd-utils.js b/lib/utils/amd-utils.js
index 39cf49fe..a981e82d 100644
--- a/lib/utils/amd-utils.js
+++ b/lib/utils/amd-utils.js
@@ -266,7 +266,7 @@ module.exports = (logger) => {
 
     /* set stt options */
     logger.info(`starting amd for vendor ${vendor} and language ${language}`);
-    const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, {
+    const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, language, {
       vendor,
       hints,
       enhancedModel: true,
diff --git a/lib/utils/transcription-utils.js b/lib/utils/transcription-utils.js
index c3a4ab43..01fcbbc4 100644
--- a/lib/utils/transcription-utils.js
+++ b/lib/utils/transcription-utils.js
@@ -102,6 +102,62 @@ const stickyVars = {
   ]
 };
 
+/**
+ * Default Deepgram model per language, applied when the recognizer options do
+ * not name a model. Each value is [model for Gather tasks, model for all other tasks].
+ */
+const optimalDeepgramModels = {
+  zh: ['base', 'base'],
+  'zh-CN': ['base', 'base'],
+  'zh-TW': ['base', 'base'],
+  da: ['enhanced', 'enhanced'],
+  en: ['nova-2-conversationalai', 'nova-2'],
+  'en-US': ['nova-2-conversationalai', 'nova-2'],
+  'en-AU': ['nova-2-conversationalai', 'nova-2'],
+  'en-GB': ['nova-2-conversationalai', 'nova-2'],
+  'en-IN': ['nova-2-conversationalai', 'nova-2'],
+  'en-NZ': ['nova-2-conversationalai', 'nova-2'],
+  nl: ['nova-2-conversationalai', 'nova-2'],
+  fr: ['nova-2-conversationalai', 'nova-2'],
+  'fr-CA': ['nova-2-conversationalai', 'nova-2'],
+  de: ['nova-2-conversationalai', 'nova-2'],
+  hi: ['nova-2-conversationalai', 'nova-2'],
+  'hi-Latn': ['nova-2-conversationalai', 'nova-2'],
+  id: ['base', 'base'],
+  it: ['enhanced', 'enhanced'],
+  ja: ['enhanced', 'enhanced'],
+  ko: ['enhanced', 'enhanced'],
+  no: ['enhanced', 'enhanced'],
+  pl: ['enhanced', 'enhanced'],
+  pt: ['nova-2-conversationalai', 'nova-2'],
+  'pt-BR': ['nova-2-conversationalai', 'nova-2'],
+  'pt-PT': ['base', 'base'],
+  ru: ['base', 'base'],
+  es: ['nova-2-conversationalai', 'nova-2'],
+  'es-419': ['nova-2-conversationalai', 'nova-2'],
+  'es-LATAM': ['enhanced', 'enhanced'],
+  sv: ['enhanced', 'enhanced'],
+  ta: ['enhanced', 'enhanced'],
+  taq: ['enhanced', 'enhanced'],
+  tr: ['base', 'base'],
+  uk: ['base', 'base']
+};
+
+/**
+ * Select the default Deepgram model for a task and language.
+ *
+ * @param {object} task - task whose `name` is compared against TaskName.Gather
+ * @param {string} language - language code used as the lookup key
+ * @returns {string|undefined} the model name, or undefined when the language has no entry
+ */
+const selectDefaultDeepgramModel = (task, language) => {
+  /* own-property check: `in` would also match inherited keys such as 'constructor' */
+  if (Object.prototype.hasOwnProperty.call(optimalDeepgramModels, language)) {
+    const [gather, transcribe] = optimalDeepgramModels[language];
+    return task.name === TaskName.Gather ? gather : transcribe;
+  }
+};
+
 const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
   if (bufferedTranscripts.length === 1) return bufferedTranscripts[0];
   let totalConfidence = 0;
@@ -424,7 +480,7 @@ module.exports = (logger) => {
     }
   };
 
-  const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => {
+  const setChannelVarsForStt = (task, sttCredentials, language, rOpts = {}) => {
     let opts = {};
     const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
     const vad = {enable, voiceMs, mode};
@@ -568,6 +624,9 @@ module.exports = (logger) => {
     }
     else if ('deepgram' === vendor) {
       const {deepgramOptions = {}} = rOpts;
+      if (!deepgramOptions.model) {
+        deepgramOptions.model = selectDefaultDeepgramModel(task, language);
+      }
       opts = {
         ...opts,
         ...(sttCredentials.api_key) &&