set default deepgram model by language and task (gather vs transcribe) (#610)

* set default deepgram model by language and task (gather vs transcribe) * wip
2025-12-20 08:40:38 +00:00 · 2024-01-14 10:38:14 -05:00
parent 09a83e3a31
commit f22d66dfd6
4 changed files with 51 additions and 4 deletions
--- a/lib/tasks/gather.js
+++ b/lib/tasks/gather.js
@@ -301,7 +301,7 @@ class TaskGather extends SttTask {
      if (this.data.recognizer?.deepgramOptions?.shortUtterance) this.shortUtterance = true;
    }
-    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
+    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
    switch (this.vendor) {
      case 'google':
        this.bugname = `${this.bugname_prefix}google_transcribe`;
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -138,7 +138,7 @@ class TaskTranscribe extends SttTask {
      if (this.isContinuousAsr) this._doContinuousAsrWithDeepgram(this.asrTimeout);
    }
-    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
+    const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
    switch (this.vendor) {
      case 'google':
        this.bugname = `${this.bugname_prefix}google_transcribe`;
--- a/lib/utils/amd-utils.js
+++ b/lib/utils/amd-utils.js
@@ -266,7 +266,7 @@ module.exports = (logger) => {
    /* set stt options */
    logger.info(`starting amd for vendor ${vendor} and language ${language}`);
-    const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, {
+    const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, language, {
      vendor,
      hints,
      enhancedModel: true,
--- a/lib/utils/transcription-utils.js
+++ b/lib/utils/transcription-utils.js
@@ -102,6 +102,50 @@ const stickyVars = {
  ]
 };
 /**
  * Default Deepgram model per language code.
  * Each value is a two-element array: [model used for gather, model used for transcribe].
  * Languages that are not listed here have no default model.
  */
 const optimalDeepramModels = (() => {
  /* every entry gets its own array so no two languages share a reference */
  const base = () => ['base', 'base'];
  const enhanced = () => ['enhanced', 'enhanced'];
  const nova2 = () => ['nova-2-conversationalai', 'nova-2'];

  return {
    /* Chinese */
    zh: base(),
    'zh-CN': base(),
    'zh-TW': base(),
    /* Danish */
    da: enhanced(),
    /* English */
    en: nova2(),
    'en-US': nova2(),
    'en-AU': nova2(),
    'en-GB': nova2(),
    'en-IN': nova2(),
    'en-NZ': nova2(),
    /* Dutch, French, German, Hindi */
    nl: nova2(),
    fr: nova2(),
    'fr-CA': nova2(),
    de: nova2(),
    hi: nova2(),
    'hi-Latn': nova2(),
    /* Indonesian, Italian, Japanese, Korean, Norwegian, Polish */
    id: base(),
    it: enhanced(),
    ja: enhanced(),
    ko: enhanced(),
    no: enhanced(),
    pl: enhanced(),
    /* Portuguese */
    pt: nova2(),
    'pt-BR': nova2(),
    'pt-PT': base(),
    /* Russian */
    ru: base(),
    /* Spanish */
    es: nova2(),
    'es-419': nova2(),
    'es-LATAM': enhanced(),
    /* remaining languages */
    sv: enhanced(),
    ta: enhanced(),
    taq: enhanced(),
    tr: base(),
    uk: base()
  };
 })();
 /**
  * Choose the default Deepgram model for a language, depending on the kind of task.
  *
  * @param {object} task - the task being run; only `task.name` is read
  * @param {string} language - language code, looked up as a key of optimalDeepramModels
  * @returns {string|undefined} the gather model when task.name is TaskName.Gather,
  * otherwise the transcribe model; undefined when the language has no entry
  */
 const selectDefaultDeepgramModel = (task, language) => {
  /**
   * Check own properties only: the `in` operator also matches inherited members
   * such as 'toString' or 'constructor', which are not [gather, transcribe] pairs
   * and would throw when destructured.
   */
  if (Object.prototype.hasOwnProperty.call(optimalDeepramModels, language)) {
    const [gather, transcribe] = optimalDeepramModels[language];
    return task.name === TaskName.Gather ? gather : transcribe;
  }
 };
 const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
  if (bufferedTranscripts.length === 1) return bufferedTranscripts[0];
  let totalConfidence = 0;
@@ -424,7 +468,7 @@ module.exports = (logger) => {
    }
  };
-  const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => {
+  const setChannelVarsForStt = (task, sttCredentials, language, rOpts = {}) => {
    let opts = {};
    const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
    const vad = {enable, voiceMs, mode};
@@ -568,6 +612,9 @@ module.exports = (logger) => {
    }
    else if ('deepgram' === vendor) {
      const {deepgramOptions = {}} = rOpts;
      if (!deepgramOptions.model) {
        deepgramOptions.model = selectDefaultDeepgramModel(task, language);
      }
      opts = {
        ...opts,
        ...(sttCredentials.api_key) &&