set default deepgram model by language and task (gather vs transcribe) (#610)

* set default deepgram model by language and task (gather vs transcribe)

* wip
This commit is contained in:
Dave Horton
2024-01-14 10:38:14 -05:00
committed by GitHub
parent 09a83e3a31
commit f22d66dfd6
4 changed files with 51 additions and 4 deletions

View File

@@ -301,7 +301,7 @@ class TaskGather extends SttTask {
if (this.data.recognizer?.deepgramOptions?.shortUtterance) this.shortUtterance = true;
}
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
switch (this.vendor) {
case 'google':
this.bugname = `${this.bugname_prefix}google_transcribe`;

View File

@@ -138,7 +138,7 @@ class TaskTranscribe extends SttTask {
if (this.isContinuousAsr) this._doContinuousAsrWithDeepgram(this.asrTimeout);
}
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
switch (this.vendor) {
case 'google':
this.bugname = `${this.bugname_prefix}google_transcribe`;

View File

@@ -266,7 +266,7 @@ module.exports = (logger) => {
/* set stt options */
logger.info(`starting amd for vendor ${vendor} and language ${language}`);
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, {
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, language, {
vendor,
hints,
enhancedModel: true,

View File

@@ -102,6 +102,50 @@ const stickyVars = {
]
};
/**
 * Default Deepgram model per language code.
 *
 * Each value is a two-element array: selectDefaultDeepgramModel returns element 0
 * when the task name is TaskName.Gather and element 1 otherwise.
 */
const optimalDeepramModels = (() => {
  /* every entry gets its own freshly-created pair, exactly as a literal table would */
  const base = () => ['base', 'base'];
  const enhanced = () => ['enhanced', 'enhanced'];
  const nova2 = () => ['nova-2-conversationalai', 'nova-2'];

  return {
    /* Chinese */
    zh: base(),
    'zh-CN': base(),
    'zh-TW': base(),

    da: enhanced(),

    /* English */
    en: nova2(),
    'en-US': nova2(),
    'en-AU': nova2(),
    'en-GB': nova2(),
    'en-IN': nova2(),
    'en-NZ': nova2(),

    nl: nova2(),
    fr: nova2(),
    'fr-CA': nova2(),
    de: nova2(),
    hi: nova2(),
    'hi-Latn': nova2(),
    id: base(),
    it: enhanced(),
    ja: enhanced(),
    ko: enhanced(),
    no: enhanced(),
    pl: enhanced(),

    /* Portuguese */
    pt: nova2(),
    'pt-BR': nova2(),
    'pt-PT': base(),

    ru: base(),

    /* Spanish */
    es: nova2(),
    'es-419': nova2(),
    'es-LATAM': enhanced(),

    sv: enhanced(),
    ta: enhanced(),
    taq: enhanced(),
    tr: base(),
    uk: base()
  };
})();
/**
 * Choose the default Deepgram model for a language.
 *
 * Looks the language up in optimalDeepramModels, whose entries are
 * [gatherModel, transcribeModel] pairs.
 *
 * @param {object} task - task whose `name` is compared against TaskName.Gather
 * @param {string} language - language code used as the lookup key (e.g. 'en-US')
 * @returns {string|undefined} the gather model when task.name is TaskName.Gather,
 * otherwise the transcribe model; undefined when the language has no entry
 */
const selectDefaultDeepgramModel = (task, language) => {
  /**
   * Own-property check only: the `in` operator also matches keys inherited from
   * Object.prototype (e.g. 'constructor', 'toString'), whose values are not
   * iterable and would make the destructuring below throw a TypeError.
   */
  if (!Object.prototype.hasOwnProperty.call(optimalDeepramModels, language)) return;

  const [gather, transcribe] = optimalDeepramModels[language];
  return task.name === TaskName.Gather ? gather : transcribe;
};
const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
if (bufferedTranscripts.length === 1) return bufferedTranscripts[0];
let totalConfidence = 0;
@@ -424,7 +468,7 @@ module.exports = (logger) => {
}
};
const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => {
const setChannelVarsForStt = (task, sttCredentials, language, rOpts = {}) => {
let opts = {};
const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
const vad = {enable, voiceMs, mode};
@@ -568,6 +612,9 @@ module.exports = (logger) => {
}
else if ('deepgram' === vendor) {
const {deepgramOptions = {}} = rOpts;
if (!deepgramOptions.model) {
deepgramOptions.model = selectDefaultDeepgramModel(task, language);
}
opts = {
...opts,
...(sttCredentials.api_key) &&