mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
set default deepgram model by language and task (gather vs transcribe) (#610)
* set default deepgram model by language and task (gather vs transcribe) * wip
This commit is contained in:
@@ -301,7 +301,7 @@ class TaskGather extends SttTask {
|
|||||||
if (this.data.recognizer?.deepgramOptions?.shortUtterance) this.shortUtterance = true;
|
if (this.data.recognizer?.deepgramOptions?.shortUtterance) this.shortUtterance = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
|
||||||
switch (this.vendor) {
|
switch (this.vendor) {
|
||||||
case 'google':
|
case 'google':
|
||||||
this.bugname = `${this.bugname_prefix}google_transcribe`;
|
this.bugname = `${this.bugname_prefix}google_transcribe`;
|
||||||
|
|||||||
@@ -138,7 +138,7 @@ class TaskTranscribe extends SttTask {
|
|||||||
if (this.isContinuousAsr) this._doContinuousAsrWithDeepgram(this.asrTimeout);
|
if (this.isContinuousAsr) this._doContinuousAsrWithDeepgram(this.asrTimeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
|
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
|
||||||
switch (this.vendor) {
|
switch (this.vendor) {
|
||||||
case 'google':
|
case 'google':
|
||||||
this.bugname = `${this.bugname_prefix}google_transcribe`;
|
this.bugname = `${this.bugname_prefix}google_transcribe`;
|
||||||
|
|||||||
@@ -266,7 +266,7 @@ module.exports = (logger) => {
|
|||||||
|
|
||||||
/* set stt options */
|
/* set stt options */
|
||||||
logger.info(`starting amd for vendor ${vendor} and language ${language}`);
|
logger.info(`starting amd for vendor ${vendor} and language ${language}`);
|
||||||
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, {
|
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, language, {
|
||||||
vendor,
|
vendor,
|
||||||
hints,
|
hints,
|
||||||
enhancedModel: true,
|
enhancedModel: true,
|
||||||
|
|||||||
@@ -102,6 +102,50 @@ const stickyVars = {
|
|||||||
]
|
]
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Default Deepgram model per language code.
 * Each value is a pair: [model used for gather, model used for transcribe].
 *
 * The table has a null prototype so that membership tests (`key in table`)
 * only ever match the languages listed here, never inherited names such as
 * 'constructor' or 'toString'. The top-level object is frozen so entries
 * cannot be added, removed or reassigned at runtime.
 */
const optimalDeepramModels = Object.freeze(Object.assign(Object.create(null), {
  zh: ['base', 'base'],
  'zh-CN': ['base', 'base'],
  'zh-TW': ['base', 'base'],
  da: ['enhanced', 'enhanced'],
  en: ['nova-2-conversationalai', 'nova-2'],
  'en-US': ['nova-2-conversationalai', 'nova-2'],
  'en-AU': ['nova-2-conversationalai', 'nova-2'],
  'en-GB': ['nova-2-conversationalai', 'nova-2'],
  'en-IN': ['nova-2-conversationalai', 'nova-2'],
  'en-NZ': ['nova-2-conversationalai', 'nova-2'],
  nl: ['nova-2-conversationalai', 'nova-2'],
  fr: ['nova-2-conversationalai', 'nova-2'],
  'fr-CA': ['nova-2-conversationalai', 'nova-2'],
  de: ['nova-2-conversationalai', 'nova-2'],
  hi: ['nova-2-conversationalai', 'nova-2'],
  'hi-Latn': ['nova-2-conversationalai', 'nova-2'],
  id: ['base', 'base'],
  it: ['enhanced', 'enhanced'],
  ja: ['enhanced', 'enhanced'],
  ko: ['enhanced', 'enhanced'],
  no: ['enhanced', 'enhanced'],
  pl: ['enhanced', 'enhanced'],
  pt: ['nova-2-conversationalai', 'nova-2'],
  'pt-BR': ['nova-2-conversationalai', 'nova-2'],
  'pt-PT': ['base', 'base'],
  ru: ['base', 'base'],
  es: ['nova-2-conversationalai', 'nova-2'],
  'es-419': ['nova-2-conversationalai', 'nova-2'],
  'es-LATAM': ['enhanced', 'enhanced'],
  sv: ['enhanced', 'enhanced'],
  ta: ['enhanced', 'enhanced'],
  taq: ['enhanced', 'enhanced'],
  tr: ['base', 'base'],
  uk: ['base', 'base']
}));
|
||||||
|
|
||||||
|
/**
 * Choose the default Deepgram model for a task and language.
 *
 * @param {object} task - task-like object; only `task.name` is read
 * @param {string} language - language code used as the lookup key
 * @returns {string|undefined} the gather model when `task.name` is
 * TaskName.Gather, otherwise the transcribe model; undefined when the
 * language has no entry in the table
 */
const selectDefaultDeepgramModel = (task, language) => {
  // Own-property test: the `in` operator also matches inherited names
  // ('constructor', 'toString', ...), which would then fail to destructure.
  if (!Object.prototype.hasOwnProperty.call(optimalDeepramModels, language)) return;

  const [gather, transcribe] = optimalDeepramModels[language];
  return task.name === TaskName.Gather ? gather : transcribe;
};
|
||||||
|
|
||||||
const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
|
const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
|
||||||
if (bufferedTranscripts.length === 1) return bufferedTranscripts[0];
|
if (bufferedTranscripts.length === 1) return bufferedTranscripts[0];
|
||||||
let totalConfidence = 0;
|
let totalConfidence = 0;
|
||||||
@@ -424,7 +468,7 @@ module.exports = (logger) => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => {
|
const setChannelVarsForStt = (task, sttCredentials, language, rOpts = {}) => {
|
||||||
let opts = {};
|
let opts = {};
|
||||||
const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
|
const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
|
||||||
const vad = {enable, voiceMs, mode};
|
const vad = {enable, voiceMs, mode};
|
||||||
@@ -568,6 +612,9 @@ module.exports = (logger) => {
|
|||||||
}
|
}
|
||||||
else if ('deepgram' === vendor) {
|
else if ('deepgram' === vendor) {
|
||||||
const {deepgramOptions = {}} = rOpts;
|
const {deepgramOptions = {}} = rOpts;
|
||||||
|
if (!deepgramOptions.model) {
|
||||||
|
deepgramOptions.model = selectDefaultDeepgramModel(task, language);
|
||||||
|
}
|
||||||
opts = {
|
opts = {
|
||||||
...opts,
|
...opts,
|
||||||
...(sttCredentials.api_key) &&
|
...(sttCredentials.api_key) &&
|
||||||
|
|||||||
Reference in New Issue
Block a user