mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
Speech vendor/cobalt (#463)
* initial changes for cobalt speech
* wip
* wip
* update to drachtio-fsmrf that supports cobalt
* update to verb-specifications with cobalt speech support
* more wip
* lint
* use node 18 with gh actions
* support for compiling cobalt hints
* fix bug in uuid_cobalt_compile_context
* update verb-specifications
* remove repeated code
* cobalt support for transcribe
* update to verb specs
This commit is contained in:
@@ -7,6 +7,7 @@ const {
|
||||
DeepgramTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
NvidiaTranscriptionEvents,
|
||||
CobaltTranscriptionEvents,
|
||||
JambonzTranscriptionEvents
|
||||
} = require('./constants');
|
||||
|
||||
@@ -92,6 +93,11 @@ const stickyVars = {
|
||||
nvidia: [
|
||||
'NVIDIA_HINTS'
|
||||
],
|
||||
cobalt: [
|
||||
'COBALT_SPEECH_HINTS',
|
||||
'COBALT_COMPILED_CONTEXT_DATA',
|
||||
'COBALT_METADATA'
|
||||
],
|
||||
soniox: [
|
||||
'SONIOX_PROFANITY_FILTER',
|
||||
'SONIOX_MODEL'
|
||||
@@ -225,6 +231,26 @@ const normalizeGoogle = (evt, channel, language) => {
|
||||
};
|
||||
};
|
||||
|
||||
/**
 * Build a normalized transcription object from a Cobalt transcription event.
 *
 * @param {object} evt - Cobalt event; reads `is_final` and, when it is an
 *   array, `alternatives` (each entry's `confidence` and
 *   `transcript_formatted`).
 * @param {*} channel - value copied to `channel_tag` in the result.
 * @param {*} language - value copied to `language_code` in the result.
 * @returns {{language_code: *, channel_tag: *, is_final: *,
 *   alternatives: Array<{confidence: *, transcript: *}>,
 *   vendor: {name: string, evt: object}}} the normalized transcription, with
 *   a detached JSON copy of the original event under `vendor.evt`.
 */
const normalizeCobalt = (evt, channel, language) => {
  // JSON round trip yields a detached snapshot, so later mutation of `evt`
  // by the caller cannot leak into the object we hand back.
  const copy = JSON.parse(JSON.stringify(evt));

  // Anything that is not an array (missing, null, or a malformed payload)
  // is treated as "no alternatives" rather than throwing on `.map`.
  const rawAlternatives = Array.isArray(evt.alternatives) ? evt.alternatives : [];
  const alternatives = rawAlternatives.map((alt) => ({
    confidence: alt.confidence,
    transcript: alt.transcript_formatted,
  }));

  return {
    language_code: language,
    channel_tag: channel,
    is_final: evt.is_final,
    alternatives,
    vendor: {
      name: 'cobalt',
      evt: copy
    }
  };
};
|
||||
|
||||
const normalizeCustom = (evt, channel, language, vendor) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
return {
|
||||
@@ -317,6 +343,8 @@ module.exports = (logger) => {
|
||||
return normalizeNvidia(evt, channel, language);
|
||||
case 'soniox':
|
||||
return normalizeSoniox(evt, channel, language);
|
||||
case 'cobalt':
|
||||
return normalizeCobalt(evt, channel, language);
|
||||
default:
|
||||
if (vendor.startsWith('custom:')) {
|
||||
return normalizeCustom(evt, channel, language, vendor);
|
||||
@@ -584,6 +612,25 @@ module.exports = (logger) => {
|
||||
{NVIDIA_CUSTOM_CONFIGURATION: JSON.stringify(nvidiaOptions.customConfiguration)}),
|
||||
};
|
||||
}
|
||||
else if ('cobalt' === vendor) {
|
||||
const {cobaltOptions = {}} = rOpts;
|
||||
const cobaltUri = cobaltOptions.serverUri || sttCredentials.cobalt_server_uri;
|
||||
opts = {
|
||||
...opts,
|
||||
...(rOpts.words && {COBALT_WORD_TIME_OFFSETS: 1}),
|
||||
...(!rOpts.words && {COBALT_WORD_TIME_OFFSETS: 0}),
|
||||
...(rOpts.model && {COBALT_MODEL: rOpts.model}),
|
||||
...(cobaltUri && {COBALT_SERVER_URI: cobaltUri}),
|
||||
...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'string' &&
|
||||
{COBALT_SPEECH_HINTS: rOpts.hints.join(',')}),
|
||||
...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'object' &&
|
||||
{COBALT_SPEECH_HINTS: JSON.stringify(rOpts.hints)}),
|
||||
...(rOpts.hints?.length > 0 && {COBALT_CONTEXT_TOKEN: cobaltOptions.contextToken || 'unk:default'}),
|
||||
...(cobaltOptions.metadata && {COBALT_METADATA: cobaltOptions.metadata}),
|
||||
...(cobaltOptions.enableConfusionNetwork && {COBALT_ENABLE_CONFUSION_NETWORK: 1}),
|
||||
...(cobaltOptions.compiledContextData && {COBALT_COMPILED_CONTEXT_DATA: cobaltOptions.compiledContextData}),
|
||||
};
|
||||
}
|
||||
else if (vendor.startsWith('custom:')) {
|
||||
let {options = {}} = rOpts;
|
||||
const {auth_token, custom_stt_url} = sttCredentials;
|
||||
@@ -633,6 +680,9 @@ module.exports = (logger) => {
|
||||
|
||||
ep.removeCustomEventListener(SonioxTranscriptionEvents.Transcription);
|
||||
|
||||
ep.removeCustomEventListener(CobaltTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(CobaltTranscriptionEvents.CompileContext);
|
||||
|
||||
ep.removeCustomEventListener(NvidiaTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(NvidiaTranscriptionEvents.TranscriptionComplete);
|
||||
ep.removeCustomEventListener(NvidiaTranscriptionEvents.StartOfSpeech);
|
||||
@@ -664,6 +714,10 @@ module.exports = (logger) => {
|
||||
const {apiKey} = recognizer.sonioxOptions || {};
|
||||
if (apiKey) return {api_key: apiKey};
|
||||
}
|
||||
else if (recognizer.vendor === 'cobalt') {
|
||||
const {serverUri} = recognizer.cobaltOptions || {};
|
||||
if (serverUri) return {cobalt_server_uri: serverUri};
|
||||
}
|
||||
else if (recognizer.vendor === 'ibm') {
|
||||
const {ttsApiKey, ttsRegion, sttApiKey, sttRegion, instanceId} = recognizer.ibmOptions || {};
|
||||
if (ttsApiKey || sttApiKey) return {
|
||||
|
||||
Reference in New Issue
Block a user