mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
feat: update speech-ultil version 1.0.1 (#275)
* feat: update speech-ultil version 1.0.1 * feat: update speech-ultil version 1.0.1 * more fixes for custom stt * more fixes * fixes * update drachtio-fsmrf * pass url to mod_jambonz_transcribe * transcription utils: handle custom results * handle custom speech vendor errors * add support for hints to custom speech * change to custom speech options * send hints as an array for custom speech * update latest speech-utils * transcribe: changes to support soniox * bugfix: soniox transcribe --------- Co-authored-by: Quan HL <quanluuhoang8@gmail.com> Co-authored-by: Dave Horton <daveh@beachdognet.com>
This commit is contained in:
@@ -6,7 +6,8 @@ const {
|
||||
NuanceTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
NvidiaTranscriptionEvents
|
||||
NvidiaTranscriptionEvents,
|
||||
JambonzTranscriptionEvents
|
||||
} = require('./constants');
|
||||
|
||||
const stickyVars = {
|
||||
@@ -223,6 +224,15 @@ const normalizeGoogle = (evt, channel, language) => {
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeCustom = (evt, channel, language) => {
|
||||
return {
|
||||
language_code: language,
|
||||
channel_tag: channel,
|
||||
is_final: evt.is_final,
|
||||
alternatives: [evt.alternatives[0]]
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeNuance = (evt, channel, language) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
return {
|
||||
@@ -302,6 +312,9 @@ module.exports = (logger) => {
|
||||
case 'soniox':
|
||||
return normalizeSoniox(evt, channel, language);
|
||||
default:
|
||||
if (vendor.startsWith('custom:')) {
|
||||
return normalizeCustom(evt, channel, language);
|
||||
}
|
||||
logger.error(`Unknown vendor ${vendor}`);
|
||||
return evt;
|
||||
}
|
||||
@@ -311,6 +324,7 @@ module.exports = (logger) => {
|
||||
let opts = {};
|
||||
const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
|
||||
const vad = {enable, voiceMs, mode};
|
||||
const vendor = rOpts.vendor;
|
||||
|
||||
/* voice activity detection works across vendors */
|
||||
opts = {
|
||||
@@ -320,7 +334,7 @@ module.exports = (logger) => {
|
||||
...(vad.enable && typeof vad.mode === 'number' && {RECOGNIZER_VAD_MODE: vad.mode}),
|
||||
};
|
||||
|
||||
if ('google' === rOpts.vendor) {
|
||||
if ('google' === vendor) {
|
||||
opts = {
|
||||
...opts,
|
||||
...(sttCredentials &&
|
||||
@@ -372,7 +386,7 @@ module.exports = (logger) => {
|
||||
{GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
|
||||
};
|
||||
}
|
||||
else if (['aws', 'polly'].includes(rOpts.vendor)) {
|
||||
else if (['aws', 'polly'].includes(vendor)) {
|
||||
opts = {
|
||||
...opts,
|
||||
...(rOpts.vocabularyName && {AWS_VOCABULARY_NAME: rOpts.vocabularyName}),
|
||||
@@ -385,7 +399,7 @@ module.exports = (logger) => {
|
||||
}),
|
||||
};
|
||||
}
|
||||
else if ('microsoft' === rOpts.vendor) {
|
||||
else if ('microsoft' === vendor) {
|
||||
opts = {
|
||||
...opts,
|
||||
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
|
||||
@@ -410,7 +424,7 @@ module.exports = (logger) => {
|
||||
{AZURE_SERVICE_ENDPOINT_ID: sttCredentials.custom_stt_endpoint})
|
||||
};
|
||||
}
|
||||
else if ('nuance' === rOpts.vendor) {
|
||||
else if ('nuance' === vendor) {
|
||||
/**
|
||||
* Note: all nuance options are in recognizer.nuanceOptions, should migrate
|
||||
* other vendor settings to similar nested structure
|
||||
@@ -461,7 +475,7 @@ module.exports = (logger) => {
|
||||
{NUANCE_RESOURCES: JSON.stringify(nuanceOptions.resources)},
|
||||
};
|
||||
}
|
||||
else if ('deepgram' === rOpts.vendor) {
|
||||
else if ('deepgram' === vendor) {
|
||||
const {deepgramOptions = {}} = rOpts;
|
||||
opts = {
|
||||
...opts,
|
||||
@@ -505,7 +519,7 @@ module.exports = (logger) => {
|
||||
{DEEPGRAM_SPEECH_TAG: deepgramOptions.tag}
|
||||
};
|
||||
}
|
||||
else if ('soniox' === rOpts.vendor) {
|
||||
else if ('soniox' === vendor) {
|
||||
const {sonioxOptions = {}} = rOpts;
|
||||
const {storage = {}} = sonioxOptions;
|
||||
opts = {
|
||||
@@ -528,7 +542,7 @@ module.exports = (logger) => {
|
||||
...(storage?.id && storage?.disableSearch && {SONIOX_STORAGE_DISABLE_SEARCH: 1})
|
||||
};
|
||||
}
|
||||
else if ('ibm' === rOpts.vendor) {
|
||||
else if ('ibm' === vendor) {
|
||||
const {ibmOptions = {}} = rOpts;
|
||||
opts = {
|
||||
...opts,
|
||||
@@ -552,7 +566,7 @@ module.exports = (logger) => {
|
||||
{IBM_SPEECH_WATSON_LEARNING_OPT_OUT: ibmOptions.watsonLearningOptOut}
|
||||
};
|
||||
}
|
||||
else if ('nvidia' === rOpts.vendor) {
|
||||
else if ('nvidia' === vendor) {
|
||||
const {nvidiaOptions = {}} = rOpts;
|
||||
opts = {
|
||||
...opts,
|
||||
@@ -581,11 +595,29 @@ module.exports = (logger) => {
|
||||
{NVIDIA_CUSTOM_CONFIGURATION: JSON.stringify(nvidiaOptions.customConfiguration)}),
|
||||
};
|
||||
}
|
||||
else if (vendor.startsWith('custom:')) {
|
||||
let {options = {}} = rOpts;
|
||||
const {auth_token, custom_stt_url} = sttCredentials;
|
||||
options = {
|
||||
...options,
|
||||
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
|
||||
{hints: rOpts.hints}),
|
||||
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
|
||||
{hints: JSON.stringify(rOpts.hints)}),
|
||||
...(typeof rOpts.hintsBoost === 'number' && {hintsBoost: rOpts.hintsBoost})
|
||||
};
|
||||
|
||||
stickyVars[rOpts.vendor].forEach((key) => {
|
||||
opts = {
|
||||
...opts,
|
||||
JAMBONZ_STT_API_KEY: auth_token,
|
||||
JAMBONZ_STT_URL: custom_stt_url,
|
||||
...(Object.keys(options).length > 0 && {JAMBONZ_STT_OPTIONS: JSON.stringify(options)}),
|
||||
};
|
||||
}
|
||||
|
||||
(stickyVars[vendor] || []).forEach((key) => {
|
||||
if (!opts[key]) opts[key] = '';
|
||||
});
|
||||
//logger.debug({opts}, 'recognizer channel vars');
|
||||
return opts;
|
||||
};
|
||||
|
||||
@@ -604,7 +636,6 @@ module.exports = (logger) => {
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.TranscriptionComplete);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.StartOfSpeech);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.Error);
|
||||
ep.removeCustomEventListener(NuanceTranscriptionEvents.VadDetected);
|
||||
|
||||
ep.removeCustomEventListener(DeepgramTranscriptionEvents.Transcription);
|
||||
@@ -612,13 +643,17 @@ module.exports = (logger) => {
|
||||
ep.removeCustomEventListener(DeepgramTranscriptionEvents.ConnectFailure);
|
||||
|
||||
ep.removeCustomEventListener(SonioxTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(SonioxTranscriptionEvents.Error);
|
||||
|
||||
ep.removeCustomEventListener(NvidiaTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(NvidiaTranscriptionEvents.TranscriptionComplete);
|
||||
ep.removeCustomEventListener(NvidiaTranscriptionEvents.StartOfSpeech);
|
||||
ep.removeCustomEventListener(NvidiaTranscriptionEvents.Error);
|
||||
ep.removeCustomEventListener(NvidiaTranscriptionEvents.VadDetected);
|
||||
|
||||
ep.removeCustomEventListener(JambonzTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(JambonzTranscriptionEvents.Connect);
|
||||
ep.removeCustomEventListener(JambonzTranscriptionEvents.ConnectFailure);
|
||||
|
||||
ep.removeCustomEventListener(JambonzTranscriptionEvents.Error);
|
||||
};
|
||||
|
||||
const setSpeechCredentialsAtRuntime = (recognizer) => {
|
||||
|
||||
Reference in New Issue
Block a user