Add support for the Verbio speech vendor (STT and TTS) (#757)

* support verbio speech

* wip

* wip

* wip

* update speech utils

* update verb specification
This commit is contained in:
Hoan Luu Huu
2024-05-29 18:55:46 +07:00
committed by GitHub
parent 498dd64025
commit c5e19bf775
10 changed files with 67 additions and 15 deletions

View File

@@ -918,6 +918,12 @@ class CallSession extends Emitter {
api_key: credential.api_key,
model_id: credential.model_id
};
} else if ('verbio' === vendor) {
return {
client_id: credential.client_id,
client_secret: credential.client_secret,
engine_version: credential.engine_version
};
} else if (vendor.startsWith('custom:')) {
return {
speech_credential_sid: credential.speech_credential_sid,

View File

@@ -11,7 +11,8 @@ const {
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents,
AssemblyAiTranscriptionEvents,
VadDetection
VadDetection,
VerbioTranscriptionEvents
} = require('../utils/constants.json');
const {
JAMBONES_GATHER_EARLY_HINTS_MATCH,
@@ -424,6 +425,12 @@ class TaskGather extends SttTask {
ep, SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'verbio':
this.bugname = `${this.bugname_prefix}verbio_transcribe`;
this.addCustomEventListener(
ep, VerbioTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'cobalt':
this.bugname = `${this.bugname_prefix}cobalt_transcribe`;
this.addCustomEventListener(

View File

@@ -380,6 +380,7 @@ class TaskSay extends TtsTask {
.replace('deepgram_', 'deepgram.')
.replace('playht_', 'playht.')
.replace('rimelabs_', 'rimelabs.')
.replace('verbio_', 'verbio.')
.replace('elevenlabs_', 'elevenlabs.');
if (spanMapping[newKey]) newKey = spanMapping[newKey];
attrs[newKey] = value;
@@ -428,6 +429,10 @@ const spanMapping = {
'rimelabs.name_lookup_time_ms': 'name_lookup_ms',
'rimelabs.connect_time_ms': 'connect_ms',
'rimelabs.final_response_time_ms': 'final_response_ms',
// verbio
'verbio.name_lookup_time_ms': 'name_lookup_ms',
'verbio.connect_time_ms': 'connect_ms',
'verbio.final_response_time_ms': 'final_response_ms',
};
module.exports = TaskSay;

View File

@@ -166,7 +166,7 @@ class SttTask extends Task {
}
async _initSpeechCredentials(cs, vendor, label) {
const {getNuanceAccessToken, getIbmAccessToken, getAwsAuthToken} = cs.srf.locals.dbHelpers;
const {getNuanceAccessToken, getIbmAccessToken, getAwsAuthToken, getVerbioAccessToken} = cs.srf.locals.dbHelpers;
let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
if (!credentials) {
@@ -200,6 +200,11 @@ class SttTask extends Task {
const {accessKeyId, secretAccessKey, sessionToken, servedFromCache} = await getAwsAuthToken(roleArn);
this.logger.debug({roleArn}, `got aws access token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...credentials, accessKeyId, secretAccessKey, sessionToken};
} else if (vendor === 'verbio' && credentials.client_id && credentials.client_secret) {
const {access_token, servedFromCache} = await getVerbioAccessToken(credentials);
this.logger.debug({client_id: credentials.client_id},
`got verbio access token ${servedFromCache ? 'from cache' : ''}`);
credentials.access_token = access_token;
}
return credentials;
}

View File

@@ -97,6 +97,10 @@
"Transcription": "soniox_transcribe::transcription",
"Error": "soniox_transcribe::error"
},
"VerbioTranscriptionEvents": {
"Transcription": "verbio_transcribe::transcription",
"Error": "verbio_transcribe::error"
},
"CobaltTranscriptionEvents": {
"Transcription": "cobalt_speech::transcription",
"CompileContext": "cobalt_speech::compile_context_response",

View File

@@ -113,6 +113,11 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
} else if ('verbio' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.client_id = o.client_id;
obj.client_secret = o.client_secret;
obj.engine_version = o.engine_version;
} else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = o.auth_token;

View File

@@ -171,7 +171,7 @@ function installSrfLocals(srf, logger) {
retrieveFromSortedSet,
retrieveByPatternSortedSet,
sortedSetLength,
sortedSetPositionByPattern
sortedSetPositionByPattern,
} = require('@jambonz/realtimedb-helpers')({}, logger, tracer);
const registrar = new Registrar(logger, client);
const {
@@ -179,7 +179,8 @@ function installSrfLocals(srf, logger) {
addFileToCache,
getNuanceAccessToken,
getIbmAccessToken,
getAwsAuthToken
getAwsAuthToken,
getVerbioAccessToken
} = require('@jambonz/speech-utils')({}, logger);
const {
writeAlerts,
@@ -239,7 +240,8 @@ function installSrfLocals(srf, logger) {
retrieveFromSortedSet,
retrieveByPatternSortedSet,
sortedSetLength,
sortedSetPositionByPattern
sortedSetPositionByPattern,
getVerbioAccessToken
},
parentLogger: logger,
getSBC,

View File

@@ -796,8 +796,26 @@ module.exports = (logger) => {
...(rOpts.hints?.length > 0 &&
{ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)})
};
}
else if (vendor.startsWith('custom:')) {
} else if ('verbio' === vendor) {
const {verbioOptions = {}} = rOpts;
opts = {
...opts,
...(sttCredentials.access_token && { VERBIO_ACCESS_TOKEN: sttCredentials.access_token}),
...(sttCredentials.engine_version && {VERBIO_ENGINE_VERSION: sttCredentials.engine_version}),
...(language && {VERBIO_LANGUAGE: language}),
...(verbioOptions.enable_formatting && {VERBIO_ENABLE_FORMATTING: verbioOptions.enable_formatting}),
...(verbioOptions.enable_diarization && {VERBIO_ENABLE_DIARIZATION: verbioOptions.enable_diarization}),
...(verbioOptions.topic && {VERBIO_TOPIC: verbioOptions.topic}),
...(verbioOptions.inline_grammar && {VERBIO_INLINE_GRAMMAR: verbioOptions.inline_grammar}),
...(verbioOptions.grammar_uri && {VERBIO_GRAMMAR_URI: verbioOptions.grammar_uri}),
...(verbioOptions.label && {VERBIO_LABEL: verbioOptions.label}),
...(verbioOptions.recognition_timeout && {VERBIO_RECOGNITION_TIMEOUT: verbioOptions.recognition_timeout}),
...(verbioOptions.speech_complete_timeout &&
{VERBIO_SPEECH_COMPLETE_TIMEOUT: verbioOptions.speech_complete_timeout}),
...(verbioOptions.speech_incomplete_timeout &&
{VERBIO_SPEECH_INCOMPLETE_TIMEOUT: verbioOptions.speech_incomplete_timeout}),
};
} else if (vendor.startsWith('custom:')) {
let {options = {}} = rOpts;
const {auth_token, custom_stt_url} = sttCredentials;
options = {

14
package-lock.json generated
View File

@@ -18,7 +18,7 @@
"@jambonz/speech-utils": "^0.1.3",
"@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.71",
"@jambonz/verb-specifications": "^0.0.72",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0",
@@ -2360,9 +2360,9 @@
}
},
"node_modules/@jambonz/verb-specifications": {
"version": "0.0.71",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.71.tgz",
"integrity": "sha512-e4f7zbSncuh4cVtEg0DlGBp60B6d9SMxa0sI+bgIWLq9oRfvziL2Afb0od/a8AiPgDmIxBp6a3IoXcOy9gNCxw==",
"version": "0.0.72",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.72.tgz",
"integrity": "sha512-sjA+/LQP2p1zE02UByy9OaAaSxbfQNxQ6D0pwYoMG42U8n+8Det+GFM/9+oFVnbNjUH9bvgT8vrR57U0lU4Cpw==",
"dependencies": {
"debug": "^4.3.4",
"pino": "^8.8.0"
@@ -11992,9 +11992,9 @@
}
},
"@jambonz/verb-specifications": {
"version": "0.0.71",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.71.tgz",
"integrity": "sha512-e4f7zbSncuh4cVtEg0DlGBp60B6d9SMxa0sI+bgIWLq9oRfvziL2Afb0od/a8AiPgDmIxBp6a3IoXcOy9gNCxw==",
"version": "0.0.72",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.72.tgz",
"integrity": "sha512-sjA+/LQP2p1zE02UByy9OaAaSxbfQNxQ6D0pwYoMG42U8n+8Det+GFM/9+oFVnbNjUH9bvgT8vrR57U0lU4Cpw==",
"requires": {
"debug": "^4.3.4",
"pino": "^8.8.0"

View File

@@ -34,7 +34,7 @@
"@jambonz/speech-utils": "^0.1.3",
"@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.71",
"@jambonz/verb-specifications": "^0.0.72",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0",