mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-18 20:07:44 +00:00
support gladia stt (#1397)
* support gladia stt * wip * wip * update verb specification
This commit is contained in:
@@ -1086,6 +1086,13 @@ class CallSession extends Emitter {
|
||||
deepgram_stt_use_tls: credential.deepgram_stt_use_tls
|
||||
};
|
||||
}
|
||||
else if ('gladia' === vendor) {
|
||||
return {
|
||||
speech_credential_sid: credential.speech_credential_sid,
|
||||
api_key: credential.api_key,
|
||||
region: credential.region,
|
||||
};
|
||||
}
|
||||
else if ('soniox' === vendor) {
|
||||
return {
|
||||
speech_credential_sid: credential.speech_credential_sid,
|
||||
|
||||
@@ -5,6 +5,7 @@ const {
|
||||
AwsTranscriptionEvents,
|
||||
AzureTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents,
|
||||
GladiaTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
CobaltTranscriptionEvents,
|
||||
IbmTranscriptionEvents,
|
||||
@@ -487,6 +488,16 @@ class TaskGather extends SttTask {
|
||||
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'gladia':
|
||||
this.bugname = `${this.bugname_prefix}gladia_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
ep, GladiaTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, GladiaTranscriptionEvents.ConnectFailure,
|
||||
this._onVendorConnectFailure.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'soniox':
|
||||
this.bugname = `${this.bugname_prefix}soniox_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
|
||||
@@ -203,6 +203,56 @@ class SttTask extends Task {
|
||||
if (cs.hasGlobalSttPunctuation && !this.data.recognizer.punctuation) {
|
||||
this.data.recognizer.punctuation = cs.globalSttPunctuation;
|
||||
}
|
||||
if (this.vendor === 'gladia') {
|
||||
const { api_key, region } = this.sttCredentials;
|
||||
const {url} = await this.createGladiaLiveSession({
|
||||
api_key, region,
|
||||
model: this.data.recognizer.model || 'solaria-1',
|
||||
options: this.data.recognizer.gladiaOptions || {}
|
||||
});
|
||||
const {host, pathname, search} = new URL(url);
|
||||
this.sttCredentials.host = host;
|
||||
this.sttCredentials.path = `${pathname}${search}`;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
async createGladiaLiveSession({
|
||||
api_key,
|
||||
region = 'us-west',
|
||||
model = 'solaria-1',
|
||||
options = {},
|
||||
}) {
|
||||
const url = `https://api.gladia.io/v2/live?region=${region}`;
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'x-gladia-key': api_key,
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
encoding: 'wav/pcm',
|
||||
bit_depth: 16,
|
||||
sample_rate: 8000,
|
||||
channels: 1,
|
||||
model,
|
||||
...options,
|
||||
messages_config: {
|
||||
receive_final_transcripts: true,
|
||||
receive_speech_events: true,
|
||||
receive_errors: true,
|
||||
}
|
||||
})
|
||||
});
|
||||
if (!response.ok) {
|
||||
const error = await response.text();
|
||||
this.logger.error({url, status: response.status, error}, 'Error creating Gladia live session');
|
||||
throw new Error(`Error creating Gladia live session: ${response.status} ${error}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
this.logger.debug({url: data.url}, 'Gladia Call registered');
|
||||
return data;
|
||||
}
|
||||
|
||||
addCustomEventListener(ep, event, handler) {
|
||||
|
||||
@@ -6,6 +6,7 @@ const {
|
||||
AwsTranscriptionEvents,
|
||||
AzureTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents,
|
||||
GladiaTranscriptionEvents,
|
||||
DeepgramfluxTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
CobaltTranscriptionEvents,
|
||||
@@ -255,6 +256,18 @@ class TaskTranscribe extends SttTask {
|
||||
this._onVendorConnectFailure.bind(this, cs, ep, channel));
|
||||
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
|
||||
|
||||
break;
|
||||
|
||||
case 'gladia':
|
||||
this.bugname = `${this.bugname_prefix}gladia_transcribe`;
|
||||
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Connect,
|
||||
this._onVendorConnect.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, GladiaTranscriptionEvents.ConnectFailure,
|
||||
this._onVendorConnectFailure.bind(this, cs, ep, channel));
|
||||
this.addCustomEventListener(ep, GladiaTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
|
||||
|
||||
break;
|
||||
case 'soniox':
|
||||
this.bugname = `${this.bugname_prefix}soniox_transcribe`;
|
||||
|
||||
@@ -103,6 +103,12 @@
|
||||
"Connect": "deepgramflux_transcribe::connect",
|
||||
"Error": "deepgramflux_transcribe::error"
|
||||
},
|
||||
"GladiaTranscriptionEvents": {
|
||||
"Transcription": "gladia_transcribe::transcription",
|
||||
"ConnectFailure": "gladia_transcribe::connect_failed",
|
||||
"Connect": "gladia_transcribe::connect",
|
||||
"Error": "gladia_transcribe::error"
|
||||
},
|
||||
"SonioxTranscriptionEvents": {
|
||||
"Transcription": "soniox_transcribe::transcription",
|
||||
"Error": "soniox_transcribe::error"
|
||||
|
||||
@@ -81,6 +81,11 @@ const speechMapper = (cred) => {
|
||||
obj.deepgram_tts_uri = o.deepgram_tts_uri;
|
||||
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
|
||||
}
|
||||
else if ('gladia' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
obj.region = o.region;
|
||||
}
|
||||
else if ('deepgramflux' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
|
||||
@@ -376,6 +376,30 @@ const normalizeDeepgram = (evt, channel, language, shortUtterance) => {
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeGladia = (evt, channel, language, shortUtterance) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
|
||||
// Handle Gladia transcript format
|
||||
if (evt.type === 'transcript' && evt.data && evt.data.utterance) {
|
||||
const utterance = evt.data.utterance;
|
||||
const alternatives = [{
|
||||
confidence: utterance.confidence || 0,
|
||||
transcript: utterance.text || '',
|
||||
}];
|
||||
|
||||
return {
|
||||
language_code: utterance.language || language,
|
||||
channel_tag: channel,
|
||||
is_final: evt.data.is_final || false,
|
||||
alternatives,
|
||||
vendor: {
|
||||
name: 'gladia',
|
||||
evt: copy
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
const normalizeDeepgramFlux = (evt, channel, language) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
|
||||
@@ -742,6 +766,8 @@ module.exports = (logger) => {
|
||||
switch (vendor) {
|
||||
case 'deepgram':
|
||||
return normalizeDeepgram(evt, channel, language, shortUtterance);
|
||||
case 'gladia':
|
||||
return normalizeGladia(evt, channel, language, shortUtterance);
|
||||
case 'deepgramflux':
|
||||
return normalizeDeepgramFlux(evt, channel, language, shortUtterance);
|
||||
case 'microsoft':
|
||||
@@ -1059,6 +1085,13 @@ module.exports = (logger) => {
|
||||
...(keyterms && keyterms.length > 0 && {DEEPGRAMFLUX_SPEECH_KEYTERMS: keyterms.join(',')}),
|
||||
};
|
||||
}
|
||||
else if ('gladia' === vendor) {
|
||||
const {host, path} = sttCredentials;
|
||||
opts = {
|
||||
GLADIA_SPEECH_HOST: host,
|
||||
GLADIA_SPEECH_PATH: path,
|
||||
};
|
||||
}
|
||||
else if ('soniox' === vendor) {
|
||||
const {sonioxOptions = {}} = rOpts;
|
||||
const {storage = {}} = sonioxOptions;
|
||||
|
||||
8
package-lock.json
generated
8
package-lock.json
generated
@@ -18,7 +18,7 @@
|
||||
"@jambonz/speech-utils": "^0.2.25",
|
||||
"@jambonz/stats-collector": "^0.1.10",
|
||||
"@jambonz/time-series": "^0.2.14",
|
||||
"@jambonz/verb-specifications": "^0.0.117",
|
||||
"@jambonz/verb-specifications": "^0.0.118",
|
||||
"@modelcontextprotocol/sdk": "^1.9.0",
|
||||
"@opentelemetry/api": "^1.8.0",
|
||||
"@opentelemetry/exporter-jaeger": "^1.23.0",
|
||||
@@ -1517,9 +1517,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/verb-specifications": {
|
||||
"version": "0.0.117",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.117.tgz",
|
||||
"integrity": "sha512-yfnHWfqVRyE9ICBdVQV8CFwj0jDInpgMYdv9lI5c4iOmnYrJU3e/iFdrwM+abhFyZGL1Can9onynPW6RZFXXsw==",
|
||||
"version": "0.0.118",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.118.tgz",
|
||||
"integrity": "sha512-1dGnc6TUCehjt1yGNuqh1uzk1xw9HhUm39aVUosQMHlnT0fK0ItikeJ0uttTjFastHNmPPxqJwb20wOvVGTCFg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"debug": "^4.3.4",
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
"@jambonz/speech-utils": "^0.2.25",
|
||||
"@jambonz/stats-collector": "^0.1.10",
|
||||
"@jambonz/time-series": "^0.2.14",
|
||||
"@jambonz/verb-specifications": "^0.0.117",
|
||||
"@jambonz/verb-specifications": "^0.0.118",
|
||||
"@modelcontextprotocol/sdk": "^1.9.0",
|
||||
"@opentelemetry/api": "^1.8.0",
|
||||
"@opentelemetry/exporter-jaeger": "^1.23.0",
|
||||
|
||||
Reference in New Issue
Block a user