mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-19 04:17:44 +00:00
fix transcribe fixes for speechmatics (#978)
* fix transcribe fixes for speechmatics * update to verb-specs with fixes for speechmatics * add support for speechmatics translation * add handlers for receiving translations * call translation hookd * gather: no need to restart speechmatics after a final transcript during continuous asr * graceful shutdown * wip * wip * wip * wip * wip
This commit is contained in:
17
app.js
17
app.js
@@ -100,12 +100,19 @@ createHttpListener(logger, srf)
|
||||
});
|
||||
|
||||
|
||||
setInterval(async() => {
|
||||
const monInterval = setInterval(async() => {
|
||||
srf.locals.stats.gauge('fs.sip.calls.count', sessionTracker.count);
|
||||
// Checking system log level
|
||||
const systemInformation = await srf.locals.dbHelpers.lookupSystemInformation();
|
||||
if (systemInformation && systemInformation.log_level) {
|
||||
logger.level = systemInformation.log_level;
|
||||
try {
|
||||
const systemInformation = await srf.locals.dbHelpers.lookupSystemInformation();
|
||||
if (systemInformation && systemInformation.log_level) {
|
||||
logger.level = systemInformation.log_level;
|
||||
}
|
||||
} catch (err) {
|
||||
if (process.env.NODE_ENV === 'test') {
|
||||
clearInterval(monInterval);
|
||||
logger.error('all tests complete');
|
||||
}
|
||||
else logger.error({err}, 'Error checking system log level in database');
|
||||
}
|
||||
}, 20000);
|
||||
|
||||
|
||||
@@ -846,7 +846,8 @@ class TaskGather extends SttTask {
|
||||
this._startAsrTimer();
|
||||
|
||||
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'speechmatics']
|
||||
.includes(this.vendor)) this._startTranscribing(ep);
|
||||
}
|
||||
else {
|
||||
/* this was removed to fix https://github.com/jambonz/jambonz-feature-server/issues/783 */
|
||||
|
||||
@@ -27,6 +27,7 @@ class TaskTranscribe extends SttTask {
|
||||
super(logger, opts, parentTask);
|
||||
|
||||
this.transcriptionHook = this.data.transcriptionHook;
|
||||
this.translationHook = this.data.translationHook;
|
||||
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
|
||||
|
||||
if (this.data.recognizer) {
|
||||
@@ -302,7 +303,9 @@ class TaskTranscribe extends SttTask {
|
||||
case 'speechmatics':
|
||||
this.bugname = `${this.bugname_prefix}speechmatics_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
ep, SpeechmaticsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep, SpeechmaticsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel));
|
||||
this.addCustomEventListener(
|
||||
ep, SpeechmaticsTranscriptionEvents.Translation, this._onTranslation.bind(this, cs, ep, channel));
|
||||
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.Info,
|
||||
this._onSpeechmaticsInfo.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.RecognitionStarted,
|
||||
@@ -441,7 +444,7 @@ class TaskTranscribe extends SttTask {
|
||||
this._startAsrTimer(channel);
|
||||
|
||||
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google']
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google', 'speechmatics']
|
||||
.includes(this.vendor)) this._startTranscribing(cs, ep, channel);
|
||||
}
|
||||
else {
|
||||
@@ -466,7 +469,7 @@ class TaskTranscribe extends SttTask {
|
||||
this._resolve(channel, evt);
|
||||
|
||||
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google'].includes(this.vendor) &&
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google', 'speechmatics'].includes(this.vendor) &&
|
||||
!this.vendor.startsWith('custom:')) {
|
||||
this.logger.debug('TaskTranscribe:_onTranscription - restarting transcribe');
|
||||
this._startTranscribing(cs, ep, channel);
|
||||
@@ -492,6 +495,47 @@ class TaskTranscribe extends SttTask {
|
||||
}
|
||||
}
|
||||
|
||||
async _onTranslation(_cs, _ep, channel, evt, _fsEvent) {
|
||||
this.logger.debug({evt}, 'TaskTranscribe:_onTranslation');
|
||||
if (this.translationHook && evt.results?.length > 0) {
|
||||
try {
|
||||
const b3 = this.getTracingPropagation();
|
||||
const httpHeaders = b3 && {b3};
|
||||
const payload = {
|
||||
...this.cs.callInfo,
|
||||
...httpHeaders,
|
||||
translation: {
|
||||
channel,
|
||||
language: evt.language,
|
||||
translation: evt.results[0].content
|
||||
}
|
||||
};
|
||||
|
||||
this.logger.debug({payload}, 'sending translationHook');
|
||||
const json = await this.cs.requestor.request('verb:hook', this.translationHook, payload);
|
||||
this.logger.info({json}, 'completed translationHook');
|
||||
if (json && Array.isArray(json) && !this.parentTask) {
|
||||
const makeTask = require('./make_task');
|
||||
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
|
||||
if (tasks && tasks.length > 0) {
|
||||
this.logger.info({tasks: tasks}, `${this.name} replacing application with ${tasks.length} tasks`);
|
||||
this.cs.replaceApplication(tasks);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.info(err, 'TranscribeTask:_onTranslation error');
|
||||
}
|
||||
if (this.parentTask) {
|
||||
this.parentTask.emit('translation', evt);
|
||||
}
|
||||
}
|
||||
if (this.killed) {
|
||||
this.logger.debug('TaskTranscribe:_onTranslation exiting after receiving final transcription');
|
||||
this._clearTimer();
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
}
|
||||
|
||||
async _resolve(channel, evt) {
|
||||
if (evt.is_final) {
|
||||
/* we've got a final transcript, so end the otel child span for this channel */
|
||||
@@ -671,7 +715,7 @@ class TaskTranscribe extends SttTask {
|
||||
this.logger.debug({evt}, 'TaskGather:_onSpeechmaticsInfo');
|
||||
}
|
||||
|
||||
async _onSpeechmaticsErrror(cs, _ep, evt) {
|
||||
async _onSpeechmaticsError(cs, _ep, evt) {
|
||||
// eslint-disable-next-line no-unused-vars
|
||||
const {message, ...e} = evt;
|
||||
this._onVendorError(cs, _ep, {error: JSON.stringify(e)});
|
||||
|
||||
@@ -129,6 +129,7 @@
|
||||
},
|
||||
"SpeechmaticsTranscriptionEvents": {
|
||||
"Transcription": "speechmatics_transcribe::transcription",
|
||||
"Translation": "speechmatics_transcribe::translation",
|
||||
"Info": "speechmatics_transcribe::info",
|
||||
"RecognitionStarted": "speechmatics_transcribe::recognition_started",
|
||||
"ConnectFailure": "speechmatics_transcribe::connect_failed",
|
||||
|
||||
@@ -109,6 +109,8 @@ const stickyVars = {
|
||||
'SPEECHMATICS_HOST',
|
||||
'SPEECHMATICS_PATH',
|
||||
'SPEECHMATICS_SPEECH_HINTS',
|
||||
'SPEECHMATICS_TRANSLATION_LANGUAGES',
|
||||
'SPEECHMATICS_TRANSLATION_PARTIALS'
|
||||
]
|
||||
};
|
||||
|
||||
@@ -937,11 +939,18 @@ module.exports = (logger) => {
|
||||
};
|
||||
}
|
||||
else if ('speechmatics' === vendor) {
|
||||
const {speechmaticsOptions = {}} = rOpts;
|
||||
opts = {
|
||||
...opts,
|
||||
...(sttCredentials.api_key) && {SPEECHMATICS_API_KEY: sttCredentials.api_key},
|
||||
...(sttCredentials.speechmatics_stt_uri) && {SPEECHMATICS_HOST: sttCredentials.speechmatics_stt_uri},
|
||||
...(rOpts.hints?.length > 0 && {SPEECHMATICS_SPEECH_HINTS: rOpts.hints.join(',')}),
|
||||
...(speechmaticsOptions.translation_config &&
|
||||
{
|
||||
SPEECHMATICS_TRANSLATION_LANGUAGES: speechmaticsOptions.translation_config.target_languages.join(','),
|
||||
SPEECHMATICS_TRANSLATION_PARTIALS: speechmaticsOptions.translation_config.enable_partials ? 1 : 0
|
||||
}
|
||||
),
|
||||
};
|
||||
}
|
||||
else if (vendor.startsWith('custom:')) {
|
||||
|
||||
14
package-lock.json
generated
14
package-lock.json
generated
@@ -18,7 +18,7 @@
|
||||
"@jambonz/speech-utils": "^0.1.22",
|
||||
"@jambonz/stats-collector": "^0.1.10",
|
||||
"@jambonz/time-series": "^0.2.9",
|
||||
"@jambonz/verb-specifications": "^0.0.83",
|
||||
"@jambonz/verb-specifications": "^0.0.85",
|
||||
"@opentelemetry/api": "^1.8.0",
|
||||
"@opentelemetry/exporter-jaeger": "^1.23.0",
|
||||
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0",
|
||||
@@ -1581,9 +1581,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/verb-specifications": {
|
||||
"version": "0.0.83",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.83.tgz",
|
||||
"integrity": "sha512-3m1o3gnWw1yEwNfkZ6IIyUhdUqsQ3aWBNoWJHswe4udvVbEIxPHi+8jKGTJVF2vxddzwfOWp7RmMCV9RmKWzkw==",
|
||||
"version": "0.0.85",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.85.tgz",
|
||||
"integrity": "sha512-NQRXcfamrs3lRGy/1pAaAIuZcZPj4zjwUTYf7Sv+It0fuzB4KWbiqL9yeSq61qtCX2AutwRsV4WA8OZQYfLdgw==",
|
||||
"dependencies": {
|
||||
"debug": "^4.3.4",
|
||||
"pino": "^8.8.0"
|
||||
@@ -10751,9 +10751,9 @@
|
||||
}
|
||||
},
|
||||
"@jambonz/verb-specifications": {
|
||||
"version": "0.0.83",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.83.tgz",
|
||||
"integrity": "sha512-3m1o3gnWw1yEwNfkZ6IIyUhdUqsQ3aWBNoWJHswe4udvVbEIxPHi+8jKGTJVF2vxddzwfOWp7RmMCV9RmKWzkw==",
|
||||
"version": "0.0.85",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.85.tgz",
|
||||
"integrity": "sha512-NQRXcfamrs3lRGy/1pAaAIuZcZPj4zjwUTYf7Sv+It0fuzB4KWbiqL9yeSq61qtCX2AutwRsV4WA8OZQYfLdgw==",
|
||||
"requires": {
|
||||
"debug": "^4.3.4",
|
||||
"pino": "^8.8.0"
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
"@jambonz/speech-utils": "^0.1.22",
|
||||
"@jambonz/stats-collector": "^0.1.10",
|
||||
"@jambonz/time-series": "^0.2.9",
|
||||
"@jambonz/verb-specifications": "^0.0.83",
|
||||
"@jambonz/verb-specifications": "^0.0.85",
|
||||
"@opentelemetry/api": "^1.8.0",
|
||||
"@opentelemetry/exporter-jaeger": "^1.23.0",
|
||||
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0",
|
||||
|
||||
@@ -351,6 +351,8 @@ speech_credential_sid CHAR(36) NOT NULL,
|
||||
model VARCHAR(512) NOT NULL,
|
||||
reported_usage ENUM('REPORTED_USAGE_UNSPECIFIED','REALTIME','OFFLINE') DEFAULT 'REALTIME',
|
||||
name VARCHAR(64) NOT NULL,
|
||||
voice_cloning_key MEDIUMTEXT,
|
||||
use_voice_cloning_key BOOLEAN DEFAULT false,
|
||||
PRIMARY KEY (google_custom_voice_sid)
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user