Fix/speechmatics (#1042)

* add speechmatics options

* wip

* speechmatics does not do endpointing for us so we need to flip on continuousAsr

* speechmatics: continuousAsr timeout should be at least equal to max_delay, if set
This commit is contained in:
Dave Horton
2025-01-15 19:12:15 -05:00
committed by GitHub
parent edb7e21ff9
commit f71f0ac69a
4 changed files with 51 additions and 11 deletions

View File

@@ -24,6 +24,7 @@ const makeTask = require('./make_task');
const assert = require('assert'); const assert = require('assert');
const SttTask = require('./stt-task'); const SttTask = require('./stt-task');
const { SpeechCredentialError } = require('../utils/error'); const { SpeechCredentialError } = require('../utils/error');
const SPEECHMATICS_DEFAULT_ASR_TIMEOUT = 1200;
class TaskGather extends SttTask { class TaskGather extends SttTask {
constructor(logger, opts, parentTask) { constructor(logger, opts, parentTask) {
@@ -162,6 +163,16 @@ class TaskGather extends SttTask {
this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost}, this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost},
'Gather:exec - applying global sttHints'); 'Gather:exec - applying global sttHints');
} }
// special case for speechmatics: they don't do endpointing, so we need to enable continuous ASR
if (this.vendor === 'speechmatics' && !this.isContinuousAsr) {
const maxDelay = this.recognizer?.speechmaticsOptions?.transcription_config?.max_delay;
if (maxDelay) this.asrTimeout = Math.min(SPEECHMATICS_DEFAULT_ASR_TIMEOUT, maxDelay * 1000);
else this.asrTimeout = SPEECHMATICS_DEFAULT_ASR_TIMEOUT;
this.isContinuousAsr = true;
this.logger.debug(`Gather:exec - auto-enabling continuous ASR for speechmatics w/ timeout ${this.asrTimeout}`);
}
if (!this.isContinuousAsr && cs.isContinuousAsr) { if (!this.isContinuousAsr && cs.isContinuousAsr) {
this.isContinuousAsr = true; this.isContinuousAsr = true;
this.asrTimeout = cs.asrTimeout * 1000; this.asrTimeout = cs.asrTimeout * 1000;
@@ -832,7 +843,7 @@ class TaskGather extends SttTask {
const t = evt.alternatives[0].transcript; const t = evt.alternatives[0].transcript;
if (t) { if (t) {
/* remove trailing punctuation */ /* remove trailing punctuation */
if (/[,;:\.!\?]$/.test(t)) { if (this.vendor !== 'speechmatics' && /[,;:\.!\?]$/.test(t)) {
this.logger.debug('TaskGather:_onTranscription - removing trailing punctuation'); this.logger.debug('TaskGather:_onTranscription - removing trailing punctuation');
evt.alternatives[0].transcript = t.slice(0, -1); evt.alternatives[0].transcript = t.slice(0, -1);
} }

View File

@@ -209,7 +209,7 @@ const consolidateTranscripts = (bufferedTranscripts, channel, language, vendor)
const lastChar = acc.alternatives[0].transcript.slice(-1); const lastChar = acc.alternatives[0].transcript.slice(-1);
const firstChar = newTranscript.charAt(0); const firstChar = newTranscript.charAt(0);
if (lastChar.match(/\d/) && firstChar.match(/\d/)) { if (vendor === 'speechmatics' || (lastChar.match(/\d/) && firstChar.match(/\d/))) {
acc.alternatives[0].transcript += newTranscript; acc.alternatives[0].transcript += newTranscript;
} else { } else {
acc.alternatives[0].transcript += ` ${newTranscript}`; acc.alternatives[0].transcript += ` ${newTranscript}`;
@@ -957,6 +957,36 @@ module.exports = (logger) => {
SPEECHMATICS_TRANSLATION_PARTIALS: speechmaticsOptions.translation_config.enable_partials ? 1 : 0 SPEECHMATICS_TRANSLATION_PARTIALS: speechmaticsOptions.translation_config.enable_partials ? 1 : 0
} }
), ),
...(speechmaticsOptions.transcription_config?.domain &&
{SPEECHMATICS_DOMAIN: speechmaticsOptions.transcription_config.domain}),
...{SPEECHMATICS_MAX_DELAY: speechmaticsOptions.transcription_config?.max_delay || 0.7},
...{SPEECHMATICS_MAX_DELAY_MODE: speechmaticsOptions.transcription_config?.max_delay_mode || 'flexible'},
...(speechmaticsOptions.transcription_config?.diarization &&
{SPEECHMATICS_DIARIZATION: speechmaticsOptions.transcription_config.diarization}),
...(speechmaticsOptions.transcription_config?.speaker_diarization_config?.speaker_sensitivity &&
{SPEECHMATICS_DIARIZATION_SPEAKER_SENSITIVITY:
speechmaticsOptions.transcription_config.speaker_diarization_config.speaker_sensitivity}),
...(speechmaticsOptions.transcription_config?.speaker_diarization_config?.max_speakers &&
{SPEECHMATICS_DIARIZATION_MAX_SPEAKERS:
speechmaticsOptions.transcription_config.speaker_diarization_config.max_speakers}),
...(speechmaticsOptions.transcription_config?.output_locale &&
{SPEECHMATICS_OUTPUT_LOCALE: speechmaticsOptions.transcription_config.output_locale}),
...(speechmaticsOptions.transcription_config?.punctuation_overrides?.permitted_marks &&
{SPEECHMATICS_PUNCTUATION_ALLOWED:
speechmaticsOptions.transcription_config.punctuation_overrides.permitted_marks.join(',')}),
...(speechmaticsOptions.transcription_config?.punctuation_overrides?.sensitivity &&
{SPEECHMATICS_PUNCTUATION_SENSITIVITY:
speechmaticsOptions.transcription_config?.punctuation_overrides?.sensitivity}),
...(speechmaticsOptions.transcription_config?.operating_point &&
{SPEECHMATICS_OPERATING_POINT: speechmaticsOptions.transcription_config.operating_point}),
...(speechmaticsOptions.transcription_config?.enable_entities &&
{SPEECHMATICS_ENABLE_ENTTIES: speechmaticsOptions.transcription_config.enable_entities}),
...(speechmaticsOptions.transcription_config?.audio_filtering_config?.volume_threshold &&
{SPEECHMATICS_VOLUME_THRESHOLD:
speechmaticsOptions.transcription_config.audio_filtering_config.volume_threshold}),
...(speechmaticsOptions.transcription_config?.transcript_filtering_config?.remove_disfluencies &&
{SPEECHMATICS_REMOVE_DISFLUENCIES:
speechmaticsOptions.transcription_config.transcript_filtering_config.remove_disfluencies})
}; };
} }
else if (vendor.startsWith('custom:')) { else if (vendor.startsWith('custom:')) {

15
package-lock.json generated
View File

@@ -18,7 +18,7 @@
"@jambonz/speech-utils": "^0.2.1", "@jambonz/speech-utils": "^0.2.1",
"@jambonz/stats-collector": "^0.1.10", "@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.13", "@jambonz/time-series": "^0.2.13",
"@jambonz/verb-specifications": "^0.0.92", "@jambonz/verb-specifications": "^0.0.93",
"@opentelemetry/api": "^1.8.0", "@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0", "@opentelemetry/exporter-jaeger": "^1.23.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0", "@opentelemetry/exporter-trace-otlp-http": "^0.50.0",
@@ -1671,10 +1671,9 @@
} }
}, },
"node_modules/@jambonz/verb-specifications": { "node_modules/@jambonz/verb-specifications": {
"version": "0.0.92", "version": "0.0.93",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.92.tgz", "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.93.tgz",
"integrity": "sha512-zb1y5Hq+FqGYleYYKZafEIHyhhlH3VHapTJh3N0s+2xdy8I2Gf17zJpUc45mhV/4ficlT3SSwETsBbMt20Hwog==", "integrity": "sha512-Ml1+fT+cNSm4sEkd6zQpG3g1WmBeoXRtgcXVDRn980gCjBKuegcaXHaaCod6ddyWOpj3YiA51PnVEpfpaXU76A==",
"license": "MIT",
"dependencies": { "dependencies": {
"debug": "^4.3.4", "debug": "^4.3.4",
"pino": "^8.8.0" "pino": "^8.8.0"
@@ -10939,9 +10938,9 @@
} }
}, },
"@jambonz/verb-specifications": { "@jambonz/verb-specifications": {
"version": "0.0.92", "version": "0.0.93",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.92.tgz", "resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.93.tgz",
"integrity": "sha512-zb1y5Hq+FqGYleYYKZafEIHyhhlH3VHapTJh3N0s+2xdy8I2Gf17zJpUc45mhV/4ficlT3SSwETsBbMt20Hwog==", "integrity": "sha512-Ml1+fT+cNSm4sEkd6zQpG3g1WmBeoXRtgcXVDRn980gCjBKuegcaXHaaCod6ddyWOpj3YiA51PnVEpfpaXU76A==",
"requires": { "requires": {
"debug": "^4.3.4", "debug": "^4.3.4",
"pino": "^8.8.0" "pino": "^8.8.0"

View File

@@ -33,7 +33,7 @@
"@jambonz/realtimedb-helpers": "^0.8.8", "@jambonz/realtimedb-helpers": "^0.8.8",
"@jambonz/speech-utils": "^0.2.1", "@jambonz/speech-utils": "^0.2.1",
"@jambonz/stats-collector": "^0.1.10", "@jambonz/stats-collector": "^0.1.10",
"@jambonz/verb-specifications": "^0.0.92", "@jambonz/verb-specifications": "^0.0.93",
"@jambonz/time-series": "^0.2.13", "@jambonz/time-series": "^0.2.13",
"@opentelemetry/api": "^1.8.0", "@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0", "@opentelemetry/exporter-jaeger": "^1.23.0",