mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
update with various deepgram fixes, including for #700
This commit is contained in:
@@ -303,12 +303,12 @@ class TaskTranscribe extends SttTask {
|
|||||||
async _onTranscription(cs, ep, channel, evt, fsEvent) {
|
async _onTranscription(cs, ep, channel, evt, fsEvent) {
|
||||||
// make sure this is not a transcript from answering machine detection
|
// make sure this is not a transcript from answering machine detection
|
||||||
const bugname = fsEvent.getHeader('media-bugname');
|
const bugname = fsEvent.getHeader('media-bugname');
|
||||||
|
const finished = fsEvent.getHeader('transcription-session-finished');
|
||||||
if (bugname && this.bugname !== bugname) return;
|
if (bugname && this.bugname !== bugname) return;
|
||||||
if (this.paused) {
|
if (this.paused) {
|
||||||
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - paused, ignoring transcript');
|
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - paused, ignoring transcript');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (this.vendor === 'ibm' && evt?.state === 'listening') return;
|
if (this.vendor === 'ibm' && evt?.state === 'listening') return;
|
||||||
|
|
||||||
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
|
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
|
||||||
@@ -319,8 +319,9 @@ class TaskTranscribe extends SttTask {
|
|||||||
else {
|
else {
|
||||||
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
|
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
|
||||||
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
|
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
|
||||||
|
evt.is_final = true;
|
||||||
this._bufferedTranscripts = [];
|
this._bufferedTranscripts = [];
|
||||||
this._resolve('speech', evt);
|
this._resolve(channel, evt);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -334,31 +335,89 @@ class TaskTranscribe extends SttTask {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (evt.alternatives[0]?.transcript === '' && !cs.callGone && !this.killed) {
|
let emptyTranscript = false;
|
||||||
if (['microsoft', 'deepgram'].includes(this.vendor)) {
|
if (evt.is_final) {
|
||||||
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening');
|
if (evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) {
|
||||||
|
emptyTranscript = true;
|
||||||
|
if (finished === 'true' &&
|
||||||
|
['microsoft', 'deepgram'].includes(this.vendor) &&
|
||||||
|
this._bufferedTranscripts.length === 0) {
|
||||||
|
this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (this.vendor !== 'deepgram') {
|
||||||
|
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (this.isContinuousAsr) {
|
||||||
|
this.logger.info({evt},
|
||||||
|
'TaskGather:_onTranscription - got empty deepgram transcript during continous asr, continue listening');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if (this.vendor === 'deepgram' && this._bufferedTranscripts.length > 0) {
|
||||||
|
this.logger.info({evt},
|
||||||
|
'TaskGather:_onTranscription - got empty transcript from deepgram, return the buffered transcripts');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (this.isContinuousAsr) {
|
||||||
|
/* append the transcript and start listening again for asrTimeout */
|
||||||
|
const t = evt.alternatives[0].transcript;
|
||||||
|
if (t) {
|
||||||
|
/* remove trailing punctuation */
|
||||||
|
if (/[,;:\.!\?]$/.test(t)) {
|
||||||
|
this.logger.debug('TaskGather:_onTranscription - removing trailing punctuation');
|
||||||
|
evt.alternatives[0].transcript = t.slice(0, -1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr');
|
||||||
|
this._bufferedTranscripts.push(evt);
|
||||||
|
this._startAsrTimer(channel);
|
||||||
|
|
||||||
|
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||||
|
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google']
|
||||||
|
.includes(this.vendor)) this._startTranscribing(cs, ep, channel);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, listen again');
|
if (this.vendor === 'soniox') {
|
||||||
this._transcribe(ep);
|
/* compile transcripts into one */
|
||||||
}
|
this._sonioxTranscripts.push(evt.vendor.finalWords);
|
||||||
return;
|
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
|
||||||
}
|
this._sonioxTranscripts = [];
|
||||||
|
}
|
||||||
|
else if (this.vendor === 'deepgram') {
|
||||||
|
/* compile transcripts into one */
|
||||||
|
if (!emptyTranscript) this._bufferedTranscripts.push(evt);
|
||||||
|
|
||||||
if (this.vendor === 'soniox') {
|
/* deepgram can send an empty and final transcript; only if we have any buffered should we resolve */
|
||||||
/* compile transcripts into one */
|
if (this._bufferedTranscripts.length === 0) return;
|
||||||
this._sonioxTranscripts.push(evt.vendor.finalWords);
|
evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language);
|
||||||
if (evt.is_final) {
|
this._bufferedTranscripts = [];
|
||||||
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
|
}
|
||||||
this._sonioxTranscripts = [];
|
|
||||||
|
/* here is where we return a final transcript */
|
||||||
|
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - sending final transcript');
|
||||||
|
this._resolve(channel, evt);
|
||||||
|
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||||
|
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google']
|
||||||
|
.includes(this.vendor)) this._startTranscribing(cs, ep, channel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
/* interim transcript */
|
||||||
|
|
||||||
if (this.isContinuousAsr && evt.is_final) {
|
/* deepgram can send a non-final transcript but with words that are final, so we need to buffer */
|
||||||
this._bufferedTranscripts.push(evt);
|
if (this.vendor === 'deepgram') {
|
||||||
this._startAsrTimer(channel);
|
const originalEvent = evt.vendor.evt;
|
||||||
} else {
|
if (originalEvent.is_final && evt.alternatives[0].transcript !== '') {
|
||||||
await this._resolve(channel, evt);
|
this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript');
|
||||||
|
this._bufferedTranscripts.push(evt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.interim) {
|
||||||
|
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - sending interim transcript');
|
||||||
|
this._resolve(channel, evt);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user