transcribe must buffer transcripts for channels 1 and 2 separately

This commit is contained in:
Dave Horton
2024-04-03 07:45:48 -04:00
parent e1497f90a8
commit ccc93b7a78

View File

@@ -44,7 +44,7 @@ class TaskTranscribe extends SttTask {
this.isContinuousAsr = true; this.isContinuousAsr = true;
} }
/* buffer speech for continuous asr */ /* buffer speech for continuous asr */
this._bufferedTranscripts = []; this._bufferedTranscripts = [ [], [] ]; // for channel 1 and 2
this.bugname_prefix = 'transcribe_'; this.bugname_prefix = 'transcribe_';
this.paused = false; this.paused = false;
} }
@@ -304,6 +304,7 @@ class TaskTranscribe extends SttTask {
// make sure this is not a transcript from answering machine detection // make sure this is not a transcript from answering machine detection
const bugname = fsEvent.getHeader('media-bugname'); const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished'); const finished = fsEvent.getHeader('transcription-session-finished');
let bufferedTranscripts = this._bufferedTranscripts[channel - 1];
if (bugname && this.bugname !== bugname) return; if (bugname && this.bugname !== bugname) return;
if (this.paused) { if (this.paused) {
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - paused, ignoring transcript'); this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - paused, ignoring transcript');
@@ -313,14 +314,14 @@ class TaskTranscribe extends SttTask {
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') { if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
/* we will only get this when we have set utterance_end_ms */ /* we will only get this when we have set utterance_end_ms */
if (this._bufferedTranscripts.length === 0) { if (bufferedTranscripts.length === 0) {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram but no buffered transcripts'); this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram but no buffered transcripts');
} }
else { else {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript'); this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor); evt = this.consolidateTranscripts(bufferedTranscripts, channel, this.language, this.vendor);
evt.is_final = true; evt.is_final = true;
this._bufferedTranscripts = []; bufferedTranscripts = [];
this._resolve(channel, evt); this._resolve(channel, evt);
} }
return; return;
@@ -341,7 +342,7 @@ class TaskTranscribe extends SttTask {
emptyTranscript = true; emptyTranscript = true;
if (finished === 'true' && if (finished === 'true' &&
['microsoft', 'deepgram'].includes(this.vendor) && ['microsoft', 'deepgram'].includes(this.vendor) &&
this._bufferedTranscripts.length === 0) { bufferedTranscripts.length === 0) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding'); this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding');
return; return;
} }
@@ -354,7 +355,7 @@ class TaskTranscribe extends SttTask {
'TaskGather:_onTranscription - got empty deepgram transcript during continous asr, continue listening'); 'TaskGather:_onTranscription - got empty deepgram transcript during continous asr, continue listening');
return; return;
} }
else if (this.vendor === 'deepgram' && this._bufferedTranscripts.length > 0) { else if (this.vendor === 'deepgram' && bufferedTranscripts.length > 0) {
this.logger.info({evt}, this.logger.info({evt},
'TaskGather:_onTranscription - got empty transcript from deepgram, return the buffered transcripts'); 'TaskGather:_onTranscription - got empty transcript from deepgram, return the buffered transcripts');
} }
@@ -370,7 +371,7 @@ class TaskTranscribe extends SttTask {
} }
} }
this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr'); this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr');
this._bufferedTranscripts.push(evt); bufferedTranscripts.push(evt);
this._startAsrTimer(channel); this._startAsrTimer(channel);
/* some STT engines will keep listening after a final response, so no need to restart */ /* some STT engines will keep listening after a final response, so no need to restart */
@@ -386,12 +387,12 @@ class TaskTranscribe extends SttTask {
} }
else if (this.vendor === 'deepgram') { else if (this.vendor === 'deepgram') {
/* compile transcripts into one */ /* compile transcripts into one */
if (!emptyTranscript) this._bufferedTranscripts.push(evt); if (!emptyTranscript) bufferedTranscripts.push(evt);
/* deepgram can send an empty and final transcript; only if we have any buffered should we resolve */ /* deepgram can send an empty and final transcript; only if we have any buffered should we resolve */
if (this._bufferedTranscripts.length === 0) return; if (bufferedTranscripts.length === 0) return;
evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language); evt = this.consolidateTranscripts(bufferedTranscripts, channel, this.language);
this._bufferedTranscripts = []; bufferedTranscripts = [];
} }
/* here is where we return a final transcript */ /* here is where we return a final transcript */
@@ -410,7 +411,7 @@ class TaskTranscribe extends SttTask {
const originalEvent = evt.vendor.evt; const originalEvent = evt.vendor.evt;
if (originalEvent.is_final && evt.alternatives[0].transcript !== '') { if (originalEvent.is_final && evt.alternatives[0].transcript !== '') {
this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript'); this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript');
this._bufferedTranscripts.push(evt); bufferedTranscripts.push(evt);
} }
} }
@@ -570,8 +571,9 @@ class TaskTranscribe extends SttTask {
this._clearAsrTimer(channel); this._clearAsrTimer(channel);
this._asrTimer = setTimeout(() => { this._asrTimer = setTimeout(() => {
this.logger.debug(`TaskTranscribe:_startAsrTimer - asr timer went off for channel: ${channel}`); this.logger.debug(`TaskTranscribe:_startAsrTimer - asr timer went off for channel: ${channel}`);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language, this.vendor); const evt = this.consolidateTranscripts(
this._bufferedTranscripts = []; this._bufferedTranscripts[channel - 1], channel, this.language, this.vendor);
this._bufferedTranscripts[channel - 1] = [];
this._resolve(channel, evt); this._resolve(channel, evt);
}, this.asrTimeout); }, this.asrTimeout);
this.logger.debug(`TaskTranscribe:_startAsrTimer: set for ${this.asrTimeout}ms for channel ${channel}`); this.logger.debug(`TaskTranscribe:_startAsrTimer: set for ${this.asrTimeout}ms for channel ${channel}`);