send end of utterance events if using deepgram, interim events are enabled, and utterance_end_ms option is set (#772) (#782)

This commit is contained in:
Dave Horton
2024-06-13 13:18:32 -04:00
committed by GitHub
parent cafe149bdf
commit 76a3aa7f42

View File

@@ -215,7 +215,7 @@ class TaskTranscribe extends SttTask {
this._onVendorConnectFailure.bind(this, cs, ep, channel));
/* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */
if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
//if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
break;
case 'soniox':
@@ -339,6 +339,12 @@ class TaskTranscribe extends SttTask {
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
/* we will only get this when we have set utterance_end_ms */
/* DH: send a speech event when we get UtteranceEnd if they want interim events */
if (this.interim) {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, sending speech event');
this._resolve(channel, evt);
}
if (bufferedTranscripts.length === 0) {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram but no buffered transcripts');
}
@@ -448,26 +454,31 @@ class TaskTranscribe extends SttTask {
}
async _resolve(channel, evt) {
/* we've got a transcript, so end the otel child span for this channel */
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'transcript',
'stt.result': JSON.stringify(evt)
});
this.childSpan[channel - 1].span.end();
if (evt.is_final) {
/* we've got a final transcript, so end the otel child span for this channel */
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'transcript',
'stt.result': JSON.stringify(evt)
});
this.childSpan[channel - 1].span.end();
}
}
if (this.transcriptionHook) {
const b3 = this.getTracingPropagation();
const httpHeaders = b3 && {b3};
const payload = {
...this.cs.callInfo,
...httpHeaders,
...(evt.alternatives && {speech: evt}),
...(evt.type && {speechEvent: evt})
};
try {
const json = await this.cs.requestor.request('verb:hook', this.transcriptionHook, {
...this.cs.callInfo,
...httpHeaders,
speech: evt
});
this.logger.info({json}, 'sent transcriptionHook');
this.logger.debug({payload}, 'sending transcriptionHook');
const json = await this.cs.requestor.request('verb:hook', this.transcriptionHook, payload);
this.logger.info({json}, 'completed transcriptionHook');
if (json && Array.isArray(json) && !this.parentTask) {
const makeTask = require('./make_task');
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
@@ -488,7 +499,7 @@ class TaskTranscribe extends SttTask {
this._clearTimer();
this.notifyTaskDone();
}
else {
else if (evt.is_final) {
/* start another child span for this channel */
const {span, ctx} = this.startChildSpan(`${STT_LISTEN_SPAN_NAME}:${channel}`);
this.childSpan[channel - 1] = {span, ctx};