diff --git a/lib/tasks/transcribe.js b/lib/tasks/transcribe.js index 85ed6733..8001c569 100644 --- a/lib/tasks/transcribe.js +++ b/lib/tasks/transcribe.js @@ -215,7 +215,7 @@ class TaskTranscribe extends SttTask { this._onVendorConnectFailure.bind(this, cs, ep, channel)); /* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */ - if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true; + //if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true; break; case 'soniox': @@ -339,6 +339,12 @@ class TaskTranscribe extends SttTask { if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') { /* we will only get this when we have set utterance_end_ms */ + + /* DH: send a speech event when we get UtteranceEnd if they want interim events */ + if (this.interim) { + this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, sending speech event'); + this._resolve(channel, evt); + } if (bufferedTranscripts.length === 0) { this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram but no buffered transcripts'); } @@ -448,26 +454,31 @@ class TaskTranscribe extends SttTask { } async _resolve(channel, evt) { - /* we've got a transcript, so end the otel child span for this channel */ - if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) { - this.childSpan[channel - 1].span.setAttributes({ - channel, - 'stt.resolve': 'transcript', - 'stt.result': JSON.stringify(evt) - }); - this.childSpan[channel - 1].span.end(); + if (evt.is_final) { + /* we've got a final transcript, so end the otel child span for this channel */ + if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) { + this.childSpan[channel - 1].span.setAttributes({ + channel, + 'stt.resolve': 'transcript', + 'stt.result': JSON.stringify(evt) + }); + this.childSpan[channel - 1].span.end(); + } } if (this.transcriptionHook) { const b3 = this.getTracingPropagation(); const httpHeaders = b3 && {b3}; + const payload = { + ...this.cs.callInfo, + ...httpHeaders, + ...(evt.alternatives && {speech: evt}), + ...(evt.type && {speechEvent: evt}) + }; try { - const json = await this.cs.requestor.request('verb:hook', this.transcriptionHook, { - ...this.cs.callInfo, - ...httpHeaders, - speech: evt - }); - this.logger.info({json}, 'sent transcriptionHook'); + this.logger.debug({payload}, 'sending transcriptionHook'); + const json = await this.cs.requestor.request('verb:hook', this.transcriptionHook, payload); + this.logger.info({json}, 'completed transcriptionHook'); if (json && Array.isArray(json) && !this.parentTask) { const makeTask = require('./make_task'); const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata)); @@ -488,7 +499,7 @@ class TaskTranscribe extends SttTask { this._clearTimer(); this.notifyTaskDone(); } - else { + else if (evt.is_final) { /* start another child span for this channel */ const {span, ctx} = this.startChildSpan(`${STT_LISTEN_SPAN_NAME}:${channel}`); this.childSpan[channel - 1] = {span, ctx};