support stt latency metrics (#1252)

* support stt latency metrics * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * wip * enable stt latency calculator by config verb * wip * wip * wip * fix jslint * fixed gather timeout does not have latency calculation * update verb specification to use notifySttLatency * move stt latency metric from call session to stt-latency calculator * wip
2026-02-15 10:49:07 +00:00 · 2025-07-29 20:56:37 +07:00
parent 5886d1d945
commit 158d9d7d25
8 changed files with 356 additions and 37 deletions
--- a/lib/tasks/transcribe.js
+++ b/lib/tasks/transcribe.js
@@ -152,12 +152,15 @@ class TaskTranscribe extends SttTask {
        .catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
    }

+    this.cs.emit('transcribe-stop');
+
    return stopTranscription;
  }

  async kill(cs) {
    super.kill(cs);
    const stopTranscription = this._stopTranscription();
+    cs.stopSttLatencyVad();
    // hangup after 1 sec if we don't get a final transcription
    if (stopTranscription) this._timer = setTimeout(() => this.notifyTaskDone(), 1500);
    else this.notifyTaskDone();
@@ -423,6 +426,9 @@ class TaskTranscribe extends SttTask {
      bugname: this.bugname,
      hostport: this.hostport
    });
+
+    // Some vendors use a single connection, so we cannot rely on the onConnect event to track transcription start
+    this.cs.emit('transcribe-start');
  }

  async _onTranscription(cs, ep, channel, evt, fsEvent) {
@@ -441,6 +447,9 @@ class TaskTranscribe extends SttTask {

    if (this.vendor === 'ibm' && evt?.state === 'listening') return;

+    // emit an event to the call session to track the time transcription is received
+    cs.emit('on-transcription');
+
    if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
      /* we will only get this when we have set utterance_end_ms */

@@ -602,14 +611,28 @@ class TaskTranscribe extends SttTask {
  }

  async _resolve(channel, evt) {
+    let sttLatencyMetrics = {};
    if (evt.is_final) {
+      const sttLatency = this.cs.calculateSttLatency();
+      if (sttLatency) {
+        sttLatencyMetrics = {
+          'stt.latency_ms': `${sttLatency.stt_latency_ms}`,
+          'stt.talkspurts': JSON.stringify(sttLatency.talkspurts),
+          'stt.start_time': sttLatency.stt_start_time,
+          'stt.stop_time': sttLatency.stt_stop_time,
+          'stt.usage': sttLatency.stt_usage,
+        };
+      }
+      // time to reset the stt latency
+      this.cs.emit('transcribe-start');
      /* we've got a final transcript, so end the otel child span for this channel */
      if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
        this.childSpan[channel - 1].span.setAttributes({
          channel,
          'stt.label': this.label || 'None',
          'stt.resolve': 'transcript',
-          'stt.result': JSON.stringify(evt)
+          'stt.result': JSON.stringify(evt),
+          ...sttLatencyMetrics
        });
        this.childSpan[channel - 1].span.end();
      }
@@ -618,9 +641,13 @@ class TaskTranscribe extends SttTask {
    if (this.transcriptionHook) {
      const b3 = this.getTracingPropagation();
      const httpHeaders = b3 && {b3};
+      const latencies = Object.fromEntries(
+        Object.entries(sttLatencyMetrics).map(([key, value]) => [key.replace('stt.', 'stt_'), value])
+      );
      const payload = {
        ...this.cs.callInfo,
        ...httpHeaders,
+        ...latencies,
        ...(evt.alternatives && {speech: evt}),
        ...(evt.type && {speechEvent: evt})
      };