support stt latency metrics (#1252)

* support stt latency metrics

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* enable stt latency calculator by config verb

* wip

* wip

* wip

* fix jslint

* fix gather timeout not having a latency calculation

* update verb specification to use notifySttLatency

* move stt latency metric from call session to stt-latency calculator

* wip
This commit is contained in:
Hoan Luu Huu
2025-07-29 20:56:37 +07:00
committed by GitHub
parent 5886d1d945
commit 158d9d7d25
8 changed files with 356 additions and 37 deletions

View File

@@ -351,6 +351,7 @@ class TaskGather extends SttTask {
this.sayTask?.span.end();
this._stopVad();
this._resolve('killed');
cs.stopSttLatencyVad();
}
updateTaskInProgress(opts) {
@@ -390,16 +391,7 @@ class TaskGather extends SttTask {
if (this.digitBuffer.length === 0 && this.needsStt) {
// DTMF is higher priority than STT.
this.removeCustomEventListeners();
// Fix for https://github.com/jambonz/jambonz-feature-server/issues/1281
// We should immediately call stop transcription from gather
// so that next gather can start transcription immediately
ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname,
gracefulShutdown: false
})
.catch((err) => this.logger.error({err},
` Received DTMF, Error stopping transcription for vendor ${this.vendor}`));
this._stopTranscribing(ep);
}
this.digitBuffer += evt.dtmf;
const len = this.digitBuffer.length;
@@ -686,6 +678,9 @@ class TaskGather extends SttTask {
target_sid: this.cs.callSid
});
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
// Some vendors use a single connection, so we cannot rely on the onConnect event to track when transcription starts
this.cs.emit('transcribe-start');
}
_startTimer() {
@@ -869,6 +864,10 @@ class TaskGather extends SttTask {
if (finished === 'true') return;
if (this.vendor === 'ibm' && evt?.state === 'listening') return;
// emit an event to the call session to track the time transcription is received
cs.emit('on-transcription');
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
/* we will only get this when we have set utterance_end_ms */
if (this._bufferedTranscripts.length === 0) {
@@ -952,6 +951,12 @@ class TaskGather extends SttTask {
}
}
// receive a final transcript, calculate the stt latency for this transcript
const sttLatency = this.cs.calculateSttLatency();
if (!emptyTranscript && sttLatency) {
this.stt_latency_ms += `${sttLatency.stt_latency_ms},`;
}
if (this.isContinuousAsr) {
/* append the transcript and start listening again for asrTimeout */
const t = evt.alternatives[0].transcript;
@@ -1103,12 +1108,7 @@ class TaskGather extends SttTask {
async _startFallback(cs, ep, evt) {
if (this.canFallback) {
ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname,
gracefulShutdown: false
})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
this._stopTranscribing(ep);
try {
this.logger.debug('gather:_startFallback');
this.notifyError({ msg: 'ASR error',
@@ -1237,21 +1237,26 @@ class TaskGather extends SttTask {
}
}
/**
 * Ask the given endpoint to stop transcription for this task's vendor/bugname
 * and then emit 'transcribe-stop' on the call session (`this.cs`).
 *
 * The stop request is not awaited: its promise only has a rejection handler
 * attached, so this method does not wait for the stop to complete and a
 * failure to stop never propagates to the caller.
 *
 * @param {object} ep - endpoint exposing `stopTranscription(opts)`, which is
 *   expected to return a promise (a `.catch` is chained onto its result)
 * @returns {Promise<void>} resolves with no value; declared `async` but
 *   contains no `await`
 */
async _stopTranscribing(ep) {
// Fix for https://github.com/jambonz/jambonz-feature-server/issues/1281
// We should immediately call stop transcription from gather
// so that next gather can start transcription immediately
ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname,
gracefulShutdown: false
})
.catch((err) => {
// once the task is flagged as resolved, a failure here is not logged
if (this.resolved) return;
this.logger.error({err}, 'Error stopping transcription');
});
// emitted right after the stop request is issued, without waiting for it to settle
this.cs.emit('transcribe-stop');
}
async _resolve(reason, evt) {
this.logger.info({evt}, `TaskGather:resolve with reason ${reason}`);
if (this.needsStt && this.ep && this.ep.connected) {
// Fix for https://github.com/jambonz/jambonz-feature-server/issues/1281
// We should immediately call stop transcription from gather
// so that next gather can start transcription immediately
this.ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname,
gracefulShutdown: false
})
.catch((err) => {
if (this.resolved) return;
this.logger.error({err}, 'Error stopping transcription');
});
this._stopTranscribing(this.ep);
}
if (this.resolved) {
this.logger.debug('TaskGather:_resolve - already resolved');
@@ -1270,11 +1275,28 @@ class TaskGather extends SttTask {
this._clearAsrTimer();
this._clearFinalAsrTimer();
let sttLatencyMetrics = {};
if (this.needsStt) {
const sttLatency = this.cs.calculateSttLatency();
if (sttLatency) {
this.stt_latency_ms = this.stt_latency_ms.endsWith(',') ?
this.stt_latency_ms.slice(0, -1) : this.stt_latency_ms;
sttLatencyMetrics = {
'stt.latency_ms': this.stt_latency_ms,
'stt.talkspurts': JSON.stringify(sttLatency.talkspurts),
'stt.start_time': sttLatency.stt_start_time,
'stt.stop_time': sttLatency.stt_stop_time,
'stt.usage': sttLatency.stt_usage,
};
}
}
this.span.setAttributes({
channel: 1,
'stt.label': this.label || 'None',
'stt.resolve': reason,
'stt.result': JSON.stringify(evt)
'stt.result': JSON.stringify(evt),
...sttLatencyMetrics
});
if (this.callSession && this.callSession.callGone) {
@@ -1302,6 +1324,9 @@ class TaskGather extends SttTask {
let returnedVerbs = false;
try {
const latencies = Object.fromEntries(
Object.entries(sttLatencyMetrics).map(([key, value]) => [key.replace('stt.', 'stt_'), value])
);
if (reason.startsWith('dtmf')) {
if (this.parentTask) this.parentTask.emit('dtmf', evt);
else {
@@ -1315,7 +1340,7 @@ class TaskGather extends SttTask {
else {
this.emit('transcription', evt);
this.logger.debug('TaskGather:_resolve - invoking performAction');
returnedVerbs = await this.performAction({speech: evt, reason: 'speechDetected'});
returnedVerbs = await this.performAction({speech: evt, reason: 'speechDetected', ...latencies});
this.logger.debug({returnedVerbs}, 'TaskGather:_resolve - back from performAction');
}
}
@@ -1323,20 +1348,20 @@ class TaskGather extends SttTask {
if (this.parentTask) this.parentTask.emit('timeout', evt);
else {
this.emit('timeout', evt);
returnedVerbs = await this.performAction({reason: 'timeout'});
returnedVerbs = await this.performAction({reason: 'timeout', ...latencies});
}
}
else if (reason.startsWith('stt-error')) {
if (this.parentTask) this.parentTask.emit('stt-error', evt);
else {
this.emit('stt-error', evt);
returnedVerbs = await this.performAction({reason: 'error', details: evt.error});
returnedVerbs = await this.performAction({reason: 'error', details: evt.error, ...latencies});
}
} else if (reason.startsWith('stt-low-confidence')) {
if (this.parentTask) this.parentTask.emit('stt-low-confidence', evt);
else {
this.emit('stt-low-confidence', evt);
returnedVerbs = await this.performAction({speech:evt, reason: 'stt-low-confidence'});
returnedVerbs = await this.performAction({speech:evt, reason: 'stt-low-confidence', ...latencies});
}
}
} catch (err) { /*already logged error*/ }