support stt latency metrics (#1252)

* support stt latency metrics

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* enable stt latency calculator by config verb

* wip

* wip

* wip

* fix jslint

* fix: gather timeout did not include the latency calculation

* update verb specification to use notifySttLatency

* move stt latency metric from call session to stt-latency calculator

* wip
This commit is contained in:
Hoan Luu Huu
2025-07-29 20:56:37 +07:00
committed by GitHub
parent 5886d1d945
commit 158d9d7d25
8 changed files with 356 additions and 37 deletions

View File

@@ -231,5 +231,5 @@ module.exports = {
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER, JAMBONES_DIAL_SBC_FOR_REGISTERED_USER,
JAMBONES_MEDIA_TIMEOUT_MS, JAMBONES_MEDIA_TIMEOUT_MS,
JAMBONES_MEDIA_HOLD_TIMEOUT_MS, JAMBONES_MEDIA_HOLD_TIMEOUT_MS,
JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS,
}; };

View File

@@ -38,6 +38,7 @@ const BADPRECONDITIONS = 'preconditions not met';
const CALLER_CANCELLED_ERR_MSG = 'Response not sent due to unknown transaction'; const CALLER_CANCELLED_ERR_MSG = 'Response not sent due to unknown transaction';
const { NonFatalTaskError} = require('../utils/error'); const { NonFatalTaskError} = require('../utils/error');
const { createMediaEndpoint } = require('../utils/media-endpoint'); const { createMediaEndpoint } = require('../utils/media-endpoint');
const SttLatencyCalculator = require('../utils/stt-latency-calculator');
const sqlRetrieveQueueEventHook = `SELECT * FROM webhooks const sqlRetrieveQueueEventHook = `SELECT * FROM webhooks
WHERE webhook_sid = WHERE webhook_sid =
( (
@@ -147,6 +148,30 @@ class CallSession extends Emitter {
this.conversationTurns = []; this.conversationTurns = [];
this.on('userSaid', this._onUserSaid.bind(this)); this.on('userSaid', this._onUserSaid.bind(this));
this.on('botSaid', this._onBotSaid.bind(this)); this.on('botSaid', this._onBotSaid.bind(this));
/**
* Support STT latency
*/
this.sttLatencyCalculator = new SttLatencyCalculator({
logger,
cs: this
});
this.on('transcribe-start', () => {
this.sttLatencyCalculator.resetTime();
});
this.on('on-transcription', () => {
this.sttLatencyCalculator.onTranscriptionReceived();
});
this.on('transcribe-stop', () => {
this.sttLatencyCalculator.onTranscribeStop();
});
}
get notifySttLatencyEnabled() {
return this._notifySttLatencyEnabled || false;
}
set notifySttLatencyEnabled(enabled) {
this._notifySttLatencyEnabled = enabled;
} }
/** /**
@@ -2480,6 +2505,8 @@ Duration=${duration} `
this.clearOrRestoreActionHookDelayProcessor().catch((err) => {}); this.clearOrRestoreActionHookDelayProcessor().catch((err) => {});
this.ttsStreamingBuffer?.stop(); this.ttsStreamingBuffer?.stop();
this.sttLatencyCalculator?.stop();
} }
/** /**
@@ -3127,6 +3154,20 @@ Duration=${duration} `
return `assistant: ${t.text}`; return `assistant: ${t.text}`;
}).join('\n'); }).join('\n');
} }
startSttLatencyVad() {
if (this.notifySttLatencyEnabled) {
this.sttLatencyCalculator.start();
}
}
stopSttLatencyVad() {
this.sttLatencyCalculator.stop();
}
calculateSttLatency() {
return this.sttLatencyCalculator.calculateLatency();
}
} }
module.exports = CallSession; module.exports = CallSession;

View File

@@ -24,6 +24,9 @@ class TaskConfig extends Task {
if ('notifyEvents' in this.data) { if ('notifyEvents' in this.data) {
this.notifyEvents = !!this.data.notifyEvents; this.notifyEvents = !!this.data.notifyEvents;
} }
if (this.hasNotifySttLatency) {
this.notifySttLatency = !!this.data.notifySttLatency;
}
if (this.bargeIn.enable) { if (this.bargeIn.enable) {
this.gatherOpts = { this.gatherOpts = {
@@ -83,6 +86,7 @@ class TaskConfig extends Task {
get hasVad() { return Object.keys(this.vad).length; } get hasVad() { return Object.keys(this.vad).length; }
get hasFillerNoise() { return Object.keys(this.fillerNoise).length; } get hasFillerNoise() { return Object.keys(this.fillerNoise).length; }
get hasReferHook() { return Object.keys(this.data).includes('referHook'); } get hasReferHook() { return Object.keys(this.data).includes('referHook'); }
get hasNotifySttLatency() { return Object.keys(this.data).includes('notifySttLatency'); }
get hasTtsStream() { return Object.keys(this.ttsStream).length; } get hasTtsStream() { return Object.keys(this.ttsStream).length; }
get summary() { get summary() {
@@ -112,6 +116,8 @@ class TaskConfig extends Task {
if (this.hasFillerNoise) phrase.push(`fillerNoise ${this.fillerNoise.enable ? 'on' : 'off'}`); if (this.hasFillerNoise) phrase.push(`fillerNoise ${this.fillerNoise.enable ? 'on' : 'off'}`);
if (this.data.amd) phrase.push('enable amd'); if (this.data.amd) phrase.push('enable amd');
if (this.notifyEvents) phrase.push(`event notification ${this.notifyEvents ? 'on' : 'off'}`); if (this.notifyEvents) phrase.push(`event notification ${this.notifyEvents ? 'on' : 'off'}`);
if (this.hasNotifySttLatency) phrase.push(
`notifySttLatency ${this.notifySttLatency ? 'on' : 'off'}`);
if (this.onHoldMusic) phrase.push(`onHoldMusic: ${this.onHoldMusic}`); if (this.onHoldMusic) phrase.push(`onHoldMusic: ${this.onHoldMusic}`);
if ('boostAudioSignal' in this.data) phrase.push(`setGain ${this.data.boostAudioSignal}`); if ('boostAudioSignal' in this.data) phrase.push(`setGain ${this.data.boostAudioSignal}`);
if (this.hasReferHook) phrase.push('set referHook'); if (this.hasReferHook) phrase.push('set referHook');
@@ -130,6 +136,11 @@ class TaskConfig extends Task {
cs.notifyEvents = !!this.data.notifyEvents; cs.notifyEvents = !!this.data.notifyEvents;
} }
if (this.hasNotifySttLatency) {
this.logger.debug(`turning notifySttLatency ${this.notifySttLatency ? 'on' : 'off'}`);
cs.notifySttLatencyEnabled = this.notifySttLatency;
}
if (this.onHoldMusic) { if (this.onHoldMusic) {
cs.onHoldMusic = this.onHoldMusic; cs.onHoldMusic = this.onHoldMusic;
} }
@@ -318,7 +329,10 @@ class TaskConfig extends Task {
voiceMs: this.vad.voiceMs || 250, voiceMs: this.vad.voiceMs || 250,
silenceMs: this.vad.silenceMs || 150, silenceMs: this.vad.silenceMs || 150,
strategy: this.vad.strategy || 'one-shot', strategy: this.vad.strategy || 'one-shot',
mode: (this.vad.mode !== undefined && this.vad.mode !== null) ? this.vad.mode : 2 mode: (this.vad.mode !== undefined && this.vad.mode !== null) ? this.vad.mode : 2,
vendor: this.vad.vendor || 'silero',
threshold: this.vad.threshold || 0.5,
speechPadMs: this.vad.speechPadMs || 30,
}; };
} }

View File

@@ -351,6 +351,7 @@ class TaskGather extends SttTask {
this.sayTask?.span.end(); this.sayTask?.span.end();
this._stopVad(); this._stopVad();
this._resolve('killed'); this._resolve('killed');
cs.stopSttLatencyVad();
} }
updateTaskInProgress(opts) { updateTaskInProgress(opts) {
@@ -390,16 +391,7 @@ class TaskGather extends SttTask {
if (this.digitBuffer.length === 0 && this.needsStt) { if (this.digitBuffer.length === 0 && this.needsStt) {
// DTMF is higher priority than STT. // DTMF is higher priority than STT.
this.removeCustomEventListeners(); this.removeCustomEventListeners();
// Fix for https://github.com/jambonz/jambonz-feature-server/issues/1281 this._stopTranscribing(ep);
// We should immediately call stop transcription from gather
// so that next gather can start transcription immediately
ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname,
gracefulShutdown: false
})
.catch((err) => this.logger.error({err},
` Received DTMF, Error stopping transcription for vendor ${this.vendor}`));
} }
this.digitBuffer += evt.dtmf; this.digitBuffer += evt.dtmf;
const len = this.digitBuffer.length; const len = this.digitBuffer.length;
@@ -686,6 +678,9 @@ class TaskGather extends SttTask {
target_sid: this.cs.callSid target_sid: this.cs.callSid
}); });
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure')); }).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
// Some vendor use single connection, that we cannot use onConnect event to track transcription start
this.cs.emit('transcribe-start');
} }
_startTimer() { _startTimer() {
@@ -869,6 +864,10 @@ class TaskGather extends SttTask {
if (finished === 'true') return; if (finished === 'true') return;
if (this.vendor === 'ibm' && evt?.state === 'listening') return; if (this.vendor === 'ibm' && evt?.state === 'listening') return;
// emit an event to the call session to track the time transcription is received
cs.emit('on-transcription');
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') { if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
/* we will only get this when we have set utterance_end_ms */ /* we will only get this when we have set utterance_end_ms */
if (this._bufferedTranscripts.length === 0) { if (this._bufferedTranscripts.length === 0) {
@@ -952,6 +951,12 @@ class TaskGather extends SttTask {
} }
} }
// receive a final transcript, calculate the stt latency for this transcript
const sttLatency = this.cs.calculateSttLatency();
if (!emptyTranscript && sttLatency) {
this.stt_latency_ms += `${sttLatency.stt_latency_ms},`;
}
if (this.isContinuousAsr) { if (this.isContinuousAsr) {
/* append the transcript and start listening again for asrTimeout */ /* append the transcript and start listening again for asrTimeout */
const t = evt.alternatives[0].transcript; const t = evt.alternatives[0].transcript;
@@ -1103,12 +1108,7 @@ class TaskGather extends SttTask {
async _startFallback(cs, ep, evt) { async _startFallback(cs, ep, evt) {
if (this.canFallback) { if (this.canFallback) {
ep.stopTranscription({ this._stopTranscribing(ep);
vendor: this.vendor,
bugname: this.bugname,
gracefulShutdown: false
})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
try { try {
this.logger.debug('gather:_startFallback'); this.logger.debug('gather:_startFallback');
this.notifyError({ msg: 'ASR error', this.notifyError({ msg: 'ASR error',
@@ -1237,21 +1237,26 @@ class TaskGather extends SttTask {
} }
} }
async _stopTranscribing(ep) {
// Fix for https://github.com/jambonz/jambonz-feature-server/issues/1281
// We should immediately call stop transcription from gather
// so that next gather can start transcription immediately
ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname,
gracefulShutdown: false
})
.catch((err) => {
if (this.resolved) return;
this.logger.error({err}, 'Error stopping transcription');
});
this.cs.emit('transcribe-stop');
}
async _resolve(reason, evt) { async _resolve(reason, evt) {
this.logger.info({evt}, `TaskGather:resolve with reason ${reason}`); this.logger.info({evt}, `TaskGather:resolve with reason ${reason}`);
if (this.needsStt && this.ep && this.ep.connected) { if (this.needsStt && this.ep && this.ep.connected) {
// Fix for https://github.com/jambonz/jambonz-feature-server/issues/1281 this._stopTranscribing(this.ep);
// We should immediately call stop transcription from gather
// so that next gather can start transcription immediately
this.ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname,
gracefulShutdown: false
})
.catch((err) => {
if (this.resolved) return;
this.logger.error({err}, 'Error stopping transcription');
});
} }
if (this.resolved) { if (this.resolved) {
this.logger.debug('TaskGather:_resolve - already resolved'); this.logger.debug('TaskGather:_resolve - already resolved');
@@ -1270,11 +1275,28 @@ class TaskGather extends SttTask {
this._clearAsrTimer(); this._clearAsrTimer();
this._clearFinalAsrTimer(); this._clearFinalAsrTimer();
let sttLatencyMetrics = {};
if (this.needsStt) {
const sttLatency = this.cs.calculateSttLatency();
if (sttLatency) {
this.stt_latency_ms = this.stt_latency_ms.endsWith(',') ?
this.stt_latency_ms.slice(0, -1) : this.stt_latency_ms;
sttLatencyMetrics = {
'stt.latency_ms': this.stt_latency_ms,
'stt.talkspurts': JSON.stringify(sttLatency.talkspurts),
'stt.start_time': sttLatency.stt_start_time,
'stt.stop_time': sttLatency.stt_stop_time,
'stt.usage': sttLatency.stt_usage,
};
}
}
this.span.setAttributes({ this.span.setAttributes({
channel: 1, channel: 1,
'stt.label': this.label || 'None', 'stt.label': this.label || 'None',
'stt.resolve': reason, 'stt.resolve': reason,
'stt.result': JSON.stringify(evt) 'stt.result': JSON.stringify(evt),
...sttLatencyMetrics
}); });
if (this.callSession && this.callSession.callGone) { if (this.callSession && this.callSession.callGone) {
@@ -1302,6 +1324,9 @@ class TaskGather extends SttTask {
let returnedVerbs = false; let returnedVerbs = false;
try { try {
const latencies = Object.fromEntries(
Object.entries(sttLatencyMetrics).map(([key, value]) => [key.replace('stt.', 'stt_'), value])
);
if (reason.startsWith('dtmf')) { if (reason.startsWith('dtmf')) {
if (this.parentTask) this.parentTask.emit('dtmf', evt); if (this.parentTask) this.parentTask.emit('dtmf', evt);
else { else {
@@ -1315,7 +1340,7 @@ class TaskGather extends SttTask {
else { else {
this.emit('transcription', evt); this.emit('transcription', evt);
this.logger.debug('TaskGather:_resolve - invoking performAction'); this.logger.debug('TaskGather:_resolve - invoking performAction');
returnedVerbs = await this.performAction({speech: evt, reason: 'speechDetected'}); returnedVerbs = await this.performAction({speech: evt, reason: 'speechDetected', ...latencies});
this.logger.debug({returnedVerbs}, 'TaskGather:_resolve - back from performAction'); this.logger.debug({returnedVerbs}, 'TaskGather:_resolve - back from performAction');
} }
} }
@@ -1323,20 +1348,20 @@ class TaskGather extends SttTask {
if (this.parentTask) this.parentTask.emit('timeout', evt); if (this.parentTask) this.parentTask.emit('timeout', evt);
else { else {
this.emit('timeout', evt); this.emit('timeout', evt);
returnedVerbs = await this.performAction({reason: 'timeout'}); returnedVerbs = await this.performAction({reason: 'timeout', ...latencies});
} }
} }
else if (reason.startsWith('stt-error')) { else if (reason.startsWith('stt-error')) {
if (this.parentTask) this.parentTask.emit('stt-error', evt); if (this.parentTask) this.parentTask.emit('stt-error', evt);
else { else {
this.emit('stt-error', evt); this.emit('stt-error', evt);
returnedVerbs = await this.performAction({reason: 'error', details: evt.error}); returnedVerbs = await this.performAction({reason: 'error', details: evt.error, ...latencies});
} }
} else if (reason.startsWith('stt-low-confidence')) { } else if (reason.startsWith('stt-low-confidence')) {
if (this.parentTask) this.parentTask.emit('stt-low-confidence', evt); if (this.parentTask) this.parentTask.emit('stt-low-confidence', evt);
else { else {
this.emit('stt-low-confidence', evt); this.emit('stt-low-confidence', evt);
returnedVerbs = await this.performAction({speech:evt, reason: 'stt-low-confidence'}); returnedVerbs = await this.performAction({speech:evt, reason: 'stt-low-confidence', ...latencies});
} }
} }
} catch (err) { /*already logged error*/ } } catch (err) { /*already logged error*/ }

View File

@@ -4,6 +4,7 @@ const crypto = require('crypto');
const { TaskPreconditions, CobaltTranscriptionEvents } = require('../utils/constants'); const { TaskPreconditions, CobaltTranscriptionEvents } = require('../utils/constants');
const { SpeechCredentialError } = require('../utils/error'); const { SpeechCredentialError } = require('../utils/error');
const {JAMBONES_AWS_TRANSCRIBE_USE_GRPC} = require('../config'); const {JAMBONES_AWS_TRANSCRIBE_USE_GRPC} = require('../config');
const {TaskName} = require('../utils/constants.json');
/** /**
* "Please insert turns here: {{turns:4}}" * "Please insert turns here: {{turns:4}}"
@@ -84,6 +85,9 @@ class SttTask extends Task {
/*bug name prefix */ /*bug name prefix */
this.bugname_prefix = ''; this.bugname_prefix = '';
// stt latency calculator
this.stt_latency_ms = '';
} }
async exec(cs, {ep, ep2}) { async exec(cs, {ep, ep2}) {
@@ -91,6 +95,12 @@ class SttTask extends Task {
this.ep = ep; this.ep = ep;
this.ep2 = ep2; this.ep2 = ep2;
// start vad from stt latency calculator
if (this.name !== TaskName.Gather ||
this.name === TaskName.Gather && this.needsStt) {
cs.startSttLatencyVad();
}
// use session preferences if we don't have specific verb-level settings. // use session preferences if we don't have specific verb-level settings.
if (cs.recognizer) { if (cs.recognizer) {
for (const k in cs.recognizer) { for (const k in cs.recognizer) {
@@ -400,7 +410,7 @@ class SttTask extends Task {
dgOptions.utteranceEndMs = dgOptions.utteranceEndMs || asrTimeout; dgOptions.utteranceEndMs = dgOptions.utteranceEndMs || asrTimeout;
} }
_onVendorConnect(_cs, _ep) { _onVendorConnect(cs, _ep) {
this.logger.debug(`TaskGather:_on${this.vendor}Connect`); this.logger.debug(`TaskGather:_on${this.vendor}Connect`);
} }

View File

@@ -152,12 +152,15 @@ class TaskTranscribe extends SttTask {
.catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill')); .catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
} }
this.cs.emit('transcribe-stop');
return stopTranscription; return stopTranscription;
} }
async kill(cs) { async kill(cs) {
super.kill(cs); super.kill(cs);
const stopTranscription = this._stopTranscription(); const stopTranscription = this._stopTranscription();
cs.stopSttLatencyVad();
// hangup after 1 sec if we don't get a final transcription // hangup after 1 sec if we don't get a final transcription
if (stopTranscription) this._timer = setTimeout(() => this.notifyTaskDone(), 1500); if (stopTranscription) this._timer = setTimeout(() => this.notifyTaskDone(), 1500);
else this.notifyTaskDone(); else this.notifyTaskDone();
@@ -423,6 +426,9 @@ class TaskTranscribe extends SttTask {
bugname: this.bugname, bugname: this.bugname,
hostport: this.hostport hostport: this.hostport
}); });
// Some vendor use single connection, that we cannot use onConnect event to track transcription start
this.cs.emit('transcribe-start');
} }
async _onTranscription(cs, ep, channel, evt, fsEvent) { async _onTranscription(cs, ep, channel, evt, fsEvent) {
@@ -441,6 +447,9 @@ class TaskTranscribe extends SttTask {
if (this.vendor === 'ibm' && evt?.state === 'listening') return; if (this.vendor === 'ibm' && evt?.state === 'listening') return;
// emit an event to the call session to track the time transcription is received
cs.emit('on-transcription');
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') { if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
/* we will only get this when we have set utterance_end_ms */ /* we will only get this when we have set utterance_end_ms */
@@ -602,14 +611,28 @@ class TaskTranscribe extends SttTask {
} }
async _resolve(channel, evt) { async _resolve(channel, evt) {
let sttLatencyMetrics = {};
if (evt.is_final) { if (evt.is_final) {
const sttLatency = this.cs.calculateSttLatency();
if (sttLatency) {
sttLatencyMetrics = {
'stt.latency_ms': `${sttLatency.stt_latency_ms}`,
'stt.talkspurts': JSON.stringify(sttLatency.talkspurts),
'stt.start_time': sttLatency.stt_start_time,
'stt.stop_time': sttLatency.stt_stop_time,
'stt.usage': sttLatency.stt_usage,
};
}
// time to reset the stt latency
this.cs.emit('transcribe-start');
/* we've got a final transcript, so end the otel child span for this channel */ /* we've got a final transcript, so end the otel child span for this channel */
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) { if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({ this.childSpan[channel - 1].span.setAttributes({
channel, channel,
'stt.label': this.label || 'None', 'stt.label': this.label || 'None',
'stt.resolve': 'transcript', 'stt.resolve': 'transcript',
'stt.result': JSON.stringify(evt) 'stt.result': JSON.stringify(evt),
...sttLatencyMetrics
}); });
this.childSpan[channel - 1].span.end(); this.childSpan[channel - 1].span.end();
} }
@@ -618,9 +641,13 @@ class TaskTranscribe extends SttTask {
if (this.transcriptionHook) { if (this.transcriptionHook) {
const b3 = this.getTracingPropagation(); const b3 = this.getTracingPropagation();
const httpHeaders = b3 && {b3}; const httpHeaders = b3 && {b3};
const latencies = Object.fromEntries(
Object.entries(sttLatencyMetrics).map(([key, value]) => [key.replace('stt.', 'stt_'), value])
);
const payload = { const payload = {
...this.cs.callInfo, ...this.cs.callInfo,
...httpHeaders, ...httpHeaders,
...latencies,
...(evt.alternatives && {speech: evt}), ...(evt.alternatives && {speech: evt}),
...(evt.type && {speechEvent: evt}) ...(evt.type && {speechEvent: evt})
}; };

View File

@@ -177,6 +177,9 @@
"VadDetection": { "VadDetection": {
"Detection": "vad_detect:detection" "Detection": "vad_detect:detection"
}, },
"SileroVadDetection": {
"Detection": "vad_silero:detect"
},
"ListenEvents": { "ListenEvents": {
"Connect": "mod_audio_fork::connect", "Connect": "mod_audio_fork::connect",
"ConnectFailure": "mod_audio_fork::connect_failed", "ConnectFailure": "mod_audio_fork::connect_failed",

View File

@@ -0,0 +1,199 @@
const { assert } = require('console');
const Emitter = require('events');
const {
VadDetection,
SileroVadDetection
} = require('../utils/constants.json');
/**
 * Tracks speech-to-text latency for a call session.
 *
 * While running, a continuous VAD detector is attached to the call session's
 * endpoint and every start_talking/stop_talking pair is recorded as a
 * "talkspurt". Together with the timestamps for transcription start, last
 * transcription received and transcription stop, this allows the latency
 * between the caller finishing speaking and the transcript arriving to be
 * computed.
 *
 * All internal timestamps are epoch milliseconds (Date.now()); the values
 * reported by calculateLatency() are converted as documented there.
 */
class SttLatencyCalculator extends Emitter {
  /**
   * @param {object} opts
   * @param {object} opts.logger - logger exposing debug/info/warn/error
   * @param {object} opts.cs - call session; its `ep` and `vad` properties are read
   */
  constructor({ logger, cs}) {
    super();
    this.logger = logger;
    this.cs = cs;
    this.isRunning = false;
    this.isInTalkSpurt = false;
    this.start_talking_time = 0;
    this.talkspurts = [];
    this.vendor = this.cs.vad?.vendor || 'silero';
    this.stt_start_time = 0;
    this.stt_stop_time = 0;
    this.stt_on_transcription_time = 0;
  }

  set sttStartTime(time) {
    this.stt_start_time = time;
  }

  get sttStartTime() {
    return this.stt_start_time || 0;
  }

  set sttStopTime(time) {
    this.stt_stop_time = time;
  }

  get sttStopTime() {
    return this.stt_stop_time || 0;
  }

  set sttOnTranscriptionTime(time) {
    this.stt_on_transcription_time = time;
  }

  get sttOnTranscriptionTime() {
    return this.stt_on_transcription_time || 0;
  }

  /**
   * Name of the custom endpoint event to listen on for the current vendor.
   * @returns {string}
   */
  _vadEventName() {
    return this.vendor === 'silero' ? SileroVadDetection.Detection : VadDetection.Detection;
  }

  /**
   * VAD event handler: opens a talkspurt on start_talking and records the
   * completed talkspurt on stop_talking.
   * @param {object} _ep - endpoint the handler was bound to (unused)
   * @param {object} _evt - event payload (unused)
   * @param {object} fsEvent - event object exposing getHeader()
   */
  _onVadDetected(_ep, _evt, fsEvent) {
    const detected = fsEvent.getHeader('detected-event');
    if (detected === 'stop_talking') {
      // ignore a stop that has no matching start
      if (this.isInTalkSpurt) {
        this.talkspurts.push({
          start: this.start_talking_time,
          stop: Date.now()
        });
      }
      this.start_talking_time = 0;
      this.isInTalkSpurt = false;
    } else if (detected === 'start_talking') {
      this.start_talking_time = Date.now();
      this.isInTalkSpurt = true;
    }
  }

  /**
   * Attach the VAD listener to the call session's endpoint and start
   * continuous VAD detection. Callers should go through start(), which
   * performs the real precondition checks.
   */
  _startVad() {
    // `assert` is console.assert here: it only prints on failure, it does not throw
    assert(!this.isRunning, 'Latency calculator is already running');
    assert(this.cs.ep, 'Callsession has no endpoint to start the latency calculator');
    const ep = this.cs.ep;
    // resolve the vendor now rather than only at construction, so that VAD
    // settings applied to the call session after it was created are honoured
    this.vendor = this.cs.vad?.vendor || 'silero';
    if (!ep.sttLatencyVadHandler) {
      ep.sttLatencyVadHandler = this._onVadDetected.bind(this, ep);
      ep.addCustomEventListener(this._vadEventName(), ep.sttLatencyVadHandler);
    }
    this.stop_talking_time = 0;
    this.start_talking_time = 0;
    // never carry an unfinished talkspurt over from a previous run
    this.isInTalkSpurt = false;
    this.vad = {
      ...(this.cs.vad || {}),
      strategy: 'continuous',
      bugname: 'stt-latency-calculator-vad',
      vendor: this.vendor
    };
    ep.startVadDetection(this.vad);
    this.isRunning = true;
  }

  /**
   * Stop VAD detection and detach the VAD listener from the endpoint.
   * Safe to call when not running or when the endpoint is already gone.
   */
  _stopVad() {
    if (!this.isRunning) {
      this.logger.warn('Latency calculator is not running, no VAD detection to stop');
      return;
    }
    this.logger.warn('Latency calculator is still running, stopping VAD detection');
    const ep = this.cs.ep;
    // the endpoint may already have been released; there is nothing to detach then
    if (ep) {
      ep.stopVadDetection(this.vad);
      if (ep.sttLatencyVadHandler) {
        // remove from the endpoint the handler was registered on, otherwise the
        // listener leaks and a later start() would register a second one
        ep.removeCustomEventListener(this._vadEventName(), ep.sttLatencyVadHandler);
        ep.sttLatencyVadHandler = null;
      }
    }
    this.isRunning = false;
    this.isInTalkSpurt = false;
    this.start_talking_time = 0;
    this.logger.info('STT Latency Calculator stopped');
  }

  /**
   * Start measuring. No-op (with a log) when already running or when the
   * call session has no endpoint.
   */
  start() {
    if (this.isRunning) {
      this.logger.warn('Latency calculator is already running');
      return;
    }
    if (!this.cs.ep) {
      this.logger.error('Callsession has no endpoint to start the latency calculator');
      return;
    }
    this._startVad();
    this.logger.info('STT Latency Calculator started');
  }

  /** Stop measuring. */
  stop() {
    this._stopVad();
  }

  /**
   * Convert epoch milliseconds to whole epoch seconds.
   * @param {number} date - epoch milliseconds
   * @returns {number} epoch seconds, rounded down
   */
  toUnixTimestamp(date) {
    return Math.floor(date / 1000);
  }

  /**
   * Compute the latency metrics for the current measurement window.
   *
   * This method does not modify the recorded state: a talkspurt that is still
   * in progress is included in the result (closed at the stop time) but stays
   * open internally, so the eventual stop_talking event is still recorded
   * with its real timestamp.
   *
   * @returns {object|null} null when not running, otherwise an object with
   *  - stt_start_time, stt_stop_time, start_talking_time, stop_talking_time: epoch seconds
   *  - stt_latency: seconds, as a string with two decimals
   *  - stt_latency_ms: milliseconds, as a number
   *  - stt_usage: seconds between start and stop, as a string with two decimals
   *  - talkspurts: array of [start, stop] pairs in epoch seconds
   */
  calculateLatency() {
    if (!this.isRunning) {
      this.logger.debug('Latency calculator is not running, cannot calculate latency, returning default values');
      return null;
    }

    // if transcription has not been stopped yet, measure up to now
    const stt_stop_time = this.stt_stop_time || Date.now();
    const talkspurts = this.isInTalkSpurt ?
      [...this.talkspurts, {start: this.start_talking_time, stop: stt_stop_time}] :
      this.talkspurts;

    const stt_on_transcription_time = this.stt_on_transcription_time || stt_stop_time;
    // no talkspurt recorded: report 0 rather than NaN
    const start_talking_time = talkspurts[0]?.start ?? 0;
    const stop_talking_time = talkspurts[talkspurts.length - 1]?.stop || stt_stop_time;
    const stt_latency_ms = Math.abs(stt_on_transcription_time - stop_talking_time);

    return {
      stt_start_time: this.toUnixTimestamp(this.stt_start_time),
      stt_stop_time: this.toUnixTimestamp(stt_stop_time),
      start_talking_time: this.toUnixTimestamp(start_talking_time),
      stop_talking_time: this.toUnixTimestamp(stop_talking_time),
      stt_latency: (stt_latency_ms / 1000).toFixed(2),
      stt_latency_ms,
      stt_usage: ((stt_stop_time - this.stt_start_time) / 1000).toFixed(2),
      talkspurts: talkspurts.map((ts) =>
        ([this.toUnixTimestamp(ts.start || 0), this.toUnixTimestamp(ts.stop || 0)]))
    };
  }

  /**
   * Begin a new measurement window: transcription starts now and previously
   * completed talkspurts are discarded. Ignored when not running.
   */
  resetTime() {
    if (!this.isRunning) {
      return;
    }
    this.stt_start_time = Date.now();
    this.stt_stop_time = 0;
    this.stt_on_transcription_time = 0;
    this.clearTalkspurts();
    this.logger.info('STT Latency Calculator reset');
  }

  /** Record the time a transcription was received. Ignored when not running. */
  onTranscriptionReceived() {
    if (!this.isRunning) {
      return;
    }
    this.stt_on_transcription_time = Date.now();
    this.logger.debug(`CallSession:on-transcription set to ${this.stt_on_transcription_time}`);
  }

  /** Record the time transcription was stopped. Ignored when not running. */
  onTranscribeStop() {
    if (!this.isRunning) {
      return;
    }
    this.stt_stop_time = Date.now();
    this.logger.debug(`CallSession:transcribe-stop set to ${this.stt_stop_time}`);
  }

  /**
   * Discard completed talkspurts. The start time of a talkspurt that is
   * still in progress is kept so it can be recorded when it ends.
   */
  clearTalkspurts() {
    this.talkspurts = [];
    if (!this.isInTalkSpurt) {
      this.start_talking_time = 0;
    }
  }
}
module.exports = SttLatencyCalculator;