From 2e9b86c0c4d99eee6528fc606158e8144a2ec6db Mon Sep 17 00:00:00 2001 From: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com> Date: Mon, 6 Nov 2023 21:31:24 +0700 Subject: [PATCH] feat calculate speech recognition latency (#341) * feat calculate speech recognition latency * fix review comments * wip * wip * wip * wip * wip --- src/api/jaeger-types.ts | 1 + .../internal/views/recent-calls/player.tsx | 91 +++++++++++++++++-- 2 files changed, 83 insertions(+), 9 deletions(-) diff --git a/src/api/jaeger-types.ts b/src/api/jaeger-types.ts index bafe079..cef3188 100644 --- a/src/api/jaeger-types.ts +++ b/src/api/jaeger-types.ts @@ -42,6 +42,7 @@ export interface WaveSurferSttResult { transcript: string; confidence: number; language_code: string; + latency?: number; } export interface WaveSurferDtmfResult { diff --git a/src/containers/internal/views/recent-calls/player.tsx b/src/containers/internal/views/recent-calls/player.tsx index 8cb85e3..0ca53b0 100644 --- a/src/containers/internal/views/recent-calls/player.tsx +++ b/src/containers/internal/views/recent-calls/player.tsx @@ -112,6 +112,42 @@ export const Player = ({ call }: PlayerProps) => { }); }; + const PEAKS_WINDOW = 10; + const PEAK_THRESHOLD = 0.03; + + const getSilenceStartTime = ( + start: number, + end: number, + channel: number + ): number => { + if (waveSurferRef.current) { + const peaks = waveSurferRef.current.exportPeaks(); + + if (peaks && peaks.length > channel) { + const duration = waveSurferRef.current.getDecodedData()?.duration; + if (duration && duration > 0) { + const data = peaks[channel]; + const startPeak = Math.ceil((start * data.length) / duration); + const endPeak = Math.ceil((end * data.length) / duration); + let count = 0; + for (let i = endPeak; i > startPeak; i--) + if (Math.abs(data[i]) > PEAK_THRESHOLD) { + count++; + if (count === PEAKS_WINDOW) { + return ( + ((i + PEAKS_WINDOW) * duration) / data.length + 50 / 1000 // this is a 50 ms adjustment + ); + } + } else { + count = 0; 
+ } + } + } + } + + return -1; + }; + const drawSttRegionForSpan = ( s: JaegerSpan, startPoint: JaegerSpan, @@ -128,26 +164,42 @@ export const Player = ({ call }: PlayerProps) => { const end = (s.endTimeUnixNano - startPoint.startTimeUnixNano) / 1_000_000_000; - const region = waveSurferRegionsPluginRef.current.addRegion({ - id: s.spanId, - start, - end, - color: "rgba(255, 0, 0, 0.15)", - drag: false, - resize: false, - }); + const endSpeechTime = getSilenceStartTime(start, end, channel); - changeRegionMouseStyle(region, channel); const [sttResult] = getSpanAttributeByName(s.attributes, "stt.result"); let att: WaveSurferSttResult; if (sttResult) { const data = JSON.parse(sttResult.value.stringValue); + att = { vendor: data.vendor.name, transcript: data.alternatives[0].transcript, confidence: data.alternatives[0].confidence, language_code: data.language_code, + ...(endSpeechTime > 0 && { latency: end - endSpeechTime }), }; + + const [sttResolve] = getSpanAttributeByName( + s.attributes, + "stt.resolve" + ); + if ( + endSpeechTime > 0 && + sttResolve && + sttResolve.value.stringValue === "speech" + ) { + const latencyRegion = waveSurferRegionsPluginRef.current.addRegion({ + id: s.spanId + "latency", + start: endSpeechTime, + end, + color: "rgba(255, 255, 0, 0.55)", + drag: false, + resize: false, + content: `${(end - endSpeechTime).toFixed(2)} sec`, + }); + + changeRegionMouseStyle(latencyRegion, channel); + } } else { const [sttResolve] = getSpanAttributeByName( s.attributes, @@ -171,6 +223,17 @@ export const Player = ({ call }: PlayerProps) => { } } + const region = waveSurferRegionsPluginRef.current.addRegion({ + id: s.spanId, + start, + end, + color: "rgba(255, 0, 0, 0.15)", + drag: false, + resize: false, + }); + + changeRegionMouseStyle(region, channel); + region.on("click", () => { setWaveSurferRegionData(att); }); @@ -475,6 +538,16 @@ export const Player = ({ call }: PlayerProps) => { )} + {waveSurferRegionData.latency && ( +