From 6043921067a742fc3f5697b7a86f8396d606aaf4 Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Thu, 30 Nov 2023 10:38:33 -0500 Subject: [PATCH] aws stt: calculate transcript-level confidence based on word confidence scores --- lib/utils/transcription-utils.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/utils/transcription-utils.js b/lib/utils/transcription-utils.js index 62d58b85..6a2e7c4f 100644 --- a/lib/utils/transcription-utils.js +++ b/lib/utils/transcription-utils.js @@ -369,7 +369,12 @@ const normalizeMicrosoft = (evt, channel, language) => { const normalizeAws = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); const alternatives = evt.Transcript?.Results[0]?.Alternatives.map((alt) => { - return {transcript: alt.Transcript}; + const items = alt.Items.filter((item) => item.Type === 'pronunciation' && 'Confidence' in item); + const confidence = items.reduce((acc, item) => acc + item.Confidence, 0) / alt.Items.length; + return { + transcript: alt.Transcript, + confidence + }; }); return { language_code: language,