From d36e6b4c221a64070f7002eb819de03d36744226 Mon Sep 17 00:00:00 2001 From: rammohan-y <37395033+rammohan-y@users.noreply.github.com> Date: Tue, 11 Mar 2025 21:46:29 +0530 Subject: [PATCH] set the detected language as language_code when deepgram detects the language (#1116) https://github.com/jambonz/jambonz-feature-server/issues/1115 --- lib/utils/transcription-utils.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/utils/transcription-utils.js b/lib/utils/transcription-utils.js index 294b6397..98e08049 100644 --- a/lib/utils/transcription-utils.js +++ b/lib/utils/transcription-utils.js @@ -304,13 +304,18 @@ const normalizeDeepgram = (evt, channel, language, shortUtterance) => { confidence: alt.confidence, transcript: alt.transcript, })); - + /** + * Some models (nova-2-general) return the detected language in the + * alternatives.languages array if the language is set as multi. + * If the language is detected, we use it as the language_code. + */ + const detectedLanguage = evt.channel?.alternatives?.[0]?.languages?.[0]; /** * note difference between is_final and speech_final in Deepgram: * https://developers.deepgram.com/docs/understand-endpointing-interim-results */ return { - language_code: language, + language_code: detectedLanguage || language, channel_tag: channel, is_final: shortUtterance ? evt.is_final : evt.speech_final, alternatives: alternatives.length ? [alternatives[0]] : [],