From d36e6b4c221a64070f7002eb819de03d36744226 Mon Sep 17 00:00:00 2001
From: rammohan-y <37395033+rammohan-y@users.noreply.github.com>
Date: Tue, 11 Mar 2025 21:46:29 +0530
Subject: [PATCH] set the detected language as language_code when deepgram
 detects the language (#1116)

https://github.com/jambonz/jambonz-feature-server/issues/1115
---
 lib/utils/transcription-utils.js | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/utils/transcription-utils.js b/lib/utils/transcription-utils.js
index 294b6397..98e08049 100644
--- a/lib/utils/transcription-utils.js
+++ b/lib/utils/transcription-utils.js
@@ -304,13 +304,18 @@ const normalizeDeepgram = (evt, channel, language, shortUtterance) => {
       confidence: alt.confidence,
       transcript: alt.transcript,
     }));
-
+  /**
+   * Some models (nova-2-general) return the detected language in the
+   * languages array of the first alternative when the language is set to
+   * multi. If a language is detected, we use it as the language_code.
+   */
+  const detectedLanguage = evt.channel?.alternatives?.[0]?.languages?.[0];
   /**
    * note difference between is_final and speech_final in Deepgram:
    * https://developers.deepgram.com/docs/understand-endpointing-interim-results
    */
   return {
-    language_code: language,
+    language_code: detectedLanguage || language,
     channel_tag: channel,
     is_final: shortUtterance ? evt.is_final : evt.speech_final,
     alternatives: alternatives.length ? [alternatives[0]] : [],