support deepgram stt model (#528)

* support deepgram stt model * wip * wip
2025-12-19 05:37:43 +00:00 · 2025-05-28 19:01:20 +07:00
parent 844eec953c
commit 10818493bc
3 changed files with 66 additions and 34 deletions
--- a/src/containers/internal/views/applications/form.tsx
+++ b/src/containers/internal/views/applications/form.tsx
@@ -922,6 +922,9 @@ export const ApplicationForm = ({ application }: ApplicationFormProps) => {
          serviceProviderSid={
            currentServiceProvider?.service_provider_sid || ""
          }
          application_speech_synthesis_voice={
            application?.data?.speech_synthesis_voice
          }
          accountSid={accountSid}
          credentials={credentials}
          ttsVendor={[synthVendor, setSynthVendor]}
@@ -952,6 +955,9 @@ export const ApplicationForm = ({ application }: ApplicationFormProps) => {
                currentServiceProvider?.service_provider_sid || ""
              }
              accountSid={accountSid}
              application_speech_synthesis_voice={
                application?.data?.fallback_speech_synthesis_voice
              }
              credentials={credentials}
              ttsVendor={[
                fallbackSpeechSynthsisVendor,
--- a/src/containers/internal/views/applications/speech-selection.tsx
+++ b/src/containers/internal/views/applications/speech-selection.tsx
@@ -44,6 +44,7 @@ import {
 type SpeechProviderSelectionProbs = {
  accountSid: string;
  serviceProviderSid: string;
  application_speech_synthesis_voice: string | null | undefined;
  credentials: SpeechCredential[] | undefined;
  ttsVendor: [
    keyof SynthesisVendors,
@@ -67,6 +68,7 @@ type SpeechProviderSelectionProbs = {
 export const SpeechProviderSelection = ({
  accountSid,
  serviceProviderSid,
  application_speech_synthesis_voice,
  credentials,
  ttsVendor: [synthVendor, setSynthVendor],
  ttsVendorOptions,
@@ -242,10 +244,6 @@ export const SpeechProviderSelection = ({
        // Extract model
        if (json.models && json.models.length) {
          setSynthesisModelOptions(json.models);
          if (synthVendor === VENDOR_DEEPGRAM) {
            setSynthVoice(json.models[0].value);
            return;
          }
        }
        if (json.tts && json.tts.length) {
@@ -337,6 +335,7 @@ export const SpeechProviderSelection = ({
  const updateTtsVoice = (language: string, voice: string) => {
    if (shouldUpdateTtsVoice.current) {
      console.log("xhoaluu");
      setSynthLang(language);
      setSynthVoice(voice);
      shouldUpdateTtsVoice.current = false;
@@ -387,6 +386,20 @@ export const SpeechProviderSelection = ({
        toastError(error.msg);
      });
  };
  // Re-derive the selected synthesis voice whenever the model options or the
  // application's saved voice change.
  // NOTE(review): `synthVendor` is read below but is not in the dependency
  // array, so a vendor change alone does not re-run this effect — confirm
  // that this is intended.
  useEffect(() => {
    const savedVoice = application_speech_synthesis_voice;
    // Deepgram only: the saved voice must be one of the loaded model options;
    // otherwise the first option is selected instead.
    const needsDeepgramFallback =
      synthVendor === VENDOR_DEEPGRAM &&
      synthesisModelOptions.length > 0 &&
      synthesisModelOptions.every((option) => option.value !== savedVoice);

    setSynthVoice(
      needsDeepgramFallback ? synthesisModelOptions[0].value : savedVoice || "",
    );
  }, [synthesisModelOptions, application_speech_synthesis_voice]);
  return (
    <>
      <fieldset>
--- a/src/containers/internal/views/speech-services/form.tsx
+++ b/src/containers/internal/views/speech-services/form.tsx
@@ -259,6 +259,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
      case VENDOR_PLAYHT:
        return "Voice Engine";
      case VENDOR_CARTESIA:
      case VENDOR_DEEPGRAM:
        return "Model ID";
      default:
        return "Model";
@@ -436,6 +437,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
          deepgram_stt_uri: deepgramSttUri || null,
          deepgram_tts_uri: deepgramTtsUri || null,
          deepgram_stt_use_tls: deepgramSttUseTls ? 1 : 0,
          model_id: sttModelId || null,
        }),
        ...(vendor === VENDOR_SPEECHMATICS && {
          speechmatics_stt_uri: speechmaticsEndpoint || null,
@@ -562,7 +564,8 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
      vendor === VENDOR_PLAYHT ||
      vendor === VENDOR_RIMELABS ||
      vendor === VENDOR_CARTESIA ||
-      vendor === VENDOR_OPENAI
+      vendor === VENDOR_OPENAI ||
      vendor === VENDOR_DEEPGRAM
    ) {
      getSpeechSupportedLanguagesAndVoices(
        currentServiceProvider?.service_provider_sid,
@@ -572,21 +575,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
      ).then(({ json }) => {
        if (json.models) {
          setTtsModels(json.models);
          if (
            json.models.length > 0 &&
            !json.models.find((m) => m.value === ttsModelId)
          ) {
            setTtsModelId(json.models[0].value);
          }
        }
        if (json.sttModels) {
          setSttModels(json.sttModels);
          if (
            json.sttModels.length > 0 &&
            !json.sttModels.some((m) => m.value === sttModelId)
          ) {
            setSttModelId(json.sttModels[0].value);
          }
        }
      });
    } else {
@@ -594,6 +585,24 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
    }
  }, [vendor]);
  // Keep the selected STT model aligned with the credential's saved model_id
  // and the list of STT models currently held in `sttModels`.
  useEffect(() => {
    const savedModelId = credential?.data?.model_id || "";
    // When models are loaded and none matches the saved id, select the first
    // available STT model; otherwise keep the saved id (possibly "").
    const savedIsMissing =
      sttModels.length > 0 &&
      !sttModels.some((option) => option.value === savedModelId);

    setSttModelId(savedIsMissing ? sttModels[0].value : savedModelId);
  }, [credential, sttModels]);
  // Keep the selected TTS model aligned with the credential's saved model_id
  // and the list of TTS models currently held in `ttsModels`.
  useEffect(() => {
    const modelId = credential?.data?.model_id || "";
    if (ttsModels.length > 0 && !ttsModels.some((m) => m.value === modelId)) {
      // The saved id is not among the loaded TTS models: fall back to the
      // first TTS option. The fallback must index `ttsModels` (the list the
      // guard above just checked is non-empty); indexing `sttModels` here
      // could pick an STT model id or throw when that list is empty.
      setTtsModelId(ttsModels[0].value);
    } else {
      setTtsModelId(modelId);
    }
  }, [credential, ttsModels]);
  useEffect(() => {
    setLocation();
    if (credential && credential.data) {
@@ -742,7 +751,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
      if (credential.data.model_id) {
        setTtsModelId(credential.data.model_id);
      }
-      if (credential.data.model_id && vendor === VENDOR_OPENAI) {
+      if (
        credential.data.model_id &&
        (vendor === VENDOR_OPENAI || vendor === VENDOR_DEEPGRAM)
      ) {
        setSttModelId(credential.data.model_id);
      }
      if (credential?.data?.playht_tts_uri) {
@@ -1717,22 +1729,23 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
              />
            </fieldset>
          )}
-        {vendor == VENDOR_OPENAI && sttModels.length > 0 && (
+        {(vendor == VENDOR_OPENAI || vendor === VENDOR_DEEPGRAM) &&
-          <fieldset>
+          sttModels.length > 0 && (
-            <label htmlFor={`${vendor}_stt_model_id`}>
+            <fieldset>
-              {getModelLabelByVendor(vendor)}
+              <label htmlFor={`${vendor}_stt_model_id`}>
-            </label>
+                {getModelLabelByVendor(vendor)}
-            <Selector
+              </label>
-              id={"stt_model_id"}
+              <Selector
-              name={"stt_model_id"}
+                id={"stt_model_id"}
-              value={sttModelId}
+                name={"stt_model_id"}
-              options={sttModels}
+                value={sttModelId}
-              onChange={(e) => {
+                options={sttModels}
-                setSttModelId(e.target.value);
+                onChange={(e) => {
-              }}
+                  setSttModelId(e.target.value);
-            />
+                }}
-          </fieldset>
+              />
-        )}
+            </fieldset>
          )}
        {(vendor === VENDOR_ELEVENLABS ||
          vendor === VENDOR_PLAYHT ||
          vendor === VENDOR_CARTESIA ||