support speechmatics speechcredential (#458)

* support speechmatics * support speechmatics regions * add env VITE_APP_DISABLE_ADDITIONAL_SPEECH_VENDORS
2026-01-25 02:08:19 +00:00 · 2024-10-11 19:58:23 +07:00
parent f1d2ed8abd
commit 7aaea04d3c
8 changed files with 146 additions and 5 deletions
--- a/.env
+++ b/.env
@@ -25,4 +25,6 @@ VITE_API_BASE_URL=http://127.0.0.1:3000/v1
 ## Base url for jambomz webapp
 #VITE_APP_BASE_URL="http://jambonz.one"
 ## Strip publishable key
-#VITE_APP_STRIPE_PUBLISHABLE_KEY="pk_test_EChRaX9Tjk8csZZVSeoGqNvu00lsJzjaU1"
+#VITE_APP_STRIPE_PUBLISHABLE_KEY="pk_test_EChRaX9Tjk8csZZVSeoGqNvu00lsJzjaU1"
+## Hide the speech vendors listed in the ADDITIONAL_SPEECH_VENDORS constant (src/api/constants.ts) from the speech service form
+# VITE_APP_DISABLE_ADDITIONAL_SPEECH_VENDORS=true
--- a/src/api/constants.ts
+++ b/src/api/constants.ts
@@ -13,6 +13,7 @@ import type {
  WebHook,
  WebhookOption,
 } from "./types";
+import { Vendor } from "src/vendor/types";

 /** This window object is serialized and injected at docker runtime */
 /** The API url is constructed with the docker containers `ip:port` */
@@ -29,6 +30,7 @@ interface JambonzWindowObject {
  BASE_URL: string;
  DEFAULT_SERVICE_PROVIDER_SID: string;
  STRIPE_PUBLISHABLE_KEY: string;
+  DISABLE_ADDITIONAL_SPEECH_VENDORS: string;
 }

 declare global {
@@ -76,6 +78,13 @@ export const DISABLE_CALL_RECORDING: boolean =
  window.JAMBONZ?.DISABLE_CALL_RECORDING === "true" ||
  JSON.parse(import.meta.env.VITE_APP_DISABLE_CALL_RECORDING || "false");

+/** Hide the vendors listed in ADDITIONAL_SPEECH_VENDORS from selection */
+export const DISABLE_ADDITIONAL_SPEECH_VENDORS: boolean =
+  window.JAMBONZ?.DISABLE_ADDITIONAL_SPEECH_VENDORS === "true" ||
+  JSON.parse(
+    import.meta.env.VITE_APP_DISABLE_ADDITIONAL_SPEECH_VENDORS || "false",
+  );
+
 export const DEFAULT_SERVICE_PROVIDER_SID: string =
  window.JAMBONZ?.DEFAULT_SERVICE_PROVIDER_SID ||
  import.meta.env.VITE_APP_DEFAULT_SERVICE_PROVIDER_SID;
@@ -228,6 +237,8 @@ export const VERBIO_STT_MODELS = [

 export const DEFAULT_VERBIO_MODEL = "V1";

+export const ADDITIONAL_SPEECH_VENDORS: Lowercase<Vendor>[] = ["speechmatics"];
+
 // Google Custom Voice reported usage options

 export const DEFAULT_GOOGLE_CUSTOM_VOICES_REPORTED_USAGE = "REALTIME";
--- a/src/api/types.ts
+++ b/src/api/types.ts
@@ -430,6 +430,7 @@ export interface SpeechCredential {
  deepgram_stt_uri: null | string;
  deepgram_tts_uri: null | string;
  deepgram_stt_use_tls: number;
+  speechmatics_stt_uri: null | string;
 }

 export interface Alert {
--- a/src/containers/internal/views/applications/speech-selection.tsx
+++ b/src/containers/internal/views/applications/speech-selection.tsx
@@ -28,6 +28,7 @@ import {
  VENDOR_SONIOX,
  VENDOR_WELLSAID,
  VENDOR_WHISPER,
+  VENDOR_SPEECHMATICS,
 } from "src/vendor";
 import {
  LabelOptions,
@@ -360,8 +361,9 @@ export const SpeechProviderSelection = ({
          value={synthVendor}
          options={ttsVendorOptions.filter(
            (vendor) =>
-              vendor.value != VENDOR_ASSEMBLYAI &&
-              vendor.value != VENDOR_SONIOX &&
+              vendor.value !== VENDOR_ASSEMBLYAI &&
+              vendor.value !== VENDOR_SONIOX &&
+              vendor.value !== VENDOR_SPEECHMATICS &&
              vendor.value !== VENDOR_CUSTOM &&
              vendor.value !== VENDOR_COBALT,
          )}
--- a/src/containers/internal/views/speech-services/form.tsx
+++ b/src/containers/internal/views/speech-services/form.tsx
@@ -47,6 +47,7 @@ import {
  AWS_CREDENTIAL_ACCESS_KEY,
  AWS_INSTANCE_PROFILE,
  VENDOR_VERBIO,
+  VENDOR_SPEECHMATICS,
 } from "src/vendor";
 import { MSG_REQUIRED_FIELDS } from "src/constants";
 import {
@@ -74,11 +75,13 @@ import type {
 } from "src/api/types";
 import { setAccountFilter, setLocation } from "src/store/localStore";
 import {
+  ADDITIONAL_SPEECH_VENDORS,
  DEFAULT_ELEVENLABS_OPTIONS,
  DEFAULT_GOOGLE_CUSTOM_VOICES_REPORTED_USAGE,
  DEFAULT_PLAYHT_OPTIONS,
  DEFAULT_RIMELABS_OPTIONS,
  DEFAULT_VERBIO_MODEL,
+  DISABLE_ADDITIONAL_SPEECH_VENDORS,
  DISABLE_CUSTOM_SPEECH,
  GOOGLE_CUSTOM_VOICES_REPORTED_USAGE,
  VERBIO_STT_MODELS,
@@ -167,6 +170,13 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
  const [tmpDeepgramSttUseTls, setTmpDeepgramSttUseTls] = useState(false);
  const [initialDeepgramOnpremCheck, setInitialDeepgramOnpremCheck] =
    useState(false);
+  const [initialSpeechmaticsOnpremCheck, setInitialSpeechMaticsOnpremCheck] =
+    useState(false);
+  const [speechmaticsEndpoint, setSpeechmaticsEndpoint] = useState("");
+  const [tmpHostedSpeechmaticsEndpoint, setTmpHostedSpeechmaticsEndpoint] =
+    useState("");
+  const [tmpOnpremSpeechmaticsEndpoint, setTmpOnpremSpeechmaticsEndpoint] =
+    useState("");
  const [awsCredentialType, setAwsCredentialType] = useState(
    AWS_CREDENTIAL_ACCESS_KEY,
  );
@@ -360,6 +370,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
          deepgram_tts_uri: deepgramTtsUri || null,
          deepgram_stt_use_tls: deepgramSttUseTls ? 1 : 0,
        }),
+        ...(vendor === VENDOR_SPEECHMATICS && {
+          speechmatics_stt_uri: speechmaticsEndpoint || null,
+        }),
        ...(vendor === VENDOR_VERBIO && {
          engine_version: engineVersion,
        }),
@@ -404,6 +417,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
              vendor === VENDOR_DEEPGRAM ||
              vendor === VENDOR_ASSEMBLYAI ||
              vendor === VENDOR_SONIOX ||
+              vendor === VENDOR_SPEECHMATICS ||
              vendor === VENDOR_ELEVENLABS ||
              vendor === VENDOR_PLAYHT ||
              vendor === VENDOR_RIMELABS ||
@@ -672,6 +686,13 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
    if (credential?.data?.engine_version) {
      setEngineVersion(credential.data.engine_version);
    }
+
+    if (credential?.data?.speechmatics_stt_uri) {
+      setInitialSpeechMaticsOnpremCheck(
+        !credential.data.speechmatics_stt_uri?.includes("speechmatics.com"),
+      );
+      setSpeechmaticsEndpoint(credential.data.speechmatics_stt_uri);
+    }
  }, [credential]);

  const updateCustomVoices = (
@@ -724,7 +745,12 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
            ]
              .concat(vendors)
              .filter(
-                (v) => !DISABLE_CUSTOM_SPEECH || v.value !== VENDOR_CUSTOM,
+                (v) =>
+                  (!DISABLE_CUSTOM_SPEECH || v.value !== VENDOR_CUSTOM) &&
+                  (!DISABLE_ADDITIONAL_SPEECH_VENDORS ||
+                    !ADDITIONAL_SPEECH_VENDORS.includes(
+                      v.value as Lowercase<Vendor>,
+                    )),
              )}
            onChange={(e) => {
              setVendor(e.target.value as Lowercase<Vendor>);
@@ -783,6 +809,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
            {vendor !== VENDOR_ASSEMBLYAI &&
              vendor !== VENDOR_COBALT &&
              vendor !== VENDOR_SONIOX &&
+              vendor !== VENDOR_SPEECHMATICS &&
              vendor != VENDOR_CUSTOM && (
                <label htmlFor="use_for_tts" className="chk">
                  <input
@@ -1339,7 +1366,8 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
          vendor === VENDOR_WHISPER ||
          vendor === VENDOR_PLAYHT ||
          vendor === VENDOR_RIMELABS ||
-          vendor === VENDOR_SONIOX) && (
+          vendor === VENDOR_SONIOX ||
+          vendor === VENDOR_SPEECHMATICS) && (
          <fieldset>
            {vendor === VENDOR_PLAYHT && (
              <>
@@ -1490,6 +1518,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
        {regions &&
          regions[vendor as keyof RegionVendors] &&
          vendor !== VENDOR_IBM &&
+          vendor !== VENDOR_SPEECHMATICS &&
          vendor !== VENDOR_MICROSOFT && (
            <fieldset>
              <label htmlFor="region">
@@ -1904,6 +1933,73 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
            </fieldset>
          </React.Fragment>
        )}
+        {vendor === VENDOR_SPEECHMATICS &&
+          regions &&
+          regions[vendor as keyof RegionVendors] && (
+            <fieldset>
+              <Checkzone
+                disabled={hasValue(credential)}
+                hidden
+                name="use_hosted_speechmatics_service"
+                label="Use hosted Speechmatics Service"
+                initialCheck={!initialSpeechmaticsOnpremCheck}
+                handleChecked={(e) => {
+                  setInitialSpeechMaticsOnpremCheck(!e.target.checked);
+                  if (e.target.checked) {
+                    setTmpOnpremSpeechmaticsEndpoint(speechmaticsEndpoint);
+                    setSpeechmaticsEndpoint(tmpHostedSpeechmaticsEndpoint);
+                    setTmpHostedSpeechmaticsEndpoint("");
+                  }
+                }}
+              >
+                <label htmlFor="speechmatics_endpoint">
+                  Endpoint {sttCheck && <span>*</span>}
+                </label>
+                <Selector
+                  id="speechmatics_endpoint"
+                  name="speechmatics_endpoint"
+                  value={speechmaticsEndpoint}
+                  required
+                  options={[
+                    {
+                      name: "Select a endpoint",
+                      value: "",
+                    },
+                  ].concat(regions[vendor as keyof RegionVendors])}
+                  onChange={(e) => setSpeechmaticsEndpoint(e.target.value)}
+                />
+              </Checkzone>
+
+              <Checkzone
+                disabled={hasValue(credential)}
+                hidden
+                name="use_on-prem_speechmatics_container"
+                label="Use on-prem Speechmatics container"
+                initialCheck={initialSpeechmaticsOnpremCheck}
+                handleChecked={(e) => {
+                  setInitialSpeechMaticsOnpremCheck(e.target.checked);
+                  if (e.target.checked) {
+                    setTmpHostedSpeechmaticsEndpoint(speechmaticsEndpoint);
+                    setSpeechmaticsEndpoint(tmpOnpremSpeechmaticsEndpoint);
+                    setTmpOnpremSpeechmaticsEndpoint("");
+                  }
+                }}
+              >
+                <label htmlFor="speechmatics_uri_for_stt">
+                  Endpoint URI<span>*</span>
+                </label>
+                <input
+                  id="speechmatics_uri_for_stt"
+                  required
+                  type="text"
+                  name="speechmatics_uri_for_stt"
+                  placeholder="Speechmatics URI for STT"
+                  value={speechmaticsEndpoint}
+                  onChange={(e) => setSpeechmaticsEndpoint(e.target.value)}
+                />
+              </Checkzone>
+            </fieldset>
+          )}

        <fieldset>
          <ButtonGroup left>
--- a/src/vendor/index.tsx
+++ b/src/vendor/index.tsx
@@ -15,6 +15,7 @@ export const VENDOR_DEEPGRAM = "deepgram";
 export const VENDOR_IBM = "ibm";
 export const VENDOR_NVIDIA = "nvidia";
 export const VENDOR_SONIOX = "soniox";
+export const VENDOR_SPEECHMATICS = "speechmatics";
 export const VENDOR_CUSTOM = "custom";
 export const VENDOR_COBALT = "cobalt";
 export const VENDOR_ELEVENLABS = "elevenlabs";
@@ -61,6 +62,10 @@ export const vendors: VendorOptions[] = [
    name: "Soniox",
    value: VENDOR_SONIOX,
  },
+  {
+    name: "Speechmatics",
+    value: VENDOR_SPEECHMATICS,
+  },
  {
    name: "Custom",
    value: VENDOR_CUSTOM,
@@ -124,17 +129,20 @@ export const useRegionVendors = () => {
      import("./regions/aws-regions"),
      import("./regions/ms-azure-regions"),
      import("./regions/ibm-regions"),
+      import("./regions/speechmatics-regions"),
    ]).then(
      ([
        { default: awsRegions },
        { default: msRegions },
        { default: ibmRegions },
+        { default: speechmaticsRegions },
      ]) => {
        if (!ignore) {
          setRegions({
            aws: awsRegions,
            microsoft: msRegions,
            ibm: ibmRegions,
+            speechmatics: speechmaticsRegions,
          });
        }
      },
--- a/src/vendor/regions/speechmatics-regions.ts
+++ b/src/vendor/regions/speechmatics-regions.ts
@@ -0,0 +1,18 @@
+import type { Region } from "../types";
+
+export const regions: Region[] = [
+  {
+    name: "EU (EU2 - On-demand)",
+    value: "eu2.rt.speechmatics.com",
+  },
+  {
+    name: "EU (EU1 - Enterprise)",
+    value: "neu.rt.speechmatics.com",
+  },
+  {
+    name: "US (US1 - Enterprise)",
+    value: "wus.rt.speechmatics.com",
+  },
+];
+
+export default regions;
--- a/src/vendor/types.ts
+++ b/src/vendor/types.ts
@@ -8,6 +8,7 @@ export type Vendor =
  | "IBM"
  | "Nvidia"
  | "Soniox"
+  | "Speechmatics"
  | "Cobalt"
  | "Custom"
  | "ElevenLabs"
@@ -71,6 +72,7 @@ export interface RegionVendors {
  aws: Region[];
  microsoft: Region[];
  ibm: Region[];
+  speechmatics: Region[];
 }

 export interface TtsModels {
@@ -88,6 +90,7 @@ export interface RecognizerVendors {
  ibm: Language[];
  nvidia: Language[];
  soniox: Language[];
+  speechmatics: Language[];
  cobalt: Language[];
  assemblyai: Language[];
 }