support speechmatics speechcredential (#458)

* support speechmatics

* support speechmatics regions

* add env VITE_APP_DISABLE_ADDITIONAL_SPEECH_VENDORS
This commit is contained in:
Hoan Luu Huu
2024-10-11 19:58:23 +07:00
committed by GitHub
parent f1d2ed8abd
commit 7aaea04d3c
8 changed files with 146 additions and 5 deletions

4
.env
View File

@@ -25,4 +25,6 @@ VITE_API_BASE_URL=http://127.0.0.1:3000/v1
## Base URL for jambonz webapp
#VITE_APP_BASE_URL="http://jambonz.one"
## Stripe publishable key
#VITE_APP_STRIPE_PUBLISHABLE_KEY="pk_test_EChRaX9Tjk8csZZVSeoGqNvu00lsJzjaU1"
#VITE_APP_STRIPE_PUBLISHABLE_KEY="pk_test_EChRaX9Tjk8csZZVSeoGqNvu00lsJzjaU1"
## Hide the speech vendors listed in the ADDITIONAL_SPEECH_VENDORS constant
# VITE_APP_DISABLE_ADDITIONAL_SPEECH_VENDORS=true

View File

@@ -13,6 +13,7 @@ import type {
WebHook,
WebhookOption,
} from "./types";
import { Vendor } from "src/vendor/types";
/** This window object is serialized and injected at docker runtime */
/** The API url is constructed with the docker containers `ip:port` */
@@ -29,6 +30,7 @@ interface JambonzWindowObject {
BASE_URL: string;
DEFAULT_SERVICE_PROVIDER_SID: string;
STRIPE_PUBLISHABLE_KEY: string;
DISABLE_ADDITIONAL_SPEECH_VENDORS: string;
}
declare global {
@@ -76,6 +78,13 @@ export const DISABLE_CALL_RECORDING: boolean =
window.JAMBONZ?.DISABLE_CALL_RECORDING === "true" ||
JSON.parse(import.meta.env.VITE_APP_DISABLE_CALL_RECORDING || "false");
/**
 * Whether the vendors listed in ADDITIONAL_SPEECH_VENDORS are disabled.
 *
 * True when the injected `window.JAMBONZ` value is the string "true";
 * otherwise the JSON-parsed `VITE_APP_DISABLE_ADDITIONAL_SPEECH_VENDORS`
 * env value, which is read as "false" when unset or empty.
 */
export const DISABLE_ADDITIONAL_SPEECH_VENDORS: boolean =
  window.JAMBONZ?.DISABLE_ADDITIONAL_SPEECH_VENDORS === "true"
    ? true
    : JSON.parse(
        import.meta.env.VITE_APP_DISABLE_ADDITIONAL_SPEECH_VENDORS || "false",
      );
/**
 * Default service provider SID: the value injected on `window.JAMBONZ` when it
 * is truthy, otherwise the `VITE_APP_DEFAULT_SERVICE_PROVIDER_SID` env value.
 */
export const DEFAULT_SERVICE_PROVIDER_SID: string =
window.JAMBONZ?.DEFAULT_SERVICE_PROVIDER_SID ||
import.meta.env.VITE_APP_DEFAULT_SERVICE_PROVIDER_SID;
@@ -228,6 +237,8 @@ export const VERBIO_STT_MODELS = [
export const DEFAULT_VERBIO_MODEL = "V1";
/**
 * Vendors (lowercase ids) filtered out of the speech vendor options when
 * DISABLE_ADDITIONAL_SPEECH_VENDORS is true.
 */
export const ADDITIONAL_SPEECH_VENDORS: Lowercase<Vendor>[] = ["speechmatics"];
// Google Custom Voice reported usage options
export const DEFAULT_GOOGLE_CUSTOM_VOICES_REPORTED_USAGE = "REALTIME";

View File

@@ -430,6 +430,7 @@ export interface SpeechCredential {
deepgram_stt_uri: null | string;
deepgram_tts_uri: null | string;
deepgram_stt_use_tls: number;
speechmatics_stt_uri: null | string;
}
export interface Alert {

View File

@@ -28,6 +28,7 @@ import {
VENDOR_SONIOX,
VENDOR_WELLSAID,
VENDOR_WHISPER,
VENDOR_SPEECHMATICS,
} from "src/vendor";
import {
LabelOptions,
@@ -360,8 +361,9 @@ export const SpeechProviderSelection = ({
value={synthVendor}
options={ttsVendorOptions.filter(
(vendor) =>
vendor.value != VENDOR_ASSEMBLYAI &&
vendor.value != VENDOR_SONIOX &&
vendor.value !== VENDOR_ASSEMBLYAI &&
vendor.value !== VENDOR_SONIOX &&
vendor.value !== VENDOR_SPEECHMATICS &&
vendor.value !== VENDOR_CUSTOM &&
vendor.value !== VENDOR_COBALT,
)}

View File

@@ -47,6 +47,7 @@ import {
AWS_CREDENTIAL_ACCESS_KEY,
AWS_INSTANCE_PROFILE,
VENDOR_VERBIO,
VENDOR_SPEECHMATICS,
} from "src/vendor";
import { MSG_REQUIRED_FIELDS } from "src/constants";
import {
@@ -74,11 +75,13 @@ import type {
} from "src/api/types";
import { setAccountFilter, setLocation } from "src/store/localStore";
import {
ADDITIONAL_SPEECH_VENDORS,
DEFAULT_ELEVENLABS_OPTIONS,
DEFAULT_GOOGLE_CUSTOM_VOICES_REPORTED_USAGE,
DEFAULT_PLAYHT_OPTIONS,
DEFAULT_RIMELABS_OPTIONS,
DEFAULT_VERBIO_MODEL,
DISABLE_ADDITIONAL_SPEECH_VENDORS,
DISABLE_CUSTOM_SPEECH,
GOOGLE_CUSTOM_VOICES_REPORTED_USAGE,
VERBIO_STT_MODELS,
@@ -167,6 +170,13 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
const [tmpDeepgramSttUseTls, setTmpDeepgramSttUseTls] = useState(false);
const [initialDeepgramOnpremCheck, setInitialDeepgramOnpremCheck] =
useState(false);
const [initialSpeechmaticsOnpremCheck, setInitialSpeechMaticsOnpremCheck] =
useState(false);
const [speechmaticsEndpoint, setSpeechmaticsEndpoint] = useState("");
const [tmpHostedSpeechmaticsEndpoint, setTmpHostedSpeechmaticsEndpoint] =
useState("");
const [tmpOnpremSpeechmaticsEndpoint, setTmpOnpremSpeechmaticsEndpoint] =
useState("");
const [awsCredentialType, setAwsCredentialType] = useState(
AWS_CREDENTIAL_ACCESS_KEY,
);
@@ -360,6 +370,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
deepgram_tts_uri: deepgramTtsUri || null,
deepgram_stt_use_tls: deepgramSttUseTls ? 1 : 0,
}),
...(vendor === VENDOR_SPEECHMATICS && {
speechmatics_stt_uri: speechmaticsEndpoint || null,
}),
...(vendor === VENDOR_VERBIO && {
engine_version: engineVersion,
}),
@@ -404,6 +417,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
vendor === VENDOR_DEEPGRAM ||
vendor === VENDOR_ASSEMBLYAI ||
vendor === VENDOR_SONIOX ||
vendor === VENDOR_SPEECHMATICS ||
vendor === VENDOR_ELEVENLABS ||
vendor === VENDOR_PLAYHT ||
vendor === VENDOR_RIMELABS ||
@@ -672,6 +686,13 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
if (credential?.data?.engine_version) {
setEngineVersion(credential.data.engine_version);
}
if (credential?.data?.speechmatics_stt_uri) {
setInitialSpeechMaticsOnpremCheck(
!credential.data.speechmatics_stt_uri?.includes("speechmatics.com"),
);
setSpeechmaticsEndpoint(credential.data.speechmatics_stt_uri);
}
}, [credential]);
const updateCustomVoices = (
@@ -724,7 +745,12 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
]
.concat(vendors)
.filter(
(v) => !DISABLE_CUSTOM_SPEECH || v.value !== VENDOR_CUSTOM,
(v) =>
(!DISABLE_CUSTOM_SPEECH || v.value !== VENDOR_CUSTOM) &&
(!DISABLE_ADDITIONAL_SPEECH_VENDORS ||
!ADDITIONAL_SPEECH_VENDORS.includes(
v.value as Lowercase<Vendor>,
)),
)}
onChange={(e) => {
setVendor(e.target.value as Lowercase<Vendor>);
@@ -783,6 +809,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
{vendor !== VENDOR_ASSEMBLYAI &&
vendor !== VENDOR_COBALT &&
vendor !== VENDOR_SONIOX &&
vendor !== VENDOR_SPEECHMATICS &&
vendor != VENDOR_CUSTOM && (
<label htmlFor="use_for_tts" className="chk">
<input
@@ -1339,7 +1366,8 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
vendor === VENDOR_WHISPER ||
vendor === VENDOR_PLAYHT ||
vendor === VENDOR_RIMELABS ||
vendor === VENDOR_SONIOX) && (
vendor === VENDOR_SONIOX ||
vendor === VENDOR_SPEECHMATICS) && (
<fieldset>
{vendor === VENDOR_PLAYHT && (
<>
@@ -1490,6 +1518,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
{regions &&
regions[vendor as keyof RegionVendors] &&
vendor !== VENDOR_IBM &&
vendor !== VENDOR_SPEECHMATICS &&
vendor !== VENDOR_MICROSOFT && (
<fieldset>
<label htmlFor="region">
@@ -1904,6 +1933,73 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
</fieldset>
</React.Fragment>
)}
{/*
  Speechmatics endpoint: either a hosted region picked from the region list or
  a free-form on-prem URI. Both inputs read and write the same
  `speechmaticsEndpoint` state; the two Checkzones toggle
  `initialSpeechmaticsOnpremCheck` in opposite directions.
*/}
{vendor === VENDOR_SPEECHMATICS &&
  regions &&
  regions[vendor as keyof RegionVendors] && (
    <fieldset>
      <Checkzone
        disabled={hasValue(credential)}
        hidden
        name="use_hosted_speechmatics_service"
        label="Use hosted Speechmatics Service"
        initialCheck={!initialSpeechmaticsOnpremCheck}
        handleChecked={(e) => {
          setInitialSpeechMaticsOnpremCheck(!e.target.checked);
          if (e.target.checked) {
            // Switching to hosted: stash the on-prem value currently in the
            // shared state and restore the last hosted selection.
            setTmpOnpremSpeechmaticsEndpoint(speechmaticsEndpoint);
            setSpeechmaticsEndpoint(tmpHostedSpeechmaticsEndpoint);
            setTmpHostedSpeechmaticsEndpoint("");
          }
        }}
      >
        <label htmlFor="speechmatics_endpoint">
          Endpoint {sttCheck && <span>*</span>}
        </label>
        <Selector
          id="speechmatics_endpoint"
          name="speechmatics_endpoint"
          value={speechmaticsEndpoint}
          required
          options={[
            {
              // Empty-valued placeholder shown before a region is chosen.
              name: "Select an endpoint",
              value: "",
            },
          ].concat(regions[vendor as keyof RegionVendors])}
          onChange={(e) => setSpeechmaticsEndpoint(e.target.value)}
        />
      </Checkzone>
      <Checkzone
        disabled={hasValue(credential)}
        hidden
        name="use_on-prem_speechmatics_container"
        label="Use on-prem Speechmatics container"
        initialCheck={initialSpeechmaticsOnpremCheck}
        handleChecked={(e) => {
          setInitialSpeechMaticsOnpremCheck(e.target.checked);
          if (e.target.checked) {
            // Switching to on-prem: stash the hosted selection currently in
            // the shared state and restore the last on-prem URI.
            setTmpHostedSpeechmaticsEndpoint(speechmaticsEndpoint);
            setSpeechmaticsEndpoint(tmpOnpremSpeechmaticsEndpoint);
            setTmpOnpremSpeechmaticsEndpoint("");
          }
        }}
      >
        <label htmlFor="speechmatics_uri_for_stt">
          Endpoint URI<span>*</span>
        </label>
        <input
          id="speechmatics_uri_for_stt"
          required
          type="text"
          name="speechmatics_uri_for_stt"
          placeholder="Speechmatics URI for STT"
          value={speechmaticsEndpoint}
          onChange={(e) => setSpeechmaticsEndpoint(e.target.value)}
        />
      </Checkzone>
    </fieldset>
  )}
<fieldset>
<ButtonGroup left>

View File

@@ -15,6 +15,7 @@ export const VENDOR_DEEPGRAM = "deepgram";
export const VENDOR_IBM = "ibm";
export const VENDOR_NVIDIA = "nvidia";
export const VENDOR_SONIOX = "soniox";
export const VENDOR_SPEECHMATICS = "speechmatics";
export const VENDOR_CUSTOM = "custom";
export const VENDOR_COBALT = "cobalt";
export const VENDOR_ELEVENLABS = "elevenlabs";
@@ -61,6 +62,10 @@ export const vendors: VendorOptions[] = [
name: "Soniox",
value: VENDOR_SONIOX,
},
{
name: "Speechmatics",
value: VENDOR_SPEECHMATICS,
},
{
name: "Custom",
value: VENDOR_CUSTOM,
@@ -124,17 +129,20 @@ export const useRegionVendors = () => {
import("./regions/aws-regions"),
import("./regions/ms-azure-regions"),
import("./regions/ibm-regions"),
import("./regions/speechmatics-regions"),
]).then(
([
{ default: awsRegions },
{ default: msRegions },
{ default: ibmRegions },
{ default: speechmaticsRegions },
]) => {
if (!ignore) {
setRegions({
aws: awsRegions,
microsoft: msRegions,
ibm: ibmRegions,
speechmatics: speechmaticsRegions,
});
}
},

View File

@@ -0,0 +1,18 @@
import type { Region } from "../types";
/** Speechmatics hosted endpoints as [display name, host] pairs, in display order. */
const HOSTED_ENDPOINTS: ReadonlyArray<readonly [string, string]> = [
  ["EU (EU2 - On-demand)", "eu2.rt.speechmatics.com"],
  ["EU (EU1 - Enterprise)", "neu.rt.speechmatics.com"],
  ["US (US1 - Enterprise)", "wus.rt.speechmatics.com"],
];

/** Region options for Speechmatics: `name` is the label, `value` the host. */
export const regions: Region[] = HOSTED_ENDPOINTS.map(([name, value]) => ({
  name,
  value,
}));

export default regions;

3
src/vendor/types.ts vendored
View File

@@ -8,6 +8,7 @@ export type Vendor =
| "IBM"
| "Nvidia"
| "Soniox"
| "Speechmatics"
| "Cobalt"
| "Custom"
| "ElevenLabs"
@@ -71,6 +72,7 @@ export interface RegionVendors {
aws: Region[];
microsoft: Region[];
ibm: Region[];
speechmatics: Region[];
}
export interface TtsModels {
@@ -88,6 +90,7 @@ export interface RecognizerVendors {
ibm: Language[];
nvidia: Language[];
soniox: Language[];
speechmatics: Language[];
cobalt: Language[];
assemblyai: Language[];
}