support verbio speech (#434)

This commit is contained in:
Hoan Luu Huu
2024-05-29 18:57:05 +07:00
committed by GitHub
parent 6b9167e6b8
commit a3c48e7efb
5 changed files with 90 additions and 1 deletions

View File

@@ -205,6 +205,14 @@ export const DEFAULT_ELEVENLABS_MODEL = "eleven_multilingual_v2";
export const DEFAULT_WHISPER_MODEL = "tts-1";
// VERBIO
export const VERBIO_STT_MODELS = [
{ name: "V1", value: "V1" },
{ name: "V2", value: "V2" },
];
export const DEFAULT_VERBIO_MODEL = "V1";
// Google Custom Voice reported usage options
export const DEFAULT_GOOGLE_CUSTOM_VOICES_REPORTED_USAGE = "REALTIME";

View File

@@ -402,6 +402,7 @@ export interface SpeechCredential {
custom_stt_endpoint_url: null | string;
custom_stt_endpoint: null | string;
client_id: null | string;
client_secret: null | string;
secret: null | string;
nuance_tts_uri: null | string;
nuance_stt_uri: null | string;
@@ -418,6 +419,7 @@ export interface SpeechCredential {
cobalt_server_uri: null | string;
model_id: null | string;
voice_engine: null | string;
engine_version: null | string;
model: null | string;
options: null | string;
deepgram_stt_uri: null | string;

View File

@@ -46,6 +46,7 @@ import {
AWS_CREDENTIAL_IAM_ASSUME_ROLE,
AWS_CREDENTIAL_ACCESS_KEY,
AWS_INSTANCE_PROFILE,
VENDOR_VERBIO,
} from "src/vendor";
import { MSG_REQUIRED_FIELDS } from "src/constants";
import {
@@ -77,8 +78,10 @@ import {
DEFAULT_GOOGLE_CUSTOM_VOICES_REPORTED_USAGE,
DEFAULT_PLAYHT_OPTIONS,
DEFAULT_RIMELABS_OPTIONS,
DEFAULT_VERBIO_MODEL,
DISABLE_CUSTOM_SPEECH,
GOOGLE_CUSTOM_VOICES_REPORTED_USAGE,
VERBIO_STT_MODELS,
} from "src/api/constants";
type SpeechServiceFormProps = {
@@ -106,6 +109,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
const [secretAccessKey, setSecretAccessKey] = useState("");
const [clientId, setClientId] = useState("");
const [secretKey, setSecretKey] = useState("");
const [clientSecret, setClientSecret] = useState("");
const [googleServiceKey, setGoogleServiceKey] =
useState<GoogleServiceKey | null>(null);
const [sttRegion, setSttRegion] = useState("");
@@ -113,6 +117,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
const [ttsRegion, setTtsRegion] = useState("");
const [ttsApiKey, setTtsApiKey] = useState("");
const [ttsModelId, setTtsModelId] = useState("");
const [engineVersion, setEngineVersion] = useState(DEFAULT_VERBIO_MODEL);
const [instanceId, setInstanceId] = useState("");
const [initialCheckCustomTts, setInitialCheckCustomTts] = useState(false);
const [initialCheckCustomStt, setInitialCheckCustomStt] = useState(false);
@@ -353,6 +358,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
deepgram_stt_uri: deepgramSttUri || null,
deepgram_stt_use_tls: deepgramSttUseTls ? 1 : 0,
}),
...(vendor === VENDOR_VERBIO && {
engine_version: engineVersion,
}),
};
if (credential && credential.data) {
@@ -405,6 +413,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
userId && {
user_id: userId,
}),
...(vendor === VENDOR_VERBIO && {
client_id: clientId,
client_secret: clientSecret,
}),
riva_server_uri: vendor == VENDOR_NVIDIA ? rivaServerUri : null,
})
.then(({ json }) => {
@@ -647,6 +659,15 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
: AWS_INSTANCE_PROFILE,
);
}
if (credential?.data?.client_id) {
setClientId(credential.data.client_id);
}
if (credential?.data?.client_secret) {
setClientSecret(credential.data.client_secret);
}
if (credential?.data?.engine_version) {
setEngineVersion(credential.data.engine_version);
}
}, [credential]);
const updateCustomVoices = (
@@ -1183,6 +1204,58 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
</fieldset>
</>
)}
{vendor === VENDOR_VERBIO && (
<>
<fieldset>
<label htmlFor="verbio_client_id">
Client ID
{!onPremNuanceSttCheck && !onPremNuanceTtsCheck && (
<span>*</span>
)}
</label>
<input
id="verbio_client_id"
required
type="text"
name="verbio_client_id"
placeholder="Client ID"
value={clientId}
onChange={(e) => setClientId(e.target.value)}
disabled={credential ? true : false}
/>
<label htmlFor="verbio_client_secret">
Client secret
{!onPremNuanceSttCheck && !onPremNuanceTtsCheck && (
<span>*</span>
)}
</label>
<input
id="verbio_client_secret"
required
type="text"
name="verbio_client_secret"
placeholder="Client secret"
value={clientSecret}
onChange={(e) => setClientSecret(e.target.value)}
disabled={credential ? true : false}
/>
</fieldset>
<fieldset>
<label htmlFor={`${vendor}_tts_model_id`}>
Engine version<span>*</span>
</label>
<Selector
id={"verbio_engine_version"}
name={"verbio_engine_version"}
value={engineVersion}
options={VERBIO_STT_MODELS}
onChange={(e) => {
setEngineVersion(e.target.value);
}}
/>
</fieldset>
</>
)}
{vendor === VENDOR_AWS && (
<fieldset>
<label htmlFor="vendor">

View File

@@ -22,6 +22,7 @@ export const VENDOR_ASSEMBLYAI = "assemblyai";
export const VENDOR_WHISPER = "whisper";
export const VENDOR_PLAYHT = "playht";
export const VENDOR_RIMELABS = "rimelabs";
export const VENDOR_VERBIO = "verbio";
export const vendors: VendorOptions[] = [
{
@@ -88,6 +89,10 @@ export const vendors: VendorOptions[] = [
name: "RimeLabs",
value: VENDOR_RIMELABS,
},
{
name: "Verbio",
value: VENDOR_VERBIO,
},
].sort((a, b) => a.name.localeCompare(b.name)) as VendorOptions[];
export const AWS_CREDENTIAL_ACCESS_KEY = "access_key";

3
src/vendor/types.ts vendored
View File

@@ -14,7 +14,8 @@ export type Vendor =
| "assemblyai"
| "whisper"
| "playht"
| "rimelabs";
| "rimelabs"
| "verbio";
export interface VendorOptions {
name: Vendor;