From bc68eb8e71e1530bf57f355ced8715aef2201891 Mon Sep 17 00:00:00 2001 From: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com> Date: Mon, 8 Apr 2024 21:24:06 +0700 Subject: [PATCH] support mod_playht_tts (#423) * support mod_playht_tts * wip * wip * fix code style --- src/api/constants.ts | 15 ++- src/api/types.ts | 13 +++ .../internal/views/speech-services/form.tsx | 98 +++++++++++++++++-- src/vendor/index.tsx | 5 + src/vendor/types.ts | 3 +- 5 files changed, 126 insertions(+), 8 deletions(-) diff --git a/src/api/constants.ts b/src/api/constants.ts index ecf771e..03d4972 100644 --- a/src/api/constants.ts +++ b/src/api/constants.ts @@ -4,6 +4,7 @@ import type { LimitField, LimitUnitOption, PasswordSettings, + PlayHTOptions, SelectorOptions, SipGateway, SmppGateway, @@ -211,7 +212,7 @@ export const GOOGLE_CUSTOM_VOICES_REPORTED_USAGE = [ { name: "REALTIME", value: "REALTIME" }, { name: "OFFLINE", value: "OFFLINE" }, ]; -// Eleven Labs options +// ElevenLabs options export const DEFAULT_ELEVENLABS_OPTIONS: Partial<ElevenLabsOptions> = { optimize_streaming_latency: 3, voice_settings: { @@ -220,6 +221,18 @@ export const DEFAULT_ELEVENLABS_OPTIONS: Partial<ElevenLabsOptions> = { use_speaker_boost: true, }, }; + +// PlayHT options +export const DEFAULT_PLAYHT_OPTIONS: Partial<PlayHTOptions> = { + quality: "medium", + speed: 1, + seed: 1, + temperature: 1, + emotion: "female_happy", + voice_guidance: 3, + style_guidance: 20, + text_guidance: 1, +}; /** Password Length options */ export const PASSWORD_MIN = 8; diff --git a/src/api/types.ts b/src/api/types.ts index 4d5a5e4..9fcaea7 100644 --- a/src/api/types.ts +++ b/src/api/types.ts @@ -390,6 +390,7 @@ export interface SpeechCredential { region: null | string; aws_region: null | string; api_key: null | string; + user_id: null | string; access_key_id: null | string; secret_access_key: null | string; service_key: null | string; @@ -415,6 +416,7 @@ export interface SpeechCredential { label: null | string; cobalt_server_uri: null | string; model_id: null | string; + 
voice_engine: null | string; model: null | string; options: null | string; deepgram_stt_uri: null | string; @@ -718,3 +720,14 @@ export interface ElevenLabsOptions { use_speaker_boost: boolean; }>; } + +export interface PlayHTOptions { + quality: string; + speed: number; + seed: number; + temperature: number; + emotion: string; + voice_guidance: number; + style_guidance: number; + text_guidance: number; +} diff --git a/src/containers/internal/views/speech-services/form.tsx b/src/containers/internal/views/speech-services/form.tsx index 208a81e..2f229f7 100644 --- a/src/containers/internal/views/speech-services/form.tsx +++ b/src/containers/internal/views/speech-services/form.tsx @@ -40,6 +40,7 @@ import { VENDOR_ELEVENLABS, VENDOR_ASSEMBLYAI, VENDOR_WHISPER, + VENDOR_PLAYHT, } from "src/vendor"; import { MSG_REQUIRED_FIELDS } from "src/constants"; import { @@ -69,6 +70,7 @@ import { setAccountFilter, setLocation } from "src/store/localStore"; import { DEFAULT_ELEVENLABS_OPTIONS, DEFAULT_GOOGLE_CUSTOM_VOICES_REPORTED_USAGE, + DEFAULT_PLAYHT_OPTIONS, DISABLE_CUSTOM_SPEECH, GOOGLE_CUSTOM_VOICES_REPORTED_USAGE, } from "src/api/constants"; @@ -93,6 +95,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { ); const [region, setRegion] = useState(""); const [apiKey, setApiKey] = useState(""); + const [userId, setUserId] = useState(""); const [accessKeyId, setAccessKeyId] = useState(""); const [secretAccessKey, setSecretAccessKey] = useState(""); const [clientId, setClientId] = useState(""); @@ -179,6 +182,30 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { }); }; + const getDefaultVendorOptions = () => { + if (vendor) { + switch (vendor) { + case VENDOR_ELEVENLABS: + return DEFAULT_ELEVENLABS_OPTIONS; + case VENDOR_PLAYHT: + return DEFAULT_PLAYHT_OPTIONS; + } + } + return ""; + }; + + const getDefaultVendorApiDoc = () => { + if (vendor) { + switch (vendor) { + case VENDOR_ELEVENLABS: + return 
"https://elevenlabs.io/docs/api-reference/streaming"; + case VENDOR_PLAYHT: + return "https://docs.play.ht/reference/api-generate-tts-audio-stream"; + } + } + return ""; + }; + const handlePutGoogleCustomVoices = () => { if (!credential || !credential.data) { return; @@ -297,9 +324,13 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { ...((vendor === VENDOR_ELEVENLABS || vendor === VENDOR_WHISPER) && { model_id: ttsModelId || null, }), - ...(vendor === VENDOR_ELEVENLABS && { + ...((vendor === VENDOR_ELEVENLABS || vendor === VENDOR_PLAYHT) && { options: options || null, }), + ...(vendor === VENDOR_PLAYHT && + ttsModelId && { + voice_engine: ttsModelId, + }), ...(vendor === VENDOR_DEEPGRAM && { deepgram_stt_uri: deepgramSttUri || null, deepgram_stt_use_tls: deepgramSttUseTls ? 1 : 0, @@ -345,10 +376,15 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { vendor === VENDOR_ASSEMBLYAI || vendor === VENDOR_SONIOX || vendor === VENDOR_ELEVENLABS || + vendor === VENDOR_PLAYHT || vendor === VENDOR_WHISPER ? apiKey : null, }), + ...(vendor === VENDOR_PLAYHT && + userId && { + user_id: userId, + }), riva_server_uri: vendor == VENDOR_NVIDIA ? 
rivaServerUri : null, }) .then(({ json }) => { @@ -379,7 +415,11 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { }; useEffect(() => { - if (vendor === VENDOR_ELEVENLABS || vendor === VENDOR_WHISPER) { + if ( + vendor === VENDOR_ELEVENLABS || + vendor === VENDOR_WHISPER || + vendor === VENDOR_PLAYHT + ) { getSpeechSupportedLanguagesAndVoices( currentServiceProvider?.service_provider_sid, vendor, @@ -389,7 +429,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { setTtsModels(json.models); if ( json.models.length > 0 && - (vendor === VENDOR_ELEVENLABS || vendor === VENDOR_WHISPER) + (vendor === VENDOR_ELEVENLABS || + vendor === VENDOR_WHISPER || + vendor === VENDOR_PLAYHT) ) { setTtsModelId(json.models[0].value); } @@ -566,6 +608,14 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { } setInitialDeepgramOnpremCheck(hasValue(credential?.data?.deepgram_stt_uri)); setIsDeepgramOnpremEnabled(hasValue(credential?.data?.deepgram_stt_uri)); + + if (credential?.data?.user_id) { + setUserId(credential.data.user_id); + } + + if (credential?.data?.voice_engine) { + setTtsModelId(credential.data.voice_engine); + } }, [credential]); const updateCustomVoices = ( @@ -692,6 +742,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { {vendor !== VENDOR_WELLSAID && vendor !== VENDOR_CUSTOM && vendor !== VENDOR_WHISPER && + vendor !== VENDOR_PLAYHT && vendor !== VENDOR_ELEVENLABS && (