support deepgram stt model (#528)

* support deepgram stt model

* wip

* wip
This commit is contained in:
Hoan Luu Huu
2025-05-28 19:01:20 +07:00
committed by GitHub
parent 844eec953c
commit 10818493bc
3 changed files with 66 additions and 34 deletions

View File

@@ -922,6 +922,9 @@ export const ApplicationForm = ({ application }: ApplicationFormProps) => {
serviceProviderSid={ serviceProviderSid={
currentServiceProvider?.service_provider_sid || "" currentServiceProvider?.service_provider_sid || ""
} }
application_speech_synthesis_voice={
application?.data?.speech_synthesis_voice
}
accountSid={accountSid} accountSid={accountSid}
credentials={credentials} credentials={credentials}
ttsVendor={[synthVendor, setSynthVendor]} ttsVendor={[synthVendor, setSynthVendor]}
@@ -952,6 +955,9 @@ export const ApplicationForm = ({ application }: ApplicationFormProps) => {
currentServiceProvider?.service_provider_sid || "" currentServiceProvider?.service_provider_sid || ""
} }
accountSid={accountSid} accountSid={accountSid}
application_speech_synthesis_voice={
application?.data?.fallback_speech_synthesis_voice
}
credentials={credentials} credentials={credentials}
ttsVendor={[ ttsVendor={[
fallbackSpeechSynthsisVendor, fallbackSpeechSynthsisVendor,

View File

@@ -44,6 +44,7 @@ import {
type SpeechProviderSelectionProbs = { type SpeechProviderSelectionProbs = {
accountSid: string; accountSid: string;
serviceProviderSid: string; serviceProviderSid: string;
application_speech_synthesis_voice: string | null | undefined;
credentials: SpeechCredential[] | undefined; credentials: SpeechCredential[] | undefined;
ttsVendor: [ ttsVendor: [
keyof SynthesisVendors, keyof SynthesisVendors,
@@ -67,6 +68,7 @@ type SpeechProviderSelectionProbs = {
export const SpeechProviderSelection = ({ export const SpeechProviderSelection = ({
accountSid, accountSid,
serviceProviderSid, serviceProviderSid,
application_speech_synthesis_voice,
credentials, credentials,
ttsVendor: [synthVendor, setSynthVendor], ttsVendor: [synthVendor, setSynthVendor],
ttsVendorOptions, ttsVendorOptions,
@@ -242,10 +244,6 @@ export const SpeechProviderSelection = ({
// Extract model // Extract model
if (json.models && json.models.length) { if (json.models && json.models.length) {
setSynthesisModelOptions(json.models); setSynthesisModelOptions(json.models);
if (synthVendor === VENDOR_DEEPGRAM) {
setSynthVoice(json.models[0].value);
return;
}
} }
if (json.tts && json.tts.length) { if (json.tts && json.tts.length) {
@@ -337,6 +335,7 @@ export const SpeechProviderSelection = ({
const updateTtsVoice = (language: string, voice: string) => { const updateTtsVoice = (language: string, voice: string) => {
if (shouldUpdateTtsVoice.current) { if (shouldUpdateTtsVoice.current) {
console.log("xhoaluu");
setSynthLang(language); setSynthLang(language);
setSynthVoice(voice); setSynthVoice(voice);
shouldUpdateTtsVoice.current = false; shouldUpdateTtsVoice.current = false;
@@ -387,6 +386,20 @@ export const SpeechProviderSelection = ({
toastError(error.msg); toastError(error.msg);
}); });
}; };
// Reconcile the selected TTS voice with the voice stored on the application
// whenever the model options or the stored voice change. For Deepgram, a
// stored voice that is not among the loaded model options is replaced by the
// first option; in every other case the stored voice (or "") is applied.
// NOTE(review): synthVendor is read below but is not in the dependency list —
// confirm that omission is intentional.
useEffect(() => {
  const storedVoice = application_speech_synthesis_voice;
  const mustUseFirstModel =
    synthVendor === VENDOR_DEEPGRAM &&
    synthesisModelOptions.length > 0 &&
    synthesisModelOptions.every((option) => option.value !== storedVoice);

  setSynthVoice(
    mustUseFirstModel ? synthesisModelOptions[0].value : (storedVoice || ""),
  );
}, [synthesisModelOptions, application_speech_synthesis_voice]);
return ( return (
<> <>
<fieldset> <fieldset>

View File

@@ -259,6 +259,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
case VENDOR_PLAYHT: case VENDOR_PLAYHT:
return "Voice Engine"; return "Voice Engine";
case VENDOR_CARTESIA: case VENDOR_CARTESIA:
case VENDOR_DEEPGRAM:
return "Model ID"; return "Model ID";
default: default:
return "Model"; return "Model";
@@ -436,6 +437,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
deepgram_stt_uri: deepgramSttUri || null, deepgram_stt_uri: deepgramSttUri || null,
deepgram_tts_uri: deepgramTtsUri || null, deepgram_tts_uri: deepgramTtsUri || null,
deepgram_stt_use_tls: deepgramSttUseTls ? 1 : 0, deepgram_stt_use_tls: deepgramSttUseTls ? 1 : 0,
model_id: sttModelId || null,
}), }),
...(vendor === VENDOR_SPEECHMATICS && { ...(vendor === VENDOR_SPEECHMATICS && {
speechmatics_stt_uri: speechmaticsEndpoint || null, speechmatics_stt_uri: speechmaticsEndpoint || null,
@@ -562,7 +564,8 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
vendor === VENDOR_PLAYHT || vendor === VENDOR_PLAYHT ||
vendor === VENDOR_RIMELABS || vendor === VENDOR_RIMELABS ||
vendor === VENDOR_CARTESIA || vendor === VENDOR_CARTESIA ||
vendor === VENDOR_OPENAI vendor === VENDOR_OPENAI ||
vendor === VENDOR_DEEPGRAM
) { ) {
getSpeechSupportedLanguagesAndVoices( getSpeechSupportedLanguagesAndVoices(
currentServiceProvider?.service_provider_sid, currentServiceProvider?.service_provider_sid,
@@ -572,21 +575,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
).then(({ json }) => { ).then(({ json }) => {
if (json.models) { if (json.models) {
setTtsModels(json.models); setTtsModels(json.models);
if (
json.models.length > 0 &&
!json.models.find((m) => m.value === ttsModelId)
) {
setTtsModelId(json.models[0].value);
}
} }
if (json.sttModels) { if (json.sttModels) {
setSttModels(json.sttModels); setSttModels(json.sttModels);
if (
json.sttModels.length > 0 &&
!json.sttModels.some((m) => m.value === sttModelId)
) {
setSttModelId(json.sttModels[0].value);
}
} }
}); });
} else { } else {
@@ -594,6 +585,24 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
} }
}, [vendor]); }, [vendor]);
// Keep the selected STT model aligned with the credential's saved model_id.
// When STT models are loaded and the saved id is not among them, the first
// loaded model is selected; otherwise the saved id (or "") is used as-is.
useEffect(() => {
  const savedModelId = credential?.data?.model_id || "";
  const savedIsMissing =
    sttModels.length > 0 &&
    sttModels.every((model) => model.value !== savedModelId);

  setSttModelId(savedIsMissing ? sttModels[0].value : savedModelId);
}, [credential, sttModels]);
// Keep the selected TTS model aligned with the credential's saved model_id.
// When TTS models are loaded and the saved id is not among them, fall back to
// the first loaded TTS model; otherwise use the saved id (or "") as-is.
useEffect(() => {
  const modelId = credential?.data?.model_id || "";
  if (ttsModels.length > 0 && !ttsModels.some((m) => m.value === modelId)) {
    // Index ttsModels here: the guard above only proves ttsModels is
    // non-empty, and sttModels is a separate list that may be empty.
    setTtsModelId(ttsModels[0].value);
  } else {
    setTtsModelId(modelId);
  }
}, [credential, ttsModels]);
useEffect(() => { useEffect(() => {
setLocation(); setLocation();
if (credential && credential.data) { if (credential && credential.data) {
@@ -742,7 +751,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
if (credential.data.model_id) { if (credential.data.model_id) {
setTtsModelId(credential.data.model_id); setTtsModelId(credential.data.model_id);
} }
if (credential.data.model_id && vendor === VENDOR_OPENAI) { if (
credential.data.model_id &&
(vendor === VENDOR_OPENAI || vendor === VENDOR_DEEPGRAM)
) {
setSttModelId(credential.data.model_id); setSttModelId(credential.data.model_id);
} }
if (credential?.data?.playht_tts_uri) { if (credential?.data?.playht_tts_uri) {
@@ -1717,22 +1729,23 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
/> />
</fieldset> </fieldset>
)} )}
{vendor == VENDOR_OPENAI && sttModels.length > 0 && ( {(vendor == VENDOR_OPENAI || vendor === VENDOR_DEEPGRAM) &&
<fieldset> sttModels.length > 0 && (
<label htmlFor={`${vendor}_stt_model_id`}> <fieldset>
{getModelLabelByVendor(vendor)} <label htmlFor={`${vendor}_stt_model_id`}>
</label> {getModelLabelByVendor(vendor)}
<Selector </label>
id={"stt_model_id"} <Selector
name={"stt_model_id"} id={"stt_model_id"}
value={sttModelId} name={"stt_model_id"}
options={sttModels} value={sttModelId}
onChange={(e) => { options={sttModels}
setSttModelId(e.target.value); onChange={(e) => {
}} setSttModelId(e.target.value);
/> }}
</fieldset> />
)} </fieldset>
)}
{(vendor === VENDOR_ELEVENLABS || {(vendor === VENDOR_ELEVENLABS ||
vendor === VENDOR_PLAYHT || vendor === VENDOR_PLAYHT ||
vendor === VENDOR_CARTESIA || vendor === VENDOR_CARTESIA ||