support deepgram stt model (#528)

* support deepgram stt model

* wip

* wip
This commit is contained in:
Hoan Luu Huu
2025-05-28 19:01:20 +07:00
committed by GitHub
parent 844eec953c
commit 10818493bc
3 changed files with 66 additions and 34 deletions

View File

@@ -922,6 +922,9 @@ export const ApplicationForm = ({ application }: ApplicationFormProps) => {
serviceProviderSid={
currentServiceProvider?.service_provider_sid || ""
}
application_speech_synthesis_voice={
application?.data?.speech_synthesis_voice
}
accountSid={accountSid}
credentials={credentials}
ttsVendor={[synthVendor, setSynthVendor]}
@@ -952,6 +955,9 @@ export const ApplicationForm = ({ application }: ApplicationFormProps) => {
currentServiceProvider?.service_provider_sid || ""
}
accountSid={accountSid}
application_speech_synthesis_voice={
application?.data?.fallback_speech_synthesis_voice
}
credentials={credentials}
ttsVendor={[
fallbackSpeechSynthsisVendor,

View File

@@ -44,6 +44,7 @@ import {
type SpeechProviderSelectionProbs = {
accountSid: string;
serviceProviderSid: string;
application_speech_synthesis_voice: string | null | undefined;
credentials: SpeechCredential[] | undefined;
ttsVendor: [
keyof SynthesisVendors,
@@ -67,6 +68,7 @@ type SpeechProviderSelectionProbs = {
export const SpeechProviderSelection = ({
accountSid,
serviceProviderSid,
application_speech_synthesis_voice,
credentials,
ttsVendor: [synthVendor, setSynthVendor],
ttsVendorOptions,
@@ -242,10 +244,6 @@ export const SpeechProviderSelection = ({
// Extract model
if (json.models && json.models.length) {
setSynthesisModelOptions(json.models);
if (synthVendor === VENDOR_DEEPGRAM) {
setSynthVoice(json.models[0].value);
return;
}
}
if (json.tts && json.tts.length) {
@@ -337,6 +335,7 @@ export const SpeechProviderSelection = ({
const updateTtsVoice = (language: string, voice: string) => {
if (shouldUpdateTtsVoice.current) {
console.log("xhoaluu");
setSynthLang(language);
setSynthVoice(voice);
shouldUpdateTtsVoice.current = false;
@@ -387,6 +386,20 @@ export const SpeechProviderSelection = ({
toastError(error.msg);
});
};
// Keeps the selected synthesis voice aligned with the voice stored on the
// application. For Deepgram, once model options are available and none of them
// matches the stored voice, the first model option is selected instead. In
// every other case the stored voice is applied (or "" when it is unset).
// NOTE(review): synthVendor is read here but is not in the dependency list, so
// a vendor change alone does not re-run this effect — confirm that is intended.
useEffect(() => {
  const storedVoice = application_speech_synthesis_voice;
  const useFirstModelOption =
    synthVendor === VENDOR_DEEPGRAM &&
    synthesisModelOptions.length > 0 &&
    !synthesisModelOptions.some((option) => option.value === storedVoice);

  setSynthVoice(
    useFirstModelOption ? synthesisModelOptions[0].value : storedVoice || "",
  );
}, [synthesisModelOptions, application_speech_synthesis_voice]);
return (
<>
<fieldset>

View File

@@ -259,6 +259,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
case VENDOR_PLAYHT:
return "Voice Engine";
case VENDOR_CARTESIA:
case VENDOR_DEEPGRAM:
return "Model ID";
default:
return "Model";
@@ -436,6 +437,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
deepgram_stt_uri: deepgramSttUri || null,
deepgram_tts_uri: deepgramTtsUri || null,
deepgram_stt_use_tls: deepgramSttUseTls ? 1 : 0,
model_id: sttModelId || null,
}),
...(vendor === VENDOR_SPEECHMATICS && {
speechmatics_stt_uri: speechmaticsEndpoint || null,
@@ -562,7 +564,8 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
vendor === VENDOR_PLAYHT ||
vendor === VENDOR_RIMELABS ||
vendor === VENDOR_CARTESIA ||
vendor === VENDOR_OPENAI
vendor === VENDOR_OPENAI ||
vendor === VENDOR_DEEPGRAM
) {
getSpeechSupportedLanguagesAndVoices(
currentServiceProvider?.service_provider_sid,
@@ -572,21 +575,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
).then(({ json }) => {
if (json.models) {
setTtsModels(json.models);
if (
json.models.length > 0 &&
!json.models.find((m) => m.value === ttsModelId)
) {
setTtsModelId(json.models[0].value);
}
}
if (json.sttModels) {
setSttModels(json.sttModels);
if (
json.sttModels.length > 0 &&
!json.sttModels.some((m) => m.value === sttModelId)
) {
setSttModelId(json.sttModels[0].value);
}
}
});
} else {
@@ -594,6 +585,24 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
}
}, [vendor]);
// Reconciles the selected STT model with the model id stored on the credential.
// When STT models are loaded and the stored id is not among them, the first
// available STT model is selected; otherwise the stored id (or "" when the
// credential has none) is used.
useEffect(() => {
  const storedModelId = credential?.data?.model_id || "";
  const storedIdUnavailable =
    sttModels.length > 0 &&
    !sttModels.some((model) => model.value === storedModelId);

  setSttModelId(storedIdUnavailable ? sttModels[0].value : storedModelId);
}, [credential, sttModels]);
// Reconciles the selected TTS model with the model id stored on the credential.
// When TTS models are loaded and the stored id is not among them, the first
// available TTS model is selected; otherwise the stored id (or "" when the
// credential has none) is used.
useEffect(() => {
  const modelId = credential?.data?.model_id || "";
  if (ttsModels.length > 0 && !ttsModels.some((m) => m.value === modelId)) {
    // Fall back to the first TTS model. The guard above only proves ttsModels
    // is non-empty, so the fallback must index ttsModels: indexing sttModels
    // here would throw when the vendor has no STT models and would otherwise
    // store an STT model id as the TTS model.
    setTtsModelId(ttsModels[0].value);
  } else {
    setTtsModelId(modelId);
  }
}, [credential, ttsModels]);
useEffect(() => {
setLocation();
if (credential && credential.data) {
@@ -742,7 +751,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
if (credential.data.model_id) {
setTtsModelId(credential.data.model_id);
}
if (credential.data.model_id && vendor === VENDOR_OPENAI) {
if (
credential.data.model_id &&
(vendor === VENDOR_OPENAI || vendor === VENDOR_DEEPGRAM)
) {
setSttModelId(credential.data.model_id);
}
if (credential?.data?.playht_tts_uri) {
@@ -1717,22 +1729,23 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
/>
</fieldset>
)}
{vendor == VENDOR_OPENAI && sttModels.length > 0 && (
<fieldset>
<label htmlFor={`${vendor}_stt_model_id`}>
{getModelLabelByVendor(vendor)}
</label>
<Selector
id={"stt_model_id"}
name={"stt_model_id"}
value={sttModelId}
options={sttModels}
onChange={(e) => {
setSttModelId(e.target.value);
}}
/>
</fieldset>
)}
{/* STT model picker: shown for OpenAI and Deepgram once STT models are loaded. */}
{(vendor === VENDOR_OPENAI || vendor === VENDOR_DEEPGRAM) &&
  sttModels.length > 0 && (
    <fieldset>
      {/* htmlFor matches the id passed to Selector below so the label is
          associated with the control (it previously pointed at a
          vendor-prefixed id that no element uses in this block). */}
      <label htmlFor="stt_model_id">{getModelLabelByVendor(vendor)}</label>
      <Selector
        id={"stt_model_id"}
        name={"stt_model_id"}
        value={sttModelId}
        options={sttModels}
        onChange={(e) => {
          setSttModelId(e.target.value);
        }}
      />
    </fieldset>
  )}
{(vendor === VENDOR_ELEVENLABS ||
vendor === VENDOR_PLAYHT ||
vendor === VENDOR_CARTESIA ||