mirror of
https://github.com/jambonz/jambonz-webapp.git
synced 2026-01-25 02:08:19 +00:00
Compare commits
5 Commits
v0.9.5-11
...
fix/speech
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a6216d1db1 | ||
|
|
6dd445fdb4 | ||
|
|
440cf25935 | ||
|
|
97736654ef | ||
|
|
2a3fc07306 |
@@ -99,27 +99,37 @@ export const SpeechProviderSelection = ({
|
||||
SelectorOption[]
|
||||
>([]);
|
||||
|
||||
const currentVendor = useRef("");
|
||||
|
||||
const currentServiceProvider = useSelectState("currentServiceProvider");
|
||||
const currentTtsVendor = useRef(synthVendor);
|
||||
const currentSttVendor = useRef(recogVendor);
|
||||
const shouldUpdateTtsVoice = useRef(false);
|
||||
const shouldUpdateSttLanguage = useRef(false);
|
||||
const ttsEffectTimer = useRef<number | null>(null);
|
||||
const sttEffectTimer = useRef<number | null>(null);
|
||||
|
||||
// Get Synthesis languages and voices
|
||||
useEffect(() => {
|
||||
if (
|
||||
!user ||
|
||||
!synthVendor ||
|
||||
(user?.scope === USER_ADMIN && !serviceProviderSid)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
currentTtsVendor.current = synthVendor;
|
||||
/** When Custom Vendor is used, user you have to input the lange and voice. */
|
||||
if (synthVendor.toString().startsWith(VENDOR_CUSTOM)) {
|
||||
setSynthVoice("");
|
||||
return;
|
||||
}
|
||||
currentVendor.current = synthVendor;
|
||||
if (
|
||||
!user ||
|
||||
(user?.scope === USER_ADMIN &&
|
||||
!currentServiceProvider?.service_provider_sid)
|
||||
) {
|
||||
return;
|
||||
// just execute last change
|
||||
if (ttsEffectTimer.current) {
|
||||
clearTimeout(ttsEffectTimer.current);
|
||||
}
|
||||
configSynthesis();
|
||||
}, [synthVendor, synthLabel, currentServiceProvider]);
|
||||
|
||||
ttsEffectTimer.current = setTimeout(() => {
|
||||
configSynthesis();
|
||||
}, 200);
|
||||
}, [synthVendor, synthLabel, serviceProviderSid]);
|
||||
|
||||
// Get Recognizer languages and voices
|
||||
useEffect(() => {
|
||||
@@ -130,13 +140,21 @@ export const SpeechProviderSelection = ({
|
||||
}
|
||||
if (
|
||||
!user ||
|
||||
(user?.scope === USER_ADMIN &&
|
||||
!currentServiceProvider?.service_provider_sid)
|
||||
!recogVendor ||
|
||||
(user?.scope === USER_ADMIN && !serviceProviderSid)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
configRecognizer();
|
||||
}, [recogVendor, recogLabel, currentServiceProvider]);
|
||||
currentSttVendor.current = recogVendor;
|
||||
// just execute last change
|
||||
if (sttEffectTimer.current) {
|
||||
clearTimeout(sttEffectTimer.current);
|
||||
}
|
||||
|
||||
sttEffectTimer.current = setTimeout(() => {
|
||||
configRecognizer();
|
||||
}, 200);
|
||||
}, [recogVendor, recogLabel, serviceProviderSid]);
|
||||
|
||||
useEffect(() => {
|
||||
if (credentials) {
|
||||
@@ -167,39 +185,40 @@ export const SpeechProviderSelection = ({
|
||||
}
|
||||
return lang.value === synthLang;
|
||||
})?.voices || [];
|
||||
setSynthesisVoiceOptions(voicesOpts);
|
||||
|
||||
if (synthVendor === VENDOR_GOOGLE) {
|
||||
getGoogleCustomVoices({
|
||||
...(synthLabel && { label: synthLabel }),
|
||||
account_sid: accountSid,
|
||||
service_provider_sid: serviceProviderSid,
|
||||
}).then(({ json }) => {
|
||||
// If after successfully fetching data, vendor is still good, then apply value
|
||||
if (currentVendor.current !== VENDOR_GOOGLE) {
|
||||
return;
|
||||
}
|
||||
const customVOices = json.map((v) => ({
|
||||
name: `${v.name} (Custom)`,
|
||||
value: `custom_${v.google_custom_voice_sid}`,
|
||||
}));
|
||||
setSynthesisGoogleCustomVoiceOptions(customVOices);
|
||||
setSynthesisVoiceOptions([...customVOices, ...voicesOpts]);
|
||||
if (customVOices.length > 0) {
|
||||
setSynthVoice(customVOices[0].value);
|
||||
}
|
||||
});
|
||||
if (synthVendor === VENDOR_GOOGLE && synthesisGoogleCustomVoiceOptions) {
|
||||
if (synthesisGoogleCustomVoiceOptions) {
|
||||
setSynthesisVoiceOptions([
|
||||
...synthesisGoogleCustomVoiceOptions,
|
||||
...voicesOpts,
|
||||
]);
|
||||
} else {
|
||||
setSynthesisVoiceOptions(voicesOpts);
|
||||
}
|
||||
if (synthesisGoogleCustomVoiceOptions.length > 0) {
|
||||
updateTtsVoice(synthesisGoogleCustomVoiceOptions[0].value);
|
||||
}
|
||||
} else {
|
||||
setSynthesisVoiceOptions(voicesOpts);
|
||||
}
|
||||
}
|
||||
}, [synthLang, synthesisSupportedLanguagesAndVoices]);
|
||||
}, [
|
||||
synthLang,
|
||||
synthesisSupportedLanguagesAndVoices,
|
||||
synthesisGoogleCustomVoiceOptions,
|
||||
]);
|
||||
|
||||
const configSynthesis = () => {
|
||||
getSpeechSupportedLanguagesAndVoices(
|
||||
currentServiceProvider?.service_provider_sid,
|
||||
serviceProviderSid,
|
||||
synthVendor,
|
||||
synthLabel
|
||||
)
|
||||
.then(({ json }) => {
|
||||
// while fetching data, user might change the vendor
|
||||
if (currentTtsVendor.current !== synthVendor) {
|
||||
return;
|
||||
}
|
||||
setSynthesisSupportedLanguagesAndVoices(json);
|
||||
// Extract model
|
||||
if (json.models && json.models.length) {
|
||||
@@ -219,21 +238,27 @@ export const SpeechProviderSelection = ({
|
||||
setSynthesisLanguageOptions(langOpts);
|
||||
|
||||
// Default setting
|
||||
if (synthVendor === VENDOR_GOOGLE && synthLang === LANG_EN_US) {
|
||||
setSynthVoice(LANG_EN_US_STANDARD_C);
|
||||
const googleLang = json.tts.find((lang) => lang.value === synthLang);
|
||||
if (
|
||||
synthVendor === VENDOR_GOOGLE &&
|
||||
(!googleLang ||
|
||||
!googleLang.voices.find((v) => v.value === synthVoice))
|
||||
) {
|
||||
setSynthLang(LANG_EN_US);
|
||||
updateTtsVoice(LANG_EN_US_STANDARD_C);
|
||||
return;
|
||||
}
|
||||
if (synthVendor === VENDOR_ELEVENLABS) {
|
||||
// Samve Voices applied to all languages
|
||||
// Voices are only available for the 1st language.
|
||||
setSynthLang(ELEVENLABS_LANG_EN);
|
||||
setSynthVoice(json.tts[0].voices[0].value);
|
||||
updateTtsVoice(json.tts[0].voices[0].value);
|
||||
return;
|
||||
}
|
||||
if (synthVendor === VENDOR_WHISPER) {
|
||||
const newLang = json.tts.find((lang) => lang.value === LANG_EN_US);
|
||||
setSynthLang(LANG_EN_US);
|
||||
setSynthVoice(newLang!.voices[0].value);
|
||||
updateTtsVoice(newLang!.voices[0].value);
|
||||
return;
|
||||
}
|
||||
/** Google and AWS have different language lists */
|
||||
@@ -241,28 +266,57 @@ export const SpeechProviderSelection = ({
|
||||
let newLang = json.tts.find((lang) => lang.value === synthLang);
|
||||
|
||||
if (newLang) {
|
||||
setSynthVoice(newLang.voices[0].value);
|
||||
updateTtsVoice(newLang.voices[0].value);
|
||||
return;
|
||||
}
|
||||
|
||||
newLang = json.tts.find((lang) => lang.value === LANG_EN_US);
|
||||
|
||||
setSynthLang(LANG_EN_US);
|
||||
setSynthVoice(newLang!.voices[0].value);
|
||||
updateTtsVoice(newLang!.voices[0].value);
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
toastError(error.msg);
|
||||
});
|
||||
|
||||
if (synthVendor === VENDOR_GOOGLE) {
|
||||
getGoogleCustomVoices({
|
||||
...(synthLabel && { label: synthLabel }),
|
||||
account_sid: accountSid,
|
||||
service_provider_sid: serviceProviderSid,
|
||||
}).then(({ json }) => {
|
||||
// If after successfully fetching data, vendor is still good, then apply value
|
||||
if (currentTtsVendor.current !== VENDOR_GOOGLE) {
|
||||
return;
|
||||
}
|
||||
const customVOices = json.map((v) => ({
|
||||
name: `${v.name} (Custom)`,
|
||||
value: `custom_${v.google_custom_voice_sid}`,
|
||||
}));
|
||||
setSynthesisGoogleCustomVoiceOptions(customVOices);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
const updateTtsVoice = (value: string) => {
|
||||
if (shouldUpdateTtsVoice.current) {
|
||||
setSynthVoice(value);
|
||||
shouldUpdateTtsVoice.current = false;
|
||||
}
|
||||
};
|
||||
|
||||
const configRecognizer = () => {
|
||||
getSpeechSupportedLanguagesAndVoices(
|
||||
currentServiceProvider?.service_provider_sid,
|
||||
serviceProviderSid,
|
||||
recogVendor,
|
||||
recogLabel
|
||||
)
|
||||
.then(({ json }) => {
|
||||
// while fetching data, the user might change the vendor
|
||||
if (currentSttVendor.current !== recogVendor) {
|
||||
return;
|
||||
}
|
||||
// Extract Language
|
||||
const langOpts = json.stt.map((lang) => ({
|
||||
name: lang.name,
|
||||
@@ -271,8 +325,12 @@ export const SpeechProviderSelection = ({
|
||||
setRecogLanguageOptions(langOpts);
|
||||
|
||||
/**When vendor is custom, Language is input by user */
|
||||
if (recogVendor.toString() === VENDOR_CUSTOM) return;
|
||||
|
||||
if (
|
||||
recogVendor.toString() === VENDOR_CUSTOM ||
|
||||
!shouldUpdateSttLanguage.current
|
||||
)
|
||||
return;
|
||||
shouldUpdateSttLanguage.current = false;
|
||||
/** Google and AWS have different language lists */
|
||||
/** If the new language doesn't map then default to "en-US" */
|
||||
const newLang = json.stt.find((lang) => lang.value === recogLang);
|
||||
@@ -282,10 +340,10 @@ export const SpeechProviderSelection = ({
|
||||
!newLang
|
||||
) {
|
||||
setRecogLang(LANG_EN_US);
|
||||
}
|
||||
// Default colbalt language
|
||||
if (recogVendor === VENDOR_COBALT) {
|
||||
} else if (recogVendor === VENDOR_COBALT && !newLang) {
|
||||
setRecogLang(LANG_COBALT_EN_US);
|
||||
} else if (langOpts.length && !newLang) {
|
||||
setRecogLang(langOpts[0].value);
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
@@ -309,6 +367,7 @@ export const SpeechProviderSelection = ({
|
||||
)}
|
||||
onChange={(e) => {
|
||||
const vendor = e.target.value as keyof SynthesisVendors;
|
||||
shouldUpdateTtsVoice.current = true;
|
||||
setSynthVendor(vendor);
|
||||
setSynthLabel("");
|
||||
setSynthesisLanguageOptions([]);
|
||||
@@ -353,6 +412,7 @@ export const SpeechProviderSelection = ({
|
||||
value={synthLang}
|
||||
options={synthesisLanguageOptions}
|
||||
onChange={(e) => {
|
||||
shouldUpdateTtsVoice.current = true;
|
||||
const language = e.target.value;
|
||||
setSynthLang(language);
|
||||
|
||||
@@ -456,6 +516,7 @@ export const SpeechProviderSelection = ({
|
||||
)}
|
||||
onChange={(e) => {
|
||||
const vendor = e.target.value as keyof RecognizerVendors;
|
||||
shouldUpdateSttLanguage.current = true;
|
||||
setRecogVendor(vendor);
|
||||
setRecogLabel("");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user