Compare commits

...

1 Commit

Author SHA1 Message Date
Hoan Luu Huu
4fc6b1ae40 support google gemini tts (#590)
* support google gemini tts

* wip
2026-01-23 10:07:08 -05:00
2 changed files with 222 additions and 187 deletions

View File

@@ -432,6 +432,13 @@ export const DEEPGRAM_STT_ENPOINT = [
{ name: "EU-hosted", value: "api.eu.deepgram.com" },
];
/**
 * ElevenLabs API URI options.
 *
 * Each entry pairs a short label (`name`) with the API host it selects
 * (`value`). Entries are produced in the order the labels are declared
 * below: US, EU, IN.
 */
export const ELEVENLABS_API_URI_OPTIONS = Object.entries({
  US: "api.elevenlabs.io",
  EU: "api.eu.residency.elevenlabs.io",
  IN: "api.in.residency.elevenlabs.io",
}).map(([name, value]) => ({ name, value }));
/** User scope values */
export const USER_ADMIN = "admin";
export const USER_SP = "service_provider";

View File

@@ -97,6 +97,7 @@ import {
DEFAULT_VERBIO_MODEL,
DISABLE_ADDITIONAL_SPEECH_VENDORS,
DISABLE_CUSTOM_SPEECH,
ELEVENLABS_API_URI_OPTIONS,
GOOGLE_CUSTOM_VOICES_REPORTED_USAGE,
VERBIO_STT_MODELS,
} from "src/api/constants";
@@ -110,13 +111,6 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
const { toastError, toastSuccess } = useToast();
const navigate = useNavigate();
const user = useSelectState("user");
// ElevenLabs API URI options
const ELEVENLABS_API_URI_OPTIONS = [
{ name: "US", value: "api.elevenlabs.io" },
{ name: "EU", value: "api.eu.residency.elevenlabs.io" },
{ name: "IN", value: "api.in.residency.elevenlabs.io" },
];
const currentServiceProvider = useSelectState("currentServiceProvider");
const regions = useRegionVendors();
const [accounts] = useServiceProviderData<Account[]>("Accounts");
@@ -418,6 +412,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
...(vendor === VENDOR_AWS && {
aws_region: region || null,
}),
...(vendor === VENDOR_GOOGLE && {
model_id: ttsModelId || null,
}),
...(vendor === VENDOR_MICROSOFT && {
region: region || null,
use_custom_tts:
@@ -852,6 +849,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
setOptionsInitialChecked(true);
}
if (credential?.data?.vendor === VENDOR_GOOGLE) {
// Load model_id for Gemini TTS
if (credential.data.model_id) {
setTtsModelId(credential.data.model_id);
}
// Try to check whether there are any custom voices
getGoogleCustomVoices({
speech_credential_sid: credential.data.speech_credential_sid,
@@ -1236,6 +1237,23 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
</fieldset>
)}
{ttsCheck && vendor === VENDOR_GOOGLE && (
<>
<fieldset>
<label htmlFor="google_tts_model_id">
Model ID
<Tooltip text="Provide a model ID to enable Gemini TTS (e.g., gemini-2.5-flash-tts). Leave empty to use standard Google TTS.">
{" "}
</Tooltip>
</label>
<input
id="google_tts_model_id"
name="google_tts_model_id"
type="text"
placeholder="e.g., gemini-2.5-flash-tts"
value={ttsModelId}
onChange={(e) => setTtsModelId(e.target.value)}
/>
</fieldset>
<fieldset>
<label htmlFor="use_custom_voice" className="chk">
<input
@@ -1263,7 +1281,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
)}
{hasLength(customVoices) &&
customVoices.map((v, i) => (
<div key={`custom_voice_${i}`} className="customVoice">
<div
key={`custom_voice_${i}`}
className="customVoice"
>
<div>
<div>
<label htmlFor="custom_voice_name">
@@ -1285,7 +1306,11 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
required
value={v.name}
onChange={(e) => {
updateCustomVoices(i, "name", e.target.value);
updateCustomVoices(
i,
"name",
e.target.value,
);
}}
/>
</div>
@@ -1296,7 +1321,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
id={"google_custom_voices_reported_usage"}
name={"google_custom_voices_reported_usage"}
value={v.reported_usage}
options={GOOGLE_CUSTOM_VOICES_REPORTED_USAGE}
options={
GOOGLE_CUSTOM_VOICES_REPORTED_USAGE
}
onChange={(e) => {
updateCustomVoices(
i,
@@ -1448,6 +1475,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
</fieldset>
)}
</fieldset>
</>
)}
</>
)}