mirror of
https://github.com/jambonz/jambonz-webapp.git
synced 2026-01-25 02:08:19 +00:00
support openAi stt (#496)
* support openAi stt * wip * wip * add back model selection to openai
This commit is contained in:
@@ -728,6 +728,7 @@ export interface SpeechSupportedLanguagesAndVoices {
|
||||
tts: VoiceLanguage[];
|
||||
stt: Language[];
|
||||
models: Model[];
|
||||
sttModels: Model[];
|
||||
}
|
||||
|
||||
export interface ElevenLabsOptions {
|
||||
|
||||
@@ -33,6 +33,7 @@ import {
|
||||
VENDOR_CARTESIA,
|
||||
VENDOR_VOXIST,
|
||||
VENDOR_RIMELABS,
|
||||
VENDOR_OPENAI,
|
||||
} from "src/vendor";
|
||||
import {
|
||||
LabelOptions,
|
||||
@@ -397,6 +398,7 @@ export const SpeechProviderSelection = ({
|
||||
vendor.value !== VENDOR_SONIOX &&
|
||||
vendor.value !== VENDOR_SPEECHMATICS &&
|
||||
vendor.value !== VENDOR_CUSTOM &&
|
||||
vendor.value !== VENDOR_OPENAI &&
|
||||
vendor.value !== VENDOR_COBALT,
|
||||
)}
|
||||
onChange={(e) => {
|
||||
|
||||
@@ -51,6 +51,7 @@ import {
|
||||
VENDOR_SPEECHMATICS,
|
||||
VENDOR_CARTESIA,
|
||||
VENDOR_VOXIST,
|
||||
VENDOR_OPENAI,
|
||||
} from "src/vendor";
|
||||
import { MSG_REQUIRED_FIELDS } from "src/constants";
|
||||
import {
|
||||
@@ -124,6 +125,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
const [ttsRegion, setTtsRegion] = useState("");
|
||||
const [ttsApiKey, setTtsApiKey] = useState("");
|
||||
const [ttsModelId, setTtsModelId] = useState("");
|
||||
const [sttModelId, setSttModelId] = useState("");
|
||||
const [engineVersion, setEngineVersion] = useState(DEFAULT_VERBIO_MODEL);
|
||||
const [instanceId, setInstanceId] = useState("");
|
||||
const [initialCheckCustomTts, setInitialCheckCustomTts] = useState(false);
|
||||
@@ -167,6 +169,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
const [customVoices, setCustomVoices] = useState<GoogleCustomVoice[]>([]);
|
||||
const [customVoicesMessage, setCustomVoicesMessage] = useState("");
|
||||
const [ttsModels, setTtsModels] = useState<Model[]>([]);
|
||||
const [sttModels, setSttModels] = useState<Model[]>([]);
|
||||
const [optionsInitialChecked, setOptionsInitialChecked] = useState(false);
|
||||
const [options, setOptions] = useState("");
|
||||
const [tmpOptions, setTmpOptions] = useState("");
|
||||
@@ -248,6 +251,17 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
return "";
|
||||
};
|
||||
|
||||
const getModelLabelByVendor = (vendor: Lowercase<Vendor>) => {
|
||||
switch (vendor) {
|
||||
case VENDOR_PLAYHT:
|
||||
return "Voice Engine";
|
||||
case VENDOR_CARTESIA:
|
||||
return "Model ID";
|
||||
default:
|
||||
return "Model";
|
||||
}
|
||||
};
|
||||
|
||||
const handlePutGoogleCustomVoices = () => {
|
||||
if (!credential || !credential.data) {
|
||||
return;
|
||||
@@ -411,6 +425,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
ttsModelId && {
|
||||
voice_engine: ttsModelId,
|
||||
}),
|
||||
...(vendor === VENDOR_OPENAI &&
|
||||
sttModelId && {
|
||||
model_id: sttModelId,
|
||||
}),
|
||||
...(vendor === VENDOR_DEEPGRAM && {
|
||||
deepgram_stt_uri: deepgramSttUri || null,
|
||||
deepgram_tts_uri: deepgramTtsUri || null,
|
||||
@@ -469,7 +487,8 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
vendor === VENDOR_PLAYHT ||
|
||||
vendor === VENDOR_RIMELABS ||
|
||||
vendor === VENDOR_WHISPER ||
|
||||
vendor === VENDOR_CARTESIA
|
||||
vendor === VENDOR_CARTESIA ||
|
||||
vendor === VENDOR_OPENAI
|
||||
? apiKey
|
||||
: null,
|
||||
}),
|
||||
@@ -536,7 +555,8 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
vendor === VENDOR_WHISPER ||
|
||||
vendor === VENDOR_PLAYHT ||
|
||||
vendor === VENDOR_RIMELABS ||
|
||||
vendor === VENDOR_CARTESIA
|
||||
vendor === VENDOR_CARTESIA ||
|
||||
vendor === VENDOR_OPENAI
|
||||
) {
|
||||
getSpeechSupportedLanguagesAndVoices(
|
||||
currentServiceProvider?.service_provider_sid,
|
||||
@@ -553,6 +573,15 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
setTtsModelId(json.models[0].value);
|
||||
}
|
||||
}
|
||||
if (json.sttModels) {
|
||||
setSttModels(json.sttModels);
|
||||
if (
|
||||
json.sttModels.length > 0 &&
|
||||
!json.sttModels.some((m) => m.value === sttModelId)
|
||||
) {
|
||||
setSttModelId(json.sttModels[0].value);
|
||||
}
|
||||
}
|
||||
});
|
||||
} else {
|
||||
setTtsModels([]);
|
||||
@@ -707,6 +736,9 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
if (credential.data.model_id) {
|
||||
setTtsModelId(credential.data.model_id);
|
||||
}
|
||||
if (credential.data.model_id && vendor === VENDOR_OPENAI) {
|
||||
setSttModelId(credential.data.model_id);
|
||||
}
|
||||
}
|
||||
if (credential?.data?.options) {
|
||||
setOptions(credential.data.options);
|
||||
@@ -887,6 +919,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
vendor !== VENDOR_COBALT &&
|
||||
vendor !== VENDOR_SONIOX &&
|
||||
vendor !== VENDOR_SPEECHMATICS &&
|
||||
vendor !== VENDOR_OPENAI &&
|
||||
vendor != VENDOR_CUSTOM && (
|
||||
<label htmlFor="use_for_tts" className="chk">
|
||||
<input
|
||||
@@ -1522,6 +1555,7 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
vendor === VENDOR_RIMELABS ||
|
||||
vendor === VENDOR_SONIOX ||
|
||||
vendor === VENDOR_CARTESIA ||
|
||||
vendor === VENDOR_OPENAI ||
|
||||
vendor === VENDOR_SPEECHMATICS) && (
|
||||
<fieldset>
|
||||
{vendor === VENDOR_PLAYHT && (
|
||||
@@ -1557,40 +1591,16 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
/>
|
||||
</fieldset>
|
||||
)}
|
||||
{vendor === VENDOR_PLAYHT && ttsModels.length > 0 && (
|
||||
<fieldset>
|
||||
<label htmlFor={`${vendor}_tts_model_id`}>Voice engine</label>
|
||||
<Selector
|
||||
id={"tts_model_id"}
|
||||
name={"tts_model_id"}
|
||||
value={ttsModelId}
|
||||
options={ttsModels}
|
||||
onChange={(e) => {
|
||||
setTtsModelId(e.target.value);
|
||||
}}
|
||||
/>
|
||||
</fieldset>
|
||||
)}
|
||||
{vendor === VENDOR_CARTESIA && ttsModels.length > 0 && (
|
||||
<fieldset>
|
||||
<label htmlFor={`${vendor}_tts_model_id`}>Model Id</label>
|
||||
<Selector
|
||||
id={"tts_model_id"}
|
||||
name={"tts_model_id"}
|
||||
value={ttsModelId}
|
||||
options={ttsModels}
|
||||
onChange={(e) => {
|
||||
setTtsModelId(e.target.value);
|
||||
}}
|
||||
/>
|
||||
</fieldset>
|
||||
)}
|
||||
{(vendor == VENDOR_ELEVENLABS ||
|
||||
vendor == VENDOR_WHISPER ||
|
||||
vendor === VENDOR_CARTESIA ||
|
||||
vendor === VENDOR_PLAYHT ||
|
||||
vendor == VENDOR_RIMELABS) &&
|
||||
ttsModels.length > 0 && (
|
||||
<fieldset>
|
||||
<label htmlFor={`${vendor}_tts_model_id`}>Model</label>
|
||||
<label htmlFor={`${vendor}_tts_model_id`}>
|
||||
{getModelLabelByVendor(vendor)}
|
||||
</label>
|
||||
<Selector
|
||||
id={"tts_model_id"}
|
||||
name={"tts_model_id"}
|
||||
@@ -1602,6 +1612,22 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => {
|
||||
/>
|
||||
</fieldset>
|
||||
)}
|
||||
{vendor == VENDOR_OPENAI && sttModels.length > 0 && (
|
||||
<fieldset>
|
||||
<label htmlFor={`${vendor}_stt_model_id`}>
|
||||
{getModelLabelByVendor(vendor)}
|
||||
</label>
|
||||
<Selector
|
||||
id={"stt_model_id"}
|
||||
name={"stt_model_id"}
|
||||
value={sttModelId}
|
||||
options={sttModels}
|
||||
onChange={(e) => {
|
||||
setSttModelId(e.target.value);
|
||||
}}
|
||||
/>
|
||||
</fieldset>
|
||||
)}
|
||||
{(vendor === VENDOR_ELEVENLABS ||
|
||||
vendor === VENDOR_PLAYHT ||
|
||||
vendor === VENDOR_CARTESIA ||
|
||||
|
||||
5
src/vendor/index.tsx
vendored
5
src/vendor/index.tsx
vendored
@@ -26,6 +26,7 @@ export const VENDOR_PLAYHT = "playht";
|
||||
export const VENDOR_RIMELABS = "rimelabs";
|
||||
export const VENDOR_VERBIO = "verbio";
|
||||
export const VENDOR_CARTESIA = "cartesia";
|
||||
export const VENDOR_OPENAI = "openai";
|
||||
|
||||
export const vendors: VendorOptions[] = [
|
||||
{
|
||||
@@ -108,6 +109,10 @@ export const vendors: VendorOptions[] = [
|
||||
name: "Cartesia",
|
||||
value: VENDOR_CARTESIA,
|
||||
},
|
||||
{
|
||||
name: "OpenAI",
|
||||
value: VENDOR_OPENAI,
|
||||
},
|
||||
].sort((a, b) => a.name.localeCompare(b.name)) as VendorOptions[];
|
||||
|
||||
export const AWS_CREDENTIAL_ACCESS_KEY = "access_key";
|
||||
|
||||
1
src/vendor/types.ts
vendored
1
src/vendor/types.ts
vendored
@@ -18,6 +18,7 @@ export type Vendor =
|
||||
| "playht"
|
||||
| "rimelabs"
|
||||
| "verbio"
|
||||
| "openai"
|
||||
| "Cartesia";
|
||||
|
||||
export interface VendorOptions {
|
||||
|
||||
Reference in New Issue
Block a user