From 6a82102b4374c90ca8d4442da792e0a031b26905 Mon Sep 17 00:00:00 2001 From: EgleH Date: Mon, 5 Dec 2022 01:59:45 +0100 Subject: [PATCH] Add IBM as speech provider (#160) * add ibm speech * remove multimedia from recognizer languages Co-authored-by: eglehelms --- src/api/types.ts | 4 + .../internal/views/speech-services/form.tsx | 136 +++++++++++--- src/vendor/index.tsx | 33 +++- src/vendor/regions/ibm-regions.ts | 34 ++++ .../ibm-speech-recognizer-lang.ts | 86 +++++++++ .../ibm-speech-synthesis-lang.ts | 171 ++++++++++++++++++ src/vendor/types.ts | 6 +- 7 files changed, 441 insertions(+), 29 deletions(-) create mode 100644 src/vendor/regions/ibm-regions.ts create mode 100644 src/vendor/speech-recognizer/ibm-speech-recognizer-lang.ts create mode 100644 src/vendor/speech-synthesis/ibm-speech-synthesis-lang.ts diff --git a/src/api/types.ts b/src/api/types.ts index 6f92f92..dc424cf 100644 --- a/src/api/types.ts +++ b/src/api/types.ts @@ -278,6 +278,10 @@ export interface SpeechCredential { custom_stt_endpoint: null | string; client_id: null | string; secret: null | string; + tts_api_key: null | string; + tts_region: null | string; + stt_api_key: null | string; + stt_region: null | string; } export interface Alert { diff --git a/src/containers/internal/views/speech-services/form.tsx b/src/containers/internal/views/speech-services/form.tsx index 92215b5..e3ad387 100644 --- a/src/containers/internal/views/speech-services/form.tsx +++ b/src/containers/internal/views/speech-services/form.tsx @@ -25,6 +25,7 @@ import { VENDOR_NUANCE, VENDOR_WELLSAID, VENDOR_DEEPGRAM, + VENDOR_IBM, } from "src/vendor"; import { MSG_REQUIRED_FIELDS } from "src/constants"; import { getObscuredSecret } from "src/utils"; @@ -57,6 +58,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { const [secretKey, setSecretKey] = useState(""); const [googleServiceKey, setGoogleServiceKey] = useState(null); + const [sttRegion, setSttRegion] = useState(""); + const [sttApiKey, 
setSttApiKey] = useState(""); + const [ttsRegion, setTtsRegion] = useState(""); + const [ttsApiKey, setTtsApiKey] = useState(""); const [useCustomTts, setUseCustomTts] = useState(false); const [useCustomStt, setUseCustomStt] = useState(false); const [customTtsEndpoint, setCustomTtsEndpoint] = useState(""); @@ -111,6 +116,12 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { use_custom_stt: useCustomStt ? 1 : 0, custom_stt_endpoint: customSttEndpoint || null, }), + ...(vendor === VENDOR_IBM && { + stt_api_key: sttApiKey || null, + stt_region: sttRegion || null, + tts_api_key: ttsApiKey || null, + tts_region: ttsRegion || null, + }), }; if (credential && credential.data) { @@ -145,6 +156,10 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { : null, client_id: vendor === VENDOR_NUANCE ? clientId : null, secret: vendor === VENDOR_NUANCE ? secretKey : null, + stt_api_key: sttApiKey || null, + stt_region: sttRegion || null, + tts_api_key: ttsApiKey || null, + tts_region: ttsRegion || null, }) .then(({ json }) => { toastSuccess("Speech credential created successfully"); @@ -211,6 +226,22 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { setSecretKey(credential.data.secret); } + if (credential.data.tts_api_key) { + setTtsApiKey(credential.data.tts_api_key); + } + + if (credential.data.tts_region) { + setTtsRegion(credential.data.tts_region); + } + + if (credential.data.stt_api_key) { + setSttApiKey(credential.data.stt_api_key); + } + + if (credential.data.stt_region) { + setSttRegion(credential.data.stt_region); + } + setUseCustomTts(credential.data.use_custom_tts > 0 ? true : false); setUseCustomStt(credential.data.use_custom_stt > 0 ? 
true : false); setCustomTtsEndpoint(credential.data.custom_tts_endpoint || ""); @@ -404,27 +435,90 @@ export const SpeechServiceForm = ({ credential }: SpeechServiceFormProps) => { /> )} - {/* Single region state var is used for both Microsoft and AWS */} - {regions && regions[vendor as keyof RegionVendors] && ( -
- - setRegion(e.target.value)} - /> -
- )} + {regions && + regions[vendor as keyof RegionVendors] && + vendor !== VENDOR_IBM && ( +
+ + setRegion(e.target.value)} + /> +
+ )} + {vendor === VENDOR_IBM && + regions && + regions[vendor as keyof RegionVendors] && ( +
+ + setTtsRegion(e.target.value)} + /> + + setTtsApiKey(e.target.value)} + disabled={credential ? true : false} + /> + + setSttRegion(e.target.value)} + /> + + setSttApiKey(e.target.value)} + disabled={credential ? true : false} + /> +
+ )} {vendor === VENDOR_MICROSOFT && (
diff --git a/src/vendor/index.tsx b/src/vendor/index.tsx index a2c39a0..3329e61 100644 --- a/src/vendor/index.tsx +++ b/src/vendor/index.tsx @@ -15,6 +15,7 @@ export const VENDOR_MICROSOFT = "microsoft"; export const VENDOR_WELLSAID = "wellsaid"; export const VENDOR_NUANCE = "nuance"; export const VENDOR_DEEPGRAM = "deepgram"; +export const VENDOR_IBM = "ibm"; export const vendors: VendorOptions[] = [ { @@ -41,6 +42,10 @@ export const vendors: VendorOptions[] = [ name: "deepgram", value: VENDOR_DEEPGRAM, }, + { + name: "IBM", + value: VENDOR_IBM, + }, ]; export const useRegionVendors = () => { @@ -52,14 +57,22 @@ export const useRegionVendors = () => { Promise.all([ import("./regions/aws-regions"), import("./regions/ms-azure-regions"), - ]).then(([{ default: awsRegions }, { default: msRegions }]) => { - if (!ignore) { - setRegions({ - aws: awsRegions, - microsoft: msRegions, - }); + import("./regions/ibm-regions"), + ]).then( + ([ + { default: awsRegions }, + { default: msRegions }, + { default: ibmRegions }, + ]) => { + if (!ignore) { + setRegions({ + aws: awsRegions, + microsoft: msRegions, + ibm: ibmRegions, + }); + } } - }); + ); return function cleanup() { ignore = true; @@ -84,11 +97,13 @@ export const useSpeechVendors = () => { import("./speech-recognizer/ms-speech-recognizer-lang"), import("./speech-recognizer/nuance-speech-recognizer-lang"), import("./speech-recognizer/deepgram-speech-recognizer-lang"), + import("./speech-recognizer/ibm-speech-recognizer-lang"), import("./speech-synthesis/aws-speech-synthesis-lang"), import("./speech-synthesis/google-speech-synthesis-lang"), import("./speech-synthesis/ms-speech-synthesis-lang"), import("./speech-synthesis/wellsaid-speech-synthesis-lang"), import("./speech-synthesis/nuance-speech-synthesis-lang"), + import("./speech-synthesis/ibm-speech-synthesis-lang"), ]).then( ([ { default: awsRecognizer }, @@ -96,11 +111,13 @@ export const useSpeechVendors = () => { { default: msRecognizer }, { default: nuanceRecognizer 
}, { default: deepgramRecognizer }, + { default: ibmRecognizer }, { default: awsSynthesis }, { default: googleSynthesis }, { default: msSynthesis }, { default: wellsaidSynthesis }, { default: nuanceSynthesis }, + { default: ibmSynthesis }, ]) => { if (!ignore) { setSpeech({ @@ -110,6 +127,7 @@ export const useSpeechVendors = () => { microsoft: msSynthesis, wellsaid: wellsaidSynthesis, nuance: nuanceSynthesis, + ibm: ibmSynthesis, }, recognizers: { aws: awsRecognizer, @@ -117,6 +135,7 @@ export const useSpeechVendors = () => { microsoft: msRecognizer, nuance: nuanceRecognizer, deepgram: deepgramRecognizer, + ibm: ibmRecognizer, }, }); } diff --git a/src/vendor/regions/ibm-regions.ts b/src/vendor/regions/ibm-regions.ts new file mode 100644 index 0000000..1a99d92 --- /dev/null +++ b/src/vendor/regions/ibm-regions.ts @@ -0,0 +1,34 @@ +import type { Region } from "../types"; + +export const regions: Region[] = [ + { + name: "US South (Dallas)", + value: "us-south", + }, + { + name: "US East (Washington)", + value: "us-east", + }, + { + name: "EU DE (Frankfurt)", + value: "eu-de", + }, + { + name: "EU GB (London)", + value: "eu-gb", + }, + { + name: "AU South (Sydney)", + value: "au-syd", + }, + { + name: "Japan (Tokyo)", + value: "jp-tok", + }, + { + name: "South Korea (Seoul)", + value: "kr-seo", + }, +]; + +export default regions; diff --git a/src/vendor/speech-recognizer/ibm-speech-recognizer-lang.ts b/src/vendor/speech-recognizer/ibm-speech-recognizer-lang.ts new file mode 100644 index 0000000..9381616 --- /dev/null +++ b/src/vendor/speech-recognizer/ibm-speech-recognizer-lang.ts @@ -0,0 +1,86 @@ +import type { Language } from "../types"; + +export const languages: Language[] = [ + { + name: "Arabic (Modern Standard)", + code: "ar-MS_Telephony", + }, + { + name: "Chinese (Mandarin)", + code: "zh-CN_Telephony", + }, + { + name: "Czech", + code: "cs-CZ_Telephony", + }, + { + name: "Dutch (Belgian)", + code: "nl-BE_Telephony", + }, + { + name: "Dutch (Netherlands)", + 
code: "nl-NL_Telephony", + }, + { + name: "English (all supported dialects)", + code: "en-WW_Medical_Telephony", + }, + { + name: "English (Australian)", + code: "en-AU_Telephony", + }, + { + name: "English (Indian)", + code: "en-IN_Telephony", + }, + { + name: "English (United Kingdom)", + code: "en-GB_Telephony", + }, + { + name: "English (United States)", + code: "en-US_Telephony", + }, + { + name: "French (Canadian)", + code: "fr-CA_Telephony", + }, + { + name: "French (France)", + code: "fr-FR_Telephony", + }, + { + name: "German", + code: "de-DE_Telephony", + }, + { + name: "Hindi (Indian)", + code: "hi-IN_Telephony", + }, + { + name: "Italian", + code: "it-IT_Telephony", + }, + { + name: "Korean", + code: "ko-KR_Telephony", + }, + { + name: "Portuguese (Brazilian)", + code: "pt-BR_Telephony", + }, + { + name: "Spanish (Mexican)", + code: "es-LA_Telephony", + }, + { + name: "Spanish (Castilian)", + code: "es-ES_Telephony", + }, + { + name: "Swedish", + code: "sv-SE_Telephony", + }, +]; + +export default languages; diff --git a/src/vendor/speech-synthesis/ibm-speech-synthesis-lang.ts b/src/vendor/speech-synthesis/ibm-speech-synthesis-lang.ts new file mode 100644 index 0000000..e7fb895 --- /dev/null +++ b/src/vendor/speech-synthesis/ibm-speech-synthesis-lang.ts @@ -0,0 +1,171 @@ +import type { VoiceLanguage } from "../types"; + +export const languages: VoiceLanguage[] = [ + { + code: "de-DE", + name: "German (Germany)", + voices: [ + { value: "de-DE_DieterVoice", name: "Dieter (Male): Standard German" }, + { + value: "de-DE_DieterV2Voice", + name: "Dieter 2 (Male): Standard German", + }, + { + value: "de-DE_DieterV3Voice", + name: "Dieter 3 (Male): Standard German", + }, + { value: "de-DE_ErikaV3Voice", name: "Erika (Female): Standard German" }, + { value: "de-DE_BirgitVoice", name: "Birgit (Female): Standard German" }, + { + value: "de-DE_BirgitV2Voice", + name: "Birgit 2 (Female): Standard German", + }, + { + value: "de-DE_BirgitV3Voice", + name: "Birgit 3 
(Female): Standard German", + }, + ], + }, + { + code: "en-US", + name: "English (US)", + voices: [ + { + value: "en-US_MichaelExpressive", + name: "Michael (Male): American English - Expressive", + }, + { value: "en-US_MichaelVoice", name: "Michael (Male): American English" }, + { + value: "en-US_MichaelV2Voice", + name: "Michael 2 (Male): American English", + }, + { + value: "en-US_MichaelV3Voice", + name: "Michael 3 (Male): American English", + }, + { value: "en-US_HenryV3Voice", name: "Henry (Male): American English" }, + { value: "en-US_EmilyV3Voice", name: "Emily (Female): American English" }, + { + value: "en-US_OliviaV3Voice", + name: "Olivia (Female): American English", + }, + { + value: "en-US_AllisonExpressive", + name: "Allison (Female): American English - Expressive", + }, + { + value: "en-US_AllisonVoice", + name: "Allison (Female): American English", + }, + { + value: "en-US_AllisonV2Voice", + name: "Allison 2 (Female): American English", + }, + { + value: "en-US_AllisonV3Voice", + name: "Allison 3 (Female): American English", + }, + { + value: "en-US_LisaExpressive", + name: "Lisa (Female): American English - Expressive", + }, + { value: "en-US_LisaVoice", name: "Lisa (Female): American English" }, + { value: "en-US_LisaV2Voice", name: "Lisa 2 (Female): American English" }, + { value: "en-US_LisaV3Voice", name: "Lisa 3 (Female): American English" }, + { value: "en-US_KevinV3Voice", name: "Kevin (Male): American English" }, + { + value: "en-US_EmmaExpressive", + name: "Emma (Female): American English - Expressive", + }, + ], + }, + { + code: "en-GB", + name: "English (GB)", + voices: [ + { value: "en-GB_JamesV3Voice", name: "James (Male)" }, + { value: "en-GB_KateVoice", name: "Kate (Female)" }, + { value: "en-GB_KateV3Voice", name: "Kate 2 (Female)" }, + { value: "en-GB_CharlotteV3Voice", name: "Charlotte (Female)" }, + ], + }, + { + code: "es-US", + name: "Spanish (North America)", + voices: [ + { + value: "es-US_SofiaVoice", + name: "Sofia (Female): 
North American Spanish", + }, + { + value: "es-US_SofiaV3Voice", + name: "Sofia 2 (Female): North American Spanish", + }, + ], + }, + { + code: "es-LA", + name: "Spanish (Latin America)", + voices: [ + { + value: "es-LA_SofiaVoice", + name: "Sofia (Female): Latin American Spanish", + }, + { + value: "es-LA_SofiaV3Voice", + name: "Sofia 2 (Female): Latin American Spanish", + }, + ], + }, + { + code: "es-ES", + name: "Spanish (Castilian)", + voices: [ + { value: "es-ES_LauraVoice", name: "Laura (Female)" }, + { value: "es-ES_LauraV3Voice", name: "Laura 2 (Female)" }, + { value: "es-ES_EnriqueVoice", name: "Enrique (Male)" }, + { value: "es-ES_EnriqueV3Voice", name: "Enrique 2 (Male)" }, + ], + }, + { + code: "fr-FR", + name: "French (FR)", + voices: [ + { value: "fr-FR_NicolasV3Voice", name: "Nicolas (Male)" }, + { value: "fr-FR_ReneeVoice", name: "Renee (Female)" }, + { value: "fr-FR_ReneeV3Voice", name: "Renee 2 (Female)" }, + ], + }, + { + code: "fr-CA", + name: "French (CA)", + voices: [{ value: "fr-CA_LouiseV3Voice", name: "Louise (Female)" }], + }, + { + code: "it-IT", + name: "Italian", + voices: [ + { value: "it-IT_FrancescaVoice", name: "Francesca (Female)" }, + { value: "it-IT_FrancescaV2Voice", name: "Francesca 2 (Female)" }, + { value: "it-IT_FrancescaV3Voice", name: "Francesca 3 (Female)" }, + ], + }, + { + code: "pt-BR", + name: "Portuguese (Brazil)", + voices: [ + { value: "pt-BR_IsabelaVoice", name: "Isabela (Female)" }, + { value: "pt-BR_IsabelaV3Voice", name: "Isabela 2 (Female)" }, + ], + }, + { + code: "ja-JP", + name: "Japanese", + voices: [ + { value: "ja-JP_EmiVoice", name: "Emi (Female)" }, + { value: "ja-JP_EmiV3Voice", name: "Emi 2 (Female)" }, + ], + }, +]; + +export default languages; diff --git a/src/vendor/types.ts b/src/vendor/types.ts index 619991c..c0e8841 100644 --- a/src/vendor/types.ts +++ b/src/vendor/types.ts @@ -4,7 +4,8 @@ export type Vendor = | "Microsoft" | "WellSaid" | "Nuance" - | "deepgram"; + | "deepgram" + | "IBM"; 
export interface VendorOptions { name: Vendor; @@ -49,6 +50,7 @@ export interface GoogleServiceKey { export interface RegionVendors { aws: Region[]; microsoft: Region[]; + ibm: Region[]; } export interface RecognizerVendors { @@ -57,6 +59,7 @@ export interface RecognizerVendors { microsoft: Language[]; nuance: Language[]; deepgram: Language[]; + ibm: Language[]; } export interface SynthesisVendors { @@ -65,6 +68,7 @@ export interface SynthesisVendors { microsoft: VoiceLanguage[]; wellsaid: VoiceLanguage[]; nuance: VoiceLanguage[]; + ibm: VoiceLanguage[]; } export interface MSRawSpeech {