const {TaskName} = require('./constants.json');

/**
 * Channel-variable names grouped by speech vendor key.
 * NOTE(review): nothing in the visible part of this file reads this table;
 * presumably these are variables that persist on the channel between
 * recognition requests and have to be reset explicitly - confirm against
 * the consumer before relying on that.
 */
const stickyVars = {
  google: [
    'GOOGLE_SPEECH_HINTS',
    'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL',
    'GOOGLE_SPEECH_PROFANITY_FILTER',
    'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION',
    'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS',
    'GOOGLE_SPEECH_SINGLE_UTTERANCE',
    'GOOGLE_SPEECH_SPEAKER_DIARIZATION',
    'GOOGLE_SPEECH_USE_ENHANCED',
    'GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES',
    'GOOGLE_SPEECH_METADATA_INTERACTION_TYPE',
    'GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE'
  ],
  microsoft: [
    'AZURE_SPEECH_HINTS',
    'AZURE_SERVICE_ENDPOINT_ID',
    'AZURE_REQUEST_SNR',
    'AZURE_PROFANITY_OPTION',
    'AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES',
    'AZURE_SERVICE_ENDPOINT',
    'AZURE_INITIAL_SPEECH_TIMEOUT_MS',
    'AZURE_USE_OUTPUT_FORMAT_DETAILED',
    'AZURE_SPEECH_SEGMENTATION_SILENCE_TIMEOUT_MS'
  ],
  deepgram: [
    'DEEPGRAM_SPEECH_KEYWORDS',
    'DEEPGRAM_API_KEY',
    'DEEPGRAM_SPEECH_TIER',
    'DEEPGRAM_SPEECH_MODEL',
    'DEEPGRAM_SPEECH_ENABLE_SMART_FORMAT',
    'DEEPGRAM_SPEECH_ENABLE_NO_DELAY',
    'DEEPGRAM_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION',
    'DEEPGRAM_SPEECH_PROFANITY_FILTER',
    'DEEPGRAM_SPEECH_REDACT',
    'DEEPGRAM_SPEECH_DIARIZE',
    'DEEPGRAM_SPEECH_NER',
    'DEEPGRAM_SPEECH_ALTERNATIVES',
    'DEEPGRAM_SPEECH_NUMERALS',
    'DEEPGRAM_SPEECH_SEARCH',
    'DEEPGRAM_SPEECH_REPLACE',
    'DEEPGRAM_SPEECH_ENDPOINTING',
    'DEEPGRAM_SPEECH_UTTERANCE_END_MS',
    'DEEPGRAM_SPEECH_VAD_TURNOFF',
    'DEEPGRAM_SPEECH_TAG',
    'DEEPGRAM_SPEECH_MODEL_VERSION',
    'DEEPGRAM_SPEECH_FILLER_WORDS',
    'DEEPGRAM_SPEECH_KEYTERMS',
  ],
  aws: [
    'AWS_VOCABULARY_NAME',
    'AWS_VOCABULARY_FILTER_METHOD',
    'AWS_VOCABULARY_FILTER_NAME',
    'AWS_LANGUAGE_MODEL_NAME',
    'AWS_ACCESS_KEY_ID',
    'AWS_SECRET_ACCESS_KEY',
    'AWS_REGION',
    'AWS_SECURITY_TOKEN',
    'AWS_PII_ENTITY_TYPES',
  ],
  nuance: [
    'NUANCE_ACCESS_TOKEN',
    'NUANCE_KRYPTON_ENDPOINT',
    'NUANCE_TOPIC',
    'NUANCE_UTTERANCE_DETECTION_MODE',
    'NUANCE_FILTER_PROFANITY',
    'NUANCE_INCLUDE_TOKENIZATION',
    'NUANCE_DISCARD_SPEAKER_ADAPTATION',
    'NUANCE_SUPPRESS_CALL_RECORDING',
    'NUANCE_MASK_LOAD_FAILURES',
    'NUANCE_SUPPRESS_INITIAL_CAPITALIZATION',
    'NUANCE_ALLOW_ZERO_BASE_LM_WEIGHT',
    'NUANCE_FILTER_WAKEUP_WORD',
    'NUANCE_NO_INPUT_TIMEOUT_MS',
    'NUANCE_RECOGNITION_TIMEOUT_MS',
    'NUANCE_UTTERANCE_END_SILENCE_MS',
    'NUANCE_MAX_HYPOTHESES',
    'NUANCE_SPEECH_DOMAIN',
    'NUANCE_FORMATTING',
    'NUANCE_RESOURCES'
  ],
  ibm: [
    'IBM_ACCESS_TOKEN',
    'IBM_SPEECH_REGION',
    'IBM_SPEECH_INSTANCE_ID',
    'IBM_SPEECH_MODEL',
    'IBM_SPEECH_LANGUAGE_CUSTOMIZATION_ID',
    'IBM_SPEECH_ACOUSTIC_CUSTOMIZATION_ID',
    'IBM_SPEECH_BASE_MODEL_VERSION',
    'IBM_SPEECH_WATSON_METADATA',
    'IBM_SPEECH_WATSON_LEARNING_OPT_OUT'
  ],
  nvidia: [
    'NVIDIA_HINTS'
  ],
  cobalt: [
    'COBALT_SPEECH_HINTS',
    'COBALT_COMPILED_CONTEXT_DATA',
    'COBALT_METADATA'
  ],
  soniox: [
    'SONIOX_PROFANITY_FILTER',
    'SONIOX_MODEL'
  ],
  assemblyai: [
    'ASSEMBLYAI_API_KEY',
    'ASSEMBLYAI_WORD_BOOST'
  ],
  voxist: [
    'VOXIST_API_KEY',
  ],
  cartesia: [
    'CARTESIA_API_KEY',
    'CARTESIA_MODEL_ID'
  ],
  speechmatics: [
    'SPEECHMATICS_API_KEY',
    'SPEECHMATICS_HOST',
    'SPEECHMATICS_PATH',
    'SPEECHMATICS_SPEECH_HINTS',
    'SPEECHMATICS_TRANSLATION_LANGUAGES',
    'SPEECHMATICS_TRANSLATION_PARTIALS'
  ],
  openai: [
    'OPENAI_API_KEY',
    'OPENAI_MODEL',
    'OPENAI_INPUT_AUDIO_NOISE_REDUCTION',
    'OPENAI_TURN_DETECTION_TYPE',
    'OPENAI_TURN_DETECTION_THRESHOLD',
    'OPENAI_TURN_DETECTION_PREFIX_PADDING_MS',
    'OPENAI_TURN_DETECTION_SILENCE_DURATION_MS',
  ],
};

/**
 * Default Deepgram model per language code.
 * Each value is a two-element array that selectDefaultDeepgramModel reads as
 * [model for gather tasks, model for every other task].
 * @see https://developers.deepgram.com/docs/models-languages-overview
 */
const optimalDeepramModels = {
  zh: ['base', 'base'],
  'zh-CN':['base', 'base'],
  'zh-TW': ['base', 'base'],
  da: ['enhanced', 'enhanced'],
  en: ['nova-2-phonecall', 'nova-2'],
  'en-US': ['nova-2-phonecall', 'nova-2'],
  'en-AU': ['nova-2', 'nova-2'],
  'en-GB': ['nova-2', 'nova-2'],
  'en-IN': ['nova-2', 'nova-2'],
  'en-NZ': ['nova-2', 'nova-2'],
  nl: ['nova-2', 'nova-2'],
  fr: ['nova-2', 'nova-2'],
  'fr-CA': ['nova-2', 'nova-2'],
  de: ['nova-2', 'nova-2'],
  hi: ['nova-2', 'nova-2'],
  'hi-Latn': ['nova-2', 'nova-2'],
  id: ['base', 'base'],
  it: ['nova-2', 'nova-2'],
  ja: ['enhanced', 'enhanced'],
  ko: ['nova-2',
'nova-2'], no: ['nova-2', 'nova-2'], pl: ['nova-2', 'nova-2'], pt: ['nova-2', 'nova-2'], 'pt-BR': ['nova-2', 'nova-2'], 'pt-PT': ['nova-2', 'nova-2'], ru: ['nova-2', 'nova-2'], es: ['nova-2', 'nova-2'], 'es-419': ['nova-2', 'nova-2'], 'es-LATAM': ['enhanced', 'enhanced'], sv: ['nova-2', 'nova-2'], ta: ['enhanced', 'enhanced'], taq: ['enhanced', 'enhanced'], tr: ['nova-2', 'nova-2'], uk: ['nova-2', 'nova-2'] }; const selectDefaultDeepgramModel = (task, language) => { if (language in optimalDeepramModels) { const [gather, transcribe] = optimalDeepramModels[language]; return task.name === TaskName.Gather ? gather : transcribe; } return 'base'; }; const optimalGoogleModels = { 'v1' : { 'en-IN':['telephony', 'telephony'], 'es-DO':['default', 'default'], 'es-MX':['default', 'default'], 'en-AU':['telephony', 'telephony'], 'en-GB':['telephony', 'telephony'], 'en-NZ':['telephony', 'telephony'] }, 'v2' : { 'en-IN':['telephony', 'long'] } }; const selectDefaultGoogleModel = (task, language, version) => { const useV2 = version === 'v2'; if (language in optimalGoogleModels[version]) { const [gather, transcribe] = optimalGoogleModels[version][language]; return task.name === TaskName.Gather ? gather : transcribe; } return task.name === TaskName.Gather ? (useV2 ? 'telephony_short' : 'command_and_search') : (useV2 ? 
'long' : 'latest_long'); }; const consolidateTranscripts = (bufferedTranscripts, channel, language, vendor) => { if (bufferedTranscripts.length === 1) { bufferedTranscripts[0].is_final = true; return bufferedTranscripts[0]; } let totalConfidence = 0; const finalTranscript = bufferedTranscripts.reduce((acc, evt) => { totalConfidence += evt.alternatives[0].confidence; let newTranscript = evt.alternatives[0].transcript; // If new transcript consists only of digits, spaces, and a trailing comma or period if (newTranscript.match(/^[\d\s]+[,.]?$/)) { newTranscript = newTranscript.replace(/\s/g, ''); // Remove all spaces if (newTranscript.endsWith(',')) { newTranscript = newTranscript.slice(0, -1); // Remove the trailing comma } else if (newTranscript.endsWith('.')) { newTranscript = newTranscript.slice(0, -1); // Remove the trailing period } } const lastChar = acc.alternatives[0].transcript.slice(-1); const firstChar = newTranscript.charAt(0); if (vendor === 'speechmatics' || (lastChar.match(/\d/) && firstChar.match(/\d/))) { acc.alternatives[0].transcript += newTranscript; } else { acc.alternatives[0].transcript += ` ${newTranscript}`; } return acc; }, { language_code: language, channel_tag: channel, is_final: true, alternatives: [{ transcript: '' }] }); finalTranscript.alternatives[0].confidence = bufferedTranscripts.length === 1 ? 
bufferedTranscripts[0].alternatives[0].confidence : totalConfidence / bufferedTranscripts.length; finalTranscript.alternatives[0].transcript = finalTranscript.alternatives[0].transcript.trim(); finalTranscript.vendor = { name: vendor, evt: bufferedTranscripts }; return finalTranscript; }; const compileSonioxTranscripts = (finalWordChunks, channel, language) => { const words = finalWordChunks.flat(); const transcript = words.reduce((acc, word) => { if (word.text === '') return acc; if ([',', '.', '?', '!'].includes(word.text)) return `${acc}${word.text}`; return `${acc} ${word.text}`; }, '').trim(); const realWords = words.filter((word) => ![',.!?;'].includes(word.text) && word.text !== ''); const confidence = realWords.reduce((acc, word) => acc + word.confidence, 0) / realWords.length; const alternatives = [{transcript, confidence}]; return { language_code: language, channel_tag: channel, is_final: true, alternatives, vendor: { name: 'soniox', evt: words } }; }; const normalizeSoniox = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); /* an token indicates the end of an utterance */ const endTokenPos = evt.words.map((w) => w.text).indexOf(''); const endpointReached = endTokenPos !== -1; const words = endpointReached ? 
evt.words.slice(0, endTokenPos) : evt.words; /* note: we can safely ignore words after the token as they will be returned again */ const finalWords = words.filter((word) => word.is_final); const nonFinalWords = words.filter((word) => !word.is_final); const is_final = endpointReached && finalWords.length > 0; const transcript = words.reduce((acc, word) => { if ([',', '.', '?', '!'].includes(word.text)) return `${acc}${word.text}`; else return `${acc} ${word.text}`; }, '').trim(); const realWords = words.filter((word) => ![',.!?;'].includes(word.text) && word.text !== ''); const confidence = realWords.reduce((acc, word) => acc + word.confidence, 0) / realWords.length; const alternatives = [{transcript, confidence}]; return { language_code: language, channel_tag: channel, is_final, alternatives, vendor: { name: 'soniox', endpointReached, evt: copy, finalWords, nonFinalWords } }; }; const normalizeDeepgram = (evt, channel, language, shortUtterance) => { const copy = JSON.parse(JSON.stringify(evt)); const alternatives = (evt.channel?.alternatives || []) .map((alt) => ({ confidence: alt.confidence, transcript: alt.transcript, })); /** * Some models (nova-2-general) return the detected language in the * alternatives.languages array if the language is set as multi. * If the language is detected, we use it as the language_code. */ const detectedLanguage = evt.channel?.alternatives?.[0]?.languages?.[0]; /** * note difference between is_final and speech_final in Deepgram: * https://developers.deepgram.com/docs/understand-endpointing-interim-results */ return { language_code: detectedLanguage || language, channel_tag: channel, is_final: shortUtterance ? evt.is_final : evt.speech_final, alternatives: alternatives.length ? 
[alternatives[0]] : [], vendor: { name: 'deepgram', evt: copy } }; }; const normalizeNvidia = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); const alternatives = (evt.alternatives || []) .map((alt) => ({ confidence: alt.confidence, transcript: alt.transcript, })); return { language_code: language, channel_tag: channel, is_final: evt.is_final, alternatives, vendor: { name: 'nvidia', evt: copy } }; }; const normalizeIbm = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); //const idx = evt.result_index; const result = evt.results[0]; return { language_code: language, channel_tag: channel, is_final: result.final, alternatives: result.alternatives, vendor: { name: 'ibm', evt: copy } }; }; const normalizeGoogle = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); const language_code = evt.language_code || language; return { language_code: language_code, channel_tag: channel, is_final: evt.is_final, alternatives: [evt.alternatives[0]], vendor: { name: 'google', evt: copy } }; }; const normalizeCobalt = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); const alternatives = (evt.alternatives || []) .map((alt) => ({ confidence: alt.confidence, transcript: alt.transcript_formatted, })); return { language_code: language, channel_tag: channel, is_final: evt.is_final, alternatives, vendor: { name: 'cobalt', evt: copy } }; }; const normalizeCustom = (evt, channel, language, vendor) => { const copy = JSON.parse(JSON.stringify(evt)); return { language_code: language, channel_tag: channel, is_final: evt.is_final, alternatives: [evt.alternatives[0]], vendor: { name: vendor, evt: copy } }; }; const normalizeNuance = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); return { language_code: language, channel_tag: channel, is_final: evt.is_final, alternatives: [evt.alternatives[0]], vendor: { name: 'nuance', evt: copy } }; }; const normalizeVerbio = (evt, 
channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); return { language_code: language, channel_tag: channel, is_final: evt.is_final, alternatives: evt.alternatives, vendor: { name: 'verbio', evt: copy } }; }; const normalizeMicrosoft = (evt, channel, language, punctuation = true) => { const copy = JSON.parse(JSON.stringify(evt)); const nbest = evt.NBest; const language_code = evt.PrimaryLanguage?.Language || language; const alternatives = nbest ? nbest.map((n) => { return { confidence: n.Confidence, // remove all puntuation if needed transcript: punctuation ? n.Display : n.Display.replace(/\p{P}/gu, '') }; }) : [ { transcript: punctuation ? evt.DisplayText || evt.Text : (evt.DisplayText || evt.Text).replace(/\p{P}/gu, '') } ]; return { language_code, channel_tag: channel, is_final: evt.RecognitionStatus === 'Success', alternatives: [alternatives[0]], vendor: { name: 'microsoft', evt: copy } }; }; const normalizeAws = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); const isGrpcPayload = Array.isArray(evt); if (isGrpcPayload) { /* legacy grpc api */ return { language_code: language, channel_tag: channel, is_final: evt[0].is_final, alternatives: evt[0].alternatives, vendor: { name: 'aws', evt: copy } }; } else { /* websocket api */ const alternatives = evt.Transcript?.Results[0]?.Alternatives.map((alt) => { const items = alt.Items.filter((item) => item.Type === 'pronunciation' && 'Confidence' in item); const confidence = items.reduce((acc, item) => acc + item.Confidence, 0) / items.length; return { transcript: alt.Transcript, confidence }; }); return { language_code: language, channel_tag: channel, is_final: evt.Transcript?.Results[0].IsPartial === false, alternatives, vendor: { name: 'aws', evt: copy } }; } }; const normalizeAssemblyAi = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); return { language_code: language, channel_tag: channel, is_final: evt.message_type === 'FinalTranscript', 
alternatives: [ { confidence: evt.confidence, transcript: evt.text, } ], vendor: { name: 'assemblyai', evt: copy } }; }; const normalizeVoxist = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); return { language_code: language, channel_tag: channel, is_final: evt.type === 'final', alternatives: [ { confidence: 1.00, transcript: evt.text, } ], vendor: { name: 'voxist', evt: copy } }; }; const normalizeCartesia = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); return { language_code: language, channel_tag: channel, is_final: evt.is_final, alternatives: [ { confidence: 1.00, transcript: evt.text, } ], vendor: { name: 'cartesia', evt: copy } }; }; const normalizeSpeechmatics = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); const is_final = evt.message === 'AddTranscript'; const words = evt.results?.filter((r) => r.type === 'word') || []; const confidence = words.length > 0 ? words.reduce((acc, word) => acc + word.alternatives[0].confidence, 0) / words.length : 0; const alternative = { confidence, transcript: evt.metadata?.transcript }; const obj = { language_code: language, channel_tag: channel, is_final, alternatives: [alternative], vendor: { name: 'speechmatics', evt: copy } }; return obj; }; const calculateConfidence = (logprobsArray) => { // Sum the individual log probabilities const totalLogProb = logprobsArray.reduce((sum, tokenInfo) => sum + tokenInfo.logprob, 0); // Convert the total log probability back to a regular probability const confidence = Math.exp(totalLogProb); return confidence; }; const normalizeOpenAI = (evt, channel, language) => { const copy = JSON.parse(JSON.stringify(evt)); const obj = { language_code: language, channel_tag: channel, is_final: true, alternatives: [ { transcript: evt.transcript, confidence: evt.logprobs ? 
calculateConfidence(evt.logprobs) : 1.0, } ], vendor: { name: 'openai', evt: copy } }; return obj; }; module.exports = (logger) => { const normalizeTranscription = (evt, vendor, channel, language, shortUtterance, punctuation) => { //logger.debug({ evt, vendor, channel, language }, 'normalizeTranscription'); switch (vendor) { case 'deepgram': return normalizeDeepgram(evt, channel, language, shortUtterance); case 'microsoft': return normalizeMicrosoft(evt, channel, language, punctuation); case 'google': return normalizeGoogle(evt, channel, language); case 'aws': return normalizeAws(evt, channel, language); case 'nuance': return normalizeNuance(evt, channel, language); case 'ibm': return normalizeIbm(evt, channel, language); case 'nvidia': return normalizeNvidia(evt, channel, language); case 'soniox': return normalizeSoniox(evt, channel, language); case 'cobalt': return normalizeCobalt(evt, channel, language); case 'assemblyai': return normalizeAssemblyAi(evt, channel, language, shortUtterance); case 'voxist': return normalizeVoxist(evt, channel, language); case 'cartesia': return normalizeCartesia(evt, channel, language); case 'verbio': return normalizeVerbio(evt, channel, language); case 'speechmatics': return normalizeSpeechmatics(evt, channel, language); case 'openai': return normalizeOpenAI(evt, channel, language); default: if (vendor.startsWith('custom:')) { return normalizeCustom(evt, channel, language, vendor); } logger.error(`Unknown vendor ${vendor}`); return evt; } }; const setChannelVarsForStt = (task, sttCredentials, language, rOpts = {}) => { let opts = {}; const vendor = rOpts.vendor; if ('google' === vendor) { const useV2 = rOpts.googleOptions?.serviceVersion === 'v2'; const version = useV2 ? 
'v2' : 'v1'; let {model} = rOpts; model = model || selectDefaultGoogleModel(task, language, version); opts = { ...opts, ...(sttCredentials && {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}), ...(rOpts.separateRecognitionPerChannel && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}), ...(rOpts.separateRecognitionPerChanne === false && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}), ...(rOpts.profanityFilter && {GOOGLE_SPEECH_PROFANITY_FILTER: 1}), ...(rOpts.punctuation && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}), ...(rOpts.words && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}), ...(rOpts.singleUtterance && {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}), ...(rOpts.diarization && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}), ...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 && {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}), ...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 && {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}), ...(rOpts.enhancedModel !== false && {GOOGLE_SPEECH_USE_ENHANCED: 1}), ...(rOpts.profanityFilter === false && {GOOGLE_SPEECH_PROFANITY_FILTER: 0}), ...(rOpts.punctuation === false && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}), ...(rOpts.words == false && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}), ...(rOpts.diarization === false && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}), ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'string' && {GOOGLE_SPEECH_HINTS: rOpts.hints.join(',')}), ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'object' && {GOOGLE_SPEECH_HINTS: JSON.stringify(rOpts.hints)}), ...(typeof rOpts.hintsBoost === 'number' && {GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}), // When altLanguages is emptylist, we have to send value to freeswitch to clear the previous settings ...(rOpts.altLanguages && {GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}), 
...(rOpts.interactionType && {GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}), ...{GOOGLE_SPEECH_MODEL: rOpts.model || model}, ...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}), GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line', ...(useV2 && { GOOGLE_SPEECH_RECOGNIZER_PARENT: `projects/${sttCredentials.credentials.project_id}/locations/global`, GOOGLE_SPEECH_CLOUD_SERVICES_VERSION: 'v2', ...(rOpts.googleOptions?.speechStartTimeoutMs && { GOOGLE_SPEECH_START_TIMEOUT_MS: rOpts.googleOptions.speechStartTimeoutMs }), ...(rOpts.googleOptions?.speechEndTimeoutMs && { GOOGLE_SPEECH_END_TIMEOUT_MS: rOpts.googleOptions.speechEndTimeoutMs }), ...(rOpts.googleOptions?.transcriptNormalization && { GOOGLE_SPEECH_TRANSCRIPTION_NORMALIZATION: JSON.stringify(rOpts.googleOptions.transcriptNormalization) }), ...(rOpts.googleOptions?.enableVoiceActivityEvents && { GOOGLE_SPEECH_ENABLE_VOICE_ACTIVITY_EVENTS: rOpts.googleOptions.enableVoiceActivityEvents }), ...(rOpts.sgoogleOptions?.recognizerId) && {GOOGLE_SPEECH_RECOGNIZER_ID: rOpts.googleOptions.recognizerId}, ...(rOpts.googleOptions?.enableVoiceActivityEvents && { GOOGLE_SPEECH_ENABLE_VOICE_ACTIVITY_EVENTS: rOpts.googleOptions.enableVoiceActivityEvents }), }), }; } else if (['aws', 'polly'].includes(vendor)) { const {awsOptions = {}} = rOpts; const vocabularyName = awsOptions.vocabularyName || rOpts.vocabularyName; const vocabularyFilterName = awsOptions.vocabularyFilterName || rOpts.vocabularyFilterName; const filterMethod = awsOptions.vocabularyFilterMethod || rOpts.filterMethod; opts = { ...opts, ...(vocabularyName && {AWS_VOCABULARY_NAME: vocabularyName}), ...(vocabularyFilterName && {AWS_VOCABULARY_FILTER_NAME: vocabularyFilterName}), ...(filterMethod && {AWS_VOCABULARY_FILTER_METHOD: filterMethod}), ...(sttCredentials && { AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId, AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey, AWS_REGION: sttCredentials.region, 
AWS_SECURITY_TOKEN: sttCredentials.securityToken }), ...(awsOptions.accessKey && {AWS_ACCESS_KEY_ID: awsOptions.accessKey}), ...(awsOptions.secretKey && {AWS_SECRET_ACCESS_KEY: awsOptions.secretKey}), ...(awsOptions.region && {AWS_REGION: awsOptions.region}), ...(awsOptions.securityToken && {AWS_SECURITY_TOKEN: awsOptions.securityToken}), ...(awsOptions.languageModelName && {AWS_LANGUAGE_MODEL_NAME: awsOptions.languageModelName}), ...(awsOptions.piiEntityTypes?.length && {AWS_PII_ENTITY_TYPES: awsOptions.piiEntityTypes.join(',')}), ...(awsOptions.piiIdentifyEntities && {AWS_PII_IDENTIFY_ENTITIES: true}), ...(awsOptions.languageModelName && {AWS_LANGUAGE_MODEL_NAME: awsOptions.languageModelName}), }; } else if ('microsoft' === vendor) { const {azureOptions = {}} = rOpts; opts = { ...opts, ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'string' && {AZURE_SPEECH_HINTS: rOpts.hints.map((h) => h.trim()).join(',')}), ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'object' && {AZURE_SPEECH_HINTS: rOpts.hints.map((h) => h.phrase).join(',')}), // When altLanguages is emptylist, we have to send value to freeswitch to clear the previous settings ...(rOpts.altLanguages && {AZURE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}), ...(rOpts.requestSnr && {AZURE_REQUEST_SNR: 1}), ...(rOpts.profanityOption && {AZURE_PROFANITY_OPTION: rOpts.profanityOption}), ...(sttCredentials.use_custom_stt && sttCredentials.custom_stt_endpoint_url && {AZURE_SERVICE_ENDPOINT: sttCredentials.custom_stt_endpoint_url}), ...(rOpts.azureServiceEndpoint && {AZURE_SERVICE_ENDPOINT: rOpts.azureServiceEndpoint}), ...(rOpts.initialSpeechTimeoutMs > 0 && {AZURE_INITIAL_SPEECH_TIMEOUT_MS: rOpts.initialSpeechTimeoutMs}), ...(rOpts.requestSnr && {AZURE_REQUEST_SNR: 1}), ...(rOpts.audioLogging && {AZURE_AUDIO_LOGGING: 1}), ...{AZURE_USE_OUTPUT_FORMAT_DETAILED: 1}, ...(azureOptions.speechSegmentationSilenceTimeoutMs && 
{AZURE_SPEECH_SEGMENTATION_SILENCE_TIMEOUT_MS: azureOptions.speechSegmentationSilenceTimeoutMs}), ...(azureOptions.languageIdMode && {AZURE_LANGUAGE_ID_MODE: azureOptions.languageIdMode}), ...(azureOptions.postProcessing && {AZURE_POST_PROCESSING_OPTION: azureOptions.postProcessing}), ...(sttCredentials && { ...(sttCredentials.api_key && {AZURE_SUBSCRIPTION_KEY: sttCredentials.api_key}), ...(sttCredentials.region && {AZURE_REGION: sttCredentials.region}), }), ...(sttCredentials.use_custom_stt && sttCredentials.custom_stt_endpoint && {AZURE_SERVICE_ENDPOINT_ID: sttCredentials.custom_stt_endpoint}), //azureSttEndpointId overrides sttCredentials.custom_stt_endpoint ...(rOpts.azureSttEndpointId && {AZURE_SERVICE_ENDPOINT_ID: rOpts.azureSttEndpointId}), ...(azureOptions.speechRecognitionMode && {AZURE_RECOGNITION_MODE: azureOptions.speechRecognitionMode}), }; } else if ('nuance' === vendor) { /** * Note: all nuance options are in recognizer.nuanceOptions, should migrate * other vendor settings to similar nested structure */ const {nuanceOptions = {}} = rOpts; opts = { ...opts, ...(sttCredentials.access_token) && {NUANCE_ACCESS_TOKEN: sttCredentials.access_token}, ...(sttCredentials.nuance_stt_uri) && {NUANCE_KRYPTON_ENDPOINT: sttCredentials.nuance_stt_uri}, ...(nuanceOptions.topic) && {NUANCE_TOPIC: nuanceOptions.topic}, ...(nuanceOptions.utteranceDetectionMode) && {NUANCE_UTTERANCE_DETECTION_MODE: nuanceOptions.utteranceDetectionMode}, ...(nuanceOptions.punctuation || rOpts.punctuation) && {NUANCE_PUNCTUATION: nuanceOptions.punctuation}, ...(nuanceOptions.profanityFilter) && {NUANCE_FILTER_PROFANITY: nuanceOptions.profanityFilter}, ...(nuanceOptions.includeTokenization) && {NUANCE_INCLUDE_TOKENIZATION: nuanceOptions.includeTokenization}, ...(nuanceOptions.discardSpeakerAdaptation) && {NUANCE_DISCARD_SPEAKER_ADAPTATION: nuanceOptions.discardSpeakerAdaptation}, ...(nuanceOptions.suppressCallRecording) && {NUANCE_SUPPRESS_CALL_RECORDING: 
nuanceOptions.suppressCallRecording}, ...(nuanceOptions.maskLoadFailures) && {NUANCE_MASK_LOAD_FAILURES: nuanceOptions.maskLoadFailures}, ...(nuanceOptions.suppressInitialCapitalization) && {NUANCE_SUPPRESS_INITIAL_CAPITALIZATION: nuanceOptions.suppressInitialCapitalization}, ...(nuanceOptions.allowZeroBaseLmWeight) && {NUANCE_ALLOW_ZERO_BASE_LM_WEIGHT: nuanceOptions.allowZeroBaseLmWeight}, ...(nuanceOptions.filterWakeupWord) && {NUANCE_FILTER_WAKEUP_WORD: nuanceOptions.filterWakeupWord}, ...(nuanceOptions.resultType) && {NUANCE_RESULT_TYPE: nuanceOptions.resultType || rOpts.interim ? 'partial' : 'final'}, ...(nuanceOptions.noInputTimeoutMs) && {NUANCE_NO_INPUT_TIMEOUT_MS: nuanceOptions.noInputTimeoutMs}, ...(nuanceOptions.recognitionTimeoutMs) && {NUANCE_RECOGNITION_TIMEOUT_MS: nuanceOptions.recognitionTimeoutMs}, ...(nuanceOptions.utteranceEndSilenceMs) && {NUANCE_UTTERANCE_END_SILENCE_MS: nuanceOptions.utteranceEndSilenceMs}, ...(nuanceOptions.maxHypotheses) && {NUANCE_MAX_HYPOTHESES: nuanceOptions.maxHypotheses}, ...(nuanceOptions.speechDomain) && {NUANCE_SPEECH_DOMAIN: nuanceOptions.speechDomain}, ...(nuanceOptions.formatting) && {NUANCE_FORMATTING: nuanceOptions.formatting}, ...(nuanceOptions.resources) && {NUANCE_RESOURCES: JSON.stringify(nuanceOptions.resources)}, }; } else if ('deepgram' === vendor) { let model = rOpts.deepgramOptions?.model || rOpts.model || sttCredentials.model_id; const {deepgramOptions = {}} = rOpts; const deepgramUri = deepgramOptions.deepgramSttUri || sttCredentials.deepgram_stt_uri; const useTls = deepgramOptions.deepgramSttUseTls || sttCredentials.deepgram_stt_use_tls; /* default to a sensible model if not supplied */ if (!model) { model = selectDefaultDeepgramModel(task, language); } opts = { ...opts, DEEPGRAM_SPEECH_MODEL: model, ...(deepgramUri && {DEEPGRAM_URI: deepgramUri}), ...(deepgramUri && useTls && {DEEPGRAM_USE_TLS: 1}), ...(sttCredentials.api_key) && {DEEPGRAM_API_KEY: sttCredentials.api_key}, ...(deepgramOptions.tier) 
&& {DEEPGRAM_SPEECH_TIER: deepgramOptions.tier}, ...(deepgramOptions.punctuate) && {DEEPGRAM_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}, ...(deepgramOptions.smartFormatting) && {DEEPGRAM_SPEECH_ENABLE_SMART_FORMAT: 1}, ...(deepgramOptions.noDelay) && {DEEPGRAM_SPEECH_ENABLE_NO_DELAY: 1}, ...(deepgramOptions.profanityFilter) && {DEEPGRAM_SPEECH_PROFANITY_FILTER: 1}, ...(deepgramOptions.redact) && {DEEPGRAM_SPEECH_REDACT: deepgramOptions.redact}, ...(deepgramOptions.diarize) && {DEEPGRAM_SPEECH_DIARIZE: 1}, ...(deepgramOptions.diarizeVersion) && {DEEPGRAM_SPEECH_DIARIZE_VERSION: deepgramOptions.diarizeVersion}, ...(deepgramOptions.ner) && {DEEPGRAM_SPEECH_NER: 1}, ...(deepgramOptions.alternatives) && {DEEPGRAM_SPEECH_ALTERNATIVES: deepgramOptions.alternatives}, ...(deepgramOptions.numerals) && {DEEPGRAM_SPEECH_NUMERALS: deepgramOptions.numerals}, ...(deepgramOptions.search) && {DEEPGRAM_SPEECH_SEARCH: deepgramOptions.search.join(',')}, ...(deepgramOptions.replace) && {DEEPGRAM_SPEECH_REPLACE: deepgramOptions.replace.join(',')}, ...(rOpts.hints && rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' && {DEEPGRAM_SPEECH_KEYWORDS: rOpts.hints.map((h) => h.trim()).join(',')}), ...(rOpts.hints && rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' && {DEEPGRAM_SPEECH_KEYWORDS: rOpts.hints.map((h) => h.phrase).join(',')}), ...(deepgramOptions.keywords) && {DEEPGRAM_SPEECH_KEYWORDS: deepgramOptions.keywords.join(',')}, ...('endpointing' in deepgramOptions) && {DEEPGRAM_SPEECH_ENDPOINTING: deepgramOptions.endpointing === false ? 'false' : deepgramOptions.endpointing, // default DEEPGRAM_SPEECH_UTTERANCE_END_MS is 1000, will be override by user settings later if there is. 
DEEPGRAM_SPEECH_UTTERANCE_END_MS: 1000}, ...(deepgramOptions.utteranceEndMs) && {DEEPGRAM_SPEECH_UTTERANCE_END_MS: deepgramOptions.utteranceEndMs}, ...(deepgramOptions.vadTurnoff) && {DEEPGRAM_SPEECH_VAD_TURNOFF: deepgramOptions.vadTurnoff}, ...(deepgramOptions.tag) && {DEEPGRAM_SPEECH_TAG: deepgramOptions.tag}, ...(deepgramOptions.version) && {DEEPGRAM_SPEECH_MODEL_VERSION: deepgramOptions.version}, ...(deepgramOptions.fillerWords) && {DEEPGRAM_SPEECH_FILLER_WORDS: deepgramOptions.fillerWords}, ...((Array.isArray(deepgramOptions.keyterms) && deepgramOptions.keyterms.length > 0) && {DEEPGRAM_SPEECH_KEYTERMS: deepgramOptions.keyterms.join(',')}) }; } else if ('soniox' === vendor) { const {sonioxOptions = {}} = rOpts; const {storage = {}} = sonioxOptions; opts = { ...opts, ...(sttCredentials.api_key) && {SONIOX_API_KEY: sttCredentials.api_key}, ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'string' && {SONIOX_HINTS: rOpts.hints.join(',')}), ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'object' && {SONIOX_HINTS: JSON.stringify(rOpts.hints)}), ...(typeof rOpts.hintsBoost === 'number' && {SONIOX_HINTS_BOOST: rOpts.hintsBoost}), ...(sonioxOptions.model) && {SONIOX_MODEL: sonioxOptions.model}, ...((sonioxOptions.profanityFilter || rOpts.profanityFilter) && {SONIOX_PROFANITY_FILTER: 1}), ...(storage?.id && {SONIOX_STORAGE_ID: storage.id}), ...(storage?.id && storage?.title && {SONIOX_STORAGE_TITLE: storage.title}), ...(storage?.id && storage?.disableStoreAudio && {SONIOX_STORAGE_DISABLE_AUDIO: 1}), ...(storage?.id && storage?.disableStoreTranscript && {SONIOX_STORAGE_DISABLE_TRANSCRIPT: 1}), ...(storage?.id && storage?.disableSearch && {SONIOX_STORAGE_DISABLE_SEARCH: 1}) }; } else if ('ibm' === vendor) { const {ibmOptions = {}} = rOpts; opts = { ...opts, ...(sttCredentials.access_token) && {IBM_ACCESS_TOKEN: sttCredentials.access_token}, ...(sttCredentials.stt_region) && {IBM_SPEECH_REGION: sttCredentials.stt_region}, ...(sttCredentials.instance_id) 
&& {IBM_SPEECH_INSTANCE_ID: sttCredentials.instance_id}, ...(ibmOptions.model) && {IBM_SPEECH_MODEL: ibmOptions.model}, ...(ibmOptions.language_customization_id) && {IBM_SPEECH_LANGUAGE_CUSTOMIZATION_ID: ibmOptions.language_customization_id}, ...(ibmOptions.acoustic_customization_id) && {IBM_SPEECH_ACOUSTIC_CUSTOMIZATION_ID: ibmOptions.acoustic_customization_id}, ...(ibmOptions.baseModelVersion) && {IBM_SPEECH_BASE_MODEL_VERSION: ibmOptions.baseModelVersion}, ...(ibmOptions.watsonMetadata) && {IBM_SPEECH_WATSON_METADATA: ibmOptions.watsonMetadata}, ...(ibmOptions.watsonLearningOptOut) && {IBM_SPEECH_WATSON_LEARNING_OPT_OUT: ibmOptions.watsonLearningOptOut} }; } else if ('nvidia' === vendor) { const {nvidiaOptions = {}} = rOpts; const rivaUri = nvidiaOptions.rivaUri || sttCredentials.riva_server_uri; opts = { ...opts, ...((nvidiaOptions.profanityFilter || rOpts.profanityFilter) && {NVIDIA_PROFANITY_FILTER: 1}), ...(!(nvidiaOptions.profanityFilter || rOpts.profanityFilter) && {NVIDIA_PROFANITY_FILTER: 0}), ...((nvidiaOptions.punctuation || rOpts.punctuation) && {NVIDIA_PUNCTUATION: 1}), ...(!(nvidiaOptions.punctuation || rOpts.punctuation) && {NVIDIA_PUNCTUATION: 0}), ...((rOpts.words || nvidiaOptions.wordTimeOffsets) && {NVIDIA_WORD_TIME_OFFSETS: 1}), ...(!(rOpts.words || nvidiaOptions.wordTimeOffsets) && {NVIDIA_WORD_TIME_OFFSETS: 0}), ...(nvidiaOptions.maxAlternatives && {NVIDIA_MAX_ALTERNATIVES: nvidiaOptions.maxAlternatives}), ...(!nvidiaOptions.maxAlternatives && {NVIDIA_MAX_ALTERNATIVES: 1}), ...(rOpts.model && {NVIDIA_MODEL: rOpts.model}), ...(rivaUri && {NVIDIA_RIVA_URI: rivaUri}), ...(nvidiaOptions.verbatimTranscripts && {NVIDIA_VERBATIM_TRANSCRIPTS: 1}), ...(rOpts.diarization && {NVIDIA_SPEAKER_DIARIZATION: 1}), ...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 && {NVIDIA_DIARIZATION_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}), ...(rOpts.separateRecognitionPerChannel && {NVIDIA_SEPARATE_RECOGNITION_PER_CHANNEL: 1}), ...(rOpts.hints?.length > 0 && 
typeof rOpts.hints[0] === 'string' && {NVIDIA_HINTS: rOpts.hints.join(',')}), ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'object' && {NVIDIA_HINTS: JSON.stringify(rOpts.hints)}), ...(typeof rOpts.hintsBoost === 'number' && {NVIDIA_HINTS_BOOST: rOpts.hintsBoost}), ...(nvidiaOptions.customConfiguration && {NVIDIA_CUSTOM_CONFIGURATION: JSON.stringify(nvidiaOptions.customConfiguration)}), }; } else if ('cobalt' === vendor) { const {cobaltOptions = {}} = rOpts; const cobaltUri = cobaltOptions.serverUri || sttCredentials.cobalt_server_uri; opts = { ...opts, ...(rOpts.words && {COBALT_WORD_TIME_OFFSETS: 1}), ...(!rOpts.words && {COBALT_WORD_TIME_OFFSETS: 0}), ...(rOpts.model && {COBALT_MODEL: rOpts.model}), ...(cobaltUri && {COBALT_SERVER_URI: cobaltUri}), ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'string' && {COBALT_SPEECH_HINTS: rOpts.hints.join(',')}), ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'object' && {COBALT_SPEECH_HINTS: JSON.stringify(rOpts.hints)}), ...(rOpts.hints?.length > 0 && {COBALT_CONTEXT_TOKEN: cobaltOptions.contextToken || 'unk:default'}), ...(cobaltOptions.metadata && {COBALT_METADATA: cobaltOptions.metadata}), ...(cobaltOptions.enableConfusionNetwork && {COBALT_ENABLE_CONFUSION_NETWORK: 1}), ...(cobaltOptions.compiledContextData && {COBALT_COMPILED_CONTEXT_DATA: cobaltOptions.compiledContextData}), }; } else if ('assemblyai' === vendor) { opts = { ...opts, ...(sttCredentials.api_key) && {ASSEMBLYAI_API_KEY: sttCredentials.api_key}, ...(rOpts.hints?.length > 0 && {ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)}) }; } else if ('voxist' === vendor) { opts = { ...opts, ...(sttCredentials.api_key) && {VOXIST_API_KEY: sttCredentials.api_key}, }; } else if ('cartesia' === vendor) { opts = { ...opts, ...(sttCredentials.api_key && {CARTESIA_API_KEY: sttCredentials.api_key}), ...(sttCredentials.stt_model_id && { CARTESIA_MODEL_ID: sttCredentials.stt_model_id }) }; } else if ('openai' === vendor) { const 
{openaiOptions = {}} = rOpts; const model = openaiOptions.model || rOpts.model || sttCredentials.model_id || 'whisper-1'; const apiKey = openaiOptions.apiKey || sttCredentials.api_key; opts = { OPENAI_MODEL: model, OPENAI_API_KEY: apiKey, ...opts, ...(openaiOptions.prompt && {OPENAI_PROMPT: openaiOptions.prompt}), ...(openaiOptions.input_audio_noise_reduction && {OPENAI_INPUT_AUDIO_NOISE_REDUCTION: openaiOptions.input_audio_noise_reduction}), }; if (openaiOptions.turn_detection) { opts = { ...opts, OPENAI_TURN_DETECTION_TYPE: openaiOptions.turn_detection.type, ...(openaiOptions.turn_detection.threshold && { OPENAI_TURN_DETECTION_THRESHOLD: openaiOptions.turn_detection.threshold }), ...(openaiOptions.turn_detection.prefix_padding_ms && { OPENAI_TURN_DETECTION_PREFIX_PADDING_MS: openaiOptions.turn_detection.prefix_padding_ms }), ...(openaiOptions.turn_detection.silence_duration_ms && { OPENAI_TURN_DETECTION_SILENCE_DURATION_MS: openaiOptions.turn_detection.silence_duration_ms }), }; } } else if ('verbio' === vendor) { const {verbioOptions = {}} = rOpts; opts = { ...opts, ...(sttCredentials.access_token && { VERBIO_ACCESS_TOKEN: sttCredentials.access_token}), ...(sttCredentials.engine_version && {VERBIO_ENGINE_VERSION: sttCredentials.engine_version}), ...(language && {VERBIO_LANGUAGE: language}), ...(verbioOptions.enable_formatting && {VERBIO_ENABLE_FORMATTING: verbioOptions.enable_formatting}), ...(verbioOptions.enable_diarization && {VERBIO_ENABLE_DIARIZATION: verbioOptions.enable_diarization}), ...(verbioOptions.topic && {VERBIO_TOPIC: verbioOptions.topic}), ...(verbioOptions.inline_grammar && {VERBIO_INLINE_GRAMMAR: verbioOptions.inline_grammar}), ...(verbioOptions.grammar_uri && {VERBIO_GRAMMAR_URI: verbioOptions.grammar_uri}), ...(verbioOptions.label && {VERBIO_LABEL: verbioOptions.label}), ...(verbioOptions.recognition_timeout && {VERBIO_RECOGNITION_TIMEOUT: verbioOptions.recognition_timeout}), ...(verbioOptions.speech_complete_timeout && 
{VERBIO_SPEECH_COMPLETE_TIMEOUT: verbioOptions.speech_complete_timeout}), ...(verbioOptions.speech_incomplete_timeout && {VERBIO_SPEECH_INCOMPLETE_TIMEOUT: verbioOptions.speech_incomplete_timeout}), }; } else if ('speechmatics' === vendor) { const {speechmaticsOptions = {}} = rOpts; opts = { ...opts, ...(sttCredentials.api_key) && {SPEECHMATICS_API_KEY: sttCredentials.api_key}, ...(sttCredentials.speechmatics_stt_uri) && {SPEECHMATICS_HOST: sttCredentials.speechmatics_stt_uri}, ...(rOpts.hints?.length > 0 && {SPEECHMATICS_SPEECH_HINTS: rOpts.hints.join(',')}), ...(speechmaticsOptions.translation_config && { SPEECHMATICS_TRANSLATION_LANGUAGES: speechmaticsOptions.translation_config.target_languages.join(','), SPEECHMATICS_TRANSLATION_PARTIALS: speechmaticsOptions.translation_config.enable_partials ? 1 : 0 } ), ...(speechmaticsOptions.transcription_config?.domain && {SPEECHMATICS_DOMAIN: speechmaticsOptions.transcription_config.domain}), ...{SPEECHMATICS_MAX_DELAY: speechmaticsOptions.transcription_config?.max_delay || 0.7}, ...{SPEECHMATICS_MAX_DELAY_MODE: speechmaticsOptions.transcription_config?.max_delay_mode || 'flexible'}, ...(speechmaticsOptions.transcription_config?.diarization && {SPEECHMATICS_DIARIZATION: speechmaticsOptions.transcription_config.diarization}), ...(speechmaticsOptions.transcription_config?.speaker_diarization_config?.speaker_sensitivity && {SPEECHMATICS_DIARIZATION_SPEAKER_SENSITIVITY: speechmaticsOptions.transcription_config.speaker_diarization_config.speaker_sensitivity}), ...(speechmaticsOptions.transcription_config?.speaker_diarization_config?.max_speakers && {SPEECHMATICS_DIARIZATION_MAX_SPEAKERS: speechmaticsOptions.transcription_config.speaker_diarization_config.max_speakers}), ...(speechmaticsOptions.transcription_config?.output_locale && {SPEECHMATICS_OUTPUT_LOCALE: speechmaticsOptions.transcription_config.output_locale}), ...(speechmaticsOptions.transcription_config?.punctuation_overrides?.permitted_marks && 
{SPEECHMATICS_PUNCTUATION_ALLOWED: speechmaticsOptions.transcription_config.punctuation_overrides.permitted_marks.join(',')}), ...(speechmaticsOptions.transcription_config?.punctuation_overrides?.sensitivity && {SPEECHMATICS_PUNCTUATION_SENSITIVITY: speechmaticsOptions.transcription_config?.punctuation_overrides?.sensitivity}), ...(speechmaticsOptions.transcription_config?.operating_point && {SPEECHMATICS_OPERATING_POINT: speechmaticsOptions.transcription_config.operating_point}), ...(speechmaticsOptions.transcription_config?.enable_entities && {SPEECHMATICS_ENABLE_ENTTIES: speechmaticsOptions.transcription_config.enable_entities}), ...(speechmaticsOptions.transcription_config?.audio_filtering_config?.volume_threshold && {SPEECHMATICS_VOLUME_THRESHOLD: speechmaticsOptions.transcription_config.audio_filtering_config.volume_threshold}), ...(speechmaticsOptions.transcription_config?.transcript_filtering_config?.remove_disfluencies && {SPEECHMATICS_REMOVE_DISFLUENCIES: speechmaticsOptions.transcription_config.transcript_filtering_config.remove_disfluencies}) }; } else if (vendor.startsWith('custom:')) { let {options = {}} = rOpts.customOptions || {}; const {sampleRate} = rOpts.customOptions || {}; const {auth_token, custom_stt_url} = sttCredentials; options = { ...options, ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'string' && {hints: rOpts.hints}), ...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'object' && {hints: JSON.stringify(rOpts.hints)}), ...(typeof rOpts.hintsBoost === 'number' && {hintsBoost: rOpts.hintsBoost}), ...(task.cs?.callSid && {callSid: task.cs.callSid}) }; opts = { ...opts, ...(auth_token && {JAMBONZ_STT_API_KEY: auth_token}), JAMBONZ_STT_URL: custom_stt_url, ...(Object.keys(options).length > 0 && {JAMBONZ_STT_OPTIONS: JSON.stringify(options)}), ...(sampleRate && {JAMBONZ_STT_SAMPLING: sampleRate}) }; } (stickyVars[vendor] || []).forEach((key) => { if (!opts[key]) opts[key] = ''; }); return opts; }; const 
setSpeechCredentialsAtRuntime = (recognizer) => { if (!recognizer) return; if (recognizer.vendor === 'nuance') { const {clientId, secret, kryptonEndpoint} = recognizer.nuanceOptions || {}; if (clientId && secret) return {client_id: clientId, secret}; if (kryptonEndpoint) return {nuance_stt_uri: kryptonEndpoint}; } else if (recognizer.vendor === 'nvidia') { const {rivaUri} = recognizer.nvidiaOptions || {}; if (rivaUri) return {riva_uri: rivaUri}; } else if (recognizer.vendor === 'deepgram') { const {apiKey} = recognizer.deepgramOptions || {}; if (apiKey) return {api_key: apiKey}; } else if (recognizer.vendor === 'soniox') { const {apiKey} = recognizer.sonioxOptions || {}; if (apiKey) return {api_key: apiKey}; } else if (recognizer.vendor === 'cobalt') { const {serverUri} = recognizer.cobaltOptions || {}; if (serverUri) return {cobalt_server_uri: serverUri}; } else if (recognizer.vendor === 'ibm') { const {ttsApiKey, ttsRegion, sttApiKey, sttRegion, instanceId} = recognizer.ibmOptions || {}; if (ttsApiKey || sttApiKey) return { tts_api_key: ttsApiKey, tts_region: ttsRegion, stt_api_key: sttApiKey, stt_region: sttRegion, instance_id: instanceId }; } }; return { normalizeTranscription, setChannelVarsForStt, setSpeechCredentialsAtRuntime, compileSonioxTranscripts, consolidateTranscripts, }; };