initial changes for openai stt (#1127)

* initial changes for openai stt

* wip

* wip

* wip

* wip

* wip

* make minBargeinWordCount work for openai

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wipp

* wip

* wip

* wip

* openai stt: support for prompt templates

* lint

* wip

* support openai semantic_vad

* wip

* transcribe supports openai stt

* sip

* wip

* wip

* refactor list of stt vendors that dont need to be restarted after a final transcript

* cleanup

* wip

* cleanup

* wip

* wip

* wip

* remove credentials from log

* comment
This commit is contained in:
Dave Horton
2025-03-28 13:14:58 -04:00
committed by GitHub
parent ee846b283d
commit fcaf2e59e7
11 changed files with 382 additions and 29 deletions

View File

@@ -137,6 +137,18 @@
"Connect": "speechmatics_transcribe::connect",
"Error": "speechmatics_transcribe::error"
},
"OpenAITranscriptionEvents": {
"Transcription": "openai_transcribe::transcription",
"Translation": "openai_transcribe::translation",
"SpeechStarted": "openai_transcribe::speech_started",
"SpeechStopped": "openai_transcribe::speech_stopped",
"PartialTranscript": "openai_transcribe::partial_transcript",
"Info": "openai_transcribe::info",
"RecognitionStarted": "openai_transcribe::recognition_started",
"ConnectFailure": "openai_transcribe::connect_failed",
"Connect": "openai_transcribe::connect",
"Error": "openai_transcribe::error"
},
"JambonzTranscriptionEvents": {
"Transcription": "jambonz_transcribe::transcription",
"ConnectFailure": "jambonz_transcribe::connect_failed",

View File

@@ -142,6 +142,11 @@ const speechMapper = (cred) => {
obj.api_key = o.api_key;
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
}
else if ('openai' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
}
else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = o.auth_token;

View File

@@ -117,7 +117,16 @@ const stickyVars = {
'SPEECHMATICS_SPEECH_HINTS',
'SPEECHMATICS_TRANSLATION_LANGUAGES',
'SPEECHMATICS_TRANSLATION_PARTIALS'
]
],
openai: [
'OPENAI_API_KEY',
'OPENAI_MODEL',
'OPENAI_INPUT_AUDIO_NOISE_REDUCTION',
'OPENAI_TURN_DETECTION_TYPE',
'OPENAI_TURN_DETECTION_THRESHOLD',
'OPENAI_TURN_DETECTION_PREFIX_PADDING_MS',
'OPENAI_TURN_DETECTION_SILENCE_DURATION_MS',
],
};
/**
@@ -571,6 +580,35 @@ const normalizeSpeechmatics = (evt, channel, language) => {
return obj;
};
const calculateConfidence = (logprobsArray) => {
// Sum the individual log probabilities
const totalLogProb = logprobsArray.reduce((sum, tokenInfo) => sum + tokenInfo.logprob, 0);
// Convert the total log probability back to a regular probability
const confidence = Math.exp(totalLogProb);
return confidence;
};
const normalizeOpenAI = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
const obj = {
language_code: language,
channel_tag: channel,
is_final: true,
alternatives: [
{
transcript: evt.transcript,
confidence: evt.logprobs ? calculateConfidence(evt.logprobs) : 1.0,
}
],
vendor: {
name: 'openai',
evt: copy
}
};
return obj;
};
module.exports = (logger) => {
const normalizeTranscription = (evt, vendor, channel, language, shortUtterance, punctuation) => {
@@ -602,6 +640,8 @@ module.exports = (logger) => {
return normalizeVerbio(evt, channel, language);
case 'speechmatics':
return normalizeSpeechmatics(evt, channel, language);
case 'openai':
return normalizeOpenAI(evt, channel, language);
default:
if (vendor.startsWith('custom:')) {
return normalizeCustom(evt, channel, language, vendor);
@@ -968,6 +1008,36 @@ module.exports = (logger) => {
{VOXIST_API_KEY: sttCredentials.api_key},
};
}
else if ('openai' === vendor) {
const {openaiOptions = {}} = rOpts;
const model = openaiOptions.model || rOpts.model || sttCredentials.model_id || 'whisper-1';
const apiKey = openaiOptions.apiKey || sttCredentials.api_key;
opts = {
OPENAI_MODEL: model,
OPENAI_API_KEY: apiKey,
...opts,
...(openaiOptions.prompt && {OPENAI_PROMPT: openaiOptions.prompt}),
...(openaiOptions.input_audio_noise_reduction &&
{OPENAI_INPUT_AUDIO_NOISE_REDUCTION: openaiOptions.input_audio_noise_reduction}),
};
if (openaiOptions.turn_detection) {
opts = {
...opts,
OPENAI_TURN_DETECTION_TYPE: openaiOptions.turn_detection.type,
...(openaiOptions.turn_detection.threshold && {
OPENAI_TURN_DETECTION_THRESHOLD: openaiOptions.turn_detection.threshold
}),
...(openaiOptions.turn_detection.prefix_padding_ms && {
OPENAI_TURN_DETECTION_PREFIX_PADDING_MS: openaiOptions.turn_detection.prefix_padding_ms
}),
...(openaiOptions.turn_detection.silence_duration_ms && {
OPENAI_TURN_DETECTION_SILENCE_DURATION_MS: openaiOptions.turn_detection.silence_duration_ms
}),
};
}
}
else if ('verbio' === vendor) {
const {verbioOptions = {}} = rOpts;
opts = {