mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
initial changes for openai stt (#1127)
* initial changes for openai stt * wip * wip * wip * wip * wip * make minBargeinWordCount work for openai * wip * wip * wip * wip * wip * wip * wip * wipp * wip * wip * wip * openai stt: support for prompt templates * lint * wip * support openai semantic_vad * wip * transcribe supports openai stt * sip * wip * wip * refactor list of stt vendors that dont need to be restarted after a final transcript * cleanup * wip * cleanup * wip * wip * wip * remove credentials from log * comment
This commit is contained in:
@@ -137,6 +137,18 @@
|
||||
"Connect": "speechmatics_transcribe::connect",
|
||||
"Error": "speechmatics_transcribe::error"
|
||||
},
|
||||
"OpenAITranscriptionEvents": {
|
||||
"Transcription": "openai_transcribe::transcription",
|
||||
"Translation": "openai_transcribe::translation",
|
||||
"SpeechStarted": "openai_transcribe::speech_started",
|
||||
"SpeechStopped": "openai_transcribe::speech_stopped",
|
||||
"PartialTranscript": "openai_transcribe::partial_transcript",
|
||||
"Info": "openai_transcribe::info",
|
||||
"RecognitionStarted": "openai_transcribe::recognition_started",
|
||||
"ConnectFailure": "openai_transcribe::connect_failed",
|
||||
"Connect": "openai_transcribe::connect",
|
||||
"Error": "openai_transcribe::error"
|
||||
},
|
||||
"JambonzTranscriptionEvents": {
|
||||
"Transcription": "jambonz_transcribe::transcription",
|
||||
"ConnectFailure": "jambonz_transcribe::connect_failed",
|
||||
|
||||
@@ -142,6 +142,11 @@ const speechMapper = (cred) => {
|
||||
obj.api_key = o.api_key;
|
||||
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
|
||||
}
|
||||
else if ('openai' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
obj.model_id = o.model_id;
|
||||
}
|
||||
else if (obj.vendor.startsWith('custom:')) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.auth_token = o.auth_token;
|
||||
|
||||
@@ -117,7 +117,16 @@ const stickyVars = {
|
||||
'SPEECHMATICS_SPEECH_HINTS',
|
||||
'SPEECHMATICS_TRANSLATION_LANGUAGES',
|
||||
'SPEECHMATICS_TRANSLATION_PARTIALS'
|
||||
]
|
||||
],
|
||||
openai: [
|
||||
'OPENAI_API_KEY',
|
||||
'OPENAI_MODEL',
|
||||
'OPENAI_INPUT_AUDIO_NOISE_REDUCTION',
|
||||
'OPENAI_TURN_DETECTION_TYPE',
|
||||
'OPENAI_TURN_DETECTION_THRESHOLD',
|
||||
'OPENAI_TURN_DETECTION_PREFIX_PADDING_MS',
|
||||
'OPENAI_TURN_DETECTION_SILENCE_DURATION_MS',
|
||||
],
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -571,6 +580,35 @@ const normalizeSpeechmatics = (evt, channel, language) => {
|
||||
return obj;
|
||||
};
|
||||
|
||||
/**
 * Collapse per-token log probabilities into one overall confidence value.
 *
 * Adding the log probabilities and exponentiating the total is equivalent to
 * multiplying the individual token probabilities together.
 *
 * @param {Array<{logprob: number}>} logprobsArray - entries whose `logprob`
 *   property is read and summed (assumed numeric - confirm against the
 *   vendor event payload)
 * @returns {number} Math.exp of the summed log probabilities; an empty array
 *   yields 1
 */
const calculateConfidence = (logprobsArray) => {
  // Accumulate in array order so the floating point result is reproducible
  let logProbSum = 0;
  logprobsArray.forEach(({logprob}) => {
    logProbSum += logprob;
  });

  // Back from log space to an ordinary probability
  return Math.exp(logProbSum);
};
|
||||
|
||||
/**
 * Convert an OpenAI transcription event into the common transcript shape
 * returned by the other normalize* helpers in this module.
 *
 * @param {object} evt - vendor event; `transcript` and `logprobs` are read
 * @param {*} channel - value reported as `channel_tag`
 * @param {*} language - value reported as `language_code`
 * @returns {object} result that is always marked `is_final: true`, carries a
 *   single alternative, and embeds a JSON round-trip copy of the event under
 *   `vendor.evt`
 */
const normalizeOpenAI = (evt, channel, language) => {
  // Snapshot via JSON round-trip so the result does not alias the caller's event
  const rawEvent = JSON.parse(JSON.stringify(evt));
  const {transcript, logprobs} = evt;

  // Without logprobs there is nothing to score, so report full confidence
  const confidence = logprobs ? calculateConfidence(logprobs) : 1.0;

  return {
    language_code: language,
    channel_tag: channel,
    is_final: true,
    alternatives: [{transcript, confidence}],
    vendor: {name: 'openai', evt: rawEvent}
  };
};
|
||||
|
||||
module.exports = (logger) => {
|
||||
const normalizeTranscription = (evt, vendor, channel, language, shortUtterance, punctuation) => {
|
||||
|
||||
@@ -602,6 +640,8 @@ module.exports = (logger) => {
|
||||
return normalizeVerbio(evt, channel, language);
|
||||
case 'speechmatics':
|
||||
return normalizeSpeechmatics(evt, channel, language);
|
||||
case 'openai':
|
||||
return normalizeOpenAI(evt, channel, language);
|
||||
default:
|
||||
if (vendor.startsWith('custom:')) {
|
||||
return normalizeCustom(evt, channel, language, vendor);
|
||||
@@ -968,6 +1008,36 @@ module.exports = (logger) => {
|
||||
{VOXIST_API_KEY: sttCredentials.api_key},
|
||||
};
|
||||
}
|
||||
else if ('openai' === vendor) {
|
||||
const {openaiOptions = {}} = rOpts;
|
||||
const model = openaiOptions.model || rOpts.model || sttCredentials.model_id || 'whisper-1';
|
||||
const apiKey = openaiOptions.apiKey || sttCredentials.api_key;
|
||||
|
||||
opts = {
|
||||
OPENAI_MODEL: model,
|
||||
OPENAI_API_KEY: apiKey,
|
||||
...opts,
|
||||
...(openaiOptions.prompt && {OPENAI_PROMPT: openaiOptions.prompt}),
|
||||
...(openaiOptions.input_audio_noise_reduction &&
|
||||
{OPENAI_INPUT_AUDIO_NOISE_REDUCTION: openaiOptions.input_audio_noise_reduction}),
|
||||
};
|
||||
|
||||
if (openaiOptions.turn_detection) {
|
||||
opts = {
|
||||
...opts,
|
||||
OPENAI_TURN_DETECTION_TYPE: openaiOptions.turn_detection.type,
|
||||
...(openaiOptions.turn_detection.threshold && {
|
||||
OPENAI_TURN_DETECTION_THRESHOLD: openaiOptions.turn_detection.threshold
|
||||
}),
|
||||
...(openaiOptions.turn_detection.prefix_padding_ms && {
|
||||
OPENAI_TURN_DETECTION_PREFIX_PADDING_MS: openaiOptions.turn_detection.prefix_padding_ms
|
||||
}),
|
||||
...(openaiOptions.turn_detection.silence_duration_ms && {
|
||||
OPENAI_TURN_DETECTION_SILENCE_DURATION_MS: openaiOptions.turn_detection.silence_duration_ms
|
||||
}),
|
||||
};
|
||||
}
|
||||
}
|
||||
else if ('verbio' === vendor) {
|
||||
const {verbioOptions = {}} = rOpts;
|
||||
opts = {
|
||||
|
||||
Reference in New Issue
Block a user