mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-18 20:07:44 +00:00
initial changes for openai stt (#1127)
* initial changes for openai stt * wip * wip * wip * wip * wip * make minBargeinWordCount work for openai * wip * wip * wip * wip * wip * wip * wip * wipp * wip * wip * wip * openai stt: support for prompt templates * lint * wip * support openai semantic_vad * wip * transcribe supports openai stt * sip * wip * wip * refactor list of stt vendors that dont need to be restarted after a final transcript * cleanup * wip * cleanup * wip * wip * wip * remove credentials from log * comment
This commit is contained in:
@@ -135,6 +135,15 @@ class CallSession extends Emitter {
|
||||
this.requestor.on('handover', handover.bind(this));
|
||||
this.requestor.on('reconnect-error', this._onSessionReconnectError.bind(this));
|
||||
}
|
||||
|
||||
/**
|
||||
* Currently this is used for openai STT, which has a prompt parameter and
|
||||
* we have an experimental feature where you can send the conversation
|
||||
* history in the prompt
|
||||
*/
|
||||
this.conversationTurns = [];
|
||||
this.on('userSaid', this._onUserSaid.bind(this));
|
||||
this.on('botSaid', this._onBotSaid.bind(this));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1106,12 +1115,17 @@ class CallSession extends Emitter {
|
||||
};
|
||||
}
|
||||
else if ('speechmatics' === vendor) {
|
||||
this.logger.info({credential}, 'CallSession:getSpeechCredentials - speechmatics credential');
|
||||
return {
|
||||
api_key: credential.api_key,
|
||||
speechmatics_stt_uri: credential.speechmatics_stt_uri,
|
||||
};
|
||||
}
|
||||
else if ('openai' === vendor) {
|
||||
return {
|
||||
api_key: credential.api_key,
|
||||
model_id: credential.model_id,
|
||||
};
|
||||
}
|
||||
else if (vendor.startsWith('custom:')) {
|
||||
return {
|
||||
speech_credential_sid: credential.speech_credential_sid,
|
||||
@@ -1240,7 +1254,7 @@ class CallSession extends Emitter {
|
||||
this.logger.info('CallSession:exec all tasks complete');
|
||||
this._stopping = true;
|
||||
this._onTasksDone();
|
||||
this._clearResources();
|
||||
await this._clearResources();
|
||||
|
||||
|
||||
if (!this.isConfirmCallSession && !this.isSmsCallSession) sessionTracker.remove(this.callSid);
|
||||
@@ -2351,9 +2365,13 @@ Duration=${duration} `
|
||||
/**
|
||||
* Hang up the call and free the media endpoint
|
||||
*/
|
||||
_clearResources() {
|
||||
async _clearResources() {
|
||||
for (const resource of [this.dlg, this.ep, this.ep2]) {
|
||||
if (resource && resource.connected) resource.destroy();
|
||||
try {
|
||||
if (resource && resource.connected) await resource.destroy();
|
||||
} catch (err) {
|
||||
this.logger.info({err}, 'CallSession:_clearResources - error clearing resources');
|
||||
}
|
||||
}
|
||||
this.dlg = null;
|
||||
this.ep = null;
|
||||
@@ -3014,6 +3032,43 @@ Duration=${duration} `
|
||||
this._jambonzHangup('Max Call Duration');
|
||||
this._maxCallDurationTimer = null;
|
||||
}
|
||||
|
||||
_onUserSaid(transcript) {
|
||||
const count = this.conversationTurns.length;
|
||||
if (count === 0 || this.conversationTurns[count - 1].type === 'assistant') {
|
||||
this.conversationTurns.push({
|
||||
type: 'user',
|
||||
text: transcript
|
||||
});
|
||||
}
|
||||
else {
|
||||
this.conversationTurns[count - 1].text += ` ${transcript}`;
|
||||
}
|
||||
}
|
||||
|
||||
_onBotSaid(transcript) {
|
||||
const count = this.conversationTurns.length;
|
||||
if (count === 0 || this.conversationTurns[count - 1].type === 'user') {
|
||||
this.conversationTurns.push({
|
||||
type: 'assistant',
|
||||
text: transcript
|
||||
});
|
||||
}
|
||||
else {
|
||||
this.conversationTurns[count - 1].text += ` ${transcript}`;
|
||||
}
|
||||
}
|
||||
|
||||
getFormattedConversation(numTurns) {
|
||||
const turns = this.conversationTurns.slice(-numTurns);
|
||||
if (turns.length === 0) return null;
|
||||
return turns.map((t) => {
|
||||
if (t.type === 'user') {
|
||||
return `user: ${t.text}`;
|
||||
}
|
||||
return `assistant: ${t.text}`;
|
||||
}).join('\n');
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = CallSession;
|
||||
|
||||
@@ -63,7 +63,7 @@ class RestCallSession extends CallSession {
|
||||
this.callInfo.callTerminationBy = terminatedBy;
|
||||
const duration = moment().diff(this.dlg.connectTime, 'seconds');
|
||||
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});
|
||||
this.logger.debug(`RestCallSession: called party hung up by ${terminatedBy}`);
|
||||
this.logger.info(`RestCallSession: called party hung up by ${terminatedBy}`);
|
||||
this._callReleased();
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ const {
|
||||
JambonzTranscriptionEvents,
|
||||
AssemblyAiTranscriptionEvents,
|
||||
VoxistTranscriptionEvents,
|
||||
OpenAITranscriptionEvents,
|
||||
VadDetection,
|
||||
VerbioTranscriptionEvents,
|
||||
SpeechmaticsTranscriptionEvents
|
||||
@@ -83,6 +84,7 @@ class TaskGather extends SttTask {
|
||||
this._bufferedTranscripts = [];
|
||||
this.partialTranscriptsCount = 0;
|
||||
this.bugname_prefix = 'gather_';
|
||||
|
||||
}
|
||||
|
||||
get name() { return TaskName.Gather; }
|
||||
@@ -239,6 +241,7 @@ class TaskGather extends SttTask {
|
||||
const {span, ctx} = this.startChildSpan(`nested:${this.sayTask.summary}`);
|
||||
const process = () => {
|
||||
this.logger.debug('Gather: nested say task completed');
|
||||
this.playComplete = true;
|
||||
if (!this.listenDuringPrompt) {
|
||||
startDtmfListener();
|
||||
}
|
||||
@@ -269,6 +272,7 @@ class TaskGather extends SttTask {
|
||||
const {span, ctx} = this.startChildSpan(`nested:${this.playTask.summary}`);
|
||||
const process = () => {
|
||||
this.logger.debug('Gather: nested play task completed');
|
||||
this.playComplete = true;
|
||||
if (!this.listenDuringPrompt) {
|
||||
startDtmfListener();
|
||||
}
|
||||
@@ -559,6 +563,31 @@ class TaskGather extends SttTask {
|
||||
|
||||
break;
|
||||
|
||||
case 'openai':
|
||||
this.bugname = `${this.bugname_prefix}openai_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
ep, OpenAITranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
this.addCustomEventListener(
|
||||
ep, OpenAITranscriptionEvents.SpeechStarted, this._onOpenAISpeechStarted.bind(this, cs, ep));
|
||||
this.addCustomEventListener(
|
||||
ep, OpenAITranscriptionEvents.SpeechStopped, this._onOpenAISpeechStopped.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, OpenAITranscriptionEvents.Connect,
|
||||
this._onVendorConnect.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, OpenAITranscriptionEvents.ConnectFailure,
|
||||
this._onVendorConnectFailure.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, OpenAITranscriptionEvents.Error,
|
||||
this._onOpenAIErrror.bind(this, cs, ep));
|
||||
|
||||
/* openai delta transcripts are useful only for minBargeinWordCount eval */
|
||||
if (this.minBargeinWordCount > 1) {
|
||||
this.openaiPartials = [];
|
||||
opts.OPENAI_WANT_PARTIALS = 1;
|
||||
this.addCustomEventListener(
|
||||
ep, OpenAITranscriptionEvents.PartialTranscript, this._onOpenAIPartialTranscript.bind(this, cs, ep));
|
||||
}
|
||||
this.modelSupportsConversationTracking = opts.OPENAI_MODEL !== 'whisper-1';
|
||||
break;
|
||||
|
||||
default:
|
||||
if (this.vendor.startsWith('custom:')) {
|
||||
this.bugname = `${this.bugname_prefix}${this.vendor}_transcribe`;
|
||||
@@ -590,6 +619,25 @@ class TaskGather extends SttTask {
|
||||
bugname: this.bugname
|
||||
}, 'Gather:_startTranscribing');
|
||||
|
||||
|
||||
/* special feature for openai: we can provide a prompt that includes recent conversation history */
|
||||
let prompt;
|
||||
if (this.vendor === 'openai') {
|
||||
if (this.modelSupportsConversationTracking) {
|
||||
prompt = this.formatOpenAIPrompt(this.cs, {
|
||||
prompt: this.data.recognizer?.openaiOptions?.prompt,
|
||||
hintsTemplate: this.data.recognizer?.openaiOptions?.promptTemplates?.hintsTemplate,
|
||||
// eslint-disable-next-line max-len
|
||||
conversationHistoryTemplate: this.data.recognizer?.openaiOptions?.promptTemplates?.conversationHistoryTemplate,
|
||||
hints: this.data.recognizer?.hints,
|
||||
});
|
||||
this.logger.debug({prompt}, 'Gather:_startTranscribing - created an openai prompt');
|
||||
}
|
||||
else if (this.data.recognizer?.hints?.length > 0) {
|
||||
prompt = this.data.recognizer?.hints.join(', ');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: we don't need to ask deepgram for interim results, because they
|
||||
* already send us words as they are finalized (is_final=true) even before
|
||||
@@ -601,6 +649,7 @@ class TaskGather extends SttTask {
|
||||
interim: this.interim,
|
||||
bugname: this.bugname,
|
||||
hostport: this.hostport,
|
||||
prompt
|
||||
}).catch((err) => {
|
||||
const {writeAlerts, AlertType} = this.cs.srf.locals;
|
||||
this.logger.error(err, 'TaskGather:_startTranscribing error');
|
||||
@@ -781,7 +830,11 @@ class TaskGather extends SttTask {
|
||||
const bugname = fsEvent.getHeader('media-bugname');
|
||||
const finished = fsEvent.getHeader('transcription-session-finished');
|
||||
this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription raw transcript');
|
||||
if (bugname && this.bugname !== bugname) return;
|
||||
if (bugname && this.bugname !== bugname) {
|
||||
this.logger.debug(
|
||||
`Gather:_onTranscription - ignoring transcript from ${bugname} because our bug is ${this.bugname}`);
|
||||
return;
|
||||
}
|
||||
if (finished === 'true') return;
|
||||
|
||||
if (this.vendor === 'ibm' && evt?.state === 'listening') return;
|
||||
@@ -1084,6 +1137,33 @@ class TaskGather extends SttTask {
|
||||
this._onVendorError(cs, _ep, {error: JSON.stringify(e)});
|
||||
}
|
||||
|
||||
async _onOpenAIErrror(cs, _ep, evt) {
|
||||
// eslint-disable-next-line no-unused-vars
|
||||
const {message, ...e} = evt;
|
||||
this._onVendorError(cs, _ep, {error: JSON.stringify(e)});
|
||||
}
|
||||
|
||||
async _onOpenAISpeechStarted(cs, _ep, evt) {
|
||||
this.logger.debug({evt}, 'TaskGather:_onOpenAISpeechStarted');
|
||||
}
|
||||
|
||||
async _onOpenAISpeechStopped(cs, _ep, evt) {
|
||||
this.logger.debug({evt}, 'TaskGather:_onOpenAISpeechStopped');
|
||||
}
|
||||
|
||||
async _onOpenAIPartialTranscript(cs, _ep, evt) {
|
||||
if (!this.playComplete) {
|
||||
const words = evt.delta.split(' ').filter((w) => /[A-Za-z0-0]/.test(w));
|
||||
this.openaiPartials.push(...words);
|
||||
this.logger.debug({words, partials: this.openaiPartials, evt}, 'TaskGather:_onOpenAIPartialTranscript - words');
|
||||
if (this.openaiPartials.length >= this.minBargeinWordCount) {
|
||||
this.logger.debug({partials: this.openaiPartials}, 'killing audio due to speech (openai)');
|
||||
this._killAudio(cs);
|
||||
this.notifyStatus({event: 'speech-bargein-detected', words: this.openaiPartials});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async _onVendorError(cs, _ep, evt) {
|
||||
super._onVendorError(cs, _ep, evt);
|
||||
if (!(await this._startFallback(cs, _ep, evt))) {
|
||||
@@ -1193,6 +1273,7 @@ class TaskGather extends SttTask {
|
||||
}
|
||||
}
|
||||
else if (reason.startsWith('speech')) {
|
||||
this.cs.emit('userSaid', evt.alternatives[0].transcript);
|
||||
if (this.parentTask) this.parentTask.emit('transcription', evt);
|
||||
else {
|
||||
this.emit('transcription', evt);
|
||||
|
||||
@@ -5,6 +5,30 @@ const { TaskPreconditions, CobaltTranscriptionEvents } = require('../utils/const
|
||||
const { SpeechCredentialError } = require('../utils/error');
|
||||
const {JAMBONES_AWS_TRANSCRIBE_USE_GRPC} = require('../config');
|
||||
|
||||
/**
 * Normalize the conversation-turns placeholder of a prompt template.
 *
 * The first `{{turns}}` or `{{turns:N}}` placeholder found in the input is
 * rewritten to the plain `{{turns}}` form and the requested count N, if any,
 * is returned alongside it.
 *
 * @example
 * processTurnString('Please insert turns here: {{turns:4}}');
 * // -> { processed: 'Please insert turns here: {{turns}}', turns: 4 }
 *
 * processTurnString('Please insert turns here: {{turns}}');
 * // -> { processed: 'Please insert turns here: {{turns}}', turns: null }
 *
 * @param {string} input - the template text
 * @returns {{processed: string, turns: (number|null)}} the normalized template
 * and the requested number of turns (null when no count was given)
 */
const processTurnString = (input) => {
  const placeholder = /\{\{turns(?::(\d+))?\}\}/;
  const found = input.match(placeholder);

  /* no placeholder at all: hand the template back untouched */
  if (!found) {
    return { processed: input, turns: null };
  }

  /* the first capture group holds the optional ":N" count */
  const [, count] = found;
  return {
    processed: input.replace(placeholder, '{{turns}}'),
    turns: count ? parseInt(count, 10) : null
  };
};
|
||||
|
||||
class SttTask extends Task {
|
||||
|
||||
constructor(logger, data, parentTask) {
|
||||
@@ -290,6 +314,57 @@ class SttTask extends Task {
|
||||
});
|
||||
}
|
||||
|
||||
formatOpenAIPrompt(cs, {prompt, hintsTemplate, conversationHistoryTemplate, hints}) {
|
||||
let conversationHistoryPrompt, hintsPrompt;
|
||||
|
||||
/* generate conversation history from template */
|
||||
if (conversationHistoryTemplate) {
|
||||
const {processed, turns} = processTurnString(conversationHistoryTemplate);
|
||||
this.logger.debug({processed, turns}, 'SttTask: processed conversation history template');
|
||||
conversationHistoryPrompt = cs.getFormattedConversation(turns || 4);
|
||||
//this.logger.debug({conversationHistoryPrompt}, 'SttTask: conversation history');
|
||||
if (conversationHistoryPrompt) {
|
||||
conversationHistoryPrompt = processed.replace('{{turns}}', `\n${conversationHistoryPrompt}\nuser: `);
|
||||
}
|
||||
}
|
||||
|
||||
/* generate hints from template */
|
||||
if (hintsTemplate && Array.isArray(hints) && hints.length > 0) {
|
||||
hintsPrompt = hintsTemplate.replace('{{hints}}', hints);
|
||||
}
|
||||
|
||||
/* combine into final prompt */
|
||||
let finalPrompt = prompt || '';
|
||||
if (hintsPrompt) {
|
||||
finalPrompt = `${finalPrompt}\n${hintsPrompt}`;
|
||||
}
|
||||
if (conversationHistoryPrompt) {
|
||||
finalPrompt = `${finalPrompt}\n${conversationHistoryPrompt}`;
|
||||
}
|
||||
|
||||
this.logger.debug({
|
||||
finalPrompt,
|
||||
hints,
|
||||
hintsPrompt,
|
||||
conversationHistoryTemplate,
|
||||
conversationHistoryPrompt
|
||||
}, 'SttTask: formatted OpenAI prompt');
|
||||
return finalPrompt?.trimStart();
|
||||
}
|
||||
|
||||
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||
doesVendorContinueListeningAfterFinalTranscript(vendor) {
|
||||
return (vendor.startsWith('custom:') || [
|
||||
'soniox',
|
||||
'aws',
|
||||
'microsoft',
|
||||
'deepgram',
|
||||
'google',
|
||||
'speechmatics',
|
||||
'openai',
|
||||
].includes(vendor));
|
||||
}
|
||||
|
||||
_onCompileContext(ep, key, evt) {
|
||||
const {addKey} = this.cs.srf.locals.dbHelpers;
|
||||
this.logger.debug({evt}, `received cobalt compile context event, will cache under ${key}`);
|
||||
|
||||
@@ -14,6 +14,7 @@ const {
|
||||
TranscribeStatus,
|
||||
AssemblyAiTranscriptionEvents,
|
||||
VoxistTranscriptionEvents,
|
||||
OpenAITranscriptionEvents,
|
||||
VerbioTranscriptionEvents,
|
||||
SpeechmaticsTranscriptionEvents
|
||||
} = require('../utils/constants.json');
|
||||
@@ -330,6 +331,20 @@ class TaskTranscribe extends SttTask {
|
||||
this._onSpeechmaticsError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'openai':
|
||||
this.bugname = `${this.bugname_prefix}openai_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
ep, OpenAITranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel));
|
||||
this.addCustomEventListener(ep, OpenAITranscriptionEvents.Connect,
|
||||
this._onVendorConnect.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, OpenAITranscriptionEvents.ConnectFailure,
|
||||
this._onVendorConnectFailure.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, OpenAITranscriptionEvents.Error,
|
||||
this._onOpenAIErrror.bind(this, cs, ep));
|
||||
|
||||
this.modelSupportsConversationTracking = opts.OPENAI_MODEL !== 'whisper-1';
|
||||
break;
|
||||
|
||||
default:
|
||||
if (this.vendor.startsWith('custom:')) {
|
||||
this.bugname = `${this.bugname_prefix}${this.vendor}_transcribe`;
|
||||
@@ -365,6 +380,25 @@ class TaskTranscribe extends SttTask {
|
||||
async _transcribe(ep) {
|
||||
this.logger.debug(
|
||||
`TaskTranscribe:_transcribe - starting transcription vendor ${this.vendor} bugname ${this.bugname}`);
|
||||
|
||||
/* special feature for openai: we can provide a prompt that includes recent conversation history */
|
||||
let prompt;
|
||||
if (this.vendor === 'openai') {
|
||||
if (this.modelSupportsConversationTracking) {
|
||||
prompt = this.formatOpenAIPrompt(this.cs, {
|
||||
prompt: this.data.recognizer?.openaiOptions?.prompt,
|
||||
hintsTemplate: this.data.recognizer?.openaiOptions?.promptTemplates?.hintsTemplate,
|
||||
// eslint-disable-next-line max-len
|
||||
conversationHistoryTemplate: this.data.recognizer?.openaiOptions?.promptTemplates?.conversationHistoryTemplate,
|
||||
hints: this.data.recognizer?.hints,
|
||||
});
|
||||
this.logger.debug({prompt}, 'Gather:_startTranscribing - created an openai prompt');
|
||||
}
|
||||
else if (this.data.recognizer?.hints?.length > 0) {
|
||||
prompt = this.data.recognizer?.hints.join(', ');
|
||||
}
|
||||
}
|
||||
|
||||
await ep.startTranscription({
|
||||
vendor: this.vendor,
|
||||
interim: this.interim ? true : false,
|
||||
@@ -456,8 +490,9 @@ class TaskTranscribe extends SttTask {
|
||||
this._startAsrTimer(channel);
|
||||
|
||||
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google', 'speechmatics']
|
||||
.includes(this.vendor)) this._startTranscribing(cs, ep, channel);
|
||||
if (!this.doesVendorContinueListeningAfterFinalTranscript(this.vendor)) {
|
||||
this._startTranscribing(cs, ep, channel);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (this.vendor === 'soniox') {
|
||||
@@ -480,9 +515,7 @@ class TaskTranscribe extends SttTask {
|
||||
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - sending final transcript');
|
||||
this._resolve(channel, evt);
|
||||
|
||||
/* some STT engines will keep listening after a final response, so no need to restart */
|
||||
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google', 'speechmatics'].includes(this.vendor) &&
|
||||
!this.vendor.startsWith('custom:')) {
|
||||
if (!this.doesVendorContinueListeningAfterFinalTranscript(this.vendor)) {
|
||||
this.logger.debug('TaskTranscribe:_onTranscription - restarting transcribe');
|
||||
this._startTranscribing(cs, ep, channel);
|
||||
}
|
||||
@@ -733,6 +766,12 @@ class TaskTranscribe extends SttTask {
|
||||
this._onVendorError(cs, _ep, {error: JSON.stringify(e)});
|
||||
}
|
||||
|
||||
async _onOpenAIErrror(cs, _ep, evt) {
|
||||
// eslint-disable-next-line no-unused-vars
|
||||
const {message, ...e} = evt;
|
||||
this._onVendorError(cs, _ep, {error: JSON.stringify(e)});
|
||||
}
|
||||
|
||||
_startAsrTimer(channel) {
|
||||
if (this.vendor === 'deepgram') return; // no need
|
||||
assert(this.isContinuousAsr);
|
||||
|
||||
@@ -42,6 +42,11 @@ class TtsTask extends Task {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const fullText = Array.isArray(this.text) ? this.text.join(' ') : this.text;
|
||||
if (fullText.length > 0) {
|
||||
cs.emit('botSaid', fullText);
|
||||
}
|
||||
}
|
||||
|
||||
getTtsVendorData(cs) {
|
||||
|
||||
@@ -137,6 +137,18 @@
|
||||
"Connect": "speechmatics_transcribe::connect",
|
||||
"Error": "speechmatics_transcribe::error"
|
||||
},
|
||||
"OpenAITranscriptionEvents": {
|
||||
"Transcription": "openai_transcribe::transcription",
|
||||
"Translation": "openai_transcribe::translation",
|
||||
"SpeechStarted": "openai_transcribe::speech_started",
|
||||
"SpeechStopped": "openai_transcribe::speech_stopped",
|
||||
"PartialTranscript": "openai_transcribe::partial_transcript",
|
||||
"Info": "openai_transcribe::info",
|
||||
"RecognitionStarted": "openai_transcribe::recognition_started",
|
||||
"ConnectFailure": "openai_transcribe::connect_failed",
|
||||
"Connect": "openai_transcribe::connect",
|
||||
"Error": "openai_transcribe::error"
|
||||
},
|
||||
"JambonzTranscriptionEvents": {
|
||||
"Transcription": "jambonz_transcribe::transcription",
|
||||
"ConnectFailure": "jambonz_transcribe::connect_failed",
|
||||
|
||||
@@ -142,6 +142,11 @@ const speechMapper = (cred) => {
|
||||
obj.api_key = o.api_key;
|
||||
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
|
||||
}
|
||||
else if ('openai' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
obj.model_id = o.model_id;
|
||||
}
|
||||
else if (obj.vendor.startsWith('custom:')) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.auth_token = o.auth_token;
|
||||
|
||||
@@ -117,7 +117,16 @@ const stickyVars = {
|
||||
'SPEECHMATICS_SPEECH_HINTS',
|
||||
'SPEECHMATICS_TRANSLATION_LANGUAGES',
|
||||
'SPEECHMATICS_TRANSLATION_PARTIALS'
|
||||
]
|
||||
],
|
||||
openai: [
|
||||
'OPENAI_API_KEY',
|
||||
'OPENAI_MODEL',
|
||||
'OPENAI_INPUT_AUDIO_NOISE_REDUCTION',
|
||||
'OPENAI_TURN_DETECTION_TYPE',
|
||||
'OPENAI_TURN_DETECTION_THRESHOLD',
|
||||
'OPENAI_TURN_DETECTION_PREFIX_PADDING_MS',
|
||||
'OPENAI_TURN_DETECTION_SILENCE_DURATION_MS',
|
||||
],
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -571,6 +580,35 @@ const normalizeSpeechmatics = (evt, channel, language) => {
|
||||
return obj;
|
||||
};
|
||||
|
||||
/**
 * Turn per-token log probabilities into a single confidence value.
 *
 * @param {Array<{logprob: number}>} logprobsArray - one entry per token
 * @returns {number} exp of the summed log probabilities; 1 for an empty array
 */
const calculateConfidence = (logprobsArray) => {
  /* adding log probabilities corresponds to multiplying the probabilities */
  let totalLogProb = 0;
  logprobsArray.forEach((tokenInfo) => {
    totalLogProb += tokenInfo.logprob;
  });

  /* back from log space to a regular probability */
  return Math.exp(totalLogProb);
};
|
||||
|
||||
/**
 * Convert an openai transcription event into the normalized transcript shape.
 *
 * @param evt - openai event; `transcript` and the optional `logprobs` are read from it
 * @param channel - value reported as channel_tag
 * @param language - value reported as language_code
 * @returns the normalized object, always marked is_final, with a single
 * alternative and a copy of the original event under vendor.evt
 */
const normalizeOpenAI = (evt, channel, language) => {
  /* JSON round trip so the vendor payload is detached from the incoming event */
  const vendorEvent = JSON.parse(JSON.stringify(evt));

  /* without logprobs the confidence is reported as 1.0 */
  const confidence = evt.logprobs ? calculateConfidence(evt.logprobs) : 1.0;
  const alternative = {transcript: evt.transcript, confidence};

  return {
    language_code: language,
    channel_tag: channel,
    is_final: true,
    alternatives: [alternative],
    vendor: {name: 'openai', evt: vendorEvent}
  };
};
|
||||
|
||||
module.exports = (logger) => {
|
||||
const normalizeTranscription = (evt, vendor, channel, language, shortUtterance, punctuation) => {
|
||||
|
||||
@@ -602,6 +640,8 @@ module.exports = (logger) => {
|
||||
return normalizeVerbio(evt, channel, language);
|
||||
case 'speechmatics':
|
||||
return normalizeSpeechmatics(evt, channel, language);
|
||||
case 'openai':
|
||||
return normalizeOpenAI(evt, channel, language);
|
||||
default:
|
||||
if (vendor.startsWith('custom:')) {
|
||||
return normalizeCustom(evt, channel, language, vendor);
|
||||
@@ -968,6 +1008,36 @@ module.exports = (logger) => {
|
||||
{VOXIST_API_KEY: sttCredentials.api_key},
|
||||
};
|
||||
}
|
||||
else if ('openai' === vendor) {
|
||||
const {openaiOptions = {}} = rOpts;
|
||||
const model = openaiOptions.model || rOpts.model || sttCredentials.model_id || 'whisper-1';
|
||||
const apiKey = openaiOptions.apiKey || sttCredentials.api_key;
|
||||
|
||||
opts = {
|
||||
OPENAI_MODEL: model,
|
||||
OPENAI_API_KEY: apiKey,
|
||||
...opts,
|
||||
...(openaiOptions.prompt && {OPENAI_PROMPT: openaiOptions.prompt}),
|
||||
...(openaiOptions.input_audio_noise_reduction &&
|
||||
{OPENAI_INPUT_AUDIO_NOISE_REDUCTION: openaiOptions.input_audio_noise_reduction}),
|
||||
};
|
||||
|
||||
if (openaiOptions.turn_detection) {
|
||||
opts = {
|
||||
...opts,
|
||||
OPENAI_TURN_DETECTION_TYPE: openaiOptions.turn_detection.type,
|
||||
...(openaiOptions.turn_detection.threshold && {
|
||||
OPENAI_TURN_DETECTION_THRESHOLD: openaiOptions.turn_detection.threshold
|
||||
}),
|
||||
...(openaiOptions.turn_detection.prefix_padding_ms && {
|
||||
OPENAI_TURN_DETECTION_PREFIX_PADDING_MS: openaiOptions.turn_detection.prefix_padding_ms
|
||||
}),
|
||||
...(openaiOptions.turn_detection.silence_duration_ms && {
|
||||
OPENAI_TURN_DETECTION_SILENCE_DURATION_MS: openaiOptions.turn_detection.silence_duration_ms
|
||||
}),
|
||||
};
|
||||
}
|
||||
}
|
||||
else if ('verbio' === vendor) {
|
||||
const {verbioOptions = {}} = rOpts;
|
||||
opts = {
|
||||
|
||||
41
package-lock.json
generated
41
package-lock.json
generated
@@ -18,7 +18,7 @@
|
||||
"@jambonz/speech-utils": "^0.2.3",
|
||||
"@jambonz/stats-collector": "^0.1.10",
|
||||
"@jambonz/time-series": "^0.2.13",
|
||||
"@jambonz/verb-specifications": "^0.0.98",
|
||||
"@jambonz/verb-specifications": "^0.0.101",
|
||||
"@opentelemetry/api": "^1.8.0",
|
||||
"@opentelemetry/exporter-jaeger": "^1.23.0",
|
||||
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0",
|
||||
@@ -31,7 +31,7 @@
|
||||
"bent": "^7.3.12",
|
||||
"debug": "^4.3.4",
|
||||
"deepcopy": "^2.1.0",
|
||||
"drachtio-fsmrf": "^4.0.2",
|
||||
"drachtio-fsmrf": "^4.0.3",
|
||||
"drachtio-srf": "^5.0.2",
|
||||
"express": "^4.19.2",
|
||||
"express-validator": "^7.0.1",
|
||||
@@ -1512,10 +1512,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/verb-specifications": {
|
||||
"version": "0.0.98",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.98.tgz",
|
||||
"integrity": "sha512-G55q5JGtbdowj+hBVBlApBsMBwG4rneJqUc1jcp/IksrlPlUjxMZURXi6jxmg87lZSX/u88osoG2olXnFhYU3g==",
|
||||
"license": "MIT",
|
||||
"version": "0.0.101",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.101.tgz",
|
||||
"integrity": "sha512-tgVyttFTaiEOCUhQU15m7jaw+IUTvEV2P6g4argARaNuva5DH01Dzi2FJyqlCmGzsTmuVUjOmh13PfC/cIpEsA==",
|
||||
"dependencies": {
|
||||
"debug": "^4.3.4",
|
||||
"pino": "^8.8.0"
|
||||
@@ -3084,7 +3083,8 @@
|
||||
},
|
||||
"node_modules/camel-case": {
|
||||
"version": "4.1.2",
|
||||
"license": "MIT",
|
||||
"resolved": "https://registry.npmjs.org/camel-case/-/camel-case-4.1.2.tgz",
|
||||
"integrity": "sha512-gxGWBrTT1JuMx6R+o5PTXMmUnhnVzLQ9SNutD4YqKtI6ap897t3tKECYla6gCWEkplXnlNybEkZg9GEGxKFCgw==",
|
||||
"dependencies": {
|
||||
"pascal-case": "^3.1.2",
|
||||
"tslib": "^2.0.3"
|
||||
@@ -3531,7 +3531,8 @@
|
||||
},
|
||||
"node_modules/dot-case": {
|
||||
"version": "3.0.4",
|
||||
"license": "MIT",
|
||||
"resolved": "https://registry.npmjs.org/dot-case/-/dot-case-3.0.4.tgz",
|
||||
"integrity": "sha512-Kv5nKlh6yRrdrGvxeJ2e5y2eRUpkUosIW4A2AS38zwSz27zu7ufDwQPi5Jhs3XAlGNetl3bmnGhQsMtkKJnj3w==",
|
||||
"dependencies": {
|
||||
"no-case": "^3.0.4",
|
||||
"tslib": "^2.0.3"
|
||||
@@ -3559,9 +3560,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/drachtio-fsmrf": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/drachtio-fsmrf/-/drachtio-fsmrf-4.0.2.tgz",
|
||||
"integrity": "sha512-71IrfdLe3wRcEEQlCXgKpeSo7nstYJTK/bCot7+ayhjCmqHdgDk72GtaeMadK9Q+rxVWtoWeCkLfjXOz81GPqg==",
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/drachtio-fsmrf/-/drachtio-fsmrf-4.0.3.tgz",
|
||||
"integrity": "sha512-5j8LqPMHJEgK56gI6MTVbasxCS4cUjo9UdPO8P9qJGJfLG/k/LI6QQAzPrFUcGlpOQ3WYZNkOp/drsKdttlk2Q==",
|
||||
"dependencies": {
|
||||
"camel-case": "^4.1.2",
|
||||
"debug": "^2.6.9",
|
||||
@@ -3590,6 +3591,8 @@
|
||||
},
|
||||
"node_modules/drachtio-modesl": {
|
||||
"version": "1.2.9",
|
||||
"resolved": "https://registry.npmjs.org/drachtio-modesl/-/drachtio-modesl-1.2.9.tgz",
|
||||
"integrity": "sha512-Ob/N0ntwd/Qu6IWjRbUr17DSpw9dTpPNMwmi6ZTh8ryGRE29zlx6U446y/VYpN8ZN9rEi0OgTyAmUt3RjLoRyQ==",
|
||||
"license": "MPL-2.0",
|
||||
"dependencies": {
|
||||
"eventemitter2": "^6.4.4",
|
||||
@@ -3602,6 +3605,8 @@
|
||||
},
|
||||
"node_modules/drachtio-modesl/node_modules/xml2js": {
|
||||
"version": "0.4.23",
|
||||
"resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz",
|
||||
"integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"sax": ">=0.6.0",
|
||||
@@ -4152,6 +4157,8 @@
|
||||
},
|
||||
"node_modules/eventemitter2": {
|
||||
"version": "6.4.9",
|
||||
"resolved": "https://registry.npmjs.org/eventemitter2/-/eventemitter2-6.4.9.tgz",
|
||||
"integrity": "sha512-JEPTiaOt9f04oa6NOkc4aH+nVp5I3wEjpHbIPqfgCdD5v5bUzy7xQqwcVO2aDQgOWhI28da57HksMrzK9HlRxg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/events": {
|
||||
@@ -5992,7 +5999,8 @@
|
||||
},
|
||||
"node_modules/lower-case": {
|
||||
"version": "2.0.2",
|
||||
"license": "MIT",
|
||||
"resolved": "https://registry.npmjs.org/lower-case/-/lower-case-2.0.2.tgz",
|
||||
"integrity": "sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg==",
|
||||
"dependencies": {
|
||||
"tslib": "^2.0.3"
|
||||
}
|
||||
@@ -6326,7 +6334,8 @@
|
||||
},
|
||||
"node_modules/no-case": {
|
||||
"version": "3.0.4",
|
||||
"license": "MIT",
|
||||
"resolved": "https://registry.npmjs.org/no-case/-/no-case-3.0.4.tgz",
|
||||
"integrity": "sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg==",
|
||||
"dependencies": {
|
||||
"lower-case": "^2.0.2",
|
||||
"tslib": "^2.0.3"
|
||||
@@ -6738,7 +6747,8 @@
|
||||
},
|
||||
"node_modules/pascal-case": {
|
||||
"version": "3.1.2",
|
||||
"license": "MIT",
|
||||
"resolved": "https://registry.npmjs.org/pascal-case/-/pascal-case-3.1.2.tgz",
|
||||
"integrity": "sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g==",
|
||||
"dependencies": {
|
||||
"no-case": "^3.0.4",
|
||||
"tslib": "^2.0.3"
|
||||
@@ -7542,7 +7552,8 @@
|
||||
},
|
||||
"node_modules/snake-case": {
|
||||
"version": "3.0.4",
|
||||
"license": "MIT",
|
||||
"resolved": "https://registry.npmjs.org/snake-case/-/snake-case-3.0.4.tgz",
|
||||
"integrity": "sha512-LAOh4z89bGQvl9pFfNF8V146i7o7/CqFPbqzYgP+yYzDIDeS9HaNFtXABamRW+AQzEVODcvE79ljJ+8a9YSdMg==",
|
||||
"dependencies": {
|
||||
"dot-case": "^3.0.4",
|
||||
"tslib": "^2.0.3"
|
||||
|
||||
@@ -33,8 +33,8 @@
|
||||
"@jambonz/realtimedb-helpers": "^0.8.13",
|
||||
"@jambonz/speech-utils": "^0.2.3",
|
||||
"@jambonz/stats-collector": "^0.1.10",
|
||||
"@jambonz/verb-specifications": "^0.0.98",
|
||||
"@jambonz/time-series": "^0.2.13",
|
||||
"@jambonz/verb-specifications": "^0.0.101",
|
||||
"@opentelemetry/api": "^1.8.0",
|
||||
"@opentelemetry/exporter-jaeger": "^1.23.0",
|
||||
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0",
|
||||
@@ -47,7 +47,7 @@
|
||||
"bent": "^7.3.12",
|
||||
"debug": "^4.3.4",
|
||||
"deepcopy": "^2.1.0",
|
||||
"drachtio-fsmrf": "^4.0.2",
|
||||
"drachtio-fsmrf": "^4.0.3",
|
||||
"drachtio-srf": "^5.0.2",
|
||||
"express": "^4.19.2",
|
||||
"express-validator": "^7.0.1",
|
||||
|
||||
Reference in New Issue
Block a user