initial changes for openai stt (#1127)

* initial changes for openai stt

* wip

* wip

* wip

* wip

* wip

* make minBargeinWordCount work for openai

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wipp

* wip

* wip

* wip

* openai stt: support for prompt templates

* lint

* wip

* support openai semantic_vad

* wip

* transcribe supports openai stt

* sip

* wip

* wip

* refactor list of stt vendors that dont need to be restarted after a final transcript

* cleanup

* wip

* cleanup

* wip

* wip

* wip

* remove credentials from log

* comment
This commit is contained in:
Dave Horton
2025-03-28 13:14:58 -04:00
committed by GitHub
parent ee846b283d
commit fcaf2e59e7
11 changed files with 382 additions and 29 deletions

View File

@@ -5,6 +5,30 @@ const { TaskPreconditions, CobaltTranscriptionEvents } = require('../utils/const
const { SpeechCredentialError } = require('../utils/error');
const {JAMBONES_AWS_TRANSCRIBE_USE_GRPC} = require('../config');
/**
* "Please insert turns here: {{turns:4}}"
// -> { processed: 'Please insert turns here: {{turns}}', turns: 4 }
processTurnString("Please insert turns here: {{turns}}"));
// -> { processed: 'Please insert turns here: {{turns}}', turns: null }
*/
const processTurnString = (input) => {
const regex = /\{\{turns(?::(\d+))?\}\}/;
const match = input.match(regex);
if (!match) {
return {
processed: input,
turns: null
};
}
const turns = match[1] ? parseInt(match[1], 10) : null;
const processed = input.replace(regex, '{{turns}}');
return { processed, turns };
};
class SttTask extends Task {
constructor(logger, data, parentTask) {
@@ -290,6 +314,57 @@ class SttTask extends Task {
});
}
formatOpenAIPrompt(cs, {prompt, hintsTemplate, conversationHistoryTemplate, hints}) {
let conversationHistoryPrompt, hintsPrompt;
/* generate conversation history from template */
if (conversationHistoryTemplate) {
const {processed, turns} = processTurnString(conversationHistoryTemplate);
this.logger.debug({processed, turns}, 'SttTask: processed conversation history template');
conversationHistoryPrompt = cs.getFormattedConversation(turns || 4);
//this.logger.debug({conversationHistoryPrompt}, 'SttTask: conversation history');
if (conversationHistoryPrompt) {
conversationHistoryPrompt = processed.replace('{{turns}}', `\n${conversationHistoryPrompt}\nuser: `);
}
}
/* generate hints from template */
if (hintsTemplate && Array.isArray(hints) && hints.length > 0) {
hintsPrompt = hintsTemplate.replace('{{hints}}', hints);
}
/* combine into final prompt */
let finalPrompt = prompt || '';
if (hintsPrompt) {
finalPrompt = `${finalPrompt}\n${hintsPrompt}`;
}
if (conversationHistoryPrompt) {
finalPrompt = `${finalPrompt}\n${conversationHistoryPrompt}`;
}
this.logger.debug({
finalPrompt,
hints,
hintsPrompt,
conversationHistoryTemplate,
conversationHistoryPrompt
}, 'SttTask: formatted OpenAI prompt');
return finalPrompt?.trimStart();
}
/* some STT engines will keep listening after a final response, so no need to restart */
doesVendorContinueListeningAfterFinalTranscript(vendor) {
return (vendor.startsWith('custom:') || [
'soniox',
'aws',
'microsoft',
'deepgram',
'google',
'speechmatics',
'openai',
].includes(vendor));
}
_onCompileContext(ep, key, evt) {
const {addKey} = this.cs.srf.locals.dbHelpers;
this.logger.debug({evt}, `received cobalt compile context event, will cache under ${key}`);