Merge pull request #57 from jambonz/feat/whisper_tts_stream

support whisper streaming
Dave Horton authored 2024-02-20 20:33:21 -05:00, committed by GitHub
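
With this change, when the JAMBONES_DISABLE_TTS_STREAMING environment variable is unset and the caller is not rendering text specifically to populate the cache, synthWhisper no longer synthesizes an audio buffer; instead it returns a descriptor object whose filePath is a say: string carrying the TTS parameters inline. A rough illustration of that descriptor, with placeholder credential values (the field names come from the diff below; the values themselves are made up):

  // Illustrative only: api_key, model_id, voice and the text are placeholders
  {
    filePath: 'say:{api_key=sk-xxxx,model_id=tts-1,voice=alloy,write_cache_file=1,speed=1.0}Hello, how can I help you?',
    servedFromCache: false,
    rtt: 0
  }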


@@ -210,6 +210,10 @@ async function synthAudio(client, logger, stats, { account_sid,
         }
         break;
       case 'whisper':
-        audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text});
+        audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text, renderForCaching});
+        if (typeof audioBuffer === 'object' && audioBuffer.filePath) {
+          return audioBuffer;
+        }
         break;
       case 'deepgram':
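
Because of the early return above, synthAudio can now hand back either the usual synthesized/cached audio result or the streaming descriptor. A minimal sketch of how a downstream consumer might tell the two apart; the variable names and the startsWith check are assumptions for illustration, not code from this repository:

  // Hypothetical consumer-side check (not from the repo)
  const result = await synthAudio(client, logger, stats, opts);
  if (typeof result === 'object' && typeof result.filePath === 'string' &&
    result.filePath.startsWith('say:')) {
    // streaming descriptor: pass the say: string through to the media layer
  }
  else {
    // pre-existing behavior: a synthesized (or cached) audio file result
  }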
@@ -656,8 +660,24 @@ const synthElevenlabs = async(logger, {credentials, options, stats, language, vo
   }
 };
 
-const synthWhisper = async(logger, {credentials, stats, voice, text}) => {
-  const {api_key, model_id, baseURL, timeout} = credentials;
+const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching}) => {
+  const {api_key, model_id, baseURL, timeout, speed} = credentials;
+
+  /* if TTS streaming is enabled (not disabled via env), bail out and return a streaming descriptor, unless we are specifically rendering to generate a cache file */
+  if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching) {
+    let params = '';
+    params += `{api_key=${api_key}`;
+    params += `,model_id=${model_id}`;
+    params += `,voice=${voice}`;
+    params += ',write_cache_file=1';
+    if (speed) params += `,speed=${speed}`;
+    params += '}';
+    return {
+      filePath: `say:${params}${text.replace(/\n/g, ' ')}`,
+      servedFromCache: false,
+      rtt: 0
+    };
+  }
   try {
     const openai = new OpenAI.OpenAI({
       apiKey: api_key,
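
The hunk is truncated at the OpenAI client construction. For orientation only, here is a minimal sketch of how the non-streaming path could synthesize audio with these credentials using the published openai Node SDK; it is an assumption about the surrounding code, not the lines that actually follow in the file:

  // Sketch, not the repository's code: non-streaming synthesis via the openai SDK
  const OpenAI = require('openai');
  const openai = new OpenAI.OpenAI({
    apiKey: api_key,
    ...(baseURL && {baseURL}),   // optional override for OpenAI-compatible endpoints
    ...(timeout && {timeout})
  });
  const response = await openai.audio.speech.create({
    model: model_id || 'tts-1',
    voice,                       // e.g. 'alloy'
    input: text,
    ...(speed && {speed})
  });
  const audioBuffer = Buffer.from(await response.arrayBuffer());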