mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-01-25 02:08:26 +00:00
Merge pull request #57 from jambonz/feat/whisper_tts_stream
support whisper streaming
This commit is contained in:
@@ -210,6 +210,10 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
}
|
||||
break;
|
||||
case 'whisper':
|
||||
audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text, renderForCaching});
|
||||
if (typeof audioBuffer === 'object' && audioBuffer.filePath) {
|
||||
return audioBuffer;
|
||||
}
|
||||
audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text});
|
||||
break;
|
||||
case 'deepgram':
|
||||
@@ -656,8 +660,24 @@ const synthElevenlabs = async(logger, {credentials, options, stats, language, vo
|
||||
}
|
||||
};
|
||||
|
||||
const synthWhisper = async(logger, {credentials, stats, voice, text}) => {
|
||||
const {api_key, model_id, baseURL, timeout} = credentials;
|
||||
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching}) => {
|
||||
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
||||
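/* unless streaming has been disabled via JAMBONES_DISABLE_TTS_STREAMING, bail out early and return a streaming `say:` path instead of synthesizing here; the exception is when we are specifically rendering to generate a cache file */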
|
||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching) {
|
||||
let params = '';
|
||||
params += `{api_key=${api_key}`;
|
||||
params += `,model_id=${model_id}`;
|
||||
params += `,voice=${voice}`;
|
||||
params += ',write_cache_file=1';
|
||||
if (speed) params += `,speed=${speed}`;
|
||||
params += '}';
|
||||
|
||||
return {
|
||||
filePath: `say:${params}${text.replace(/\n/g, ' ')}`,
|
||||
servedFromCache: false,
|
||||
rtt: 0
|
||||
};
|
||||
}
|
||||
try {
|
||||
const openai = new OpenAI.OpenAI({
|
||||
apiKey: api_key,
|
||||
|
||||
Reference in New Issue
Block a user