mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-01-25 02:08:26 +00:00
Merge branch 'main' into feat/azure_tts
This commit is contained in:
@@ -77,7 +77,7 @@ const trimTrailingSilence = (buffer) => {
|
||||
*/
|
||||
async function synthAudio(client, logger, stats, { account_sid,
|
||||
vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId,
|
||||
disableTtsCache, renderForCaching, options
|
||||
disableTtsCache, renderForCaching, disableTtsStreaming, options
|
||||
}) {
|
||||
let audioBuffer;
|
||||
let servedFromCache = false;
|
||||
@@ -208,21 +208,14 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
break;
|
||||
case 'elevenlabs':
|
||||
audioBuffer = await synthElevenlabs(logger, {
|
||||
credentials, options, stats, language, voice, text, renderForCaching, filePath
|
||||
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
|
||||
});
|
||||
if (typeof audioBuffer === 'object' && audioBuffer.filePath) {
|
||||
return audioBuffer;
|
||||
}
|
||||
else {
|
||||
audioBuffer = await synthElevenlabs(logger, {credentials, options, stats, language, voice, text, filePath});
|
||||
}
|
||||
if (audioBuffer?.filePath) return audioBuffer;
|
||||
break;
|
||||
case 'whisper':
|
||||
audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text, renderForCaching});
|
||||
if (typeof audioBuffer === 'object' && audioBuffer.filePath) {
|
||||
return audioBuffer;
|
||||
}
|
||||
audioBuffer = await synthWhisper(logger, {credentials, stats, voice, text});
|
||||
audioBuffer = await synthWhisper(logger, {
|
||||
credentials, stats, voice, text, renderForCaching, disableTtsStreaming});
|
||||
if (audioBuffer?.filePath) return audioBuffer;
|
||||
break;
|
||||
case 'deepgram':
|
||||
audioBuffer = await synthDeepgram(logger, {credentials, stats, model, text});
|
||||
@@ -637,12 +630,14 @@ const synthCustomVendor = async(logger, {credentials, stats, language, voice, te
|
||||
}
|
||||
};
|
||||
|
||||
const synthElevenlabs = async(logger, {credentials, options, stats, language, voice, text, renderForCaching}) => {
|
||||
const synthElevenlabs = async(logger, {
|
||||
credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
|
||||
}) => {
|
||||
const {api_key, model_id, options: credOpts} = credentials;
|
||||
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
|
||||
|
||||
/* default to using the streaming interface, unless disabled by env var, by the per-call disableTtsStreaming flag, OR we want just a cache file */
|
||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching) {
|
||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||
let params = '';
|
||||
params += `{api_key=${api_key}`;
|
||||
params += `,model_id=${model_id}`;
|
||||
@@ -686,10 +681,10 @@ const synthElevenlabs = async(logger, {credentials, options, stats, language, vo
|
||||
}
|
||||
};
|
||||
|
||||
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching}) => {
|
||||
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
|
||||
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
||||
/* if streaming is not disabled (by env var or the per-call disableTtsStreaming flag) then bail out, unless we are specifically rendering to generate a cache file */
|
||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching) {
|
||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||
let params = '';
|
||||
params += `{api_key=${api_key}`;
|
||||
params += `,model_id=${model_id}`;
|
||||
@@ -728,7 +723,7 @@ const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCa
|
||||
const synthDeepgram = async(logger, {credentials, stats, model, text}) => {
|
||||
const {api_key} = credentials;
|
||||
try {
|
||||
const post = bent('https://api.beta.deepgram.com', 'POST', 'buffer', {
|
||||
const post = bent('https://api.deepgram.com', 'POST', 'buffer', {
|
||||
'Authorization': `Token ${api_key}`,
|
||||
'Accept': 'audio/mpeg',
|
||||
'Content-Type': 'application/json'
|
||||
|
||||
@@ -539,7 +539,7 @@ test('Deepgram speech synth tests', async(t) => {
|
||||
credentials: {
|
||||
api_key: process.env.DEEPGRAM_API_KEY
|
||||
},
|
||||
model: 'alpha-asteria-en-v2',
|
||||
model: 'aura-asteria-en',
|
||||
text,
|
||||
});
|
||||
t.ok(!opts.servedFromCache, `successfully synthesized deepgram audio to ${opts.filePath}`);
|
||||
|
||||
Reference in New Issue
Block a user