mirror of
https://github.com/jambonz/speech-utils.git
synced 2025-12-19 03:37:49 +00:00
support inworld tts
This commit is contained in:
@@ -98,7 +98,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
logger = logger || noopLogger;
|
||||
|
||||
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs',
|
||||
'whisper', 'deepgram', 'playht', 'rimelabs', 'verbio', 'cartesia'].includes(vendor) ||
|
||||
'whisper', 'deepgram', 'playht', 'rimelabs', 'verbio', 'cartesia', 'inworld'].includes(vendor) ||
|
||||
vendor.startsWith('custom'),
|
||||
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
|
||||
if ('google' === vendor) {
|
||||
@@ -141,6 +141,10 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when playht is used');
|
||||
assert.ok(credentials.user_id, 'synthAudio requires user_id when playht is used');
|
||||
assert.ok(credentials.voice_engine, 'synthAudio requires voice_engine when playht is used');
|
||||
} else if ('inworld' === vendor) {
|
||||
assert.ok(voice, 'synthAudio requires voice when inworld is used');
|
||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when inworld is used');
|
||||
assert.ok(credentials.model_id, 'synthAudio requires model_id when inworld is used');
|
||||
} else if ('rimelabs' === vendor) {
|
||||
assert.ok(voice, 'synthAudio requires voice when rimelabs is used');
|
||||
assert.ok(credentials.api_key, 'synthAudio requires api_key when rimelabs is used');
|
||||
@@ -238,6 +242,10 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
audioData = await synthCartesia(logger, {
|
||||
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming});
|
||||
break;
|
||||
case 'inworld':
|
||||
audioData = await synthInworld(logger, {
|
||||
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming});
|
||||
break;
|
||||
case 'rimelabs':
|
||||
audioData = await synthRimelabs(logger, {
|
||||
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming});
|
||||
@@ -954,6 +962,70 @@ const synthPlayHT = async(client, logger, {
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Synthesize speech with the Inworld TTS vendor.
 *
 * Two modes:
 *  - Streaming (default): no HTTP request is made here. A `say:{...}text`
 *    string carrying the credentials and voice settings is returned as
 *    `filePath`, to be handled by whatever consumes that path.
 *  - Non-streaming (streaming disabled via JAMBONES_DISABLE_TTS_STREAMING,
 *    `disableTtsStreaming`, or `renderForCaching`): the Inworld HTTP API is
 *    called and the base64 audio in the response is decoded and returned.
 *
 * @param {object} logger - logger exposing `info()`
 * @param {object} params
 * @param {object} params.credentials - `{api_key, model_id, options}`; `options`
 *   is a JSON string used as a fallback when `params.options` is empty
 * @param {object} [params.options] - per-request options (`temperature`,
 *   `audioConfig.pitch`, `audioConfig.speakingRate`, ...)
 * @param {object} params.stats - stats client exposing `increment()`
 * @param {string} params.voice - Inworld voice id
 * @param {string} params.text - text to synthesize
 * @param {boolean} [params.renderForCaching] - force the HTTP (non-streaming) path
 * @param {boolean} [params.disableTtsStreaming] - force the HTTP (non-streaming) path
 * @returns {Promise<object>} `{filePath, servedFromCache, rtt}` when streaming,
 *   otherwise `{audioContent, extension, sampleRate}`
 * @throws {Error} when the HTTP request fails or returns a non-2xx status
 */
const synthInworld = async(logger, {
  credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
}) => {
  const {api_key, model_id, options: credOpts} = credentials;
  /* per-request options win; otherwise fall back to the JSON options stored with the credentials */
  const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');

  /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
  if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    let params = '';
    params += `{api_key=${api_key}`;
    params += `,model_id=${model_id}`;
    params += ',vendor=inworld';
    params += `,voice=${voice}`;
    params += ',write_cache_file=1';
    if (opts.temperature) params += `,temperature=${opts.temperature}`;
    /* pitch and speakingRate live under audioConfig, so read the values from there too */
    if (opts.audioConfig?.pitch) params += `,pitch=${opts.audioConfig.pitch}`;
    if (opts.audioConfig?.speakingRate) params += `,speakingRate=${opts.audioConfig.speakingRate}`;
    params += '}';

    return {
      /* line breaks are flattened to spaces before the text is appended */
      filePath: `say:${params}${text.replace(/\n/g, ' ').replace(/\r/g, ' ')}`,
      servedFromCache: false,
      rtt: 0
    };
  }

  try {
    const url = 'https://api.inworld.ai/tts/v1/voice';
    /* NOTE(review): the request does not ask for a specific sample rate; confirm 8000 matches the returned audio */
    const sampleRate = 8000;
    const requestInit = {
      method: 'POST',
      headers: {
        'Authorization': `Basic ${api_key}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        text,
        voiceId: voice,
        modelId: model_id,
        audioConfig: {
          ...(opts.audioConfig || {}),
          /* placed last so caller-supplied audioConfig cannot override the encoding */
          audioEncoding: 'MP3',
        }
      })
    };

    const response = await fetch(url, requestInit);

    if (!response.ok) {
      throw new Error(await response.text());
    }
    const json = await response.json();
    return {
      audioContent: Buffer.from(json.audioContent, 'base64'),
      extension: 'mp3',
      sampleRate
    };
  } catch (err) {
    logger.info({err}, 'synth inworld returned error');
    stats.increment('tts.count', ['vendor:inworld', 'accepted:no']);
    throw err;
  }
};
|
||||
|
||||
const synthRimelabs = async(logger, {
|
||||
credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming
|
||||
}) => {
|
||||
|
||||
Reference in New Issue
Block a user