mirror of
https://github.com/jambonz/speech-utils.git
synced 2025-12-19 03:37:49 +00:00
Merge pull request #13 from jambonz/feat/aws-v3
fix: custom tts support multiple audio types
This commit is contained in:
@@ -155,7 +155,8 @@ async function synthAudio(client, logger, stats, { account_sid,
|
||||
audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
|
||||
break;
|
||||
case vendor.startsWith('custom') ? vendor : 'cant_match_value':
|
||||
audioBuffer = await synthCustomVendor(logger, {credentials, stats, language, voice, text});
|
||||
({ audioBuffer, filePath } = await synthCustomVendor(logger,
|
||||
{credentials, stats, language, voice, text, filePath}));
|
||||
break;
|
||||
default:
|
||||
assert(`synthAudio: unsupported speech vendor ${vendor}`);
|
||||
@@ -440,30 +441,56 @@ const synthNvidia = async(client, logger, {credentials, stats, language, voice,
|
||||
};
|
||||
|
||||
|
||||
/**
 * Synthesize speech through a customer-hosted ("custom") TTS endpoint.
 *
 * Sends a JSON POST to `credentials.custom_tts_url`, authenticated with
 * `credentials.auth_token` as a bearer token. The audio encoding is taken from
 * the response's Content-Type header, and the extension of `filePath` is
 * rewritten to match it.
 *
 * @param {object} logger - logger exposing `info`
 * @param {object} opts
 * @param {object} opts.credentials - must provide `vendor`, `auth_token` and `custom_tts_url`
 * @param {object} opts.stats - accepted for signature parity with the other synth helpers; not used here
 * @param {string} opts.language - forwarded to the endpoint as `language`
 * @param {string} opts.voice - forwarded to the endpoint as `voice`
 * @param {string} opts.text - text to speak; sent as type `ssml` when it starts with `<speak>`, else `text`
 * @param {string} opts.filePath - proposed cache path; its extension is replaced to match the returned audio
 * @returns {Promise<{audioBuffer: *, filePath: string}>} the audio bytes and the adjusted path
 * @throws rethrows whatever the HTTP request throws, after logging it
 */
const synthCustomVendor = async(logger, {credentials, stats, language, voice, text, filePath}) => {
  const {vendor, auth_token, custom_tts_url} = credentials;

  try {
    // No response encoding is passed to bent, so the call resolves to the response
    // object itself; that is what gives access to the headers below.
    // No Accept header is sent: the endpoint picks the audio encoding and reports
    // it back through Content-Type.
    const post = bent('POST', {
      'Authorization': `Bearer ${auth_token}`,
      'Content-Type': 'application/json'
    });

    const response = await post(custom_tts_url, {
      language,
      voice,
      type: text.startsWith('<speak>') ? 'ssml' : 'text',
      text
    });

    const mime = response.headers['content-type'];
    const buffer = await response.arrayBuffer();

    return {
      audioBuffer: buffer,
      // Swap the trailing ".ext" of the proposed path for the extension that
      // matches the audio actually returned. A path without a dot is left as is.
      filePath: filePath.replace(/\.[^\.]*$/, getFileExtFromMime(mime))
    };
  } catch (err) {
    logger.info({err}, `Vendor ${vendor} returned error`);
    throw err;
  }
};
|
||||
|
||||
/**
 * Map a Content-Type header value to the file extension used for the audio file.
 *
 * Matching ignores case and any parameters after the media type, so
 * `audio/L16;rate=8000` and `audio/mpeg; charset=binary` are both recognised.
 *
 *   audio/wav, audio/x-wav  -> .wav
 *   audio/mpeg, audio/mp3   -> .mp3
 *   audio/l16 with rate=8000|16000|24000|32000|48000 -> .r8|.r16|.r24|.r32|.r48
 *   anything else (including a missing header or unknown rate) -> .wav
 *
 * @param {string} [mime] - raw Content-Type header value
 * @returns {string} file extension including the leading dot
 */
const getFileExtFromMime = (mime) => {
  const DEFAULT_EXT = '.wav';
  const EXT_BY_TYPE = {
    'audio/wav': '.wav',
    'audio/x-wav': '.wav',
    'audio/mpeg': '.mp3',
    'audio/mp3': '.mp3'
  };
  const EXT_BY_L16_RATE = {
    '8000': '.r8',
    '16000': '.r16',
    '24000': '.r24',
    '32000': '.r32',
    '48000': '.r48'
  };

  // Media type and parameter names are case-insensitive, so compare in lower case.
  const header = typeof mime === 'string' ? mime.trim().toLowerCase() : '';

  if (header.includes('audio/l16')) {
    // Capture the whole number so that e.g. rate=80000 is not mistaken for rate=8000.
    const rateMatch = /rate\s*=\s*"?(\d+)/.exec(header);
    const ext = rateMatch ? EXT_BY_L16_RATE[rateMatch[1]] : undefined;
    return ext || DEFAULT_EXT;
  }

  // Drop parameters such as "; charset=..." before the table lookup.
  const mediaType = header.split(';')[0].trim();
  return EXT_BY_TYPE[mediaType] || DEFAULT_EXT;
};
|
||||
|
||||
module.exports = synthAudio;
|
||||
|
||||
@@ -348,7 +348,6 @@ test('Custom Vendor speech synth tests', async(t) => {
|
||||
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/somethingnew`);
|
||||
t.ok(obj.headers.Authorization == 'Bearer some_jwt_token', 'Custom Vendor Authentication Header is correct');
|
||||
t.ok(obj.body.language == 'en-US', 'Custom Vendor Language is correct');
|
||||
t.ok(obj.body.format == 'audio/mpeg', 'Custom Vendor format is correct');
|
||||
t.ok(obj.body.voice == 'English-US.Female-1', 'Custom Vendor voice is correct');
|
||||
t.ok(obj.body.type == 'text', 'Custom Vendor type is correct');
|
||||
t.ok(obj.body.text == 'This is a test. This is only a test', 'Custom Vendor text is correct');
|
||||
|
||||
Reference in New Issue
Block a user