Merge pull request #13 from jambonz/feat/aws-v3

fix: custom tts support multiple audio types
This commit is contained in:
Dave Horton
2023-03-15 08:51:25 -04:00
committed by GitHub
2 changed files with 35 additions and 9 deletions

View File

@@ -155,7 +155,8 @@ async function synthAudio(client, logger, stats, { account_sid,
audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
break;
case vendor.startsWith('custom') ? vendor : 'cant_match_value':
audioBuffer = await synthCustomVendor(logger, {credentials, stats, language, voice, text});
({ audioBuffer, filePath } = await synthCustomVendor(logger,
{credentials, stats, language, voice, text, filePath}));
break;
default:
assert(`synthAudio: unsupported speech vendor ${vendor}`);
@@ -440,30 +441,56 @@ const synthNvidia = async(client, logger, {credentials, stats, language, voice,
};
/**
 * Synthesize speech by POSTing the text to a customer-supplied TTS endpoint.
 *
 * Earlier note kept from the original comment: "CustomVendor accept only mp3".
 * NOTE(review): the later `return` in this span derives the file extension from
 * the response Content-Type, so that note may be outdated - confirm.
 *
 * NOTE(review): several statements below appear twice in slightly different
 * forms (the `const synthCustomVendor` line, the `const post` line, the
 * `await post(...)` line and the `return`). This looks like the before/after
 * lines of a change shown together - confirm which of each pair is live.
 *
 * @param {object} logger - `logger.info` is called with the error when the request fails
 * @param {object} opts
 * @param {object} opts.credentials - supplies `vendor`, `auth_token` and `custom_tts_url`
 * @param {object} opts.stats - accepted but not referenced in this function
 * @param {string} opts.language - forwarded as `language` in the request body
 * @param {string} opts.voice - forwarded as `voice` in the request body
 * @param {string} opts.text - text to synthesize; sent as type `ssml` when it starts with `<speak>`
 * @param {string} opts.filePath - path whose trailing `.ext` is rewritten in the object-returning form
 * @returns {Promise<*>} either the raw response buffer (`mp3`) or `{audioBuffer, filePath}`,
 *   depending on which of the duplicated statements is live
 * @throws rethrows whatever the HTTP request throws, after logging it
 */
const synthCustomVendor = async(logger, {credentials, stats, language, voice, text}) => {
const synthCustomVendor = async(logger, {credentials, stats, language, voice, text, filePath}) => {
const {vendor, auth_token, custom_tts_url} = credentials;
try {
// Build a POST client carrying the vendor's bearer token on every request.
const post = bent('POST', 'buffer', {
const post = bent('POST', {
'Authorization': `Bearer ${auth_token}`,
'Accept': 'audio/mpeg',
'Content-Type': 'application/json'
});
// The request body is a JSON object describing what to synthesize.
const mp3 = await post(custom_tts_url, {
const response = await post(custom_tts_url, {
language,
format: 'audio/mpeg',
voice,
// Text beginning with <speak> is declared to the vendor as SSML, anything else as plain text.
type: text.startsWith('<speak>') ? 'ssml' : 'text',
text
});
return mp3;
// Matches the final ".ext" of the path (last dot through end of string).
const regex = /\.[^\.]*$/g;
const mime = response.headers['content-type'];
const buffer = await response.arrayBuffer();
return {
audioBuffer: buffer,
// Swap the path's extension for the one implied by the response Content-Type.
filePath: filePath.replace(regex, getFileExtFromMime(mime))
};
} catch (err) {
// Log with the vendor name for context, then let the caller handle the failure.
logger.info({err}, `Vendor ${vendor} returned error`);
throw err;
}
};
/**
 * Map the Content-Type of a TTS response to the file extension used for the
 * audio file.
 *
 * Media types are case-insensitive and may carry parameters, so the value is
 * lower-cased and the type is separated from its parameters before matching.
 * This makes `audio/L16;rate=8000` (the registered spelling) and
 * `audio/mpeg; charset=binary` resolve correctly instead of silently falling
 * back to `.wav`.
 *
 * For linear PCM (`audio/l16`) the extension encodes the sample rate taken
 * from the `rate` parameter; the rate is compared exactly, so `rate=80000` is
 * not mistaken for 8000.
 *
 * @param {string} [mime] - Content-Type header value; may be missing
 * @returns {string} one of `.wav`, `.mp3`, `.r8`, `.r16`, `.r24`, `.r32`, `.r48`;
 *   `.wav` for anything unrecognized (including a missing header)
 */
const getFileExtFromMime = (mime) => {
  const DEFAULT_EXT = '.wav';
  const contentType = typeof mime === 'string' ? mime.trim().toLowerCase() : '';
  const mediaType = contentType.split(';')[0].trim();

  switch (mediaType) {
    case 'audio/wav':
    case 'audio/x-wav':
      return '.wav';
    case 'audio/mpeg':
    case 'audio/mp3':
      return '.mp3';
    case 'audio/l16': {
      // Raw PCM has no header, so the sample rate has to travel in the extension.
      const extByRate = {
        8000: '.r8',
        16000: '.r16',
        24000: '.r24',
        32000: '.r32',
        48000: '.r48'
      };
      const match = /;\s*rate\s*=\s*"?(\d+)/.exec(contentType);
      return (match && extByRate[match[1]]) || DEFAULT_EXT;
    }
    default:
      return DEFAULT_EXT;
  }
};
// synthAudio is the only value this module exports.
module.exports = synthAudio;

View File

@@ -348,7 +348,6 @@ test('Custom Vendor speech synth tests', async(t) => {
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/somethingnew`);
t.ok(obj.headers.Authorization == 'Bearer some_jwt_token', 'Custom Vendor Authentication Header is correct');
t.ok(obj.body.language == 'en-US', 'Custom Vendor Language is correct');
t.ok(obj.body.format == 'audio/mpeg', 'Custom Vendor format is correct');
t.ok(obj.body.voice == 'English-US.Female-1', 'Custom Vendor voice is correct');
t.ok(obj.body.type == 'text', 'Custom Vendor type is correct');
t.ok(obj.body.text == 'This is a test. This is only a test', 'Custom Vendor text is correct');