custom tts vendor

This commit is contained in:
Quan HL
2023-03-05 21:01:45 +07:00
parent 3e503996e6
commit d423cbc19a
2 changed files with 76 additions and 2 deletions

View File

@@ -60,8 +60,9 @@ async function synthAudio(client, logger, stats, {
let rtt;
logger = logger || noopLogger;
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor),
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor) ||
vendor.startsWith('custom'),
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
if ('google' === vendor) {
assert.ok(language, 'synthAudio requires language when google is used');
}
@@ -91,6 +92,8 @@ async function synthAudio(client, logger, stats, {
language = 'en-US'; // WellSaid only supports English atm
assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
assert.ok(!text.startsWith('<speak'), 'wellsaid does not support SSML tags');
} else if (vendor.startsWith('custom')) {
assert.ok(credentials.custom_tts_url, `synthAudio requires custom_tts_url in credentials when ${vendor} is used`);
}
const key = makeSynthKey({
@@ -151,6 +154,9 @@ async function synthAudio(client, logger, stats, {
case 'wellsaid':
audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
break;
case vendor.startsWith('custom') ? vendor : 'cant_match_value':
audioBuffer = await synthCustomVendor(logger, {credentials, stats, language, voice, text});
break;
default:
assert(`synthAudio: unsupported speech vendor ${vendor}`);
}
@@ -433,4 +439,31 @@ const synthNvidia = async(client, logger, {credentials, stats, language, voice,
});
};
/**
 * Synthesize speech by POSTing a JSON request to a user-supplied custom TTS endpoint.
 *
 * The endpoint is asked for MP3 audio only (`Accept: audio/mpeg`, `format: 'audio/mpeg'`);
 * whatever body it answers with is returned unmodified.
 *
 * @param {object} logger - logger exposing `info`
 * @param {object} opts
 * @param {object} opts.credentials - `custom_tts_url` is the endpoint to POST to;
 *   `auth_token`, when set, is sent as a bearer token; `vendor` is only used in the error log
 * @param {object} [opts.stats] - accepted but not used by this function
 * @param {string} opts.language - forwarded as-is in the request body
 * @param {string} opts.voice - forwarded as-is in the request body
 * @param {string} opts.text - plain text, or SSML (any text starting with `<speak`)
 * @returns {Promise<*>} the response body as produced by bent's 'buffer' decoding
 * @throws rethrows whatever the HTTP request rejects with, after logging it
 */
const synthCustomVendor = async(logger, {credentials, stats, language, voice, text}) => {
  const {vendor, auth_token, custom_tts_url} = credentials;

  try {
    const headers = {
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    };
    // Do not send the literal string "Bearer undefined" when no token is configured.
    if (auth_token) {
      headers['Authorization'] = `Bearer ${auth_token}`;
    }

    const post = bent('POST', 'buffer', headers);
    return await post(custom_tts_url, {
      language,
      format: 'audio/mpeg',
      voice,
      // Match on the tag prefix so that a root tag carrying attributes,
      // e.g. `<speak version="1.0">`, is still reported as SSML.
      type: text.startsWith('<speak') ? 'ssml' : 'text',
      text
    });
  } catch (err) {
    logger.info({err}, `Vendor ${vendor} returned error`);
    throw err;
  }
};
module.exports = synthAudio;

View File

@@ -326,6 +326,47 @@ test('IBM watson speech synth tests', async(t) => {
client.quit();
});
// Exercises synthAudio against a custom ("custom:<name>") TTS vendor, once with
// plain text and once with SSML, expecting neither result to be served from cache.
test('Custom Vendor speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  // NOTE(review): the skip guard below is disabled, so when CUSTOM_VENDOR_TTS_URL
  // is not set the credentials carry no custom_tts_url and this test fails instead
  // of being skipped. Confirm that is intended before re-enabling or deleting it.
  // if (!process.env.CUSTOM_VENDOR_TTS_URL) {
  //   t.pass('skipping Custom Vendor speech synth tests since CUSTOM_VENDOR_TTS_URL not provided');
  //   return t.end();
  // }

  // Both requests are identical apart from the text to synthesize.
  const makeRequest = (text) => ({
    vendor: 'custom:somethingnew',
    credentials: {
      use_for_tts: 1,
      custom_tts_url: process.env.CUSTOM_VENDOR_TTS_URL,
      auth_token: 'some_jwt_token'
    },
    language: 'en-US',
    voice: 'English-US.Female-1',
    text
  });

  try {
    // Plain-text request.
    let result = await synthAudio(stats, makeRequest('This is a test. This is only a test'));
    t.ok(!result.servedFromCache, `successfully synthesized custom vendor audio to ${result.filePath}`);

    // SSML request.
    result = await synthAudio(stats, makeRequest('<speak>This is a test. This is only a test</speak>'));
    t.ok(!result.servedFromCache, `successfully synthesized custom vendor audio to ${result.filePath}`);
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
test('TTS Cache tests', async(t) => {
const fn = require('..');
const {purgeTtsCache, client} = fn(opts, logger);