This commit is contained in:
Quan HL
2023-08-25 13:49:04 +07:00
parent 40f51e7509
commit b1049aad7f

View File

@@ -303,7 +303,65 @@ const synthIbm = async(logger, {credentials, stats, voice, text}) => {
}
};
/**
 * Entry point for Microsoft (Azure) text-to-speech.
 *
 * Picks the transport from the environment: when ONPREM_AZURE_SYNTHESIZE_BY_HTTP
 * is set to a non-empty value the request goes through _synthMicrosoftByHttp,
 * otherwise through _synthMicrosoftBySdk. Only the six options listed below are
 * forwarded; any other property on the options object is dropped.
 *
 * @param {object} logger - logger instance handed through to the implementation
 * @param {object} opts
 * @param {object} opts.credentials - Microsoft speech credentials
 * @param {object} opts.stats - stats collector, forwarded untouched
 * @param {string} opts.language - language code
 * @param {string} opts.voice - voice name
 * @param {string} opts.text - plain text or SSML to synthesize
 * @param {string} opts.filePath - forwarded untouched to the implementation
 * @returns {Promise<*>} whatever the selected implementation resolves with
 */
async function synthMicrosoft(logger, {
  credentials,
  stats,
  language,
  voice,
  text,
  filePath
}) {
  const synthFunc = process.env.ONPREM_AZURE_SYNTHESIZE_BY_HTTP ?
    _synthMicrosoftByHttp : _synthMicrosoftBySdk;
  return synthFunc(logger, {
    credentials,
    stats,
    language,
    voice,
    text,
    filePath
  });
}
/**
 * Synthesize speech by POSTing the request body to the HTTP endpoint given in
 * credentials.custom_tts_endpoint_url.
 *
 * The body starts out as the caller's text. If custom TTS is enabled and the
 * text is not already SSML, it is wrapped in a bare <speak> element; a body
 * that starts with a bare <speak> tag is then rewritten into a full SSML
 * document carrying the language and voice.
 *
 * @param {object} logger - logger exposing info()
 * @param {object} opts
 * @param {object} opts.credentials - reads use_custom_tts and custom_tts_endpoint_url
 * @param {object} opts.stats - unused here; accepted for signature parity
 * @param {string} opts.language - value written to the xml:lang attribute
 * @param {string} opts.voice - value written to the <voice name="..."> attribute
 * @param {string} opts.text - plain text or SSML to synthesize
 * @param {string} opts.filePath - unused here; accepted for signature parity
 * @returns {Promise<Buffer>} the response body returned by the endpoint (mp3 output format is requested)
 * @throws rethrows any error raised by the HTTP request after logging it
 */
async function _synthMicrosoftByHttp(logger, {
  credentials,
  stats,
  language,
  voice,
  text,
  filePath
}) {
  const {use_custom_tts, custom_tts_endpoint_url} = credentials;
  // the request body; starts as the caller's text and may be upgraded to SSML below
  let content = text;
  if (use_custom_tts && !content.startsWith('<speak')) {
    /**
     * Note: it seems that to use custom voice ssml is required with the voice attribute
     * Otherwise sending plain text we get "Voice does not match"
     */
    content = `<speak>${text}</speak>`;
  }
  if (content.startsWith('<speak>')) {
    /* microsoft enforces some properties and uses voice xml element so if the user did not supply do it for them */
    // slice(7, -8) strips the leading "<speak>" and the trailing "</speak>"
    const words = content.slice(7, -8).trim().replace(/(\r\n|\n|\r)/gm, ' ');
    // eslint-disable-next-line max-len
    content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}">${words}</voice></speak>`;
    logger.info({content}, 'synthMicrosoft');
  }
  try {
    const post = bent('POST', 'buffer', {
      'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
      'Content-Type': 'application/ssml+xml'
    });
    const mp3 = await post(custom_tts_endpoint_url, content);
    return mp3;
  } catch (err) {
    logger.info({err}, '_synthMicrosoftByHttp returned error');
    throw err;
  }
}
const _synthMicrosoftBySdk = async(logger, {
credentials,
stats,
language,