mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-01-25 02:08:26 +00:00
Merge pull request #113 from vasudevanubrolu/feat/893-azure-ssml
Feat/893 azure ssml
This commit is contained in:
@@ -2,6 +2,7 @@ const JAMBONES_TTS_TRIM_SILENCE = process.env.JAMBONES_TTS_TRIM_SILENCE;
|
|||||||
const JAMBONES_DISABLE_TTS_STREAMING = process.env.JAMBONES_DISABLE_TTS_STREAMING;
|
const JAMBONES_DISABLE_TTS_STREAMING = process.env.JAMBONES_DISABLE_TTS_STREAMING;
|
||||||
const JAMBONES_DISABLE_AZURE_TTS_STREAMING = process.env.JAMBONES_DISABLE_AZURE_TTS_STREAMING;
|
const JAMBONES_DISABLE_AZURE_TTS_STREAMING = process.env.JAMBONES_DISABLE_AZURE_TTS_STREAMING;
|
||||||
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO;
|
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO;
|
||||||
|
const JAMBONES_AZURE_ENABLE_SSML = process.env.JAMBONES_AZURE_ENABLE_SSML;
|
||||||
|
|
||||||
const JAMBONES_HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
|
const JAMBONES_HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
|
||||||
const JAMBONES_HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
|
const JAMBONES_HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
|
||||||
@@ -21,5 +22,6 @@ module.exports = {
|
|||||||
JAMBONES_TTS_CACHE_DURATION_MINS,
|
JAMBONES_TTS_CACHE_DURATION_MINS,
|
||||||
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
|
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
|
||||||
TMP_FOLDER,
|
TMP_FOLDER,
|
||||||
HTTP_TIMEOUT
|
HTTP_TIMEOUT,
|
||||||
|
JAMBONES_AZURE_ENABLE_SSML
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ const {
|
|||||||
JAMBONES_HTTP_PROXY_PORT,
|
JAMBONES_HTTP_PROXY_PORT,
|
||||||
JAMBONES_TTS_CACHE_DURATION_MINS,
|
JAMBONES_TTS_CACHE_DURATION_MINS,
|
||||||
JAMBONES_TTS_TRIM_SILENCE,
|
JAMBONES_TTS_TRIM_SILENCE,
|
||||||
|
JAMBONES_AZURE_ENABLE_SSML
|
||||||
} = require('./config');
|
} = require('./config');
|
||||||
const EXPIRES = JAMBONES_TTS_CACHE_DURATION_MINS;
|
const EXPIRES = JAMBONES_TTS_CACHE_DURATION_MINS;
|
||||||
const OpenAI = require('openai');
|
const OpenAI = require('openai');
|
||||||
@@ -466,7 +467,6 @@ async function _synthOnPremMicrosoft(logger, {
|
|||||||
}) {
|
}) {
|
||||||
const {use_custom_tts, custom_tts_endpoint_url, api_key} = credentials;
|
const {use_custom_tts, custom_tts_endpoint_url, api_key} = credentials;
|
||||||
let content = text;
|
let content = text;
|
||||||
|
|
||||||
if (use_custom_tts && !content.startsWith('<speak')) {
|
if (use_custom_tts && !content.startsWith('<speak')) {
|
||||||
/**
|
/**
|
||||||
* Note: it seems that to use custom voice ssml is required with the voice attribute
|
* Note: it seems that to use custom voice ssml is required with the voice attribute
|
||||||
@@ -479,9 +479,13 @@ async function _synthOnPremMicrosoft(logger, {
|
|||||||
/* microsoft enforces some properties and uses voice xml element so if the user did not supply do it for them */
|
/* microsoft enforces some properties and uses voice xml element so if the user did not supply do it for them */
|
||||||
const words = content.slice(7, -8).trim().replace(/(\r\n|\n|\r)/gm, ' ');
|
const words = content.slice(7, -8).trim().replace(/(\r\n|\n|\r)/gm, ' ');
|
||||||
// eslint-disable-next-line max-len
|
// eslint-disable-next-line max-len
|
||||||
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}">${words}</voice></speak>`;
|
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><lang xml:lang="${language}"><voice name="${voice}">${words}</voice></lang></speak>`;
|
||||||
logger.info({content}, 'synthMicrosoft');
|
logger.info({content}, 'synthMicrosoft');
|
||||||
}
|
}
|
||||||
|
else if (JAMBONES_AZURE_ENABLE_SSML) {
|
||||||
|
// eslint-disable-next-line max-len
|
||||||
|
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}"><lang xml:lang="${language}">${text}</lang></voice></speak>`;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const trimSilence = JAMBONES_TTS_TRIM_SILENCE;
|
const trimSilence = JAMBONES_TTS_TRIM_SILENCE;
|
||||||
@@ -518,19 +522,23 @@ const synthMicrosoft = async(logger, {
|
|||||||
let content = text;
|
let content = text;
|
||||||
if (use_custom_tts && !content.startsWith('<speak')) {
|
if (use_custom_tts && !content.startsWith('<speak')) {
|
||||||
/**
|
/**
|
||||||
* Note: it seems that to use custom voice ssml is required with the voice attribute
|
* Note: it seems that to use custom voice ssml is required with the voice attribute
|
||||||
* Otherwise sending plain text we get "Voice does not match"
|
* Otherwise sending plain text we get "Voice does not match"
|
||||||
*/
|
*/
|
||||||
content = `<speak>${text}</speak>`;
|
content = `<speak>${text}</speak>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (content.startsWith('<speak>')) {
|
if (content.startsWith('<speak>')) {
|
||||||
/* microsoft enforces some properties and uses voice xml element so if the user did not supply do it for them */
|
/* microsoft enforces some properties and uses voice xml element so if the user did not supply do it for them */
|
||||||
const words = content.slice(7, -8).trim().replace(/(\r\n|\n|\r)/gm, ' ');
|
const words = content.slice(7, -8).trim().replace(/(\r\n|\n|\r)/gm, ' ');
|
||||||
// eslint-disable-next-line max-len
|
// eslint-disable-next-line max-len
|
||||||
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}">${words}</voice></speak>`;
|
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><lang xml:lang="${language}"><voice name="${voice}">${words}</voice></lang></speak>`;
|
||||||
logger.info({content}, 'synthMicrosoft');
|
logger.info({content}, 'synthMicrosoft');
|
||||||
}
|
}
|
||||||
|
else if (JAMBONES_AZURE_ENABLE_SSML) {
|
||||||
|
// eslint-disable-next-line max-len
|
||||||
|
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}"><lang xml:lang="${language}">${text}</lang></voice></speak>`;
|
||||||
|
}
|
||||||
if (!JAMBONES_DISABLE_TTS_STREAMING && !JAMBONES_DISABLE_AZURE_TTS_STREAMING &&
|
if (!JAMBONES_DISABLE_TTS_STREAMING && !JAMBONES_DISABLE_AZURE_TTS_STREAMING &&
|
||||||
!renderForCaching && !disableTtsStreaming) {
|
!renderForCaching && !disableTtsStreaming) {
|
||||||
let params = '';
|
let params = '';
|
||||||
|
|||||||
Reference in New Issue
Block a user