mirror of
https://github.com/jambonz/speech-utils.git
synced 2025-12-19 03:37:49 +00:00
Improve handling of TTS cache by adding the file extension to the cache key
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
const fs = require('fs/promises');
|
const fs = require('fs/promises');
|
||||||
const {noopLogger, makeSynthKey} = require('./utils');
|
const {noopLogger, makeSynthKey} = require('./utils');
|
||||||
const EXPIRES = (process.env.JAMBONES_TTS_CACHE_DURATION_MINS || 4 * 60) * 60; // cache tts for 4 hours
|
const {JAMBONES_TTS_CACHE_DURATION_MINS} = require('./config');
|
||||||
|
const EXPIRES = JAMBONES_TTS_CACHE_DURATION_MINS;
|
||||||
|
|
||||||
async function addFileToCache(client, logger, path,
|
async function addFileToCache(client, logger, path,
|
||||||
{account_sid, vendor, language, voice, deploymentId, engine, text}) {
|
{account_sid, vendor, language, voice, deploymentId, engine, text}) {
|
||||||
|
|||||||
17
lib/config.js
Normal file
17
lib/config.js
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
const JAMBONES_TTS_TRIM_SILENCE = process.env.JAMBONES_TTS_TRIM_SILENCE;
|
||||||
|
const JAMBONES_DISABLE_TTS_STREAMING = process.env.JAMBONES_DISABLE_TTS_STREAMING;
|
||||||
|
|
||||||
|
const JAMBONES_HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
|
||||||
|
const JAMBONES_HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
|
||||||
|
const JAMBONES_TTS_CACHE_DURATION_MINS = (parseInt(process.env.JAMBONES_TTS_CACHE_DURATION_MINS) || 4 * 60) * 60; // cache tts for 4 hours
|
||||||
|
|
||||||
|
const TMP_FOLDER = '/tmp';
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
JAMBONES_TTS_TRIM_SILENCE,
|
||||||
|
JAMBONES_DISABLE_TTS_STREAMING,
|
||||||
|
JAMBONES_HTTP_PROXY_IP,
|
||||||
|
JAMBONES_HTTP_PROXY_PORT,
|
||||||
|
JAMBONES_TTS_CACHE_DURATION_MINS,
|
||||||
|
TMP_FOLDER
|
||||||
|
};
|
||||||
@@ -19,7 +19,8 @@ const {
|
|||||||
createNuanceClient,
|
createNuanceClient,
|
||||||
createKryptonClient,
|
createKryptonClient,
|
||||||
createRivaClient,
|
createRivaClient,
|
||||||
noopLogger
|
noopLogger,
|
||||||
|
makeFilePath
|
||||||
} = require('./utils');
|
} = require('./utils');
|
||||||
const getNuanceAccessToken = require('./get-nuance-access-token');
|
const getNuanceAccessToken = require('./get-nuance-access-token');
|
||||||
const {
|
const {
|
||||||
@@ -36,8 +37,13 @@ const {
|
|||||||
const {SynthesizeSpeechRequest} = require('../stubs/riva/proto/riva_tts_pb');
|
const {SynthesizeSpeechRequest} = require('../stubs/riva/proto/riva_tts_pb');
|
||||||
const {AudioEncoding} = require('../stubs/riva/proto/riva_audio_pb');
|
const {AudioEncoding} = require('../stubs/riva/proto/riva_audio_pb');
|
||||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||||
const EXPIRES = (process.env.JAMBONES_TTS_CACHE_DURATION_MINS || 4 * 60) * 60; // cache tts for 4 hours
|
const {
|
||||||
const TMP_FOLDER = '/tmp';
|
JAMBONES_DISABLE_TTS_STREAMING,
|
||||||
|
JAMBONES_HTTP_PROXY_IP,
|
||||||
|
JAMBONES_HTTP_PROXY_PORT,
|
||||||
|
JAMBONES_TTS_CACHE_DURATION_MINS,
|
||||||
|
} = require('./config');
|
||||||
|
const EXPIRES = JAMBONES_TTS_CACHE_DURATION_MINS;
|
||||||
const OpenAI = require('openai');
|
const OpenAI = require('openai');
|
||||||
const getAwsAuthToken = require('./get-aws-sts-token');
|
const getAwsAuthToken = require('./get-aws-sts-token');
|
||||||
|
|
||||||
@@ -149,19 +155,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
|||||||
text
|
text
|
||||||
});
|
});
|
||||||
let filePath;
|
let filePath;
|
||||||
if (['nuance', 'nvidia'].includes(vendor) ||
|
filePath = makeFilePath(vendor, key, salt);
|
||||||
(
|
|
||||||
(process.env.JAMBONES_TTS_TRIM_SILENCE || !process.env.JAMBONES_DISABLE_TTS_STREAMING) &&
|
|
||||||
['microsoft', 'azure'].includes(vendor)
|
|
||||||
) ||
|
|
||||||
(
|
|
||||||
!process.env.JAMBONES_DISABLE_TTS_STREAMING &&
|
|
||||||
['elevenlabs', 'deepgram', 'rimelabs'].includes(vendor)
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
filePath = `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt || ''}`)}.r8`;
|
|
||||||
}
|
|
||||||
else filePath = `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt || ''}`)}.mp3`;
|
|
||||||
debug(`synth key is ${key}`);
|
debug(`synth key is ${key}`);
|
||||||
let cached;
|
let cached;
|
||||||
if (!disableTtsCache) {
|
if (!disableTtsCache) {
|
||||||
@@ -444,7 +438,7 @@ const synthMicrosoft = async(logger, {
|
|||||||
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}">${words}</voice></speak>`;
|
content = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${language}"><voice name="${voice}">${words}</voice></speak>`;
|
||||||
logger.info({content}, 'synthMicrosoft');
|
logger.info({content}, 'synthMicrosoft');
|
||||||
}
|
}
|
||||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||||
let params = '';
|
let params = '';
|
||||||
params += `{api_key=${apiKey}`;
|
params += `{api_key=${apiKey}`;
|
||||||
params += `,language=${language}`;
|
params += `,language=${language}`;
|
||||||
@@ -454,8 +448,8 @@ const synthMicrosoft = async(logger, {
|
|||||||
if (region) params += `,region=${region}`;
|
if (region) params += `,region=${region}`;
|
||||||
if (custom_tts_endpoint) params += `,endpointId=${custom_tts_endpoint}`;
|
if (custom_tts_endpoint) params += `,endpointId=${custom_tts_endpoint}`;
|
||||||
if (custom_tts_endpoint_url) params += `,endpoint=${custom_tts_endpoint_url}`;
|
if (custom_tts_endpoint_url) params += `,endpoint=${custom_tts_endpoint_url}`;
|
||||||
if (process.env.JAMBONES_HTTP_PROXY_IP) params += `,http_proxy_ip=${process.env.JAMBONES_HTTP_PROXY_IP}`;
|
if (JAMBONES_HTTP_PROXY_IP) params += `,http_proxy_ip=${JAMBONES_HTTP_PROXY_IP}`;
|
||||||
if (process.env.JAMBONES_HTTP_PROXY_PORT) params += `,http_proxy_port=${process.env.JAMBONES_HTTP_PROXY_PORT}`;
|
if (JAMBONES_HTTP_PROXY_PORT) params += `,http_proxy_port=${JAMBONES_HTTP_PROXY_PORT}`;
|
||||||
params += '}';
|
params += '}';
|
||||||
return {
|
return {
|
||||||
filePath: `say:${params}${content.replace(/\n/g, ' ')}`,
|
filePath: `say:${params}${content.replace(/\n/g, ' ')}`,
|
||||||
@@ -484,10 +478,10 @@ const synthMicrosoft = async(logger, {
|
|||||||
SpeechSynthesisOutputFormat.Raw8Khz16BitMonoPcm :
|
SpeechSynthesisOutputFormat.Raw8Khz16BitMonoPcm :
|
||||||
SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3;
|
SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3;
|
||||||
|
|
||||||
if (process.env.JAMBONES_HTTP_PROXY_IP && process.env.JAMBONES_HTTP_PROXY_PORT) {
|
if (JAMBONES_HTTP_PROXY_IP && JAMBONES_HTTP_PROXY_PORT) {
|
||||||
logger.debug(
|
logger.debug(
|
||||||
`synthMicrosoft: using proxy ${process.env.JAMBONES_HTTP_PROXY_IP}:${process.env.JAMBONES_HTTP_PROXY_PORT}`);
|
`synthMicrosoft: using proxy ${JAMBONES_HTTP_PROXY_IP}:${JAMBONES_HTTP_PROXY_PORT}`);
|
||||||
speechConfig.setProxy(process.env.JAMBONES_HTTP_PROXY_IP, process.env.JAMBONES_HTTP_PROXY_PORT);
|
speechConfig.setProxy(JAMBONES_HTTP_PROXY_IP, JAMBONES_HTTP_PROXY_PORT);
|
||||||
}
|
}
|
||||||
const synthesizer = new SpeechSynthesizer(speechConfig);
|
const synthesizer = new SpeechSynthesizer(speechConfig);
|
||||||
|
|
||||||
@@ -673,7 +667,7 @@ const synthElevenlabs = async(logger, {
|
|||||||
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
|
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
|
||||||
|
|
||||||
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
|
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
|
||||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||||
let params = '';
|
let params = '';
|
||||||
params += `{api_key=${api_key}`;
|
params += `{api_key=${api_key}`;
|
||||||
params += ',vendor=elevenlabs';
|
params += ',vendor=elevenlabs';
|
||||||
@@ -726,7 +720,7 @@ const synthPlayHT = async(logger, {
|
|||||||
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
|
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
|
||||||
|
|
||||||
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
|
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
|
||||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||||
let params = '';
|
let params = '';
|
||||||
params += `{api_key=${api_key}`;
|
params += `{api_key=${api_key}`;
|
||||||
params += `,user_id=${user_id}`;
|
params += `,user_id=${user_id}`;
|
||||||
@@ -781,7 +775,7 @@ const synthRimelabs = async(logger, {
|
|||||||
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
|
const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
|
||||||
|
|
||||||
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
|
/* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
|
||||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||||
let params = '';
|
let params = '';
|
||||||
params += `{api_key=${api_key}`;
|
params += `{api_key=${api_key}`;
|
||||||
params += `,model_id=${model_id}`;
|
params += `,model_id=${model_id}`;
|
||||||
@@ -823,7 +817,7 @@ const synthRimelabs = async(logger, {
|
|||||||
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
|
const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCaching, disableTtsStreaming}) => {
|
||||||
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
const {api_key, model_id, baseURL, timeout, speed} = credentials;
|
||||||
/* if the env is set to stream then bag out, unless we are specifically rendering to generate a cache file */
|
/* if the env is set to stream then bag out, unless we are specifically rendering to generate a cache file */
|
||||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||||
let params = '';
|
let params = '';
|
||||||
params += `{api_key=${api_key}`;
|
params += `{api_key=${api_key}`;
|
||||||
params += `,model_id=${model_id}`;
|
params += `,model_id=${model_id}`;
|
||||||
@@ -862,7 +856,7 @@ const synthWhisper = async(logger, {credentials, stats, voice, text, renderForCa
|
|||||||
|
|
||||||
const synthDeepgram = async(logger, {credentials, stats, model, text, renderForCaching, disableTtsStreaming}) => {
|
const synthDeepgram = async(logger, {credentials, stats, model, text, renderForCaching, disableTtsStreaming}) => {
|
||||||
const {api_key} = credentials;
|
const {api_key} = credentials;
|
||||||
if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||||
let params = '';
|
let params = '';
|
||||||
params += `{api_key=${api_key}`;
|
params += `{api_key=${api_key}`;
|
||||||
params += ',vendor=deepgram';
|
params += ',vendor=deepgram';
|
||||||
|
|||||||
43
lib/utils.js
43
lib/utils.js
@@ -7,6 +7,7 @@ const HTTP_TIMEOUT = 5000;
|
|||||||
const NUANCE_AUTH_ENDPOINT = 'tts.api.nuance.com:443';
|
const NUANCE_AUTH_ENDPOINT = 'tts.api.nuance.com:443';
|
||||||
const grpc = require('@grpc/grpc-js');
|
const grpc = require('@grpc/grpc-js');
|
||||||
const formurlencoded = require('form-urlencoded');
|
const formurlencoded = require('form-urlencoded');
|
||||||
|
const { JAMBONES_DISABLE_TTS_STREAMING, JAMBONES_TTS_TRIM_SILENCE, TMP_FOLDER } = require('./config');
|
||||||
|
|
||||||
const debug = require('debug')('jambonz:realtimedb-helpers');
|
const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||||
/**
|
/**
|
||||||
@@ -19,7 +20,44 @@ const debug = require('debug')('jambonz:realtimedb-helpers');
|
|||||||
function makeSynthKey({account_sid = '', vendor, language, voice, engine = '', text}) {
  // sha1 digest over the attributes that identify one synthesized utterance
  const digest = crypto
    .createHash('sha1')
    .update(`${language}:${vendor}:${voice}:${engine}:${text}`)
    .digest('hex');

  // key layout: tts[:<account_sid>]:<file extension>:<digest>
  const segments = ['tts'];
  if (account_sid) segments.push(account_sid);
  segments.push(getFileExtension(vendor), digest);
  return segments.join(':');
}
|
||||||
|
|
||||||
|
/**
 * Build the temp-file path for a synthesized audio file.
 *
 * The leading 'tts:' of the key is replaced with 'tts-' followed by the salt,
 * and the vendor's file extension is appended.
 *
 * @param {string} vendor - TTS vendor name, used to pick the file extension
 * @param {string} key - cache key as produced by makeSynthKey
 * @param {string} [salt=''] - optional string inserted after the 'tts-' prefix
 * @returns {string} path of the file under TMP_FOLDER
 */
function makeFilePath(vendor, key, salt = '') {
  const extension = getFileExtension(vendor);
  // The default parameter only covers undefined; a null salt must also map to
  // an empty string so the path never contains the literal text "null".
  return `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt || ''}`)}.${extension}`;
}
|
||||||
|
|
||||||
|
/**
 * Pick the audio file extension for a vendor's synthesized output.
 *
 * - azure / microsoft: 'r8' unless streaming is disabled and silence trimming is off
 * - deepgram / elevenlabs / rimelabs: 'r8' unless streaming is disabled
 * - nuance / nvidia: always 'r8'
 * - anything else: 'mp3'
 *
 * The two settings are tested for truthiness only.
 *
 * @param {string} vendor - TTS vendor name
 * @returns {string} 'r8' or 'mp3'
 */
function getFileExtension(vendor) {
  const mp3Extension = 'mp3';
  const r8Extension = 'r8';

  switch (vendor) {
    case 'azure':
    case 'microsoft':
      if (!JAMBONES_DISABLE_TTS_STREAMING || JAMBONES_TTS_TRIM_SILENCE) {
        return r8Extension;
      } else {
        return mp3Extension;
      }
    case 'deepgram':
    case 'elevenlabs':
    case 'rimelabs': // vendor id is 'rimelabs'; the misspelt 'rimlabs' never matched
      if (!JAMBONES_DISABLE_TTS_STREAMING) {
        return r8Extension;
      } else {
        return mp3Extension;
      }
    case 'nuance':
    case 'nvidia':
      return r8Extension;
    default:
      return mp3Extension;
  }
}
|
||||||
|
|
||||||
const noopLogger = {
|
const noopLogger = {
|
||||||
@@ -123,5 +161,6 @@ module.exports = {
|
|||||||
createRivaClient,
|
createRivaClient,
|
||||||
makeBasicAuthHeader,
|
makeBasicAuthHeader,
|
||||||
NUANCE_AUTH_ENDPOINT,
|
NUANCE_AUTH_ENDPOINT,
|
||||||
noopLogger
|
noopLogger,
|
||||||
|
makeFilePath
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
"test": "NODE_ENV=test node test/ ",
|
"test": "NODE_ENV=test node test/ ",
|
||||||
"coverage": "nyc --reporter html --report-dir ./coverage npm run test",
|
"coverage": "nyc --reporter html --report-dir ./coverage npm run test",
|
||||||
"jslint": "eslint index.js lib",
|
"jslint": "eslint index.js lib",
|
||||||
|
"jslint:fix": "eslint --fix '**/*.js'",
|
||||||
"build": "./build_stubs.sh"
|
"build": "./build_stubs.sh"
|
||||||
},
|
},
|
||||||
"repository": {
|
"repository": {
|
||||||
|
|||||||
Reference in New Issue
Block a user