Merge pull request #84 from jambonz/feat/precache_audio_with_tts_stream

support precache audio with tts stream enabled
This commit is contained in:
Dave Horton
2024-08-12 09:26:00 -04:00
committed by GitHub
4 changed files with 42 additions and 10 deletions

View File

@@ -13,6 +13,11 @@ jobs:
with:
node-version: lts/*
- run: npm install
- name: Install Docker Compose
run: |
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
docker-compose --version
- run: npm run jslint
- run: sudo apt update && sudo apt install -y squid
- run: sudo cp test/squid.conf /etc/squid/squid.conf

View File

@@ -1,6 +1,7 @@
const JAMBONES_TTS_TRIM_SILENCE = process.env.JAMBONES_TTS_TRIM_SILENCE;
const JAMBONES_DISABLE_TTS_STREAMING = process.env.JAMBONES_DISABLE_TTS_STREAMING;
const JAMBONES_DISABLE_AZURE_TTS_STREAMING = process.env.JAMBONES_DISABLE_AZURE_TTS_STREAMING;
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO;
const JAMBONES_HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
const JAMBONES_HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
@@ -18,6 +19,7 @@ module.exports = {
JAMBONES_HTTP_PROXY_IP,
JAMBONES_HTTP_PROXY_PORT,
JAMBONES_TTS_CACHE_DURATION_MINS,
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
TMP_FOLDER,
HTTP_TIMEOUT
};

View File

@@ -44,6 +44,7 @@ const {
JAMBONES_HTTP_PROXY_IP,
JAMBONES_HTTP_PROXY_PORT,
JAMBONES_TTS_CACHE_DURATION_MINS,
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
} = require('./config');
const EXPIRES = JAMBONES_TTS_CACHE_DURATION_MINS;
const OpenAI = require('openai');
@@ -86,7 +87,7 @@ const trimTrailingSilence = (buffer) => {
*/
async function synthAudio(client, createHash, retrieveHash, logger, stats, { account_sid,
vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId,
disableTtsCache, renderForCaching, disableTtsStreaming, options
disableTtsCache, renderForCaching = false, disableTtsStreaming, options
}) {
let audioBuffer;
let servedFromCache = false;
@@ -157,20 +158,42 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
}
}
const key = makeSynthKey({
account_sid,
vendor,
language: language || '',
voice: voice || deploymentId,
engine,
text
text,
renderForCaching
});
let filePath;
filePath = makeFilePath(vendor, key, salt);
filePath = makeFilePath({vendor, key, salt, renderForCaching});
debug(`synth key is ${key}`);
let cached;
if (!disableTtsCache) {
cached = await client.get(key);
/**
* If we are using tts streaming and also precaching audio, audio could have been cached by streaming (r8)
* or here in speech-utils due to precaching (mp3), so we need to check for both keys.
*/
if (!cached && JAMBONES_EAGERLY_PRE_CACHE_AUDIO) {
const preCachekey = makeSynthKey({
account_sid,
vendor,
language: language || '',
voice: voice || deploymentId,
engine,
text,
renderForCaching: true
});
cached = await client.get(preCachekey);
if (cached) {
// Precached audio is available; update filePath to use the precache file extension.
filePath = makeFilePath({vendor, key, salt, renderForCaching: true});
}
}
}
if (cached) {
// found in cache - extend the expiry and use it

View File

@@ -16,29 +16,31 @@ const debug = require('debug')('jambonz:realtimedb-helpers');
*/
//const nuanceClientMap = new Map();
function makeSynthKey({account_sid = '', vendor, language, voice, engine = '', text}) {
function makeSynthKey({
account_sid = '', vendor, language, voice, engine = '', text,
renderForCaching = false}) {
const hash = crypto.createHash('sha1');
hash.update(`${language}:${vendor}:${voice}:${engine}:${text}`);
const hexHashKey = hash.digest('hex');
const accountKey = account_sid ? `:${account_sid}` : '';
const namespace = vendor.startsWith('custom') ? vendor : getFileExtension(vendor);
const namespace = vendor.startsWith('custom') ? vendor : getFileExtension({vendor, renderForCaching});
const key = `tts${accountKey}:${namespace}:${hexHashKey}`;
return key;
}
function makeFilePath(vendor, key, salt = '') {
const extension = getFileExtension(vendor);
function makeFilePath({vendor, key, salt = '', renderForCaching = false}) {
const extension = getFileExtension({vendor, renderForCaching});
return `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt}`)}.${extension}`;
}
function getFileExtension(vendor) {
function getFileExtension({vendor, renderForCaching = false}) {
const mp3Extension = 'mp3';
const r8Extension = 'r8';
switch (vendor) {
case 'azure':
case 'microsoft':
if (!JAMBONES_DISABLE_TTS_STREAMING || JAMBONES_TTS_TRIM_SILENCE) {
if (!renderForCaching && !JAMBONES_DISABLE_TTS_STREAMING || JAMBONES_TTS_TRIM_SILENCE) {
return r8Extension;
} else {
return mp3Extension;
@@ -46,7 +48,7 @@ function getFileExtension(vendor) {
case 'deepgram':
case 'elevenlabs':
case 'rimlabs':
if (!JAMBONES_DISABLE_TTS_STREAMING) {
if (!renderForCaching && !JAMBONES_DISABLE_TTS_STREAMING) {
return r8Extension;
} else {
return mp3Extension;