mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-01-25 02:08:26 +00:00
Merge pull request #84 from jambonz/feat/precache_audio_with_tts_stream
support precache audio with tts stream enabled
This commit is contained in:
5
.github/workflows/ci.yml
vendored
5
.github/workflows/ci.yml
vendored
@@ -13,6 +13,11 @@ jobs:
|
||||
with:
|
||||
node-version: lts/*
|
||||
- run: npm install
|
||||
- name: Install Docker Compose
|
||||
run: |
|
||||
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
|
||||
sudo chmod +x /usr/local/bin/docker-compose
|
||||
docker-compose --version
|
||||
- run: npm run jslint
|
||||
- run: sudo apt update && sudo apt install -y squid
|
||||
- run: sudo cp test/squid.conf /etc/squid/squid.conf
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
const JAMBONES_TTS_TRIM_SILENCE = process.env.JAMBONES_TTS_TRIM_SILENCE;
|
||||
const JAMBONES_DISABLE_TTS_STREAMING = process.env.JAMBONES_DISABLE_TTS_STREAMING;
|
||||
const JAMBONES_DISABLE_AZURE_TTS_STREAMING = process.env.JAMBONES_DISABLE_AZURE_TTS_STREAMING;
|
||||
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO;
|
||||
|
||||
const JAMBONES_HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
|
||||
const JAMBONES_HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
|
||||
@@ -18,6 +19,7 @@ module.exports = {
|
||||
JAMBONES_HTTP_PROXY_IP,
|
||||
JAMBONES_HTTP_PROXY_PORT,
|
||||
JAMBONES_TTS_CACHE_DURATION_MINS,
|
||||
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
|
||||
TMP_FOLDER,
|
||||
HTTP_TIMEOUT
|
||||
};
|
||||
|
||||
@@ -44,6 +44,7 @@ const {
|
||||
JAMBONES_HTTP_PROXY_IP,
|
||||
JAMBONES_HTTP_PROXY_PORT,
|
||||
JAMBONES_TTS_CACHE_DURATION_MINS,
|
||||
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
|
||||
} = require('./config');
|
||||
const EXPIRES = JAMBONES_TTS_CACHE_DURATION_MINS;
|
||||
const OpenAI = require('openai');
|
||||
@@ -86,7 +87,7 @@ const trimTrailingSilence = (buffer) => {
|
||||
*/
|
||||
async function synthAudio(client, createHash, retrieveHash, logger, stats, { account_sid,
|
||||
vendor, language, voice, gender, text, engine, salt, model, credentials, deploymentId,
|
||||
disableTtsCache, renderForCaching, disableTtsStreaming, options
|
||||
disableTtsCache, renderForCaching = false, disableTtsStreaming, options
|
||||
}) {
|
||||
let audioBuffer;
|
||||
let servedFromCache = false;
|
||||
@@ -157,20 +158,42 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const key = makeSynthKey({
|
||||
account_sid,
|
||||
vendor,
|
||||
language: language || '',
|
||||
voice: voice || deploymentId,
|
||||
engine,
|
||||
text
|
||||
text,
|
||||
renderForCaching
|
||||
});
|
||||
let filePath;
|
||||
filePath = makeFilePath(vendor, key, salt);
|
||||
filePath = makeFilePath({vendor, key, salt, renderForCaching});
|
||||
debug(`synth key is ${key}`);
|
||||
let cached;
|
||||
if (!disableTtsCache) {
|
||||
cached = await client.get(key);
|
||||
/**
|
||||
* If we are using tts streaming and also precaching audio, audio could have been cached by streaming (r8)
|
||||
* or here in speech-utils due to precaching (mp3), so we need to check for both keys.
|
||||
*/
|
||||
if (!cached && JAMBONES_EAGERLY_PRE_CACHE_AUDIO) {
|
||||
const preCachekey = makeSynthKey({
|
||||
account_sid,
|
||||
vendor,
|
||||
language: language || '',
|
||||
voice: voice || deploymentId,
|
||||
engine,
|
||||
text,
|
||||
renderForCaching: true
|
||||
});
|
||||
cached = await client.get(preCachekey);
|
||||
if (cached) {
|
||||
// Precached audio is available; update the file path to use the precache file extension.
|
||||
filePath = makeFilePath({vendor, key, salt, renderForCaching: true});
|
||||
}
|
||||
}
|
||||
}
|
||||
if (cached) {
|
||||
// found in cache - extend the expiry and use it
|
||||
|
||||
16
lib/utils.js
16
lib/utils.js
@@ -16,29 +16,31 @@ const debug = require('debug')('jambonz:realtimedb-helpers');
|
||||
*/
|
||||
//const nuanceClientMap = new Map();
|
||||
|
||||
function makeSynthKey({account_sid = '', vendor, language, voice, engine = '', text}) {
|
||||
function makeSynthKey({
|
||||
account_sid = '', vendor, language, voice, engine = '', text,
|
||||
renderForCaching = false}) {
|
||||
const hash = crypto.createHash('sha1');
|
||||
hash.update(`${language}:${vendor}:${voice}:${engine}:${text}`);
|
||||
const hexHashKey = hash.digest('hex');
|
||||
const accountKey = account_sid ? `:${account_sid}` : '';
|
||||
const namespace = vendor.startsWith('custom') ? vendor : getFileExtension(vendor);
|
||||
const namespace = vendor.startsWith('custom') ? vendor : getFileExtension({vendor, renderForCaching});
|
||||
const key = `tts${accountKey}:${namespace}:${hexHashKey}`;
|
||||
return key;
|
||||
}
|
||||
|
||||
function makeFilePath(vendor, key, salt = '') {
|
||||
const extension = getFileExtension(vendor);
|
||||
function makeFilePath({vendor, key, salt = '', renderForCaching = false}) {
|
||||
const extension = getFileExtension({vendor, renderForCaching});
|
||||
return `${TMP_FOLDER}/${key.replace('tts:', `tts-${salt}`)}.${extension}`;
|
||||
}
|
||||
|
||||
function getFileExtension(vendor) {
|
||||
function getFileExtension({vendor, renderForCaching = false}) {
|
||||
const mp3Extension = 'mp3';
|
||||
const r8Extension = 'r8';
|
||||
|
||||
switch (vendor) {
|
||||
case 'azure':
|
||||
case 'microsoft':
|
||||
if (!JAMBONES_DISABLE_TTS_STREAMING || JAMBONES_TTS_TRIM_SILENCE) {
|
||||
if (!renderForCaching && !JAMBONES_DISABLE_TTS_STREAMING || JAMBONES_TTS_TRIM_SILENCE) {
|
||||
return r8Extension;
|
||||
} else {
|
||||
return mp3Extension;
|
||||
@@ -46,7 +48,7 @@ function getFileExtension(vendor) {
|
||||
case 'deepgram':
|
||||
case 'elevenlabs':
|
||||
case 'rimlabs':
|
||||
if (!JAMBONES_DISABLE_TTS_STREAMING) {
|
||||
if (!renderForCaching && !JAMBONES_DISABLE_TTS_STREAMING) {
|
||||
return r8Extension;
|
||||
} else {
|
||||
return mp3Extension;
|
||||
|
||||
Reference in New Issue
Block a user