Merge pull request #7 from jambonz/feat/custom-vendor

custom tts vendor
This commit is contained in:
Dave Horton
2023-03-06 10:13:27 -05:00
committed by GitHub
7 changed files with 1317 additions and 3 deletions
+35 -2
View File
@@ -60,8 +60,9 @@ async function synthAudio(client, logger, stats, {
let rtt;
logger = logger || noopLogger;
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor),
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor) ||
vendor.startsWith('custom'),
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
if ('google' === vendor) {
assert.ok(language, 'synthAudio requires language when google is used');
}
@@ -91,6 +92,8 @@ async function synthAudio(client, logger, stats, {
language = 'en-US'; // WellSaid only supports English atm
assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
assert.ok(!text.startsWith('<speak'), 'wellsaid does not support SSML tags');
} else if (vendor.startsWith('custom')) {
assert.ok(credentials.custom_tts_url, `synthAudio requires custom_tts_url in credentials when ${vendor} is used`);
}
const key = makeSynthKey({
@@ -151,6 +154,9 @@ async function synthAudio(client, logger, stats, {
case 'wellsaid':
audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
break;
case vendor.startsWith('custom') ? vendor : 'cant_match_value':
audioBuffer = await synthCustomVendor(logger, {credentials, stats, language, voice, text});
break;
default:
assert(`synthAudio: unsupported speech vendor ${vendor}`);
}
@@ -433,4 +439,31 @@ const synthNvidia = async(client, logger, {credentials, stats, language, voice,
});
};
/**
 * Synthesize speech through a user-supplied ("custom") TTS HTTP endpoint.
 *
 * Sends a JSON POST of {language, format, voice, type, text} to
 * credentials.custom_tts_url, authenticated with a bearer token, and resolves
 * with the raw response body. Only mp3 ('audio/mpeg') is requested.
 *
 * @param {object} logger - logger exposing info()
 * @param {object} opts
 * @param {object} opts.credentials - {custom_tts_url, auth_token, [vendor]}
 * @param {object} [opts.stats] - accepted for parity with callers; not used here
 * @param {string} opts.language - language code forwarded to the endpoint
 * @param {string} opts.voice - voice name forwarded to the endpoint
 * @param {string} opts.text - plain text or SSML to synthesize
 * @returns {Promise<Buffer>} response body returned by the endpoint
 * @throws rethrows any error raised by the HTTP request after logging it
 */
const synthCustomVendor = async(logger, {credentials, stats, language, voice, text}) => {
  const {vendor, auth_token, custom_tts_url} = credentials;

  try {
    const post = bent('POST', 'buffer', {
      'Authorization': `Bearer ${auth_token}`,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });

    const mp3 = await post(custom_tts_url, {
      language,
      format: 'audio/mpeg',
      voice,
      // Match on '<speak' rather than '<speak>' so that a root element carrying
      // attributes (e.g. <speak version="1.0">) is still reported as SSML.
      type: text.startsWith('<speak') ? 'ssml' : 'text',
      text
    });
    return mp3;
  } catch (err) {
    // credentials may not carry a vendor name; avoid logging "Vendor undefined"
    logger.info({err}, `Vendor ${vendor || 'custom'} returned error`);
    throw err;
  }
};
module.exports = synthAudio;
+25 -1
View File
@@ -1,10 +1,20 @@
version: '3'
version: '3.9'
# User-defined bridge network with a fixed subnet, so every service below can
# be pinned to a static IPv4 address.
networks:
speech-utils:
driver: bridge
ipam:
config:
- subnet: 172.41.0.0/16
services:
# Redis instance published on host port 3379 (container port 6379).
redis:
image: redis:alpine
ports:
- "3379:6379"
networks:
speech-utils:
ipv4_address: 172.41.0.5
redis-auth:
image: redis:alpine
@@ -13,3 +23,17 @@ services:
- "3380:6379"
volumes:
- ./tmp:/tmp
networks:
speech-utils:
ipv4_address: 172.41.0.6
# HTTP scaffold used by the custom-vendor TTS test: the test posts to
# http://127.0.0.1:3100/<key> and reads back /lastRequest/<key>.
# Host port 3100 is mapped to container port 3000.
webhook-tts-scaffold:
image: jambonz/webhook-tts-test-scaffold:latest
ports:
- "3100:3000/tcp"
volumes:
- ./test-apps:/tmp
networks:
speech-utils:
ipv4_address: 172.41.0.10
+49
View File
@@ -4,6 +4,8 @@ const opts = config.get('redis');
const fs = require('fs');
const {makeSynthKey} = require('../lib/utils');
const logger = require('pino')();
const bent = require('bent');
const getJSON = bent('json')
process.on('unhandledRejection', (reason, p) => {
console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
@@ -326,6 +328,53 @@ test('IBM watson speech synth tests', async(t) => {
client.quit();
});
/**
 * Exercises the custom TTS vendor against the local webhook scaffold on
 * 127.0.0.1:3100: one plain-text request and one SSML request. After each
 * synthesis the scaffold's /lastRequest endpoint is queried to verify what
 * was actually sent over the wire.
 */
test('Custom Vendor speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  try {
    // --- plain text request ---
    let result = await synthAudio(stats, {
      vendor: 'custom:somethingnew',
      credentials: {
        use_for_tts: 1,
        custom_tts_url: "http://127.0.0.1:3100/somethingnew",
        auth_token: 'some_jwt_token'
      },
      language: 'en-US',
      voice: 'English-US.Female-1',
      text: 'This is a test. This is only a test',
    });
    t.ok(!result.servedFromCache, `successfully synthesized custom vendor audio to ${result.filePath}`);

    let received = await getJSON(`http://127.0.0.1:3100/lastRequest/somethingnew`);
    t.ok(received.headers.Authorization == 'Bearer some_jwt_token',
      'Custom Vendor Authentication Header is correct');

    // [body field, expected value, assertion message]
    const expectedBody = [
      ['language', 'en-US', 'Custom Vendor Language is correct'],
      ['format', 'audio/mpeg', 'Custom Vendor format is correct'],
      ['voice', 'English-US.Female-1', 'Custom Vendor voice is correct'],
      ['type', 'text', 'Custom Vendor type is correct'],
      ['text', 'This is a test. This is only a test', 'Custom Vendor text is correct'],
    ];
    for (const [field, expected, message] of expectedBody) {
      t.ok(received.body[field] == expected, message);
    }

    // --- SSML request ---
    result = await synthAudio(stats, {
      vendor: 'custom:somethingnew2',
      credentials: {
        use_for_tts: 1,
        custom_tts_url: "http://127.0.0.1:3100/somethingnew2",
        auth_token: 'some_jwt_token'
      },
      language: 'en-US',
      voice: 'English-US.Female-1',
      text: '<speak>This is a test. This is only a test</speak>',
    });
    t.ok(!result.servedFromCache, `successfully synthesized Custom Vendor audio to ${result.filePath}`);

    received = await getJSON(`http://127.0.0.1:3100/lastRequest/somethingnew2`);
    t.ok(received.body.type == 'ssml', 'Custom Vendor type is correct');
    t.ok(received.body.text == '<speak>This is a test. This is only a test</speak>');
  } catch (err) {
    console.error(err);
    t.end(err);
  }

  // always release the redis connection, whether or not the assertions passed
  client.quit();
});
test('TTS Cache tests', async(t) => {
const fn = require('..');
const {purgeTtsCache, client} = fn(opts, logger);
+23
View File
@@ -0,0 +1,23 @@
# Base stage: Node 18 on Alpine plus the native build toolchain.
# NOTE(review): the .builds-deps virtual package is never removed and the final
# stage is built FROM base, so the toolchain ships in the runtime image.
FROM --platform=linux/amd64 node:18.6.0-alpine AS base
RUN apk --update --no-cache add --virtual .builds-deps build-base python3
WORKDIR /opt/app/

# Build stage: install dependencies from the lockfile, then copy the sources.
FROM base AS build
COPY package.json package-lock.json ./
RUN npm ci
COPY . .

# Runtime stage: take the prepared application directory from the build stage.
FROM base
COPY --from=build /opt/app /opt/app/
ARG NODE_ENV
# key=value form; the space-separated "ENV key value" form is legacy syntax
ENV NODE_ENV=$NODE_ENV
CMD [ "node", "app.js" ]
+125
View File
@@ -0,0 +1,125 @@
const express = require('express');
const app = express();
const Websocket = require('ws');
// Port the scaffold listens on; overridable through HTTP_PORT.
const listenPort = process.env.HTTP_PORT || 3000;

// In-memory state shared by the handlers below (never reassigned, only mutated):
const hook_mapping = new Map();    // hook key -> list of recorded HTTP requests
const ws_packet_count = new Map(); // websocket url -> binary bytes received
const ws_metadata = new Map();     // websocket url -> last JSON text message
/**
 * Connection handler for the "listen" websocket.
 *
 * Text frames are parsed as JSON and remembered in ws_metadata under the
 * connection's request url. Binary frames only add their length to a running
 * total, which is written to ws_packet_count when the socket closes.
 *
 * @param {object} socket - websocket connection (must expose on())
 * @param {object} req - the HTTP upgrade request; req.url is used as the key
 */
const recvAudio = (socket, req) => {
  const connectionPath = req.url;
  let receivedLength = 0;
  console.log('received websocket connection');

  const onMessage = (data, isBinary) => {
    if (isBinary) {
      receivedLength += data.length;
      return;
    }
    try {
      const msg = JSON.parse(data);
      console.log({msg}, 'received websocket message');
      ws_metadata.set(connectionPath, msg);
    } catch (err) {
      console.log({err}, 'error parsing websocket message');
    }
  };

  const onError = (err) => {
    console.log({err}, 'listen websocket: error');
  };

  // the total is only published once the connection is finished
  const onClose = () => {
    ws_packet_count.set(connectionPath, receivedLength);
  };

  socket.on('message', onMessage);
  socket.on('error', onError);
  socket.on('close', onClose);
};
// The websocket server does not listen on its own (noServer); connections are
// handed to it from the HTTP server's 'upgrade' event below.
const wsServer = new Websocket.Server({noServer: true});
wsServer.setMaxListeners(0);
wsServer.on('connection', recvAudio);

const server = app.listen(listenPort, () => {
  console.log(`sample jambones app server listening on ${listenPort}`);
});

// Complete the websocket handshake, then announce the new connection so that
// recvAudio receives both the socket and the original upgrade request.
const onUpgrade = (request, socket, head) => {
  console.log('received upgrade request');
  wsServer.handleUpgrade(request, socket, head, (ws) => {
    wsServer.emit('connection', ws, request);
  });
};
server.on('upgrade', onUpgrade);

// Body parsers for form-encoded and JSON payloads.
app.use(express.urlencoded({extended: true}));
app.use(express.json());
/*
 * Catch-all hook: any method on /<key> is recorded under that key and
 * answered with a fixed placeholder JSON body.
 */
app.all('/:key', (req, res) => {
  const {key} = req.params;
  console.log(req.body, `POST /${key}`);
  addRequestToMap(key, req, hook_mapping);
  return res.json({"audio": "content"});
});
// Inspection endpoints for the requests recorded per hook key.

// All requests recorded for <key>, oldest first; 404 when none were recorded.
app.get('/requests/:key', (req, res) => {
  const {key} = req.params;
  if (!hook_mapping.has(key)) {
    return res.sendStatus(404);
  }
  return res.json(hook_mapping.get(key));
});

// Only the most recent request recorded for <key>; 404 when none were recorded.
app.get('/lastRequest/:key', (req, res) => {
  const {key} = req.params;
  if (!hook_mapping.has(key)) {
    return res.sendStatus(404);
  }
  const recorded = hook_mapping.get(key);
  const latest = recorded[recorded.length - 1];
  return res.json(latest);
});
// Websocket inspection: total recorded for the connection opened on /<key>.
// Entries are keyed by the websocket request url, hence the leading slash.
app.get('/ws_packet_count/:key', (req, res) => {
  const wsPath = `/${req.params.key}`;
  console.log(wsPath, ws_packet_count);
  if (!ws_packet_count.has(wsPath)) {
    return res.sendStatus(404);
  }
  const count = ws_packet_count.get(wsPath);
  return res.json({count});
});
// Websocket inspection: last JSON text message received on the connection
// opened on /<key>. Entries are keyed by the websocket request url, hence the
// leading slash. Responds 404 when nothing was recorded for that url.
app.get('/ws_metadata/:key', (req, res) => {
  const key = `/${req.params.key}`;
  // log the map this endpoint actually serves (was ws_packet_count)
  console.log(key, ws_metadata);
  if (ws_metadata.has(key)) {
    return res.json({ metadata: ws_metadata.get(key) });
  } else {
    return res.sendStatus(404);
  }
});
/**
 * Record a snapshot of an incoming request under the given key.
 *
 * The snapshot holds the request url, its headers (rebuilt from the flat
 * [name, value, name, value, ...] rawHeaders list, preserving the original
 * header casing) and the parsed body. Snapshots accumulate in arrival order.
 *
 * @param {string} key - bucket the request is stored under
 * @param {object} req - request exposing url, rawHeaders and body
 * @param {Map} map - key -> array of snapshots; mutated in place
 */
function addRequestToMap(key, req, map) {
  // rawHeaders alternates names (even index) and values (odd index)
  const headerPairs = [];
  for (let idx = 0; idx < req.rawHeaders.length; idx += 2) {
    headerPairs.push([req.rawHeaders[idx], req.rawHeaders[idx + 1]]);
  }

  const snapshot = {
    url: req.url,
    headers: Object.fromEntries(headerPairs),
    body: req.body,
  };

  if (!map.has(key)) {
    map.set(key, [snapshot]);
    return;
  }
  map.get(key).push(snapshot);
}
+1045
View File
File diff suppressed because it is too large Load Diff
+15
View File
@@ -0,0 +1,15 @@
{
"name": "webhook_tts",
"version": "1.0.0",
"description": "simple webhook tts for test purposes",
"main": "app.js",
"scripts": {
"start": "node app"
},
"author": "Dave Horton",
"license": "MIT",
"dependencies": {
"express": "^4.18.2",
"ws": "^8.12.0"
}
}