mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-07-04 19:31:49 +00:00
Merge pull request #7 from jambonz/feat/custom-vendor
custom tts vendor
This commit is contained in:
+35
-2
@@ -60,8 +60,9 @@ async function synthAudio(client, logger, stats, {
|
||||
let rtt;
|
||||
logger = logger || noopLogger;
|
||||
|
||||
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor),
|
||||
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
|
||||
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm'].includes(vendor) ||
|
||||
vendor.startsWith('custom'),
|
||||
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid, not ${vendor}`);
|
||||
if ('google' === vendor) {
|
||||
assert.ok(language, 'synthAudio requires language when google is used');
|
||||
}
|
||||
@@ -91,6 +92,8 @@ async function synthAudio(client, logger, stats, {
|
||||
language = 'en-US'; // WellSaid only supports English atm
|
||||
assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
|
||||
assert.ok(!text.startsWith('<speak'), 'wellsaid does not support SSML tags');
|
||||
} else if (vendor.startsWith('custom')) {
|
||||
assert.ok(credentials.custom_tts_url, `synthAudio requires custom_tts_url in credentials when ${vendor} is used`);
|
||||
}
|
||||
|
||||
const key = makeSynthKey({
|
||||
@@ -151,6 +154,9 @@ async function synthAudio(client, logger, stats, {
|
||||
case 'wellsaid':
|
||||
audioBuffer = await synthWellSaid(logger, {credentials, stats, language, voice, text, filePath});
|
||||
break;
|
||||
case vendor.startsWith('custom') ? vendor : 'cant_match_value':
|
||||
audioBuffer = await synthCustomVendor(logger, {credentials, stats, language, voice, text});
|
||||
break;
|
||||
default:
|
||||
assert(`synthAudio: unsupported speech vendor ${vendor}`);
|
||||
}
|
||||
@@ -433,4 +439,31 @@ const synthNvidia = async(client, logger, {credentials, stats, language, voice,
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
/**
 * Synthesize speech through a user-supplied ("custom:*") TTS webhook.
 *
 * Sends a JSON POST of {language, format, voice, type, text} to
 * `credentials.custom_tts_url`, authenticated with a bearer token, and
 * resolves to the response body as returned by bent in 'buffer' mode.
 * Only mp3 is requested (Accept / format are both 'audio/mpeg').
 *
 * @param {object} logger - logger exposing `info()`
 * @param {object} opts
 * @param {object} opts.credentials - provides `vendor`, `auth_token` and `custom_tts_url`
 * @param {object} [opts.stats] - accepted but currently unused by this function
 * @param {string} opts.language - language code forwarded to the webhook
 * @param {string} opts.voice - voice name forwarded to the webhook
 * @param {string} opts.text - plain text or SSML to synthesize
 * @returns {Promise<Buffer>} the audio returned by the webhook
 * @throws rethrows whatever error the HTTP request raised, after logging it
 */
const synthCustomVendor = async(logger, {credentials, stats, language, voice, text}) => {
  const {vendor, auth_token, custom_tts_url} = credentials;

  try {
    const post = bent('POST', 'buffer', {
      'Authorization': `Bearer ${auth_token}`,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });

    // An SSML root element usually carries attributes (<speak version="1.0" ...>),
    // so match on the tag name instead of the literal "<speak>".
    const isSsml = /^<speak[\s>]/.test(text);

    const mp3 = await post(custom_tts_url, {
      language,
      format: 'audio/mpeg',
      voice,
      type: isSsml ? 'ssml' : 'text',
      text
    });

    return mp3;
  } catch (err) {
    logger.info({err}, `Vendor ${vendor} returned error`);
    throw err;
  }
};
|
||||
|
||||
module.exports = synthAudio;
|
||||
|
||||
@@ -1,10 +1,20 @@
|
||||
version: '3'
|
||||
version: '3.9'
|
||||
|
||||
networks:
|
||||
speech-utils:
|
||||
driver: bridge
|
||||
ipam:
|
||||
config:
|
||||
- subnet: 172.41.0.0/16
|
||||
|
||||
services:
|
||||
redis:
|
||||
image: redis:alpine
|
||||
ports:
|
||||
- "3379:6379"
|
||||
networks:
|
||||
speech-utils:
|
||||
ipv4_address: 172.41.0.5
|
||||
|
||||
redis-auth:
|
||||
image: redis:alpine
|
||||
@@ -13,3 +23,17 @@ services:
|
||||
- "3380:6379"
|
||||
volumes:
|
||||
- ./tmp:/tmp
|
||||
networks:
|
||||
speech-utils:
|
||||
ipv4_address: 172.41.0.6
|
||||
|
||||
webhook-tts-scaffold:
|
||||
image: jambonz/webhook-tts-test-scaffold:latest
|
||||
ports:
|
||||
- "3100:3000/tcp"
|
||||
volumes:
|
||||
- ./test-apps:/tmp
|
||||
networks:
|
||||
speech-utils:
|
||||
ipv4_address: 172.41.0.10
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ const opts = config.get('redis');
|
||||
const fs = require('fs');
|
||||
const {makeSynthKey} = require('../lib/utils');
|
||||
const logger = require('pino')();
|
||||
const bent = require('bent');
|
||||
const getJSON = bent('json')
|
||||
|
||||
process.on('unhandledRejection', (reason, p) => {
|
||||
console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
|
||||
@@ -326,6 +328,53 @@ test('IBM watson speech synth tests', async(t) => {
|
||||
client.quit();
|
||||
});
|
||||
|
||||
// Drives synthAudio with two "custom:*" vendors pointed at a local webhook on
// port 3100, then reads back the last request that webhook recorded to check
// the headers and JSON body that were actually sent.
test('Custom Vendor speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  // Same request every time; only the vendor suffix / URL path and the text vary.
  const synthWith = (name, text) => synthAudio(stats, {
    vendor: `custom:${name}`,
    credentials: {
      use_for_tts: 1,
      custom_tts_url: `http://127.0.0.1:3100/${name}`,
      auth_token: 'some_jwt_token'
    },
    language: 'en-US',
    voice: 'English-US.Female-1',
    text
  });

  try {
    // plain text request
    const plainResult = await synthWith('somethingnew', 'This is a test. This is only a test');
    t.ok(!plainResult.servedFromCache,
      `successfully synthesized custom vendor audio to ${plainResult.filePath}`);

    const plainRequest = await getJSON('http://127.0.0.1:3100/lastRequest/somethingnew');
    t.ok(plainRequest.headers.Authorization == 'Bearer some_jwt_token',
      'Custom Vendor Authentication Header is correct');
    t.ok(plainRequest.body.language == 'en-US', 'Custom Vendor Language is correct');
    t.ok(plainRequest.body.format == 'audio/mpeg', 'Custom Vendor format is correct');
    t.ok(plainRequest.body.voice == 'English-US.Female-1', 'Custom Vendor voice is correct');
    t.ok(plainRequest.body.type == 'text', 'Custom Vendor type is correct');
    t.ok(plainRequest.body.text == 'This is a test. This is only a test', 'Custom Vendor text is correct');

    // SSML request
    const ssmlResult = await synthWith('somethingnew2', '<speak>This is a test. This is only a test</speak>');
    t.ok(!ssmlResult.servedFromCache,
      `successfully synthesized Custom Vendor audio to ${ssmlResult.filePath}`);

    const ssmlRequest = await getJSON('http://127.0.0.1:3100/lastRequest/somethingnew2');
    t.ok(ssmlRequest.body.type == 'ssml', 'Custom Vendor type is correct');
    t.ok(ssmlRequest.body.text == '<speak>This is a test. This is only a test</speak>');
  } catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
|
||||
|
||||
test('TTS Cache tests', async(t) => {
|
||||
const fn = require('..');
|
||||
const {purgeTtsCache, client} = fn(opts, logger);
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
# Multi-stage build for a Node.js app that is started with `node app.js`.

# Shared base: pinned Node on Alpine, forced to linux/amd64.
FROM --platform=linux/amd64 node:18.6.0-alpine AS base

# build-base + python3 -- presumably needed to compile native npm modules.
# NOTE(review): these are installed in `base`, so they are also present in the
# final image (the last stage is FROM base).
RUN apk --update --no-cache add --virtual .builds-deps build-base python3

WORKDIR /opt/app/

# Build stage: install the locked dependencies, then copy the sources.
FROM base AS build

# Copy the manifests first so the `npm ci` layer is reused when only sources change.
COPY package.json package-lock.json ./

RUN npm ci

COPY . .

# Final stage: the app directory (including node_modules) from the build stage.
FROM base

COPY --from=build /opt/app /opt/app/

# NODE_ENV is supplied at build time (--build-arg) and exported to the runtime.
ARG NODE_ENV

ENV NODE_ENV=$NODE_ENV

CMD [ "node", "app.js" ]
|
||||
@@ -0,0 +1,125 @@
|
||||
const express = require('express');
|
||||
const app = express();
|
||||
const Websocket = require('ws');
|
||||
// Port to listen on; the HTTP_PORT environment variable overrides the default of 3000.
const listenPort = process.env.HTTP_PORT || 3000;

// In-memory stores filled by the handlers in this file:
//   hook_mapping    - route key      -> array of recorded HTTP requests
//   ws_packet_count - websocket path -> total binary payload length seen on that socket
//   ws_metadata     - websocket path -> last JSON (text) message received
const hook_mapping = new Map();
const ws_packet_count = new Map();
const ws_metadata = new Map();
|
||||
|
||||
/**
 * Connection handler for the "listen" websocket.
 *
 * Text frames are parsed as JSON and the latest one is stored in ws_metadata
 * under the request url; the lengths of binary frames are summed and written
 * to ws_packet_count (same key) once the socket closes.
 *
 * @param {object} socket - websocket emitting 'message', 'error' and 'close'
 * @param {object} req - the upgrade request; only `req.url` is read
 */
const recvAudio = (socket, req) => {
  const path = req.url;
  let packets = 0;
  console.log('received websocket connection');

  // text frame: expected to carry JSON metadata
  const handleText = (data) => {
    try {
      const msg = JSON.parse(data);
      console.log({msg}, 'received websocket message');
      ws_metadata.set(path, msg);
    } catch (err) {
      console.log({err}, 'error parsing websocket message');
    }
  };

  socket.on('message', (data, isBinary) => {
    if (isBinary) {
      packets += data.length;
      return;
    }
    handleText(data);
  });

  socket.on('error', (err) => {
    console.log({err}, 'listen websocket: error');
  });

  socket.on('close', () => {
    ws_packet_count.set(path, packets);
  });
};
|
||||
|
||||
// Websocket server in noServer mode: upgrades are handed to it by the
// 'upgrade' listener on the HTTP server below.
const wsServer = new Websocket.Server({noServer: true});
wsServer.setMaxListeners(0);
wsServer.on('connection', (socket, req) => recvAudio(socket, req));

const server = app.listen(listenPort, () => {
  console.log(`sample jambones app server listening on ${listenPort}`);
});

// Complete the websocket handshake, then announce the new connection.
const onUpgrade = (request, socket, head) => {
  console.log('received upgrade request');
  wsServer.handleUpgrade(request, socket, head, (ws) => {
    wsServer.emit('connection', ws, request);
  });
};
server.on('upgrade', onUpgrade);

// Body parsers for form-encoded and JSON payloads.
app.use(express.urlencoded({extended: true}));
app.use(express.json());
|
||||
|
||||
/*
 * Catch-all webhook: record the incoming request under its :key (any HTTP
 * method) and answer with a fixed JSON payload.
 */
app.all('/:key', (req, res) => {
  const {key} = req.params;
  console.log(req.body, `POST /${key}`);
  addRequestToMap(key, req, hook_mapping);
  return res.json({audio: 'content'});
});
|
||||
|
||||
// Fetch Requests: every request recorded for :key, or 404 when none exist.
app.get('/requests/:key', (req, res) => {
  const {key} = req.params;
  if (!hook_mapping.has(key)) {
    return res.sendStatus(404);
  }
  return res.json(hook_mapping.get(key));
});
|
||||
|
||||
// Most recent request recorded for :key, or 404 when none exist.
app.get('/lastRequest/:key', (req, res) => {
  const {key} = req.params;
  if (!hook_mapping.has(key)) {
    return res.sendStatus(404);
  }
  const recorded = hook_mapping.get(key);
  return res.json(recorded.at(-1));
});
|
||||
|
||||
// WS Fetch: value stored in ws_packet_count for the websocket path "/:key",
// or 404 when nothing has been stored for that path.
app.get('/ws_packet_count/:key', (req, res) => {
  const key = `/${req.params.key}`;
  console.log(key, ws_packet_count);
  if (!ws_packet_count.has(key)) {
    return res.sendStatus(404);
  }
  return res.json({count: ws_packet_count.get(key)});
});
|
||||
|
||||
// Last JSON message stored in ws_metadata for the websocket path "/:key",
// or 404 when nothing has been stored for that path.
app.get('/ws_metadata/:key', (req, res) => {
  const key = `/${req.params.key}`;
  // Log the map this route actually serves (previously logged ws_packet_count).
  console.log(key, ws_metadata);
  if (ws_metadata.has(key)) {
    return res.json({metadata: ws_metadata.get(key)});
  }
  return res.sendStatus(404);
});
|
||||
|
||||
/**
 * Append a snapshot of an HTTP request to the list stored under `key`.
 *
 * The snapshot holds the url, the headers rebuilt from `req.rawHeaders`
 * (alternating name/value entries; a repeated name keeps its last value)
 * and the parsed body.
 *
 * @param {string} key - bucket to record the request under
 * @param {object} req - request exposing `url`, `rawHeaders` and `body`
 * @param {Map} map - store mapping key -> array of snapshots (mutated)
 */
function addRequestToMap(key, req, map) {
  const raw = req.rawHeaders;
  const pairs = [];
  // rawHeaders is a flat [name, value, name, value, ...] list
  for (let idx = 0; idx < raw.length; idx += 2) {
    pairs.push([raw[idx], raw[idx + 1]]);
  }

  const snapshot = {
    url: req.url,
    headers: Object.fromEntries(pairs),
    body: req.body,
  };

  if (!map.has(key)) {
    map.set(key, [snapshot]);
    return;
  }
  map.get(key).push(snapshot);
}
|
||||
Generated
+1045
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"name": "webhook_tts",
|
||||
"version": "1.0.0",
|
||||
"description": "simple webhook tts for test purposes",
|
||||
"main": "app.js",
|
||||
"scripts": {
|
||||
"start": "node app"
|
||||
},
|
||||
"author": "Dave Horton",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"express": "^4.18.2",
|
||||
"ws": "^8.12.0"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user