Files
jambonz-api-server/lib/routes/api/tts-cache.js
2024-01-11 15:59:24 +07:00

127 lines
3.6 KiB
JavaScript

const router = require('express').Router();
const {
parseAccountSid
} = require('./utils');
const SpeechCredential = require('../../models/speech-credential');
const fs = require('fs');
const { v4: uuidv4 } = require('uuid');
const {DbErrorBadRequest} = require('../../utils/errors');
const Account = require('../../models/account');
const sysError = require('../error');
const { getSpeechCredential, decryptCredential } = require('../../utils/speech-utils');
const PCMToMP3Encoder = require('../../record/encoder');
router.delete('/', async(req, res) => {
const {purgeTtsCache} = req.app.locals;
const account_sid = parseAccountSid(req);
if (account_sid) {
await purgeTtsCache({account_sid});
} else {
await purgeTtsCache();
}
res.sendStatus(204);
});
router.get('/', async(req, res) => {
const {getTtsSize} = req.app.locals;
const account_sid = parseAccountSid(req);
let size = 0;
if (account_sid) {
size = await getTtsSize(`tts:${account_sid}:*`);
} else {
size = await getTtsSize();
}
res.status(200).json({size});
});
router.post('/Synthesize', async(req, res) => {
const {logger, synthAudio} = req.app.locals;
try {
const accountSid = parseAccountSid(req);
const body = req.body;
const encodingMp3 = req.body.encodingMp3 || false;
if (!body.speech_credential_sid || !body.text || !body.language || !body.voice) {
throw new DbErrorBadRequest('speech_credential_sid, text, language, voice are all required');
}
const result = await Account.retrieve(accountSid);
if (!result || result.length === 0 || !result[0].is_active) {
throw new DbErrorBadRequest(`Account not found for sid ${accountSid}`);
}
const credentials = await SpeechCredential.retrieve(body.speech_credential_sid);
if (!credentials || credentials.length === 0) {
throw new
DbErrorBadRequest(`There is no available speech credential for ${body.speech_credential_sid}`);
}
const {credential, ...obj} = credentials[0];
decryptCredential(obj, credential, logger, false);
const cred = getSpeechCredential(obj, logger);
const { text, language, engine = 'standard' } = body;
const salt = uuidv4();
/* parse Nuance voices into name and model */
let voice = body.voice;
let model;
if (cred.vendor === 'nuance' && voice) {
const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
if (arr) {
voice = arr[1];
model = arr[2];
}
}
const stats = {
histogram: () => {},
increment: () => {},
};
const { filePath } = await synthAudio(stats, {
account_sid: accountSid,
text,
vendor: cred.vendor,
language,
voice,
engine,
model,
salt,
credentials: cred,
disableTtsCache: false
});
let contentType = 'audio/mpeg';
let readStream = fs.createReadStream(filePath);
if (['nuance', 'nvidia'].includes(cred.vendor) ||
(
process.env.JAMBONES_TTS_TRIM_SILENCE &&
['microsoft', 'azure'].includes(cred.vendor)
)
) {
if (encodingMp3) {
readStream = readStream
.pipe(new PCMToMP3Encoder({
channels: 1,
sampleRate: 8000,
bitRate: 128
}, logger));
} else {
contentType = 'application/octet-stream';
}
}
res.writeHead(200, {
'Content-Type': contentType,
});
readStream.pipe(res);
readStream.on('end', () => {
fs.unlink(filePath, (err) => {
if (err) throw err;
logger.info(`${filePath} was deleted`);
});
});
} catch (err) {
sysError(logger, res, err);
}
});
module.exports = router;