Compare commits

...

30 Commits

Author SHA1 Message Date
Quan HL
7f904930ae fix azure cannot download mp3 if encodingMp3 = true 2024-05-03 19:43:45 +07:00
Hoan Luu Huu
c0fab2880b fix cannot send multipart to aws due to min size (#319) 2024-05-03 07:37:38 -04:00
Hoan Luu Huu
ce2fa392a4 support aws speech by roleArn (#313)
* support aws speech by roleArn

* support 3 types of aws  credentials

* wip

* wip

* update speech util version
2024-05-02 07:57:22 -04:00
Hoan Luu Huu
3b47162d13 Feat/record with pipeline (#318)
* use pipeline for nodejs streams

* use pipeline for nodejs streams
2024-04-30 07:39:24 -04:00
Hoan Luu Huu
b765232d4f api server cannot synthesize text after upgrade latest speech-utils (#317)
* api server cannot synthesize text after upgrade latest speech-utils

* wip

* add testcase for synthesize text

* fix synthesize testcase
2024-04-29 19:48:34 -04:00
Dave Horton
2436bea6ea add support for LCC updateCall with conferenceParticipantState (#296)
* add support for LCC updateCall with conferenceParticipantState

* wip

* wip
2024-04-22 11:06:08 -04:00
Dave Horton
f67abddbd4 bug: attempting to add duplicate dns records on hosted system (#312) 2024-04-19 18:13:27 -04:00
Hoan Luu Huu
39fcb17dec support mod_rimelabs_tts (#310)
* support mod_rimelabs_tts

* update speech utils 0.0.51
2024-04-12 07:25:04 -04:00
Hoan Luu Huu
80418aa7e5 check playht can fetch voices when adding new speech credential (#309)
* check playht can fetch voices when adding new speech credential

* wip

* wip

* wip

* wip
2024-04-12 07:01:13 -04:00
Hoan Luu Huu
b21d10eb3e fetch playht custom voice (#307) 2024-04-09 08:48:18 -04:00
Hoan Luu Huu
7875eb51b9 playht should return list of voice match voice engine configured at speech credentials (#306) 2024-04-09 06:53:17 -04:00
Hoan Luu Huu
e2c1383723 support mod_playht_tts (#304)
* support mod_playht_tts

* wip

* wip

* wip

* wip

* wip

* update speech utils version
2024-04-08 10:21:29 -04:00
Dave Horton
40de2c5945 option_ping was incorrectly removed, adding back (#305) 2024-04-08 08:56:31 -04:00
Dave Horton
3a299bc3ca update to speech utils with azure 1.36.0 (#303) 2024-04-07 17:45:33 -04:00
Dave Horton
70c9407742 update to speech utils with azure 1.36.0 2024-04-07 12:16:55 -04:00
Dave Horton
dba66d58fc back out column addition of -register_use_tls 2024-04-06 13:48:26 -04:00
Dave Horton
0ff3d22faf Revert "feat send options ping for sip gateway (#273)"
This reverts commit a4792a521f.
2024-04-06 13:27:32 -04:00
Hoan Luu Huu
187a428a75 register use tls (#302) 2024-04-04 08:02:29 -04:00
Hoan Luu Huu
a4792a521f feat send options ping for sip gateway (#273)
* feat send options ping for sip gateway

* update upgrade db script to have 8006
2024-03-30 09:14:29 -04:00
Dave Horton
3ac9693735 update speech-utils with fixes for deepgram production api and tts streaming 2024-03-24 08:15:00 -04:00
Dave Horton
3ad54a0e72 update to released deepgram tts voices (#299) 2024-03-13 09:16:12 -04:00
Hoan Luu Huu
bd8fb2f9db remove use_streaming from speech credential (#294)
* remove use_streaming from speech credential

* wip
2024-02-20 08:01:33 -05:00
Dave Horton
32b317ae68 update to latest speech-utils 2024-02-12 21:11:49 -05:00
Hoan Luu Huu
40e8d08727 support deepgram tts onprem (#292)
* support deepgram tts onprem

* wip

* wip

* deepgram disable speech test if api_key is missing
2024-02-12 09:27:13 -05:00
Hoan Luu Huu
256ca440a0 add use_streaming flag for elevenlabs and whisper (#290)
* add use_streaming flag for elevenlabs (not for whisper yet)
---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-02-12 09:18:49 -05:00
Markus Frindt
68d73345ef Improve Swagger file, add login route, fix swagger linting (#291)
Co-authored-by: Markus Frindt <m.frindt@cognigy.com>
2024-02-06 12:10:31 -05:00
Hoan Luu Huu
54dd72ff66 fetch list of tts voices from provider (#289)
* fetch list of tts voices from provider

* revert serve-integration

* fix

* fix for aws

* fix for aws

* fix for aws

* update speech-utils version
2024-01-25 12:03:02 -05:00
Dave Horton
832a4e8032 update db-helpers 2024-01-17 13:24:51 -05:00
Hoan Luu Huu
33c3b99e2e update paid account to active if it's in deactivated (#287)
* update paid account to active if it's in deactivated

* fix review comment
2024-01-17 09:20:38 -05:00
Hoan Luu Huu
8b2a2e196e Feat/record upload buffer (#285)
* uploader with buffer for google and azure

* wip

* wip

* wip
2024-01-15 09:51:10 -05:00
23 changed files with 4597 additions and 13179 deletions

2
app.js
View File

@@ -52,6 +52,7 @@ const {
getTtsVoices,
getTtsSize,
purgeTtsCache,
getAwsAuthToken,
synthAudio
} = require('@jambonz/speech-utils')({}, logger);
const {
@@ -95,6 +96,7 @@ app.locals = {
deleteKey,
getTtsVoices,
getTtsSize,
getAwsAuthToken,
purgeTtsCache,
synthAudio,
lookupAppBySid,

View File

@@ -162,7 +162,7 @@ regex VARCHAR(32) NOT NULL COMMENT 'regex-based pattern match against dialed num
description VARCHAR(1024),
priority INTEGER NOT NULL COMMENT 'lower priority routes are attempted first',
PRIMARY KEY (lcr_route_sid)
) COMMENT='An ordered list of digit patterns in an LCR table. The pat';
) COMMENT='An ordered list of digit patterns in an LCR table. The patterns are tested in sequence until one matches';
CREATE TABLE lcr
(
@@ -173,7 +173,7 @@ default_carrier_set_entry_sid CHAR(36) COMMENT 'default carrier/route to use whe
service_provider_sid CHAR(36),
account_sid CHAR(36),
PRIMARY KEY (lcr_sid)
) COMMENT='An LCR (least cost routing) table that is used by a service ';
) COMMENT='An LCR (least cost routing) table that is used by a service provider or account to make decisions about routing outbound calls when multiple carriers are available.';
CREATE TABLE password_settings
(
@@ -458,6 +458,7 @@ inbound BOOLEAN NOT NULL COMMENT 'if true, whitelist this IP to allow inbound ca
outbound BOOLEAN NOT NULL COMMENT 'if true, include in least-cost routing when placing calls to the PSTN',
voip_carrier_sid CHAR(36) NOT NULL,
is_active BOOLEAN NOT NULL DEFAULT 1,
send_options_ping BOOLEAN NOT NULL DEFAULT 0,
pad_crypto BOOLEAN NOT NULL DEFAULT 0,
protocol ENUM('udp','tcp','tls', 'tls/srtp') DEFAULT 'udp' COMMENT 'Outbound call protocol',
PRIMARY KEY (sip_gateway_sid)
@@ -495,7 +496,7 @@ messaging_hook_sid CHAR(36) COMMENT 'webhook to call for inbound SMS/MMS ',
app_json TEXT,
speech_synthesis_vendor VARCHAR(64) NOT NULL DEFAULT 'google',
speech_synthesis_language VARCHAR(12) NOT NULL DEFAULT 'en-US',
speech_synthesis_voice VARCHAR(64),
speech_synthesis_voice VARCHAR(256),
speech_synthesis_label VARCHAR(64),
speech_recognizer_vendor VARCHAR(64) NOT NULL DEFAULT 'google',
speech_recognizer_language VARCHAR(64) NOT NULL DEFAULT 'en-US',
@@ -503,7 +504,7 @@ speech_recognizer_label VARCHAR(64),
use_for_fallback_speech BOOLEAN DEFAULT false,
fallback_speech_synthesis_vendor VARCHAR(64),
fallback_speech_synthesis_language VARCHAR(12),
fallback_speech_synthesis_voice VARCHAR(64),
fallback_speech_synthesis_voice VARCHAR(256),
fallback_speech_synthesis_label VARCHAR(64),
fallback_speech_recognizer_vendor VARCHAR(64),
fallback_speech_recognizer_language VARCHAR(64),

View File

@@ -551,7 +551,7 @@
</location>
<size>
<width>293.00</width>
<height>540.00</height>
<height>560.00</height>
</size>
<zorder>6</zorder>
<SQLField>
@@ -2332,7 +2332,7 @@
</location>
<size>
<width>281.00</width>
<height>240.00</height>
<height>260.00</height>
</size>
<zorder>7</zorder>
<SQLField>
@@ -2399,10 +2399,18 @@
<notNull><![CDATA[1]]></notNull>
<uid><![CDATA[27D4A5BD-8093-4ADD-B5B5-D546844206F9]]></uid>
</SQLField>
<SQLField>
<name><![CDATA[send_options_ping]]></name>
<type><![CDATA[BOOLEAN]]></type>
<defaultValue><![CDATA[0]]></defaultValue>
<notNull><![CDATA[1]]></notNull>
<uid><![CDATA[E04C19A2-12BF-443F-AB61-96990224A18D]]></uid>
</SQLField>
<SQLField>
<name><![CDATA[pad_crypto]]></name>
<type><![CDATA[BOOLEAN]]></type>
<defaultValue><![CDATA[0]]></defaultValue>
<forcedUnique><![CDATA[0]]></forcedUnique>
<notNull><![CDATA[1]]></notNull>
<uid><![CDATA[C5C0043B-100A-4476-BF01-BE0777AE27C0]]></uid>
</SQLField>
@@ -2560,7 +2568,7 @@
</SQLField>
<SQLField>
<name><![CDATA[speech_synthesis_voice]]></name>
<type><![CDATA[VARCHAR(64)]]></type>
<type><![CDATA[VARCHAR(256)]]></type>
<notNull><![CDATA[0]]></notNull>
<uid><![CDATA[929D66F0-64B9-4D7C-AB4B-24F131E1178F]]></uid>
</SQLField>
@@ -2610,7 +2618,7 @@
</SQLField>
<SQLField>
<name><![CDATA[fallback_speech_synthesis_voice]]></name>
<type><![CDATA[VARCHAR(64)]]></type>
<type><![CDATA[VARCHAR(256)]]></type>
<notNull><![CDATA[0]]></notNull>
<uid><![CDATA[6A0E92C9-32B9-4179-A893-3DADF5DD7728]]></uid>
</SQLField>
@@ -3097,17 +3105,17 @@
<overviewPanelHidden><![CDATA[0]]></overviewPanelHidden>
<pageBoundariesVisible><![CDATA[0]]></pageBoundariesVisible>
<PageGridVisible><![CDATA[0]]></PageGridVisible>
<RightSidebarWidth><![CDATA[1924.000000]]></RightSidebarWidth>
<RightSidebarWidth><![CDATA[1235.000000]]></RightSidebarWidth>
<sidebarIndex><![CDATA[2]]></sidebarIndex>
<snapToGrid><![CDATA[0]]></snapToGrid>
<SourceSidebarWidth><![CDATA[0.000000]]></SourceSidebarWidth>
<SourceSidebarWidth><![CDATA[312.000000]]></SourceSidebarWidth>
<SQLEditorFileFormatVersion><![CDATA[4]]></SQLEditorFileFormatVersion>
<uid><![CDATA[58C99A00-06C9-478C-A667-C63842E088F3]]></uid>
<windowHeight><![CDATA[985.000000]]></windowHeight>
<windowLocationX><![CDATA[-1307.000000]]></windowLocationX>
<windowLocationY><![CDATA[1008.000000]]></windowLocationY>
<windowScrollOrigin><![CDATA[{1.5, 786}]]></windowScrollOrigin>
<windowWidth><![CDATA[2201.000000]]></windowWidth>
<windowHeight><![CDATA[870.000000]]></windowHeight>
<windowLocationX><![CDATA[-1164.000000]]></windowLocationX>
<windowLocationY><![CDATA[1131.000000]]></windowLocationY>
<windowScrollOrigin><![CDATA[{0.5, 0}]]></windowScrollOrigin>
<windowWidth><![CDATA[1512.000000]]></windowWidth>
</SQLDocumentInfo>
<AllowsIndexRenamingOnInsert><![CDATA[1]]></AllowsIndexRenamingOnInsert>
<defaultLabelExpanded><![CDATA[1]]></defaultLabelExpanded>

View File

@@ -190,7 +190,12 @@ const sql = {
'ALTER TABLE google_custom_voices ADD FOREIGN KEY speech_credential_sid_idxfk (speech_credential_sid) REFERENCES speech_credentials (speech_credential_sid) ON DELETE CASCADE',
'ALTER TABLE clients ADD COLUMN allow_direct_queue_calling BOOLEAN NOT NULL DEFAULT 1',
'ALTER TABLE clients ADD COLUMN allow_direct_user_calling BOOLEAN NOT NULL DEFAULT 1',
'ALTER TABLE clients ADD COLUMN allow_direct_app_calling BOOLEAN NOT NULL DEFAULT 1'
'ALTER TABLE clients ADD COLUMN allow_direct_app_calling BOOLEAN NOT NULL DEFAULT 1',
],
9000: [
'ALTER TABLE sip_gateways ADD COLUMN send_options_ping BOOLEAN NOT NULL DEFAULT 0',
'ALTER TABLE applications MODIFY COLUMN speech_synthesis_voice VARCHAR(256)',
'ALTER TABLE applications MODIFY COLUMN fallback_speech_synthesis_voice VARCHAR(256)',
]
};
@@ -223,6 +228,7 @@ const doIt = async() => {
if (val < 8003) upgrades.push(...sql['8003']);
if (val < 8004) upgrades.push(...sql['8004']);
if (val < 8005) upgrades.push(...sql['8005']);
if (val < 9000) upgrades.push(...sql['9000']);
// perform all upgrades
logger.info({upgrades}, 'applying schema upgrades..');

View File

@@ -199,8 +199,9 @@ class Account extends Model {
debug(r3, 'Account.activateSubscription - replaced old subscription');
/* update account.plan to paid, if it isnt already */
/* update account.is_active to 1, if account is deactivated */
await promisePool.execute(
'UPDATE accounts SET plan_type = \'paid\' WHERE account_sid = ?',
'UPDATE accounts SET plan_type = \'paid\', is_active = 1 WHERE account_sid = ?',
[account_sid]);
return true;
}

View File

@@ -61,6 +61,10 @@ VoipCarrier.fields = [
name: 'requires_register',
type: 'number'
},
{
name: 'register_use_tls',
type: 'number'
},
{
name: 'register_username',
type: 'string'

View File

@@ -1,28 +1,58 @@
const { Writable } = require('stream');
const { BlobServiceClient } = require('@azure/storage-blob');
const { v4: uuidv4 } = require('uuid');
const streamBuffers = require('stream-buffers');
class AzureStorageUploadStream extends Writable {
constructor(logger, opts) {
super(opts);
const blobServiceClient = BlobServiceClient.fromConnectionString(opts.connection_string);
this.blockBlobClient = blobServiceClient.getContainerClient(opts.bucketName).getBlockBlobClient(opts.Key);
this.metadata = opts.metadata;
this.blocks = [];
this.bufferSize = 2 * 1024 * 1024; // Buffer size set to 2MB
this.buffer = new streamBuffers.WritableStreamBuffer({
initialSize: this.bufferSize,
incrementAmount: this.bufferSize
});
}
async _write(chunk, encoding, callback) {
const blockID = uuidv4().replace(/-/g, '');
this.blocks.push(blockID);
try {
await this.blockBlobClient.stageBlock(blockID, chunk, chunk.length);
this.buffer.write(chunk, encoding);
if (this.buffer.size() >= this.bufferSize) {
const blockID = uuidv4().replace(/-/g, '');
this.blocks.push(blockID);
try {
const dataToWrite = this.buffer.getContents();
await this.blockBlobClient.stageBlock(blockID, dataToWrite, dataToWrite.length);
callback();
} catch (error) {
callback(error);
}
} else {
callback();
} catch (error) {
callback(error);
}
}
async _final(callback) {
// Write any remaining data in buffer
if (this.buffer.size() > 0) {
const remainingData = this.buffer.getContents();
const blockID = uuidv4().replace(/-/g, '');
this.blocks.push(blockID);
try {
await this.blockBlobClient.stageBlock(blockID, remainingData, remainingData.length);
} catch (error) {
callback(error);
return;
}
}
try {
await this.blockBlobClient.commitBlockList(this.blocks);
// remove all null/undefined props

View File

@@ -1,5 +1,6 @@
const { Storage } = require('@google-cloud/storage');
const { Writable } = require('stream');
const streamBuffers = require('stream-buffers');
class GoogleStorageUploadStream extends Writable {
@@ -12,18 +13,38 @@ class GoogleStorageUploadStream extends Writable {
this.gcsFile = storage.bucket(opts.bucketName).file(opts.Key);
this.writeStream = this.gcsFile.createWriteStream();
this.bufferSize = 2 * 1024 * 1024; // Buffer size set to 2MB
this.buffer = new streamBuffers.WritableStreamBuffer({
initialSize: this.bufferSize,
incrementAmount: this.bufferSize
});
this.writeStream.on('error', (err) => this.logger.error(err));
this.writeStream.on('finish', () => {
this.logger.info('google storage Upload completed.');
this.logger.info('Google storage Upload completed.');
this._addMetadata();
});
}
_write(chunk, encoding, callback) {
this.writeStream.write(chunk, encoding, callback);
this.buffer.write(chunk, encoding);
// Write to GCS when buffer reaches desired size
if (this.buffer.size() >= this.bufferSize) {
const dataToWrite = this.buffer.getContents();
this.writeStream.write(dataToWrite, callback);
} else {
callback();
}
}
_final(callback) {
// Write any remaining data in the buffer to GCS
if (this.buffer.size() > 0) {
const remainingData = this.buffer.getContents();
this.writeStream.write(remainingData);
}
this.writeStream.end();
this.writeStream.once('finish', callback);
}
@@ -33,7 +54,7 @@ class GoogleStorageUploadStream extends Writable {
await this.gcsFile.setMetadata({metadata: this.metadata});
this.logger.info('Google storage Upload and metadata setting completed.');
} catch (err) {
this.logger.error(err, 'Google storage An error occurred while setting metadata');
this.logger.error(err, 'Google storage An error occurred while setting metadata');
}
}
}

View File

@@ -3,6 +3,7 @@ const Websocket = require('ws');
const PCMToMP3Encoder = require('./encoder');
const wav = require('wav');
const { getUploader } = require('./utils');
const { pipeline } = require('stream');
async function upload(logger, socket) {
socket._recvInitialMetadata = false;
@@ -60,22 +61,19 @@ async function upload(logger, socket) {
bitrate: 128
}, logger);
}
const handleError = (err, streamType) => {
logger.error(
{ err },
`Error while streaming for vendor: ${obj.vendor}, pipe: ${streamType}: ${err.message}`
);
};
/* start streaming data */
const duplex = Websocket.createWebSocketStream(socket);
duplex
.on('error', (err) => handleError(err, 'duplex'))
.pipe(encoder)
.on('error', (err) => handleError(err, 'encoder'))
.pipe(uploadStream)
.on('error', (err) => handleError(err, 'uploadStream'));
pipeline(
Websocket.createWebSocketStream(socket),
encoder,
uploadStream,
(error) => {
if (error) {
logger.error({ error }, 'pipeline error, cannot upload data to storage');
socket.close();
}
}
);
} else {
logger.info(`account ${accountSid} does not have any bucket credential, close the socket`);
socket.close();

View File

@@ -265,7 +265,8 @@ function validateUpdateCall(opts) {
'sip_request',
'record',
'tag',
'dtmf'
'dtmf',
'conferenceParticipantAction'
]
.reduce((acc, prop) => (opts[prop] ? ++acc : acc), 0);
@@ -316,6 +317,19 @@ function validateUpdateCall(opts) {
if (opts.tag && (typeof opts.tag !== 'object' || Array.isArray(opts.tag) || opts.tag === null)) {
throw new DbErrorBadRequest('invalid tag data');
}
if (opts.conferenceParticipantAction) {
if (!['tag', 'untag', 'coach', 'uncoach', 'mute', 'unmute', 'hold', 'unhold']
.includes(opts.conferenceParticipantAction.action)) {
throw new DbErrorBadRequest(
`conferenceParticipantAction invalid action property ${opts.conferenceParticipantAction.action}`);
}
if ('tag' == opts.conferenceParticipantAction.action && !opts.tag) {
throw new DbErrorBadRequest('conferenceParticipantAction requires tag property when action is \'tag\'');
}
if ('coach' == opts.conferenceParticipantAction.action && !opts.tag) {
throw new DbErrorBadRequest('conferenceParticipantAction requires tag property when action is \'coach\'');
}
}
}
function validateTo(to) {

View File

@@ -31,6 +31,7 @@ router.post('/:sip_realm', async(req, res) => {
const [sbcs] = await promisePool.query('SELECT ipv4 from sbc_addresses');
if (sbcs.length === 0) throw new Error('no SBC addresses provisioned in the database!');
const ips = sbcs.map((s) => s.ipv4);
const uniqueIps = [...new Set(ips)];
/* retrieve existing dns records */
const [old_recs] = await promisePool.query('SELECT record_id from dns_records WHERE account_sid = ?',
@@ -48,7 +49,7 @@ router.post('/:sip_realm', async(req, res) => {
}
/* add the dns records */
const records = await createDnsRecords(logger, domain, subdomain, ips);
const records = await createDnsRecords(logger, domain, subdomain, uniqueIps);
if (!records) throw new Error(`failure updating dns records for ${sip_realm}`);
const values = records.map((r) => {
return `('${uuid()}', '${account_sid}', '${r.type}', ${r.id})`;

View File

@@ -6,7 +6,9 @@ const sysError = require('../error');
const {decrypt, encrypt} = require('../../utils/encrypt-decrypt');
const {parseAccountSid, parseServiceProviderSid, parseSpeechCredentialSid} = require('./utils');
const {decryptCredential, testWhisper, testDeepgramTTS,
getLanguagesAndVoicesForVendor} = require('../../utils/speech-utils');
getLanguagesAndVoicesForVendor,
testPlayHT,
testRimelabs} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
const {
testGoogleTts,
@@ -111,11 +113,14 @@ const encryptCredential = (obj) => {
secret_access_key,
aws_region,
api_key,
role_arn,
region,
client_id,
secret,
nuance_tts_uri,
nuance_stt_uri,
deepgram_stt_uri,
deepgram_stt_use_tls,
use_custom_tts,
custom_tts_endpoint,
custom_tts_endpoint_url,
@@ -133,6 +138,8 @@ const encryptCredential = (obj) => {
auth_token = '',
cobalt_server_uri,
model_id,
user_id,
voice_engine,
options
} = obj;
@@ -149,10 +156,17 @@ const encryptCredential = (obj) => {
return encrypt(service_key);
case 'aws':
assert(access_key_id, 'invalid aws speech credential: access_key_id is required');
assert(secret_access_key, 'invalid aws speech credential: secret_access_key is required');
assert(aws_region, 'invalid aws speech credential: aws_region is required');
const awsData = JSON.stringify({aws_region, access_key_id, secret_access_key});
// AWS polly can work for 3 types of credentials:
// 1/ access_key_id and secret_access_key
// 2/ RoleArn Assume role
// 3/ RoleArn assigned to instance profile where will run this application
const awsData = JSON.stringify(
{
aws_region,
...(access_key_id && {access_key_id}),
...(secret_access_key && {secret_access_key}),
...(role_arn && {role_arn}),
});
return encrypt(awsData);
case 'microsoft':
@@ -185,8 +199,11 @@ const encryptCredential = (obj) => {
return encrypt(nuanceData);
case 'deepgram':
assert(api_key, 'invalid deepgram speech credential: api_key is required');
const deepgramData = JSON.stringify({api_key});
// API key is optional if onprem
if (!deepgram_stt_uri) {
assert(api_key, 'invalid deepgram speech credential: api_key is required');
}
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls});
return encrypt(deepgramData);
case 'ibm':
@@ -214,6 +231,19 @@ const encryptCredential = (obj) => {
const elevenlabsData = JSON.stringify({api_key, model_id, options});
return encrypt(elevenlabsData);
case 'playht':
assert(api_key, 'invalid playht speech credential: api_key is required');
assert(user_id, 'invalid playht speech credential: user_id is required');
assert(voice_engine, 'invalid voice_engine speech credential: voice_engine is required');
const playhtData = JSON.stringify({api_key, user_id, voice_engine, options});
return encrypt(playhtData);
case 'rimelabs':
assert(api_key, 'invalid rimelabs speech credential: api_key is required');
assert(model_id, 'invalid rimelabs speech credential: model_id is required');
const rimelabsData = JSON.stringify({api_key, model_id, options});
return encrypt(rimelabsData);
case 'assemblyai':
assert(api_key, 'invalid assemblyai speech credential: api_key is required');
const assemblyaiData = JSON.stringify({api_key});
@@ -413,7 +443,10 @@ router.put('/:sid', async(req, res) => {
custom_tts_url,
cobalt_server_uri,
model_id,
options
voice_engine,
options,
deepgram_stt_uri,
deepgram_stt_use_tls,
} = req.body;
const newCred = {
@@ -436,7 +469,10 @@ router.put('/:sid', async(req, res) => {
custom_tts_url,
cobalt_server_uri,
model_id,
options
voice_engine,
options,
deepgram_stt_uri,
deepgram_stt_use_tls,
};
logger.info({o, newCred}, 'updating speech credential with this new credential');
obj.credential = encryptCredential(newCred);
@@ -513,12 +549,13 @@ router.get('/:sid/test', async(req, res) => {
}
}
else if (cred.vendor === 'aws') {
const {getTtsVoices, getAwsAuthToken} = req.app.locals;
if (cred.use_for_tts) {
const {getTtsVoices} = req.app.locals;
try {
await testAwsTts(logger, getTtsVoices, {
accessKeyId: credential.access_key_id,
secretAccessKey: credential.secret_access_key,
roleArn: credential.role_arn,
region: credential.aws_region || process.env.AWS_REGION
});
results.tts.status = 'ok';
@@ -530,9 +567,10 @@ router.get('/:sid/test', async(req, res) => {
}
if (cred.use_for_stt) {
try {
await testAwsStt(logger, {
await testAwsStt(logger, getAwsAuthToken, {
accessKeyId: credential.access_key_id,
secretAccessKey: credential.secret_access_key,
roleArn: credential.role_arn,
region: credential.aws_region || process.env.AWS_REGION
});
results.stt.status = 'ok';
@@ -647,7 +685,7 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.ttsTestResult(sid, false);
}
}
if (cred.use_for_stt) {
if (cred.use_for_stt && api_key) {
try {
await testDeepgramStt(logger, {api_key});
results.stt.status = 'ok';
@@ -715,6 +753,33 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'playht') {
if (cred.use_for_tts) {
try {
await testPlayHT(logger, synthAudio, credential);
results.tts.status = 'ok';
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
let reason = err.message;
// if error is from bent, let get the body
try {
reason = await err.text();
} catch {}
results.tts = {status: 'fail', reason};
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'rimelabs') {
if (cred.use_for_tts) {
try {
await testRimelabs(logger, synthAudio, credential);
results.tts.status = 'ok';
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
results.tts = {status: 'fail', reason: err.message};
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'assemblyai') {
const {api_key} = credential;
if (cred.use_for_stt) {
@@ -752,7 +817,7 @@ router.get('/:sid/test', async(req, res) => {
*/
router.get('/speech/supportedLanguagesAndVoices', async(req, res) => {
const logger = req.app.locals.logger;
const {logger, getTtsVoices} = req.app.locals;
try {
const {vendor, label} = req.query;
if (!vendor) {
@@ -767,7 +832,7 @@ router.get('/speech/supportedLanguagesAndVoices', async(req, res) => {
const tmp = credentials && credentials.length > 0 ? credentials[0] : null;
const cred = tmp ? JSON.parse(decrypt(tmp.credential)) : null;
try {
const data = await getLanguagesAndVoicesForVendor(logger, vendor, cred);
const data = await getLanguagesAndVoicesForVendor(logger, vendor, cred, getTtsVoices);
res.status(200).json(data);
} catch (err) {
throw new DbErrorUnprocessableRequest(err.message);

View File

@@ -10,6 +10,7 @@ const Account = require('../../models/account');
const sysError = require('../error');
const { getSpeechCredential, decryptCredential } = require('../../utils/speech-utils');
const PCMToMP3Encoder = require('../../record/encoder');
const { pipeline } = require('stream');
router.delete('/', async(req, res) => {
const {purgeTtsCache} = req.app.locals;
@@ -69,6 +70,8 @@ router.post('/Synthesize', async(req, res) => {
voice = arr[1];
model = arr[2];
}
} else if (cred.vendor === 'deepgram') {
model = voice;
}
const stats = {
histogram: () => {},
@@ -84,7 +87,8 @@ router.post('/Synthesize', async(req, res) => {
model,
salt,
credentials: cred,
disableTtsCache: false
disableTtsCache: false,
disableTtsStreaming: true
});
let contentType = 'audio/mpeg';
@@ -92,17 +96,27 @@ router.post('/Synthesize', async(req, res) => {
let readStream = fs.createReadStream(filePath);
if (['nuance', 'nvidia'].includes(cred.vendor) ||
(
process.env.JAMBONES_TTS_TRIM_SILENCE &&
(process.env.JAMBONES_TTS_TRIM_SILENCE || !process.env.JAMBONES_DISABLE_TTS_STREAMING) &&
['microsoft', 'azure'].includes(cred.vendor)
)
) {
if (encodingMp3) {
readStream = readStream
.pipe(new PCMToMP3Encoder({
readStream = pipeline(
readStream,
new PCMToMP3Encoder({
channels: 1,
sampleRate: 8000,
bitRate: 128
}, logger));
}, logger),
(err) => {
if (err) {
logger.error('ttscache/Synthesize failed:', err);
if (!res.headersSent) {
res.status(500).end('Server error');
}
}
}
);
} else {
contentType = 'application/octet-stream';
}
@@ -110,10 +124,17 @@ router.post('/Synthesize', async(req, res) => {
res.writeHead(200, {
'Content-Type': contentType,
});
readStream.pipe(res);
readStream.on('end', () => {
fs.unlink(filePath, (err) => {
if (err) throw err;
pipeline(readStream, res, (err) => {
if (err) {
logger.error('ttscache/Synthesize failed:', err);
if (!res.headersSent) {
res.status(500).end('Server error');
}
}
fs.unlink(filePath, (unlinkErr) => {
if (unlinkErr) throw unlinkErr;
logger.info(`${filePath} was deleted`);
});
});

View File

@@ -382,11 +382,35 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/GeneralError'
/login:
post:
tags:
- Authentication
summary: login and retrieve a JWT
operationId: login
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/Login'
responses:
200:
description: user logged in
content:
application/json:
schema:
$ref: '#/components/schemas/SuccessfulLogin'
500:
description: system error
content:
application/json:
schema:
$ref: '#/components/schemas/GeneralError'
/logout:
post:
tags:
- Authentication
summary: log out and deactivate jwt
summary: log out and deactivate the JWT
operationId: logoutUser
responses:
204:
@@ -584,10 +608,9 @@ paths:
content:
application/json:
schema:
type:
array
type: array
items:
$ref: '#/components/schemas/Users'
$ref: '#/components/schemas/UserList'
403:
description: unauthorized
500:
@@ -610,27 +633,13 @@ paths:
- Users
summary: retrieve user information
operationId: getUser
requestBody:
content:
application/json:
schema:
type: object
properties:
name:
type: string
email:
type: string
is_active:
type: boolean
force_change:
type: boolean
scope:
type: string
permissions:
type: array
responses:
204:
200:
description: user information
content:
application/json:
schema:
$ref: '#/components/schemas/UserProfile'
403:
description: user information
content:
@@ -674,6 +683,8 @@ paths:
type: string
permissions:
type: array
items:
type: string
responses:
204:
description: user updated
@@ -712,6 +723,8 @@ paths:
type: string
permissions:
type: array
items:
type: string
old_password:
type: string
description: existing password, which is to be replaced
@@ -998,7 +1011,7 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/GeneralError'
/AccountTest/:ServiceProviderSid:
/AccountTest/{ServiceProviderSid}:
parameters:
- name: ServiceProviderSid
in: path
@@ -1095,6 +1108,9 @@ paths:
requires_register:
type: boolean
description: whether this provider requires us to send a REGISTER to them in order to receive calls
register_use_tls:
type: boolean
description: whether this provider requires us to send the REGISTER using the TLS protocol
register_username:
type: string
description: sip username to authenticate with, if registration is required
@@ -1971,7 +1987,7 @@ paths:
tags:
- Service Providers
summary: add a VoiPCarrier to a service provider based on PredefinedCarrier template
operationId: createVoipCarrierFromTemplate
operationId: createVoipCarrierFromTemplateBySP
responses:
201:
description: voip carrier successfully created
@@ -2079,6 +2095,12 @@ paths:
summary: get supported languages, voices and models
operationId: supportedLanguagesAndVoices
parameters:
- name: ServiceProviderSid
in: path
required: true
schema:
type: string
format: uuid
- name: vendor
in: query
required: true
@@ -2920,7 +2942,7 @@ paths:
tags:
- Accounts
summary: get a specific speech credential
operationId: getSpeechCredential
operationId: getSpeechCredentialByAccount
responses:
200:
description: retrieve speech credentials for a specified account
@@ -2934,7 +2956,7 @@ paths:
tags:
- Accounts
summary: update a speech credential
operationId: updateSpeechCredential
operationId: updateSpeechCredentialByAccount
requestBody:
content:
application/json:
@@ -2955,7 +2977,7 @@ paths:
tags:
- Accounts
summary: delete a speech credential
operationId: deleteSpeechCredential
operationId: deleteSpeechCredentialByAccount
responses:
204:
description: credential successfully deleted
@@ -2966,8 +2988,14 @@ paths:
tags:
- Accounts
summary: get supported languages, voices and models
operationId: supportedLanguagesAndVoices
operationId: supportedLanguagesAndVoicesByAccount
parameters:
- name: AccountSid
in: path
required: true
schema:
type: string
format: uuid
- name: vendor
in: query
required: true
@@ -2995,7 +3023,7 @@ paths:
tags:
- Accounts
summary: test a speech credential
operationId: testSpeechCredential
operationId: testSpeechCredentialByAccount
parameters:
- name: AccountSid
in: path
@@ -3241,7 +3269,7 @@ paths:
tags:
- Service Providers
summary: retrieve pcap for a call
operationId: getRecentCallTrace
operationId: getRecentCallTraceBySP
responses:
200:
description: retrieve sip trace data
@@ -3327,7 +3355,7 @@ paths:
tags:
- Service Providers
summary: retrieve recent calls for an account
operationId: listRecentCalls
operationId: listRecentCallsBySP
responses:
200:
description: retrieve recent call records for a specified account
@@ -3428,7 +3456,7 @@ paths:
tags:
- Service Providers
summary: retrieve sip trace detail for a call
operationId: getRecentCallTrace
operationId: getRecentCallTraceByCallId
responses:
200:
description: retrieve sip trace data
@@ -3455,7 +3483,7 @@ paths:
tags:
- Accounts
summary: retrieve pcap for a call
operationId: getRecentCallTrace
operationId: getRecentCallTraceByAccount
responses:
200:
description: retrieve sip trace data
@@ -3641,7 +3669,7 @@ paths:
tags:
- Accounts
summary: retrieve alerts for an account
operationId: listAlerts
operationId: listAlertsByAccount
responses:
200:
description: retrieve alerts for a specified account
@@ -4124,6 +4152,22 @@ paths:
type: string
siprecServerURL:
type: string
conferenceParticipantAction:
type: object
properties:
action:
type: string
enum:
- tag
- untag
- coach
- uncoach
- mute
- unmute
- hold
- unhold
tag:
type: string
responses:
200:
description: Accepted
@@ -4240,7 +4284,7 @@ paths:
tags:
- Accounts
summary: retrieve online sip users for an account
operationId: listQueues
operationId: listRegisteredSipUsers
responses:
200:
description: retrieve online sip users for an account
@@ -4254,7 +4298,7 @@ paths:
tags:
- Accounts
summary: retrieve online sip users for an account by list of sip username
operationId: listRegisteredSipUsers
operationId: listRegisteredSipUsersByUsername
requestBody:
content:
application/json:
@@ -4273,6 +4317,12 @@ paths:
$ref: '#/components/schemas/RegisteredClient'
/Accounts/{AccountSid}/RegisteredSipUsers/{Client}:
parameters:
- name: AccountSid
in: path
required: true
schema:
type: string
format: uuid
- name: Client
in: path
required: true
@@ -4293,6 +4343,13 @@ paths:
schema:
$ref: '#/components/schemas/RegisteredClient'
/Accounts/{AccountSid}/TtsCache/Synthesize:
parameters:
- name: AccountSid
in: path
required: true
schema:
type: string
format: uuid
post:
tags:
- Accounts
@@ -5031,17 +5088,32 @@ components:
scheme: bearer
bearerFormat: token
schemas:
SuccessfulLogin:
type: object
required:
- username
- password
properties:
token:
type: string
user_sid:
type: string
scope:
type: string
force_change:
type: boolean
Login:
type: object
properties:
user_sid:
username:
type: string
api_token:
type: string
change_password:
type: boolean
password:
type: string
required:
- user_sid
- username
- password
SuccessfulApiKeyAdd:
type: object
required:
@@ -6096,8 +6168,23 @@ components:
type: array
items:
$ref: '#/components/schemas/TtsModel'
UserList:
type: object
properties:
name:
type: string
email:
type: string
is_active:
type: boolean
force_change:
type: boolean
scope:
type: string
permissions:
type: array
items:
type: string
security:
- bearerAuth: []

View File

@@ -1,9 +1,14 @@
module.exports = [
{ name: 'Aurora English (US) Female', value: 'alpha-aurora-en-v2' },
{ name: 'Asteria English (US) Female', value: 'alpha-asteria-en-v2' },
{ name: 'Artemis English (UK) Female', value: 'alpha-artemis-en-v3' },
{ name: 'Andromeda English (US) Female', value: 'alpha-andromeda-en-v3' },
{ name: 'Stella English (UK) Female', value: 'alpha-stella-en-v2' },
{ name: 'Orion English (US) Male', value: 'alpha-orion-en-v2' },
{ name: 'Atlas English (US) Male', value: 'alpha-atlas-en-v3' },
{ name: 'Asteria English (US) Female', value: 'aura-asteria-en' },
{ name: 'Luna English (US) Female', value: 'aura-luna-en' },
{ name: 'Stella English (US) Female', value: 'aura-stella-en' },
{ name: 'Stella English (UK) Female', value: 'aura-athena-en' },
{ name: 'Hera English (US) Female', value: 'aura-hera-en' },
{ name: 'Orion English (US) Male', value: 'aura-orion-en' },
{ name: 'Arcas English (US) Male', value: 'aura-arcas-en' },
{ name: 'Perseus English (US) Male', value: 'aura-perseus-en' },
{ name: 'Angus English (Ireland) Male', value: 'aura-angus-en' },
{ name: 'Orpheus English (US) Male', value: 'aura-orpheus-en' },
{ name: 'Helios English (UK) Male', value: 'aura-helios-en' },
{ name: 'Zeus English (US) Male', value: 'aura-zeus-en' },
];

View File

@@ -0,0 +1,6 @@
module.exports = [
{ name: 'PlayHT2.0-turbo', value: 'PlayHT2.0-turbo' },
{ name: 'PlayHT2.0', value: 'PlayHT2.0' },
{ name: 'PlayHT1.0', value: 'PlayHT1.0' },
];

View File

@@ -0,0 +1,5 @@
module.exports = [
{ name: 'Mist', value: 'mist' },
{ name: 'V1', value: 'v1' },
];

View File

@@ -0,0 +1,710 @@
module.exports = [
{
value: 'en-US',
name: 'English (US)',
voices: [
{
value:
's3://mockingbird-prod/abigail_vo_6661b91f-4012-44e3-ad12-589fbdee9948/voices/speaker/manifest.json',
name: 'Abigail - american, female, narrative, smooth',
},
{
value: 'abram',
name: 'Abram - british, old, male, low, narrative, slow, round',
},
{
value: 'adolfo',
name: 'Adolfo - american, adult, male, neutral, narrative, fast, thick',
},
{
value: 'adrian',
name: 'Adrian - american, old, male, neutral, narrative, fast, thick',
},
{
value: 'ahmed',
name: 'Logan - british, old, male, neutral, narrative, neutral, thick',
},
{
value: 'alex',
name: 'Alex - british, adult, male, high, narrative, slow, thick',
},
{
value: 'alexander',
name: 'Alexander - british, old, male, high, narrative, fast, thick',
},
{
value: 'alfonso',
name: 'Alfonso - american, adult, male, neutral, videos, neutral, gravelly',
},
{
value: 'alphonso',
name: 'Alphonso - american, adult, female, low, videos, neutral, smooth',
},
{
value: 'amado',
name: 'Amado - american, old, male, low, narrative, fast, smooth',
},
{
value: 'anny',
name: 'Anny - american, youth, female, neutral, narrative, neutral, thick',
},
{
value: 'anthony',
name: 'Anthony - american, adult, male, neutral, training, slow, thick',
},
{
value: 'spencer',
name: 'April - british, adult, female, neutral, narrative, slow, smooth',
},
{
value: 'victor',
name: 'Ariana - american, youth, female, high, videos, fast, thick',
},
{
value: 'arthur',
name: 'Arthur - british, adult, male, neutral, narrative, neutral, smooth',
},
{
value: 'aubrey',
name: 'Aubrey - british, adult, male, neutral, videos, neutral, smooth',
},
{
value: 'hipolito',
name: 'Audrey - american, adult, female, low, narrative, slow, round',
},
{
value: 'aurora',
name: 'Aurora - british, adult, female, low, training, slow, round',
},
{
value: 'axel',
name: 'Axel - american, adult, male, neutral, narrative, fast, thick',
},
{
value:
's3://mockingbird-prod/ayla_vo_commercials_d66900d5-69f5-476f-9bd6-8eab2936dda3/voices/speaker/manifest.json',
name: 'Ayla (Advertising) - american, female, advertising',
},
{
value:
's3://mockingbird-prod/ayla_vo_expressive_16095e08-b9e8-429b-947c-47a75e41053b/voices/speaker/manifest.json',
name: 'Ayla (Expressive) - american, female, narrative',
},
{
value:
's3://mockingbird-prod/ayla_vo_meditation_d11dd9da-b5f1-4709-95a6-e6d5dc77614a/voices/speaker/manifest.json',
name: 'Ayla (Meditation) - american, female, meditation',
},
{
value:
's3://mockingbird-prod/ayla_vo_narrative_d8199dfd-b50f-40c7-9d99-e203ba5f4152/voices/speaker/manifest.json',
name: 'Ayla (Narrative) - american, female, narrative',
},
{
value:
's3://mockingbird-prod/ayla_vo_training_e6751ca5-e47c-4c4b-ad05-d3a194417600/voices/speaker/manifest.json',
name: 'Ayla (Training) - american, female, training',
},
{
value: 'benton',
name: 'Benton - american, old, male, high, videos, fast, smooth',
},
{
value: 'bertram',
name: 'Bertram - british, adult, male, low, narrative, neutral, gravelly',
},
{
value: 'bill',
name: 'Harper - american, adult, female, high, videos, fast, smooth',
},
{
// eslint-disable-next-line max-len
value:'s3://mockingbird-prod/nathan_drake_carmelo_pampillonio_7d540ad6-7d32-41f6-8d53-2584901aa03d/voices/speaker/manifest.json',
name: 'Billy - american, male, gaming',
},
{
value: 'blaine',
name: 'Blaine - british, adult, male, high, narrative, neutral, thick',
},
{
value: 'booker',
name: 'Booker - british, youth, male, neutral, narrative, neutral, round',
},
{
value: 'bret',
name: 'Bret - american, adult, female, neutral, narrative, slow, smooth',
},
{
value: 'bruce',
name: 'Bruce - british, adult, male, high, training, fast, thick',
},
{
value: 'bryan',
name: 'Bryan - american, adult, male, low, videos, fast, gravelly',
},
{
value: 'carlo',
name: 'Carlo - british, adult, male, neutral, advertising, neutral, smooth',
},
{
value: 'carter',
name: 'Carter - american, adult, male, neutral, narrative, neutral, thick',
},
{
value: 'charles',
name: 'Charles - american, adult, male, neutral, narrative, neutral, round',
},
{
value: 'charlotte',
name: 'Charlotte - canadian, adult, female, low, narrative, neutral, smooth',
},
{
value:
's3://voice-cloning-zero-shot/028a32d4-6a79-4ca3-a303-d6559843114b/chris/manifest.json',
name: 'Chris - american, adult, male,',
},
{
value: 'chuck',
name: 'Chuck - british, adult, male, neutral, videos, slow, round',
},
{
value: 'clark',
name: 'Clark - british, old, male, neutral, narrative, slow, smooth',
},
{
value: 'clifton',
name: 'Clifton - american, old, male, high, narrative, neutral, gravelly',
},
{
value: 'hayden',
name: 'Cooper - american, adult, male, neutral, narrative, neutral, round',
},
{
value: 'daisy',
name: 'Daisy - british, adult, female, low, narrative, neutral, gravelly',
},
{
value: 'dane',
name: 'Dane - american, adult, male, neutral, videos, neutral, round',
},
{
value: 'daniel',
name: 'Daniel - canadian, adult, male, low, narrative, neutral, smooth',
},
{
value: 'darnell',
name: 'Darnell - american, youth, male, neutral, narrative, neutral, smooth',
},
{
value: 'daron',
name: 'Daron - american, old, male, low, narrative, slow, round',
},
{
value: 'darrell',
name: 'Darrell - british, adult, male, neutral, advertising, neutral, thick',
},
{
value: 's3://peregrine-voices/a10/manifest.json',
name: 'Davis - american, adult, male,',
},
{
value: 'ignacio',
name: 'Delilah - american, adult, female, neutral, narrative, slow, smooth',
},
{
value: 'denis',
name: 'Eleanor - british, adult, female, neutral, advertising, neutral, smooth',
},
{
value: 'dick',
name: 'Dick - american, adult, male, neutral, training, fast, smooth',
},
{
value: 'domenic',
name: 'Domenic - british, adult, male, high, videos, neutral, thick',
},
{
value: 's3://peregrine-voices/donna_meditation_saad/manifest.json',
name: 'Donna (Meditation) - american, female, meditation',
},
{
value: 's3://peregrine-voices/donna_parrot_saad/manifest.json',
name: 'Donna (Narrative) - american, female, narrative',
},
{
value: 'donovan',
name: 'Donovan - american, adult, male, low, narrative, neutral, smooth',
},
{
value: 'dudley',
name: 'Dudley - american, old, male, low, narrative, fast, smooth',
},
{
value: 'dylan',
name: 'Dylan - british, old, male, high, gaming, slow, smooth',
},
{
value: 'earle',
name: 'Earle - british, adult, male, high, narrative, neutral, gravelly',
},
{
value: 'efren',
name: 'Efren - american, adult, male, neutral, training, slow, thick',
},
{
value: 'denis',
name: 'Eleanor - british, adult, female, neutral, advertising, neutral, smooth',
},
{
value: 'elijah',
name: 'Elijah - american, old, male, neutral, training, neutral, gravelly',
},
{
value: 'ellie',
name: 'Ellie - american, adult, female, low, training, slow, smooth',
},
{
value: 'erasmo',
name: 'Erasmo - american, old, male, low, training, fast, smooth',
},
{
value: 's3://peregrine-voices/evelyn 2 saad parrot/manifest.json',
name: 'Evelyn - american, adult, female, low, videos, neutral, smooth',
},
{
value: 'fletcher',
name: 'Fletcher - british, adult, male, neutral, narrative, fast, gravelly',
},
{
value: 'florencio',
name: 'Madison - british, old, female, neutral, narrative, slow, round',
},
{
value: 'flynn',
name: 'Flynn - british, adult, male, neutral, narrative, fast, round',
},
{
value: 'gabriel',
name: 'Samantha - american, old, female, neutral, narrative, neutral, thick',
},
{
value: 'greg',
name: 'Greg - british, adult, male, high, narrative, slow, round',
},
{
value: 'harold',
name: 'Harold - american, adult, male, neutral, narrative, slow, smooth',
},
{
value: 'bill',
name: 'Harper - american, adult, female, high, videos, fast, smooth',
},
{
value: 'harris',
name: 'Harris - british, adult, male, low, narrative, fast, smooth',
},
{
value: 'harrison',
name: 'Harrison - american, adult, male, neutral, narrative, fast, round',
},
{
value: 'hayden',
name: 'Cooper - american, adult, male, neutral, narrative, neutral, round',
},
{
value: 'hipolito',
name: 'Audrey - american, adult, female, low, narrative, slow, round',
},
{
value:
's3://mockingbird-prod/hook_1_chico_a3e5e83f-08ae-4a9f-825c-7e48d32d2fd8/voices/speaker/manifest.json',
name: 'Hook - american, male, gaming',
},
{
value: 's3://peregrine-voices/hudson saad parrot/manifest.json',
name: 'Hudson - american, adult, male, neutral, videos, neutral, thick',
},
{
value: 'hunter',
name: 'Hunter - british, old, male, high, narrative, fast, round',
},
{
value: 'ignacio',
name: 'Delilah - american, adult, female, neutral, narrative, slow, smooth',
},
{
value: 's3://peregrine-voices/mel28/manifest.json',
name: 'Jack - american, adult, male,',
},
{
value: 'jarrett',
name: 'Jarrett - american, adult, male, low, advertising, slow, smooth',
},
{
value:
's3://voice-cloning-zero-shot/801a663f-efd0-4254-98d0-5c175514c3e8/jennifer/manifest.json',
name: 'Jennifer - american, adult, female,',
},
{
value: 'jerrell',
name: 'Jerrell - american, adult, male, low, narrative, neutral, round',
},
{
value: 'jordan',
name: 'Jordan - american, adult, male, neutral, training, slow, round',
},
{
value:
's3://voice-cloning-zero-shot/dc23bb38-f568-4323-b6fb-7d64f685b97a/joseph/manifest.json',
name: 'Joseph - american, adult, male,',
},
{
value: 'judson',
name: 'Judson - american, adult, male, low, narrative, slow, smooth',
},
{
value: 'lance',
name: 'Lance - british, adult, male, low, videos, neutral, smooth',
},
{
value: 'larry',
name: 'Larry - american, adult, male, neutral, narrative, neutral, smooth',
},
{
value: 's3://peregrine-voices/larry_ads3_parrot_saad/manifest.json',
name: 'Larry (Advertising) - american, adult, male, neutral, advertising, neutral, smooth',
},
{
value:
's3://mockingbird-prod/larry_vo_narrative_4bd5c1bd-f662-4a38-b5b9-76563f7b92ec/voices/speaker/manifest.json',
name: 'Larry (Narrative) - american, adult, male, neutral, narrative, neutral, smooth',
},
{
value: 'lillian',
name: 'Lillian - british, old, female, neutral, training, slow, round',
},
{
value: 'ahmed',
name: 'Logan - british, old, male, neutral, narrative, neutral, thick',
},
{
value: 'lottie',
name: 'Lottie - british, adult, female, low, narrative, slow, smooth',
},
{
value: 'lucius',
name: 'Lucius - british, adult, male, low, narrative, slow, smooth',
},
{
value: 'mickey',
name: 'Madelyn - british, adult, female, neutral, videos, fast, thick',
},
{
value:
's3://voice-cloning-zero-shot/09b5c0cc-a8f4-4450-aaab-3657b9965d0b/podcaster/manifest.json',
name: 'Matt - american, adult, male,',
},
{
value: 's3://peregrine-voices/mel21/manifest.json',
name: 'Melissa - american, adult, female,',
},
{
value: 'micah',
name: 'Micah - british, adult, female, neutral, narrative, neutral, smooth',
},
{
value:
's3://voice-cloning-zero-shot/7c339a9d-370f-4643-adf5-4134e3ec9886/mlae02/manifest.json',
name: 'Michael - american, adult, male,',
},
{
value: 'mickey',
name: 'Madelyn - british, adult, female, neutral, videos, fast, thick',
},
{
value:
's3://voice-cloning-zero-shot/7c38b588-14e8-42b9-bacd-e03d1d673c3c/nicole/manifest.json',
name: 'Nicole - american, adult, female,',
},
{
value: 's3://peregrine-voices/nolan saad parrot/manifest.json',
name: 'Nolan - british, adult, male, high, videos, neutral, round',
},
{
value: 'nova',
name: 'Nova - american, adult, female, whisper, narrative, slow, smooth',
},
{
value: 'oliver',
name: 'Oliver - british, adult, male, high, videos, neutral, round',
},
{
value: 'oscar',
name: 'Oscar - british, adult, male, neutral, narrative, slow, smooth',
},
{
value: 'owen',
name: 'Owen - american, youth, male, high, narrative, neutral, round',
},
{
value: 'pedro',
name: 'Pedro - american, adult, male, neutral, narrative, slow, round',
},
{
value: 'phoebe',
name: 'Phoebe - british, adult, female, high, videos, fast, smooth',
},
{
value: 'randall',
name: 'Randall - british, adult, male, high, narrative, fast, thick',
},
{
value: 'reynaldo',
name: 'Reynaldo - british, old, male, low, narrative, fast, smooth',
},
{
value: 'rodrick',
name: 'Rodrick - american, adult, male, neutral, narrative, neutral, smooth',
},
{
value: 'gabriel',
name: 'Samantha - american, old, female, neutral, narrative, neutral, thick',
},
{
value: 'samuel',
name: 'Samuel - american, old, male, high, narrative, slow, gravelly',
},
{
value:
// eslint-disable-next-line max-len
's3://mockingbird-prod/agent_47_carmelo_pampillonio_58e796e1-0b87-4f3e-8b36-7def6d65ce66/voices/speaker/manifest.json',
name: 'Sarge - american, male, gaming',
},
{
value:
's3://voice-cloning-zero-shot/1f44b3e7-22ea-4c2e-87d0-b4d9c8f1d47d/sophia/manifest.json',
name: 'Sophia - american, adult, female,',
},
{
value: 'spencer',
name: 'April - british, adult, female, neutral, narrative, slow, smooth',
},
{
value: 'stella',
name: 'Stella - british, old, female, neutral, training, slow, round',
},
{
value: 'susan',
name: 'Susan - american, adult, female, high, videos, neutral, round',
},
{
value:
// eslint-disable-next-line max-len
's3://mockingbird-prod/susan_vo_commercials_0f4fa663-6eba-4582-be1e-2d5bde798f1c/voices/speaker/manifest.json',
name: 'Susan (Advertising) - american, adult, female, high, advertising, neutral, round',
},
{
value:
's3://mockingbird-prod/susan_vo_narrative_73051c90-460b-4e54-adab-9235f45c5e5f/voices/speaker/manifest.json',
name: 'Susan (Narrative) - american, adult, female, high, narrative, neutral, round',
},
{
value:
's3://mockingbird-prod/susan_vo_training_46ffcc60-d630-42f6-acfe-4affd003ae7a/voices/speaker/manifest.json',
name: 'Susan (Training) - american, adult, female, high, training, neutral, round',
},
{
value: 'theodore',
name: 'Theodore - american, old, male, neutral, narrative, neutral, gravelly',
},
{
value: 'victor',
name: 'Ariana - american, youth, female, high, videos, fast, thick',
},
{
value: 'wilbert',
name: 'Wilbert - british, adult, male, neutral, narrative, neutral, round',
},
{
value: 'wilbur',
name: 'Wilbur - american, youth, male, neutral, narrative, neutral, smooth',
},
{
value: 'wilfred',
name: 'Wilfred - american, old, male, low, training, slow, smooth',
},
{
value: 's3://peregrine-voices/mel22/manifest.json',
name: 'Will - american, adult, male,',
},
{
value: 'william',
name: 'William - american, adult, male, neutral, videos, neutral, round',
},
{
value:
// eslint-disable-next-line max-len
's3://mockingbird-prod/william_vo_narrative_0eacdff5-6243-4e26-8b3b-66e03458c1d1/voices/speaker/manifest.json',
name: 'William (Narrative) - american, adult, male, neutral, narrative, neutral, round',
},
{
value:
's3://mockingbird-prod/william_vo_training_1b939b71-14fa-41f0-b1db-7d94f194ad0a/voices/speaker/manifest.json',
name: 'William (Training) - american, adult, male, neutral, training, neutral, round',
},
],
},
{
value: 'en-GB',
name: 'English (GB)',
voices: [
{
value: 's3://peregrine-voices/arthur ads parrot saad/manifest.json',
name: 'Arthur (Advertising) - british, adult, male, neutral, advertising, neutral, smooth',
},
{
value:
// eslint-disable-next-line max-len
's3://mockingbird-prod/arthur_vo_meditatoin_211f702d-b185-4115-b8b4-801f8130a38d/voices/speaker/manifest.json',
name: 'Arthur (Meditation) - british, adult, male, neutral, meditation, neutral, smooth',
},
{
value:
's3://mockingbird-prod/arthur_vo_narrative_a33fd610-73a9-4401-9a78-6b8219c68a9e/voices/speaker/manifest.json',
name: 'Arthur (Narrative) - british, adult, male, neutral, narrative, neutral, smooth',
},
{
value:
's3://mockingbird-prod/arthur_vo_training_9281c8fd-c7f0-4445-a148-466292d3d329/voices/speaker/manifest.json',
name: 'Arthur (Training) - british, adult, male, neutral, training, neutral, smooth',
},
{
value:
's3://mockingbird-prod/eileen_vo_5d7b2bcc-d635-4301-97e8-d97c13768514/voices/speaker/manifest.json',
name: 'Eileen - british, female, narrative',
},
{
value: 'frankie',
name: 'Frankie - british, old, male, neutral, training, neutral, thick',
},
{
value:
's3://voice-cloning-zero-shot/418a94fa-2395-4487-81d8-22daf107781f/george/manifest.json',
name: 'George - british, adult, male,',
},
{
value: 'julian',
name: 'Julian - british, adult, male, neutral, videos, neutral, round',
},
{
value:
's3://voice-cloning-zero-shot/0b5b2e4b-5103-425e-8aa0-510dd35226e2/mark/manifest.json',
name: 'Mark - british, adult, male,',
},
{
value: 's3://peregrine-voices/oliver_ads2_parrot_saad/manifest.json',
name: 'Oliver (Advertising) - british, adult, male, high, advertising, neutral, round',
},
{
value:
's3://peregrine-voices/oliver_narrative2_parrot_saad/manifest.json',
name: 'Oliver (Narrative) - british, adult, male, high, narrative, neutral, round',
},
{
value:
's3://mockingbird-prod/oliver_vo_training_6e3f604a-5605-4542-948d-347b0d7546fc/voices/speaker/manifest.json',
name: 'Oliver (Training) - british, adult, male, high, training, neutral, round',
},
{
value:
's3://voice-cloning-zero-shot/820da3d2-3a3b-42e7-844d-e68db835a206/sarah/manifest.json',
name: 'Sarah - british, adult, female,',
},
],
},
{
value: 'en-AU',
name: 'English (AU)',
voices: [
{
value: 's3://peregrine-voices/barry ads parrot saad/manifest.json',
name: 'Barry (Advertising) - australian, male, advertising',
},
{
value:
's3://peregrine-voices/barry narrative parrot saad/manifest.json',
name: 'Barry (Narrative) - australian, male, narrative',
},
{
value: 'frederick',
name: 'Frederick - australian, adult, male, low, narrative, slow, thick',
},
{
value: 's3://peregrine-voices/russell2_parrot_saad/manifest.json',
name: 'Russell - australian, male,',
},
],
},
{
value: 'en-CA',
name: 'English (CA)',
voices: [
{
value: 's3://peregrine-voices/charlotte ads parrot saad/manifest.json',
name: 'Charlotte (Advertising) - canadian, adult, female, low, advertising, neutral, smooth',
},
{
value:
's3://peregrine-voices/charlotte meditation 2 parrot saad/manifest.json',
name: 'Charlotte (Meditation) - canadian, adult, female, low, meditation, neutral, smooth',
},
{
value:
// eslint-disable-next-line max-len
's3://mockingbird-prod/charlotte_vo_narrative_9290be17-ccea-4700-a7fd-a8fe5c49fb20/voices/speaker/manifest.json',
name: 'Charlotte (Narrative) - canadian, adult, female, low, narrative, neutral, smooth',
},
{
value:
's3://peregrine-voices/charlotte_training_parrot_saad/manifest.json',
name: 'Charlotte (Training) - canadian, adult, female, low, training, neutral, smooth',
},
{
value:
// eslint-disable-next-line max-len
's3://mockingbird-prod/olivia_vo_commercials_6e3c384f-15d6-4fe7-b9a4-0cb1d69daeba/voices/speaker/manifest.json',
name: 'Olivia (Advertising) - canadian, female, advertising',
},
{
value: 's3://peregrine-voices/olivia_ads3_parrot_saad/manifest.json',
name: 'Olivia (Narrative) - canadian, female, narrative',
},
{
value:
's3://mockingbird-prod/olivia_vo_training_4376204f-a411-4e5d-a5c0-ce6cc3908052/voices/speaker/manifest.json',
name: 'Olivia (Training) - canadian, female, training',
},
],
},
{
value: 'en-IE',
name: 'English (IE)',
voices: [
{
value: 'florencio',
name: 'Madison - irish, old, female, neutral, narrative, slow, round',
},
],
},
{
value: 'en-NZ',
name: 'English (NZ)',
voices: [
{
value:
's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
name: 'Ruby - australian, adult, female,',
},
],
},
];

View File

@@ -17,10 +17,13 @@ const TtsIbmLanguagesVoices = require('./speech-data/tts-ibm');
const TtsNvidiaLanguagesVoices = require('./speech-data/tts-nvidia');
const TtsElevenlabsLanguagesVoices = require('./speech-data/tts-elevenlabs');
const TtsWhisperLanguagesVoices = require('./speech-data/tts-whisper');
const TtsPlayHtLanguagesVoices = require('./speech-data/tts-playht');
const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
const TtsModelElevenLabs = require('./speech-data/tts-model-elevenlabs');
const TtsModelWhisper = require('./speech-data/tts-model-whisper');
const TtsModelPlayHT = require('./speech-data/tts-model-playht');
const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');
const SttGoogleLanguagesVoices = require('./speech-data/stt-google');
const SttAwsLanguagesVoices = require('./speech-data/stt-aws');
@@ -162,16 +165,26 @@ const testAwsTts = async(logger, getTtsVoices, credentials) => {
}
};
const testAwsStt = async(logger, credentials) => {
const testAwsStt = async(logger, getAwsAuthToken, credentials) => {
try {
const {region, accessKeyId, secretAccessKey} = credentials;
const client = new TranscribeClient({
region,
credentials: {
accessKeyId,
secretAccessKey
}
});
const {region, accessKeyId, secretAccessKey, roleArn} = credentials;
let client = null;
if (accessKeyId && secretAccessKey) {
client = new TranscribeClient({
region,
credentials: {
accessKeyId,
secretAccessKey
}
});
} else if (roleArn) {
client = new TranscribeClient({
region,
credentials: await getAwsAuthToken(null, null, region, roleArn),
});
} else {
client = new TranscribeClient({region});
}
const command = new ListVocabulariesCommand({});
const response = await client.send(command);
return response;
@@ -240,6 +253,50 @@ const testElevenlabs = async(logger, credentials) => {
}
};
/**
 * Check a PlayHT speech credential by synthesizing a short phrase and then
 * listing the voices available to that credential.
 *
 * @param {object} logger - logger exposing `info`
 * @param {Function} synthAudio - async synthesizer called as synthAudio(stats, options)
 * @param {object} credentials - PlayHT credential, forwarded unchanged
 * @returns {Promise<void>} resolves when both calls succeed
 * @throws rethrows whatever synthAudio or the voice fetch throws, after logging it
 */
const testPlayHT = async(logger, synthAudio, credentials) => {
  // synthAudio expects a stats collector; nothing is recorded for a credential test.
  const noopStats = {
    increment: () => {},
    histogram: () => {}
  };
  const synthOptions = {
    vendor: 'playht',
    credentials,
    language: 'en-US',
    voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
    text: 'Hi there and welcome to jambones!'
  };

  try {
    await synthAudio(noopStats, synthOptions);
    // A credential that can synthesize must also be able to list voices.
    await fetchLayHTVoices(credentials);
  } catch (err) {
    logger.info({err}, 'synth Playht returned error');
    throw err;
  }
};
/**
 * Check a Rimelabs speech credential by synthesizing a short phrase.
 *
 * @param {object} logger - logger exposing `info`
 * @param {Function} synthAudio - async synthesizer called as synthAudio(stats, options)
 * @param {object} credentials - Rimelabs credential, forwarded unchanged
 * @returns {Promise<void>} resolves when synthesis succeeds
 * @throws rethrows whatever synthAudio throws, after logging it
 */
const testRimelabs = async(logger, synthAudio, credentials) => {
  try {
    await synthAudio(
      {
        // synthAudio expects a stats collector; nothing is recorded for a credential test.
        increment: () => {},
        histogram: () => {}
      },
      {
        vendor: 'rimelabs',
        credentials,
        language: 'en-US',
        voice: 'amber',
        text: 'Hi there and welcome to jambones!'
      }
    );
  } catch (err) {
    // Message previously named Playht (copy-paste), which mislabelled Rimelabs failures.
    logger.info({err}, 'synth Rimelabs returned error');
    throw err;
  }
};
const testWhisper = async(logger, synthAudio, credentials) => {
try {
await synthAudio({increment: () => {}, histogram: () => {}},
@@ -263,9 +320,7 @@ const testDeepgramTTS = async(logger, synthAudio, credentials) => {
{
vendor: 'deepgram',
credentials,
language: 'en-US',
voice: 'alpha-aurora-en-v2',
model: 'alpha-aurora-en-v2',
model: 'aura-asteria-en',
text: 'Hi there and welcome to jambones!'
}
);
@@ -355,6 +410,7 @@ const getSpeechCredential = (credential, logger) => {
...credential,
accessKeyId: credential.access_key_id,
secretAccessKey: credential.secret_access_key,
roleArn: credential.role_arn,
region: credential.aws_region || 'us-east-1'
};
}
@@ -376,6 +432,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
else if ('aws' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.access_key_id = o.access_key_id;
obj.role_arn = o.role_arn;
obj.secret_access_key = isObscureKey ? obscureKey(o.secret_access_key) : o.secret_access_key;
obj.aws_region = o.aws_region;
logger.info({obj, o}, 'retrieving aws speech credential');
@@ -406,6 +463,8 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
else if ('deepgram' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.deepgram_stt_uri = o.deepgram_stt_uri;
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
}
else if ('ibm' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
@@ -428,6 +487,17 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.model_id = o.model_id;
obj.options = o.options;
} else if ('playht' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.user_id = o.user_id;
obj.voice_engine = o.voice_engine;
obj.options = o.options;
} else if ('rimelabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.model_id = o.model_id;
obj.options = o.options;
} else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = isObscureKey ? obscureKey(o.auth_token) : o.auth_token;
@@ -464,49 +534,95 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
]
}
*/
async function getLanguagesAndVoicesForVendor(logger, vendor, credential) {
/**
 * Dispatch to the vendor-specific helper that returns the supported
 * languages, voices and models for a speech vendor.
 *
 * Every helper is called as (credential, getTtsVoices, logger). Previously
 * each case carried a leading call that passed only `credential`, so the
 * call forwarding `getTtsVoices` and `logger` was unreachable.
 *
 * @param {object} logger - logger exposing `info`
 * @param {string} vendor - vendor key, e.g. 'google', 'aws', 'playht'
 * @param {object} [credential] - speech credential forwarded to the helper
 * @param {Function} [getTtsVoices] - voice fetcher forwarded to the helper
 * @returns {Promise<*>} whatever the vendor helper resolves to
 * @throws {Error} `Invalid vendor <vendor>` when the vendor is not recognized
 */
async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTtsVoices) {
  switch (vendor) {
    case 'google':
      return await getLanguagesVoicesForGoogle(credential, getTtsVoices, logger);
    case 'aws':
      return await getLanguagesVoicesForAws(credential, getTtsVoices, logger);
    case 'microsoft':
      return await getLanguagesVoicesForMicrosoft(credential, getTtsVoices, logger);
    case 'wellsaid':
      return await getLanguagesVoicesForWellsaid(credential, getTtsVoices, logger);
    case 'nuance':
      return await getLanguagesVoicesForNuane(credential, getTtsVoices, logger);
    case 'deepgram':
      return await getLanguagesVoicesForDeepgram(credential, getTtsVoices, logger);
    case 'ibm':
      return await getLanguagesVoicesForIbm(credential, getTtsVoices, logger);
    case 'nvidia':
      return await getLanguagesVoicesForNvida(credential, getTtsVoices, logger);
    case 'cobalt':
      return await getLanguagesVoicesForCobalt(credential, getTtsVoices, logger);
    case 'soniox':
      return await getLanguagesVoicesForSoniox(credential, getTtsVoices, logger);
    case 'elevenlabs':
      return await getLanguagesVoicesForElevenlabs(credential, getTtsVoices, logger);
    case 'playht':
      return await getLanguagesVoicesForPlayHT(credential, getTtsVoices, logger);
    case 'rimelabs':
      return await getLanguagesVoicesForRimelabs(credential, getTtsVoices, logger);
    case 'assemblyai':
      return await getLanguagesVoicesForAssemblyAI(credential, getTtsVoices, logger);
    case 'whisper':
      return await getLanguagesVoicesForWhisper(credential, getTtsVoices, logger);
    default:
      logger.info(`invalid vendor ${vendor}, return empty result`);
      throw new Error(`Invalid vendor ${vendor}`);
  }
}
async function getLanguagesVoicesForGoogle(credential) {
/**
 * Return the Google TTS/STT languages and voices.
 *
 * With a credential, the TTS voice list is fetched through getTtsVoices. If
 * that fails, or no credential is given, the predefined static list is used.
 *
 * @param {object} [credential] - passed to getTtsVoices as `credentials`
 * @param {Function} getTtsVoices - async fetcher; its result is destructured
 *   as an array whose first element carries `voices`
 * @param {object} logger - logger exposing `info`
 * @returns {Promise<*>} result of tranform(tts, SttGoogleLanguagesVoices)
 */
async function getLanguagesVoicesForGoogle(credential, getTtsVoices, logger) {
  if (credential) {
    try {
      const [result] = await getTtsVoices({
        vendor: 'google',
        credentials: credential
      });
      const tts = parseGooglelanguagesVoices(result.voices);
      return tranform(tts, SttGoogleLanguagesVoices);
    } catch (err) {
      // Best effort: log the failure in the file's usual ({err}, message) form
      // and fall through to the static list.
      logger.info({err}, 'Error while fetching google languages, voices, return predefined values');
    }
  }
  return tranform(TtsGoogleLanguagesVoices, SttGoogleLanguagesVoices);
}
async function getLanguagesVoicesForAws(credential) {
/**
 * Return the AWS TTS/STT languages and voices.
 *
 * With a credential, the TTS voice list is fetched through getTtsVoices. If
 * that fails, or no credential is given, the predefined static list is used.
 *
 * @param {object} [credential] - stored credential with snake_case fields
 *   (access_key_id, secret_access_key, role_arn, aws_region)
 * @param {Function} getTtsVoices - async fetcher; its result's `Voices` is parsed
 * @param {object} logger - logger exposing `info`
 * @returns {Promise<*>} result of tranform(tts, SttAwsLanguagesVoices)
 */
async function getLanguagesVoicesForAws(credential, getTtsVoices, logger) {
  if (credential) {
    try {
      const result = await getTtsVoices({
        vendor: 'aws',
        // Map the stored snake_case fields to the camelCase names used here.
        credentials: {
          accessKeyId: credential.access_key_id,
          secretAccessKey: credential.secret_access_key,
          roleArn: credential.role_arn,
          region: credential.aws_region || process.env.AWS_REGION
        }
      });
      const tts = parseAwsLanguagesVoices(result.Voices);
      return tranform(tts, SttAwsLanguagesVoices);
    } catch (err) {
      // Best effort: log the failure in the file's usual ({err}, message) form
      // and fall through to the static list.
      logger.info({err}, 'Error while fetching AWS languages, voices, return predefined values');
    }
  }
  return tranform(TtsAwsLanguagesVoices, SttAwsLanguagesVoices);
}
async function getLanguagesVoicesForMicrosoft(credential) {
/**
 * Return the Microsoft TTS/STT languages and voices.
 *
 * With a credential, the voice list is requested from the Microsoft speech
 * REST endpoint using the credential's api_key. If that fails, or no
 * credential is given, the predefined static list is used.
 *
 * @param {object} [credential] - credential whose `api_key` is sent as the subscription key
 * @param {Function} getTtsVoices - unused here; kept so all vendor helpers share a signature
 * @param {object} logger - logger exposing `info`
 * @returns {Promise<*>} result of tranform(tts, SttMicrosoftLanguagesVoices)
 */
async function getLanguagesVoicesForMicrosoft(credential, getTtsVoices, logger) {
  if (credential) {
    try {
      // NOTE(review): the endpoint is fixed to the westus region regardless of
      // the credential -- confirm this is intended for other regions.
      const get = bent('https://westus.tts.speech.microsoft.com', 'GET', 'json', {
        'Ocp-Apim-Subscription-Key' : credential.api_key
      });
      const voices = await get('/cognitiveservices/voices/list');
      const tts = parseMicrosoftLanguagesVoices(voices);
      return tranform(tts, SttMicrosoftLanguagesVoices);
    } catch (err) {
      // Best effort: log the failure in the file's usual ({err}, message) form
      // and fall through to the static list.
      logger.info({err}, 'Error while fetching Microsoft languages, voices, return predefined values');
    }
  }
  return tranform(TtsMicrosoftLanguagesVoices, SttMicrosoftLanguagesVoices);
}
@@ -514,7 +630,19 @@ async function getLanguagesVoicesForWellsaid(credential) {
return tranform(TtsWellsaidLanguagesVoices);
}
async function getLanguagesVoicesForNuane(credential) {
/**
 * Build the language/voice catalogue for Nuance.
 *
 * With a credential, voices are fetched through `getTtsVoices` and grouped by
 * `parseNuanceLanguagesVoices`; any failure is logged and the predefined TTS
 * table is returned instead. The STT side always comes from the predefined
 * table.
 *
 * (The function name keeps its historical spelling because callers reference it.)
 *
 * @param {object} [credential] - passed through unchanged as `credentials`
 * @param {Function} getTtsVoices - async voice lister; result must carry
 *   `result.voices`
 * @param {object} logger - must expose `info`
 * @returns {Promise<*>} whatever `tranform` returns
 */
async function getLanguagesVoicesForNuane(credential, getTtsVoices, logger) {
  if (credential) {
    try {
      const result = await getTtsVoices({
        vendor: 'nuance',
        credentials: credential
      });
      const tts = parseNuanceLanguagesVoices(result.result.voices);
      return tranform(tts, SttNuanceLanguagesVoices);
    } catch (err) {
      // previously logged as "IBM", which misattributed Nuance failures
      logger.info('Error while fetching Nuance languages, voices, return predefined values', err);
    }
  }
  return tranform(TtsNuanceLanguagesVoices, SttNuanceLanguagesVoices);
}
@@ -522,7 +650,19 @@ async function getLanguagesVoicesForDeepgram(credential) {
return tranform(undefined, SttDeepgramLanguagesVoices, TtsModelDeepgram);
}
async function getLanguagesVoicesForIbm(credential) {
/**
 * Build the language/voice catalogue for IBM.
 *
 * With a credential, voices are fetched through `getTtsVoices` and grouped by
 * `parseIBMLanguagesVoices`; on any failure the error is logged and the
 * predefined TTS table is returned. STT always uses the predefined table.
 *
 * @param {object} [credential] - passed through unchanged as `credentials`
 * @param {Function} getTtsVoices - async voice lister; result must carry
 *   `result.voices`
 * @param {object} logger - must expose `info`
 * @returns {Promise<*>} whatever `tranform` returns
 */
async function getLanguagesVoicesForIbm(credential, getTtsVoices, logger) {
  const usePredefined = () => tranform(TtsIbmLanguagesVoices, SttIbmLanguagesVoices);
  if (!credential) {
    return usePredefined();
  }

  try {
    const response = await getTtsVoices({vendor: 'ibm', credentials: credential});
    const liveTts = parseIBMLanguagesVoices(response.result.voices);
    return tranform(liveTts, SttIbmLanguagesVoices);
  } catch (err) {
    logger.info('Error while fetching IBM languages, voices, return predefined values', err);
    return usePredefined();
  }
}
@@ -580,6 +720,92 @@ async function getLanguagesVoicesForElevenlabs(credential) {
}
}
/**
 * Format an optional attribute as a name fragment: ` <value>,` when the value
 * is truthy, otherwise an empty string.
 */
const concat = (a) => {
  if (!a) {
    return '';
  }
  return ` ${a},`;
};
/**
 * Fetch the PlayHT voice lists for a credential.
 *
 * The stock voice list is required (a failure there propagates to the caller).
 * The cloned-voice list is optional: if that request fails for any reason it
 * is treated as empty.
 *
 * @param {object} [credential] - reads `api_key` and `user_id`
 * @returns {Promise<Array|undefined>} `[clonedVoices, voices]`, or `undefined`
 *   when no credential is given
 */
const fetchLayHTVoices = async(credential) => {
  if (!credential) {
    return;
  }

  const headers = {
    'AUTHORIZATION' : credential.api_key,
    'X-USER-ID': credential.user_id,
    'Accept': 'application/json'
  };
  const fetchJson = bent('https://api.play.ht', 'GET', 'json', headers);

  const stockVoices = await fetchJson('/api/v2/voices');

  let clonedVoices = [];
  try {
    // not every account is allowed to list cloned voices; ignore a refusal
    clonedVoices = await fetchJson('/api/v2/cloned-voices');
  } catch {}

  return [clonedVoices, stockVoices];
};
/**
 * Build the language/voice catalogue for PlayHT.
 *
 * Without a credential the predefined table is returned. With one, cloned and
 * stock voices are fetched, filtered to those whose `voice_engine` is covered
 * by the credential's `voice_engine`, and grouped by language code.
 *
 * Voices that carry no `language_code` (custom/cloned voices) are all filed
 * under a single `en` / `Custom-English` group. The group lookup uses the
 * effective code, so several such voices share one entry instead of each
 * creating a duplicate `en` language. The fetched voice objects are not
 * mutated.
 *
 * @param {object} [credential] - reads `voice_engine` (plus whatever
 *   `fetchLayHTVoices` needs)
 * @returns {Promise<*>} whatever `tranform` returns
 */
async function getLanguagesVoicesForPlayHT(credential) {
  if (!credential) {
    return tranform(TtsPlayHtLanguagesVoices, undefined, TtsModelPlayHT);
  }

  const [cloned_voice, voices] = await fetchLayHTVoices(credential);
  const list_voices = [...cloned_voice, ...voices];

  // Display name is "<name> - attr, attr, ..." with the trailing comma removed.
  const buildVoice = (d) => {
    let name = `${d.name} -${concat(d.accent)}${concat(d.age)}${concat(d.gender)}${concat(d.loudness)}` +
      `${concat(d.style)}${concat(d.tempo)}${concat(d.texture)}`;
    name = name.endsWith(',') ? name.trim().slice(0, -1) : name;
    return {
      value: `${d.id}`,
      name
    };
  };

  const ttsVoices = list_voices.reduce((acc, voice) => {
    if (!credential.voice_engine.includes(voice.voice_engine)) {
      return acc;
    }
    // custom voice does not have language code
    const hasLanguage = Boolean(voice.language_code);
    const languageCode = hasLanguage ? voice.language_code : 'en';
    const languageName = hasLanguage ? voice.language : 'Custom-English';

    const existingLanguage = acc.find((lang) => lang.value === languageCode);
    if (existingLanguage) {
      existingLanguage.voices.push(buildVoice(voice));
    } else {
      acc.push({
        value: languageCode,
        name: languageName,
        voices: [buildVoice(voice)]
      });
    }
    return acc;
  }, []);

  return tranform(ttsVoices, undefined, TtsModelPlayHT);
}
/**
 * Build the language/voice catalogue for Rimelabs.
 *
 * The voice index is downloaded from users.rime.ai and is read as an object
 * keyed by model id, each value being a list of voice names. When the
 * credential names a model, only that model's voices are returned; otherwise
 * the voices of every model are combined. A model id that is missing from the
 * index yields an empty voice list instead of throwing. All voices are
 * reported under a single `en-US` language.
 *
 * @param {object} [credential] - reads `model_id`
 * @returns {Promise<*>} whatever `tranform` returns
 * @throws if the voice index cannot be downloaded
 */
async function getLanguagesVoicesForRimelabs(credential) {
  const model_id = credential ? credential.model_id : null;
  const get = bent('https://users.rime.ai', 'GET', 'json', {
    'Accept': 'application/json'
  });
  const voices = await get('/data/voices/all.json');

  const candidates = model_id ? voices[model_id] : Object.values(voices).flat();
  // unknown model id (or unexpected payload shape): expose no voices rather than crash
  const voiceNames = Array.isArray(candidates) ? candidates : [];

  const selectedVoices = voiceNames.map((v) => ({
    name: v.charAt(0).toUpperCase() + v.slice(1),
    value: v
  }));
  const ttsVoices = [
    {
      value: 'en-US',
      name: 'English (US)',
      voices: selectedVoices
    }
  ];
  return tranform(ttsVoices, undefined, TtsModelRimelabs);
}
/**
 * Build the language catalogue for AssemblyAI: no TTS list is supplied, only
 * the predefined STT table. The credential is accepted but not consulted.
 *
 * @param {object} [credential] - unused
 * @returns {Promise<*>} whatever `tranform` returns
 */
async function getLanguagesVoicesForAssemblyAI(credential) {
  const noTtsList = undefined;
  return tranform(noTtsList, SttAssemblyaiLanguagesVoices);
}
@@ -596,6 +822,125 @@ function tranform(tts, stt, models) {
};
}
/**
 * Group a flat Google voice list by each voice's first language code.
 *
 * Each voice's display name is its `name` with the leading
 * "<languageCode>-" portion cut off, followed by the SSML gender in
 * parentheses. A language's display name is looked up in
 * `SttGoogleLanguagesVoices`, falling back to the raw code.
 *
 * @param {Array<object>} data - voices with `languageCodes`, `name`, `ssmlGender`
 * @returns {Array<{value: string, name: string, voices: Array}>} languages in
 *   first-seen order
 */
function parseGooglelanguagesVoices(data) {
  const languages = [];

  data.forEach((voice) => {
    const [languageCode] = voice.languageCodes;
    const shortName = voice.name.substring(languageCode.length + 1);
    const entry = {
      value: voice.name,
      name: `${shortName} (${voice.ssmlGender})`
    };

    const group = languages.find((lang) => lang.value === languageCode);
    if (group) {
      group.voices.push(entry);
      return;
    }
    const known = SttGoogleLanguagesVoices.find((lang) => lang.value === languageCode);
    languages.push({
      value: languageCode,
      name: known?.name || languageCode,
      voices: [entry]
    });
  });

  return languages;
}
/**
 * Group a flat IBM voice list by `language`.
 *
 * Voice display names are "(<gender>) <description>". The language display
 * name is looked up in `SttGoogleLanguagesVoices` (the Google table is what
 * this code consults), falling back to the raw code.
 *
 * @param {Array<object>} data - voices with `language`, `name`, `gender`,
 *   `description`
 * @returns {Array<{value: string, name: string, voices: Array}>} languages in
 *   first-seen order
 */
function parseIBMLanguagesVoices(data) {
  const grouped = [];

  const toEntry = ({name, gender, description}) => ({
    value: name,
    name: `(${gender}) ${description}`
  });

  data.forEach((voice) => {
    const code = voice.language;
    const bucket = grouped.find((lang) => lang.value === code);
    if (!bucket) {
      const label = SttGoogleLanguagesVoices.find((lang) => lang.value === code)?.name || code;
      grouped.push({value: code, name: label, voices: [toEntry(voice)]});
    } else {
      bucket.voices.push(toEntry(voice));
    }
  });

  return grouped;
}
/**
 * Group a flat AWS voice list by `LanguageCode`.
 *
 * Voice display names are "(<Gender>) <Name>"; the language display name is
 * the `LanguageName` of the first voice seen for that code.
 *
 * @param {Array<object>} data - voices with `LanguageCode`, `LanguageName`,
 *   `Id`, `Gender`, `Name`
 * @returns {Array<{value: string, name: string, voices: Array}>} languages in
 *   first-seen order
 */
function parseAwsLanguagesVoices(data) {
  const languages = [];

  data.forEach((voice) => {
    const code = voice.LanguageCode;
    const entry = {
      value: voice.Id,
      name: `(${voice.Gender}) ${voice.Name}`
    };

    const group = languages.find((lang) => lang.value === code);
    if (group) {
      group.voices.push(entry);
    } else {
      languages.push({
        value: code,
        name: voice.LanguageName,
        voices: [entry]
      });
    }
  });

  return languages;
}
/**
 * Group a flat Nuance voice list by `language`.
 *
 * Each voice entry uses the voice `name` as both value and display name and
 * carries the voice's `model` through. The language display name is looked up
 * in `SttGoogleLanguagesVoices` (the Google table is what this code consults),
 * falling back to the raw code.
 *
 * @param {Array<object>} data - voices with `language`, `name`, `model`
 * @returns {Array<{value: string, name: string, voices: Array}>} languages in
 *   first-seen order
 */
function parseNuanceLanguagesVoices(data) {
  const result = [];

  data.forEach(({language, name, model}) => {
    const entry = {value: name, name, model};
    const match = result.find((lang) => lang.value === language);

    if (match) {
      match.voices.push(entry);
      return;
    }

    const displayName = SttGoogleLanguagesVoices.find((lang) => lang.value === language)?.name || language;
    result.push({value: language, name: displayName, voices: [entry]});
  });

  return result;
}
/**
 * Group a flat Microsoft voice list by `Locale`.
 *
 * Voice entries use `ShortName` as the value and "<DisplayName> (<Gender>)"
 * as the display name; the language display name is the `LocaleName` of the
 * first voice seen for that locale.
 *
 * @param {Array<object>} data - voices with `Locale`, `LocaleName`,
 *   `ShortName`, `DisplayName`, `Gender`
 * @returns {Array<{value: string, name: string, voices: Array}>} languages in
 *   first-seen order
 */
function parseMicrosoftLanguagesVoices(data) {
  const locales = [];

  data.forEach((voice) => {
    const locale = voice.Locale;
    const entry = {
      value: voice.ShortName,
      name: `${voice.DisplayName} (${voice.Gender})`
    };

    const bucket = locales.find((lang) => lang.value === locale);
    if (!bucket) {
      locales.push({value: locale, name: voice.LocaleName, voices: [entry]});
      return;
    }
    bucket.voices.push(entry);
  });

  return locales;
}
module.exports = {
testGoogleTts,
testGoogleStt,
@@ -612,6 +957,8 @@ module.exports = {
testIbmStt,
testSonioxStt,
testElevenlabs,
testPlayHT,
testRimelabs,
testAssemblyStt,
testDeepgramTTS,
getSpeechCredential,

15866
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
{
"name": "jambonz-api-server",
"version": "0.8.5",
"version": "0.9.0",
"description": "",
"main": "app.js",
"scripts": {
@@ -19,53 +19,54 @@
"url": "https://github.com/jambonz/jambonz-api-server.git"
},
"dependencies": {
"@aws-sdk/client-s3": "^3.363.0",
"@aws-sdk/client-transcribe": "^3.363.0",
"@azure/storage-blob": "^12.15.0",
"@aws-sdk/client-s3": "^3.550.0",
"@aws-sdk/client-transcribe": "^3.549.0",
"@azure/storage-blob": "^12.17.0",
"@deepgram/sdk": "^1.21.0",
"@google-cloud/speech": "^5.2.0",
"@google-cloud/storage": "^6.12.0",
"@jambonz/db-helpers": "^0.9.1",
"@google-cloud/speech": "^6.5.0",
"@google-cloud/storage": "^7.9.0",
"@jambonz/db-helpers": "^0.9.3",
"@jambonz/lamejs": "^1.2.2",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.7",
"@jambonz/speech-utils": "^0.0.33",
"@jambonz/realtimedb-helpers": "^0.8.8",
"@jambonz/speech-utils": "^0.1.0",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.45",
"@soniox/soniox-node": "^1.1.1",
"argon2": "^0.30.3",
"assemblyai": "^3.0.1",
"@jambonz/verb-specifications": "^0.0.69",
"@soniox/soniox-node": "^1.2.2",
"argon2": "^0.40.1",
"assemblyai": "^4.3.4",
"bent": "^7.3.12",
"cors": "^2.8.5",
"debug": "^4.3.4",
"express": "^4.18.1",
"express-rate-limit": "^6.4.0",
"form-data": "^2.5.1",
"helmet": "^5.1.0",
"ibm-watson": "^7.1.2",
"jsonwebtoken": "^9.0.0",
"mailgun.js": "^9.1.2",
"microsoft-cognitiveservices-speech-sdk": "1.31.0",
"mysql2": "^2.3.3",
"nocache": "3.0.4",
"passport": "^0.6.0",
"express": "^4.19.2",
"express-rate-limit": "^7.2.0",
"form-data": "^4.0.0",
"helmet": "^7.1.0",
"ibm-watson": "^9.0.1",
"jsonwebtoken": "^9.0.2",
"mailgun.js": "^10.2.1",
"microsoft-cognitiveservices-speech-sdk": "1.36.0",
"mysql2": "^3.9.3",
"nocache": "4.0.0",
"passport": "^0.7.0",
"passport-http-bearer": "^1.0.1",
"pino": "^5.17.0",
"short-uuid": "^4.1.0",
"stripe": "^8.222.0",
"swagger-ui-express": "^4.4.0",
"uuid": "^8.3.2",
"pino": "^8.20.0",
"short-uuid": "^4.2.2",
"stream-buffers": "^3.0.2",
"stripe": "^14.24.0",
"swagger-ui-express": "^5.0.0",
"uuid": "^9.0.1",
"wav": "^1.0.2",
"ws": "^8.12.1",
"ws": "^8.16.0",
"yamljs": "^0.3.0"
},
"devDependencies": {
"eslint": "^8.39.0",
"eslint-plugin-promise": "^6.1.1",
"husky": "7.0.4",
"husky": "9.0.11",
"nyc": "^15.1.0",
"request": "^2.88.2",
"request-promise-native": "^1.0.9",
"tape": "^5.5.3"
"tape": "^5.7.5"
}
}

View File

@@ -10,7 +10,7 @@ networks:
services:
mysql:
platform: linux/x86_64
# platform: linux/x86_64
image: mysql:5.7
ports:
- "3360:3306"
@@ -36,7 +36,7 @@ services:
ipv4_address: 172.58.0.3
influxdb:
platform: linux/x86_64
# platform: linux/x86_64
image: influxdb:1.8
ports:
- "8086:8086"

View File

@@ -170,6 +170,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for google tts');
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for google stt');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "en-US-Standard-C"
}
});
t.ok(result.statusCode === 200, 'successfully google tested synthesize');
}
/* add / test a credential for microsoft */
@@ -198,6 +212,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for microsoft tts');
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for microsoft stt');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "en-US-AvaMultilingualNeural"
}
});
t.ok(result.statusCode === 200, 'successfully microsoft tested synthesize');
}
/* add / test a credential for AWS */
@@ -227,6 +255,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for AWS tts');
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for AWS stt');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "Joanna"
}
});
t.ok(result.statusCode === 200, 'successfully AWS tested synthesize');
}
/* add a credential for wellsaid */
@@ -253,6 +295,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for wellsaid');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "3"
}
});
t.ok(result.statusCode === 200, 'successfully Wellsaid tested synthesize');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
auth: authUser,
@@ -285,6 +341,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for deepgram');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "aura-asteria-en"
}
});
t.ok(result.statusCode === 200, 'successfully deepgram tested synthesize');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
auth: authUser,
@@ -292,6 +362,60 @@ test('speech credentials tests', async(t) => {
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
}
// test create deepgram onprem
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'deepgram',
use_for_stt: true,
deepgram_stt_uri: "127.0.0.1:50002",
deepgram_stt_use_tls: true
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for deepgram');
const dg_sid = result.body.sid;
result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
});
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram');
t.ok(result.body.deepgram_stt_uri === '127.0.0.1:50002', "deepgram_stt_uri is correct for deepgram");
t.ok(result.body.deepgram_stt_use_tls === true, "deepgram_stt_use_tls is correct for deepgram");
result = await request.put(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'deepgram',
use_for_stt: true,
deepgram_stt_uri: "127.0.0.2:50002",
deepgram_stt_use_tls: false
}
});
t.ok(result.statusCode === 204, 'successfully updated speech credential for deepgram onprem');
result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
});
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram onprem');
t.ok(result.body.deepgram_stt_uri === '127.0.0.2:50002', "deepgram_stt_uri is correct for deepgram onprem");
t.ok(result.body.deepgram_stt_use_tls === false, "deepgram_stt_use_tls is correct for deepgram onprem");
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential for deepgram onprem');
/* add a credential for ibm tts */
if (process.env.IBM_TTS_API_KEY && process.env.IBM_TTS_REGION) {
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
@@ -350,6 +474,20 @@ test('speech credentials tests', async(t) => {
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for ibm stt');
result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
speech_credential_sid: ms_sid,
text: "Hello How are you",
language: "en-US",
voice: "en-US_MichaelExpressive"
}
});
t.ok(result.statusCode === 200, 'successfully IBM tested synthesize');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
auth: authUser,
@@ -482,7 +620,7 @@ test('speech credentials tests', async(t) => {
model_id: 'eleven_multilingual_v2'
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for Cobalt');
t.ok(result.statusCode === 201, 'successfully added speech credential for elevenlabs');
const elevenlabs_sid = result.body.sid;
/* delete the credential */
@@ -490,7 +628,54 @@ test('speech credentials tests', async(t) => {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential for Cobalt');
t.ok(result.statusCode === 204, 'successfully deleted speech credential for elevenlabs');
/* add a credential for playht */
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'playht',
use_for_stt: false,
use_for_tts: true,
api_key: 'asdasdasdasddsadasda',
user_id: 'user_id',
voice_engine: 'PlayHT2.0-turbo'
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for playht');
const playht_sid = result.body.sid;
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${playht_sid}`, {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential for playht');
/* add a credential for rimelabs */
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'rimelabs',
use_for_stt: false,
use_for_tts: true,
api_key: 'asdasdasdasddsadasda',
model_id: 'mist',
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for rimelabs');
const rimelabs_sid = result.body.sid;
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${rimelabs_sid}`, {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential for rimelabs');
/* add a credential for custom voices google */
@@ -558,6 +743,30 @@ test('speech credentials tests', async(t) => {
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
/* add a credential for aws polly by roleArn */
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'aws',
labe: 'aws_polly_with_arn',
use_for_tts: true,
use_for_stt: false,
role_arn: 'Arn::aws::role',
aws_region: 'us-east-1'
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for AWS Polly By RoleArn');
const awsPollySid = result.body.sid;
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${awsPollySid}`, {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
/* Check google supportedLanguagesAndVoices */
result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/speech/supportedLanguagesAndVoices?vendor=google`, {
resolveWithFullResponse: true,