Compare commits

...

10 Commits

Author SHA1 Message Date
Hoan Luu Huu
b5bede7a08 add support for speechmatics languages and voices (#355) 2024-10-11 19:54:22 -04:00
Hoan Luu Huu
6e779f6744 support stt speechmatics (#353)
* support stt speechmatics

* support speechmatics region authentication

* update testcase for speechmatics_stt_uri
2024-10-11 09:17:40 -04:00
Hoan Luu Huu
77b9ca4cba update speech version 0.1.18 (#354) 2024-10-11 08:42:13 -04:00
Hoan Luu Huu
0451b6982c Merge pull request #350 from jambonz/feat/playht30
support playht3.0
2024-10-10 10:40:57 +07:00
Hoan Luu Huu
71adc577e9 Merge branch 'main' into feat/playht30 2024-10-10 10:38:13 +07:00
Hoan Luu Huu
e8b32103fe update speech version (#352) 2024-10-09 19:44:43 -04:00
Hoan Luu Huu
57d8d0a02c allow system information contains log level and account has enable_debug_log (#351)
* allow system information contains log level and account has enable_debug_log

* update upgrade db script
2024-10-07 09:52:11 -04:00
Quan HL
a41760fa9f PlayHT version 3.0 support PlayHt2.0 voices 2024-10-03 13:00:42 +07:00
Quan HL
c6bae80a03 support playht3.0 2024-09-27 11:13:59 +07:00
Dave Horton
4cddbd83a1 update to version of realtime-db with fix for expires (#349) 2024-09-18 08:24:47 -04:00
12 changed files with 2027 additions and 1196 deletions

View File

@@ -162,7 +162,7 @@ regex VARCHAR(32) NOT NULL COMMENT 'regex-based pattern match against dialed num
description VARCHAR(1024),
priority INTEGER NOT NULL COMMENT 'lower priority routes are attempted first',
PRIMARY KEY (lcr_route_sid)
) COMMENT='An ordered list of digit patterns in an LCR table. The pat';
) COMMENT='An ordered list of digit patterns in an LCR table. The patterns are tested in sequence until one matches';
CREATE TABLE lcr
(
@@ -173,7 +173,7 @@ default_carrier_set_entry_sid CHAR(36) COMMENT 'default carrier/route to use whe
service_provider_sid CHAR(36),
account_sid CHAR(36),
PRIMARY KEY (lcr_sid)
) COMMENT='An LCR (least cost routing) table that is used by a service ';
) COMMENT='An LCR (least cost routing) table that is used by a service provider or account to make decisions about routing outbound calls when multiple carriers are available.';
CREATE TABLE password_settings
(
@@ -359,7 +359,8 @@ CREATE TABLE system_information
domain_name VARCHAR(255),
sip_domain_name VARCHAR(255),
monitoring_domain_name VARCHAR(255),
private_network_cidr VARCHAR(8192)
private_network_cidr VARCHAR(8192),
log_level ENUM('info', 'debug') NOT NULL DEFAULT 'info'
);
CREATE TABLE users
@@ -553,6 +554,7 @@ siprec_hook_sid CHAR(36),
record_all_calls BOOLEAN NOT NULL DEFAULT false,
record_format VARCHAR(16) NOT NULL DEFAULT 'mp3',
bucket_credential VARCHAR(8192) COMMENT 'credential used to authenticate with storage service',
enable_debug_log BOOLEAN NOT NULL DEFAULT false,
PRIMARY KEY (account_sid)
) COMMENT='An enterprise that uses the platform for comm services';

File diff suppressed because one or more lines are too long

View File

@@ -200,6 +200,8 @@ const sql = {
],
9002: [
'ALTER TABLE system_information ADD COLUMN private_network_cidr VARCHAR(8192)',
'ALTER TABLE system_information ADD COLUMN log_level ENUM(\'info\', \'debug\') NOT NULL DEFAULT \'info\'',
'ALTER TABLE accounts ADD COLUMN enable_debug_log BOOLEAN NOT NULL DEFAULT false',
]
};

View File

@@ -10,7 +10,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
testPlayHT,
testRimelabs,
testVerbioTts,
testVerbioStt} = require('../../utils/speech-utils');
testVerbioStt,
testSpeechmaticsStt} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
const {
testGoogleTts,
@@ -122,6 +123,7 @@ const encryptCredential = (obj) => {
secret,
nuance_tts_uri,
nuance_stt_uri,
speechmatics_stt_uri,
deepgram_stt_uri,
deepgram_stt_use_tls,
deepgram_tts_uri,
@@ -236,6 +238,12 @@ const encryptCredential = (obj) => {
const elevenlabsData = JSON.stringify({api_key, model_id, options});
return encrypt(elevenlabsData);
case 'speechmatics':
assert(api_key, 'invalid speechmatics speech credential: api_key is required');
assert(speechmatics_stt_uri, 'invalid speechmatics speech credential: speechmatics_stt_uri is required');
const speechmaticsData = JSON.stringify({api_key, speechmatics_stt_uri, options});
return encrypt(speechmaticsData);
case 'playht':
assert(api_key, 'invalid playht speech credential: api_key is required');
assert(user_id, 'invalid playht speech credential: user_id is required');
@@ -768,6 +776,18 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'speechmatics') {
// Exercise the stored Speechmatics credential. The realtime URI saved with the
// credential is forwarded along with the key, because testSpeechmaticsStt
// destructures `speechmatics_stt_uri` from its credentials argument.
const {api_key, speechmatics_stt_uri} = credential;
if (cred.use_for_stt) {
  try {
    await testSpeechmaticsStt(logger, {api_key, speechmatics_stt_uri});
    results.stt.status = 'ok';
    // This is an STT check, so record the outcome as the STT test result.
    // NOTE(review): assumes SpeechCredential exposes sttTestResult alongside
    // ttsTestResult — confirm against the model.
    SpeechCredential.sttTestResult(sid, true);
  } catch (err) {
    results.stt = {status: 'fail', reason: err.message};
    SpeechCredential.sttTestResult(sid, false);
  }
}
} else if (cred.vendor === 'playht') {
if (cred.use_for_tts) {
try {

View File

@@ -0,0 +1,218 @@
module.exports = [
{
name: 'Automatic',
value: 'auto',
},
{
name: 'Arabic',
value: 'ar',
},
{
name: 'Bashkir',
value: 'ba',
},
{
name: 'Basque',
value: 'eu',
},
{
name: 'Belarusian',
value: 'be',
},
{
name: 'Bulgarian',
value: 'bg',
},
{
name: 'Cantonese',
value: 'yue',
},
{
name: 'Catalan',
value: 'ca',
},
{
name: 'Croatian',
value: 'hr',
},
{
name: 'Czech',
value: 'cs',
},
{
name: 'Danish',
value: 'da',
},
{
name: 'Dutch',
value: 'nl',
},
{
name: 'English',
value: 'en',
},
{
name: 'Esperanto',
value: 'eo',
},
{
name: 'Estonian',
value: 'et',
},
{
name: 'Finnish',
value: 'fi',
},
{
name: 'French',
value: 'fr',
},
{
name: 'Galician',
value: 'gl',
},
{
name: 'German',
value: 'de',
},
{
name: 'Greek',
value: 'el',
},
{
name: 'Hebrew',
value: 'he',
},
{
name: 'Hindi',
value: 'hi',
},
{
name: 'Hungarian',
value: 'hu',
},
{
name: 'Irish',
value: 'ga',
},
{
name: 'Interlingua',
value: 'ia',
},
{
name: 'Italian',
value: 'it',
},
{
name: 'Indonesian',
value: 'id',
},
{
name: 'Japanese',
value: 'ja',
},
{
name: 'Korean',
value: 'ko',
},
{
name: 'Latvian',
value: 'lv',
},
{
name: 'Lithuanian',
value: 'lt',
},
{
name: 'Maltese',
value: 'mt',
},
{
name: 'Malay',
value: 'ms',
},
{
name: 'Mandarin',
value: 'cmn',
},
{
name: 'Marathi',
value: 'mr',
},
{
name: 'Mongolian',
value: 'mn',
},
{
name: 'Norwegian',
value: 'no',
},
{
name: 'Persian',
value: 'fa',
},
{
name: 'Polish',
value: 'pl',
},
{
name: 'Portuguese',
value: 'pt',
},
{
name: 'Romanian',
value: 'ro',
},
{
name: 'Russian',
value: 'ru',
},
{
name: 'Slovakian',
value: 'sk',
},
{
name: 'Slovenian',
value: 'sl',
},
{
name: 'Spanish',
value: 'es',
},
{
name: 'Spanish & English bilingual',
value: 'es',
},
{
name: 'Swedish',
value: 'sv',
},
{
name: 'Tamil',
value: 'ta',
},
{
name: 'Thai',
value: 'th',
},
{
name: 'Turkish',
value: 'tr',
},
{
name: 'Uyghur',
value: 'ug',
},
{
name: 'Ukrainian',
value: 'uk',
},
{
name: 'Vietnamese',
value: 'vi',
},
{
name: 'Welsh',
value: 'cy',
},
];

View File

@@ -1,4 +1,5 @@
module.exports = [
{ name: 'Play3.0', value: 'Play3.0' },
{ name: 'PlayHT2.0-turbo', value: 'PlayHT2.0-turbo' },
{ name: 'PlayHT2.0', value: 'PlayHT2.0' },
{ name: 'PlayHT1.0', value: 'PlayHT1.0' },

View File

@@ -7,6 +7,7 @@ const bent = require('bent');
const fs = require('fs');
const { AssemblyAI } = require('assemblyai');
const {decrypt, obscureKey} = require('./encrypt-decrypt');
const { RealtimeSession } = require('speechmatics');
const TtsGoogleLanguagesVoices = require('./speech-data/tts-google');
const TtsAwsLanguagesVoices = require('./speech-data/tts-aws');
@@ -35,6 +36,7 @@ const SttIbmLanguagesVoices = require('./speech-data/stt-ibm');
const SttNvidiaLanguagesVoices = require('./speech-data/stt-nvidia');
const SttCobaltLanguagesVoices = require('./speech-data/stt-cobalt');
const SttSonioxLanguagesVoices = require('./speech-data/stt-soniox');
const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics');
const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
@@ -54,6 +56,61 @@ const testSonioxStt = async(logger, credentials) => {
});
};
/**
 * Smoke-test a Speechmatics STT credential by streaming the bundled test audio
 * file through a realtime session and collecting the returned transcript.
 *
 * @param {object} logger - logger exposing `info`
 * @param {object} credentials
 * @param {string} credentials.api_key - passed to the session as `apiKey`
 * @param {string} [credentials.speechmatics_stt_uri] - passed to the session as `realtimeUrl`
 * @returns {Promise<string>} the concatenated transcript, resolved when the
 *   session emits `EndOfTranscript`; rejects when the session emits `Error`,
 *   when starting/stopping the session fails, or when the audio file cannot be read
 */
const testSpeechmaticsStt = async(logger, credentials) => {
  const {api_key, speechmatics_stt_uri} = credentials;

  // Plain (non-async) executor: nothing is awaited in here, and an async
  // executor would hide any rejection it produced itself.
  return new Promise((resolve, reject) => {
    let fileStream;

    // Stop feeding audio once the test has failed, then report the failure.
    const fail = (error) => {
      if (fileStream) fileStream.destroy();
      reject(error);
    };

    try {
      const session = new RealtimeSession({ apiKey: api_key, realtimeUrl: speechmatics_stt_uri });
      let transcription = '';

      session.addListener('Error', (error) => {
        fail(error);
      });

      session.addListener('AddTranscript', (message) => {
        transcription += message.metadata.transcript;
      });

      session.addListener('EndOfTranscript', () => {
        resolve(transcription);
      });

      session
        .start({
          transcription_config: {
            language: 'en',
            operating_point: 'enhanced',
            enable_partials: true,
            max_delay: 2,
          },
          audio_format: { type: 'file' },
        })
        .then(() => {
          //prepare file stream
          fileStream = fs.createReadStream(`${__dirname}/../../data/test_audio.wav`);

          // Without this handler an unreadable/missing audio file raises an
          // unhandled 'error' event instead of failing the credential test.
          fileStream.on('error', (error) => {
            fail(error);
          });

          //send it
          fileStream.on('data', (sample) => {
            session.sendAudio(sample);
          });

          //end the session
          fileStream.on('end', () => {
            // Route both synchronous throws and rejections from stop() to the caller.
            Promise.resolve()
              .then(() => session.stop())
              .catch(fail);
          });

          return;
        })
        .catch((error) => {
          fail(error);
        });
    } catch (error) {
      logger.info({error}, 'failed to get speechmatics transcript');
      reject(error);
    }
  });
};
const testNuanceTts = async(logger, getTtsVoices, credentials) => {
const voices = await getTtsVoices({vendor: 'nuance', credentials});
return voices;
@@ -532,6 +589,10 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
} else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
} else if ('speechmatics' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
} else if ('elevenlabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
@@ -623,6 +684,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
return await getLanguagesVoicesForWhisper(credential, getTtsVoices, logger);
case 'verbio':
return await getLanguagesVoicesForVerbio(credential, getTtsVoices, logger);
case 'speechmatics':
return await getLanguagesVoicesForSpeechmatics(credential, getTtsVoices, logger);
default:
logger.info(`invalid vendor ${vendor}, return empty result`);
throw new Error(`Invalid vendor ${vendor}`);
@@ -735,6 +798,10 @@ async function getLanguagesVoicesForSoniox(credential) {
return tranform(undefined, SttSonioxLanguagesVoices);
}
/**
 * Build the languages/voices payload for Speechmatics from the static STT
 * language list; the `credential` argument is accepted but not consulted.
 */
async function getLanguagesVoicesForSpeechmatics(credential) {
  const sttLanguages = SttSpeechmaticsLanguagesVoices;
  return tranform(undefined, sttLanguages);
}
async function getLanguagesVoicesForElevenlabs(credential) {
if (credential) {
const get = bent('https://api.elevenlabs.io', 'GET', 'json', {
@@ -816,7 +883,10 @@ async function getLanguagesVoicesForPlayHT(credential) {
};
const ttsVoices = list_voices.reduce((acc, voice) => {
if (!credential.voice_engine.includes(voice.voice_engine)) {
// Play3.0 support all voice for PlayHT2.0*
const filteredVoiceEngine = credential.voice_engine === 'Play3.0' ?
`${credential.voice_engine}_PlayHT2.0_PlayHT2.0-turbo` : credential.voice_engine;
if (!filteredVoiceEngine.includes(voice.voice_engine)) {
return acc;
}
const languageCode = voice.language_code;
@@ -1063,5 +1133,6 @@ module.exports = {
testWhisper,
testVerbioTts,
testVerbioStt,
getLanguagesAndVoicesForVendor
getLanguagesAndVoicesForVendor,
testSpeechmaticsStt
};

2808
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -28,8 +28,8 @@
"@jambonz/db-helpers": "^0.9.3",
"@jambonz/lamejs": "^1.2.2",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.9",
"@jambonz/speech-utils": "^0.1.13",
"@jambonz/realtimedb-helpers": "^0.8.10",
"@jambonz/speech-utils": "^0.1.18",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.72",
"@soniox/soniox-node": "^1.2.2",
@@ -52,6 +52,7 @@
"passport-http-bearer": "^1.0.1",
"pino": "^8.20.0",
"short-uuid": "^4.2.2",
"speechmatics": "^4.0.0",
"stream-buffers": "^3.0.2",
"stripe": "^14.24.0",
"swagger-ui-express": "^5.0.0",

View File

@@ -152,7 +152,9 @@ test('account tests', async(t) => {
auth: authAdmin,
json: true,
});
console.log(result);
t.ok(result.name === 'daveh' , 'successfully retrieved account by sid');
t.ok(result.enable_debug_log === 0 , 'enable_debug_log default value ok');
/* update account with account level token */
result = await request.put(`/Accounts/${sid}`, {
@@ -177,8 +179,8 @@ test('account tests', async(t) => {
name: 'recordings',
access_key_id: 'access_key_id',
secret_access_key: 'secret access key'
}
},
enable_debug_log: true
}
});
t.ok(result.statusCode === 204, 'successfully updated account using account level token');
@@ -194,6 +196,7 @@ test('account tests', async(t) => {
t.ok(result.bucket_credential.access_key_id === 'access_key_id', 'bucket_access_key_id was updated');
t.ok(result.record_all_calls === 1, 'record_all_calls was updated');
t.ok(result.record_format === 'wav', 'record_format was updated');
t.ok(result.enable_debug_log, 'enable_debug_log was updated');
/* verify that account level api key last_used was updated*/
result = await request.get(`/Accounts/${sid}/ApiKeys`, {

View File

@@ -532,6 +532,39 @@ test('speech credentials tests', async(t) => {
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
}
/* add a credential for Speechmatics */
if (process.env.SPEECHMATICS_API_KEY) {
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'speechmatics',
use_for_stt: true,
api_key: process.env.SPEECHMATICS_API_KEY,
speechmatics_stt_uri: 'eu2.rt.speechmatics.com'
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for speechmatics');
const ms_sid = result.body.sid;
/* test the speech credential */
result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}/test`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
});
console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for speechmatics');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
}
/* add a credential for nvidia */
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,

View File

@@ -17,7 +17,8 @@ test('system information test', async(t) => {
domain_name: 'test.com',
sip_domain_name: 'sip.test.com',
monitoring_domain_name: 'monitor.test.com',
private_network_cidr: '192.168.1.0/24, 10.10.100.1'
private_network_cidr: '192.168.1.0/24, 10.10.100.1',
log_level: 'info'
}
});
t.ok(result.statusCode === 201, 'successfully created system information ');
@@ -26,6 +27,7 @@ test('system information test', async(t) => {
t.ok(body.sip_domain_name === 'sip.test.com', 'added sip_domain_name ok');
t.ok(body.monitoring_domain_name === 'monitor.test.com', 'added monitoring_domain_name ok');
t.ok(body.private_network_cidr === '192.168.1.0/24, 10.10.100.1', 'added private_network_cidr ok');
t.ok(body.log_level === 'info', 'added log_level ok');
result = await request.get('/SystemInformation', {
auth: authAdmin,
@@ -35,6 +37,7 @@ test('system information test', async(t) => {
t.ok(result.sip_domain_name === 'sip.test.com', 'get sip_domain_name ok');
t.ok(result.monitoring_domain_name === 'monitor.test.com', 'get monitoring_domain_name ok');
t.ok(result.private_network_cidr === '192.168.1.0/24, 10.10.100.1', 'get private_network_cidr ok');
t.ok(result.log_level === 'info', 'added log_level ok');
result = await request.post('/SystemInformation', {
resolveWithFullResponse: true,
@@ -44,7 +47,8 @@ test('system information test', async(t) => {
domain_name: 'test1.com',
sip_domain_name: 'sip1.test.com',
monitoring_domain_name: 'monitor1.test.com',
private_network_cidr: ''
private_network_cidr: '',
log_level: 'debug'
}
});
t.ok(result.statusCode === 201, 'successfully updated system information ');
@@ -53,6 +57,7 @@ test('system information test', async(t) => {
t.ok(body.sip_domain_name === 'sip1.test.com', 'updated sip_domain_name ok');
t.ok(body.monitoring_domain_name === 'monitor1.test.com', 'updated monitoring_domain_name ok');
t.ok(body.private_network_cidr === '', 'updated private_network_cidr ok');
t.ok(body.log_level === 'debug', 'updated log_level ok');
result = await request.get('/SystemInformation', {
auth: authAdmin,
@@ -61,6 +66,7 @@ test('system information test', async(t) => {
t.ok(result.domain_name === 'test1.com', 'get domain_name ok');
t.ok(result.sip_domain_name === 'sip1.test.com', 'get sip_domain_name ok');
t.ok(result.monitoring_domain_name === 'monitor1.test.com', 'get monitoring_domain_name ok');
t.ok(result.log_level === 'debug', 'updated log_level ok');
} catch(err) {
console.error(err);