Compare commits

...

7 Commits

Author SHA1 Message Date
Hoan Luu Huu
1c55bad04f support openai stt (#402)
* support openai stt

* wip

* wip

* add stt languages for openai
2025-03-28 10:14:50 -04:00
Hoan Luu Huu
32a2bfcdb5 support cartesia sonic-2 model (#403)
* support cartesia sonic-2 model

* wip

* fix typo

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2025-03-28 09:52:01 -04:00
Hoan Luu Huu
becc1636b7 deepgram multi languages (#397) 2025-03-17 21:10:22 -04:00
Sam Machin
68a9b4226d fix min 25 (#396) 2025-03-11 07:48:04 -04:00
rammohan-y
b154b56064 updated realtimedb-helper to 0.8.13 (#395) 2025-03-10 09:54:24 -04:00
Hoan Luu Huu
556d5c3526 support fetching logs from cloudwatch for call_sid (#393)
* support fetching logs from cloudwatch for call_sid

* wip

* wip

* wip

* wip

* wip

* fix review comments
2025-03-10 08:31:24 -04:00
Hoan Luu Huu
2ac0da0d14 Should not allow create phone_number on different voip_carrier and account (#394)
* shouldnt create phonenumber on different carrier and account

* wip
2025-03-06 07:40:54 -05:00
12 changed files with 2135 additions and 150 deletions

View File

@@ -16,7 +16,7 @@ router.get('/', async(req, res) => {
const service_provider_sid = account_sid ? null : parseServiceProviderSid(req.originalUrl);
const {page, count, alert_type, days, start, end} = req.query || {};
if (!page || page < 1) throw new DbErrorBadRequest('missing or invalid "page" query arg');
if (!count || count < 25 || count > 500) throw new DbErrorBadRequest('missing or invalid "count" query arg');
if (!count || count > 500) throw new DbErrorBadRequest('missing or invalid "count" query arg');
if (account_sid) {
const data = await queryAlerts({

View File

@@ -40,6 +40,10 @@ async function validateAdd(req) {
if (!result || result.length === 0) {
throw new DbErrorBadRequest(`voip_carrier not found for sid ${req.body.voip_carrier_sid}`);
}
const carrier = result[0];
if (carrier.account_sid && req.body.account_sid && req.body.account_sid !== carrier.account_sid) {
throw new DbErrorBadRequest('voip_carrier_sid does not belong to the account');
}
}
}

View File

@@ -4,6 +4,7 @@ const {DbErrorBadRequest} = require('../../utils/errors');
const {getHomerApiKey, getHomerSipTrace, getHomerPcap} = require('../../utils/homer-utils');
const {getJaegerTrace} = require('../../utils/jaeger-utils');
const Account = require('../../models/account');
const { CloudWatchLogsClient, FilterLogEventsCommand } = require('@aws-sdk/client-cloudwatch-logs');
const {
getS3Object,
getGoogleStorageObject,
@@ -31,7 +32,7 @@ router.get('/', async(req, res) => {
const service_provider_sid = account_sid ? null : parseServiceProviderSid(req.originalUrl);
const {page, count, trunk, direction, days, answered, start, end, filter} = req.query || {};
if (!page || page < 1) throw new DbErrorBadRequest('missing or invalid "page" query arg');
if (!count || count < 25 || count > 500) throw new DbErrorBadRequest('missing or invalid "count" query arg');
if (!count || count > 500) throw new DbErrorBadRequest('missing or invalid "count" query arg');
if (account_sid) {
const data = await queryCdrs({
@@ -106,6 +107,71 @@ router.get('/:call_id/:method/pcap', async(req, res) => {
}
});
/**
 * GET /:call_sid/logs
 *
 * Returns the CloudWatch log messages associated with a single call.
 *
 * The call is first looked up through `queryCdrs` so that its
 * `attempted_at` / `terminated_at` timestamps can bound the CloudWatch search
 * window (padded by TIMEBUFFER seconds on each side) and so that its
 * `sip_callid` can be matched in addition to the call sid.
 *
 * Responses:
 *  - 200: JSON array of log message strings (possibly empty)
 *  - 400: AWS_REGION is not set, or the URL carries no account_sid
 *  - 404: no CDR matched the call_sid for this account
 *  - 500: the CDR lookup or the CloudWatch request failed
 */
router.get('/:call_sid/logs', async(req, res) => {
  const {logger, queryCdrs} = req.app.locals;
  const aws_region = process.env.AWS_REGION;
  const {call_sid} = req.params;
  // NOTE(review): logGroupName comes straight from the query string, so a caller
  // can point this at any log group the service role can read - consider an allow-list.
  const {logGroupName = 'jambonz-feature_server'} = req.query;
  const account_sid = parseAccountSid(req.originalUrl);

  if (!aws_region) {
    return res.status(400).send({msg: 'Logs are only available in AWS environments'});
  }
  if (!account_sid) {
    return res.status(400).send({
      msg: 'account_sid is required, please use /Accounts/{account_sid}/RecentCalls/{call_sid}/logs'
    });
  }

  try {
    // Find the call in the CDRs to get its timestamps;
    // this lets us limit the time range searched in CloudWatch.
    const data = await queryCdrs({
      account_sid,
      filter: call_sid,
      page: 0,
      page_size: 50
    });
    if (!data || !Array.isArray(data.data) || data.data.length === 0) {
      return res.status(404).send({msg: 'Call not found'});
    }

    const {
      attempted_at, // e.g. 2025-02-24T13:11:51.969Z
      terminated_at, // e.g. 2025-02-24T13:11:56.153Z
      sip_callid
    } = data.data[0];

    const TIMEBUFFER = 60; // seconds of padding on each side of the call
    const startTime = new Date(attempted_at).getTime() - TIMEBUFFER * 1000;
    const endTime = new Date(terminated_at).getTime() + TIMEBUFFER * 1000;

    const client = new CloudWatchLogsClient({region: aws_region});
    // NOTE(review): call_sid and sip_callid are interpolated unescaped into the
    // filter pattern; a value containing a double quote would alter the pattern.
    const params = {
      logGroupName,
      startTime,
      endTime,
      filterPattern: `{ ($.callSid = "${call_sid}") || ($.callId = "${sip_callid}") }`
    };

    // Results are paginated: keep requesting pages until no nextToken is returned.
    const logs = [];
    let nextToken;
    do {
      const command = new FilterLogEventsCommand({
        ...params,
        ...(nextToken && {nextToken})
      });
      const response = await client.send(command);
      // A page may legitimately carry no events; only collect what is present.
      if (Array.isArray(response.events)) {
        for (const evt of response.events) {
          logs.push(evt.message);
        }
      }
      nextToken = response.nextToken;
    } while (nextToken);

    res.status(200).json(logs);
  } catch (err) {
    logger.error({err}, 'Cannot fetch logs from cloudwatch');
    res.status(500).send({msg: err.message});
  }
});
router.get('/trace/:trace_id', async(req, res) => {
const {logger} = req.app.locals;
const {trace_id} = req.params;

View File

@@ -13,7 +13,8 @@ const {decryptCredential, testWhisper, testDeepgramTTS,
testVerbioStt,
testSpeechmaticsStt,
testCartesia,
testVoxistStt} = require('../../utils/speech-utils');
testVoxistStt,
testOpenAiStt} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden, DbErrorBadRequest} = require('../../utils/errors');
const {
testGoogleTts,
@@ -282,6 +283,12 @@ const encryptCredential = (obj) => {
const whisperData = JSON.stringify({api_key, model_id});
return encrypt(whisperData);
case 'openai':
assert(api_key, 'invalid openai speech credential: api_key is required');
assert(model_id, 'invalid openai speech credential: model_id is required');
const openaiData = JSON.stringify({api_key, model_id});
return encrypt(openaiData);
case 'verbio':
assert(engine_version, 'invalid verbio speech credential: client_id is required');
assert(client_id, 'invalid verbio speech credential: client_id is required');
@@ -882,6 +889,17 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.ttsTestResult(sid, false);
}
}
} else if (cred.vendor === 'openai') {
if (cred.use_for_stt) {
try {
await testOpenAiStt(logger, credential);
results.stt.status = 'ok';
SpeechCredential.sttTestResult(sid, true);
} catch (err) {
results.stt = {status: 'fail', reason: err.message};
SpeechCredential.sttTestResult(sid, false);
}
}
} else if (cred.vendor === 'verbio') {
if (cred.use_for_tts) {
try {

View File

@@ -1,138 +1,56 @@
module.exports = [
{
name: 'Chinese - general',
value: 'zh',
},
{
name: 'Chinese (China)',
value: 'zh-CN',
},
{
name: 'Chinese (Taiwan)',
value: 'zh-TW',
},
{
name: 'Dutch - general',
value: 'nl',
},
{
name: 'English - general',
value: 'en',
},
{
name: 'English (Australia)',
value: 'en-AU',
},
{
name: 'English (United Kingdom)',
value: 'en-GB',
},
{
name: 'English (India)',
value: 'en-IN',
},
{
name: 'English (New Zealand)',
value: 'en-NZ',
},
{
name: 'English (United States)',
value: 'en-US',
},
{
name: 'French - general',
value: 'fr',
},
{
name: 'French (Canada)',
value: 'fr-CA',
},
{
name: 'German - general',
value: 'de',
},
{
name: 'Hindi - general',
value: 'hi',
},
{
name: 'Hindi (Roman Script)',
value: 'hi-Latin',
},
{
name: 'Indonesian - general',
value: 'in',
},
{
name: 'Italian - general',
value: 'it',
},
{
name: 'Japanese - general',
value: 'ja',
},
{
name: 'Korean - general',
value: 'ko',
},
{
name: 'Norwegian - general',
value: 'no',
},
{
name: 'Polish - general',
value: 'pl',
},
{
name: 'Portuguese - general',
value: 'pt',
},
{
name: 'Portuguese (Brazil)',
value: 'pt-BR',
},
{
name: 'Portuguese (Portugal)',
value: 'pt-PT',
},
{
name: 'Russian - general',
value: 'ru',
},
{
name: 'Spanish - general',
value: 'es',
},
{
name: 'Spanish (Latin America)',
value: 'es-419',
},
{
name: 'Swedish - general',
value: 'sv',
},
{
name: 'Turkish - general',
value: 'tr',
},
{
name: 'Ukrainian - general',
value: 'uk',
},
{
name: 'Flemish - general',
value: 'nl-BE',
},
{
name: 'Danish - general',
value: 'da',
},
{
name: 'Tamil - general',
value: 'ta',
},
{
name: 'Tamasheq - general',
value: 'taq',
},
{ name: 'Multilingual', value: 'multi' },
{ name: 'Bulgarian', value: 'bg' },
{ name: 'Catalan', value: 'ca' },
{ name: 'Chinese (Mandarin, Simplified)', value: 'zh' },
{ name: 'Chinese (Mandarin, Simplified - China)', value: 'zh-CN' },
{ name: 'Chinese (Mandarin, Simplified - Hans)', value: 'zh-Hans' },
{ name: 'Chinese (Mandarin, Traditional)', value: 'zh-TW' },
{ name: 'Chinese (Mandarin, Traditional - Hant)', value: 'zh-Hant' },
{ name: 'Chinese (Cantonese, Traditional - Hong Kong)', value: 'zh-HK' },
{ name: 'Czech', value: 'cs' },
{ name: 'Danish', value: 'da' },
{ name: 'Danish (Denmark)', value: 'da-DK' },
{ name: 'Dutch', value: 'nl' },
{ name: 'English', value: 'en' },
{ name: 'English (United States)', value: 'en-US' },
{ name: 'English (Australia)', value: 'en-AU' },
{ name: 'English (United Kingdom)', value: 'en-GB' },
{ name: 'English (New Zealand)', value: 'en-NZ' },
{ name: 'English (India)', value: 'en-IN' },
{ name: 'Estonian', value: 'et' },
{ name: 'Finnish', value: 'fi' },
{ name: 'Flemish', value: 'nl-BE' },
{ name: 'French', value: 'fr' },
{ name: 'French (Canada)', value: 'fr-CA' },
{ name: 'German', value: 'de' },
{ name: 'German (Switzerland)', value: 'de-CH' },
{ name: 'Greek', value: 'el' },
{ name: 'Hindi', value: 'hi' },
{ name: 'Hungarian', value: 'hu' },
{ name: 'Indonesian', value: 'id' },
{ name: 'Italian', value: 'it' },
{ name: 'Japanese', value: 'ja' },
{ name: 'Korean', value: 'ko' },
{ name: 'Korean (South Korea)', value: 'ko-KR' },
{ name: 'Latvian', value: 'lv' },
{ name: 'Lithuanian', value: 'lt' },
{ name: 'Malay', value: 'ms' },
{ name: 'Norwegian', value: 'no' },
{ name: 'Polish', value: 'pl' },
{ name: 'Portuguese', value: 'pt' },
{ name: 'Portuguese (Brazil)', value: 'pt-BR' },
{ name: 'Portuguese (Portugal)', value: 'pt-PT' },
{ name: 'Romanian', value: 'ro' },
{ name: 'Russian', value: 'ru' },
{ name: 'Slovak', value: 'sk' },
{ name: 'Spanish', value: 'es' },
{ name: 'Spanish (Latin America)', value: 'es-419' },
{ name: 'Swedish', value: 'sv' },
{ name: 'Swedish (Sweden)', value: 'sv-SE' },
{ name: 'Thai', value: 'th' },
{ name: 'Thai (Thailand)', value: 'th-TH' },
{ name: 'Turkish', value: 'tr' },
{ name: 'Ukrainian', value: 'uk' },
{ name: 'Vietnamese', value: 'vi' }
];

View File

@@ -0,0 +1,6 @@
module.exports = [
{ name: 'Whisper', value: 'whisper-1' },
{ name: 'GPT 4o Mini Transcribe', value: 'gpt-4o-mini-transcribe' },
{ name: 'GLT 4o Transcribe', value: 'gpt-4o-transcribe' },
];

View File

@@ -0,0 +1,59 @@
module.exports = [
{ name: 'Afrikaans', value: 'af' },
{ name: 'Arabic', value: 'ar' },
{ name: 'Azerbaijani', value: 'az' },
{ name: 'Belarusian', value: 'be' },
{ name: 'Bulgarian', value: 'bg' },
{ name: 'Bosnian', value: 'bs' },
{ name: 'Catalan', value: 'ca' },
{ name: 'Czech', value: 'cs' },
{ name: 'Welsh', value: 'cy' },
{ name: 'Danish', value: 'da' },
{ name: 'German', value: 'de' },
{ name: 'Greek', value: 'el' },
{ name: 'English', value: 'en' },
{ name: 'Spanish', value: 'es' },
{ name: 'Estonian', value: 'et' },
{ name: 'Persian', value: 'fa' },
{ name: 'Finnish', value: 'fi' },
{ name: 'French', value: 'fr' },
{ name: 'Galician', value: 'gl' },
{ name: 'Hebrew', value: 'he' },
{ name: 'Hindi', value: 'hi' },
{ name: 'Croatian', value: 'hr' },
{ name: 'Hungarian', value: 'hu' },
{ name: 'Armenian', value: 'hy' },
{ name: 'Indonesian', value: 'id' },
{ name: 'Icelandic', value: 'is' },
{ name: 'Italian', value: 'it' },
{ name: 'Japanese', value: 'ja' },
{ name: 'Kazakh', value: 'kk' },
{ name: 'Kannada', value: 'kn' },
{ name: 'Korean', value: 'ko' },
{ name: 'Lithuanian', value: 'lt' },
{ name: 'Latvian', value: 'lv' },
{ name: 'Maori', value: 'mi' },
{ name: 'Macedonian', value: 'mk' },
{ name: 'Marathi', value: 'mr' },
{ name: 'Malay', value: 'ms' },
{ name: 'Nepali', value: 'ne' },
{ name: 'Dutch', value: 'nl' },
{ name: 'Norwegian', value: 'no' },
{ name: 'Polish', value: 'pl' },
{ name: 'Portuguese', value: 'pt' },
{ name: 'Romanian', value: 'ro' },
{ name: 'Russian', value: 'ru' },
{ name: 'Slovak', value: 'sk' },
{ name: 'Slovenian', value: 'sl' },
{ name: 'Serbian', value: 'sr' },
{ name: 'Swedish', value: 'sv' },
{ name: 'Swahili', value: 'sw' },
{ name: 'Tamil', value: 'ta' },
{ name: 'Thai', value: 'th' },
{ name: 'Tagalog', value: 'tl' },
{ name: 'Turkish', value: 'tr' },
{ name: 'Ukrainian', value: 'uk' },
{ name: 'Urdu', value: 'ur' },
{ name: 'Vietnamese', value: 'vi' },
{ name: 'Chinese', value: 'zh' },
];

View File

@@ -4,6 +4,16 @@ module.exports = [
value: 'sonic',
languages: ['en', 'fr', 'de', 'es', 'pt', 'zh', 'ja', 'hi', 'it', 'ko', 'nl', 'pl', 'ru', 'sv', 'tr']
},
{
name: 'Sonic 2',
value: 'sonic-2',
languages: ['en', 'fr', 'de', 'es', 'pt', 'zh', 'ja', 'hi', 'it', 'ko', 'nl', 'pl', 'ru', 'sv', 'tr']
},
{
name: 'Sonic Turbo',
value: 'sonic-turbo',
languages: ['en', 'fr', 'de', 'es', 'pt', 'zh', 'ja', 'hi', 'it', 'ko', 'nl', 'pl', 'ru', 'sv', 'tr']
},
{ name: 'Sonic Preview', value: 'sonic-preview', languages: ['en'] },
{
name: 'Sonic 2024-12-12',

View File

@@ -0,0 +1,6 @@
module.exports = [
{ name: 'TTS-1', value: 'tts-1' },
{ name: 'TTS-1-HD', value: 'tts-1-hd' },
{ name: 'GPT-4o-Mini-TTS', value: 'gpt-4o-mini-tts' },
];

View File

@@ -20,6 +20,7 @@ const TtsElevenlabsLanguagesVoices = require('./speech-data/tts-elevenlabs');
const TtsWhisperLanguagesVoices = require('./speech-data/tts-whisper');
const TtsPlayHtLanguagesVoices = require('./speech-data/tts-playht');
const TtsVerbioLanguagesVoices = require('./speech-data/tts-verbio');
const ttsCartesia = require('./speech-data/tts-cartesia');
const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
const TtsLanguagesDeepgram = require('./speech-data/tts-deepgram');
@@ -29,6 +30,7 @@ const TtsModelPlayHT = require('./speech-data/tts-model-playht');
const ttsLanguagesPlayHt = require('./speech-data/tts-languages-playht');
const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');
const TtsModelCartesia = require('./speech-data/tts-model-cartesia');
const TtsModelOpenai = require('./speech-data/tts-model-openai');
const SttGoogleLanguagesVoices = require('./speech-data/stt-google');
const SttAwsLanguagesVoices = require('./speech-data/stt-aws');
@@ -43,8 +45,10 @@ const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics')
const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
const SttVoxistLanguagesVoices = require('./speech-data/stt-voxist');
const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
const ttsCartesia = require('./speech-data/tts-cartesia');
const ttsModelCartesia = require('./speech-data/tts-model-cartesia');
const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
const SttModelOpenai = require('./speech-data/stt-model-openai');
const testSonioxStt = async(logger, credentials) => {
@@ -477,6 +481,43 @@ const testVerbioStt = async(logger, getVerbioAccessToken, credentials) => {
}
};
/**
 * Verify an OpenAI speech-to-text credential by transcribing the bundled
 * test audio file through the OpenAI transcriptions endpoint.
 *
 * The request always uses the 'whisper-1' model; only `api_key` is read
 * from the credentials.
 *
 * @param {object} logger - logger exposing `debug` and `info`
 * @param {object} credentials - decrypted credential; `api_key` is required
 * @returns {Promise<object>} the parsed JSON body of a successful response
 * @throws {Error} when the file cannot be read, the request fails, or the
 *   API answers with a non-2xx status
 */
const testOpenAiStt = async(logger, credentials) => {
  const {api_key} = credentials;
  try {
    // Read the fixture without blocking the event loop.
    const audioBuffer = await fs.promises.readFile(`${__dirname}/../../data/test_audio.wav`);

    // The endpoint expects a multipart form with the audio in the 'file' field
    // and a 'model' field.
    const formData = new FormData();
    formData.append('file', new Blob([audioBuffer], {type: 'audio/wav'}), 'audio.wav');
    formData.append('model', 'whisper-1');

    const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${api_key}`,
        'User-Agent': 'jambonz'
      },
      body: formData
    });

    if (!response.ok) {
      // Prefer the API's own error message, but do not let a non-JSON error
      // body (e.g. an HTML page from a proxy) hide the HTTP status.
      let reason = response.statusText;
      try {
        const body = await response.json();
        if (body && body.error && body.error.message) {
          reason = body.error.message;
        }
      } catch (parseErr) {
        logger.debug({err: parseErr}, 'OpenAI error response was not valid JSON');
      }
      throw new Error(`OpenAI API error: ${response.status} ${reason}`);
    }

    const json = await response.json();
    logger.debug({json}, 'successfully speech to text from OpenAI');
    return json;
  } catch (err) {
    logger.info({err}, 'OpenAI speech-to-text request failed');
    throw err;
  }
};
const testAssemblyStt = async(logger, credentials) => {
const {api_key} = credentials;
@@ -651,6 +692,10 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.model_id = o.model_id;
} else if ('openai' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.model_id = o.model_id;
} else if ('verbio' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.client_id = o.client_id;
@@ -714,6 +759,8 @@ async function getLanguagesAndVoicesForVendor(logger, vendor, credential, getTts
return await getLanguagesVoicesForVoxist(credential, getTtsVoices, logger);
case 'whisper':
return await getLanguagesVoicesForWhisper(credential, getTtsVoices, logger);
case 'openai':
return await getLanguagesVoicesForOpenAi(credential, getTtsVoices, logger);
case 'verbio':
return await getLanguagesVoicesForVerbio(credential, getTtsVoices, logger);
case 'speechmatics':
@@ -1014,6 +1061,10 @@ async function getLanguagesVoicesForWhisper(credential) {
return tranform(TtsWhisperLanguagesVoices, undefined, TtsModelWhisper);
}
/**
 * Build the languages/models payload for the 'openai' vendor.
 *
 * No TTS voice list is supplied (first argument is undefined); the STT
 * language list, the TTS model list and the STT model list are passed
 * through `tranform` unchanged.
 *
 * @param {object} credential - not used by this function
 * @returns {Promise<object>} whatever `tranform` returns for these inputs
 */
async function getLanguagesVoicesForOpenAi(credential) {
  const ttsVoices = undefined;
  const sttLanguages = SttOpenaiLanguagesVoices;
  const payload = tranform(ttsVoices, sttLanguages, TtsModelOpenai, SttModelOpenai);
  return payload;
}
async function getLanguagesVoicesForVerbio(credentials, getTtsVoices, logger) {
const stt = SttVerbioLanguagesVoices.reduce((acc, v) => {
if (!v.version || (credentials && credentials.engine_version === v.version)) {
@@ -1034,11 +1085,12 @@ async function getLanguagesVoicesForVerbio(credentials, getTtsVoices, logger) {
}
}
function tranform(tts, stt, models) {
function tranform(tts, stt, models, sttModels) {
return {
...(tts && {tts}),
...(stt && {stt}),
...(models && {models})
...(models && {models}),
...(sttModels && {sttModels})
};
}
@@ -1224,7 +1276,7 @@ const testCartesia = async(logger, synthAudio, credentials) => {
async function getLanguagesVoicesForCartesia(credential) {
if (credential) {
const {model_id} = credential;
const {languages} = ttsModelCartesia.find((m) => m.value === model_id);
const {languages} = TtsModelCartesia.find((m) => m.value === model_id);
const voices = await fetchCartesiaVoices(credential);
const buildVoice = (d) => (
@@ -1301,5 +1353,6 @@ module.exports = {
getLanguagesAndVoicesForVendor,
testSpeechmaticsStt,
testCartesia,
testVoxistStt
testVoxistStt,
testOpenAiStt
};

1852
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -20,6 +20,7 @@
"url": "https://github.com/jambonz/jambonz-api-server.git"
},
"dependencies": {
"@aws-sdk/client-cloudwatch-logs": "^3.750.0",
"@aws-sdk/client-s3": "^3.550.0",
"@aws-sdk/client-transcribe": "^3.549.0",
"@azure/storage-blob": "^12.17.0",
@@ -29,7 +30,7 @@
"@jambonz/db-helpers": "^0.9.3",
"@jambonz/lamejs": "^1.2.2",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.10",
"@jambonz/realtimedb-helpers": "^0.8.13",
"@jambonz/speech-utils": "^0.2.3",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.72",