Remove IBM speech support, since it is not used (to my knowledge) and its dependencies have known vulnerabilities (#141)

This commit is contained in:
Dave Horton
2026-03-25 10:08:30 -04:00
committed by GitHub
parent 305695d068
commit c123f19898
14 changed files with 1436 additions and 2516 deletions
+1 -8
View File
@@ -13,11 +13,6 @@ jobs:
with:
node-version: '20'
- run: npm install
- name: Install Docker Compose
run: |
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
docker-compose --version
- run: npm run jslint
- run: sudo apt update && sudo apt install -y squid
- run: sudo cp test/squid.conf /etc/squid/squid.conf
@@ -28,9 +23,7 @@ jobs:
AWS_REGION: ${{ secrets.AWS_REGION }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
GCP_JSON_KEY: ${{ secrets.GCP_JSON_KEY }}
IBM_API_KEY: ${{ secrets.IBM_API_KEY }}
IBM_TTS_API_KEY: ${{ secrets.IBM_TTS_API_KEY }}
IBM_TTS_REGION: ${{ secrets.IBM_TTS_REGION }}
MICROSOFT_API_KEY: ${{ secrets.MICROSOFT_API_KEY }}
MICROSOFT_REGION: ${{ secrets.MICROSOFT_REGION }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+1 -1
View File
@@ -16,7 +16,7 @@ module.exports = (opts, logger) => {
synthAudio: require('./lib/synth-audio').bind(null, client, createHash, retrieveHash, logger),
getVerbioAccessToken: require('./lib/get-verbio-token').bind(null, client, logger),
getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
getAwsAuthToken: require('./lib/get-aws-sts-token').bind(null, logger, createHash, retrieveHash),
getTtsVoices: require('./lib/get-tts-voices').bind(null, client, createHash, retrieveHash, logger),
};
-48
View File
@@ -1,48 +0,0 @@
const formurlencoded = require('form-urlencoded');
const {Pool} = require('undici');
const pool = new Pool('https://iam.cloud.ibm.com');
const {makeIbmKey, noopLogger} = require('./utils');
const { HTTP_TIMEOUT } = require('./config');
const debug = require('debug')('jambonz:realtimedb-helpers');
/**
 * Retrieve an IBM IAM access token for an API key, consulting the cache first.
 *
 * @param {object} client - cache client exposing async `get(key)` and
 *   `set(key, value, 'EX', seconds)` (presumably a Redis client -- confirm against caller)
 * @param {object} [logger] - logger exposing `debug` and `error`; falls back to `noopLogger`
 * @param {string} apiKey - IBM API key to exchange for an access token
 * @returns {Promise<object>} `{access_token, servedFromCache: true}` on a cache hit,
 *   otherwise the JSON body returned by IBM plus `servedFromCache: false`
 * @throws {Error} when the cache or the HTTP request fails; for a non-200 response
 *   the error carries the response status in `err.statusCode`
 */
async function getIbmAccessToken(client, logger, apiKey) {
  logger = logger || noopLogger;
  try {
    const key = makeIbmKey(apiKey);
    const access_token = await client.get(key);
    if (access_token) return {access_token, servedFromCache: true};

    /* access token not found in cache, so fetch it from Ibm */
    const payload = {
      grant_type: 'urn:ibm:params:oauth:grant-type:apikey',
      apikey: apiKey
    };
    const {statusCode, headers, body} = await pool.request({
      path: '/identity/token',
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded'
      },
      body: formurlencoded(payload),
      timeout: HTTP_TIMEOUT,
      followRedirects: false
    });

    if (200 !== statusCode) {
      const json = await body.json();
      logger.debug({statusCode, headers, body: json}, 'error fetching access token from Ibm');
      /* give the error a message so it is still meaningful once it leaves this module */
      const err = new Error(`error fetching access token from Ibm: status ${statusCode}`);
      err.statusCode = statusCode;
      throw err;
    }

    const json = await body.json();
    /* cache for slightly less than the token lifetime; skip caching entirely when the
       resulting TTL is not a positive number rather than handing the cache a bad expiry */
    const ttl = json.expires_in - 30;
    if (Number.isFinite(ttl) && ttl > 0) {
      await client.set(key, json.access_token, 'EX', ttl);
    }
    return {...json, servedFromCache: false};
  } catch (err) {
    debug(err, 'getIbmAccessToken: Error retrieving Ibm access token');
    /* the previous message carried an un-interpolated placeholder for a variable
       that does not exist in this function */
    logger.error(err, 'getIbmAccessToken: Error retrieving Ibm access token');
    throw err;
  }
}
module.exports = getIbmAccessToken;
+1 -20
View File
@@ -3,8 +3,6 @@ const {noopLogger, createNuanceClient, createKryptonClient} = require('./utils')
const getNuanceAccessToken = require('./get-nuance-access-token');
const getVerbioAccessToken = require('./get-verbio-token');
const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
const { IamAuthenticator } = require('ibm-watson/auth');
const ttsGoogle = require('@google-cloud/text-to-speech');
const { PollyClient, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
const getAwsAuthToken = require('./get-aws-sts-token');
@@ -12,21 +10,6 @@ const {Pool} = require('undici');
const { HTTP_TIMEOUT } = require('./config');
const verbioVoicePool = new Pool('https://us.rest.speechcenter.verbio.com');
/**
 * List the text-to-speech voices offered by IBM Watson for the given credentials.
 *
 * @param {object} client - unused here; kept so the signature matches the other vendor helpers
 * @param {object} logger - unused here; kept for the same reason
 * @param {object} credentials - must carry `tts_region` and `tts_api_key`
 * @returns {Promise<object>} whatever `TextToSpeechV1#listVoices` resolves with
 */
const getIbmVoices = async(client, logger, credentials) => {
  const {tts_region, tts_api_key} = credentials;

  /* never print the api key: the previous console.log wrote the secret to stdout */
  const textToSpeech = new TextToSpeechV1({
    authenticator: new IamAuthenticator({
      apikey: tts_api_key,
    }),
    serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
  });

  const voices = await textToSpeech.listVoices();
  return voices;
};
const getNuanceVoices = async(client, logger, credentials) => {
const {client_id: clientId, secret: secret, nuance_tts_uri} = credentials;
@@ -165,14 +148,12 @@ const getVerbioVoices = async(client, logger, credentials) => {
async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, credentials}) {
logger = logger || noopLogger;
assert.ok(['nuance', 'ibm', 'google', 'aws', 'polly', 'verbio'].includes(vendor),
assert.ok(['nuance', 'google', 'aws', 'polly', 'verbio'].includes(vendor),
`getTtsVoices not supported for vendor ${vendor}`);
switch (vendor) {
case 'nuance':
return getNuanceVoices(client, logger, credentials);
case 'ibm':
return getIbmVoices(client, logger, credentials);
case 'google':
return getGoogleVoices(client, logger, credentials);
case 'aws':
+3 -45
View File
@@ -6,8 +6,7 @@ const { PollyClient, SynthesizeSpeechCommand } = require('@aws-sdk/client-polly'
const { CartesiaClient } = require('@cartesia/cartesia-js');
const sdk = require('microsoft-cognitiveservices-speech-sdk');
const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
const { IamAuthenticator } = require('ibm-watson/auth');
const {
ResultReason,
SpeechConfig,
@@ -96,7 +95,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
let rtt;
logger = logger || noopLogger;
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs',
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'elevenlabs',
'whisper', 'deepgram', 'playht', 'rimelabs', 'verbio', 'cartesia', 'inworld', 'resemble'].includes(vendor) ||
vendor.startsWith('custom'),
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
@@ -122,11 +121,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
assert.ok(language, 'synthAudio requires language when nvidia is used');
assert.ok(credentials.riva_server_uri, 'synthAudio requires riva_server_uri in credentials when nvidia is used');
}
else if ('ibm' === vendor) {
assert.ok(voice, 'synthAudio requires voice when ibm is used');
assert.ok(credentials.tts_region, 'synthAudio requires tts_region in credentials when ibm watson is used');
assert.ok(credentials.tts_api_key, 'synthAudio requires tts_api_key in credentials when nuance is used');
}
else if ('wellsaid' === vendor) {
language = 'en-US'; // WellSaid only supports English atm
assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
@@ -230,9 +224,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
audioData = await synthNvidia(client, logger, {credentials, stats, language, voice, model, key, text,
renderForCaching, disableTtsStreaming, disableTtsCache});
break;
case 'ibm':
audioData = await synthIbm(logger, {credentials, stats, voice, key, text});
break;
case 'wellsaid':
audioData = await synthWellSaid(logger, {credentials, stats, language, voice, key, text});
break;
@@ -535,39 +526,6 @@ const synthGoogle = async(logger, {
}
};
/**
 * Synthesize speech with IBM Watson text-to-speech and return it as an mp3 buffer.
 *
 * @param {object} logger - logger exposing `info`
 * @param {object} opts
 * @param {object} opts.credentials - must carry `tts_api_key` and `tts_region`
 * @param {object} opts.stats - metrics sink exposing `increment(name, tags)`
 * @param {string} opts.voice - voice name passed through to the service
 * @param {string} opts.text - text to synthesize
 * @returns {Promise<{audioContent: Buffer, extension: string, sampleRate: number}>}
 * @throws {Error} with the service's `statusText` (or the error message) as its message;
 *   the underlying error is attached as `cause`
 */
const synthIbm = async(logger, {credentials, stats, voice, text}) => {
  const {tts_api_key, tts_region} = credentials;
  const params = {
    text,
    voice,
    accept: 'audio/mp3'
  };

  try {
    const textToSpeech = new TextToSpeechV1({
      authenticator: new IamAuthenticator({
        apikey: tts_api_key,
      }),
      serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
    });

    const r = await textToSpeech.synthesize(params);

    /* the result is consumed as an async iterable of chunks and joined into one buffer */
    const chunks = [];
    for await (const chunk of r.result) {
      chunks.push(chunk);
    }
    return {
      audioContent: Buffer.concat(chunks),
      extension: 'mp3',
      sampleRate: 8000
    };
  } catch (err) {
    logger.info({err, params}, 'synthAudio: Error synthesizing speech using ibm');
    stats.increment('tts.count', ['vendor:ibm', 'accepted:no']);
    /* keep the original error reachable so its stack and status details are not lost */
    throw new Error(err.statusText || err.message, {cause: err});
  }
};
async function _synthOnPremMicrosoft(logger, {
credentials,
language,
@@ -969,7 +927,7 @@ const synthElevenlabs = async(logger, {
const optimize_streaming_latency = opts.optimize_streaming_latency ?
`?optimize_streaming_latency=${opts.optimize_streaming_latency}` : '';
try {
const post = bent(`https://${api_uri}`, 'POST', 'buffer', {
const post = bent(`https://${api_uri || 'api.elevenlabs.io'}`, 'POST', 'buffer', {
'xi-api-key': api_key,
'Accept': 'audio/mpeg',
'Content-Type': 'application/json'
+1 -7
View File
@@ -54,12 +54,6 @@ function makeBasicAuthHeader(username, password) {
return {Authorization: header};
}
/**
 * Build the `ibm:`-prefixed key for an IBM API key.
 * The key is the hex SHA-1 digest of the API key, so the raw secret
 * never appears in the returned string.
 *
 * @param {string} apiKey - IBM API key
 * @returns {string} `ibm:` followed by the 40-character hex digest
 */
function makeIbmKey(apiKey) {
  const digest = crypto.createHash('sha1').update(apiKey).digest('hex');
  return 'ibm:' + digest;
}
function makeAwsKey(awsAccessKeyId) {
const hash = crypto.createHash('sha1');
hash.update(awsAccessKeyId);
@@ -143,7 +137,7 @@ const createRivaClient = async(rivaUri) => {
module.exports = {
makeSynthKey,
makeNuanceKey,
makeIbmKey,
makePlayhtKey,
makeAwsKey,
makeVerbioKey,
+1424 -2199
View File
File diff suppressed because it is too large Load Diff
+2 -2
View File
@@ -1,6 +1,6 @@
{
"name": "@jambonz/speech-utils",
"version": "0.2.30",
"version": "1.0.0",
"description": "TTS-related speech utilities for jambonz",
"main": "index.js",
"author": "Dave Horton",
@@ -37,7 +37,7 @@
"debug": "^4.3.4",
"form-urlencoded": "^6.1.4",
"google-protobuf": "^3.21.2",
"ibm-watson": "^11.0.0",
"microsoft-cognitiveservices-speech-sdk": "1.38.0",
"openai": "^4.98.0",
"undici": "^7.5.0"
+1 -1
View File
@@ -2,7 +2,7 @@ const test = require('tape').test ;
const exec = require('child_process').exec ;
test('starting docker network..', (t) => {
exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml up -d`, (err, stdout, stderr) => {
exec(`docker compose -f ${__dirname}/docker-compose-testbed.yaml up -d`, (err, stdout, stderr) => {
setTimeout(() => {
t.end(err);
}, 2000);
+1 -1
View File
@@ -3,7 +3,7 @@ const exec = require('child_process').exec ;
test('stopping docker network..', (t) => {
t.timeoutAfter(10000);
exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml down`, (err, stdout, stderr) => {
exec(`docker compose -f ${__dirname}/docker-compose-testbed.yaml down`, (err, stdout, stderr) => {
//console.log(`stderr: ${stderr}`);
process.exit(0);
});
-78
View File
@@ -1,78 +0,0 @@
const test = require('tape').test ;
const config = require('config');
const opts = config.get('redis');
const fs = require('fs');
const logger = require('pino')({level: 'error'});
/* Print any promise rejection that nothing handled, so a failing async call
   leaves a trace on stdout. */
process.on('unhandledRejection', (reason, p) => {
console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
});
/* No-op metrics object: both methods accept any arguments and do nothing.
   NOTE(review): not referenced by the tests visible in this file -- confirm whether it is still needed. */
const stats = {
increment: () => {},
histogram: () => {}
};
/* Fetches an IBM access token twice: the first result is expected to come from IBM
   (not the cache), the second from the cache. Skipped when IBM_API_KEY is not set. */
test('IBM - create access key', async(t) => {
  const {client, getIbmAccessToken} = require('..')(opts, logger);
  const apiKey = process.env.IBM_API_KEY;

  if (!apiKey) {
    t.pass('skipping IBM test since no IBM api_key provided');
    t.end();
    client.quit();
    return;
  }

  try {
    const fresh = await getIbmAccessToken(apiKey);
    t.ok(fresh.access_token && !fresh.servedFromCache, 'successfull received access token from IBM');

    const cached = await getIbmAccessToken(apiKey);
    t.ok(cached.access_token && cached.servedFromCache, 'successfully received access token from cache');

    /* leave the cache empty for whatever runs next */
    await client.flushall();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
/* Lists IBM voices through getTtsVoices and checks that at least one voice with a
   language comes back. Skipped unless both IBM_TTS_API_KEY and IBM_TTS_REGION are set. */
test('IBM - retrieve tts voices test', async(t) => {
  const {client, getTtsVoices} = require('..')(opts, logger);
  const {IBM_TTS_API_KEY: apiKey, IBM_TTS_REGION: region} = process.env;

  if (!(apiKey && region)) {
    t.pass('skipping IBM test since no IBM api_key and/or region provided');
    t.end();
    client.quit();
    return;
  }

  try {
    const response = await getTtsVoices({
      vendor: 'ibm',
      credentials: {
        tts_api_key: apiKey,
        tts_region: region
      }
    });
    const {voices} = response.result;
    t.ok(voices.length > 0 && voices[0].language,
      `GetVoices: successfully retrieved ${voices.length} voices from IBM`);

    /* leave the cache empty for whatever runs next */
    await client.flushall();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
+1 -1
View File
@@ -2,6 +2,6 @@ require('./docker_start');
require('./synth');
require('./list-voices');
require('./aws');
require('./ibm');
require('./nuance');
require('./docker_stop');
-65
View File
@@ -38,71 +38,6 @@ test('Verbio - get Access key and voices', async(t) => {
client.quit();
});
/* Fetches an IBM access token twice: the first result is expected to come from IBM
   (not the cache), the second from the cache. Skipped when IBM_API_KEY is not set. */
test('IBM - create access key', async(t) => {
  const {client, getIbmAccessToken} = require('..')(opts, logger);
  const apiKey = process.env.IBM_API_KEY;

  if (!apiKey) {
    t.pass('skipping IBM test since no IBM api_key provided');
    t.end();
    client.quit();
    return;
  }

  try {
    const fresh = await getIbmAccessToken(apiKey);
    t.ok(fresh.access_token && !fresh.servedFromCache, 'successfull received access token from IBM');

    const cached = await getIbmAccessToken(apiKey);
    t.ok(cached.access_token && cached.servedFromCache, 'successfully received access token from cache');

    /* leave the cache empty for whatever runs next */
    await client.flushall();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
/* Lists IBM voices through getTtsVoices and checks that at least one voice with a
   language comes back. Skipped unless both IBM_TTS_API_KEY and IBM_TTS_REGION are set. */
test('IBM - retrieve tts voices test', async(t) => {
  const {client, getTtsVoices} = require('..')(opts, logger);
  const {IBM_TTS_API_KEY: apiKey, IBM_TTS_REGION: region} = process.env;

  if (!(apiKey && region)) {
    t.pass('skipping IBM test since no IBM api_key and/or region provided');
    t.end();
    client.quit();
    return;
  }

  try {
    const response = await getTtsVoices({
      vendor: 'ibm',
      credentials: {
        tts_api_key: apiKey,
        tts_region: region
      }
    });
    const {voices} = response.result;
    t.ok(voices.length > 0 && voices[0].language,
      `GetVoices: successfully retrieved ${voices.length} voices from IBM`);

    /* leave the cache empty for whatever runs next */
    await client.flushall();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  client.quit();
});
test('Nuance hosted tests', async(t) => {
const fn = require('..');
const {client, getTtsVoices} = fn(opts, logger);
-40
View File
@@ -872,46 +872,6 @@ test('Nvidia speech synth tests', async(t) => {
client.quit();
});
/* Synthesizes the same SSML twice with IBM Watson: the first result must not come from
   the cache, the second must. Skipped unless IBM_TTS_API_KEY and IBM_TTS_REGION are set. */
test('IBM watson speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);

  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
    t.pass('skipping IBM Watson speech synth tests since IBM_TTS_API_KEY or IBM_TTS_REGION not provided');
    /* release the client on the skip path too, as the other skipped tests do */
    client.quit();
    return t.end();
  }
  const text = `<speak> Hi there and welcome to jambones! jambones is the <sub alias="seapass">CPaaS</sub> designed with the needs of communication service providers in mind. This is an example of simple text-to-speech, but there is so much more you can do. Try us out!</speak>`;

  /* identical request for both calls so the second one can be matched against the first */
  const request = {
    vendor: 'ibm',
    credentials: {
      tts_api_key: process.env.IBM_TTS_API_KEY,
      tts_region: process.env.IBM_TTS_REGION,
    },
    language: 'en-US',
    voice: 'en-US_AllisonV2Voice',
    text,
  };

  try {
    let opts = await synthAudio(stats, {...request, credentials: {...request.credentials}});
    t.ok(!opts.servedFromCache, `successfully synthesized ibm audio to ${opts.filePath}`);

    opts = await synthAudio(stats, {...request, credentials: {...request.credentials}});
    t.ok(opts.servedFromCache, `successfully retrieved ibm audio from cache ${opts.filePath}`);
  } catch (err) {
    /* JSON.stringify(err) prints "{}" for Error objects; log the error itself */
    console.error(err);
    t.end(err);
  }
  client.quit();
});
test('Custom Vendor speech synth tests', async(t) => {
const fn = require('..');
const {synthAudio, client} = fn(opts, logger);