remove ibm speech since it is not used (to my knowledge) and has dependencies with vulnerabilities (#141)

2026-05-06 08:47:02 +00:00 · 2026-03-25 10:08:30 -04:00
parent 305695d068
commit c123f19898
14 changed files with 1436 additions and 2516 deletions
@@ -13,11 +13,6 @@ jobs:
        with:
          node-version: '20'
      - run: npm install
      - name: Install Docker Compose
        run: |
          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
          sudo chmod +x /usr/local/bin/docker-compose
          docker-compose --version
      - run: npm run jslint
      - run: sudo apt update && sudo apt install -y squid
      - run: sudo cp test/squid.conf /etc/squid/squid.conf
@@ -28,9 +23,7 @@ jobs:
          AWS_REGION: ${{ secrets.AWS_REGION }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          GCP_JSON_KEY: ${{ secrets.GCP_JSON_KEY }}
-          IBM_API_KEY: ${{ secrets.IBM_API_KEY }}
+
          IBM_TTS_API_KEY: ${{ secrets.IBM_TTS_API_KEY }}
          IBM_TTS_REGION: ${{ secrets.IBM_TTS_REGION }}
          MICROSOFT_API_KEY: ${{ secrets.MICROSOFT_API_KEY }}
          MICROSOFT_REGION: ${{ secrets.MICROSOFT_REGION }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -16,7 +16,7 @@ module.exports = (opts, logger) => {
    synthAudio: require('./lib/synth-audio').bind(null, client, createHash, retrieveHash, logger),
    getVerbioAccessToken: require('./lib/get-verbio-token').bind(null, client, logger),
    getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
-    getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
+
    getAwsAuthToken: require('./lib/get-aws-sts-token').bind(null, logger, createHash, retrieveHash),
    getTtsVoices: require('./lib/get-tts-voices').bind(null, client, createHash, retrieveHash, logger),
  };
@@ -1,48 +0,0 @@
 const formurlencoded = require('form-urlencoded');
 const {Pool} = require('undici');
 const pool = new Pool('https://iam.cloud.ibm.com');
 const {makeIbmKey, noopLogger} = require('./utils');
 const { HTTP_TIMEOUT } = require('./config');
 const debug = require('debug')('jambonz:realtimedb-helpers');
 /**
  * Fetch an IBM IAM access token for an API key, serving it from the cache when possible.
  *
  * @param {object} client - cache client; `get(key)` and `set(key, value, 'EX', ttl)` are called on it
  * @param {object} [logger] - logger exposing `debug` and `error`; noopLogger is used when omitted
  * @param {string} apiKey - IBM API key to exchange for an access token
  * @returns {Promise<object>} `{access_token, servedFromCache: true}` on a cache hit, otherwise the
  *   token endpoint's JSON response with `servedFromCache: false` added
  * @throws {Error} carrying `statusCode` when the endpoint replies with a non-200 status; any other
  *   failure is logged and rethrown as-is
  */
 async function getIbmAccessToken(client, logger, apiKey) {
  logger = logger || noopLogger;
  try {
    const key = makeIbmKey(apiKey);
    const access_token = await client.get(key);
    if (access_token) return {access_token, servedFromCache: true};

    /* access token not found in cache, so fetch it from Ibm */
    const payload = {
      grant_type: 'urn:ibm:params:oauth:grant-type:apikey',
      apikey: apiKey
    };
    const {statusCode, headers, body} = await pool.request({
      path: '/identity/token',
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded'
      },
      body: formurlencoded(payload),
      timeout: HTTP_TIMEOUT,
      followRedirects: false
    });

    if (200 !== statusCode) {
      /* the error body is only used for logging; a non-JSON body must not mask the status code */
      const json = await body.json().catch(() => null);
      logger.debug({statusCode, headers, body: json}, 'error fetching access token from Ibm');
      const err = new Error(`Ibm token request failed with status ${statusCode}`);
      err.statusCode = statusCode;
      throw err;
    }

    const json = await body.json();
    /* cache with a TTL 30 below the reported expires_in so the entry lapses before the token does */
    await client.set(key, json.access_token, 'EX', json.expires_in - 30);
    return {...json, servedFromCache: false};
  } catch (err) {
    debug(err, 'getIbmAccessToken: Error retrieving Ibm access token');
    /* no client id exists in this flow, so the message carries no placeholder */
    logger.error(err, 'getIbmAccessToken: Error retrieving Ibm access token');
    throw err;
  }
 }
 module.exports = getIbmAccessToken;
@@ -3,8 +3,6 @@ const {noopLogger, createNuanceClient, createKryptonClient} = require('./utils')
 const getNuanceAccessToken = require('./get-nuance-access-token');
 const getVerbioAccessToken = require('./get-verbio-token');
 const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
 const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
 const { IamAuthenticator } = require('ibm-watson/auth');
 const ttsGoogle = require('@google-cloud/text-to-speech');
 const { PollyClient, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
 const getAwsAuthToken = require('./get-aws-sts-token');
@@ -12,21 +10,6 @@ const {Pool} = require('undici');
 const { HTTP_TIMEOUT } = require('./config');
 const verbioVoicePool = new Pool('https://us.rest.speechcenter.verbio.com');
 /**
  * List the voices offered by IBM Watson text-to-speech for the given credentials.
  *
  * @param {object} client - unused here; kept so the signature matches the other get*Voices helpers
  * @param {object} logger - unused here
  * @param {object} credentials - must provide `tts_region` and `tts_api_key`
  * @returns {Promise<object>} whatever `TextToSpeechV1#listVoices()` resolves with
  */
 const getIbmVoices = async(client, logger, credentials) => {
  const {tts_region, tts_api_key} = credentials;
  /* tts_api_key is a secret: it must never be written to stdout or to the logs */
  const textToSpeech = new TextToSpeechV1({
    authenticator: new IamAuthenticator({
      apikey: tts_api_key,
    }),
    serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
  });
  return textToSpeech.listVoices();
 };
 const getNuanceVoices = async(client, logger, credentials) => {
  const {client_id: clientId, secret: secret, nuance_tts_uri} = credentials;
@@ -165,14 +148,12 @@ const getVerbioVoices = async(client, logger, credentials) => {
 async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, credentials}) {
  logger = logger || noopLogger;
-  assert.ok(['nuance', 'ibm', 'google', 'aws', 'polly', 'verbio'].includes(vendor),
+  assert.ok(['nuance', 'google', 'aws', 'polly', 'verbio'].includes(vendor),
    `getTtsVoices not supported for vendor ${vendor}`);
  switch (vendor) {
    case 'nuance':
      return getNuanceVoices(client, logger, credentials);
    case 'ibm':
      return getIbmVoices(client, logger, credentials);
    case 'google':
      return getGoogleVoices(client, logger, credentials);
    case 'aws':
@@ -6,8 +6,7 @@ const { PollyClient, SynthesizeSpeechCommand } = require('@aws-sdk/client-polly'
 const { CartesiaClient } = require('@cartesia/cartesia-js');
 const sdk = require('microsoft-cognitiveservices-speech-sdk');
-const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
+
 const { IamAuthenticator } = require('ibm-watson/auth');
 const {
  ResultReason,
  SpeechConfig,
@@ -96,7 +95,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
  let rtt;
  logger = logger || noopLogger;
-  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs',
+  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'elevenlabs',
    'whisper', 'deepgram', 'playht', 'rimelabs', 'verbio', 'cartesia', 'inworld', 'resemble'].includes(vendor) ||
  vendor.startsWith('custom'),
  `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
@@ -122,11 +121,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
    assert.ok(language, 'synthAudio requires language when nvidia is used');
    assert.ok(credentials.riva_server_uri, 'synthAudio requires riva_server_uri in credentials when nvidia is used');
  }
  else if ('ibm' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when ibm is used');
    assert.ok(credentials.tts_region, 'synthAudio requires tts_region in credentials when ibm watson is used');
    assert.ok(credentials.tts_api_key, 'synthAudio requires tts_api_key in credentials when nuance is used');
  }
  else if ('wellsaid' === vendor) {
    language = 'en-US'; // WellSaid only supports English atm
    assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
@@ -230,9 +224,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
        audioData = await synthNvidia(client, logger, {credentials, stats, language, voice, model, key, text,
          renderForCaching, disableTtsStreaming, disableTtsCache});
        break;
      case 'ibm':
        audioData = await synthIbm(logger, {credentials, stats, voice, key, text});
        break;
      case 'wellsaid':
        audioData = await synthWellSaid(logger, {credentials, stats, language, voice, key, text});
        break;
@@ -535,39 +526,6 @@ const synthGoogle = async(logger, {
  }
 };
 /**
  * Synthesize speech with IBM Watson and collect the streamed result into one buffer.
  *
  * @param {object} logger - `info` is called on it when synthesis fails
  * @param {object} options
  * @param {object} options.credentials - must provide `tts_api_key` and `tts_region`
  * @param {object} options.stats - `increment` is called on it when synthesis fails
  * @param {string} options.voice - voice passed through to the synthesize request
  * @param {string} options.text - text passed through to the synthesize request
  * @returns {Promise<object>} `{audioContent, extension: 'mp3', sampleRate: 8000}`
  * @throws {Error} a new Error whose message is the failure's `statusText`, or its `message`
  */
 const synthIbm = async(logger, {credentials, stats, voice, text}) => {
  const {tts_api_key: apikey, tts_region: region} = credentials;
  const params = {text, voice, accept: 'audio/mp3'};

  try {
    const watson = new TextToSpeechV1({
      authenticator: new IamAuthenticator({apikey}),
      serviceUrl: `https://api.${region}.text-to-speech.watson.cloud.ibm.com`
    });
    const response = await watson.synthesize(params);

    /* drain the async-iterable result piece by piece before joining it */
    const parts = [];
    for await (const part of response.result) parts.push(part);

    return {audioContent: Buffer.concat(parts), extension: 'mp3', sampleRate: 8000};
  } catch (err) {
    logger.info({err, params}, 'synthAudio: Error synthesizing speech using ibm');
    stats.increment('tts.count', ['vendor:ibm', 'accepted:no']);
    throw new Error(err.statusText || err.message);
  }
 };
 async function _synthOnPremMicrosoft(logger, {
  credentials,
  language,
@@ -969,7 +927,7 @@ const synthElevenlabs = async(logger, {
  const optimize_streaming_latency = opts.optimize_streaming_latency ?
    `?optimize_streaming_latency=${opts.optimize_streaming_latency}` : '';
  try {
-    const post = bent(`https://${api_uri}`, 'POST', 'buffer', {
+    const post = bent(`https://${api_uri || 'api.elevenlabs.io'}`, 'POST', 'buffer', {
      'xi-api-key': api_key,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
@@ -54,12 +54,6 @@ function makeBasicAuthHeader(username, password) {
  return {Authorization: header};
 }
 /**
  * Derive a key string from an IBM API key without embedding the key itself.
  *
  * @param {string} apiKey - IBM API key to hash
  * @returns {string} `ibm:` followed by the hex-encoded SHA-1 digest of `apiKey`
  */
 function makeIbmKey(apiKey) {
  const digest = crypto.createHash('sha1').update(apiKey).digest('hex');
  return `ibm:${digest}`;
 }
 function makeAwsKey(awsAccessKeyId) {
  const hash = crypto.createHash('sha1');
  hash.update(awsAccessKeyId);
@@ -143,7 +137,7 @@ const createRivaClient = async(rivaUri) => {
 module.exports = {
  makeSynthKey,
  makeNuanceKey,
-  makeIbmKey,
+
  makePlayhtKey,
  makeAwsKey,
  makeVerbioKey,
@@ -1,6 +1,6 @@
 {
  "name": "@jambonz/speech-utils",
-  "version": "0.2.30",
+  "version": "1.0.0",
  "description": "TTS-related speech utilities for jambonz",
  "main": "index.js",
  "author": "Dave Horton",
@@ -37,7 +37,7 @@
    "debug": "^4.3.4",
    "form-urlencoded": "^6.1.4",
    "google-protobuf": "^3.21.2",
-    "ibm-watson": "^11.0.0",
+
    "microsoft-cognitiveservices-speech-sdk": "1.38.0",
    "openai": "^4.98.0",
    "undici": "^7.5.0"
@@ -2,7 +2,7 @@ const test = require('tape').test ;
 const exec = require('child_process').exec ;
 test('starting docker network..', (t) => {
-  exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml up -d`, (err, stdout, stderr) => {
+  exec(`docker compose -f ${__dirname}/docker-compose-testbed.yaml up -d`, (err, stdout, stderr) => {
    setTimeout(() => {
      t.end(err);
    }, 2000);
@@ -3,7 +3,7 @@ const exec = require('child_process').exec ;
 test('stopping docker network..', (t) => {
  t.timeoutAfter(10000);
-  exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml down`, (err, stdout, stderr) => {
+  exec(`docker compose -f ${__dirname}/docker-compose-testbed.yaml down`, (err, stdout, stderr) => {
    //console.log(`stderr: ${stderr}`);
    process.exit(0);
  });
@@ -1,78 +0,0 @@
 const test = require('tape').test ;
 const config = require('config');
 const opts = config.get('redis');
 const fs = require('fs');
 const logger = require('pino')({level: 'error'});
 // Print any promise rejection that nothing handled, together with the promise it came from.
 process.on('unhandledRejection', (reason, p) => {
  console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
 });
 // Stub exposing no-op `increment` and `histogram` methods.
 // NOTE(review): presumably a stand-in stats collector; no use of it is visible in this file — confirm.
 const stats = {
  increment: () => {},
  histogram: () => {}
 };
 // Requests an IBM access token twice with the same API key: the first result must not be
 // flagged servedFromCache, the second must be. Reports a pass without calling IBM when
 // IBM_API_KEY is not set in the environment.
 test('IBM - create access key', async(t) => {
  const fn = require('..');
  const {client, getIbmAccessToken} = fn(opts, logger);
  if (!process.env.IBM_API_KEY ) {
      t.pass('skipping IBM test since no IBM api_key provided');
      t.end();
      client.quit();
      return;
  }
  try {
    // first call: expect a token that was not served from the cache
    let obj = await getIbmAccessToken(process.env.IBM_API_KEY);
    //console.log({obj}, 'received access token from IBM');
    t.ok(obj.access_token && !obj.servedFromCache, 'successfull received access token from IBM');
    // second call with the same key: expect the cached token
    obj = await getIbmAccessToken(process.env.IBM_API_KEY);
    //console.log({obj}, 'received access token from IBM - second request');
    t.ok(obj.access_token && obj.servedFromCache, 'successfully received access token from cache');
    // presumably empties the cache so the stored token does not leak into later tests — confirm
    await client.flushall();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  // reached after both the success and the failure path
  client.quit();
 });
 // Asks getTtsVoices for the IBM voice list and expects at least one voice whose `language`
 // is set. Reports a pass without calling IBM when IBM_TTS_API_KEY or IBM_TTS_REGION is not
 // set in the environment.
 test('IBM - retrieve tts voices test', async(t) => {
  const fn = require('..');
  const {client, getTtsVoices} = fn(opts, logger);
  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
      t.pass('skipping IBM test since no IBM api_key and/or region provided');
      t.end();
      client.quit();
      return;
  }
  try {
    // note: this block-scoped `opts` shadows the file-level `opts` passed to fn() above
    const opts = {
      vendor: 'ibm',
      credentials: {
        tts_api_key: process.env.IBM_TTS_API_KEY,
        tts_region: process.env.IBM_TTS_REGION
      }
    };
    const obj = await getTtsVoices(opts);
    // the voice list is read from the `result` property of the response
    const {voices} = obj.result;
    //console.log(JSON.stringify(voices));
    t.ok(voices.length > 0 && voices[0].language, 
      `GetVoices: successfully retrieved ${voices.length} voices from IBM`);
    await client.flushall();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  // reached after both the success and the failure path
  client.quit();
 });
@@ -2,6 +2,6 @@ require('./docker_start');
 require('./synth');
 require('./list-voices');
 require('./aws');
-require('./ibm');
+
 require('./nuance');
 require('./docker_stop');
@@ -38,71 +38,6 @@ test('Verbio - get Access key and voices', async(t) => {
  client.quit();
 });
 // Requests an IBM access token twice with the same API key: the first result must not be
 // flagged servedFromCache, the second must be. Reports a pass without calling IBM when
 // IBM_API_KEY is not set in the environment.
 test('IBM - create access key', async(t) => {
  const fn = require('..');
  const {client, getIbmAccessToken} = fn(opts, logger);
  if (!process.env.IBM_API_KEY ) {
      t.pass('skipping IBM test since no IBM api_key provided');
      t.end();
      client.quit();
      return;
  }
  try {
    // first call: expect a token that was not served from the cache
    let obj = await getIbmAccessToken(process.env.IBM_API_KEY);
    //console.log({obj}, 'received access token from IBM');
    t.ok(obj.access_token && !obj.servedFromCache, 'successfull received access token from IBM');
    // second call with the same key: expect the cached token
    obj = await getIbmAccessToken(process.env.IBM_API_KEY);
    //console.log({obj}, 'received access token from IBM - second request');
    t.ok(obj.access_token && obj.servedFromCache, 'successfully received access token from cache');
    // presumably empties the cache so the stored token does not leak into later tests — confirm
    await client.flushall();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  // reached after both the success and the failure path
  client.quit();
 });
 // Asks getTtsVoices for the IBM voice list and expects at least one voice whose `language`
 // is set. Reports a pass without calling IBM when IBM_TTS_API_KEY or IBM_TTS_REGION is not
 // set in the environment.
 test('IBM - retrieve tts voices test', async(t) => {
  const fn = require('..');
  const {client, getTtsVoices} = fn(opts, logger);
  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
      t.pass('skipping IBM test since no IBM api_key and/or region provided');
      t.end();
      client.quit();
      return;
  }
  try {
    // note: this block-scoped `opts` shadows the file-level `opts` passed to fn() above
    const opts = {
      vendor: 'ibm',
      credentials: {
        tts_api_key: process.env.IBM_TTS_API_KEY,
        tts_region: process.env.IBM_TTS_REGION
      }
    };
    const obj = await getTtsVoices(opts);
    // the voice list is read from the `result` property of the response
    const {voices} = obj.result;
    //console.log(JSON.stringify(voices));
    t.ok(voices.length > 0 && voices[0].language, 
      `GetVoices: successfully retrieved ${voices.length} voices from IBM`);
    await client.flushall();
    t.end();
  }
  catch (err) {
    console.error(err);
    t.end(err);
  }
  // reached after both the success and the failure path
  client.quit();
 });
 test('Nuance hosted tests', async(t) => {
  const fn = require('..');
  const {client, getTtsVoices} = fn(opts, logger);
@@ -872,46 +872,6 @@ test('Nvidia speech synth tests', async(t) => {
  client.quit();
 });
 // Synthesizes the same text twice with IBM Watson: the first result must not be flagged
 // servedFromCache, the second must be. Reports a pass without calling IBM when
 // IBM_TTS_API_KEY or IBM_TTS_REGION is not set in the environment.
 test('IBM watson speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);
  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
    t.pass('skipping IBM Watson speech synth tests since IBM_TTS_API_KEY or IBM_TTS_REGION not provided');
    t.end();
    // release the client on the skip path too, as the other IBM tests do
    client.quit();
    return;
  }
  const text = `<speak> Hi there and welcome to jambones! jambones is the <sub alias="seapass">CPaaS</sub> designed with the needs of communication service providers in mind. This is an example of simple text-to-speech, but there is so much more you can do. Try us out!</speak>`;
  try {
    // first request: expect freshly synthesized audio
    let opts = await synthAudio(stats, {
      vendor: 'ibm',
      credentials: {
        tts_api_key: process.env.IBM_TTS_API_KEY,
        tts_region: process.env.IBM_TTS_REGION,
      },
      language: 'en-US',
      voice: 'en-US_AllisonV2Voice',
      text,
    });
    t.ok(!opts.servedFromCache, `successfully synthesized ibm audio to ${opts.filePath}`);

    // identical second request: expect the cached audio
    opts = await synthAudio(stats, {
      vendor: 'ibm',
      credentials: {
        tts_api_key: process.env.IBM_TTS_API_KEY,
        tts_region: process.env.IBM_TTS_REGION,
      },
      language: 'en-US',
      voice: 'en-US_AllisonV2Voice',
      text,
    });
    t.ok(opts.servedFromCache, `successfully retrieved ibm audio from cache ${opts.filePath}`);
  } catch (err) {
    // log the error itself: JSON.stringify drops an Error's non-enumerable message and stack
    console.error(err);
    t.end(err);
  }
  client.quit();
 });
 test('Custom Vendor speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);