Remove IBM speech support: it is not used (to my knowledge) and its dependencies have known vulnerabilities (#141)

2026-05-06 08:47:02 +00:00 · 2026-03-25 10:08:30 -04:00
parent 305695d068
commit c123f19898
14 changed files with 1436 additions and 2516 deletions
@@ -13,11 +13,6 @@ jobs:
        with:
          node-version: '20'
      - run: npm install
-      - name: Install Docker Compose
-        run: |
-          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
-          sudo chmod +x /usr/local/bin/docker-compose
-          docker-compose --version
      - run: npm run jslint
      - run: sudo apt update && sudo apt install -y squid
      - run: sudo cp test/squid.conf /etc/squid/squid.conf
@@ -28,9 +23,7 @@ jobs:
          AWS_REGION: ${{ secrets.AWS_REGION }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          GCP_JSON_KEY: ${{ secrets.GCP_JSON_KEY }}
-          IBM_API_KEY: ${{ secrets.IBM_API_KEY }}
-          IBM_TTS_API_KEY: ${{ secrets.IBM_TTS_API_KEY }}
-          IBM_TTS_REGION: ${{ secrets.IBM_TTS_REGION }}
+
          MICROSOFT_API_KEY: ${{ secrets.MICROSOFT_API_KEY }}
          MICROSOFT_REGION: ${{ secrets.MICROSOFT_REGION }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@@ -16,7 +16,7 @@ module.exports = (opts, logger) => {
    synthAudio: require('./lib/synth-audio').bind(null, client, createHash, retrieveHash, logger),
    getVerbioAccessToken: require('./lib/get-verbio-token').bind(null, client, logger),
    getNuanceAccessToken: require('./lib/get-nuance-access-token').bind(null, client, logger),
-    getIbmAccessToken: require('./lib/get-ibm-access-token').bind(null, client, logger),
+
    getAwsAuthToken: require('./lib/get-aws-sts-token').bind(null, logger, createHash, retrieveHash),
    getTtsVoices: require('./lib/get-tts-voices').bind(null, client, createHash, retrieveHash, logger),
  };
@@ -1,48 +0,0 @@
-const formurlencoded = require('form-urlencoded');
-const {Pool} = require('undici');
-const pool = new Pool('https://iam.cloud.ibm.com');
-const {makeIbmKey, noopLogger} = require('./utils');
-const { HTTP_TIMEOUT } = require('./config');
-const debug = require('debug')('jambonz:realtimedb-helpers');
-
-async function getIbmAccessToken(client, logger, apiKey) {
-  logger = logger || noopLogger;
-  try {
-    const key = makeIbmKey(apiKey);
-    const access_token = await client.get(key);
-    if (access_token) return {access_token, servedFromCache: true};
-
-    /* access token not found in cache, so fetch it from Ibm */
-    const payload = {
-      grant_type: 'urn:ibm:params:oauth:grant-type:apikey',
-      apikey: apiKey
-    };
-    const {statusCode, headers, body} =  await pool.request({
-      path: '/identity/token',
-      method: 'POST',
-      headers: {
-        'Content-Type': 'application/x-www-form-urlencoded'
-      },
-      body: formurlencoded(payload),
-      timeout: HTTP_TIMEOUT,
-      followRedirects: false
-    });
-
-    if (200 !== statusCode) {
-      const json = await body.json();
-      logger.debug({statusCode, headers, body: json}, 'error fetching access token from Ibm');
-      const err = new Error();
-      err.statusCode = statusCode;
-      throw err;
-    }
-    const json = await body.json();
-    await client.set(key, json.access_token, 'EX', json.expires_in - 30);
-    return {...json, servedFromCache: false};
-  } catch (err) {
-    debug(err, 'getIbmAccessToken: Error retrieving Ibm access token');
-    logger.error(err, 'getIbmAccessToken: Error retrieving Ibm access token for client_id ${clientId}');
-    throw err;
-  }
-}
-
-module.exports = getIbmAccessToken;
@@ -3,8 +3,6 @@ const {noopLogger, createNuanceClient, createKryptonClient} = require('./utils')
 const getNuanceAccessToken = require('./get-nuance-access-token');
 const getVerbioAccessToken = require('./get-verbio-token');
 const {GetVoicesRequest, Voice} = require('../stubs/nuance/synthesizer_pb');
-const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
-const { IamAuthenticator } = require('ibm-watson/auth');
 const ttsGoogle = require('@google-cloud/text-to-speech');
 const { PollyClient, DescribeVoicesCommand } = require('@aws-sdk/client-polly');
 const getAwsAuthToken = require('./get-aws-sts-token');
@@ -12,21 +10,6 @@ const {Pool} = require('undici');
 const { HTTP_TIMEOUT } = require('./config');
 const verbioVoicePool = new Pool('https://us.rest.speechcenter.verbio.com');

-const getIbmVoices = async(client, logger, credentials) => {
-  const {tts_region, tts_api_key} = credentials;
-  console.log(`region: ${tts_region}, api_key: ${tts_api_key}`);
-
-  const textToSpeech = new TextToSpeechV1({
-    authenticator: new IamAuthenticator({
-      apikey: tts_api_key,
-    }),
-    serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
-  });
-
-  const voices = await textToSpeech.listVoices();
-  return voices;
-};
-
 const getNuanceVoices = async(client, logger, credentials) => {
  const {client_id: clientId, secret: secret, nuance_tts_uri} = credentials;

@@ -165,14 +148,12 @@ const getVerbioVoices = async(client, logger, credentials) => {
 async function getTtsVoices(client, createHash, retrieveHash, logger, {vendor, credentials}) {
  logger = logger || noopLogger;

-  assert.ok(['nuance', 'ibm', 'google', 'aws', 'polly', 'verbio'].includes(vendor),
+  assert.ok(['nuance', 'google', 'aws', 'polly', 'verbio'].includes(vendor),
    `getTtsVoices not supported for vendor ${vendor}`);

  switch (vendor) {
    case 'nuance':
      return getNuanceVoices(client, logger, credentials);
-    case 'ibm':
-      return getIbmVoices(client, logger, credentials);
    case 'google':
      return getGoogleVoices(client, logger, credentials);
    case 'aws':
@@ -6,8 +6,7 @@ const { PollyClient, SynthesizeSpeechCommand } = require('@aws-sdk/client-polly'
 const { CartesiaClient } = require('@cartesia/cartesia-js');

 const sdk = require('microsoft-cognitiveservices-speech-sdk');
-const TextToSpeechV1 = require('ibm-watson/text-to-speech/v1');
-const { IamAuthenticator } = require('ibm-watson/auth');
+
 const {
  ResultReason,
  SpeechConfig,
@@ -96,7 +95,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
  let rtt;
  logger = logger || noopLogger;

-  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'ibm', 'elevenlabs',
+  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'elevenlabs',
    'whisper', 'deepgram', 'playht', 'rimelabs', 'verbio', 'cartesia', 'inworld', 'resemble'].includes(vendor) ||
  vendor.startsWith('custom'),
  `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
@@ -122,11 +121,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
    assert.ok(language, 'synthAudio requires language when nvidia is used');
    assert.ok(credentials.riva_server_uri, 'synthAudio requires riva_server_uri in credentials when nvidia is used');
  }
-  else if ('ibm' === vendor) {
-    assert.ok(voice, 'synthAudio requires voice when ibm is used');
-    assert.ok(credentials.tts_region, 'synthAudio requires tts_region in credentials when ibm watson is used');
-    assert.ok(credentials.tts_api_key, 'synthAudio requires tts_api_key in credentials when nuance is used');
-  }
  else if ('wellsaid' === vendor) {
    language = 'en-US'; // WellSaid only supports English atm
    assert.ok(voice, 'synthAudio requires voice when wellsaid is used');
@@ -230,9 +224,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
        audioData = await synthNvidia(client, logger, {credentials, stats, language, voice, model, key, text,
          renderForCaching, disableTtsStreaming, disableTtsCache});
        break;
-      case 'ibm':
-        audioData = await synthIbm(logger, {credentials, stats, voice, key, text});
-        break;
      case 'wellsaid':
        audioData = await synthWellSaid(logger, {credentials, stats, language, voice, key, text});
        break;
@@ -535,39 +526,6 @@ const synthGoogle = async(logger, {
  }
 };

-const synthIbm = async(logger, {credentials, stats, voice, text}) => {
-  const {tts_api_key, tts_region} = credentials;
-  const params = {
-    text,
-    voice,
-    accept: 'audio/mp3'
-  };
-
-  try {
-    const textToSpeech = new TextToSpeechV1({
-      authenticator: new IamAuthenticator({
-        apikey: tts_api_key,
-      }),
-      serviceUrl: `https://api.${tts_region}.text-to-speech.watson.cloud.ibm.com`
-    });
-
-    const r = await textToSpeech.synthesize(params);
-    const chunks = [];
-    for await (const chunk of r.result) {
-      chunks.push(chunk);
-    }
-    return {
-      audioContent: Buffer.concat(chunks),
-      extension: 'mp3',
-      sampleRate: 8000
-    };
-  } catch (err) {
-    logger.info({err, params}, 'synthAudio: Error synthesizing speech using ibm');
-    stats.increment('tts.count', ['vendor:ibm', 'accepted:no']);
-    throw new Error(err.statusText || err.message);
-  }
-};
-
 async function _synthOnPremMicrosoft(logger, {
  credentials,
  language,
@@ -969,7 +927,7 @@ const synthElevenlabs = async(logger, {
  const optimize_streaming_latency = opts.optimize_streaming_latency ?
    `?optimize_streaming_latency=${opts.optimize_streaming_latency}` : '';
  try {
-    const post = bent(`https://${api_uri}`, 'POST', 'buffer', {
+    const post = bent(`https://${api_uri || 'api.elevenlabs.io'}`, 'POST', 'buffer', {
      'xi-api-key': api_key,
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
@@ -54,12 +54,6 @@ function makeBasicAuthHeader(username, password) {
  return {Authorization: header};
 }

-function makeIbmKey(apiKey) {
-  const hash = crypto.createHash('sha1');
-  hash.update(apiKey);
-  return `ibm:${hash.digest('hex')}`;
-}
-
 function makeAwsKey(awsAccessKeyId) {
  const hash = crypto.createHash('sha1');
  hash.update(awsAccessKeyId);
@@ -143,7 +137,7 @@ const createRivaClient = async(rivaUri) => {
 module.exports = {
  makeSynthKey,
  makeNuanceKey,
-  makeIbmKey,
+
  makePlayhtKey,
  makeAwsKey,
  makeVerbioKey,
@@ -1,6 +1,6 @@
 {
  "name": "@jambonz/speech-utils",
-  "version": "0.2.30",
+  "version": "1.0.0",
  "description": "TTS-related speech utilities for jambonz",
  "main": "index.js",
  "author": "Dave Horton",
@@ -37,7 +37,7 @@
    "debug": "^4.3.4",
    "form-urlencoded": "^6.1.4",
    "google-protobuf": "^3.21.2",
-    "ibm-watson": "^11.0.0",
+
    "microsoft-cognitiveservices-speech-sdk": "1.38.0",
    "openai": "^4.98.0",
    "undici": "^7.5.0"
@@ -2,7 +2,7 @@ const test = require('tape').test ;
 const exec = require('child_process').exec ;

 test('starting docker network..', (t) => {
-  exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml up -d`, (err, stdout, stderr) => {
+  exec(`docker compose -f ${__dirname}/docker-compose-testbed.yaml up -d`, (err, stdout, stderr) => {
    setTimeout(() => {
      t.end(err);
    }, 2000);
@@ -3,7 +3,7 @@ const exec = require('child_process').exec ;

 test('stopping docker network..', (t) => {
  t.timeoutAfter(10000);
-  exec(`docker-compose -f ${__dirname}/docker-compose-testbed.yaml down`, (err, stdout, stderr) => {
+  exec(`docker compose -f ${__dirname}/docker-compose-testbed.yaml down`, (err, stdout, stderr) => {
    //console.log(`stderr: ${stderr}`);
    process.exit(0);
  });
@@ -1,78 +0,0 @@
-const test = require('tape').test ;
-const config = require('config');
-const opts = config.get('redis');
-const fs = require('fs');
-const logger = require('pino')({level: 'error'});
-process.on('unhandledRejection', (reason, p) => {
-  console.log('Unhandled Rejection at: Promise', p, 'reason:', reason);
-});
-
-const stats = {
-  increment: () => {},
-  histogram: () => {}
-};
-
-test('IBM - create access key', async(t) => {
-  const fn = require('..');
-  const {client, getIbmAccessToken} = fn(opts, logger);
-
-  if (!process.env.IBM_API_KEY ) {
-      t.pass('skipping IBM test since no IBM api_key provided');
-      t.end();
-      client.quit();
-      return;
-  }
-  try {
-    let obj = await getIbmAccessToken(process.env.IBM_API_KEY);
-    //console.log({obj}, 'received access token from IBM');
-    t.ok(obj.access_token && !obj.servedFromCache, 'successfull received access token from IBM');
-
-    obj = await getIbmAccessToken(process.env.IBM_API_KEY);
-    //console.log({obj}, 'received access token from IBM - second request');
-    t.ok(obj.access_token && obj.servedFromCache, 'successfully received access token from cache');
- 
-    await client.flushall();
-    t.end();
-  }
-  catch (err) {
-    console.error(err);
-    t.end(err);
-  }
-  client.quit();
-});
-
-test('IBM - retrieve tts voices test', async(t) => {
-  const fn = require('..');
-  const {client, getTtsVoices} = fn(opts, logger);
-
-  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
-      t.pass('skipping IBM test since no IBM api_key and/or region provided');
-      t.end();
-      client.quit();
-      return;
-  }
-  try {
-    const opts = {
-      vendor: 'ibm',
-      credentials: {
-        tts_api_key: process.env.IBM_TTS_API_KEY,
-        tts_region: process.env.IBM_TTS_REGION
-      }
-    };
-    const obj = await getTtsVoices(opts);
-    const {voices} = obj.result;
-    //console.log(JSON.stringify(voices));
-    t.ok(voices.length > 0 && voices[0].language, 
-      `GetVoices: successfully retrieved ${voices.length} voices from IBM`);
- 
-    await client.flushall();
-
-    t.end();
-
-  }
-  catch (err) {
-    console.error(err);
-    t.end(err);
-  }
-  client.quit();
-});
@@ -2,6 +2,6 @@ require('./docker_start');
 require('./synth');
 require('./list-voices');
 require('./aws');
-require('./ibm');
+
 require('./nuance');
 require('./docker_stop');
@@ -38,71 +38,6 @@ test('Verbio - get Access key and voices', async(t) => {
  client.quit();
 });

-test('IBM - create access key', async(t) => {
-  const fn = require('..');
-  const {client, getIbmAccessToken} = fn(opts, logger);
-
-  if (!process.env.IBM_API_KEY ) {
-      t.pass('skipping IBM test since no IBM api_key provided');
-      t.end();
-      client.quit();
-      return;
-  }
-  try {
-    let obj = await getIbmAccessToken(process.env.IBM_API_KEY);
-    //console.log({obj}, 'received access token from IBM');
-    t.ok(obj.access_token && !obj.servedFromCache, 'successfull received access token from IBM');
-
-    obj = await getIbmAccessToken(process.env.IBM_API_KEY);
-    //console.log({obj}, 'received access token from IBM - second request');
-    t.ok(obj.access_token && obj.servedFromCache, 'successfully received access token from cache');
- 
-    await client.flushall();
-    t.end();
-  }
-  catch (err) {
-    console.error(err);
-    t.end(err);
-  }
-  client.quit();
-});
-
-test('IBM - retrieve tts voices test', async(t) => {
-  const fn = require('..');
-  const {client, getTtsVoices} = fn(opts, logger);
-
-  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
-      t.pass('skipping IBM test since no IBM api_key and/or region provided');
-      t.end();
-      client.quit();
-      return;
-  }
-  try {
-    const opts = {
-      vendor: 'ibm',
-      credentials: {
-        tts_api_key: process.env.IBM_TTS_API_KEY,
-        tts_region: process.env.IBM_TTS_REGION
-      }
-    };
-    const obj = await getTtsVoices(opts);
-    const {voices} = obj.result;
-    //console.log(JSON.stringify(voices));
-    t.ok(voices.length > 0 && voices[0].language, 
-      `GetVoices: successfully retrieved ${voices.length} voices from IBM`);
- 
-    await client.flushall();
-
-    t.end();
-
-  }
-  catch (err) {
-    console.error(err);
-    t.end(err);
-  }
-  client.quit();
-});
-
 test('Nuance hosted tests', async(t) => {
  const fn = require('..');
  const {client, getTtsVoices} = fn(opts, logger);
@@ -872,46 +872,6 @@ test('Nvidia speech synth tests', async(t) => {
  client.quit();
 });

-test('IBM watson speech synth tests', async(t) => {
-  const fn = require('..');
-  const {synthAudio, client} = fn(opts, logger);
-
-  if (!process.env.IBM_TTS_API_KEY || !process.env.IBM_TTS_REGION) {
-    t.pass('skipping IBM Watson speech synth tests since IBM_TTS_API_KEY or IBM_TTS_API_KEY not provided');
-    return t.end();
-  }
-  const text = `<speak> Hi there and welcome to jambones! jambones is the <sub alias="seapass">CPaaS</sub> designed with the needs of communication service providers in mind. This is an example of simple text-to-speech, but there is so much more you can do. Try us out!</speak>`;
-  try {
-    let opts = await synthAudio(stats, {
-      vendor: 'ibm',
-      credentials: {
-        tts_api_key: process.env.IBM_TTS_API_KEY,
-        tts_region: process.env.IBM_TTS_REGION,
-      },
-      language: 'en-US',
-      voice: 'en-US_AllisonV2Voice',
-      text,
-    });
-    t.ok(!opts.servedFromCache, `successfully synthesized ibm audio to ${opts.filePath}`);
-
-    opts = await synthAudio(stats, {
-      vendor: 'ibm',
-      credentials: {
-        tts_api_key: process.env.IBM_TTS_API_KEY,
-        tts_region: process.env.IBM_TTS_REGION,
-      },
-      language: 'en-US',
-      voice: 'en-US_AllisonV2Voice',
-      text,
-    });
-    t.ok(opts.servedFromCache, `successfully retrieved ibm audio from cache ${opts.filePath}`);
-  } catch (err) {
-    console.error(JSON.stringify(err));
-    t.end(err);
-  }
-  client.quit();
-});
-
 test('Custom Vendor speech synth tests', async(t) => {
  const fn = require('..');
  const {synthAudio, client} = fn(opts, logger);