chore: deprecate + remove verbio, nuance, playht speech vendor support (#144)

* chore: deprecate and remove verbio, nuance speech vendor support

  Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* chore: also deprecate and remove PlayHT speech vendor

  PlayHT was acquired and no longer provides the service.

  Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 19:31:49 +00:00 · 2026-06-17 16:20:00 -04:00
parent 142323a151
commit 7d076bb8b4
16 changed files with 9 additions and 8162 deletions
@@ -16,26 +16,10 @@ const {
 } = sdk;
 const {
  makeSynthKey,
-  createNuanceClient,
-  createKryptonClient,
  createRivaClient,
  noopLogger,
-  makeFilePath,
-  makePlayhtKey
+  makeFilePath
 } = require('./utils');
-const getNuanceAccessToken = require('./get-nuance-access-token');
-const getVerbioAccessToken = require('./get-verbio-token');
-const {
-  SynthesisRequest,
-  Voice,
-  AudioFormat,
-  AudioParameters,
-  PCM,
-  Input,
-  Text,
-  SSML,
-  EventParameters
-} = require('../stubs/nuance/synthesizer_pb');
 const {SynthesizeSpeechRequest} = require('../stubs/riva/proto/riva_tts_pb');
 const {AudioEncoding} = require('../stubs/riva/proto/riva_audio_pb');
 const debug = require('debug')('jambonz:realtimedb-helpers');
@@ -95,10 +79,10 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
  let rtt;
  logger = logger || noopLogger;

-  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'elevenlabs',
-    'whisper', 'deepgram', 'playht', 'rimelabs', 'verbio', 'cartesia', 'inworld', 'resemble'].includes(vendor) ||
+  assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nvidia', 'elevenlabs',
+    'whisper', 'deepgram', 'rimelabs', 'cartesia', 'inworld', 'resemble'].includes(vendor) ||
  vendor.startsWith('custom'),
-  `synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
+  `synthAudio supported vendors are google, aws, microsoft, nvidia and wellsaid ..etc, not ${vendor}`);
  if ('google' === vendor) {
    assert.ok(language, 'synthAudio requires language when google is used');
  }
@@ -109,13 +93,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
    assert.ok(language || deploymentId, 'synthAudio requires language when microsoft is used');
    assert.ok(voice || deploymentId, 'synthAudio requires voice when microsoft is used');
  }
-  else if ('nuance' === vendor) {
-    assert.ok(voice, 'synthAudio requires voice when nuance is used');
-    if (!credentials.nuance_tts_uri) {
-      assert.ok(credentials.client_id, 'synthAudio requires client_id in credentials when nuance is used');
-      assert.ok(credentials.secret, 'synthAudio requires client_id in credentials when nuance is used');
-    }
-  }
  else if ('nvidia' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when nvidia is used');
    assert.ok(language, 'synthAudio requires language when nvidia is used');
@@ -129,11 +106,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
    assert.ok(voice, 'synthAudio requires voice when elevenlabs is used');
    assert.ok(credentials.api_key, 'synthAudio requires api_key when elevenlabs is used');
    assert.ok(credentials.model_id, 'synthAudio requires model_id when elevenlabs is used');
-  } else if ('playht' === vendor) {
-    assert.ok(voice, 'synthAudio requires voice when playht is used');
-    assert.ok(credentials.api_key, 'synthAudio requires api_key when playht is used');
-    assert.ok(credentials.user_id, 'synthAudio requires user_id when playht is used');
-    assert.ok(credentials.voice_engine, 'synthAudio requires voice_engine when playht is used');
  } else if ('inworld' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when inworld is used');
    assert.ok(credentials.api_key, 'synthAudio requires api_key when inworld is used');
@@ -148,10 +120,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
    assert.ok(credentials.api_key, 'synthAudio requires api_key when whisper is used');
  } else  if (vendor.startsWith('custom')) {
    assert.ok(credentials.custom_tts_url, `synthAudio requires custom_tts_url in credentials when ${vendor} is used`);
-  } else if ('verbio' === vendor) {
-    assert.ok(voice, 'synthAudio requires voice when verbio is used');
-    assert.ok(credentials.client_id, 'synthAudio requires client_id when verbio is used');
-    assert.ok(credentials.client_secret, 'synthAudio requires client_secret when verbio is used');
  } else if ('deepgram' === vendor) {
    if (!credentials.deepgram_tts_uri) {
      assert.ok(credentials.api_key, 'synthAudio requires api_key when deepgram is used');
@@ -216,10 +184,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
        audioData = await synthMicrosoft(logger, {credentials, stats, language, voice, key, text, deploymentId,
          renderForCaching, disableTtsStreaming, disableTtsCache});
        break;
-      case 'nuance':
-        model = model || 'enhanced';
-        audioData = await synthNuance(client, logger, {credentials, stats, voice, model, key, text});
-        break;
      case 'nvidia':
        audioData = await synthNvidia(client, logger, {credentials, stats, language, voice, model, key, text,
          renderForCaching, disableTtsStreaming, disableTtsCache});
@@ -232,11 +196,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
          credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
          disableTtsCache});
        break;
-      case 'playht':
-        audioData = await synthPlayHT(client, logger, {
-          credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
-          disableTtsCache});
-        break;
      case 'cartesia':
        audioData = await synthCartesia(logger, {
          credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
@@ -257,11 +216,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
          credentials, stats, voice, key, text, instructions, renderForCaching, disableTtsStreaming,
          disableTtsCache});
        break;
-      case 'verbio':
-        audioData = await synthVerbio(client, logger, {
-          credentials, stats, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache});
-        if (audioData?.filePath) return audioData;
-        break;
      case 'deepgram':
        audioData = await synthDeepgram(logger, {credentials, stats, model, key, text,
          renderForCaching, disableTtsStreaming, disableTtsCache});
@@ -725,70 +679,6 @@ const synthWellSaid = async(logger, {credentials, stats, language, voice, gender
  }
 };

-const synthNuance = async(client, logger, {credentials, stats, voice, model, text}) => {
-  let nuanceClient;
-  const {client_id, secret, nuance_tts_uri} = credentials;
-  if (nuance_tts_uri) {
-    nuanceClient = await createKryptonClient(nuance_tts_uri);
-  }
-  else {
-    /* get a nuance access token */
-    const {access_token} = await getNuanceAccessToken(client, logger, client_id, secret, 'tts');
-    nuanceClient = await createNuanceClient(access_token);
-  }
-
-  const v = new Voice();
-  const p = new AudioParameters();
-  const f = new AudioFormat();
-  const pcm = new PCM();
-  const params  = new EventParameters();
-  const request = new SynthesisRequest();
-  const input = new Input();
-
-  if (text.startsWith('<speak')) {
-    const ssml = new SSML();
-    ssml.setText(text);
-    input.setSsml(ssml);
-  }
-  else {
-    const t = new Text();
-    t.setText(text);
-    input.setText(t);
-  }
-  const sampleRate = 8000;
-  pcm.setSampleRateHz(sampleRate);
-  f.setPcm(pcm);
-  p.setAudioFormat(f);
-  v.setName(voice);
-  v.setModel(model);
-  request.setVoice(v);
-  request.setAudioParams(p);
-  request.setInput(input);
-  request.setEventParams(params);
-  request.setUserId('jambonz');
-
-  return new Promise((resolve, reject) => {
-    nuanceClient.unarySynthesize(request, (err, response) => {
-      if (err) {
-        console.error(err);
-        return reject(err);
-      }
-      const status = response.getStatus();
-      const code = status.getCode();
-      if (code !== 200) {
-        const message = status.getMessage();
-        const details = status.getDetails();
-        return reject({code, message, details});
-      }
-      resolve({
-        audioContent: Buffer.from(response.getAudio()),
-        extension: 'r8',
-        sampleRate
-      });
-    });
-  });
-};
-
 const synthNvidia = async(client, logger, {
  credentials, stats, language,  voice, model, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
 }) => {
@@ -954,101 +844,6 @@ const synthElevenlabs = async(logger, {
  }
 };

-const synthPlayHT = async(client, logger, {
-  credentials, options, stats, voice, language, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
-}) => {
-  const {api_key, user_id, voice_engine, playht_tts_uri, options: credOpts} = credentials;
-  const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
-
-  let synthesizeUrl = playht_tts_uri ? `${playht_tts_uri}/api/v2/tts/stream` : 'https://api.play.ht/api/v2/tts/stream';
-  // If model is play3.0, the synthesizeUrl is got from authentication endpoint
-  if (voice_engine === 'Play3.0') {
-    try {
-      const post = bent('https://api.play.ht', 'POST', 'json', 201, {
-        'AUTHORIZATION': api_key,
-        'X-USER-ID': user_id,
-        'Accept': 'application/json'
-      });
-      const key = makePlayhtKey(api_key);
-      const url = await client.get(key);
-      if (!url) {
-        const {inference_address, expires_at_ms} = await post('/api/v3/auth');
-        synthesizeUrl = inference_address;
-        const expiry =  Math.floor((expires_at_ms - Date.now()) / 1000 - 30);
-        await client.set(key, inference_address, 'EX', expiry);
-      } else {
-        // Use cached URL
-        synthesizeUrl = url;
-      }
-    } catch (err) {
-      logger.info({err}, 'synth PlayHT returned error for authentication version 3.0');
-      stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
-      throw err;
-    }
-  }
-
-  /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
-  if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
-    let params = '{';
-    params += `api_key=${api_key}`;
-    params += `,playback_id=${key}`;
-    params += `,user_id=${user_id}`;
-    params += ',vendor=playht';
-    params += `,voice=${voice}`;
-    params += `,voice_engine=${voice_engine}`;
-    params += `,synthesize_url=${synthesizeUrl}`;
-    params += `,write_cache_file=${disableTtsCache ? 0 : 1}`;
-    params += `,language=${language}`;
-    if (opts.quality) params += `,quality=${opts.quality}`;
-    if (opts.speed) params += `,speed=${opts.speed}`;
-    if (opts.seed) params += `,style=${opts.seed}`;
-    if (opts.temperature) params += `,temperature=${opts.temperature}`;
-    if (opts.emotion) params += `,emotion=${opts.emotion}`;
-    if (opts.voice_guidance) params += `,voice_guidance=${opts.voice_guidance}`;
-    if (opts.style_guidance) params += `,style_guidance=${opts.style_guidance}`;
-    if (opts.text_guidance) params += `,text_guidance=${opts.text_guidance}`;
-    if (opts.top_p) params += `,top_p=${opts.top_p}`;
-    if (opts.repetition_penalty) params += `,repetition_penalty=${opts.repetition_penalty}`;
-    params += '}';
-
-    return {
-      filePath: `say:${params}${text.replace(/\n/g, ' ').replace(/\r/g, ' ')}`,
-      servedFromCache: false,
-      rtt: 0
-    };
-  }
-
-  try {
-    const post = bent('POST', 'buffer', {
-      ...(voice_engine !== 'Play3.0' && {
-        'AUTHORIZATION': api_key,
-        'X-USER-ID': user_id,
-      }),
-      'Accept': 'audio/mpeg',
-      'Content-Type': 'application/json'
-    });
-
-    const audioContent = await post(synthesizeUrl, {
-      text,
-      ...(voice_engine === 'Play3.0' && { language }),
-      voice,
-      voice_engine,
-      output_format: 'mp3',
-      sample_rate: 8000,
-      ...opts
-    });
-    return {
-      audioContent,
-      extension: 'mp3',
-      sampleRate: 8000
-    };
-  } catch (err) {
-    logger.info({err}, 'synth PlayHT returned error');
-    stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
-    throw err;
-  }
-};
-
 const synthInworld = async(logger, {
  credentials, options, stats, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
 }) => {
@@ -1174,54 +969,6 @@ const synthRimelabs = async(logger, {
    throw err;
  }
 };
-const synthVerbio = async(client, logger, {
-  credentials, stats, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
-}) => {
-  //https://doc.speechcenter.verbio.com/#tag/Text-To-Speech-REST-API
-  if (text.length > 2000) {
-    throw new Error('Verbio cannot synthesize for the text length larger than 2000 characters');
-  }
-  const token = await getVerbioAccessToken(client, logger, credentials);
-  if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
-    let params = '{';
-    params += `access_token=${token.access_token}`;
-    params += `,playback_id=${key}`;
-    params += ',vendor=verbio';
-    params += `,voice=${voice}`;
-    params += `,write_cache_file=${disableTtsCache ? 0 : 1}`;
-    params += '}';
-
-    return {
-      filePath: `say:${params}${text.replace(/\n/g, ' ')}`,
-      servedFromCache: false,
-      rtt: 0
-    };
-  }
-
-  try {
-    const post = bent('https://us.rest.speechcenter.verbio.com', 'POST', 'buffer', {
-      'Authorization': `Bearer ${token.access_token}`,
-      'User-Agent': 'jambonz',
-      'Content-Type': 'application/json'
-    });
-    const audioContent = await post('/api/v1/synthesize', {
-      voice_id: voice,
-      output_sample_rate: '8k',
-      output_encoding: 'pcm16',
-      text
-    });
-    return {
-      audioContent,
-      extension: 'r8',
-      sampleRate: 8000
-    };
-  } catch (err) {
-    logger.info({err}, 'synth Verbio returned error');
-    stats.increment('tts.count', ['vendor:verbio', 'accepted:no']);
-    throw err;
-  }
-};
-
 const synthWhisper = async(logger, {credentials, stats, voice, key, text, instructions,
  renderForCaching, disableTtsStreaming, disableTtsCache}) => {
  const {api_key, model_id, baseURL, timeout, speed} = credentials;