chore: deprecate + remove verbio, nuance, playht speech vendor support (#144)

* chore: deprecate and remove verbio, nuance speech vendor support

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* chore: also deprecate and remove PlayHT speech vendor

PlayHT was acquired and no longer provides the service.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dave Horton
2026-06-17 16:20:00 -04:00
committed by GitHub
parent 142323a151
commit 7d076bb8b4
16 changed files with 9 additions and 8162 deletions
+4 -257
View File
@@ -16,26 +16,10 @@ const {
} = sdk;
const {
makeSynthKey,
createNuanceClient,
createKryptonClient,
createRivaClient,
noopLogger,
makeFilePath,
makePlayhtKey
makeFilePath
} = require('./utils');
const getNuanceAccessToken = require('./get-nuance-access-token');
const getVerbioAccessToken = require('./get-verbio-token');
const {
SynthesisRequest,
Voice,
AudioFormat,
AudioParameters,
PCM,
Input,
Text,
SSML,
EventParameters
} = require('../stubs/nuance/synthesizer_pb');
const {SynthesizeSpeechRequest} = require('../stubs/riva/proto/riva_tts_pb');
const {AudioEncoding} = require('../stubs/riva/proto/riva_audio_pb');
const debug = require('debug')('jambonz:realtimedb-helpers');
@@ -95,10 +79,10 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
let rtt;
logger = logger || noopLogger;
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nuance', 'nvidia', 'elevenlabs',
'whisper', 'deepgram', 'playht', 'rimelabs', 'verbio', 'cartesia', 'inworld', 'resemble'].includes(vendor) ||
assert.ok(['google', 'aws', 'polly', 'microsoft', 'wellsaid', 'nvidia', 'elevenlabs',
'whisper', 'deepgram', 'rimelabs', 'cartesia', 'inworld', 'resemble'].includes(vendor) ||
vendor.startsWith('custom'),
`synthAudio supported vendors are google, aws, microsoft, nuance, nvidia and wellsaid ..etc, not ${vendor}`);
`synthAudio supported vendors are google, aws, microsoft, nvidia and wellsaid ..etc, not ${vendor}`);
if ('google' === vendor) {
assert.ok(language, 'synthAudio requires language when google is used');
}
@@ -109,13 +93,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
assert.ok(language || deploymentId, 'synthAudio requires language when microsoft is used');
assert.ok(voice || deploymentId, 'synthAudio requires voice when microsoft is used');
}
else if ('nuance' === vendor) {
assert.ok(voice, 'synthAudio requires voice when nuance is used');
if (!credentials.nuance_tts_uri) {
assert.ok(credentials.client_id, 'synthAudio requires client_id in credentials when nuance is used');
assert.ok(credentials.secret, 'synthAudio requires client_id in credentials when nuance is used');
}
}
else if ('nvidia' === vendor) {
assert.ok(voice, 'synthAudio requires voice when nvidia is used');
assert.ok(language, 'synthAudio requires language when nvidia is used');
@@ -129,11 +106,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
assert.ok(voice, 'synthAudio requires voice when elevenlabs is used');
assert.ok(credentials.api_key, 'synthAudio requires api_key when elevenlabs is used');
assert.ok(credentials.model_id, 'synthAudio requires model_id when elevenlabs is used');
} else if ('playht' === vendor) {
assert.ok(voice, 'synthAudio requires voice when playht is used');
assert.ok(credentials.api_key, 'synthAudio requires api_key when playht is used');
assert.ok(credentials.user_id, 'synthAudio requires user_id when playht is used');
assert.ok(credentials.voice_engine, 'synthAudio requires voice_engine when playht is used');
} else if ('inworld' === vendor) {
assert.ok(voice, 'synthAudio requires voice when inworld is used');
assert.ok(credentials.api_key, 'synthAudio requires api_key when inworld is used');
@@ -148,10 +120,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
assert.ok(credentials.api_key, 'synthAudio requires api_key when whisper is used');
} else if (vendor.startsWith('custom')) {
assert.ok(credentials.custom_tts_url, `synthAudio requires custom_tts_url in credentials when ${vendor} is used`);
} else if ('verbio' === vendor) {
assert.ok(voice, 'synthAudio requires voice when verbio is used');
assert.ok(credentials.client_id, 'synthAudio requires client_id when verbio is used');
assert.ok(credentials.client_secret, 'synthAudio requires client_secret when verbio is used');
} else if ('deepgram' === vendor) {
if (!credentials.deepgram_tts_uri) {
assert.ok(credentials.api_key, 'synthAudio requires api_key when deepgram is used');
@@ -216,10 +184,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
audioData = await synthMicrosoft(logger, {credentials, stats, language, voice, key, text, deploymentId,
renderForCaching, disableTtsStreaming, disableTtsCache});
break;
case 'nuance':
model = model || 'enhanced';
audioData = await synthNuance(client, logger, {credentials, stats, voice, model, key, text});
break;
case 'nvidia':
audioData = await synthNvidia(client, logger, {credentials, stats, language, voice, model, key, text,
renderForCaching, disableTtsStreaming, disableTtsCache});
@@ -232,11 +196,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
disableTtsCache});
break;
case 'playht':
audioData = await synthPlayHT(client, logger, {
credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
disableTtsCache});
break;
case 'cartesia':
audioData = await synthCartesia(logger, {
credentials, options, stats, language, voice, key, text, renderForCaching, disableTtsStreaming,
@@ -257,11 +216,6 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
credentials, stats, voice, key, text, instructions, renderForCaching, disableTtsStreaming,
disableTtsCache});
break;
case 'verbio':
audioData = await synthVerbio(client, logger, {
credentials, stats, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache});
if (audioData?.filePath) return audioData;
break;
case 'deepgram':
audioData = await synthDeepgram(logger, {credentials, stats, model, key, text,
renderForCaching, disableTtsStreaming, disableTtsCache});
@@ -725,70 +679,6 @@ const synthWellSaid = async(logger, {credentials, stats, language, voice, gender
}
};
/**
 * Synthesize speech with Nuance TTS via a unary gRPC call.
 *
 * When `credentials.nuance_tts_uri` is set the client is created with
 * `createKryptonClient` against that URI; otherwise an access token is
 * obtained with `getNuanceAccessToken` and passed to `createNuanceClient`.
 *
 * @param {object} client - passed through to `getNuanceAccessToken`
 * @param {object} logger - logger exposing `info()`
 * @param {object} opts
 * @param {object} opts.credentials - `{client_id, secret}` or `{nuance_tts_uri}`
 * @param {object} opts.stats - accepted for signature parity; not used here
 * @param {string} opts.voice - voice name set on the request
 * @param {string} opts.model - voice model set on the request
 * @param {string} opts.text - plain text, or SSML when it starts with `<speak`
 * @returns {Promise<{audioContent: Buffer, extension: string, sampleRate: number}>}
 * @throws {Error} the gRPC error as-is, or an Error carrying `code`, `message`
 *   and `details` when the response status code is not 200
 */
const synthNuance = async(client, logger, {credentials, stats, voice, model, text}) => {
  const {client_id, secret, nuance_tts_uri} = credentials;
  const sampleRate = 8000;

  let nuanceClient;
  if (nuance_tts_uri) {
    nuanceClient = await createKryptonClient(nuance_tts_uri);
  }
  else {
    /* get a nuance access token */
    const {access_token} = await getNuanceAccessToken(client, logger, client_id, secret, 'tts');
    nuanceClient = await createNuanceClient(access_token);
  }

  /* input: SSML when the text looks like a <speak> document, plain text otherwise */
  const input = new Input();
  if (text.startsWith('<speak')) {
    const ssml = new SSML();
    ssml.setText(text);
    input.setSsml(ssml);
  }
  else {
    const t = new Text();
    t.setText(text);
    input.setText(t);
  }

  /* audio: PCM at the fixed sample rate */
  const pcm = new PCM();
  pcm.setSampleRateHz(sampleRate);
  const f = new AudioFormat();
  f.setPcm(pcm);
  const p = new AudioParameters();
  p.setAudioFormat(f);

  const v = new Voice();
  v.setName(voice);
  v.setModel(model);

  const request = new SynthesisRequest();
  request.setVoice(v);
  request.setAudioParams(p);
  request.setInput(input);
  request.setEventParams(new EventParameters());
  request.setUserId('jambonz');

  /* unarySynthesize is callback based, so adapt it to a promise */
  return new Promise((resolve, reject) => {
    nuanceClient.unarySynthesize(request, (err, response) => {
      if (err) {
        /* report through the injected logger rather than the console */
        logger.info({err}, 'synth Nuance returned error');
        return reject(err);
      }
      const status = response.getStatus();
      const code = status.getCode();
      if (code !== 200) {
        /* reject with a real Error (keeps a stack) while preserving the
           code/message/details fields callers may already read */
        const message = status.getMessage();
        const error = new Error(message || `Nuance synthesis failed with status ${code}`);
        error.code = code;
        error.details = status.getDetails();
        return reject(error);
      }
      resolve({
        audioContent: Buffer.from(response.getAudio()),
        extension: 'r8',
        sampleRate
      });
    });
  });
};
const synthNvidia = async(client, logger, {
credentials, stats, language, voice, model, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
}) => {
@@ -954,101 +844,6 @@ const synthElevenlabs = async(logger, {
}
};
/**
 * Synthesize speech with PlayHT.
 *
 * Unless streaming is disabled (module-level JAMBONES_DISABLE_TTS_STREAMING,
 * `renderForCaching`, or `disableTtsStreaming`), no HTTP synthesis is done
 * here: a `say:{...}` descriptor string is returned instead. Otherwise the
 * audio is fetched as an mp3 buffer from the synthesize URL.
 *
 * For the `Play3.0` voice engine the synthesize URL comes from the
 * `/api/v3/auth` endpoint and is cached through `client` under
 * `makePlayhtKey(api_key)`.
 *
 * @param {object} client - key/value client exposing `get(key)` and `set(key, value, 'EX', seconds)`
 * @param {object} logger - logger exposing `info()`
 * @param {object} opts
 * @param {object} opts.credentials - `{api_key, user_id, voice_engine, playht_tts_uri?, options?}`;
 *   `options` is a JSON string used when no per-request options are given
 * @param {object} [opts.options] - per-request options; take precedence when non-empty
 * @param {object} opts.stats - stats client exposing `increment()`
 * @param {string} opts.voice
 * @param {string} opts.language
 * @param {string} opts.key - placed in the descriptor as `playback_id`
 * @param {string} opts.text
 * @param {boolean} [opts.renderForCaching]
 * @param {boolean} [opts.disableTtsStreaming]
 * @param {boolean} [opts.disableTtsCache] - sets `write_cache_file=0` in the descriptor
 * @returns {Promise<object>} `{filePath, servedFromCache, rtt}` when streaming,
 *   else `{audioContent, extension, sampleRate}`
 * @throws rethrows authentication and synthesis errors after logging and counting them
 */
const synthPlayHT = async(client, logger, {
  credentials, options, stats, voice, language, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
}) => {
  const {api_key, user_id, voice_engine, playht_tts_uri, options: credOpts} = credentials;
  const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');
  const isPlay3 = voice_engine === 'Play3.0';

  let synthesizeUrl = playht_tts_uri ? `${playht_tts_uri}/api/v2/tts/stream` : 'https://api.play.ht/api/v2/tts/stream';

  // If model is play3.0, the synthesizeUrl is got from authentication endpoint
  if (isPlay3) {
    try {
      /* named cacheKey so it does not shadow the `key` (playback id) parameter */
      const cacheKey = makePlayhtKey(api_key);
      const cachedUrl = await client.get(cacheKey);
      if (cachedUrl) {
        // Use cached URL
        synthesizeUrl = cachedUrl;
      } else {
        const post = bent('https://api.play.ht', 'POST', 'json', 201, {
          'AUTHORIZATION': api_key,
          'X-USER-ID': user_id,
          'Accept': 'application/json'
        });
        const {inference_address, expires_at_ms} = await post('/api/v3/auth');
        synthesizeUrl = inference_address;
        /* expire the cache entry 30 seconds before the address itself expires */
        const expiry = Math.floor((expires_at_ms - Date.now()) / 1000 - 30);
        /* only cache with a usable TTL: a zero, negative or NaN expiry would make the
           set fail and abort a synthesis that already has a valid address */
        if (Number.isFinite(expiry) && expiry > 0) {
          await client.set(cacheKey, inference_address, 'EX', expiry);
        }
      }
    } catch (err) {
      logger.info({err}, 'synth PlayHT returned error for authentication version 3.0');
      stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
      throw err;
    }
  }

  /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
  if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    const pairs = [
      `api_key=${api_key}`,
      `playback_id=${key}`,
      `user_id=${user_id}`,
      'vendor=playht',
      `voice=${voice}`,
      `voice_engine=${voice_engine}`,
      `synthesize_url=${synthesizeUrl}`,
      `write_cache_file=${disableTtsCache ? 0 : 1}`,
      `language=${language}`
    ];
    /* [option name, descriptor parameter name]; only truthy options are emitted, in this order */
    const optionalParams = [
      ['quality', 'quality'],
      ['speed', 'speed'],
      /* NOTE(review): `seed` is emitted under the name `style` - kept as-is; confirm this is intended */
      ['seed', 'style'],
      ['temperature', 'temperature'],
      ['emotion', 'emotion'],
      ['voice_guidance', 'voice_guidance'],
      ['style_guidance', 'style_guidance'],
      ['text_guidance', 'text_guidance'],
      ['top_p', 'top_p'],
      ['repetition_penalty', 'repetition_penalty']
    ];
    for (const [optName, paramName] of optionalParams) {
      if (opts[optName]) pairs.push(`${paramName}=${opts[optName]}`);
    }

    return {
      filePath: `say:{${pairs.join(',')}}${text.replace(/[\r\n]/g, ' ')}`,
      servedFromCache: false,
      rtt: 0
    };
  }

  try {
    const post = bent('POST', 'buffer', {
      /* auth headers are only sent when not using Play3.0 */
      ...(!isPlay3 && {
        'AUTHORIZATION': api_key,
        'X-USER-ID': user_id,
      }),
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
    const audioContent = await post(synthesizeUrl, {
      text,
      ...(isPlay3 && { language }),
      voice,
      voice_engine,
      output_format: 'mp3',
      sample_rate: 8000,
      ...opts
    });
    return {
      audioContent,
      extension: 'mp3',
      sampleRate: 8000
    };
  } catch (err) {
    logger.info({err}, 'synth PlayHT returned error');
    stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
    throw err;
  }
};
const synthInworld = async(logger, {
credentials, options, stats, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
}) => {
@@ -1174,54 +969,6 @@ const synthRimelabs = async(logger, {
throw err;
}
};
/**
 * Synthesize speech with Verbio Speech Center.
 *
 * Unless streaming is disabled (env var JAMBONES_DISABLE_TTS_STREAMING,
 * `renderForCaching`, or `disableTtsStreaming`), a `say:{...}` descriptor
 * string is returned instead of audio. Otherwise the text is posted to the
 * Verbio REST API and the 8 kHz pcm16 audio buffer is returned.
 *
 * @param {object} client - passed through to `getVerbioAccessToken`
 * @param {object} logger - logger exposing `info()`
 * @param {object} opts
 * @param {object} opts.credentials - passed through to `getVerbioAccessToken`
 * @param {object} opts.stats - stats client exposing `increment()`
 * @param {string} opts.voice
 * @param {string} opts.key - placed in the descriptor as `playback_id`
 * @param {string} opts.text - at most 2000 characters
 * @param {boolean} [opts.renderForCaching]
 * @param {boolean} [opts.disableTtsStreaming]
 * @param {boolean} [opts.disableTtsCache] - sets `write_cache_file=0` in the descriptor
 * @returns {Promise<object>} `{filePath, servedFromCache, rtt}` when streaming,
 *   else `{audioContent, extension, sampleRate}`
 * @throws {Error} when the text exceeds 2000 characters; rethrows REST errors
 *   after logging and counting them
 */
const synthVerbio = async(client, logger, {
  credentials, stats, voice, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
}) => {
  //https://doc.speechcenter.verbio.com/#tag/Text-To-Speech-REST-API
  if (text.length > 2000) {
    throw new Error('Verbio cannot synthesize for the text length larger than 2000 characters');
  }
  const token = await getVerbioAccessToken(client, logger, credentials);

  if (!process.env.JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    const params = [
      `access_token=${token.access_token}`,
      `playback_id=${key}`,
      'vendor=verbio',
      `voice=${voice}`,
      `write_cache_file=${disableTtsCache ? 0 : 1}`
    ].join(',');

    return {
      /* replace carriage returns as well as newlines so the descriptor stays on
         one line, matching what synthPlayHT does */
      filePath: `say:{${params}}${text.replace(/[\r\n]/g, ' ')}`,
      servedFromCache: false,
      rtt: 0
    };
  }

  try {
    const post = bent('https://us.rest.speechcenter.verbio.com', 'POST', 'buffer', {
      'Authorization': `Bearer ${token.access_token}`,
      'User-Agent': 'jambonz',
      'Content-Type': 'application/json'
    });
    const audioContent = await post('/api/v1/synthesize', {
      voice_id: voice,
      output_sample_rate: '8k',
      output_encoding: 'pcm16',
      text
    });
    return {
      audioContent,
      extension: 'r8',
      sampleRate: 8000
    };
  } catch (err) {
    logger.info({err}, 'synth Verbio returned error');
    stats.increment('tts.count', ['vendor:verbio', 'accepted:no']);
    throw err;
  }
};
const synthWhisper = async(logger, {credentials, stats, voice, key, text, instructions,
renderForCaching, disableTtsStreaming, disableTtsCache}) => {
const {api_key, model_id, baseURL, timeout, speed} = credentials;