feat(nvidia): NVCF cloud support for one-shot Riva/nvidia TTS

The nvidia/riva one-shot synth path was self-hosted only (insecure gRPC to
riva_server_uri). Add NVCF cloud support: when credentials.api_key is set,
createRivaClient dials grpc.nvcf.nvidia.com:443 over TLS with per-RPC metadata
(function-id + Bearer api key) baked into the channel credentials. In
createRivaClient the function-id defaults to ai-magpie-tts-multilingual and is
overridable via credentials.function_id.

- createRivaClient(uri, {apiKey, functionId}) — NVCF cloud when apiKey is present
  (uri is then ignored), else insecure self-hosted gRPC to uri (unchanged).
- synthNvidia: on the caching path, pass api_key/function_id to createRivaClient
  for the gRPC synth. On the say: path, when api_key is set, emit NVIDIA_API_KEY
  (plus NVIDIA_FUNCTION_ID when function_id is set) instead of riva_server_uri so
  that mediajam's nvidia dialect uses NVCF (it already reads those). The
  self-hosted say: path is unchanged.
- synthAudio: the nvidia credentials assert now accepts either riva_server_uri
  (self-hosted) or api_key (NVCF cloud).

Closes the 'one-shot say TTS cloud' gap; pairs with the webapp nvidia api_key
field. Requires a version bump + publish.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dave Horton
2026-06-17 17:25:44 -04:00
parent c47b4883c7
commit 3ba1a14358
2 changed files with 30 additions and 8 deletions
+12 -5
View File
@@ -96,7 +96,8 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
else if ('nvidia' === vendor) {
assert.ok(voice, 'synthAudio requires voice when nvidia is used');
assert.ok(language, 'synthAudio requires language when nvidia is used');
assert.ok(credentials.riva_server_uri, 'synthAudio requires riva_server_uri in credentials when nvidia is used');
assert.ok(credentials.riva_server_uri || credentials.api_key,
'synthAudio requires riva_server_uri (self-hosted) or api_key (NVCF cloud) in credentials when nvidia is used');
}
else if ('wellsaid' === vendor) {
language = 'en-US'; // WellSaid only supports English atm
@@ -682,10 +683,16 @@ const synthWellSaid = async(logger, {credentials, stats, language, voice, gender
const synthNvidia = async(client, logger, {
credentials, stats, language, voice, model, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
}) => {
const {riva_server_uri} = credentials;
const {riva_server_uri, api_key, function_id} = credentials;
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
let params = '';
params += `{riva_server_uri=${riva_server_uri}`;
let params = '{';
if (api_key) {
/* NVCF cloud: mediajam connects to grpc.nvcf.nvidia.com using these */
params += `NVIDIA_API_KEY=${api_key}`;
if (function_id) params += `,NVIDIA_FUNCTION_ID=${function_id}`;
} else {
params += `riva_server_uri=${riva_server_uri}`;
}
params += `,playback_id=${key}`;
params += `,voice=${voice}`;
params += `,language=${language}`;
@@ -701,7 +708,7 @@ const synthNvidia = async(client, logger, {
let rivaClient, request;
const sampleRate = 8000;
try {
rivaClient = await createRivaClient(riva_server_uri);
rivaClient = await createRivaClient(riva_server_uri, {apiKey: api_key, functionId: function_id});
request = new SynthesizeSpeechRequest();
request.setVoiceName(voice);
request.setLanguageCode(language);
+18 -3
View File
@@ -38,9 +38,24 @@ function makeAwsKey(awsAccessKeyId) {
return `aws:${hash.digest('hex')}`;
}
const createRivaClient = async(rivaUri) => {
const client = new RivaSpeechSynthesisClient(rivaUri, grpc.credentials.createInsecure());
return client;
// NVCF cloud TTS function-id default: ai-magpie-tts-multilingual (public)
const NVIDIA_TTS_FUNCTION_ID = '877104f7-e885-42b9-8de8-f6e4c6303969';
const createRivaClient = async(rivaUri, {apiKey, functionId} = {}) => {
if (apiKey) {
/* NVCF cloud: TLS to grpc.nvcf.nvidia.com:443 with per-RPC metadata
(function-id + Bearer api key) baked into the channel credentials */
const callCreds = grpc.credentials.createFromMetadataGenerator((_params, cb) => {
const md = new grpc.Metadata();
md.add('function-id', functionId || NVIDIA_TTS_FUNCTION_ID);
md.add('authorization', `Bearer ${apiKey}`);
cb(null, md);
});
const creds = grpc.credentials.combineChannelCredentials(
grpc.credentials.createSsl(), callCreds);
return new RivaSpeechSynthesisClient('grpc.nvcf.nvidia.com:443', creds);
}
return new RivaSpeechSynthesisClient(rivaUri, grpc.credentials.createInsecure());
};
module.exports = {