feat(nvidia): NVCF cloud support for one-shot Riva/nvidia TTS

The nvidia/riva one-shot synth path was self-hosted only (insecure gRPC to riva_server_uri). Add NVCF cloud support: when credentials.api_key is set, createRivaClient dials grpc.nvcf.nvidia.com:443 over TLS with per-RPC metadata (function-id + Bearer api key) baked into the channel credentials; function-id defaults to ai-magpie-tts-multilingual and is overridable via credentials.function_id.

- createRivaClient(uri, {apiKey, functionId}): cloud when apiKey is present, else insecure self-hosted (unchanged).
- synthNvidia: pass api_key/function_id to the gRPC synth (caching path); in the say: path, emit NVIDIA_API_KEY (plus NVIDIA_FUNCTION_ID when function_id is set) for cloud so that mediajam's nvidia dialect uses NVCF (it already reads those). Self-hosted say: is unchanged.
- The assert now accepts riva_server_uri (self-hosted) OR api_key (cloud).

Closes the 'one-shot say TTS cloud' gap; pairs with the webapp nvidia api_key field. Requires a version bump + publish.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 19:31:49 +00:00 · 2026-06-17 17:25:44 -04:00
parent c47b4883c7
commit 3ba1a14358
2 changed files with 30 additions and 8 deletions
@@ -96,7 +96,8 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
  else if ('nvidia' === vendor) {
    assert.ok(voice, 'synthAudio requires voice when nvidia is used');
    assert.ok(language, 'synthAudio requires language when nvidia is used');
-    assert.ok(credentials.riva_server_uri, 'synthAudio requires riva_server_uri in credentials when nvidia is used');
+    assert.ok(credentials.riva_server_uri || credentials.api_key,
+      'synthAudio requires riva_server_uri (self-hosted) or api_key (NVCF cloud) in credentials when nvidia is used');
  }
  else if ('wellsaid' === vendor) {
    language = 'en-US'; // WellSaid only supports English atm
@@ -682,10 +683,16 @@ const synthWellSaid = async(logger, {credentials, stats, language, voice, gender
 const synthNvidia = async(client, logger, {
  credentials, stats, language,  voice, model, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
 }) => {
-  const {riva_server_uri} = credentials;
+  const {riva_server_uri, api_key, function_id} = credentials;
  if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
-    let params = '';
-    params += `{riva_server_uri=${riva_server_uri}`;
+    let params = '{';
+    if (api_key) {
+      /* NVCF cloud: mediajam connects to grpc.nvcf.nvidia.com using these */
+      params += `NVIDIA_API_KEY=${api_key}`;
+      if (function_id) params += `,NVIDIA_FUNCTION_ID=${function_id}`;
+    } else {
+      params += `riva_server_uri=${riva_server_uri}`;
+    }
    params += `,playback_id=${key}`;
    params += `,voice=${voice}`;
    params += `,language=${language}`;
@@ -701,7 +708,7 @@ const synthNvidia = async(client, logger, {
  let rivaClient, request;
  const sampleRate = 8000;
  try {
-    rivaClient = await createRivaClient(riva_server_uri);
+    rivaClient = await createRivaClient(riva_server_uri, {apiKey: api_key, functionId: function_id});
    request = new SynthesizeSpeechRequest();
    request.setVoiceName(voice);
    request.setLanguageCode(language);
@@ -38,9 +38,24 @@ function makeAwsKey(awsAccessKeyId) {
  return `aws:${hash.digest('hex')}`;
 }

-const createRivaClient = async(rivaUri) => {
-  const client = new RivaSpeechSynthesisClient(rivaUri, grpc.credentials.createInsecure());
-  return client;
+// NVCF cloud TTS function-id default: ai-magpie-tts-multilingual (public)
+const NVIDIA_TTS_FUNCTION_ID = '877104f7-e885-42b9-8de8-f6e4c6303969';
+
+const createRivaClient = async(rivaUri, {apiKey, functionId} = {}) => {
+  if (apiKey) {
+    /* NVCF cloud: TLS to grpc.nvcf.nvidia.com:443 with per-RPC metadata
+       (function-id + Bearer api key) baked into the channel credentials */
+    const callCreds = grpc.credentials.createFromMetadataGenerator((_params, cb) => {
+      const md = new grpc.Metadata();
+      md.add('function-id', functionId || NVIDIA_TTS_FUNCTION_ID);
+      md.add('authorization', `Bearer ${apiKey}`);
+      cb(null, md);
+    });
+    const creds = grpc.credentials.combineChannelCredentials(
+      grpc.credentials.createSsl(), callCreds);
+    return new RivaSpeechSynthesisClient('grpc.nvcf.nvidia.com:443', creds);
+  }
+  return new RivaSpeechSynthesisClient(rivaUri, grpc.credentials.createInsecure());
 };

 module.exports = {