Files
speech-utils/lib/utils.js
T
Dave Horton 3ba1a14358 feat(nvidia): NVCF cloud support for one-shot Riva/nvidia TTS
The nvidia/riva one-shot synth path was self-hosted-only (insecure gRPC to
riva_server_uri). Add NVCF cloud: when credentials.api_key is set, createRivaClient
dials grpc.nvcf.nvidia.com:443 over TLS with per-RPC metadata (function-id +
Bearer api key) baked into the channel credentials; function-id defaults to
ai-magpie-tts-multilingual, overridable via credentials.function_id.

- createRivaClient(uri, {apiKey, functionId}) — cloud when apiKey present, else
  insecure self-hosted (unchanged).
- synthNvidia: pass api_key/function_id to the gRPC synth (caching path); and in
  the say: path emit NVIDIA_API_KEY(+NVIDIA_FUNCTION_ID) for cloud so mediajam's
  nvidia dialect uses NVCF (it already reads those). Self-hosted say: unchanged.
- assert now accepts riva_server_uri (self-hosted) OR api_key (cloud).

Closes the 'one-shot say TTS cloud' gap; pairs with the webapp nvidia api_key
field. Requires a version bump + publish.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-17 17:25:44 -04:00

68 lines
2.0 KiB
JavaScript

const crypto = require('crypto');
const {RivaSpeechSynthesisClient} = require('../stubs/riva/proto/riva_tts_grpc_pb');
const grpc = require('@grpc/grpc-js');
const { TMP_FOLDER } = require('./config');
/**
 * Build the cache key for a synthesized-speech request.
 *
 * The result is `tts:<sha1>` or, when an account sid is supplied,
 * `tts:<account_sid>:<sha1>`. The SHA-1 digest covers language, vendor, voice,
 * engine, model, text and instructions, joined with colons in that order.
 * The account sid appears only in the key prefix; it is not part of the hash.
 *
 * @param {object} opts
 * @param {string} [opts.account_sid=''] - optional account scope for the key
 * @param {string} opts.vendor
 * @param {string} opts.language
 * @param {string} opts.voice
 * @param {string} [opts.engine='']
 * @param {string} [opts.model='']
 * @param {string} opts.text
 * @param {string} [opts.instructions='']
 * @returns {string} the cache key
 */
function makeSynthKey({
  account_sid = '',
  vendor,
  language,
  voice,
  engine = '',
  model = '',
  text,
  instructions = '',
}) {
  const fingerprint = `${language}:${vendor}:${voice}:${engine}:${model}:${text}:${instructions}`;
  const digest = crypto.createHash('sha1').update(fingerprint).digest('hex');

  // Unscoped requests get the short form of the key.
  if (!account_sid) {
    return `tts:${digest}`;
  }
  return `tts:${account_sid}:${digest}`;
}
/**
 * Derive the temp-file path for a cached audio clip.
 *
 * The first `tts:` in the key is replaced by `tts-<salt>`, and the result is
 * placed under TMP_FOLDER with the given file extension.
 *
 * @param {object} opts
 * @param {string} opts.key - cache key (as produced by makeSynthKey)
 * @param {string} [opts.salt=''] - inserted right after the `tts-` prefix
 * @param {string} opts.extension - file extension, without the leading dot
 * @returns {string} the file path
 */
function makeFilePath({key, salt = '', extension}) {
  const saltedPrefix = `tts-${salt}`;
  const baseName = key.replace('tts:', saltedPrefix);
  return `${TMP_FOLDER}/${baseName}.${extension}`;
}
/**
 * Logger stand-in whose info/debug/error methods accept any arguments and do
 * nothing.
 */
const noopLogger = {
  info() {},
  debug() {},
  error() {},
};
/**
 * Build a key of the form `aws:<sha1>` from an AWS access key id, so the raw
 * id itself does not appear in the key.
 *
 * @param {string} awsAccessKeyId
 * @returns {string} `aws:` followed by the hex SHA-1 digest of the id
 */
function makeAwsKey(awsAccessKeyId) {
  const digest = crypto.createHash('sha1').update(awsAccessKeyId).digest('hex');
  return `aws:${digest}`;
}
// Default NVCF function-id for cloud TTS: ai-magpie-tts-multilingual (public).
const NVIDIA_TTS_FUNCTION_ID = '877104f7-e885-42b9-8de8-f6e4c6303969';

/**
 * Create a Riva speech-synthesis gRPC client.
 *
 * - Without an api key: connects to `rivaUri` using insecure credentials
 *   (self-hosted Riva).
 * - With an api key: connects to grpc.nvcf.nvidia.com:443 over TLS; `rivaUri`
 *   is not used. Every RPC carries `function-id` and `authorization: Bearer`
 *   metadata, attached through the channel credentials.
 *
 * @param {string} rivaUri - address of the self-hosted Riva server
 * @param {object} [opts]
 * @param {string} [opts.apiKey] - NVCF api key; its presence selects cloud mode
 * @param {string} [opts.functionId] - NVCF function-id; defaults to
 *   NVIDIA_TTS_FUNCTION_ID when falsy
 * @returns {Promise<RivaSpeechSynthesisClient>}
 */
const createRivaClient = async(rivaUri, {apiKey, functionId} = {}) => {
  // Self-hosted: no api key, so dial the supplied server without TLS.
  if (!apiKey) {
    return new RivaSpeechSynthesisClient(rivaUri, grpc.credentials.createInsecure());
  }

  // NVCF cloud: per-RPC metadata generator supplying function-id + bearer token.
  const attachNvcfMetadata = (_callParams, done) => {
    const metadata = new grpc.Metadata();
    metadata.add('function-id', functionId || NVIDIA_TTS_FUNCTION_ID);
    metadata.add('authorization', `Bearer ${apiKey}`);
    done(null, metadata);
  };

  const perCallCreds = grpc.credentials.createFromMetadataGenerator(attachNvcfMetadata);
  const tlsCreds = grpc.credentials.createSsl();
  const channelCreds = grpc.credentials.combineChannelCredentials(tlsCreds, perCallCreds);
  return new RivaSpeechSynthesisClient('grpc.nvcf.nvidia.com:443', channelCreds);
};
// Public API of this module: key/path builders, the Riva client factory and
// a do-nothing logger.
module.exports = {
makeSynthKey,
makeAwsKey,
createRivaClient,
noopLogger,
makeFilePath
};