mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-07-04 19:31:49 +00:00
feat(nvidia): NVCF cloud support for one-shot Riva/nvidia TTS
The nvidia/riva one-shot synth path was self-hosted-only (insecure gRPC to
riva_server_uri). Add NVCF cloud: when credentials.api_key is set, createRivaClient
dials grpc.nvcf.nvidia.com:443 over TLS with per-RPC metadata (function-id +
Bearer api key) baked into the channel credentials; function-id defaults to
ai-magpie-tts-multilingual, overridable via credentials.function_id.
- createRivaClient(uri, {apiKey, functionId}) — cloud when apiKey present, else
insecure self-hosted (unchanged).
- synthNvidia: pass api_key/function_id to the gRPC synth (caching path). In the
say: path, emit NVIDIA_API_KEY (plus NVIDIA_FUNCTION_ID when function_id is set) for cloud, so that
mediajam's nvidia dialect uses NVCF (it already reads those). The self-hosted say: path is unchanged.
- assert now accepts riva_server_uri (self-hosted) OR api_key (cloud).
Closes the 'one-shot say TTS cloud' gap; pairs with the webapp nvidia api_key
field. Requires a version bump + publish.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+12
-5
@@ -96,7 +96,8 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
|
||||
else if ('nvidia' === vendor) {
|
||||
assert.ok(voice, 'synthAudio requires voice when nvidia is used');
|
||||
assert.ok(language, 'synthAudio requires language when nvidia is used');
|
||||
assert.ok(credentials.riva_server_uri, 'synthAudio requires riva_server_uri in credentials when nvidia is used');
|
||||
assert.ok(credentials.riva_server_uri || credentials.api_key,
|
||||
'synthAudio requires riva_server_uri (self-hosted) or api_key (NVCF cloud) in credentials when nvidia is used');
|
||||
}
|
||||
else if ('wellsaid' === vendor) {
|
||||
language = 'en-US'; // WellSaid only supports English atm
|
||||
@@ -682,10 +683,16 @@ const synthWellSaid = async(logger, {credentials, stats, language, voice, gender
|
||||
const synthNvidia = async(client, logger, {
|
||||
credentials, stats, language, voice, model, key, text, renderForCaching, disableTtsStreaming, disableTtsCache
|
||||
}) => {
|
||||
const {riva_server_uri} = credentials;
|
||||
const {riva_server_uri, api_key, function_id} = credentials;
|
||||
if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
|
||||
let params = '';
|
||||
params += `{riva_server_uri=${riva_server_uri}`;
|
||||
let params = '{';
|
||||
if (api_key) {
|
||||
/* NVCF cloud: mediajam connects to grpc.nvcf.nvidia.com using these */
|
||||
params += `NVIDIA_API_KEY=${api_key}`;
|
||||
if (function_id) params += `,NVIDIA_FUNCTION_ID=${function_id}`;
|
||||
} else {
|
||||
params += `riva_server_uri=${riva_server_uri}`;
|
||||
}
|
||||
params += `,playback_id=${key}`;
|
||||
params += `,voice=${voice}`;
|
||||
params += `,language=${language}`;
|
||||
@@ -701,7 +708,7 @@ const synthNvidia = async(client, logger, {
|
||||
let rivaClient, request;
|
||||
const sampleRate = 8000;
|
||||
try {
|
||||
rivaClient = await createRivaClient(riva_server_uri);
|
||||
rivaClient = await createRivaClient(riva_server_uri, {apiKey: api_key, functionId: function_id});
|
||||
request = new SynthesizeSpeechRequest();
|
||||
request.setVoiceName(voice);
|
||||
request.setLanguageCode(language);
|
||||
|
||||
+18
-3
@@ -38,9 +38,24 @@ function makeAwsKey(awsAccessKeyId) {
|
||||
return `aws:${hash.digest('hex')}`;
|
||||
}
|
||||
|
||||
const createRivaClient = async(rivaUri) => {
|
||||
const client = new RivaSpeechSynthesisClient(rivaUri, grpc.credentials.createInsecure());
|
||||
return client;
|
||||
// NVCF cloud TTS function-id default: ai-magpie-tts-multilingual (public)
const NVIDIA_TTS_FUNCTION_ID = '877104f7-e885-42b9-8de8-f6e4c6303969';
// NVCF gRPC endpoint dialed (over TLS) when an API key is supplied
const NVIDIA_NVCF_URI = 'grpc.nvcf.nvidia.com:443';

/**
 * Create a Riva speech-synthesis gRPC client.
 *
 * Two modes, selected by the presence of `apiKey`:
 *  - NVCF cloud (`apiKey` truthy): TLS channel to the NVCF endpoint; every RPC
 *    carries `function-id` and `authorization: Bearer <apiKey>` metadata.
 *    `rivaUri` is ignored in this mode.
 *  - self-hosted (no `apiKey`): insecure channel to `rivaUri`.
 *
 * @param {string} [rivaUri] - host:port of a self-hosted Riva server; required
 *   when no `apiKey` is given.
 * @param {object} [opts]
 * @param {string} [opts.apiKey] - NVCF API key; selects cloud mode when set.
 * @param {string} [opts.functionId] - NVCF function id; falls back to
 *   NVIDIA_TTS_FUNCTION_ID when empty or omitted.
 * @param {string} [opts.nvcfUri] - override for the NVCF endpoint; falls back
 *   to NVIDIA_NVCF_URI when empty or omitted.
 * @returns {Promise<RivaSpeechSynthesisClient>} resolves to the client
 * @throws {TypeError} (as a rejection) when neither `apiKey` nor `rivaUri` is provided
 */
const createRivaClient = async(rivaUri, {apiKey, functionId, nvcfUri} = {}) => {
  if (apiKey) {
    /* NVCF cloud: per-RPC metadata (function-id + Bearer api key) is baked
       into the channel credentials so callers need not attach it per call */
    const callCreds = grpc.credentials.createFromMetadataGenerator((_params, cb) => {
      const md = new grpc.Metadata();
      md.add('function-id', functionId || NVIDIA_TTS_FUNCTION_ID);
      md.add('authorization', `Bearer ${apiKey}`);
      cb(null, md);
    });
    const creds = grpc.credentials.combineChannelCredentials(
      grpc.credentials.createSsl(), callCreds);
    return new RivaSpeechSynthesisClient(nvcfUri || NVIDIA_NVCF_URI, creds);
  }

  /* self-hosted: fail with an actionable message instead of handing an
     undefined/empty target to the gRPC client constructor */
  if (!rivaUri) {
    throw new TypeError(
      'createRivaClient requires riva_server_uri (self-hosted) or api_key (NVCF cloud)');
  }
  return new RivaSpeechSynthesisClient(rivaUri, grpc.credentials.createInsecure());
};
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user