mirror of
https://github.com/jambonz/speech-utils.git
synced 2026-07-04 19:31:49 +00:00
3ba1a14358
The nvidia/riva one-shot synth path was self-hosted-only (insecure gRPC to
riva_server_uri). Add NVCF cloud: when credentials.api_key is set, createRivaClient
dials grpc.nvcf.nvidia.com:443 over TLS with per-RPC metadata (function-id +
Bearer api key) baked into the channel credentials; function-id defaults to
ai-magpie-tts-multilingual, overridable via credentials.function_id.
- createRivaClient(uri, {apiKey, functionId}) — cloud when apiKey present, else
insecure self-hosted (unchanged).
- synthNvidia: pass api_key/function_id to the gRPC synth (caching path); and in
the say: path emit NVIDIA_API_KEY(+NVIDIA_FUNCTION_ID) for cloud so mediajam's
nvidia dialect uses NVCF (it already reads those). Self-hosted say: unchanged.
- assert now accepts riva_server_uri (self-hosted) OR api_key (cloud).
Closes the 'one-shot say TTS cloud' gap; pairs with the webapp nvidia api_key
field. Requires a version bump + publish.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
68 lines
2.0 KiB
JavaScript
68 lines
2.0 KiB
JavaScript
const crypto = require('crypto');
|
|
const {RivaSpeechSynthesisClient} = require('../stubs/riva/proto/riva_tts_grpc_pb');
|
|
const grpc = require('@grpc/grpc-js');
|
|
const { TMP_FOLDER } = require('./config');
|
|
|
|
/**
 * Build the cache key for a synthesized piece of speech.
 *
 * The synthesis parameters are joined with ':' in the fixed order
 * language, vendor, voice, engine, model, text, instructions and hashed
 * with SHA-1. The key is `tts:<hex>` or, when an account_sid is given,
 * `tts:<account_sid>:<hex>`.
 *
 * @param {object} opts
 * @param {string} [opts.account_sid=''] - optional account scope for the key
 * @param {string} opts.vendor
 * @param {string} opts.language
 * @param {string} opts.voice
 * @param {string} [opts.engine='']
 * @param {string} [opts.model='']
 * @param {string} opts.text
 * @param {string} [opts.instructions='']
 * @returns {string} the cache key
 */
function makeSynthKey({
  account_sid = '',
  vendor,
  language,
  voice,
  engine = '',
  model = '',
  text,
  instructions = '',
}) {
  // NB: the field order and ':' separators define the key; changing either
  // would produce different keys for the same request.
  const fingerprint = `${language}:${vendor}:${voice}:${engine}:${model}:${text}:${instructions}`;
  const hexHashKey = crypto.createHash('sha1').update(fingerprint).digest('hex');

  // the account segment is only present when an account_sid was supplied
  const accountKey = account_sid ? `:${account_sid}` : '';
  return `tts${accountKey}:${hexHashKey}`;
}
|
|
|
|
/**
 * Build the path under TMP_FOLDER for the audio file belonging to a key.
 *
 * The first occurrence of 'tts:' in the key is replaced by `tts-<salt>`;
 * the rest of the key is used unchanged and the extension is appended
 * after a dot.
 *
 * @param {object} opts
 * @param {string} opts.key - cache key, e.g. as produced by makeSynthKey
 * @param {string} [opts.salt=''] - inserted directly after the 'tts-' prefix
 * @param {string} opts.extension - file extension without the leading dot
 * @returns {string} the file path
 */
function makeFilePath({key, salt = '', extension}) {
  // a string pattern makes replace() touch only the first match
  const fileName = key.replace('tts:', `tts-${salt}`);
  return `${TMP_FOLDER}/${fileName}.${extension}`;
}
|
|
|
|
|
|
/**
 * Logger stand-in exposing info, debug and error methods that accept any
 * arguments and do nothing.
 */
const noopLogger = {
  info: () => {},
  debug: () => {},
  error: () => {}
};
|
|
|
|
/**
 * Derive a key from an AWS access key id without embedding the id itself.
 *
 * @param {string} awsAccessKeyId - value to hash
 * @returns {string} `aws:` followed by the SHA-1 hex digest of the input
 */
function makeAwsKey(awsAccessKeyId) {
  const digest = crypto.createHash('sha1').update(awsAccessKeyId).digest('hex');
  return `aws:${digest}`;
}
|
|
|
|
// NVCF cloud TTS function-id default: ai-magpie-tts-multilingual (public)
const NVIDIA_TTS_FUNCTION_ID = '877104f7-e885-42b9-8de8-f6e4c6303969';

/**
 * Create a Riva speech-synthesis gRPC client.
 *
 * When an apiKey is supplied the client targets grpc.nvcf.nvidia.com:443
 * using SSL channel credentials combined with call credentials that attach
 * `function-id` and `authorization: Bearer <apiKey>` metadata to each call;
 * rivaUri is not used in that case. Without an apiKey the client connects to
 * rivaUri using insecure credentials.
 *
 * @param {string} rivaUri - address of the self-hosted server (no apiKey)
 * @param {object} [opts]
 * @param {string} [opts.apiKey] - selects the cloud path when truthy
 * @param {string} [opts.functionId] - overrides the default function id
 * @returns {Promise<RivaSpeechSynthesisClient>} resolves to the new client
 */
const createRivaClient = async(rivaUri, {apiKey, functionId} = {}) => {
  if (!apiKey) {
    return new RivaSpeechSynthesisClient(rivaUri, grpc.credentials.createInsecure());
  }

  /* NVCF cloud: TLS to grpc.nvcf.nvidia.com:443 with per-RPC metadata
     (function-id + Bearer api key) baked into the channel credentials */
  const callCreds = grpc.credentials.createFromMetadataGenerator((_params, cb) => {
    const md = new grpc.Metadata();
    // `||` (not `??`) so that an empty functionId also falls back to the default
    md.add('function-id', functionId || NVIDIA_TTS_FUNCTION_ID);
    md.add('authorization', `Bearer ${apiKey}`);
    cb(null, md);
  });
  const creds = grpc.credentials.combineChannelCredentials(
    grpc.credentials.createSsl(), callCreds);
  return new RivaSpeechSynthesisClient('grpc.nvcf.nvidia.com:443', creds);
};
|
|
|
|
module.exports = {
|
|
makeSynthKey,
|
|
makeAwsKey,
|
|
createRivaClient,
|
|
noopLogger,
|
|
makeFilePath
|
|
};
|