support playht3.0

2026-01-25 02:08:26 +00:00 · 2024-09-27 12:08:41 +07:00
parent 1846203807
commit 1a04fd736c
3 changed files with 53 additions and 10 deletions
--- a/lib/synth-audio.js
+++ b/lib/synth-audio.js
@@ -20,7 +20,8 @@ const {
  createKryptonClient,
  createRivaClient,
  noopLogger,
-  makeFilePath
+  makeFilePath,
+  makePlayhtKey
 } = require('./utils');
 const getNuanceAccessToken = require('./get-nuance-access-token');
 const getVerbioAccessToken = require('./get-verbio-token');
@@ -244,7 +245,7 @@ async function synthAudio(client, createHash, retrieveHash, logger, stats, { acc
        });
        break;
      case 'playht':
-        audioBuffer = await synthPlayHT(logger, {
+        audioBuffer = await synthPlayHT(client, logger, {
          credentials, options, stats, language, voice, text, renderForCaching, disableTtsStreaming, filePath
        });
        break;
@@ -755,12 +756,38 @@ const synthElevenlabs = async(logger, {
  }
 };

-const synthPlayHT = async(logger, {
+const synthPlayHT = async(client, logger, {
  credentials, options, stats, voice, text, renderForCaching, disableTtsStreaming
 }) => {
  const {api_key, user_id, voice_engine, options: credOpts} = credentials;
  const opts = !!options && Object.keys(options).length !== 0 ? options : JSON.parse(credOpts || '{}');

+  let synthesizeUrl = 'https://api.play.ht/api/v2/tts/stream';
+
+  // Play3.0 does not use the fixed v2 stream URL: the v3 auth endpoint returns a time-limited
+  // inference address (JWT carried as request params), which is cached under a key derived from api_key.
+  if (voice_engine === 'Play3.0') {
+    try {
+      const key = makePlayhtKey(api_key);
+      synthesizeUrl = await client.get(key); // reuse a previously cached address when present
+      if (!synthesizeUrl) {
+        const post = bent('https://api.play.ht', 'POST', 'json', 201, {
+          'AUTHORIZATION': api_key,
+          'X-USER-ID': user_id,
+          'Accept': 'application/json'
+        });
+        const {inference_address, expires_at_ms} = await post('/api/v3/auth');
+        synthesizeUrl = inference_address;
+        const expiry = Math.floor((expires_at_ms - Date.now()) / 1000 - 30); // TTL in seconds, less 30s margin
+        if (expiry > 0) await client.set(key, inference_address, 'EX', expiry); // skip caching a non-positive TTL
+      }
+    } catch (err) {
+      logger.info({err}, 'synth PlayHT returned error for authentication version 3.0');
+      stats.increment('tts.count', ['vendor:playht', 'accepted:no']);
+      throw err;
+    }
+  }
+
  /* default to using the streaming interface, unless disabled by env var OR we want just a cache file */
  if (!JAMBONES_DISABLE_TTS_STREAMING && !renderForCaching && !disableTtsStreaming) {
    let params = '';
@@ -769,6 +796,7 @@ const synthPlayHT = async(logger, {
    params += ',vendor=playht';
    params += `,voice=${voice}`;
    params += `,voice_engine=${voice_engine}`;
+    params += `,synthesize_url=${synthesizeUrl}`;
    params += ',write_cache_file=1';
    if (opts.quality) params += `,quality=${opts.quality}`;
    if (opts.speed) params += `,speed=${opts.speed}`;
@@ -794,6 +822,7 @@ const synthPlayHT = async(logger, {
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json'
    });
+
    const mp3 = await post('/api/v2/tts/stream', {
      text,
      voice,
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -98,6 +98,11 @@ function makeAwsKey(awsAccessKeyId) {
  return `aws:${hash.digest('hex')}`;
 }

+function makePlayhtKey(apiKey) {
+  // Build a key of the form "playht:<sha1 hex of apiKey>" so the raw API key is not used as the key itself.
+  const digest = crypto.createHash('sha1').update(apiKey).digest('hex');
+  return `playht:${digest}`;
+}
 function makeVerbioKey(client_id) {
  const hash = crypto.createHash('sha1');
  hash.update(client_id);
@@ -171,6 +176,7 @@ module.exports = {
  makeSynthKey,
  makeNuanceKey,
  makeIbmKey,
+  makePlayhtKey,
  makeAwsKey,
  makeVerbioKey,
  getNuanceAccessToken,