API server cannot synthesize text after upgrading to the latest speech-utils (#317)

* API server cannot synthesize text after upgrading to the latest speech-utils
* wip
* add test case for synthesizing text
* fix synthesize test case
2025-12-19 05:47:46 +00:00 · 2024-04-30 06:48:34 +07:00
parent 2436bea6ea
commit b765232d4f
2 changed files with 100 additions and 5 deletions
--- a/lib/routes/api/tts-cache.js
+++ b/lib/routes/api/tts-cache.js
@@ -10,6 +10,7 @@ const Account = require('../../models/account');
 const sysError = require('../error');
 const { getSpeechCredential, decryptCredential } = require('../../utils/speech-utils');
 const PCMToMP3Encoder = require('../../record/encoder');
+const { pipeline } = require('stream');

 router.delete('/', async(req, res) => {
  const {purgeTtsCache} = req.app.locals;
@@ -69,6 +70,8 @@ router.post('/Synthesize', async(req, res) => {
        voice = arr[1];
        model = arr[2];
      }
+    }  else if (cred.vendor === 'deepgram') {
+      model = voice;
    }
    const stats = {
      histogram: () => {},
@@ -84,7 +87,8 @@ router.post('/Synthesize', async(req, res) => {
      model,
      salt,
      credentials: cred,
-      disableTtsCache: false
+      disableTtsCache: false,
+      disableTtsStreaming: true
    });

    let contentType = 'audio/mpeg';
@@ -110,10 +114,17 @@ router.post('/Synthesize', async(req, res) => {
    res.writeHead(200, {
      'Content-Type': contentType,
    });
-    readStream.pipe(res);
-    readStream.on('end', () => {
-      fs.unlink(filePath, (err) => {
-        if (err) throw err;
+
+    pipeline(readStream, res, (err) => {
+      if (err) {
+        logger.error('ttscache/Synthesize failed:', err);
+        if (!res.headersSent) {
+          res.status(500).end('Server error');
+        }
+      }
+
+      fs.unlink(filePath, (unlinkErr) => {
+        if (unlinkErr) throw unlinkErr;
        logger.info(`${filePath} was deleted`);
      });
    });
--- a/test/speech-credentials.js
+++ b/test/speech-credentials.js
@@ -170,6 +170,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for google tts');
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for google stt');
+
+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "en-US-Standard-C"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully google tested synthesize');
    }

    /* add / test a credential for microsoft */
@@ -198,6 +212,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for microsoft tts');
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for microsoft stt');
+
+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "en-US-AvaMultilingualNeural"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully microsoft tested synthesize');
    }

    /* add / test a credential for AWS */
@@ -227,6 +255,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for AWS tts');
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for AWS stt');
+
+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "Joanna"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully AWS tested synthesize');
    }

    /* add a credential for wellsaid */
@@ -253,6 +295,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for wellsaid');

+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "3"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully Wellsaid tested synthesize');
+
      /* delete the credential */
      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
        auth: authUser,
@@ -285,6 +341,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for deepgram');

+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "aura-asteria-en"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully deepgram tested synthesize');
+
      /* delete the credential */
      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
        auth: authUser,
@@ -404,6 +474,20 @@ test('speech credentials tests', async(t) => {
      //console.log(JSON.stringify(result));
      t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for ibm stt');

+      result = await request.post(`/Accounts/${account_sid}/TtsCache/Synthesize`, {
+        resolveWithFullResponse: true,
+        auth: authUser,
+        json: true,
+        body: {
+          speech_credential_sid: ms_sid,
+          text: "Hello How are you",
+          language: "en-US",
+          voice: "en-US_MichaelExpressive"
+        }
+      });
+
+      t.ok(result.statusCode === 200, 'successfully IBM tested synthesize');
+
      /* delete the credential */
      result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {
        auth: authUser,