initial checkin

2026-01-25 02:08:06 +00:00 · 2024-04-20 10:17:19 -04:00
commit 29d21cb3dd
11 changed files with 4327 additions and 0 deletions
--- a/.eslintignore
+++ b/.eslintignore
@@ -0,0 +1 @@
+test/*
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -0,0 +1,126 @@
+{
+  "env": {
+    "node": true,
+    "es6": true  
+  },
+  "parserOptions": {
+    "ecmaFeatures": {
+      "jsx": false,
+      "modules": false
+    },
+    "ecmaVersion": 2020
+  },
+  "plugins": ["promise"],
+  "rules": {
+    "promise/always-return": "error",
+    "promise/no-return-wrap": "error",
+    "promise/param-names": "error",
+    "promise/catch-or-return": "error",
+    "promise/no-native": "off",
+    "promise/no-nesting": "warn",
+    "promise/no-promise-in-callback": "warn",
+    "promise/no-callback-in-promise": "warn",
+    "promise/no-return-in-finally": "warn",
+
+    // Possible Errors
+    // http://eslint.org/docs/rules/#possible-errors
+    "comma-dangle": [2, "only-multiline"],
+    "no-control-regex": 2,
+    "no-debugger": 2,
+    "no-dupe-args": 2,
+    "no-dupe-keys": 2,
+    "no-duplicate-case": 2,
+    "no-empty-character-class": 2,
+    "no-ex-assign": 2,
+    "no-extra-boolean-cast" : 2,
+    "no-extra-parens": [2, "functions"],
+    "no-extra-semi": 2,
+    "no-func-assign": 2,
+    "no-invalid-regexp": 2,
+    "no-irregular-whitespace": 2,
+    "no-negated-in-lhs": 2,
+    "no-obj-calls": 2,
+    "no-proto": 2,
+    "no-unexpected-multiline": 2,
+    "no-unreachable": 2,
+    "use-isnan": 2,
+    "valid-typeof": 2,
+
+    // Best Practices
+    // http://eslint.org/docs/rules/#best-practices
+    "no-fallthrough": 2,
+    "no-octal": 2,
+    "no-redeclare": 2,
+    "no-self-assign": 2,
+    "no-unused-labels": 2,
+
+    // Strict Mode
+    // http://eslint.org/docs/rules/#strict-mode
+    "strict": [2, "never"],
+
+    // Variables
+    // http://eslint.org/docs/rules/#variables
+    "no-delete-var": 2,
+    "no-undef": 2,
+    "no-unused-vars": [2, {"args": "none"}],
+
+    // Node.js and CommonJS
+    // http://eslint.org/docs/rules/#nodejs-and-commonjs
+    "no-mixed-requires": 2,
+    "no-new-require": 2,
+    "no-path-concat": 2,
+    "no-restricted-modules": [2, "sys", "_linklist"],
+
+    // Stylistic Issues
+    // http://eslint.org/docs/rules/#stylistic-issues
+    "comma-spacing": 2,
+    "eol-last": 2,
+    "indent": [2, 2, {"SwitchCase": 1}],
+    "keyword-spacing": 2,
+    "max-len": [2, 120, 2],
+    "new-parens": 2,
+    "no-mixed-spaces-and-tabs": 2,
+    "no-multiple-empty-lines": [2, {"max": 2}],
+    "no-trailing-spaces": [2, {"skipBlankLines": false }],
+    "quotes": [2, "single", "avoid-escape"],
+    "semi": 2,
+    "space-before-blocks": [2, "always"],
+    "space-before-function-paren": [2, "never"],
+    "space-in-parens": [2, "never"],
+    "space-infix-ops": 2,
+    "space-unary-ops": 2,
+
+    // ECMAScript 6
+    // http://eslint.org/docs/rules/#ecmascript-6
+    "arrow-parens": [2, "always"],
+    "arrow-spacing": [2, {"before": true, "after": true}],
+    "constructor-super": 2,
+    "no-class-assign": 2,
+    "no-confusing-arrow": 2,
+    "no-const-assign": 2,
+    "no-dupe-class-members": 2,
+    "no-new-symbol": 2,
+    "no-this-before-super": 2,
+    "prefer-const": 2
+  },
+  "globals": {
+    "DTRACE_HTTP_CLIENT_REQUEST"           : false,
+    "LTTNG_HTTP_CLIENT_REQUEST"            : false,
+    "COUNTER_HTTP_CLIENT_REQUEST"          : false,
+    "DTRACE_HTTP_CLIENT_RESPONSE"          : false,
+    "LTTNG_HTTP_CLIENT_RESPONSE"           : false,
+    "COUNTER_HTTP_CLIENT_RESPONSE"         : false,
+    "DTRACE_HTTP_SERVER_REQUEST"           : false,
+    "LTTNG_HTTP_SERVER_REQUEST"            : false,
+    "COUNTER_HTTP_SERVER_REQUEST"          : false,
+    "DTRACE_HTTP_SERVER_RESPONSE"          : false,
+    "LTTNG_HTTP_SERVER_RESPONSE"           : false,
+    "COUNTER_HTTP_SERVER_RESPONSE"         : false,
+    "DTRACE_NET_STREAM_END"                : false,
+    "LTTNG_NET_STREAM_END"                 : false,
+    "COUNTER_NET_SERVER_CONNECTION_CLOSE"  : false,
+    "DTRACE_NET_SERVER_CONNECTION"         : false,
+    "LTTNG_NET_SERVER_CONNECTION"          : false,
+    "COUNTER_NET_SERVER_CONNECTION"        : false  
+  }
+}
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,40 @@
+# Logs
+logs
+*.log
+
+# Runtime data
+pids
+*.pid
+*.seed
+
+# github pages site
+_site
+
+#transient test cases
+examples/nosave.*.js
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+.nyc_output/
+
+# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (http://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directory
+# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git
+node_modules
+
+.DS_Store
+
+examples/*
+
+ecosystem.config.js
--- a/README.md
+++ b/README.md
@@ -0,0 +1,32 @@
+# realtime-translator
+
+This application demonstrates the use of the jambonz [dub verb](https://www.jambonz.org/docs/webhooks/dub/) to create an application where each party on the call receives two distinct audio tracks:
+
+- one from the remote party
+- one from a translator that is listening in on the call
+
+The scenario is intended to mimic a contact center staffed with English-speaking agents that uses the services of an automated translator to handle calls from Vietnamese-speaking callers.
+
+## Install
+This is a jambonz Node.js websocket application that uses environment variables to configure the choice of languages.  Additionally, you will need a google json key file to use the google translate service.
+
+The example below starts the application listening on port 3000 with languages set according to the scenario described above.
+```
+npm ci
+
+WS_PORT=3000 \
+GOOGLE_APPLICATION_CREDENTIALS='path-to-your-key.json' \
+CALLER_LANGUAGE_NAME='Vietnamese' \
+CALLER_LANGUAGE_CODE='vi-VN' \
+CALLER_TTS_VENDOR='microsoft' \
+CALLER_TTS_VOICE='vi-VN-NamMinhNeural' \
+CALLER_STT_VENDOR='microsoft' \
+CALLED_LANGUAGE_NAME='English' \
+CALLED_LANGUAGE_CODE='en-US' \
+CALLED_TTS_VENDOR='microsoft' \
+CALLED_TTS_VOICE='en-US-AndrewMultilingualNeural' \
+CALLED_STT_VENDOR='deepgram' \
+npm start
+```
+
+On the jambonz server, create an application with url `wss://jambonz-apps.drachtio.org/translator`.
--- a/app.js
+++ b/app.js
@@ -0,0 +1,12 @@
+// Entry point: creates an HTTP server, attaches the jambonz websocket endpoint to it,
+// registers the application routes and starts listening.
+const {createServer} = require('http');
+const {createEndpoint} = require('@jambonz/node-client-ws');
+const pino = require('pino');
+
+const server = createServer();
+const logger = pino({level: process.env.LOGLEVEL || 'info'});
+const port = process.env.WS_PORT || 3000;
+
+// factory handed to the route modules so they can register their websocket paths
+const makeService = createEndpoint({server, logger});
+
+const registerRoutes = require('./lib/routes');
+registerRoutes({logger, makeService});
+
+const onListening = () => {
+  logger.info(`jambonz websocket server listening at http://localhost:${port}`);
+};
+
+server.listen(port, onListening);
--- a/lib/routes/index.js
+++ b/lib/routes/index.js
@@ -0,0 +1,4 @@
+/**
+ * Wires up every route module of the application.
+ *
+ * @param {object} deps
+ * @param {object} deps.logger - logger handed to each route module
+ * @param {Function} deps.makeService - service factory handed to each route module
+ */
+module.exports = ({logger, makeService}) => {
+  const registerTranslator = require('./translator');
+  registerTranslator({logger, makeService});
+};
+
--- a/lib/routes/translator.js
+++ b/lib/routes/translator.js
@@ -0,0 +1,172 @@
+const assert = require('assert');
+const translateText = require('../utils/translate');
+const {
+  synthesizer_a,
+  synthesizer_b,
+  recognizer_a,
+  recognizer_b
+} = require('../utils/config');
+
+
+/**
+ * Registers the '/translator' websocket service.
+ *
+ * For every new session the handler answers the call, enables transcription of the
+ * caller's speech, adds a dub track on each leg to carry the translated speech, and
+ * dials the called party.
+ *
+ * The user to dial is read from process.env.CALLED_SIP_USER and falls back to
+ * 'daveh@sip.jambonz.xyz' when that variable is not set.
+ *
+ * @param {object} deps
+ * @param {object} deps.logger - base logger; a child logger is created for each call
+ * @param {Function} deps.makeService - factory returning a service bound to a path
+ */
+const service = ({logger, makeService}) => {
+  const svc = makeService({path: '/translator'});
+  const calledUser = process.env.CALLED_SIP_USER || 'daveh@sip.jambonz.xyz';
+
+  svc.on('session:new', async(session) => {
+    session.locals = {logger: logger.child({call_sid: session.call_sid})};
+    session.locals.logger.info({session}, `new incoming call: ${session.call_sid}`);
+
+    session
+      .on('/transcription-a', onTranscribeALeg.bind(null, session))
+      .on('/transcription-b', onTranscribeBLeg.bind(null, session))
+      .on('call:status', onCallStatus.bind(null, session))
+      .on('close', onClose.bind(null, session))
+      .on('error', onError.bind(null, session));
+
+    /**
+     * Outdial and set up translation on both legs.
+     * Create an additional audio track on both legs for the translated speech.
+     * Each party will hear the untranslated speech of the other party, followed by the translation.
+     */
+    session
+
+      // answer the call
+      .answer()
+
+      // turn down the volume of the remote party, to make the translator's voice the focus
+      // also enable transcriptions of the caller's speech
+      .config({
+        boostAudioSignal: '-10 dB',
+        recognizer: recognizer_a,
+        transcribe: {
+          enable: true,
+          transcriptionHook: '/transcription-a'
+        }
+      })
+
+      // add an additional audio track to the call, which will carry the translator's voice
+      .dub({
+        action: 'addTrack',
+        track: 'a'
+      })
+
+      // dial the called party, and set similar options on that leg of the call
+      .dial({
+        target: [
+          {
+            type: 'user',
+            name: calledUser
+          }
+        ],
+        boostAudioSignal: '-10 dB',
+        transcribe: {
+          transcriptionHook: '/transcription-b',
+          channel: 2,
+          recognizer: {
+            ...recognizer_b,
+            deepgramOptions: {
+              endpointing: 500,
+              utteranceEndMs: 1000,
+              smartFormatting: true,
+            }
+          }
+        },
+        dub: [
+          {
+            action: 'addTrack',
+            track: 'b',
+          }
+        ],
+      })
+
+      // hangup if dial fails, or when it completes
+      .hangup()
+      .reply();
+  });
+};
+
+/**
+ * Logs that the session's connection was closed.
+ *
+ * @param {object} session - session whose locals.logger is used for logging
+ * @param {*} code - close code passed along with the 'close' event
+ * @param {*} reason - close reason passed along with the 'close' event
+ */
+const onClose = (session, code, reason) => {
+  const closeDetails = {session, code, reason};
+  session.locals.logger.info(closeDetails, `session ${session.call_sid} closed`);
+};
+
+/**
+ * Logs an error reported on the session.
+ *
+ * The error is logged at `error` level (rather than `info`) so it is not lost when
+ * the log level is raised, consistent with how translation failures are logged.
+ *
+ * @param {object} session - session whose locals.logger is used for logging
+ * @param {Error} err - the error passed along with the 'error' event
+ */
+const onError = (session, err) => {
+  const {logger} = session.locals;
+  logger.error({err}, `session ${session.call_sid} received error`);
+};
+
+/**
+ * Logs each call status event and remembers the call_sid of the outbound (b) leg.
+ *
+ * Only the first event whose direction is 'outbound' is recorded; once
+ * session.locals.call_sid_b is set it is never overwritten.
+ *
+ * @param {object} session - session whose locals hold the logger and call_sid_b
+ * @param {object} evt - call status event; reads evt.direction and evt.call_sid
+ */
+const onCallStatus = (session, evt) => {
+  const {locals} = session;
+  locals.logger.info({evt}, 'call status');
+
+  const isFirstOutboundEvent = !locals.call_sid_b && evt.direction === 'outbound';
+  if (!isFirstOutboundEvent) return;
+
+  locals.call_sid_b = evt.call_sid;
+  locals.logger.info(`call_sid for b leg is ${locals.call_sid_b}`);
+};
+
+/**
+ * Handles a transcription of the caller's (a leg) speech: translates it into the
+ * called party's language and speaks the translation on track 'b' of the outbound leg.
+ *
+ * Nothing is spoken when the outbound leg's call_sid is not known yet, when the
+ * event carries no usable transcript, or when the translation comes back empty.
+ *
+ * @param {object} session - session; reads locals.logger and locals.call_sid_b
+ * @param {object} evt - transcription hook payload; evt.speech is expected to carry
+ *   `alternatives` and `is_final`
+ * @throws {AssertionError} if a non-final transcription is received
+ */
+const onTranscribeALeg = (session, evt) => {
+  const {logger, call_sid_b} = session.locals;
+  const {speech} = evt;
+  logger.info({speech}, 'transcription received for channel 1');
+
+  // acknowledge the hook before inspecting the payload, so that a malformed event
+  // can never leave the hook unanswered
+  session.reply();
+
+  if (!speech) {
+    logger.info('transcription event carried no speech, ignoring');
+    return;
+  }
+
+  assert.ok(speech.is_final, 'expecting only final transcriptions');
+
+  const transcript = speech.alternatives?.[0]?.transcript;
+  if (typeof transcript !== 'string' || transcript.trim() === '') {
+    logger.info('transcription was empty, nothing to translate');
+    return;
+  }
+
+  if (call_sid_b) {
+    translateText(logger, transcript, recognizer_a.language, recognizer_b.language)
+      .then((translation) => {
+        if (!translation) return;
+        logger.info({translation},
+          `translated text, now sending dub command: ${translation} for call_sid_b ${call_sid_b}`);
+
+        /* speak the translation to the b party */
+        session.injectCommand('dub', {
+          action: 'sayOnTrack',
+          track: 'b',
+          say: {
+            text: translation,
+            synthesizer: synthesizer_b
+          }
+        }, call_sid_b);
+        return;
+      })
+      .catch((err) => logger.error({err}, 'Error translating text'));
+  }
+  else {
+    logger.info('no call_sid_b, not sending dub command');
+  }
+};
+
+/**
+ * Handles a transcription of the called party's (b leg) speech: translates it into
+ * the caller's language and speaks the translation on track 'a'.
+ *
+ * Nothing is spoken when the event carries no usable transcript or when the
+ * translation comes back empty.
+ *
+ * @param {object} session - session; reads locals.logger
+ * @param {object} evt - transcription hook payload; evt.speech is expected to carry
+ *   `alternatives` and `is_final`
+ * @throws {AssertionError} if a non-final transcription is received
+ */
+const onTranscribeBLeg = (session, evt) => {
+  const {logger} = session.locals;
+  const {speech} = evt;
+  logger.info({speech}, 'transcription received for channel 2');
+
+  // acknowledge the hook before inspecting the payload, so that a malformed event
+  // can never leave the hook unanswered
+  session.reply();
+
+  if (!speech) {
+    logger.info('transcription event carried no speech, ignoring');
+    return;
+  }
+
+  assert.ok(speech.is_final, 'expecting only final transcriptions');
+
+  const transcript = speech.alternatives?.[0]?.transcript;
+  if (typeof transcript !== 'string' || transcript.trim() === '') {
+    logger.info('transcription was empty, nothing to translate');
+    return;
+  }
+
+  translateText(logger, transcript, recognizer_b.language, recognizer_a.language)
+    .then((translation) => {
+      if (!translation) return;
+      logger.info({translation}, `translated text, now sending dub command: ${translation}`);
+
+      /* speak the translation to the a party */
+      session.injectCommand('dub', {
+        action: 'sayOnTrack',
+        track: 'a',
+        say: {
+          text: translation,
+          synthesizer: synthesizer_a
+        }
+      });
+      return;
+    })
+    .catch((err) => logger.error({err}, 'Error translating text'));
+};
+
+module.exports = service;
--- a/lib/utils/config.js
+++ b/lib/utils/config.js
@@ -0,0 +1,66 @@
+const assert = require('assert');
+
+// Environment variables are named <PARTY>_<SETTING>; every combination below must be set,
+// otherwise loading this module fails with an assertion error naming the missing variable.
+const PARTY_PREFIXES = ['CALLER', 'CALLED'];
+const REQUIRED_SETTINGS = ['LANGUAGE_NAME', 'LANGUAGE_CODE', 'TTS_VENDOR', 'TTS_VOICE', 'STT_VENDOR'];
+
+for (const prefix of PARTY_PREFIXES) {
+  for (const setting of REQUIRED_SETTINGS) {
+    const name = `${prefix}_${setting}`;
+    assert.ok(process.env[name], `process.env.${name} is required`);
+  }
+}
+
+/**
+ * Builds the language, text-to-speech and speech-to-text settings for one party
+ * from that party's environment variables.
+ *
+ * @param {string} prefix - 'CALLER' or 'CALLED'
+ * @returns {object} settings with preferredLanguageName, tts and stt entries
+ */
+const settingsFor = (prefix) => {
+  const env = (setting) => process.env[`${prefix}_${setting}`];
+  return {
+    preferredLanguageName: env('LANGUAGE_NAME'),
+    tts: {
+      vendor: env('TTS_VENDOR'),
+      language: env('LANGUAGE_CODE'),
+      voice: env('TTS_VOICE')
+    },
+    stt: {
+      vendor: env('STT_VENDOR'),
+      language: env('LANGUAGE_CODE')
+    }
+  };
+};
+
+// 'a' is the calling party, 'b' is the called party
+const speakerSettings = {
+  a: settingsFor('CALLER'),
+  b: settingsFor('CALLED')
+};
+
+// standalone copies of the tts/stt settings, exported under their own names
+const synthesizer_a = {...speakerSettings.a.tts};
+const synthesizer_b = {...speakerSettings.b.tts};
+const recognizer_a = {...speakerSettings.a.stt};
+const recognizer_b = {...speakerSettings.b.stt};
+
+module.exports = {
+  speakerSettings,
+  synthesizer_a,
+  synthesizer_b,
+  recognizer_a,
+  recognizer_b
+};
--- a/lib/utils/translate.js
+++ b/lib/utils/translate.js
@@ -0,0 +1,21 @@
+const { Translate } = require('@google-cloud/translate').v2;
+const translateClient = new Translate();
+
+/**
+ * Translates text from one language to another using the Google Translate client.
+ *
+ * Newlines in the input are flattened to spaces before anything else happens.
+ *
+ * @param {object} logger - logger used for debug output and for reporting failures
+ * @param {string} text - the text to translate
+ * @param {string} sourceLang - language code of `text`
+ * @param {string} targetLang - language code to translate into
+ * @returns {Promise<string>} the translation; the flattened input when both languages
+ *   are the same; an empty string when there is nothing but whitespace to translate
+ * @throws rethrows any error raised by the translation client
+ */
+async function translateText(logger, text, sourceLang, targetLang) {
+  const flattened = text.replace(/\n(?!\s)/g, ' ').replace(/\n\s/g, ' ');
+  if (sourceLang === targetLang) return flattened;
+
+  // nothing to translate: skip the round trip to the translation service
+  if (flattened.trim() === '') return '';
+
+  try {
+    const [translation] = await translateClient.translate(flattened, {
+      from: sourceLang,
+      to: targetLang,
+    });
+    logger.debug(`"${flattened}" => "${translation}"`);
+    return translation;
+  } catch (error) {
+    // log the error as a structured `err` field; passed as a trailing argument
+    // without a format placeholder it would not reliably appear in the log output
+    logger.info({err: error}, 'Error translating text');
+    throw error;
+  }
+}
+
+module.exports = translateText;
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -0,0 +1,21 @@
+{
+  "name": "realtime-translator",
+  "version": "0.0.1",
+  "description": "jambonz websocket application",
+  "main": "app.js",
+  "scripts": {
+    "start": "node app",
+    "jslint": "eslint app.js lib"
+  },
+  "author": "",
+  "license": "MIT",
+  "dependencies": {
+    "@google-cloud/translate": "^8.1.0",
+    "@jambonz/node-client-ws": "^0.1.42",
+    "pino": "^8.19.0"
+  },
+  "devDependencies": {
+    "eslint": "^8.57.0",
+    "eslint-plugin-promise": "^6.1.1"
+  }
+}