mirror of
https://github.com/jambonz/realtime-translator.git
synced 2025-12-18 20:37:47 +00:00
initial checkin
This commit is contained in:
1
.eslintignore
Normal file
1
.eslintignore
Normal file
@@ -0,0 +1 @@
|
||||
test/*
|
||||
126
.eslintrc.json
Normal file
126
.eslintrc.json
Normal file
@@ -0,0 +1,126 @@
|
||||
{
|
||||
"env": {
|
||||
"node": true,
|
||||
"es6": true
|
||||
},
|
||||
"parserOptions": {
|
||||
"ecmaFeatures": {
|
||||
"jsx": false,
|
||||
"modules": false
|
||||
},
|
||||
"ecmaVersion": 2020
|
||||
},
|
||||
"plugins": ["promise"],
|
||||
"rules": {
|
||||
"promise/always-return": "error",
|
||||
"promise/no-return-wrap": "error",
|
||||
"promise/param-names": "error",
|
||||
"promise/catch-or-return": "error",
|
||||
"promise/no-native": "off",
|
||||
"promise/no-nesting": "warn",
|
||||
"promise/no-promise-in-callback": "warn",
|
||||
"promise/no-callback-in-promise": "warn",
|
||||
"promise/no-return-in-finally": "warn",
|
||||
|
||||
// Possible Errors
|
||||
// http://eslint.org/docs/rules/#possible-errors
|
||||
"comma-dangle": [2, "only-multiline"],
|
||||
"no-control-regex": 2,
|
||||
"no-debugger": 2,
|
||||
"no-dupe-args": 2,
|
||||
"no-dupe-keys": 2,
|
||||
"no-duplicate-case": 2,
|
||||
"no-empty-character-class": 2,
|
||||
"no-ex-assign": 2,
|
||||
"no-extra-boolean-cast" : 2,
|
||||
"no-extra-parens": [2, "functions"],
|
||||
"no-extra-semi": 2,
|
||||
"no-func-assign": 2,
|
||||
"no-invalid-regexp": 2,
|
||||
"no-irregular-whitespace": 2,
|
||||
"no-negated-in-lhs": 2,
|
||||
"no-obj-calls": 2,
|
||||
"no-proto": 2,
|
||||
"no-unexpected-multiline": 2,
|
||||
"no-unreachable": 2,
|
||||
"use-isnan": 2,
|
||||
"valid-typeof": 2,
|
||||
|
||||
// Best Practices
|
||||
// http://eslint.org/docs/rules/#best-practices
|
||||
"no-fallthrough": 2,
|
||||
"no-octal": 2,
|
||||
"no-redeclare": 2,
|
||||
"no-self-assign": 2,
|
||||
"no-unused-labels": 2,
|
||||
|
||||
// Strict Mode
|
||||
// http://eslint.org/docs/rules/#strict-mode
|
||||
"strict": [2, "never"],
|
||||
|
||||
// Variables
|
||||
// http://eslint.org/docs/rules/#variables
|
||||
"no-delete-var": 2,
|
||||
"no-undef": 2,
|
||||
"no-unused-vars": [2, {"args": "none"}],
|
||||
|
||||
// Node.js and CommonJS
|
||||
// http://eslint.org/docs/rules/#nodejs-and-commonjs
|
||||
"no-mixed-requires": 2,
|
||||
"no-new-require": 2,
|
||||
"no-path-concat": 2,
|
||||
"no-restricted-modules": [2, "sys", "_linklist"],
|
||||
|
||||
// Stylistic Issues
|
||||
// http://eslint.org/docs/rules/#stylistic-issues
|
||||
"comma-spacing": 2,
|
||||
"eol-last": 2,
|
||||
"indent": [2, 2, {"SwitchCase": 1}],
|
||||
"keyword-spacing": 2,
|
||||
"max-len": [2, 120, 2],
|
||||
"new-parens": 2,
|
||||
"no-mixed-spaces-and-tabs": 2,
|
||||
"no-multiple-empty-lines": [2, {"max": 2}],
|
||||
"no-trailing-spaces": [2, {"skipBlankLines": false }],
|
||||
"quotes": [2, "single", "avoid-escape"],
|
||||
"semi": 2,
|
||||
"space-before-blocks": [2, "always"],
|
||||
"space-before-function-paren": [2, "never"],
|
||||
"space-in-parens": [2, "never"],
|
||||
"space-infix-ops": 2,
|
||||
"space-unary-ops": 2,
|
||||
|
||||
// ECMAScript 6
|
||||
// http://eslint.org/docs/rules/#ecmascript-6
|
||||
"arrow-parens": [2, "always"],
|
||||
"arrow-spacing": [2, {"before": true, "after": true}],
|
||||
"constructor-super": 2,
|
||||
"no-class-assign": 2,
|
||||
"no-confusing-arrow": 2,
|
||||
"no-const-assign": 2,
|
||||
"no-dupe-class-members": 2,
|
||||
"no-new-symbol": 2,
|
||||
"no-this-before-super": 2,
|
||||
"prefer-const": 2
|
||||
},
|
||||
"globals": {
|
||||
"DTRACE_HTTP_CLIENT_REQUEST" : false,
|
||||
"LTTNG_HTTP_CLIENT_REQUEST" : false,
|
||||
"COUNTER_HTTP_CLIENT_REQUEST" : false,
|
||||
"DTRACE_HTTP_CLIENT_RESPONSE" : false,
|
||||
"LTTNG_HTTP_CLIENT_RESPONSE" : false,
|
||||
"COUNTER_HTTP_CLIENT_RESPONSE" : false,
|
||||
"DTRACE_HTTP_SERVER_REQUEST" : false,
|
||||
"LTTNG_HTTP_SERVER_REQUEST" : false,
|
||||
"COUNTER_HTTP_SERVER_REQUEST" : false,
|
||||
"DTRACE_HTTP_SERVER_RESPONSE" : false,
|
||||
"LTTNG_HTTP_SERVER_RESPONSE" : false,
|
||||
"COUNTER_HTTP_SERVER_RESPONSE" : false,
|
||||
"DTRACE_NET_STREAM_END" : false,
|
||||
"LTTNG_NET_STREAM_END" : false,
|
||||
"COUNTER_NET_SERVER_CONNECTION_CLOSE" : false,
|
||||
"DTRACE_NET_SERVER_CONNECTION" : false,
|
||||
"LTTNG_NET_SERVER_CONNECTION" : false,
|
||||
"COUNTER_NET_SERVER_CONNECTION" : false
|
||||
}
|
||||
}
|
||||
40
.gitignore
vendored
Normal file
40
.gitignore
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
|
||||
# Runtime data
|
||||
pids
|
||||
*.pid
|
||||
*.seed
|
||||
|
||||
# github pages site
|
||||
_site
|
||||
|
||||
#transient test cases
|
||||
examples/nosave.*.js
|
||||
|
||||
# Directory for instrumented libs generated by jscoverage/JSCover
|
||||
lib-cov
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
.nyc_output/
|
||||
|
||||
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
|
||||
.grunt
|
||||
|
||||
# node-waf configuration
|
||||
.lock-wscript
|
||||
|
||||
# Compiled binary addons (http://nodejs.org/api/addons.html)
|
||||
build/Release
|
||||
|
||||
# Dependency directory
|
||||
# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git
|
||||
node_modules
|
||||
|
||||
.DS_Store
|
||||
|
||||
examples/*
|
||||
|
||||
ecosystem.config.js
|
||||
32
README.md
Normal file
32
README.md
Normal file
@@ -0,0 +1,32 @@
|
||||
# realtime-translator
|
||||
|
||||
This application demonstrates the use of the jambonz [dub verb](https://www.jambonz.org/docs/webhooks/dub/) to create an application where each party on the call receives two distinct audio tracks:
|
||||
|
||||
- one from the remote party
|
||||
- one from a translator that is listening in on the call
|
||||
|
||||
The scenario is intended to mimic how a contact center staffed with English-speaking agents would use the services of an automated translator to handle calls from Vietnamese-speaking callers.
|
||||
|
||||
## Install
|
||||
This is a jambonz Node.js websocket application that uses environment variables to configure the choice of languages. Additionally, you will need a google json key file to use the google translate service.
|
||||
|
||||
The example below starts the application listening on port 3000 with languages set according to the scenario described above.
|
||||
```
|
||||
npm ci
|
||||
|
||||
WS_PORT=3000 \
|
||||
GOOGLE_APPLICATION_CREDENTIALS='path-to-your-key.json' \
|
||||
CALLER_LANGUAGE_NAME='Vietnamese' \
|
||||
CALLER_LANGUAGE_CODE='vi-VN' \
|
||||
CALLER_TTS_VENDOR='microsoft' \
|
||||
CALLER_TTS_VOICE='vi-VN-NamMinhNeural' \
|
||||
CALLER_STT_VENDOR='microsoft' \
|
||||
CALLED_LANGUAGE_NAME='English' \
|
||||
CALLED_LANGUAGE_CODE='en-US' \
|
||||
CALLED_TTS_VENDOR='microsoft' \
|
||||
CALLED_TTS_VOICE='en-US-AndrewMultilingualNeural' \
|
||||
CALLED_STT_VENDOR='deepgram' \
|
||||
npm start
|
||||
```
|
||||
|
||||
On the jambonz server, create an application with url `wss://jambonz-apps.drachtio.org/translator`.
|
||||
12
app.js
Normal file
12
app.js
Normal file
@@ -0,0 +1,12 @@
|
||||
// Entry point: creates the HTTP server that carries the jambonz websocket endpoint,
// registers the application routes on it, and starts listening.
const http = require('http');
const {createEndpoint} = require('@jambonz/node-client-ws');
const pino = require('pino');

// LOGLEVEL and WS_PORT are optional; they fall back to 'info' and 3000.
const logger = pino({level: process.env.LOGLEVEL || 'info'});
const port = process.env.WS_PORT || 3000;

const server = http.createServer();
const makeService = createEndpoint({server, logger});

// each route module registers its own path through makeService
require('./lib/routes')({logger, makeService});

server.listen(port, () => {
  logger.info(`jambonz websocket server listening at http://localhost:${port}`);
});
|
||||
4
lib/routes/index.js
Normal file
4
lib/routes/index.js
Normal file
@@ -0,0 +1,4 @@
|
||||
module.exports = ({logger, makeService}) => {
|
||||
require('./translator')({logger, makeService});
|
||||
};
|
||||
|
||||
172
lib/routes/translator.js
Normal file
172
lib/routes/translator.js
Normal file
@@ -0,0 +1,172 @@
|
||||
const assert = require('assert');
|
||||
const translateText = require('../utils/translate');
|
||||
const {
|
||||
synthesizer_a,
|
||||
synthesizer_b,
|
||||
recognizer_a,
|
||||
recognizer_b
|
||||
} = require('../utils/config');
|
||||
|
||||
|
||||
/**
 * Registers the '/translator' websocket service.
 *
 * For every new session this wires the session events to the module-level handlers
 * and sends the initial verb sequence: answer, config, dub, dial, hangup.
 *
 * @param {object} deps
 * @param {object} deps.logger - parent logger; a child bound to the call_sid is stored in session.locals
 * @param {Function} deps.makeService - factory that returns the service for a given path
 */
const service = ({logger, makeService}) => {
  const svc = makeService({path: '/translator'});

  svc.on('session:new', async(session) => {
    const callLogger = logger.child({call_sid: session.call_sid});
    session.locals = {logger: callLogger};
    callLogger.info({session}, `new incoming call: ${session.call_sid}`);

    // every handler receives the session as its first argument
    const handlers = [
      ['/transcription-a', onTranscribeALeg],
      ['/transcription-b', onTranscribeBLeg],
      ['call:status', onCallStatus],
      ['close', onClose],
      ['error', onError]
    ];
    for (const [eventName, handler] of handlers) {
      session.on(eventName, handler.bind(null, session));
    }

    /**
     * Outdial and set up translation on both legs.
     * Create an additional audio track on both legs for the translated speech.
     * Each party will hear the untranslated speech of the other party, followed by the translation.
     */

    // turn down the volume of the remote party, to make the translator's voice the focus
    // also enable transcriptions of the caller's speech
    const callerLegConfig = {
      boostAudioSignal: '-10 dB',
      recognizer: recognizer_a,
      transcribe: {
        enable: true,
        transcriptionHook: '/transcription-a'
      }
    };

    // additional audio track on the caller leg, which will carry the translator's voice
    const callerLegTrack = {action: 'addTrack', track: 'a'};

    // the called party gets similar options: lowered volume, transcription and its own track
    const calledLegDial = {
      target: [{type: 'user', name: 'daveh@sip.jambonz.xyz'}],
      boostAudioSignal: '-10 dB',
      transcribe: {
        transcriptionHook: '/transcription-b',
        channel: 2,
        recognizer: {
          ...recognizer_b,
          deepgramOptions: {
            endpointing: 500,
            utteranceEndMs: 1000,
            smartFormatting: true
          }
        }
      },
      dub: [{action: 'addTrack', track: 'b'}]
    };

    session
      .answer()
      .config(callerLegConfig)
      .dub(callerLegTrack)
      .dial(calledLegDial)
      // hangup if dial fails, or when it completes
      .hangup()
      .reply();
  });
};
|
||||
|
||||
/**
 * 'close' handler: records that the session ended, with the close code and reason.
 * @param {object} session - the session, carrying its logger in session.locals
 * @param {*} code - close code passed with the event
 * @param {*} reason - close reason passed with the event
 */
const onClose = (session, code, reason) => {
  const log = session.locals.logger;
  const details = {session, code, reason};
  log.info(details, `session ${session.call_sid} closed`);
};
|
||||
|
||||
/**
 * 'error' handler: logs an error reported on the session.
 *
 * Logged at error level so it is not filtered out when LOGLEVEL is raised above 'info'.
 *
 * @param {object} session - the session, carrying its logger in session.locals
 * @param {Error} err - the error passed with the event
 */
const onError = (session, err) => {
  const {logger} = session.locals;
  logger.error({err}, `session ${session.call_sid} received error`);
};
|
||||
|
||||
/**
 * 'call:status' handler: logs every status event and remembers the call_sid of the
 * first outbound event in session.locals.call_sid_b.
 * @param {object} session - the session, carrying logger and call_sid_b in session.locals
 * @param {object} evt - status event; its direction and call_sid are read here
 */
const onCallStatus = (session, evt) => {
  const {locals} = session;
  locals.logger.info({evt}, 'call status');

  // only the first outbound event sets the b leg; later ones leave it untouched
  const isFirstOutbound = evt.direction === 'outbound' && !locals.call_sid_b;
  if (!isFirstOutbound) return;

  locals.call_sid_b = evt.call_sid;
  locals.logger.info(`call_sid for b leg is ${locals.call_sid_b}`);
};
|
||||
|
||||
/**
 * Acknowledges a transcription hook and extracts the recognized text.
 *
 * The hook is acknowledged before the payload is inspected, so it is still answered
 * when the payload has no alternatives or is rejected by the assertion below.
 *
 * @param {object} session - the session, carrying its logger in session.locals
 * @param {object} evt - transcription event; evt.speech holds is_final and alternatives
 * @param {number} channel - channel number, used only in the log message
 * @returns {string|undefined} the first alternative's transcript, or undefined/empty when there is none
 * @throws {AssertionError} when the transcription is not final
 */
const acknowledgeTranscription = (session, evt, channel) => {
  const {logger} = session.locals;
  const {speech} = evt;
  logger.info({speech}, `transcription received for channel ${channel}`);

  session.reply();

  assert.ok(speech?.is_final, 'expecting only final transcriptions');

  const transcript = speech.alternatives?.[0]?.transcript;
  if (!transcript) logger.info('transcription has no text, nothing to translate');
  return transcript;
};

/**
 * Translates a transcript and speaks the result on a dub track.
 *
 * Failures from the translation or from the dub command are logged, never thrown.
 *
 * @param {object} session - the session used to inject the dub command
 * @param {object} opts
 * @param {string} opts.transcript - text to translate
 * @param {object} opts.from - recognizer of the speaker; its language is the source language
 * @param {object} opts.to - recognizer of the listener; its language is the target language
 * @param {string} opts.track - dub track to speak on
 * @param {object} opts.synthesizer - synthesizer used for the spoken translation
 * @param {string} [opts.callSid] - call_sid the command is addressed to; omitted for the session's own leg
 * @returns {Promise<void>}
 */
const speakTranslation = async(session, {transcript, from, to, track, synthesizer, callSid}) => {
  const {logger} = session.locals;
  try {
    const translation = await translateText(logger, transcript, from.language, to.language);
    if (!translation) return;

    const addressedTo = callSid ? ` for call_sid_b ${callSid}` : '';
    logger.info({translation}, `translated text, now sending dub command: ${translation}${addressedTo}`);

    const command = {
      action: 'sayOnTrack',
      track,
      say: {
        text: translation,
        synthesizer
      }
    };
    // only pass a call_sid when there is one, so the argument list matches a plain two-argument call
    const args = callSid ? ['dub', command, callSid] : ['dub', command];
    session.injectCommand(...args);
  } catch (err) {
    logger.error({err}, 'Error translating text');
  }
};

/**
 * '/transcription-a' handler: translates the caller's speech and speaks it to the b party.
 * Nothing is spoken until the b leg's call_sid is known (session.locals.call_sid_b).
 * @param {object} session - the session, carrying logger and call_sid_b in session.locals
 * @param {object} evt - transcription event
 */
const onTranscribeALeg = (session, evt) => {
  const {logger, call_sid_b} = session.locals;
  const transcript = acknowledgeTranscription(session, evt, 1);
  if (!transcript) return;

  if (!call_sid_b) {
    logger.info('no call_sid_b, not sending dub command');
    return;
  }

  /* speak the translation to the b party */
  speakTranslation(session, {
    transcript,
    from: recognizer_a,
    to: recognizer_b,
    track: 'b',
    synthesizer: synthesizer_b,
    callSid: call_sid_b
  });
};

/**
 * '/transcription-b' handler: translates the called party's speech and speaks it to the a party.
 * @param {object} session - the session, carrying its logger in session.locals
 * @param {object} evt - transcription event
 */
const onTranscribeBLeg = (session, evt) => {
  const transcript = acknowledgeTranscription(session, evt, 2);
  if (!transcript) return;

  /* speak the translation to the a party */
  speakTranslation(session, {
    transcript,
    from: recognizer_b,
    to: recognizer_a,
    track: 'a',
    synthesizer: synthesizer_a
  });
};
|
||||
|
||||
module.exports = service;
|
||||
66
lib/utils/config.js
Normal file
66
lib/utils/config.js
Normal file
@@ -0,0 +1,66 @@
|
||||
const assert = require('assert');
|
||||
|
||||
assert.ok(process.env.CALLER_LANGUAGE_NAME, 'process.env.CALLER_LANGUAGE_NAME is required');
|
||||
assert.ok(process.env.CALLER_LANGUAGE_CODE, 'process.env.CALLER_LANGUAGE_CODE is required');
|
||||
assert.ok(process.env.CALLER_TTS_VENDOR, 'process.env.CALLER_TTS_VENDOR is required');
|
||||
assert.ok(process.env.CALLER_TTS_VOICE, 'process.env.CALLER_TTS_VOICE is required');
|
||||
assert.ok(process.env.CALLER_STT_VENDOR, 'process.env.CALLER_STT_VENDOR is required');
|
||||
assert.ok(process.env.CALLED_LANGUAGE_NAME, 'process.env.CALLED_LANGUAGE_NAME is required');
|
||||
assert.ok(process.env.CALLED_LANGUAGE_CODE, 'process.env.CALLED_LANGUAGE_CODE is required');
|
||||
assert.ok(process.env.CALLED_TTS_VENDOR, 'process.env.CALLED_TTS_VENDOR is required');
|
||||
assert.ok(process.env.CALLED_TTS_VOICE, 'process.env.CALLED_TTS_VOICE is required');
|
||||
assert.ok(process.env.CALLED_STT_VENDOR, 'process.env.CALLED_STT_VENDOR is required');
|
||||
|
||||
const speakerSettings = {
|
||||
a: {
|
||||
preferredLanguageName: process.env.CALLER_LANGUAGE_NAME,
|
||||
tts: {
|
||||
vendor: process.env.CALLER_TTS_VENDOR,
|
||||
language: process.env.CALLER_LANGUAGE_CODE,
|
||||
voice: process.env.CALLER_TTS_VOICE
|
||||
},
|
||||
stt: {
|
||||
vendor: process.env.CALLER_STT_VENDOR,
|
||||
language: process.env.CALLER_LANGUAGE_CODE
|
||||
}
|
||||
},
|
||||
b: {
|
||||
preferredLanguageName: process.env.CALLED_LANGUAGE_NAME,
|
||||
tts: {
|
||||
vendor: process.env.CALLED_TTS_VENDOR,
|
||||
language: process.env.CALLED_LANGUAGE_CODE,
|
||||
voice: process.env.CALLED_TTS_VOICE
|
||||
},
|
||||
stt: {
|
||||
vendor: process.env.CALLED_STT_VENDOR,
|
||||
language: process.env.CALLED_LANGUAGE_CODE
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const synthesizer_a = {
|
||||
vendor: speakerSettings.a.tts.vendor,
|
||||
language: speakerSettings.a.tts.language,
|
||||
voice: speakerSettings.a.tts.voice
|
||||
};
|
||||
const synthesizer_b = {
|
||||
vendor: speakerSettings.b.tts.vendor,
|
||||
language: speakerSettings.b.tts.language,
|
||||
voice: speakerSettings.b.tts.voice
|
||||
};
|
||||
const recognizer_a = {
|
||||
vendor: speakerSettings.a.stt.vendor,
|
||||
language: speakerSettings.a.stt.language
|
||||
};
|
||||
const recognizer_b = {
|
||||
vendor: speakerSettings.b.stt.vendor,
|
||||
language: speakerSettings.b.stt.language
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
speakerSettings,
|
||||
synthesizer_a,
|
||||
synthesizer_b,
|
||||
recognizer_a,
|
||||
recognizer_b
|
||||
};
|
||||
21
lib/utils/translate.js
Normal file
21
lib/utils/translate.js
Normal file
@@ -0,0 +1,21 @@
|
||||
const { Translate } = require('@google-cloud/translate').v2;
|
||||
const translateClient = new Translate();
|
||||
|
||||
/**
 * Translates text between two languages with the module's Google Translate (v2) client.
 *
 * Newlines in the input are flattened to spaces first. When source and target language
 * are the same, the flattened text is returned without calling the translation service.
 *
 * @param {object} logger - logger with debug() and info()
 * @param {string} text - text to translate
 * @param {string} sourceLang - language of the text
 * @param {string} targetLang - language to translate into
 * @returns {Promise<string>} the translated (or merely flattened) text
 * @throws rethrows any error raised by the translation call, after logging it
 */
async function translateText(logger, text, sourceLang, targetLang) {
  // a newline, together with one whitespace character directly after it, becomes a single space
  const flattened = text.replace(/\n(?!\s)/g, ' ').replace(/\n\s/g, ' ');
  if (sourceLang === targetLang) return flattened;

  try {
    const [translation] = await translateClient.translate(flattened, {
      from: sourceLang,
      to: targetLang
    });
    logger.debug(`"${flattened}" => "${translation}"`);
    return translation;
  } catch (error) {
    // the error goes in the merging object: as a trailing argument after a message
    // without placeholders it would not appear in the log line
    logger.info({err: error}, 'Error translating text');
    throw error;
  }
}
|
||||
|
||||
module.exports = translateText;
|
||||
3832
package-lock.json
generated
Normal file
3832
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
package.json
Normal file
21
package.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "realtime-translator",
|
||||
"version": "0.0.1",
|
||||
"description": "jambonz websocket application",
|
||||
"main": "app.js",
|
||||
"scripts": {
|
||||
"start": "node app",
|
||||
"jslint": "eslint app.js lib"
|
||||
},
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@google-cloud/translate": "^8.1.0",
|
||||
"@jambonz/node-client-ws": "^0.1.42",
|
||||
"pino": "^8.19.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^8.57.0",
|
||||
"eslint-plugin-promise": "^6.1.1"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user