initial checkin

This commit is contained in:
Dave Horton
2024-04-20 10:17:19 -04:00
commit 29d21cb3dd
11 changed files with 4327 additions and 0 deletions

4
lib/routes/index.js Normal file
View File

@@ -0,0 +1,4 @@
// Route registrar: mounts every websocket application on the shared
// jambonz service factory. Only the /translator app exists today.
module.exports = ({logger, makeService}) => {
require('./translator')({logger, makeService});
};

172
lib/routes/translator.js Normal file
View File

@@ -0,0 +1,172 @@
const assert = require('assert');
const translateText = require('../utils/translate');
const {
synthesizer_a,
synthesizer_b,
recognizer_a,
recognizer_b
} = require('../utils/config');
/**
 * jambonz websocket application providing live two-way call translation.
 *
 * For every new incoming call we:
 *  - answer and attenuate the remote audio, transcribing the caller's speech
 *  - add a "dub" audio track to each leg to carry the synthesized translation
 *  - dial the called party with the mirror-image transcription configuration
 * Each party hears the other's original (attenuated) speech followed by the
 * translation spoken on the extra track.
 *
 * @param {object} deps
 * @param {object} deps.logger - pino-style logger
 * @param {Function} deps.makeService - jambonz websocket service factory
 */
const service = ({logger, makeService}) => {
  const svc = makeService({path: '/translator'});

  svc.on('session:new', async(session) => {
    session.locals = {logger: logger.child({call_sid: session.call_sid})};
    session.locals.logger.info({session}, `new incoming call: ${session.call_sid}`);

    session
      .on('/transcription-a', onTranscribeALeg.bind(null, session))
      .on('/transcription-b', onTranscribeBLeg.bind(null, session))
      .on('call:status', onCallStatus.bind(null, session))
      .on('close', onClose.bind(null, session))
      .on('error', onError.bind(null, session));

    /**
     * Outdial and set up translation on both legs.
     * Create an additional audio track on both legs for the translated speech.
     * Each party will hear the untranslated speech of the other party,
     * followed by the translation.
     */
    session
      // answer the call
      .answer()
      // turn down the volume of the remote party, to make the translator's voice the focus
      // also enable transcriptions of the caller's speech
      .config({
        boostAudioSignal: '-10 dB',
        recognizer: recognizer_a,
        transcribe: {
          enable: true,
          transcriptionHook: '/transcription-a'
        }
      })
      // add an additional audio track to the call, which will carry the translator's voice
      .dub({
        action: 'addTrack',
        track: 'a'
      })
      // dial the called party, and set similar options on that leg of the call
      .dial({
        target: [
          {
            type: 'user',
            // destination is configurable; falls back to the original hard-coded user
            name: process.env.CALLED_PARTY || 'daveh@sip.jambonz.xyz'
          }
        ],
        boostAudioSignal: '-10 dB',
        transcribe: {
          transcriptionHook: '/transcription-b',
          channel: 2,
          recognizer: {
            ...recognizer_b,
            deepgramOptions: {
              endpointing: 500,
              utteranceEndMs: 1000,
              smartFormatting: true,
            }
          }
        },
        // plain array literal (was wrapped in redundant parentheses)
        dub: [
          {
            action: 'addTrack',
            track: 'b',
          }
        ],
      })
      // hangup if dial fails, or when it completes
      .hangup()
      .reply();
  });
};
/* Log session teardown, including the close code and reason supplied by jambonz. */
const onClose = (session, code, reason) => {
  session.locals.logger.info(
    {session, code, reason},
    `session ${session.call_sid} closed`
  );
};
/* Log a session error; uses error level (was info) so failures surface in monitoring,
 * consistent with the logger.error({err}, ...) style used in the transcription handlers. */
const onError = (session, err) => {
  const {logger} = session.locals;
  logger.error({err}, `session ${session.call_sid} received error`);
};
/* Track call-status events; capture the B-leg call_sid from the first
 * outbound status notification so dub commands can target that leg. */
const onCallStatus = (session, evt) => {
  const {logger} = session.locals;
  logger.info({evt}, 'call status');

  const haveBLeg = Boolean(session.locals.call_sid_b);
  if (haveBLeg || evt.direction !== 'outbound') return;

  session.locals.call_sid_b = evt.call_sid;
  logger.info(`call_sid for b leg is ${session.locals.call_sid_b}`);
};
/* Handle a final transcription of the caller (channel 1): translate it from
 * the caller's language to the called party's language and speak it on the
 * B leg's dub track. Skips the dub when the B leg is not yet established. */
const onTranscribeALeg = (session, evt) => {
  const {logger, call_sid_b} = session.locals;
  const {speech} = evt;
  const transcript = speech.alternatives[0].transcript;

  logger.info({speech}, 'transcription received for channel 1');
  session.reply();
  assert.ok(speech.is_final, 'expecting only final transcriptions');

  if (!call_sid_b) {
    logger.info('no call_sid_b, not sending dub command');
    return;
  }

  translateText(logger, transcript, recognizer_a.language, recognizer_b.language)
    .then((translation) => {
      if (!translation) return;
      logger.info({translation},
        `translated text, now sending dub command: ${translation} for call_sid_b ${call_sid_b}`);
      /* speak the translation to the b party */
      session.injectCommand('dub', {
        action: 'sayOnTrack',
        track: 'b',
        say: {
          text: translation,
          synthesizer: synthesizer_b
        }
      }, call_sid_b);
    })
    .catch((err) => logger.error({err}, 'Error translating text'));
};
/* Handle a final transcription of the called party (channel 2): translate it
 * back into the caller's language and speak it on the A leg's dub track. */
const onTranscribeBLeg = (session, evt) => {
  const {logger} = session.locals;
  const {speech} = evt;
  const transcript = speech.alternatives[0].transcript;

  logger.info({speech}, 'transcription received for channel 2');
  session.reply();
  assert.ok(speech.is_final, 'expecting only final transcriptions');

  translateText(logger, transcript, recognizer_b.language, recognizer_a.language)
    .then((translation) => {
      if (!translation) return;
      logger.info({translation}, `translated text, now sending dub command: ${translation}`);
      /* speak the translation to the a party */
      session.injectCommand('dub', {
        action: 'sayOnTrack',
        track: 'a',
        say: {
          text: translation,
          synthesizer: synthesizer_a
        }
      });
    })
    .catch((err) => logger.error({err}, 'Error translating text'));
};
module.exports = service;

66
lib/utils/config.js Normal file
View File

@@ -0,0 +1,66 @@
const assert = require('assert');
assert.ok(process.env.CALLER_LANGUAGE_NAME, 'process.env.CALLER_LANGUAGE_NAME is required');
assert.ok(process.env.CALLER_LANGUAGE_CODE, 'process.env.CALLER_LANGUAGE_CODE is required');
assert.ok(process.env.CALLER_TTS_VENDOR, 'process.env.CALLER_TTS_VENDOR is required');
assert.ok(process.env.CALLER_TTS_VOICE, 'process.env.CALLER_TTS_VOICE is required');
assert.ok(process.env.CALLER_STT_VENDOR, 'process.env.CALLER_STT_VENDOR is required');
assert.ok(process.env.CALLED_LANGUAGE_NAME, 'process.env.CALLED_LANGUAGE_NAME is required');
assert.ok(process.env.CALLED_LANGUAGE_CODE, 'process.env.CALLED_LANGUAGE_CODE is required');
assert.ok(process.env.CALLED_TTS_VENDOR, 'process.env.CALLED_TTS_VENDOR is required');
assert.ok(process.env.CALLED_TTS_VOICE, 'process.env.CALLED_TTS_VOICE is required');
assert.ok(process.env.CALLED_STT_VENDOR, 'process.env.CALLED_STT_VENDOR is required');
const speakerSettings = {
a: {
preferredLanguageName: process.env.CALLER_LANGUAGE_NAME,
tts: {
vendor: process.env.CALLER_TTS_VENDOR,
language: process.env.CALLER_LANGUAGE_CODE,
voice: process.env.CALLER_TTS_VOICE
},
stt: {
vendor: process.env.CALLER_STT_VENDOR,
language: process.env.CALLER_LANGUAGE_CODE
}
},
b: {
preferredLanguageName: process.env.CALLED_LANGUAGE_NAME,
tts: {
vendor: process.env.CALLED_TTS_VENDOR,
language: process.env.CALLED_LANGUAGE_CODE,
voice: process.env.CALLED_TTS_VOICE
},
stt: {
vendor: process.env.CALLED_STT_VENDOR,
language: process.env.CALLED_LANGUAGE_CODE
}
}
};
const synthesizer_a = {
vendor: speakerSettings.a.tts.vendor,
language: speakerSettings.a.tts.language,
voice: speakerSettings.a.tts.voice
};
const synthesizer_b = {
vendor: speakerSettings.b.tts.vendor,
language: speakerSettings.b.tts.language,
voice: speakerSettings.b.tts.voice
};
const recognizer_a = {
vendor: speakerSettings.a.stt.vendor,
language: speakerSettings.a.stt.language
};
const recognizer_b = {
vendor: speakerSettings.b.stt.vendor,
language: speakerSettings.b.stt.language
};
module.exports = {
speakerSettings,
synthesizer_a,
synthesizer_b,
recognizer_a,
recognizer_b
};

21
lib/utils/translate.js Normal file
View File

@@ -0,0 +1,21 @@
// Google Cloud Translate (v2 "basic" API) client.
// NOTE(review): constructed with no options, so it presumably relies on
// application-default credentials (e.g. GOOGLE_APPLICATION_CREDENTIALS) — confirm.
const { Translate } = require('@google-cloud/translate').v2;
const translateClient = new Translate();
/**
 * Translate text between two languages with Google Cloud Translate.
 *
 * Hard line-breaks are collapsed to spaces first so the API sees
 * continuous sentences.
 *
 * @param {object} logger - pino-style logger
 * @param {string} text - text to translate
 * @param {string} sourceLang - language code of the input text
 * @param {string} targetLang - language code to translate into
 * @returns {Promise<string>} the translation (or the normalized input
 *   unchanged when source and target languages match)
 * @throws rethrows any error from the translate API after logging it
 */
async function translateText(logger, text, sourceLang, targetLang) {
  // collapse newlines to single spaces (whether or not followed by whitespace)
  const normalized = text.replace(/\n(?!\s)/g, ' ').replace(/\n\s/g, ' ');

  // nothing to do when both parties speak the same language
  if (sourceLang === targetLang) return normalized;

  try {
    const [translation] = await translateClient.translate(normalized, {
      from: sourceLang,
      to: targetLang,
    });
    logger.debug(`"${normalized}" => "${translation}"`);
    return translation;
  } catch (err) {
    // pino-style (obj, msg) at error level; the previous
    // logger.info('msg:', error) form dropped the error details entirely
    logger.error({err}, 'Error translating text');
    throw err;
  }
}
module.exports = translateText;