Compare commits

..

12 Commits

Author SHA1 Message Date
Dave Horton
c71a58dcb5 wip 2023-12-01 11:06:34 -05:00
Dave Horton
8022f9d16c aws pii support 2023-12-01 10:32:19 -05:00
Dave Horton
c4dcb051be wip 2023-12-01 09:52:34 -05:00
Dave Horton
29dc2f7052 cleanup alerts 2023-12-01 09:52:34 -05:00
Dave Horton
7718f50877 report aws stt errors 2023-12-01 09:52:34 -05:00
Dave Horton
11283edf6f fix bug in prev commit 2023-12-01 09:52:34 -05:00
Dave Horton
6043921067 aws stt: calculate transcript-level confidence based on word confidence scores 2023-12-01 09:52:34 -05:00
Dave Horton
8ad947c0fd wip 2023-12-01 09:52:34 -05:00
Dave Horton
63c925c731 wip 2023-12-01 09:52:34 -05:00
Dave Horton
e8647b2b55 when using AWS for STT generate and use a session token 2023-12-01 09:52:34 -05:00
Dave Horton
dab83423cf update speech utils 2023-12-01 09:52:34 -05:00
Dave Horton
864a673ea0 get an aws security token for STT 2023-12-01 09:52:34 -05:00
12 changed files with 520 additions and 670 deletions

View File

@@ -116,9 +116,12 @@ const customSanitizeFunction = (value) => {
/* trims characters at the beginning and at the end of a string */
value = value.trim();
/* Verify strings including 'http' via new URL */
/* We don't escape URLs but verify them via new URL */
if (value.includes('http')) {
value = new URL(value).toString();
} else {
/* replaces <, >, &, ', " and / with their corresponding HTML entities */
value = escape(value);
}
}
} catch (error) {

View File

@@ -160,7 +160,7 @@ module.exports = function(srf, logger) {
};
logger.info({callId, metadata, sdp}, 'successfully parsed SIPREC payload');
} catch (err) {
logger.info({err, callId}, 'Error parsing multipart payload');
logger.info({callId}, 'Error parsing multipart payload');
return res.send(503);
}
}

View File

@@ -170,18 +170,6 @@ class CallSession extends Emitter {
return this.application.notifier;
}
/**
* synthesizer
*
* Returns whatever value was last stored through the `synthesizer` setter
* (kept in the `_synthesizer` field); undefined if it was never assigned.
*/
get synthesizer() {
return this._synthesizer;
}
/**
* Stores the given synthesizer value on this call session.
* @param {*} synth - value saved as-is to the `_synthesizer` field
*   (presumably the synthesizer options from a config verb -- confirm against callers)
*/
set synthesizer(synth) {
this._synthesizer = synth;
}
/**
* default vendor to use for speech synthesis if not provided in the app
*/
@@ -264,16 +252,6 @@ class CallSession extends Emitter {
set fallbackSpeechRecognizerVendor(vendor) {
this.application.fallback_speech_recognizer_vendor = vendor;
}
/**
* recognizer
*
* Returns whatever value was last stored through the `recognizer` setter
* (kept in the `_recognizer` field); undefined if it was never assigned.
*/
get recognizer() {
return this._recognizer;
}
/**
* Stores the given recognizer value on this call session.
* @param {*} rec - value saved as-is to the `_recognizer` field
*   (presumably the recognizer options from a config verb -- confirm against callers)
*/
set recognizer(rec) {
this._recognizer = rec;
}
/**
* default vendor to use for speech recognition if not provided in the app
*/

View File

@@ -109,7 +109,6 @@ class TaskConfig extends Task {
});
if (this.hasSynthesizer) {
cs.synthesizer = this.synthesizer;
cs.speechSynthesisVendor = this.synthesizer.vendor !== 'default'
? this.synthesizer.vendor
: cs.speechSynthesisVendor;
@@ -139,7 +138,6 @@ class TaskConfig extends Task {
this.logger.info({synthesizer: this.synthesizer}, 'Config: updated synthesizer');
}
if (this.hasRecognizer) {
cs.recognizer = this.recognizer;
cs.speechRecognizerVendor = this.recognizer.vendor !== 'default'
? this.recognizer.vendor
: cs.speechRecognizerVendor;

View File

@@ -103,7 +103,7 @@ class TaskGather extends SttTask {
async exec(cs, {ep}) {
this.logger.debug({options: this.data}, 'Gather:exec');
await super.exec(cs, {ep});
await super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
@@ -141,6 +141,59 @@ class TaskGather extends SttTask {
this.interim = true;
this.logger.debug('Gather:exec - early hints match enabled');
}
this.ep = ep;
if ('default' === this.vendor || !this.vendor) {
this.vendor = cs.speechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.vendor = this.vendor;
}
if ('default' === this.language || !this.language) {
this.language = cs.speechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.language = this.language;
}
if ('default' === this.label || !this.label) {
this.label = cs.speechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.label = this.label;
}
// Fallback options
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
}
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
}
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
}
if (!this.data.recognizer.vendor) {
this.data.recognizer.vendor = this.vendor;
}
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
// By default, application saves cobalt model in language
this.data.recognizer.model = cs.speechRecognizerLanguage;
}
if (this.needsStt && !this.sttCredentials) {
try {
this.sttCredentials = await this._initSpeechCredentials(cs, this.vendor, this.label);
} catch (error) {
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
await this._fallback();
} else {
throw error;
}
}
}
/* when using cobalt model is required */
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
this.notifyError({ msg: 'ASR error', details:'Cobalt requires a model to be specified'});
throw new Error('Cobalt requires a model to be specified');
}
const startListening = async(cs, ep) => {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
@@ -266,13 +319,6 @@ class TaskGather extends SttTask {
this._resolve('dtmf-terminator-key');
}
else if (this.input.includes('digits')) {
if (this.digitBuffer.length === 0 && this.needsStt) {
// DTMF is higher priority than STT.
this.removeSpeechListeners(ep);
ep.stopTranscription({vendor: this.vendor})
.catch((err) => this.logger.error({err},
` Received DTMF, Error stopping transcription for vendor ${this.vendor}`));
}
this.digitBuffer += evt.dtmf;
const len = this.digitBuffer.length;
if (len === this.numDigits || len === this.maxDigits) {
@@ -482,9 +528,7 @@ class TaskGather extends SttTask {
this._clearTimer();
this._timeoutTimer = setTimeout(() => {
if (this.isContinuousAsr) this._startAsrTimer();
else if (this.interDigitTimeout <= 0 ||
this.digitBuffer.length < this.minDigits ||
this.needsStt && this.digitBuffer.length === 0) {
else if (this.interDigitTimeout <= 0 || this.digitBuffer.length < this.minDigits || this.needsStt) {
this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
}
}, this.timeout);
@@ -780,13 +824,14 @@ class TaskGather extends SttTask {
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskGather:_onJambonzError');
const errMessage = evt.error || evt.Message;
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
message: `Speech vendor ${this.vendor} error: ${errMessage}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
this.notifyError({msg: 'ASR error', details:`Speech vendor ${this.vendor} error: ${evt.error}`});
}
_onVendorConnectFailure(cs, _ep, evt) {

View File

@@ -52,81 +52,8 @@ class SttTask extends Task {
}
async exec(cs, {ep, ep2}) {
super.exec(cs);
this.ep = ep;
this.ep2 = ep2;
if ('default' === this.vendor || !this.vendor) {
this.vendor = cs.speechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.vendor = this.vendor;
}
if ('default' === this.language || !this.language) {
this.language = cs.speechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.language = this.language;
}
if ('default' === this.label || !this.label) {
this.label = cs.speechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.label = this.label;
}
// Fallback options
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
}
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
}
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
}
if (!this.data.recognizer.vendor) {
this.data.recognizer.vendor = this.vendor;
}
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
// By default, application saves cobalt model in language
this.data.recognizer.model = cs.speechRecognizerLanguage;
}
if (cs.recognizer) {
for (const k in cs.recognizer) {
if (Array.isArray(this.data.recognizer[k]) ||
Array.isArray(cs.recognizer[k]) ||
typeof this.data.recognizer[k] === 'object' ||
typeof cs.recognizer[k] === 'object'
) {
this.data.recognizer[k] = {
...this.data.recognizer[k],
...cs.recognizer[k]
};
} else {
this.data.recognizer[k] = cs.recognizer[k] || this.data.recognizer[k];
}
}
}
if (!this.sttCredentials) {
try {
this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
} catch (error) {
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
await this._fallback();
} else {
throw error;
}
}
}
/* when using cobalt model is required */
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
this.notifyError({ msg: 'ASR error', details:'Cobalt requires a model to be specified'});
throw new Error('Cobalt requires a model to be specified');
}
}
async _initSpeechCredentials(cs, vendor, label) {
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
const {getNuanceAccessToken, getIbmAccessToken, getAwsAuthToken} = this.cs.srf.locals.dbHelpers;
let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
if (!credentials) {
@@ -160,6 +87,15 @@ class SttTask extends Task {
this.logger.debug({stt_api_key}, `got ibm access token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...credentials, access_token, stt_region};
}
else if (vendor == 'aws') {
/* get AWS access token */
const {accessKeyId, secretAccessKey, securityToken, region } = credentials;
if (!securityToken) {
const { servedFromCache, ...newCredentials} = await getAwsAuthToken(accessKeyId, secretAccessKey, region);
this.logger.debug({newCredentials}, `got aws security token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...newCredentials, region};
}
}
return credentials;
}

View File

@@ -45,7 +45,7 @@ class TaskTranscribe extends SttTask {
get name() { return TaskName.Transcribe; }
async exec(cs, {ep, ep2}) {
await super.exec(cs, {ep, ep2});
super.exec(cs);
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
if (cs.hasGlobalSttHints) {
@@ -63,6 +63,60 @@ class TaskTranscribe extends SttTask {
if (cs.hasGlobalSttPunctuation && !this.data.recognizer.punctuation) {
this.data.recognizer.punctuation = cs.globalSttPunctuation;
}
this.ep = ep;
this.ep2 = ep2;
if ('default' === this.vendor || !this.vendor) {
this.vendor = cs.speechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.vendor = this.vendor;
}
if ('default' === this.language || !this.language) {
this.language = cs.speechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.language = this.language;
}
if ('default' === this.label || !this.label) {
this.label = cs.speechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.label = this.label;
}
// Fallback options
if ('default' === this.fallbackVendor || !this.fallbackVendor) {
this.fallbackVendor = cs.fallbackSpeechRecognizerVendor;
if (this.data.recognizer) this.data.recognizer.fallbackVendor = this.fallbackVendor;
}
if ('default' === this.fallbackLanguage || !this.fallbackLanguage) {
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
}
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
}
if (!this.data.recognizer.vendor) {
this.data.recognizer.vendor = this.vendor;
}
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
// By default, application saves cobalt model in language
this.data.recognizer.model = cs.speechRecognizerLanguage;
}
if (!this.sttCredentials) {
try {
this.sttCredentials = await this._initSpeechCredentials(cs, this.vendor, this.label);
} catch (error) {
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
await this._fallback();
} else {
throw error;
}
}
}
/* when using cobalt model is required */
if (this.vendor === 'cobalt' && !this.data.recognizer.model) {
this.notifyError({ msg: 'ASR error', details:'Cobalt requires a model to be specified'});
throw new Error('Cobalt requires a model to be specified');
}
try {
await this._startTranscribing(cs, ep, 1);
if (this.separateRecognitionPerChannel && ep2) {

View File

@@ -178,7 +178,8 @@ function installSrfLocals(srf, logger) {
synthAudio,
getNuanceAccessToken,
getIbmAccessToken,
} = require('@jambonz/speech-utils')({}, logger);
getAwsAuthToken
} = require('@jambonz/speech-utils')({redis_client: client}, logger);
const {
writeAlerts,
AlertType
@@ -231,6 +232,7 @@ function installSrfLocals(srf, logger) {
getListPosition,
getNuanceAccessToken,
getIbmAccessToken,
getAwsAuthToken,
addToSortedSet,
retrieveFromSortedSet,
retrieveByPatternSortedSet,

View File

@@ -60,7 +60,13 @@ const stickyVars = {
aws: [
'AWS_VOCABULARY_NAME',
'AWS_VOCABULARY_FILTER_METHOD',
'AWS_VOCABULARY_FILTER_NAME'
'AWS_VOCABULARY_FILTER_NAME',
'AWS_LANGUAGE_MODEL_NAME',
'AWS_ACCESS_KEY_ID',
'AWS_SECRET_ACCESS_KEY',
'AWS_REGION',
'AWS_SECURITY_TOKEN',
'AWS_PII_ENTITY_TYPES'
],
nuance: [
'NUANCE_ACCESS_TOKEN',
@@ -368,11 +374,19 @@ const normalizeMicrosoft = (evt, channel, language) => {
const normalizeAws = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
const alternatives = evt.Transcript?.Results[0]?.Alternatives.map((alt) => {
const items = alt.Items.filter((item) => item.Type === 'pronunciation' && 'Confidence' in item);
const confidence = items.reduce((acc, item) => acc + item.Confidence, 0) / items.length;
return {
transcript: alt.Transcript,
confidence
};
});
return {
language_code: language,
channel_tag: channel,
is_final: evt[0].is_final,
alternatives: evt[0].alternatives,
is_final: evt.Transcript?.Results[0].IsPartial === false,
alternatives,
vendor: {
name: 'aws',
evt: copy
@@ -483,16 +497,29 @@ module.exports = (logger) => {
};
}
else if (['aws', 'polly'].includes(vendor)) {
const {awsOptions = {}} = rOpts;
const vocabularyName = awsOptions.vocabularyName || rOpts.vocabularyName;
const vocabularyFilterName = awsOptions.vocabularyFilterName || rOpts.vocabularyFilterName;
const filterMethod = awsOptions.vocabularyFilterMethod || rOpts.filterMethod;
opts = {
...opts,
...(rOpts.vocabularyName && {AWS_VOCABULARY_NAME: rOpts.vocabularyName}),
...(rOpts.vocabularyFilterName && {AWS_VOCABULARY_FILTER_NAME: rOpts.vocabularyFilterName}),
...(rOpts.filterMethod && {AWS_VOCABULARY_FILTER_METHOD: rOpts.filterMethod}),
...(vocabularyName && {AWS_VOCABULARY_NAME: vocabularyName}),
...(vocabularyFilterName && {AWS_VOCABULARY_FILTER_NAME: vocabularyFilterName}),
...(filterMethod && {AWS_VOCABULARY_FILTER_METHOD: filterMethod}),
...(sttCredentials && {
AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId,
AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey,
AWS_REGION: sttCredentials.region
AWS_REGION: sttCredentials.region,
AWS_SECURITY_TOKEN: sttCredentials.securityToken
}),
...(awsOptions.accessKey && {AWS_ACCESS_KEY_ID: awsOptions.accessKey}),
...(awsOptions.secretKey && {AWS_SECRET_ACCESS_KEY: awsOptions.secretKey}),
...(awsOptions.region && {AWS_REGION: awsOptions.region}),
...(awsOptions.securityToken && {AWS_SECURITY_TOKEN: awsOptions.securityToken}),
...(awsOptions.languageModelName && {AWS_LANGUAGE_MODEL_NAME: awsOptions.languageModelName}),
...(awsOptions.piiEntityTypes?.length && {AWS_PII_ENTITY_TYPES: awsOptions.piiEntityTypes.join(',')}),
...(awsOptions.piiIdentifyEntities && {AWS_PII_IDENTIFY_ENTITIES: true}),
...(awsOptions.languageModelName && {AWS_LANGUAGE_MODEL_NAME: awsOptions.languageModelName}),
};
}
else if ('microsoft' === vendor) {
@@ -801,6 +828,17 @@ module.exports = (logger) => {
if (clientId && secret) return {client_id: clientId, secret};
if (kryptonEndpoint) return {nuance_stt_uri: kryptonEndpoint};
}
else if (recognizer.vendor === 'aws') {
const {accessKey, secretKey, region, securityToken} = recognizer.awsOptions || {};
if (accessKey || secretKey || region || securityToken) {
return {
accessKeyId: accessKey,
secretAccessKey: secretKey,
region,
securityToken
};
}
}
else if (recognizer.vendor === 'nvidia') {
const {rivaUri} = recognizer.nvidiaOptions || {};
if (rivaUri) return {riva_uri: rivaUri};

812
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -31,19 +31,19 @@
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.4",
"@jambonz/realtimedb-helpers": "^0.8.7",
"@jambonz/speech-utils": "^0.0.31",
"@jambonz/speech-utils": "^0.0.30",
"@jambonz/stats-collector": "^0.1.9",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.46",
"@opentelemetry/api": "^1.4.0",
"@opentelemetry/exporter-jaeger": "^1.9.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
"@opentelemetry/exporter-zipkin": "^1.9.0",
"@opentelemetry/instrumentation": "^0.35.0",
"@opentelemetry/resources": "^1.9.0",
"@opentelemetry/sdk-trace-base": "^1.9.0",
"@opentelemetry/sdk-trace-node": "^1.9.0",
"@opentelemetry/semantic-conventions": "^1.9.0",
"@jambonz/verb-specifications": "^0.0.49",
"@opentelemetry/api": "^1.7.0",
"@opentelemetry/exporter-jaeger": "^1.18.1",
"@opentelemetry/exporter-trace-otlp-http": "^0.45.1",
"@opentelemetry/exporter-zipkin": "^1.18.1",
"@opentelemetry/instrumentation": "^0.45.1",
"@opentelemetry/resources": "^1.18.1",
"@opentelemetry/sdk-trace-base": "^1.18.1",
"@opentelemetry/sdk-trace-node": "^1.18.1",
"@opentelemetry/semantic-conventions": "^1.18.1",
"bent": "^7.3.12",
"debug": "^4.3.4",
"deepcopy": "^2.1.0",

View File

@@ -137,64 +137,6 @@ test('\'transcribe\' test - aws', async(t) => {
}
});
// Integration test: a `config` verb carrying deepgramOptions followed by a
// `transcribe` verb that supplies the deepgram api key. Passes when the
// transcript fetched after the call contains the expected phrase.
test('\'transcribe\' test - deepgram config options', async(t) => {
// No deepgram key available: report a pass and end early instead of failing.
if (!DEEPGRAM_API_KEY ) {
t.pass('skipping deepgram tests');
return t.end();
}
// Clear cached modules before requiring the app
// (presumably so each test gets a fresh app instance -- confirm).
clearModule.all();
const {srf, disconnect} = require('../app');
try {
await connect(srf);
// GIVEN
// First verb sets recognizer defaults (vendor, language, deepgramOptions);
// second verb starts transcription and passes only hints plus the api key.
let verbs = [
{
"verb": "config",
"recognizer": {
"vendor": "deepgram",
"language": "en-US",
"deepgramOptions": {
"model": "2-ea",
"tier": "nova",
"numerals": true,
"ner": true,
"vadTurnoff": 10,
"keywords": [
"CPT"
]
}
}
},
{
"verb": "transcribe",
"transcriptionHook": "/transcriptionHook",
"recognizer": {
"vendor": "deepgram",
"hints": ["customer support", "sales", "human resources", "HR"],
"deepgramOptions": {
"apiKey": DEEPGRAM_API_KEY,
}
}
}
];
// `from` doubles as the key under which the verbs are provisioned and the
// prefix of the lastRequest lookup below.
let from = "gather_success";
await provisionCallHook(from, verbs);
// THEN
await sippUac('uac-gather-account-creds-success.xml', '172.38.0.10', from);
// NOTE(review): the verb above declares `transcriptionHook`, but the result is
// read from `${from}_actionHook` -- confirm this is the intended request key.
let obj = await getJSON(`http://127.0.0.1:3100/lastRequest/${from}_actionHook`);
t.ok(obj.body.speech.alternatives[0].transcript.toLowerCase().includes('like to speak to customer support'),
'transcribe: succeeds when using deepgram credentials');
disconnect();
} catch (err) {
// Log, tear down the app connection, then report the failure to the test harness.
console.log(`error received: ${err}`);
disconnect();
t.error(err);
}
});
test('\'transcribe\' test - deepgram', async(t) => {
if (!DEEPGRAM_API_KEY ) {
t.pass('skipping deepgram tests');