add support for aws language model name when transcribing (#890)

* add support for aws language model name when transcribing

* wip - from prev branch

* wip

* support both aws grpc and ws api - detect based on transcription payload

* update to drachtio-fsmrf@4.0.0

* fix for grpc compatibility, requires JAMBONES_AWS_TRANSCRIBE_USE_GRPC env

* back out major update to drachtio-srf and fsmrf; that should come in a separate PR

* update drachtio-srf
This commit is contained in:
Dave Horton
2024-10-12 19:46:31 -04:00
committed by GitHub
parent 98b968d61f
commit f541ff1a15
5 changed files with 650 additions and 403 deletions

View File

@@ -108,6 +108,8 @@ const DEEPGRAM_API_KEY = process.env.DEEPGRAM_API_KEY;
const ANCHOR_MEDIA_ALWAYS = process.env.ANCHOR_MEDIA_ALWAYS;
const VMD_HINTS_FILE = process.env.VMD_HINTS_FILE;
const JAMBONES_AWS_TRANSCRIBE_USE_GRPC = process.env.JAMBONES_AWS_TRANSCRIBE_USE_GRPC;
/* security, secrets */
const LEGACY_CRYPTO = !!process.env.LEGACY_CRYPTO;
const JWT_SECRET = process.env.JWT_SECRET;
@@ -190,6 +192,7 @@ module.exports = {
ANCHOR_MEDIA_ALWAYS,
VMD_HINTS_FILE,
JAMBONES_FREESWITCH_MAX_CALL_DURATION_MINS,
JAMBONES_AWS_TRANSCRIBE_USE_GRPC,
LEGACY_CRYPTO,
JWT_SECRET,

View File

@@ -1006,10 +1006,11 @@ class TaskGather extends SttTask {
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskGather:_onJambonzError');
const errMessage = evt.error || evt.Message;
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
message: `Custom speech vendor ${this.vendor} error: ${errMessage}`,
vendor: this.vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));

View File

@@ -3,6 +3,7 @@ const assert = require('assert');
const crypto = require('crypto');
const { TaskPreconditions, CobaltTranscriptionEvents } = require('../utils/constants');
const { SpeechCredentialError } = require('../utils/error');
const {JAMBONES_AWS_TRANSCRIBE_USE_GRPC} = require('../config');
class SttTask extends Task {
@@ -217,14 +218,25 @@ class SttTask extends Task {
region,
roleArn
});
this.logger.debug({roleArn}, `got aws access token ${servedFromCache ? 'from cache' : ''}`);
this.logger.debug({roleArn}, `(roleArn) got aws access token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...credentials, accessKeyId, secretAccessKey, sessionToken};
} else if (vendor === 'verbio' && credentials.client_id && credentials.client_secret) {
}
else if (vendor === 'verbio' && credentials.client_id && credentials.client_secret) {
const {access_token, servedFromCache} = await getVerbioAccessToken(credentials);
this.logger.debug({client_id: credentials.client_id},
`got verbio access token ${servedFromCache ? 'from cache' : ''}`);
credentials.access_token = access_token;
}
else if (vendor == 'aws' && !JAMBONES_AWS_TRANSCRIBE_USE_GRPC) {
/* get AWS access token */
const {accessKeyId, secretAccessKey, securityToken, region } = credentials;
if (!securityToken) {
const { servedFromCache, ...newCredentials} = await getAwsAuthToken({accessKeyId, secretAccessKey, region});
this.logger.debug({newCredentials}, `got aws security token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...newCredentials, region};
}
}
return credentials;
}

View File

@@ -48,7 +48,13 @@ const stickyVars = {
aws: [
'AWS_VOCABULARY_NAME',
'AWS_VOCABULARY_FILTER_METHOD',
'AWS_VOCABULARY_FILTER_NAME'
'AWS_VOCABULARY_FILTER_NAME',
'AWS_LANGUAGE_MODEL_NAME',
'AWS_ACCESS_KEY_ID',
'AWS_SECRET_ACCESS_KEY',
'AWS_REGION',
'AWS_SECURITY_TOKEN',
'AWS_PII_ENTITY_TYPES',
],
nuance: [
'NUANCE_ACCESS_TOKEN',
@@ -444,16 +450,41 @@ const normalizeMicrosoft = (evt, channel, language, punctuation = true) => {
const normalizeAws = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
return {
language_code: language,
channel_tag: channel,
is_final: evt[0].is_final,
alternatives: evt[0].alternatives,
vendor: {
name: 'aws',
evt: copy
}
};
const isGrpcPayload = Array.isArray(evt);
if (isGrpcPayload) {
/* legacy grpc api */
return {
language_code: language,
channel_tag: channel,
is_final: evt[0].is_final,
alternatives: evt[0].alternatives,
vendor: {
name: 'aws',
evt: copy
}
};
}
else {
/* websocket api */
const alternatives = evt.Transcript?.Results[0]?.Alternatives.map((alt) => {
const items = alt.Items.filter((item) => item.Type === 'pronunciation' && 'Confidence' in item);
const confidence = items.reduce((acc, item) => acc + item.Confidence, 0) / items.length;
return {
transcript: alt.Transcript,
confidence
};
});
return {
language_code: language,
channel_tag: channel,
is_final: evt.Transcript?.Results[0].IsPartial === false,
alternatives,
vendor: {
name: 'aws',
evt: copy
}
};
}
};
const normalizeAssemblyAi = (evt, channel, language) => {
@@ -602,17 +633,29 @@ module.exports = (logger) => {
};
}
else if (['aws', 'polly'].includes(vendor)) {
const {awsOptions = {}} = rOpts;
const vocabularyName = awsOptions.vocabularyName || rOpts.vocabularyName;
const vocabularyFilterName = awsOptions.vocabularyFilterName || rOpts.vocabularyFilterName;
const filterMethod = awsOptions.vocabularyFilterMethod || rOpts.filterMethod;
opts = {
...opts,
...(rOpts.vocabularyName && {AWS_VOCABULARY_NAME: rOpts.vocabularyName}),
...(rOpts.vocabularyFilterName && {AWS_VOCABULARY_FILTER_NAME: rOpts.vocabularyFilterName}),
...(rOpts.filterMethod && {AWS_VOCABULARY_FILTER_METHOD: rOpts.filterMethod}),
...(vocabularyName && {AWS_VOCABULARY_NAME: vocabularyName}),
...(vocabularyFilterName && {AWS_VOCABULARY_FILTER_NAME: vocabularyFilterName}),
...(filterMethod && {AWS_VOCABULARY_FILTER_METHOD: filterMethod}),
...(sttCredentials && {
...(sttCredentials.accessKeyId && {AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId}),
...(sttCredentials.secretAccessKey && {AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey}),
AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId,
AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey,
AWS_REGION: sttCredentials.region,
...(sttCredentials.sessionToken && {AWS_SESSION_TOKEN: sttCredentials.sessionToken}),
AWS_SECURITY_TOKEN: sttCredentials.securityToken
}),
...(awsOptions.accessKey && {AWS_ACCESS_KEY_ID: awsOptions.accessKey}),
...(awsOptions.secretKey && {AWS_SECRET_ACCESS_KEY: awsOptions.secretKey}),
...(awsOptions.region && {AWS_REGION: awsOptions.region}),
...(awsOptions.securityToken && {AWS_SECURITY_TOKEN: awsOptions.securityToken}),
...(awsOptions.languageModelName && {AWS_LANGUAGE_MODEL_NAME: awsOptions.languageModelName}),
...(awsOptions.piiEntityTypes?.length && {AWS_PII_ENTITY_TYPES: awsOptions.piiEntityTypes.join(',')}),
...(awsOptions.piiIdentifyEntities && {AWS_PII_IDENTIFY_ENTITIES: true}),
...(awsOptions.languageModelName && {AWS_LANGUAGE_MODEL_NAME: awsOptions.languageModelName}),
};
}
else if ('microsoft' === vendor) {