Compare commits

..

17 Commits

Author SHA1 Message Date
Dave Horton
8938bf25dc race condition where gather transcribe is restarted after final transcript 2024-05-21 19:04:55 -04:00
Dave Horton
f7134d8fe7 more logging on restart of transcribing during gather 2024-05-10 14:12:34 -04:00
Dave Horton
a23dc50c20 lint 2024-05-03 08:56:55 -04:00
Dave Horton
888fddff37 possible fix for race condition in gather which ends but lets transcription continue 2024-05-03 08:55:16 -04:00
Dave Horton
e1497f90a8 update with various deepgram fixes, including for #700 2024-04-01 13:03:52 -04:00
Dave Horton
ec58232b61 Fix/replace application issue (#692)
* fix scenario where ws replace application from gather while awaiting command and no tasks on execution stack

* lint

* remove some debug logging
2024-03-23 16:14:16 -04:00
Hoan Luu Huu
65c241bcd1 gather verb should clean dtmf listener even when dtmfBargein=false (#686) 2024-03-23 16:01:41 -04:00
Hoan Luu Huu
75b6f89e0c add log to get more detail for AMD issue (#687)
* add log to check issue

* update drachtio-fsmrf 3.0.38
2024-03-21 09:14:32 -04:00
Hoan Luu Huu
b80d39d205 fix asrtimer always return vendor=deepgram (#682) 2024-03-13 12:57:55 -04:00
Hoan Luu Huu
40f70e3531 update speech utils version 0.0.63 (#681) 2024-03-12 09:12:18 -04:00
Hoan Luu Huu
1914b88af9 support azure language id mode (#674) 2024-03-12 08:35:01 -04:00
Hoan Luu Huu
c946a5d14d fix actionHookDelay feature is not working properly if there is no de… (#679)
* fix actionHookDelayAction when no actions are defined

* terminated by jambonz for giveuptimeout
2024-03-12 08:33:03 -04:00
Hoan Luu Huu
878578fe0f Fix/issue 676 (#680)
* fix bargein is not working

* fix bargein is not working
2024-03-11 08:46:38 -04:00
Hoan Luu Huu
9b3be6c0b9 allow custom header on pause, resume recording (#670)
* allow custom header on pause, resume recording

* fix review comments
2024-03-05 18:01:32 -05:00
Hoan Luu Huu
4ae661daea remove unnecessary code for cleanup disableBotMode (#673) 2024-03-04 18:03:32 -05:00
Dave Horton
dbd3b59901 fix #666 2024-02-26 09:39:49 -05:00
Hoan Luu Huu
06b066a3f2 update speech util to support whisper stream (#657)
* update speech util to support whisper stream

* minor editing of span attributes

* more span attrs cleanup

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-02-22 14:17:29 -05:00
15 changed files with 292 additions and 148 deletions

1
.gitignore vendored
View File

@@ -42,3 +42,4 @@ ecosystem.config.js
test/credentials/*.json
run-tests.sh
run-coverage.sh
.vscode

17
.vscode/launch.json vendored
View File

@@ -1,17 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
"program": "${workspaceFolder}/test/index.js",
"env": {
"NODE_ENV": "test"
}
}
]
}

View File

@@ -53,16 +53,24 @@ class AdultingCallSession extends CallSession {
}
_callerHungup() {
this._hangup('caller');
}
_jambonzHangup() {
this._hangup();
}
_hangup(terminatedBy = 'jambonz') {
if (this.dlg.connectTime) {
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.rootSpan.setAttributes({'call.termination': 'hangup by caller'});
this.callInfo.callTerminationBy = 'caller';
this.rootSpan.setAttributes({'call.termination': `hangup by ${terminatedBy}`});
this.callInfo.callTerminationBy = terminatedBy;
this.emit('callStatusChange', {
callStatus: CallStatus.Completed,
duration
});
}
this.logger.info('InboundCallSession: caller hung up');
this.logger.info(`InboundCallSession: ${terminatedBy} hung up`);
this._callReleased();
this.req.removeAllListeners('cancel');
}

View File

@@ -622,8 +622,10 @@ class CallSession extends Emitter {
const res = await this.dlg.request({
method: 'INFO',
headers: {
'X-Reason': 'pauseCallRecording'
}
'X-Reason': 'pauseCallRecording',
...(this.recordOptions.headers && {'Content-Type': 'application/json'})
},
...(this.recordOptions.headers && {body: JSON.stringify(this.recordOptions.headers) + '\n'})
});
if (res.status === 200) {
this._recordState = RecordState.RecordingPaused;
@@ -644,8 +646,10 @@ class CallSession extends Emitter {
const res = await this.dlg.request({
method: 'INFO',
headers: {
'X-Reason': 'resumeCallRecording'
}
'X-Reason': 'resumeCallRecording',
...(this.recordOptions.headers && {'Content-Type': 'application/json'})
},
...(this.recordOptions.headers && {body: JSON.stringify(this.recordOptions.headers) + '\n'})
});
if (res.status === 200) {
this._recordState = RecordState.RecordingOn;
@@ -680,7 +684,8 @@ class CallSession extends Emitter {
}
task = await this.backgroundTaskManager.newTask('bargeIn', gather);
task.sticky = autoEnable;
task.once('bargeIn-done', () => {
// listen to the bargein-done from background manager
this.backgroundTaskManager.once('bargeIn-done', () => {
if (this.requestor instanceof WsRequestor) {
try {
this.kill(true);
@@ -872,6 +877,7 @@ class CallSession extends Emitter {
const taskNum = ++this.taskIdx;
const stackNum = this.stackIdx;
const task = this.tasks.shift();
task._stackNum = `${stackNum}:${taskNum}`;
this.logger.info(`CallSession:exec starting task #${stackNum}:${taskNum}: ${task.name}`);
this._notifyTaskStatus(task, {event: 'starting'});
// Register verbhook span wait for end
@@ -935,7 +941,6 @@ class CallSession extends Emitter {
// all done - cleanup
this.logger.info('CallSession:exec all tasks complete');
this._stopping = true;
this.disableBotMode();
this._onTasksDone();
this._clearResources();
@@ -1443,6 +1448,11 @@ Duration=${duration} `
this.currentTask.kill(this, KillReason.Replaced);
this.currentTask = null;
}
else if (this.wakeupResolver) {
this.logger.debug('CallSession:replaceApplication - waking up');
this.wakeupResolver({reason: 'new tasks'});
this.wakeupResolver = null;
}
}
kill(onBackgroundGatherBargein = false) {
@@ -1467,7 +1477,8 @@ Duration=${duration} `
this.logger.info('CallSession:kill - found bargein disabled in the stack, clearing to that point');
break;
}
this.tasks.shift();
const rem = this.tasks.shift();
this.logger.debug(`CallSession:kill - clearing task ${rem.summary}`);
}
}
else this.tasks = [];
@@ -1827,6 +1838,14 @@ Duration=${duration} `
assert(false, 'subclass responsibility to override this method');
}
/**
* called when jambonz has hung up the call. Provided for subclasses to override
* in order to apply logic at this point if needed.
*/
_jambonzHangup() {
assert(false, 'subclass responsibility to override this method');
}
/**
* get a media server to use for this call
*/
@@ -2255,7 +2274,7 @@ Duration=${duration} `
this.logger.debug(`CallSession:_startActionHookNoResponseTimer ${options.noResponseTimeoutMs}`);
this._actionHookNoResponseTimer = setTimeout(() => {
if (this._actionHookDelayRetryCount >= options.retries) {
this._callerHungup();
this._jambonzHangup();
}
const verb = options.actions[this._actionHookDelayRetryCount % options.actions.length];
// Inject verb to main stack
@@ -2293,7 +2312,7 @@ Duration=${duration} `
this.logger.debug(`CallSession:_startActionHookNoResponseGiveUpTimer ${options.noResponseGiveUpTimeoutMs}`);
this._actionHookNoResponseGiveUpTimer = setTimeout(() => {
this.logger.debug('CallSession:_startActionHookNoResponseGiveUpTimer Timeout');
this._callerHungup();
this._jambonzHangup();
this._actionHookNoResponseGiveUpTimer = null;
}, options.noResponseGiveUpTimeoutMs);
}

View File

@@ -34,6 +34,9 @@ class ConfirmCallSession extends CallSession {
_callerHungup() {
}
_jambonzHangup() {
}
}

View File

@@ -67,19 +67,27 @@ class InboundCallSession extends CallSession {
* This is invoked when the caller hangs up, in order to calculate the call duration.
*/
_callerHungup() {
this._hangup('caller');
}
_jambonzHangup() {
this._hangup();
}
_hangup(terminatedBy = 'jambonz') {
if (this.dlg === null) {
this.logger.info('InboundCallSession:_callerHungup - race condition, dlg cleared by app hangup');
this.logger.info('InboundCallSession:_hangup - race condition, dlg cleared by app hangup');
return;
}
assert(this.dlg.connectTime);
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.rootSpan.setAttributes({'call.termination': 'hangup by caller'});
this.callInfo.callTerminationBy = 'caller';
this.rootSpan.setAttributes({'call.termination': `hangup by ${terminatedBy}`});
this.callInfo.callTerminationBy = terminatedBy;
this.emit('callStatusChange', {
callStatus: CallStatus.Completed,
duration
});
this.logger.info('InboundCallSession: caller hung up');
this.logger.info(`InboundCallSession: ${terminatedBy} hung up`);
this._callReleased();
this.req.removeAllListeners('cancel');
}

View File

@@ -49,13 +49,21 @@ class RestCallSession extends CallSession {
* This is invoked when the called party hangs up, in order to calculate the call duration.
*/
_callerHungup() {
this._hangup('caller');
}
_jambonzHangup() {
this._hangup();
}
_hangup(terminatedBy = 'jambonz') {
if (this.restDialTask) {
this.restDialTask.turnOffAmd();
}
this.callInfo.callTerminationBy = 'caller';
this.callInfo.callTerminationBy = terminatedBy;
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});
this.logger.debug('RestCallSession: called party hung up');
this.logger.debug(`RestCallSession: called party hung up by ${terminatedBy}`);
this._callReleased();
}

View File

@@ -107,7 +107,7 @@ class TaskGather extends SttTask {
}
async exec(cs, {ep}) {
this.logger.debug({options: this.data}, 'Gather:exec');
this.logger.debug({options: this.data}, `Gather:exec ${this.stackNum}`);
await super.exec(cs, {ep});
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
@@ -139,28 +139,27 @@ class TaskGather extends SttTask {
this.logger.debug('Gather:exec - early hints match enabled');
}
// actionHook delay
this._actionHookDelayEnabled = cs.actionHookDelayEnabled || !!this.actionHookDelayAction;
this._actionHookDelayActions = this.actionHookDelayAction && this.actionHookDelayAction.actions ?
this.actionHookDelayAction.actions : cs.actionHookDelayActions || [];
if (this._actionHookDelayEnabled && this._actionHookDelayActions.length > 0) {
this._actionHookNoResponseTimeout = (this.actionHookDelayAction && this.actionHookDelayAction.noResponseTimeout ?
this.actionHookDelayAction.noResponseTimeout : cs.actionHookNoResponseTimeout || 0) * 1000;
this._hookDelayEn = cs.actionHookDelayEnabled || !!this.actionHookDelayAction;
this._actionHookNoResponseGiveUpTimeout = (this.actionHookDelayAction &&
this.actionHookDelayAction.noResponseGiveUpTimeout ?
this.actionHookDelayAction.noResponseGiveUpTimeout : cs.actionHookNoResponseGiveUpTimeout || 0) * 1000;
this._hookDelayActions = this.actionHookDelayAction?.actions || cs.actionHookDelayActions || [];
this._actionHookDelayRetries = this.actionHookDelayAction && this.actionHookDelayAction.retries ?
this.actionHookDelayAction.retries : cs.actionHookDelayRetries || 1;
this._actionHookDelayTryCount = 0;
this.actionHookDelayActionOptions = {
enabled: this._actionHookDelayEnabled,
actions: this._actionHookDelayActions,
noResponseTimeoutMs: this._actionHookNoResponseTimeout,
noResponseGiveUpTimeoutMs: this._actionHookNoResponseGiveUpTimeout,
retries: this._actionHookDelayRetries
};
}
// Only enable NoResponseTimeout if there is _hookDelayActions
this._hookNoResponseTimeout = (this._hookDelayActions?.length ?
(this.actionHookDelayAction?.noResponseTimeout || cs.actionHookNoResponseTimeout || 0)
: 0) * 1000;
this._hookNoResponseGiveUpTimeout = (this.actionHookDelayAction?.noResponseGiveUpTimeout ||
cs.actionHookNoResponseGiveUpTimeout || 0) * 1000;
this._hookDelayRetries = this.actionHookDelayAction?.retries || cs.actionHookDelayRetries || 1;
this._hookDelayRetryCount = 0;
this.hookDelayActionOpts = {
enabled: this._hookDelayEn,
actions: this._hookDelayActions,
noResponseTimeoutMs: this._hookNoResponseTimeout,
noResponseGiveUpTimeoutMs: this._hookNoResponseGiveUpTimeout,
retries: this._hookDelayRetries
};
const startListening = async(cs, ep) => {
this._startTimer();
@@ -172,6 +171,7 @@ class TaskGather extends SttTask {
this.logger.info('Gather:exec - task was quickly killed so do not transcribe');
return;
}
this.logger.debug('Gather:exec - going to start transcribing (startListening)');
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
} catch (e) {
@@ -236,9 +236,15 @@ class TaskGather extends SttTask {
if (this.input.includes('speech') && this.listenDuringPrompt) {
await this._setSpeechHandlers(cs, ep);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
if (!this.resolved && !this.killed) {
this.logger.debug(`Gather:exec ${this.stackNum} - going to start transcribing (listenDuringPrompt)`);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
}
else {
this.logger.info(`Gather:exec ${this.stackNum} - task was killed or resolved before starting transcription`);
}
}
if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
@@ -483,7 +489,7 @@ class TaskGather extends SttTask {
locale: this.language,
interim: this.interim,
bugname: this.bugname
}, 'Gather:_startTranscribing');
}, `Gather:_startTranscribing ${this.stackNum}`);
/**
* Note: we don't need to ask deepgram for interim results, because they
@@ -535,7 +541,7 @@ class TaskGather extends SttTask {
this._clearAsrTimer();
this._asrTimer = setTimeout(() => {
this.logger.debug('_startAsrTimer - asr timer went off');
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout', evt);
}, this.asrTimeout);
this.logger.debug(`_startAsrTimer: set for ${this.asrTimeout}ms`);
@@ -600,24 +606,24 @@ class TaskGather extends SttTask {
}
_startActionHookNoResponseTimer() {
assert(this._actionHookNoResponseTimeout > 0);
assert(this._hookNoResponseTimeout > 0);
this._clearActionHookNoResponseTimer();
this.logger.debug('startActionHookNoResponseTimer');
this._actionHookNoResponseTimer = setTimeout(() => {
if (this._actionHookDelayTryCount >= this._actionHookDelayRetries) {
if (this._hookDelayRetryCount >= this._hookDelayRetries) {
this._hangupCall();
return;
}
const verb = this._actionHookDelayActions[this._actionHookDelayTryCount % this._actionHookDelayActions.length];
const verb = this._hookDelayActions[this._hookDelayRetryCount % this._hookDelayActions.length];
if (verb.verb === 'say') {
this._actionHookDelaySayAction(verb);
} else if (verb.verb === 'play') {
this._actionHookDelayPlayAction(verb);
}
this._actionHookDelayTryCount++;
this._hookDelayRetryCount++;
this._startActionHookNoResponseTimer();
}, this._actionHookNoResponseTimeout);
}, this._hookNoResponseTimeout);
}
@@ -629,12 +635,12 @@ class TaskGather extends SttTask {
}
_startActionHookNoResponseGiveUpTimer() {
assert(this._actionHookNoResponseGiveUpTimeout > 0);
assert(this._hookNoResponseGiveUpTimeout > 0);
this._clearActionHookNoResponseGiveUpTimer();
this.logger.debug('startActionHookNoResponseGiveUpTimer');
this._actionHookNoResponseGiveUpTimer = setTimeout(() => {
this._hangupCall();
}, this._actionHookNoResponseGiveUpTimeout);
}, this._hookNoResponseGiveUpTimeout);
}
_clearActionHookNoResponseGiveUpTimer() {
@@ -664,7 +670,7 @@ class TaskGather extends SttTask {
this._clearFinalAsrTimer();
this._finalAsrTimer = setTimeout(() => {
this.logger.debug('_startFinalAsrTimer - final asr timer went off');
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout', evt);
}, 1000);
this.logger.debug('_startFinalAsrTimer: set for 1 second');
@@ -701,11 +707,15 @@ class TaskGather extends SttTask {
// make sure this is not a transcript from answering machine detection
const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished');
this.logger.debug({evt, bugname, finished}, `Gather:_onTranscription for vendor ${this.vendor}`);
this.logger.debug({
evt,
bugname,
finished,
vendor: this.vendor
}, `Gather:_onTranscription ${this.stackNum} raw transcript`);
if (bugname && this.bugname !== bugname) return;
if (this.vendor === 'ibm' && evt?.state === 'listening') return;
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
/* we will only get this when we have set utterance_end_ms */
if (this._bufferedTranscripts.length === 0) {
@@ -713,15 +723,21 @@ class TaskGather extends SttTask {
}
else {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
this._bufferedTranscripts = [];
this._resolve('speech', evt);
}
return;
}
if (this.vendor === 'deepgram' && evt.type === 'Metadata') {
this.logger.debug('Gather:_onTranscription - discarding Metadata event from deepgram');
return;
}
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language,
this.shortUtterance, this.data.recognizer.punctuation);
//this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription normalized transcript');
if (evt.alternatives.length === 0) {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
return;
@@ -783,20 +799,27 @@ class TaskGather extends SttTask {
this._clearTimer();
if (this._finalAsrTimer) {
this._clearFinalAsrTimer();
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
return this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout', evt);
}
this._startAsrTimer();
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) {
this.logger.debug('Gather:_onTranscription - going to start transcribing again (continuous asr)');
this._startTranscribing(ep);
}
}
else {
if (this.bargein && (words + bufferedWords) < this.minBargeinWordCount) {
this.logger.debug({evt, words, bufferedWords},
'TaskGather:_onTranscription - final transcript but < min barge words');
this._bufferedTranscripts.push(evt);
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) {
this.logger.debug(
`Gather:_onTranscription - start transcribing again (min bargein words=${this.minBargeinWordCount}`);
this._startTranscribing(ep);
}
return;
}
else {
@@ -812,7 +835,7 @@ class TaskGather extends SttTask {
/* deepgram can send an empty and final transcript; only if we have any buffered should we resolve */
if (this._bufferedTranscripts.length === 0) return;
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
this._bufferedTranscripts = [];
}
@@ -862,7 +885,7 @@ class TaskGather extends SttTask {
}
}
_onEndOfUtterance(cs, ep) {
this.logger.debug('TaskGather:_onEndOfUtterance');
this.logger.debug(`TaskGather:_onEndOfUtterance ${this.stackNum}`);
if (this.bargein && this.minBargeinWordCount === 0) {
this._killAudio(cs);
}
@@ -877,6 +900,7 @@ class TaskGather extends SttTask {
* since we dont have a final transcript yet.
*/
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsSingleUtterance) {
this.logger.debug('Gather:_onEndOfUtterance - start transcribing again (end of utterance/wantsSingleUtterance)');
this._startTranscribing(ep);
}
}
@@ -902,6 +926,7 @@ class TaskGather extends SttTask {
try {
await this._fallback();
await this._initSpeech(cs, ep);
this.logger.debug('Gather:_onJambonzError - going to start transcribing again (jambonz error)');
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
return;
@@ -951,7 +976,7 @@ class TaskGather extends SttTask {
this.logger.debug('TaskGather:_onNoSpeechDetected for old gather, ignoring');
}
else {
this.logger.debug('TaskGather:_onNoSpeechDetected - listen again');
this.logger.debug('Gather:_onNoSpeechDetected - going to start transcribing again');
this._startTranscribing(ep);
}
return;
@@ -959,17 +984,25 @@ class TaskGather extends SttTask {
}
async _resolve(reason, evt) {
this.logger.debug(`TaskGather:resolve with reason ${reason}`);
this.logger.debug(`TaskGather:resolve ${this.stackNum} with reason ${reason}`);
if (this.needsStt && this.ep && this.ep.connected) {
this.ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => {
if (this.resolved) return;
this.logger.error({err}, 'Error stopping transcription');
});
}
if (this.resolved) return;
this.resolved = true;
// If bargein is false and the ws application returns an ack to verb:hook,
// the gather should not play any audio
this._killAudio(this.cs);
// Clear dtmf event
if (this.dtmfBargein) {
this.ep.removeAllListeners('dtmf');
}
// Always clear the dtmf listeners so that none can be leaked
this.ep.removeAllListeners('dtmf');
clearTimeout(this.interDigitTimer);
this._clearTimer();
this._clearFastRecognitionTimer();
@@ -979,13 +1012,6 @@ class TaskGather extends SttTask {
'stt.resolve': reason,
'stt.result': JSON.stringify(evt)
});
if (this.needsStt && this.ep && this.ep.connected) {
this.ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, 'Error stopping transcription'));
}
if (this.callSession && this.callSession.callGone) {
this.logger.debug('TaskGather:_resolve - call is gone, not invoking web callback');
@@ -994,11 +1020,11 @@ class TaskGather extends SttTask {
}
// Enable the actionHook delay timers so the configured delay actions are applied
if (this._actionHookNoResponseTimeout > 0) {
if (this._hookNoResponseTimeout > 0) {
this._startActionHookNoResponseTimer();
}
if (this._actionHookNoResponseGiveUpTimeout > 0) {
if (this._hookNoResponseGiveUpTimeout > 0) {
this._startActionHookNoResponseGiveUpTimer();
}

View File

@@ -277,7 +277,6 @@ class TaskSay extends Task {
ep.once('playback-start', (evt) => {
this.logger.debug({evt}, 'got playback-start');
if (this.otelSpan) {
this.logger.debug({evt}, 'got playback-start');
this._addStreamingTtsAttributes(this.otelSpan, evt);
this.otelSpan.end();
this.otelSpan = null;
@@ -341,11 +340,13 @@ class TaskSay extends Task {
for (const [key, value] of Object.entries(evt)) {
if (key.startsWith('variable_tts_')) {
let newKey = key.substring('variable_tts_'.length)
.replace('whisper_', 'whisper.')
.replace('elevenlabs_', 'elevenlabs.');
if (spanMapping[newKey]) newKey = spanMapping[newKey];
attrs[newKey] = value;
}
}
delete attrs['cache_filename']; //no value in adding this to the span
span.setAttributes(attrs);
}
}
@@ -355,6 +356,14 @@ const spanMapping = {
'elevenlabs.request_id': 'elevenlabs.req_id',
'elevenlabs.history_item_id': 'elevenlabs.item_id',
'elevenlabs.optimize_streaming_latency': 'elevenlabs.optimization',
'elevenlabs.name_lookup_time_ms': 'name_lookup_ms',
'elevenlabs.connect_time_ms': 'connect_ms',
'elevenlabs.final_response_time_ms': 'final_response_ms',
'whisper.reported_latency_ms': 'whisper.latency_ms',
'whisper.request_id': 'whisper.req_id',
'whisper.name_lookup_time_ms': 'name_lookup_ms',
'whisper.connect_time_ms': 'connect_ms',
'whisper.final_response_time_ms': 'final_response_ms',
};
module.exports = TaskSay;

View File

@@ -177,8 +177,8 @@ class Task extends Emitter {
// If an actionHook delay action is configured and the ws application has not yet responded with any verb
// for the actionHook, we have to hand off to the call session to await the next ws command verbs and to
// run the actionHook delay actions
if (this.actionHookDelayActionOptions) {
this.emit('ActionHookDelayActionOptions', this.actionHookDelayActionOptions);
if (this.hookDelayActionOpts) {
this.emit('ActionHookDelayActionOptions', this.hookDelayActionOpts);
}
}
if (expectResponse && json && Array.isArray(json)) {

View File

@@ -31,6 +31,11 @@ class TaskTranscribe extends SttTask {
this.separateRecognitionPerChannel = this.data.recognizer.separateRecognitionPerChannel;
}
/* for nested transcribe in dial, unless the app explicitly says so we want to transcribe both legs */
if (this.parentTask?.name === TaskName.Dial && this.separateRecognitionPerChannel !== false) {
this.separateRecognitionPerChannel = true;
}
this.childSpan = [null, null];
// Continuous asr timeout
@@ -298,12 +303,12 @@ class TaskTranscribe extends SttTask {
async _onTranscription(cs, ep, channel, evt, fsEvent) {
// make sure this is not a transcript from answering machine detection
const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished');
if (bugname && this.bugname !== bugname) return;
if (this.paused) {
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - paused, ignoring transcript');
}
if (this.vendor === 'ibm' && evt?.state === 'listening') return;
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
@@ -313,9 +318,10 @@ class TaskTranscribe extends SttTask {
}
else {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
evt.is_final = true;
this._bufferedTranscripts = [];
this._resolve('speech', evt);
this._resolve(channel, evt);
}
return;
}
@@ -329,31 +335,89 @@ class TaskTranscribe extends SttTask {
return;
}
if (evt.alternatives[0]?.transcript === '' && !cs.callGone && !this.killed) {
if (['microsoft', 'deepgram'].includes(this.vendor)) {
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening');
let emptyTranscript = false;
if (evt.is_final) {
if (evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) {
emptyTranscript = true;
if (finished === 'true' &&
['microsoft', 'deepgram'].includes(this.vendor) &&
this._bufferedTranscripts.length === 0) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding');
return;
}
else if (this.vendor !== 'deepgram') {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
return;
}
else if (this.isContinuousAsr) {
this.logger.info({evt},
'TaskGather:_onTranscription - got empty deepgram transcript during continous asr, continue listening');
return;
}
else if (this.vendor === 'deepgram' && this._bufferedTranscripts.length > 0) {
this.logger.info({evt},
'TaskGather:_onTranscription - got empty transcript from deepgram, return the buffered transcripts');
}
}
if (this.isContinuousAsr) {
/* append the transcript and start listening again for asrTimeout */
const t = evt.alternatives[0].transcript;
if (t) {
/* remove trailing punctuation */
if (/[,;:\.!\?]$/.test(t)) {
this.logger.debug('TaskGather:_onTranscription - removing trailing punctuation');
evt.alternatives[0].transcript = t.slice(0, -1);
}
}
this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr');
this._bufferedTranscripts.push(evt);
this._startAsrTimer(channel);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google']
.includes(this.vendor)) this._startTranscribing(cs, ep, channel);
}
else {
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, listen again');
this._transcribe(ep);
}
return;
}
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
else if (this.vendor === 'deepgram') {
/* compile transcripts into one */
if (!emptyTranscript) this._bufferedTranscripts.push(evt);
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
if (evt.is_final) {
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
/* deepgram can send an empty and final transcript; only if we have any buffered should we resolve */
if (this._bufferedTranscripts.length === 0) return;
evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language);
this._bufferedTranscripts = [];
}
/* here is where we return a final transcript */
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - sending final transcript');
this._resolve(channel, evt);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google']
.includes(this.vendor)) this._startTranscribing(cs, ep, channel);
}
}
else {
/* interim transcript */
if (this.isContinuousAsr && evt.is_final) {
this._bufferedTranscripts.push(evt);
this._startAsrTimer(channel);
} else {
await this._resolve(channel, evt);
/* deepgram can send a non-final transcript but with words that are final, so we need to buffer */
if (this.vendor === 'deepgram') {
const originalEvent = evt.vendor.evt;
if (originalEvent.is_final && evt.alternatives[0].transcript !== '') {
this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript');
this._bufferedTranscripts.push(evt);
}
}
if (this.interim) {
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - sending interim transcript');
this._resolve(channel, evt);
}
}
}
@@ -506,7 +570,7 @@ class TaskTranscribe extends SttTask {
this._clearAsrTimer(channel);
this._asrTimer = setTimeout(() => {
this.logger.debug(`TaskTranscribe:_startAsrTimer - asr timer went off for channel: ${channel}`);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language, this.vendor);
this._bufferedTranscripts = [];
this._resolve(channel, evt);
}, this.asrTimeout);

View File

@@ -133,8 +133,7 @@ class BackgroundTaskManager extends Emitter {
async _initRecord() {
if (this.cs.accountInfo.account.record_all_calls || this.cs.application.record_all_calls) {
if (!JAMBONZ_RECORD_WS_BASE_URL || !this.cs.accountInfo.account.bucket_credential) {
this.logger.error(`_initRecord: invalid configuration,
missing JAMBONZ_RECORD_WS_BASE_URL or bucket configuration`);
this.logger.error('_initRecord: invalid cfg - missing JAMBONZ_RECORD_WS_BASE_URL or bucket config');
return undefined;
}
const listenOpts = {
@@ -175,7 +174,7 @@ class BackgroundTaskManager extends Emitter {
}
_taskCompleted(type, task) {
this.logger.debug({type, task}, 'BackgroundTaskManager:_taskCompleted: task completed');
this.logger.debug({type, task}, `BackgroundTaskManager:_taskCompleted: task completed, sticky: ${task.sticky}`);
task.removeAllListeners();
task.span.end();
this.tasks.delete(type);
@@ -188,7 +187,8 @@ class BackgroundTaskManager extends Emitter {
}
_bargeInTaskCompleted(evt) {
this.logger.debug({evt}, 'BackgroundTaskManager:_bargeInTaskCompleted on event from background bargeIn');
this.logger.debug({evt},
'BackgroundTaskManager:_bargeInTaskCompleted on event from background bargeIn, emitting bargein-done event');
this.emit('bargeIn-done', evt);
}
}

View File

@@ -150,7 +150,7 @@ const selectDefaultDeepgramModel = (task, language) => {
return 'base';
};
const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
const consolidateTranscripts = (bufferedTranscripts, channel, language, vendor) => {
if (bufferedTranscripts.length === 1) return bufferedTranscripts[0];
let totalConfidence = 0;
const finalTranscript = bufferedTranscripts.reduce((acc, evt) => {
@@ -191,7 +191,7 @@ const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
totalConfidence / bufferedTranscripts.length;
finalTranscript.alternatives[0].transcript = finalTranscript.alternatives[0].transcript.trim();
finalTranscript.vendor = {
name: 'deepgram',
name: vendor,
evt: bufferedTranscripts
};
return finalTranscript;
@@ -488,6 +488,13 @@ module.exports = (logger) => {
if ('google' === vendor) {
const model = task.name === TaskName.Gather ? 'command_and_search' : 'latest_long';
/**
* When we support google v2 the models are different and we will want something like:
* const useV2 = sttCredentials?.credentials?.project_id; //TODO: v2 pref should be set in googleOptions
* const model = task.name === TaskName.Gather ?
* (useV2 ? 'telephony_short' : 'command_and_search') :
* (useV2 ? 'long' : 'latest_long');
*/
opts = {
...opts,
...(sttCredentials && {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
@@ -520,6 +527,12 @@ module.exports = (logger) => {
...{GOOGLE_SPEECH_MODEL: rOpts.model || model},
...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line',
/*
...(useV2 && {
GOOGLE_SPEECH_RECOGNIZER_PARENT: `projects/${sttCredentials.credentials.project_id}/locations/global`,
GOOGLE_SPEECH_CLOUD_SERVICES_VERSION: 'v2'
}),
*/
};
}
else if (['aws', 'polly'].includes(vendor)) {
@@ -558,6 +571,8 @@ module.exports = (logger) => {
...{AZURE_USE_OUTPUT_FORMAT_DETAILED: 1},
...(azureOptions.speechSegmentationSilenceTimeoutMs &&
{AZURE_SPEECH_SEGMENTATION_SILENCE_TIMEOUT_MS: azureOptions.speechSegmentationSilenceTimeoutMs}),
...(azureOptions.languageIdMode &&
{AZURE_LANGUAGE_ID_MODE: azureOptions.languageIdMode}),
...(sttCredentials && {
...(sttCredentials.api_key && {AZURE_SUBSCRIPTION_KEY: sttCredentials.api_key}),
...(sttCredentials.region && {AZURE_REGION: sttCredentials.region}),

42
package-lock.json generated
View File

@@ -15,10 +15,10 @@
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.4",
"@jambonz/realtimedb-helpers": "^0.8.7",
"@jambonz/speech-utils": "^0.0.41",
"@jambonz/speech-utils": "^0.0.42",
"@jambonz/stats-collector": "^0.1.9",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.53",
"@jambonz/verb-specifications": "^0.0.63",
"@opentelemetry/api": "^1.4.0",
"@opentelemetry/exporter-jaeger": "^1.9.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
@@ -31,7 +31,7 @@
"bent": "^7.3.12",
"debug": "^4.3.4",
"deepcopy": "^2.1.0",
"drachtio-fsmrf": "^3.0.37",
"drachtio-fsmrf": "^3.0.38",
"drachtio-srf": "^4.5.31",
"express": "^4.18.2",
"express-validator": "^7.0.1",
@@ -3468,9 +3468,9 @@
}
},
"node_modules/@jambonz/speech-utils": {
"version": "0.0.41",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.0.41.tgz",
"integrity": "sha512-i07b4usrw5olnqU0WCRbTVzWauvA2IEfpFeKfdAxeTZ8VbbpKeTOTCfEpz4DlkOWGcR8kWrdM9DWbK4fjv/t1w==",
"version": "0.0.42",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.0.42.tgz",
"integrity": "sha512-ROYin2JqV41Q9T14SOpaXBAvalkOAiMGzCxG9Q1d3XCvxDQ/QQXHbZeFdd9cc64eq4OJNtd9lxmnCS+DSPNuXQ==",
"dependencies": {
"@aws-sdk/client-polly": "^3.496.0",
"@aws-sdk/client-sts": "^3.496.0",
@@ -3517,9 +3517,9 @@
}
},
"node_modules/@jambonz/verb-specifications": {
"version": "0.0.53",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.53.tgz",
"integrity": "sha512-CXQlFuHgPcjB8UR75Zxh41aF2Q/QegBAq+zsuegmpeD5fwI6EEvydAm2aSDICNhtp7iN13KzQ6wlh5u9TQnR0Q==",
"version": "0.0.63",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.63.tgz",
"integrity": "sha512-eVO/W1z/y3U6xvwWdbdl3QGACJPcjgsGARcuzeqnafD5n8M22htM9HfHBXjw6L6TfQBc1NEFkRIF/1wx3GEyHA==",
"dependencies": {
"debug": "^4.3.4",
"pino": "^8.8.0"
@@ -6110,9 +6110,9 @@
}
},
"node_modules/drachtio-fsmrf": {
"version": "3.0.37",
"resolved": "https://registry.npmjs.org/drachtio-fsmrf/-/drachtio-fsmrf-3.0.37.tgz",
"integrity": "sha512-YS1H6w//Bj5Kk1pUXGaXHYWig/kowdBEoO/ApfKrnzwXIDyf9OVeSBmJ7gnAb5st6WszhByZE6S3ib3zbc68CQ==",
"version": "3.0.38",
"resolved": "https://registry.npmjs.org/drachtio-fsmrf/-/drachtio-fsmrf-3.0.38.tgz",
"integrity": "sha512-nR/FPEqgGxKkqYxU+afRivIyDQOpZJbLLd2ydYlubFsUWYxDugPu2rGT6/t0fYgePn6qpA418z+uMA65aB8Q/w==",
"dependencies": {
"camel-case": "^4.1.2",
"debug": "^2.6.9",
@@ -14112,9 +14112,9 @@
}
},
"@jambonz/speech-utils": {
"version": "0.0.41",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.0.41.tgz",
"integrity": "sha512-i07b4usrw5olnqU0WCRbTVzWauvA2IEfpFeKfdAxeTZ8VbbpKeTOTCfEpz4DlkOWGcR8kWrdM9DWbK4fjv/t1w==",
"version": "0.0.42",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.0.42.tgz",
"integrity": "sha512-ROYin2JqV41Q9T14SOpaXBAvalkOAiMGzCxG9Q1d3XCvxDQ/QQXHbZeFdd9cc64eq4OJNtd9lxmnCS+DSPNuXQ==",
"requires": {
"@aws-sdk/client-polly": "^3.496.0",
"@aws-sdk/client-sts": "^3.496.0",
@@ -14160,9 +14160,9 @@
}
},
"@jambonz/verb-specifications": {
"version": "0.0.53",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.53.tgz",
"integrity": "sha512-CXQlFuHgPcjB8UR75Zxh41aF2Q/QegBAq+zsuegmpeD5fwI6EEvydAm2aSDICNhtp7iN13KzQ6wlh5u9TQnR0Q==",
"version": "0.0.63",
"resolved": "https://registry.npmjs.org/@jambonz/verb-specifications/-/verb-specifications-0.0.63.tgz",
"integrity": "sha512-eVO/W1z/y3U6xvwWdbdl3QGACJPcjgsGARcuzeqnafD5n8M22htM9HfHBXjw6L6TfQBc1NEFkRIF/1wx3GEyHA==",
"requires": {
"debug": "^4.3.4",
"pino": "^8.8.0"
@@ -16191,9 +16191,9 @@
}
},
"drachtio-fsmrf": {
"version": "3.0.37",
"resolved": "https://registry.npmjs.org/drachtio-fsmrf/-/drachtio-fsmrf-3.0.37.tgz",
"integrity": "sha512-YS1H6w//Bj5Kk1pUXGaXHYWig/kowdBEoO/ApfKrnzwXIDyf9OVeSBmJ7gnAb5st6WszhByZE6S3ib3zbc68CQ==",
"version": "3.0.38",
"resolved": "https://registry.npmjs.org/drachtio-fsmrf/-/drachtio-fsmrf-3.0.38.tgz",
"integrity": "sha512-nR/FPEqgGxKkqYxU+afRivIyDQOpZJbLLd2ydYlubFsUWYxDugPu2rGT6/t0fYgePn6qpA418z+uMA65aB8Q/w==",
"requires": {
"camel-case": "^4.1.2",
"debug": "^2.6.9",

View File

@@ -31,10 +31,10 @@
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.4",
"@jambonz/realtimedb-helpers": "^0.8.7",
"@jambonz/speech-utils": "^0.0.41",
"@jambonz/speech-utils": "^0.0.42",
"@jambonz/stats-collector": "^0.1.9",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.53",
"@jambonz/verb-specifications": "^0.0.63",
"@opentelemetry/api": "^1.4.0",
"@opentelemetry/exporter-jaeger": "^1.9.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
@@ -47,7 +47,7 @@
"bent": "^7.3.12",
"debug": "^4.3.4",
"deepcopy": "^2.1.0",
"drachtio-fsmrf": "^3.0.37",
"drachtio-fsmrf": "^3.0.38",
"drachtio-srf": "^4.5.31",
"express": "^4.18.2",
"express-validator": "^7.0.1",