mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2026-02-09 02:30:17 +00:00
Compare commits
21 Commits
snyk-fix-2
...
v0.9.3-7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
69ba18acd1 | ||
|
|
8bed44cce3 | ||
|
|
8ede41714b | ||
|
|
ee54e4341a | ||
|
|
4bf2f42f33 | ||
|
|
e09c763d3a | ||
|
|
e8a7366526 | ||
|
|
122d267816 | ||
|
|
33bca8e67c | ||
|
|
9c05fd3deb | ||
|
|
7fa0041f6b | ||
|
|
59d9c62cbe | ||
|
|
55b408eecb | ||
|
|
f241faa871 | ||
|
|
65d35c893c | ||
|
|
dbdc1cd43d | ||
|
|
7105453d81 | ||
|
|
8487a4be68 | ||
|
|
2ddcd53d6b | ||
|
|
a4d07ddce0 | ||
|
|
16e044cabf |
@@ -13,7 +13,7 @@ Configuration is provided via environment variables:
|
||||
|AWS_ACCESS_KEY_ID| aws access key id, used for TTS/STT as well SNS notifications|no|
|
||||
|AWS_REGION| aws region| no|
|
||||
|AWS_SECRET_ACCESS_KEY| aws secret access key, used per above|no|
|
||||
|AWS_SNS_TOPIC_ARM| aws sns topic arn that scale-in lifecycle notifications will be published to|no|
|
||||
|AWS_SNS_TOPIC_ARN| aws sns topic arn that scale-in lifecycle notifications will be published to|no|
|
||||
|DRACHTIO_HOST| ip address of drachtio server (typically '127.0.0.1')|yes|
|
||||
|DRACHTIO_PORT| listening port of drachtio server for control connections (typically 9022)|yes|
|
||||
|DRACHTIO_SECRET| shared secret|yes|
|
||||
@@ -72,7 +72,7 @@ module.exports = {
|
||||
STATS_PORT: 8125,
|
||||
STATS_PROTOCOL: 'tcp',
|
||||
STATS_TELEGRAF: 1,
|
||||
AWS_SNS_TOPIC_ARM: 'arn:aws:sns:us-west-1:xxxxxxxxxxx:terraform-20201107200347128600000002',
|
||||
AWS_SNS_TOPIC_ARN: 'arn:aws:sns:us-west-1:xxxxxxxxxxx:terraform-20201107200347128600000002',
|
||||
JAMBONES_NETWORK_CIDR: '172.31.0.0/16',
|
||||
JAMBONES_MYSQL_HOST: 'aurora-cluster-jambonz.cluster-yyyyyyyyyyy.us-west-1.rds.amazonaws.com',
|
||||
JAMBONES_MYSQL_USER: 'admin',
|
||||
|
||||
@@ -93,7 +93,7 @@ const AWS_REGION = process.env.AWS_REGION;
|
||||
const AWS_ACCESS_KEY_ID = process.env.AWS_ACCESS_KEY_ID;
|
||||
const AWS_SECRET_ACCESS_KEY = process.env.AWS_SECRET_ACCESS_KEY;
|
||||
const AWS_SNS_PORT = parseInt(process.env.AWS_SNS_PORT, 10) || 3001;
|
||||
const AWS_SNS_TOPIC_ARM = process.env.AWS_SNS_TOPIC_ARM;
|
||||
const AWS_SNS_TOPIC_ARN = process.env.AWS_SNS_TOPIC_ARN;
|
||||
const AWS_SNS_PORT_MAX = parseInt(process.env.AWS_SNS_PORT_MAX, 10) || 3005;
|
||||
|
||||
const GCP_JSON_KEY = process.env.GCP_JSON_KEY;
|
||||
@@ -189,7 +189,7 @@ module.exports = {
|
||||
AWS_ACCESS_KEY_ID,
|
||||
AWS_SECRET_ACCESS_KEY,
|
||||
AWS_SNS_PORT,
|
||||
AWS_SNS_TOPIC_ARM,
|
||||
AWS_SNS_TOPIC_ARN,
|
||||
AWS_SNS_PORT_MAX,
|
||||
|
||||
ANCHOR_MEDIA_ALWAYS,
|
||||
|
||||
@@ -100,6 +100,7 @@ router.post('/',
|
||||
...(req.body?.application_sid && {'X-Application-Sid': req.body.application_sid}),
|
||||
...(restDial.fromHost && {'X-Preferred-From-Host': restDial.fromHost}),
|
||||
...(record_all_calls && {'X-Record-All-Calls': recordOutputFormat}),
|
||||
...(target.proxy && {'X-SIP-Proxy': target.proxy}),
|
||||
...target.headers
|
||||
};
|
||||
|
||||
|
||||
@@ -1084,6 +1084,12 @@ class CallSession extends Emitter {
|
||||
api_key: credential.api_key
|
||||
};
|
||||
}
|
||||
else if ('voxist' === vendor) {
|
||||
return {
|
||||
speech_credential_sid: credential.speech_credential_sid,
|
||||
api_key: credential.api_key
|
||||
};
|
||||
}
|
||||
else if ('whisper' === vendor) {
|
||||
return {
|
||||
api_key: credential.api_key,
|
||||
@@ -1793,62 +1799,66 @@ Duration=${duration} `
|
||||
async updateCall(opts, callSid) {
|
||||
this.logger.debug(opts, 'CallSession:updateCall');
|
||||
|
||||
if (opts.call_status) {
|
||||
return this._lccCallStatus(opts);
|
||||
}
|
||||
if (opts.call_hook || opts.child_call_hook) {
|
||||
return await this._lccCallHook(opts);
|
||||
}
|
||||
if (opts.listen_status || opts.stream_status) {
|
||||
await this._lccListenStatus(opts);
|
||||
}
|
||||
if (opts.transcribe_status) {
|
||||
await this._lccTranscribeStatus(opts);
|
||||
}
|
||||
else if (opts.mute_status) {
|
||||
await this._lccMuteStatus(opts.mute_status === 'mute', callSid);
|
||||
}
|
||||
else if (opts.conf_hold_status) {
|
||||
await this._lccConfHoldStatus(opts);
|
||||
}
|
||||
else if (opts.conf_mute_status) {
|
||||
await this._lccConfMuteStatus(opts);
|
||||
}
|
||||
else if (opts.sip_request) {
|
||||
const res = await this._lccSipRequest(opts, callSid);
|
||||
return {status: res.status, reason: res.reason};
|
||||
} else if (opts.dtmf) {
|
||||
await this._lccDtmf(opts, callSid);
|
||||
}
|
||||
else if (opts.record) {
|
||||
await this.notifyRecordOptions(opts.record);
|
||||
}
|
||||
else if (opts.tag) {
|
||||
return this._lccTag(opts);
|
||||
}
|
||||
else if (opts.conferenceParticipantAction) {
|
||||
return this._lccConferenceParticipantAction(opts.conferenceParticipantAction);
|
||||
}
|
||||
else if (opts.dub) {
|
||||
return this._lccDub(opts.dub, callSid);
|
||||
}
|
||||
else if (opts.boostAudioSignal) {
|
||||
return this._lccBoostAudioSignal(opts, callSid);
|
||||
}
|
||||
else if (opts.media_path) {
|
||||
return this._lccMediaPath(opts.media_path, callSid);
|
||||
}
|
||||
else if (opts.llm_tool_output) {
|
||||
return this._lccToolOutput(opts.tool_call_id, opts.llm_tool_output, callSid);
|
||||
}
|
||||
else if (opts.llm_update) {
|
||||
return this._lccLlmUpdate(opts.llm_update, callSid);
|
||||
}
|
||||
try {
|
||||
if (opts.call_status) {
|
||||
return this._lccCallStatus(opts);
|
||||
}
|
||||
if (opts.call_hook || opts.child_call_hook) {
|
||||
return await this._lccCallHook(opts);
|
||||
}
|
||||
if (opts.listen_status || opts.stream_status) {
|
||||
await this._lccListenStatus(opts);
|
||||
}
|
||||
if (opts.transcribe_status) {
|
||||
await this._lccTranscribeStatus(opts);
|
||||
}
|
||||
else if (opts.mute_status) {
|
||||
await this._lccMuteStatus(opts.mute_status === 'mute', callSid);
|
||||
}
|
||||
else if (opts.conf_hold_status) {
|
||||
await this._lccConfHoldStatus(opts);
|
||||
}
|
||||
else if (opts.conf_mute_status) {
|
||||
await this._lccConfMuteStatus(opts);
|
||||
}
|
||||
else if (opts.sip_request) {
|
||||
const res = await this._lccSipRequest(opts, callSid);
|
||||
return {status: res.status, reason: res.reason};
|
||||
} else if (opts.dtmf) {
|
||||
await this._lccDtmf(opts, callSid);
|
||||
}
|
||||
else if (opts.record) {
|
||||
await this.notifyRecordOptions(opts.record);
|
||||
}
|
||||
else if (opts.tag) {
|
||||
return this._lccTag(opts);
|
||||
}
|
||||
else if (opts.conferenceParticipantAction) {
|
||||
return this._lccConferenceParticipantAction(opts.conferenceParticipantAction);
|
||||
}
|
||||
else if (opts.dub) {
|
||||
return this._lccDub(opts.dub, callSid);
|
||||
}
|
||||
else if (opts.boostAudioSignal) {
|
||||
return this._lccBoostAudioSignal(opts, callSid);
|
||||
}
|
||||
else if (opts.media_path) {
|
||||
return this._lccMediaPath(opts.media_path, callSid);
|
||||
}
|
||||
else if (opts.llm_tool_output) {
|
||||
return this._lccToolOutput(opts.tool_call_id, opts.llm_tool_output, callSid);
|
||||
}
|
||||
else if (opts.llm_update) {
|
||||
return this._lccLlmUpdate(opts.llm_update, callSid);
|
||||
}
|
||||
|
||||
// whisper may be the only thing we are asked to do, or it may that
|
||||
// we are doing a whisper after having muted, paused recording etc..
|
||||
if (opts.whisper) {
|
||||
return this._lccWhisper(opts, callSid);
|
||||
// whisper may be the only thing we are asked to do, or it may that
|
||||
// we are doing a whisper after having muted, paused recording etc..
|
||||
if (opts.whisper) {
|
||||
return this._lccWhisper(opts, callSid);
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.info({err, opts, callSid}, 'CallSession:updateCall - error updating call');
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2468,12 +2478,14 @@ Duration=${duration} `
|
||||
} else if (sip_method === 'MESSAGE') {
|
||||
res.send(202);
|
||||
} else {
|
||||
this.logger.info(`CallSession:_onRequestWithinDialog unsported method: ${req.method}`);
|
||||
this.logger.warn(`CallSession:_onRequestWithinDialog unsupported method: ${req.method}`);
|
||||
res.send(501);
|
||||
return;
|
||||
}
|
||||
const params = {sip_method, sip_body: req.body, sip_headers: req.headers};
|
||||
this.currentTask.performHook(this, this.sipRequestWithinDialogHook, params);
|
||||
this.currentTask.performHook(this, this.sipRequestWithinDialogHook, params).catch((err) => {
|
||||
this.logger.error({err}, 'CallSession:_onRequestWithinDialog - error calling sipRequestWithinDialogHook');
|
||||
});
|
||||
}
|
||||
|
||||
async _onReinvite(req, res) {
|
||||
@@ -2484,7 +2496,7 @@ Duration=${duration} `
|
||||
res.send(200, {body: this.ep.local.sdp});
|
||||
}
|
||||
else {
|
||||
if (this.currentTask.name === TaskName.Dial && this.currentTask.isOnHoldEnabled) {
|
||||
if (this.currentTask && this.currentTask.name === TaskName.Dial && this.currentTask.isOnHoldEnabled) {
|
||||
this.logger.info('onholdMusic reINVITE after media has been released');
|
||||
await this.currentTask.handleReinviteAfterMediaReleased(req, res);
|
||||
} else {
|
||||
|
||||
@@ -83,7 +83,11 @@ class Conference extends Task {
|
||||
// reset answer time if we were transferred from another feature server
|
||||
if (this.connectTime) dlg.connectTime = this.connectTime;
|
||||
|
||||
|
||||
if (cs.sipRequestWithinDialogHook) {
|
||||
/* remove any existing listener to escape from duplicating events */
|
||||
this._removeSipIndialogRequestListener(this.dlg);
|
||||
this._initSipIndialogRequestListener(cs, dlg);
|
||||
}
|
||||
this.ep.on('destroy', this._kicked.bind(this, cs, dlg));
|
||||
|
||||
try {
|
||||
@@ -103,6 +107,7 @@ class Conference extends Task {
|
||||
|
||||
this.logger.debug(`Conference:exec - conference ${this.confName} is over`);
|
||||
if (this.callMoved !== false) await this.performAction(this.results);
|
||||
this._removeSipIndialogRequestListener(dlg);
|
||||
} catch (err) {
|
||||
this.logger.info(err, `TaskConference:exec - error in conference ${this.confName}`);
|
||||
}
|
||||
@@ -416,6 +421,20 @@ class Conference extends Task {
|
||||
}
|
||||
}
|
||||
|
||||
_initSipIndialogRequestListener(cs, dlg) {
|
||||
dlg.on('info', this._onRequestWithinDialog.bind(this, cs));
|
||||
dlg.on('message', this._onRequestWithinDialog.bind(this, cs));
|
||||
}
|
||||
|
||||
_removeSipIndialogRequestListener(dlg) {
|
||||
dlg && dlg.removeAllListeners('message');
|
||||
dlg && dlg.removeAllListeners('info');
|
||||
}
|
||||
|
||||
_onRequestWithinDialog(cs, req, res) {
|
||||
cs._onRequestWithinDialog(req, res);
|
||||
}
|
||||
|
||||
/**
|
||||
* The conference we have been waiting for has started.
|
||||
* It may be on this server or a different one, and we are
|
||||
|
||||
@@ -187,18 +187,20 @@ class TaskConfig extends Task {
|
||||
: cs.speechRecognizerVendor;
|
||||
cs.speechRecognizerLabel = this.recognizer.label === 'default'
|
||||
? cs.speechRecognizerLabel : this.recognizer.label;
|
||||
cs.speechRecognizerLanguage = this.recognizer.language !== 'default'
|
||||
cs.speechRecognizerLanguage = this.recognizer.language !== undefined && this.recognizer.language !== 'default'
|
||||
? this.recognizer.language
|
||||
: cs.speechRecognizerLanguage;
|
||||
|
||||
//fallback
|
||||
cs.fallbackSpeechRecognizerVendor = this.recognizer.fallbackVendor !== 'default'
|
||||
cs.fallbackSpeechRecognizerVendor = this.recognizer.fallbackVendor !== undefined &&
|
||||
this.recognizer.fallbackVendor !== 'default'
|
||||
? this.recognizer.fallbackVendor
|
||||
: cs.fallbackSpeechRecognizerVendor;
|
||||
cs.fallbackSpeechRecognizerLabel = this.recognizer.fallbackLabel === 'default' ?
|
||||
cs.fallbackSpeechRecognizerLabel :
|
||||
this.recognizer.fallbackLabel;
|
||||
cs.fallbackSpeechRecognizerLanguage = this.recognizer.fallbackLanguage !== 'default'
|
||||
cs.fallbackSpeechRecognizerLanguage = this.recognizer.fallbackLanguage !== undefined &&
|
||||
this.recognizer.fallbackLanguage !== 'default'
|
||||
? this.recognizer.fallbackLanguage
|
||||
: cs.fallbackSpeechRecognizerLanguage;
|
||||
|
||||
|
||||
@@ -230,10 +230,10 @@ class TaskDial extends Task {
|
||||
try {
|
||||
await this.epOther.play(this.dialMusic);
|
||||
} catch (err) {
|
||||
this.logger.error(err, `TaskDial:exec error playing ${this.dialMusic}`);
|
||||
this.logger.error(err, `TaskDial:exec error playing dialMusic ${this.dialMusic}`);
|
||||
await sleepFor(1000);
|
||||
}
|
||||
} while (!this.killed || !this.bridged);
|
||||
} while (!this.killed && !this.bridged && this._mediaPath === MediaPath.FullMedia);
|
||||
})();
|
||||
}
|
||||
}
|
||||
@@ -499,7 +499,7 @@ class TaskDial extends Task {
|
||||
dlg && dlg.removeAllListeners('info');
|
||||
}
|
||||
|
||||
async _onRequestWithinDialog(cs, req, res) {
|
||||
_onRequestWithinDialog(cs, req, res) {
|
||||
cs._onRequestWithinDialog(req, res);
|
||||
}
|
||||
|
||||
@@ -871,7 +871,11 @@ class TaskDial extends Task {
|
||||
|
||||
if (this.parentDtmfCollector) this._installDtmfDetection(cs, cs.dlg);
|
||||
if (this.childDtmfCollector) this._installDtmfDetection(cs, this.dlg);
|
||||
if (cs.sipRequestWithinDialogHook) this._initSipIndialogRequestListener(cs, this.dlg);
|
||||
if (cs.sipRequestWithinDialogHook) {
|
||||
/* remove any existing listener to escape from duplicating events */
|
||||
this._removeSipIndialogRequestListener(this.dlg);
|
||||
this._initSipIndialogRequestListener(cs, this.dlg);
|
||||
}
|
||||
|
||||
if (this.transcribeTask) this.transcribeTask.exec(cs, {ep: this.epOther, ep2:this.ep});
|
||||
if (this.listenTask) this.listenTask.exec(cs, {ep: this.listenTask.channel === 2 ? this.ep : this.epOther});
|
||||
|
||||
@@ -11,6 +11,7 @@ const {
|
||||
NvidiaTranscriptionEvents,
|
||||
JambonzTranscriptionEvents,
|
||||
AssemblyAiTranscriptionEvents,
|
||||
VoxistTranscriptionEvents,
|
||||
VadDetection,
|
||||
VerbioTranscriptionEvents,
|
||||
SpeechmaticsTranscriptionEvents
|
||||
@@ -524,6 +525,17 @@ class TaskGather extends SttTask {
|
||||
this._onVendorConnectFailure.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'voxist':
|
||||
this.bugname = `${this.bugname_prefix}voxist_transcribe`;
|
||||
this.addCustomEventListener(ep, VoxistTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep));
|
||||
this.addCustomEventListener(
|
||||
ep, VoxistTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, VoxistTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, VoxistTranscriptionEvents.ConnectFailure,
|
||||
this._onVendorConnectFailure.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'speechmatics':
|
||||
this.bugname = `${this.bugname_prefix}speechmatics_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
@@ -624,6 +636,13 @@ class TaskGather extends SttTask {
|
||||
this._asrTimer = setTimeout(() => {
|
||||
this.logger.debug('_startAsrTimer - asr timer went off');
|
||||
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
|
||||
|
||||
/* special case for speechmatics - keep listening if we dont have any transcripts */
|
||||
if (this.vendor === 'speechmatics' && this._bufferedTranscripts.length === 0) {
|
||||
this.logger.debug('Gather:_startAsrTimer - speechmatics, no transcripts yet, keep listening');
|
||||
this._startAsrTimer();
|
||||
return;
|
||||
}
|
||||
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout', evt);
|
||||
}, this.asrTimeout);
|
||||
this.logger.debug(`_startAsrTimer: set for ${this.asrTimeout}ms`);
|
||||
@@ -766,10 +785,16 @@ class TaskGather extends SttTask {
|
||||
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram but no buffered transcripts');
|
||||
}
|
||||
else {
|
||||
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
|
||||
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
|
||||
this._bufferedTranscripts = [];
|
||||
this._resolve('speech', evt);
|
||||
const utteranceTime = evt.last_word_end;
|
||||
if (utteranceTime && this._dgTimeOfLastUnprocessedWord && utteranceTime < this._dgTimeOfLastUnprocessedWord) {
|
||||
this.logger.debug('Gather:_onTranscription - got UtteranceEnd with unprocessed words, continue listening');
|
||||
}
|
||||
else {
|
||||
this.logger.debug('Gather:_onTranscription - got UtteranceEnd from deepgram, return buffered transcript');
|
||||
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
|
||||
this._bufferedTranscripts = [];
|
||||
this._resolve('speech', evt);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -780,7 +805,7 @@ class TaskGather extends SttTask {
|
||||
|
||||
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language,
|
||||
this.shortUtterance, this.data.recognizer.punctuation);
|
||||
this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription normalized transcript');
|
||||
//this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription normalized transcript');
|
||||
|
||||
if (evt.alternatives.length === 0) {
|
||||
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
|
||||
@@ -788,8 +813,6 @@ class TaskGather extends SttTask {
|
||||
}
|
||||
const confidence = evt.alternatives[0].confidence;
|
||||
const minConfidence = this.data.recognizer?.minConfidence;
|
||||
this.logger.debug({evt},
|
||||
`TaskGather:_onTranscription - confidence (${confidence}), minConfidence (${minConfidence})`);
|
||||
if (confidence && minConfidence && confidence < minConfidence) {
|
||||
this.logger.info({evt},
|
||||
'TaskGather:_onTranscription - Transcript confidence ' +
|
||||
@@ -905,8 +928,21 @@ class TaskGather extends SttTask {
|
||||
if (originalEvent.is_final && evt.alternatives[0].transcript !== '') {
|
||||
this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript');
|
||||
this._bufferedTranscripts.push(evt);
|
||||
this._dgTimeOfLastUnprocessedWord = null;
|
||||
}
|
||||
if (evt.alternatives[0].transcript === '') {
|
||||
emptyTranscript = true;
|
||||
}
|
||||
else if (!originalEvent.is_final) {
|
||||
/* Deepgram: we have unprocessed words-save last word end time so we can later compare to UtteranceEnd */
|
||||
const words = originalEvent.channel.alternatives[0].words;
|
||||
if (words?.length > 0) {
|
||||
this._dgTimeOfLastUnprocessedWord = words.slice(-1)[0].end;
|
||||
this.logger.debug(
|
||||
`TaskGather:_onTranscription - saving word end time: ${this._dgTimeOfLastUnprocessedWord}`);
|
||||
}
|
||||
|
||||
}
|
||||
if (evt.alternatives[0].transcript === '') emptyTranscript = true;
|
||||
}
|
||||
|
||||
if (!emptyTranscript) {
|
||||
@@ -1176,7 +1212,7 @@ class TaskGather extends SttTask {
|
||||
if (this.parentTask) this.parentTask.emit('stt-low-confidence', evt);
|
||||
else {
|
||||
this.emit('stt-low-confidence', evt);
|
||||
returnedVerbs = await this.performAction({reason: 'stt-low-confidence'});
|
||||
returnedVerbs = await this.performAction({speech:evt, reason: 'stt-low-confidence'});
|
||||
}
|
||||
}
|
||||
} catch (err) { /*already logged error*/ }
|
||||
|
||||
@@ -3,6 +3,7 @@ const {TaskPreconditions} = require('../../utils/constants');
|
||||
const TaskLlmOpenAI_S2S = require('./llms/openai_s2s');
|
||||
const TaskLlmVoiceAgent_S2S = require('./llms/voice_agent_s2s');
|
||||
const TaskLlmUltravox_S2S = require('./llms/ultravox_s2s');
|
||||
const TaskLlmElevenlabs_S2S = require('./llms/elevenlabs_s2s');
|
||||
|
||||
class TaskLlm extends Task {
|
||||
constructor(logger, opts) {
|
||||
@@ -54,6 +55,10 @@ class TaskLlm extends Task {
|
||||
llm = new TaskLlmUltravox_S2S(this.logger, this.data, this);
|
||||
break;
|
||||
|
||||
case 'elevenlabs':
|
||||
llm = new TaskLlmElevenlabs_S2S(this.logger, this.data, this);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new Error(`Unsupported vendor ${this.vendor} for LLM`);
|
||||
}
|
||||
|
||||
302
lib/tasks/llm/llms/elevenlabs_s2s.js
Normal file
302
lib/tasks/llm/llms/elevenlabs_s2s.js
Normal file
@@ -0,0 +1,302 @@
|
||||
const Task = require('../../task');
|
||||
const TaskName = 'Llm_Elevenlabs_s2s';
|
||||
const {LlmEvents_Elevenlabs} = require('../../../utils/constants');
|
||||
const {request} = require('undici');
|
||||
const ClientEvent = 'client.event';
|
||||
const SessionDelete = 'session.delete';
|
||||
|
||||
const elevenlabs_server_events = [
|
||||
'conversation_initiation_metadata',
|
||||
'user_transcript',
|
||||
'agent_response',
|
||||
'client_tool_call'
|
||||
];
|
||||
|
||||
const expandWildcards = (events) => {
|
||||
const expandedEvents = [];
|
||||
|
||||
events.forEach((evt) => {
|
||||
if (evt.endsWith('.*')) {
|
||||
const prefix = evt.slice(0, -2); // Remove the wildcard ".*"
|
||||
const matchingEvents = elevenlabs_server_events.filter((e) => e.startsWith(prefix));
|
||||
expandedEvents.push(...matchingEvents);
|
||||
} else {
|
||||
expandedEvents.push(evt);
|
||||
}
|
||||
});
|
||||
|
||||
return expandedEvents;
|
||||
};
|
||||
|
||||
class TaskLlmElevenlabs_S2S extends Task {
|
||||
constructor(logger, opts, parentTask) {
|
||||
super(logger, opts, parentTask);
|
||||
this.parent = parentTask;
|
||||
|
||||
this.vendor = this.parent.vendor;
|
||||
this.auth = this.parent.auth;
|
||||
|
||||
const {agent_id, api_key} = this.auth || {};
|
||||
if (!agent_id) throw new Error('auth.agent_id is required for Elevenlabs S2S');
|
||||
|
||||
this.agent_id = agent_id;
|
||||
this.api_key = api_key;
|
||||
this.actionHook = this.data.actionHook;
|
||||
this.eventHook = this.data.eventHook;
|
||||
this.toolHook = this.data.toolHook;
|
||||
const {
|
||||
conversation_initiation_client_data,
|
||||
input_sample_rate = 16000,
|
||||
output_sample_rate = 16000
|
||||
} = this.data.llmOptions;
|
||||
this.conversation_initiation_client_data = conversation_initiation_client_data;
|
||||
this.input_sample_rate = input_sample_rate;
|
||||
this.output_sample_rate = output_sample_rate;
|
||||
this.results = {
|
||||
completionReason: 'normal conversation end'
|
||||
};
|
||||
|
||||
/**
|
||||
* only one of these will have items,
|
||||
* if includeEvents, then these are the events to include
|
||||
* if excludeEvents, then these are the events to exclude
|
||||
*/
|
||||
this.includeEvents = [];
|
||||
this.excludeEvents = [];
|
||||
|
||||
/* default to all events if user did not specify */
|
||||
this._populateEvents(this.data.events || elevenlabs_server_events);
|
||||
|
||||
this.addCustomEventListener = parentTask.addCustomEventListener.bind(parentTask);
|
||||
this.removeCustomEventListeners = parentTask.removeCustomEventListeners.bind(parentTask);
|
||||
}
|
||||
|
||||
get name() { return TaskName; }
|
||||
|
||||
async getSignedUrl() {
|
||||
if (!this.api_key) {
|
||||
return {
|
||||
host: 'api.elevenlabs.io',
|
||||
path: `/v1/convai/conversation?agent_id=${this.agent_id}`,
|
||||
};
|
||||
}
|
||||
|
||||
const {statusCode, body} = await request(
|
||||
`https://api.elevenlabs.io/v1/convai/conversation/get_signed_url?agent_id=${this.agent_id}`, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'xi-api-key': this.api_key
|
||||
},
|
||||
}
|
||||
);
|
||||
const data = await body.json();
|
||||
if (statusCode !== 200 || !data?.signed_url) {
|
||||
this.logger.error({statusCode, data}, 'Elevenlabs Error registering call');
|
||||
throw new Error(`Elevenlabs Error registering call: ${data.message}`);
|
||||
}
|
||||
|
||||
const url = new URL(data.signed_url);
|
||||
return {
|
||||
host: url.hostname,
|
||||
path: url.pathname + url.search,
|
||||
};
|
||||
}
|
||||
|
||||
async _api(ep, args) {
|
||||
const res = await ep.api('uuid_elevenlabs_s2s', `^^|${args.join('|')}`);
|
||||
if (!res.body?.startsWith('+OK')) {
|
||||
throw new Error({args}, `Error calling uuid_elevenlabs_s2s: ${res.body}`);
|
||||
}
|
||||
}
|
||||
|
||||
async exec(cs, {ep}) {
|
||||
await super.exec(cs);
|
||||
await this._startListening(cs, ep);
|
||||
|
||||
await this.awaitTaskDone();
|
||||
|
||||
/* note: the parent llm verb started the span, which is why this is necessary */
|
||||
await this.parent.performAction(this.results);
|
||||
|
||||
this._unregisterHandlers();
|
||||
}
|
||||
|
||||
async kill(cs) {
|
||||
super.kill(cs);
|
||||
|
||||
this._api(cs.ep, [cs.ep.uuid, SessionDelete])
|
||||
.catch((err) => this.logger.info({err}, 'TaskLlmElevenlabs_S2S:kill - error deleting session'));
|
||||
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
|
||||
/**
|
||||
* Send function call output to the Elevenlabs server in the form of conversation.item.create
|
||||
* per https://elevenlabs.io/docs/conversational-ai/api-reference/conversational-ai/websocket
|
||||
*/
|
||||
async processToolOutput(ep, tool_call_id, rawData) {
|
||||
try {
|
||||
const {data} = rawData;
|
||||
this.logger.debug({tool_call_id, data}, 'TaskLlmElevenlabs_S2S:processToolOutput');
|
||||
|
||||
if (!data.type || data.type !== 'client_tool_result') {
|
||||
this.logger.info({data},
|
||||
'TaskLlmElevenlabs_S2S:processToolOutput - invalid tool output, must be client_tool_result');
|
||||
}
|
||||
else {
|
||||
await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(data)]);
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.info({err}, 'TaskLlmElevenlabs_S2S:processToolOutput');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a session.update to the Elevenlabs server
|
||||
* Note: creating and deleting conversation items also supported as well as interrupting the assistant
|
||||
*/
|
||||
async processLlmUpdate(ep, data, _callSid) {
|
||||
this.logger.debug({data, _callSid}, 'TaskLlmElevenlabs_S2S:processLlmUpdate, ignored');
|
||||
}
|
||||
|
||||
async _startListening(cs, ep) {
|
||||
this._registerHandlers(ep);
|
||||
|
||||
try {
|
||||
const {host, path} = await this.getSignedUrl();
|
||||
const args = [ep.uuid, 'session.create', this.input_sample_rate, this.output_sample_rate, host, path];
|
||||
await this._api(ep, args);
|
||||
} catch (err) {
|
||||
this.logger.error({err}, 'TaskLlmElevenlabs_S2S:_startListening');
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
}
|
||||
|
||||
async _sendClientEvent(ep, obj) {
|
||||
let ok = true;
|
||||
this.logger.debug({obj}, 'TaskLlmElevenlabs_S2S:_sendClientEvent');
|
||||
try {
|
||||
const args = [ep.uuid, ClientEvent, JSON.stringify(obj)];
|
||||
await this._api(ep, args);
|
||||
} catch (err) {
|
||||
ok = false;
|
||||
this.logger.error({err}, 'TaskLlmElevenlabs_S2S:_sendClientEvent - Error');
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
|
||||
async _sendInitialMessage(ep) {
|
||||
if (this.conversation_initiation_client_data) {
|
||||
if (!await this._sendClientEvent(ep, {
|
||||
type: 'conversation_initiation_client_data',
|
||||
conversation_initiation_client_data: this.conversation_initiation_client_data
|
||||
})) {
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_registerHandlers(ep) {
|
||||
this.addCustomEventListener(ep, LlmEvents_Elevenlabs.Connect, this._onConnect.bind(this, ep));
|
||||
this.addCustomEventListener(ep, LlmEvents_Elevenlabs.ConnectFailure, this._onConnectFailure.bind(this, ep));
|
||||
this.addCustomEventListener(ep, LlmEvents_Elevenlabs.Disconnect, this._onDisconnect.bind(this, ep));
|
||||
this.addCustomEventListener(ep, LlmEvents_Elevenlabs.ServerEvent, this._onServerEvent.bind(this, ep));
|
||||
}
|
||||
|
||||
_unregisterHandlers() {
|
||||
this.removeCustomEventListeners();
|
||||
}
|
||||
|
||||
_onError(ep, evt) {
|
||||
this.logger.info({evt}, 'TaskLlmElevenlabs_S2S:_onError');
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
|
||||
_onConnect(ep) {
|
||||
this.logger.debug('TaskLlmElevenlabs_S2S:_onConnect');
|
||||
this._sendInitialMessage(ep);
|
||||
}
|
||||
_onConnectFailure(_ep, evt) {
|
||||
this.logger.info(evt, 'TaskLlmElevenlabs_S2S:_onConnectFailure');
|
||||
this.results = {completionReason: 'connection failure'};
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
_onDisconnect(_ep, evt) {
|
||||
this.logger.info(evt, 'TaskLlmElevenlabs_S2S:_onConnectFailure');
|
||||
this.results = {completionReason: 'disconnect from remote end'};
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
async _onServerEvent(ep, evt) {
|
||||
let endConversation = false;
|
||||
const type = evt.type;
|
||||
this.logger.info({evt}, 'TaskLlmElevenlabs_S2S:_onServerEvent');
|
||||
|
||||
if (type === 'error') {
|
||||
endConversation = true;
|
||||
this.results = {
|
||||
completionReason: 'server error',
|
||||
error: evt.error
|
||||
};
|
||||
}
|
||||
|
||||
/* tool calls */
|
||||
else if (type === 'client_tool_call') {
|
||||
this.logger.debug({evt}, 'TaskLlmElevenlabs_S2S:_onServerEvent - function_call');
|
||||
if (!this.toolHook) {
|
||||
this.logger.warn({evt}, 'TaskLlmElevenlabs_S2S:_onServerEvent - no toolHook defined!');
|
||||
}
|
||||
else {
|
||||
const {client_tool_call} = evt;
|
||||
const {tool_name: name, tool_call_id: call_id, parameters: args} = client_tool_call;
|
||||
|
||||
try {
|
||||
await this.parent.sendToolHook(call_id, {name, args});
|
||||
} catch (err) {
|
||||
this.logger.info({err, evt}, 'TaskLlmElevenlabs_S2S - error calling function');
|
||||
this.results = {
|
||||
completionReason: 'client error calling function',
|
||||
error: err
|
||||
};
|
||||
endConversation = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check whether we should notify on this event */
|
||||
if (this.includeEvents.length > 0 ? this.includeEvents.includes(type) : !this.excludeEvents.includes(type)) {
|
||||
this.parent.sendEventHook(evt)
|
||||
.catch((err) => this.logger.info({err},
|
||||
'TaskLlmElevenlabs_S2S:_onServerEvent - error sending event hook'));
|
||||
}
|
||||
|
||||
if (endConversation) {
|
||||
this.logger.info({results: this.results},
|
||||
'TaskLlmElevenlabs_S2S:_onServerEvent - ending conversation due to error');
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
}
|
||||
|
||||
_populateEvents(events) {
|
||||
if (events.includes('all')) {
|
||||
/* work by excluding specific events */
|
||||
const exclude = events
|
||||
.filter((evt) => evt.startsWith('-'))
|
||||
.map((evt) => evt.slice(1));
|
||||
if (exclude.length === 0) this.includeEvents = elevenlabs_server_events;
|
||||
else this.excludeEvents = expandWildcards(exclude);
|
||||
}
|
||||
else {
|
||||
/* work by including specific events */
|
||||
const include = events
|
||||
.filter((evt) => !evt.startsWith('-'));
|
||||
this.includeEvents = expandWildcards(include);
|
||||
}
|
||||
|
||||
this.logger.debug({
|
||||
includeEvents: this.includeEvents,
|
||||
excludeEvents: this.excludeEvents
|
||||
}, 'TaskLlmElevenlabs_S2S:_populateEvents');
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = TaskLlmElevenlabs_S2S;
|
||||
@@ -4,6 +4,7 @@ const {request} = require('undici');
|
||||
const {LlmEvents_Ultravox} = require('../../../utils/constants');
|
||||
|
||||
const ultravox_server_events = [
|
||||
'createCall',
|
||||
'pong',
|
||||
'state',
|
||||
'transcript',
|
||||
@@ -88,7 +89,7 @@ class TaskLlmUltravox_S2S extends Task {
|
||||
throw new Error(`Ultravox Error registering call: ${data.message}`);
|
||||
}
|
||||
this.logger.info({joinUrl: data.joinUrl}, 'Ultravox Call registered');
|
||||
return data.joinUrl;
|
||||
return data;
|
||||
}
|
||||
|
||||
_unregisterHandlers() {
|
||||
@@ -105,13 +106,19 @@ class TaskLlmUltravox_S2S extends Task {
|
||||
async _startListening(cs, ep) {
|
||||
this._registerHandlers(ep);
|
||||
|
||||
const joinUrl = await this.createCall();
|
||||
const data = await this.createCall();
|
||||
const {joinUrl} = data;
|
||||
// split the joinUrl into host and path
|
||||
const {host, pathname, search} = new URL(joinUrl);
|
||||
|
||||
try {
|
||||
const args = [ep.uuid, 'session.create', host, pathname + search];
|
||||
await this._api(ep, args);
|
||||
// Notify the application that the session has been created with detail information
|
||||
this._sendLlmEvent('createCall', {
|
||||
type: 'createCall',
|
||||
...data
|
||||
});
|
||||
} catch (err) {
|
||||
this.logger.error({err}, 'TaskLlmUltraVox_S2S:_startListening');
|
||||
this.notifyTaskDone();
|
||||
@@ -190,11 +197,7 @@ class TaskLlmUltravox_S2S extends Task {
|
||||
}
|
||||
}
|
||||
|
||||
/* check whether we should notify on this event */
|
||||
if (this.includeEvents.length > 0 ? this.includeEvents.includes(type) : !this.excludeEvents.includes(type)) {
|
||||
this.parent.sendEventHook(evt)
|
||||
.catch((err) => this.logger.info({err}, 'TaskLlmUltravox_S2S:_onServerEvent - error sending event hook'));
|
||||
}
|
||||
this._sendLlmEvent(type, evt);
|
||||
|
||||
if (endConversation) {
|
||||
this.logger.info({results: this.results},
|
||||
@@ -203,6 +206,14 @@ class TaskLlmUltravox_S2S extends Task {
|
||||
}
|
||||
}
|
||||
|
||||
_sendLlmEvent(type, evt) {
|
||||
/* check whether we should notify on this event */
|
||||
if (this.includeEvents.length > 0 ? this.includeEvents.includes(type) : !this.excludeEvents.includes(type)) {
|
||||
this.parent.sendEventHook(evt)
|
||||
.catch((err) => this.logger.info({err}, 'TaskLlmUltravox_S2S:_onServerEvent - error sending event hook'));
|
||||
}
|
||||
}
|
||||
|
||||
async processToolOutput(ep, tool_call_id, data) {
|
||||
try {
|
||||
this.logger.debug({tool_call_id, data}, 'TaskLlmUltravox_S2S:processToolOutput');
|
||||
|
||||
@@ -213,7 +213,7 @@ class TaskSay extends TtsTask {
|
||||
ep.once('playback-start', (evt) => {
|
||||
this.logger.debug({evt}, 'Say got playback-start');
|
||||
if (this.otelSpan) {
|
||||
this._addStreamingTtsAttributes(this.otelSpan, evt);
|
||||
this._addStreamingTtsAttributes(this.otelSpan, evt, vendor);
|
||||
this.otelSpan.end();
|
||||
this.otelSpan = null;
|
||||
if (evt.variable_tts_cache_filename) {
|
||||
@@ -240,6 +240,7 @@ class TaskSay extends TtsTask {
|
||||
language,
|
||||
voice,
|
||||
engine,
|
||||
model: this.model || this.model_id,
|
||||
text
|
||||
}).catch((err) => this.logger.info({err}, 'Error adding file to cache'));
|
||||
}
|
||||
@@ -307,7 +308,7 @@ class TaskSay extends TtsTask {
|
||||
this.notifyTaskDone();
|
||||
}
|
||||
|
||||
_addStreamingTtsAttributes(span, evt) {
|
||||
_addStreamingTtsAttributes(span, evt, vendor) {
|
||||
const attrs = {'tts.cached': false};
|
||||
for (const [key, value] of Object.entries(evt)) {
|
||||
if (key.startsWith('variable_tts_')) {
|
||||
@@ -321,6 +322,9 @@ class TaskSay extends TtsTask {
|
||||
.replace('elevenlabs_', 'elevenlabs.');
|
||||
if (spanMapping[newKey]) newKey = spanMapping[newKey];
|
||||
attrs[newKey] = value;
|
||||
if (key === 'variable_tts_time_to_first_byte_ms' && value) {
|
||||
this.cs.srf.locals.stats.histogram('tts.response_time', value, [`vendor:${vendor}`]);
|
||||
}
|
||||
}
|
||||
}
|
||||
delete attrs['cache_filename']; //no value in adding this to the span
|
||||
|
||||
@@ -13,6 +13,7 @@ const {
|
||||
JambonzTranscriptionEvents,
|
||||
TranscribeStatus,
|
||||
AssemblyAiTranscriptionEvents,
|
||||
VoxistTranscriptionEvents,
|
||||
VerbioTranscriptionEvents,
|
||||
SpeechmaticsTranscriptionEvents
|
||||
} = require('../utils/constants.json');
|
||||
@@ -300,6 +301,17 @@ class TaskTranscribe extends SttTask {
|
||||
this._onVendorConnectFailure.bind(this, cs, ep, channel));
|
||||
break;
|
||||
|
||||
case 'voxist':
|
||||
this.bugname = `${this.bugname_prefix}voxist_transcribe`;
|
||||
this.addCustomEventListener(ep, VoxistTranscriptionEvents.Transcription,
|
||||
this._onTranscription.bind(this, cs, ep, channel));
|
||||
this.addCustomEventListener(ep,
|
||||
VoxistTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, VoxistTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, VoxistTranscriptionEvents.ConnectFailure,
|
||||
this._onVendorConnectFailure.bind(this, cs, ep, channel));
|
||||
break;
|
||||
|
||||
case 'speechmatics':
|
||||
this.bugname = `${this.bugname_prefix}speechmatics_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
|
||||
@@ -143,16 +143,16 @@ class TtsTask extends Task {
|
||||
`No text-to-speech service credentials for ${vendor} with labels: ${label} have been configured`);
|
||||
}
|
||||
/* parse Nuance voices into name and model */
|
||||
let model;
|
||||
if (vendor === 'nuance' && voice) {
|
||||
const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
|
||||
if (arr) {
|
||||
voice = arr[1];
|
||||
model = arr[2];
|
||||
this.model = arr[2];
|
||||
}
|
||||
} else if (vendor === 'deepgram') {
|
||||
model = voice;
|
||||
this.model = voice;
|
||||
}
|
||||
this.model_id = credentials.model_id;
|
||||
|
||||
/* allow for microsoft custom region voice and api_key to be specified as an override */
|
||||
if (vendor === 'microsoft' && this.options.deploymentId) {
|
||||
@@ -215,7 +215,8 @@ class TtsTask extends Task {
|
||||
// If vendor is changed from the previous one, then reset the cache_speech_handles flag
|
||||
//cs.currentTtsVendor = vendor;
|
||||
|
||||
if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
|
||||
if (!preCache && !this._disableTracing)
|
||||
this.logger.info({vendor, language, voice, model: this.model}, 'TaskSay:exec');
|
||||
try {
|
||||
if (!credentials) {
|
||||
writeAlerts({
|
||||
@@ -250,7 +251,7 @@ class TtsTask extends Task {
|
||||
language,
|
||||
voice,
|
||||
engine,
|
||||
model,
|
||||
model: this.model,
|
||||
salt,
|
||||
credentials,
|
||||
options: this.options,
|
||||
|
||||
@@ -4,7 +4,7 @@ const assert = require('assert');
|
||||
const {
|
||||
AWS_REGION,
|
||||
AWS_SNS_PORT: PORT,
|
||||
AWS_SNS_TOPIC_ARM,
|
||||
AWS_SNS_TOPIC_ARN,
|
||||
AWS_SNS_PORT_MAX,
|
||||
} = require('../config');
|
||||
const {LifeCycleEvents} = require('./constants');
|
||||
@@ -55,12 +55,12 @@ class SnsNotifier extends Emitter {
|
||||
async _handlePost(req, res) {
|
||||
try {
|
||||
const parsedBody = JSON.parse(req.body);
|
||||
this.logger.debug({headers: req.headers, body: parsedBody}, 'Received HTTP POST from AWS');
|
||||
this.logger.info({headers: req.headers, body: parsedBody}, 'Received HTTP POST from AWS');
|
||||
if (!validatePayload(parsedBody)) {
|
||||
this.logger.info('incoming AWS SNS HTTP POST failed signature validation');
|
||||
return res.sendStatus(403);
|
||||
}
|
||||
this.logger.debug('incoming HTTP POST passed validation');
|
||||
this.logger.info('incoming HTTP POST passed validation');
|
||||
res.sendStatus(200);
|
||||
|
||||
switch (parsedBody.Type) {
|
||||
@@ -74,7 +74,18 @@ class SnsNotifier extends Emitter {
|
||||
subscriptionRequestId: this.subscriptionRequestId
|
||||
}, 'response from SNS SubscribeURL');
|
||||
const data = await this.describeInstance();
|
||||
this.lifecycleState = data.AutoScalingGroups[0].Instances[0].LifecycleState;
|
||||
|
||||
const group = data.AutoScalingGroups.find((group) =>
|
||||
group.Instances && group.Instances.some((instance) => instance.InstanceId === this.instanceId)
|
||||
);
|
||||
if (!group) {
|
||||
this.logger.error('Current instance not found in any Auto Scaling group', data);
|
||||
} else {
|
||||
const instance = group.Instances.find((instance) => instance.InstanceId === this.instanceId);
|
||||
this.lifecycleState = instance.LifecycleState;
|
||||
}
|
||||
|
||||
//this.lifecycleState = data.AutoScalingGroups[0].Instances[0].LifecycleState;
|
||||
this.emit('SubscriptionConfirmation', {publicIp: this.publicIp});
|
||||
break;
|
||||
|
||||
@@ -94,7 +105,7 @@ class SnsNotifier extends Emitter {
|
||||
this.unsubscribe();
|
||||
}
|
||||
else {
|
||||
this.logger.debug(`SnsNotifier - instance ${msg.EC2InstanceId} is scaling in (not us)`);
|
||||
this.logger.info(`SnsNotifier - instance ${msg.EC2InstanceId} is scaling in (not us)`);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -111,7 +122,7 @@ class SnsNotifier extends Emitter {
|
||||
|
||||
async init() {
|
||||
try {
|
||||
this.logger.debug('SnsNotifier: retrieving instance data');
|
||||
this.logger.info('SnsNotifier: retrieving instance data');
|
||||
this.instanceId = await getString('http://169.254.169.254/latest/meta-data/instance-id');
|
||||
this.publicIp = await getString('http://169.254.169.254/latest/meta-data/public-ipv4');
|
||||
this.logger.info({
|
||||
@@ -142,13 +153,13 @@ class SnsNotifier extends Emitter {
|
||||
try {
|
||||
const params = {
|
||||
Protocol: 'http',
|
||||
TopicArn: AWS_SNS_TOPIC_ARM,
|
||||
TopicArn: AWS_SNS_TOPIC_ARN,
|
||||
Endpoint: this.snsEndpoint
|
||||
};
|
||||
const response = await snsClient.send(new SubscribeCommand(params));
|
||||
this.logger.info({response}, `response to SNS subscribe to ${AWS_SNS_TOPIC_ARM}`);
|
||||
this.logger.info({response}, `response to SNS subscribe to ${AWS_SNS_TOPIC_ARN}`);
|
||||
} catch (err) {
|
||||
this.logger.error({err}, `Error subscribing to SNS topic arn ${AWS_SNS_TOPIC_ARM}`);
|
||||
this.logger.error({err}, `Error subscribing to SNS topic arn ${AWS_SNS_TOPIC_ARN}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -159,9 +170,9 @@ class SnsNotifier extends Emitter {
|
||||
SubscriptionArn: this.subscriptionArn
|
||||
};
|
||||
const response = await snsClient.send(new UnsubscribeCommand(params));
|
||||
this.logger.info({response}, `response to SNS unsubscribe to ${AWS_SNS_TOPIC_ARM}`);
|
||||
this.logger.info({response}, `response to SNS unsubscribe to ${AWS_SNS_TOPIC_ARN}`);
|
||||
} catch (err) {
|
||||
this.logger.error({err}, `Error unsubscribing to SNS topic arn ${AWS_SNS_TOPIC_ARM}`);
|
||||
this.logger.error({err}, `Error unsubscribing to SNS topic arn ${AWS_SNS_TOPIC_ARN}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -149,6 +149,12 @@
|
||||
"ConnectFailure": "assemblyai_transcribe::connect_failed",
|
||||
"Connect": "assemblyai_transcribe::connect"
|
||||
},
|
||||
"VoxistTranscriptionEvents": {
|
||||
"Transcription": "voxist_transcribe::transcription",
|
||||
"Error": "voxist_transcribe::error",
|
||||
"ConnectFailure": "voxist_transcribe::connect_failed",
|
||||
"Connect": "voxist_transcribe::connect"
|
||||
},
|
||||
"VadDetection": {
|
||||
"Detection": "vad_detect:detection"
|
||||
},
|
||||
@@ -176,6 +182,13 @@
|
||||
"Disconnect": "openai_s2s::disconnect",
|
||||
"ServerEvent": "openai_s2s::server_event"
|
||||
},
|
||||
"LlmEvents_Elevenlabs": {
|
||||
"Error": "error",
|
||||
"Connect": "elevenlabs_s2s::connect",
|
||||
"ConnectFailure": "elevenlabs_s2s::connect_failed",
|
||||
"Disconnect": "elevenlabs_s2s::disconnect",
|
||||
"ServerEvent": "elevenlabs_s2s::server_event"
|
||||
},
|
||||
"LlmEvents_VoiceAgent": {
|
||||
"Error": "error",
|
||||
"Connect": "voice_agent_s2s::connect",
|
||||
|
||||
@@ -122,6 +122,10 @@ const speechMapper = (cred) => {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
}
|
||||
else if ('voxist' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
}
|
||||
else if ('whisper' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
|
||||
@@ -8,7 +8,7 @@ const {
|
||||
JAMBONES_SBCS,
|
||||
K8S,
|
||||
K8S_SBC_SIP_SERVICE_NAME,
|
||||
AWS_SNS_TOPIC_ARM,
|
||||
AWS_SNS_TOPIC_ARN,
|
||||
OPTIONS_PING_INTERVAL,
|
||||
AWS_REGION,
|
||||
NODE_ENV,
|
||||
@@ -35,7 +35,7 @@ module.exports = (logger) => {
|
||||
// listen for SNS lifecycle changes
|
||||
let lifecycleEmitter = new Emitter();
|
||||
let dryUpCalls = false;
|
||||
if (AWS_SNS_TOPIC_ARM && AWS_REGION) {
|
||||
if (AWS_SNS_TOPIC_ARN && AWS_REGION) {
|
||||
|
||||
(async function() {
|
||||
try {
|
||||
|
||||
@@ -30,6 +30,7 @@ const stickyVars = {
|
||||
'DEEPGRAM_SPEECH_TIER',
|
||||
'DEEPGRAM_SPEECH_MODEL',
|
||||
'DEEPGRAM_SPEECH_ENABLE_SMART_FORMAT',
|
||||
'DEEPGRAM_SPEECH_ENABLE_NO_DELAY',
|
||||
'DEEPGRAM_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION',
|
||||
'DEEPGRAM_SPEECH_PROFANITY_FILTER',
|
||||
'DEEPGRAM_SPEECH_REDACT',
|
||||
@@ -44,7 +45,8 @@ const stickyVars = {
|
||||
'DEEPGRAM_SPEECH_VAD_TURNOFF',
|
||||
'DEEPGRAM_SPEECH_TAG',
|
||||
'DEEPGRAM_SPEECH_MODEL_VERSION',
|
||||
'DEEPGRAM_SPEECH_FILLER_WORDS'
|
||||
'DEEPGRAM_SPEECH_FILLER_WORDS',
|
||||
'DEEPGRAM_SPEECH_KEYTERMS',
|
||||
],
|
||||
aws: [
|
||||
'AWS_VOCABULARY_NAME',
|
||||
@@ -105,6 +107,9 @@ const stickyVars = {
|
||||
'ASSEMBLYAI_API_KEY',
|
||||
'ASSEMBLYAI_WORD_BOOST'
|
||||
],
|
||||
voxist: [
|
||||
'VOXIST_API_KEY',
|
||||
],
|
||||
speechmatics: [
|
||||
'SPEECHMATICS_API_KEY',
|
||||
'SPEECHMATICS_HOST',
|
||||
@@ -517,6 +522,25 @@ const normalizeAssemblyAi = (evt, channel, language) => {
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeVoxist = (evt, channel, language) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
return {
|
||||
language_code: language,
|
||||
channel_tag: channel,
|
||||
is_final: evt.type === 'final',
|
||||
alternatives: [
|
||||
{
|
||||
confidence: 1.00,
|
||||
transcript: evt.text,
|
||||
}
|
||||
],
|
||||
vendor: {
|
||||
name: 'voxist',
|
||||
evt: copy
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeSpeechmatics = (evt, channel, language) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
const is_final = evt.message === 'AddTranscript';
|
||||
@@ -567,6 +591,8 @@ module.exports = (logger) => {
|
||||
return normalizeCobalt(evt, channel, language);
|
||||
case 'assemblyai':
|
||||
return normalizeAssemblyAi(evt, channel, language, shortUtterance);
|
||||
case 'voxist':
|
||||
return normalizeVoxist(evt, channel, language);
|
||||
case 'verbio':
|
||||
return normalizeVerbio(evt, channel, language);
|
||||
case 'speechmatics':
|
||||
@@ -705,6 +731,8 @@ module.exports = (logger) => {
|
||||
//azureSttEndpointId overrides sttCredentials.custom_stt_endpoint
|
||||
...(rOpts.azureSttEndpointId &&
|
||||
{AZURE_SERVICE_ENDPOINT_ID: rOpts.azureSttEndpointId}),
|
||||
...(azureOptions.speechRecognitionMode &&
|
||||
{AZURE_RECOGNITION_MODE: azureOptions.speechRecognitionMode}),
|
||||
};
|
||||
}
|
||||
else if ('nuance' === vendor) {
|
||||
@@ -778,6 +806,8 @@ module.exports = (logger) => {
|
||||
{DEEPGRAM_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1},
|
||||
...(deepgramOptions.smartFormatting) &&
|
||||
{DEEPGRAM_SPEECH_ENABLE_SMART_FORMAT: 1},
|
||||
...(deepgramOptions.noDelay) &&
|
||||
{DEEPGRAM_SPEECH_ENABLE_NO_DELAY: 1},
|
||||
...(deepgramOptions.profanityFilter) &&
|
||||
{DEEPGRAM_SPEECH_PROFANITY_FILTER: 1},
|
||||
...(deepgramOptions.redact) &&
|
||||
@@ -815,7 +845,9 @@ module.exports = (logger) => {
|
||||
...(deepgramOptions.version) &&
|
||||
{DEEPGRAM_SPEECH_MODEL_VERSION: deepgramOptions.version},
|
||||
...(deepgramOptions.fillerWords) &&
|
||||
{DEEPGRAM_SPEECH_FILLER_WORDS: deepgramOptions.fillerWords}
|
||||
{DEEPGRAM_SPEECH_FILLER_WORDS: deepgramOptions.fillerWords},
|
||||
...((Array.isArray(deepgramOptions.keyterms) && deepgramOptions.keyterms.length > 0) &&
|
||||
{DEEPGRAM_SPEECH_KEYTERMS: deepgramOptions.keyterms.join(',')})
|
||||
};
|
||||
}
|
||||
else if ('soniox' === vendor) {
|
||||
@@ -924,6 +956,13 @@ module.exports = (logger) => {
|
||||
{ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)})
|
||||
};
|
||||
}
|
||||
else if ('voxist' === vendor) {
|
||||
opts = {
|
||||
...opts,
|
||||
...(sttCredentials.api_key) &&
|
||||
{VOXIST_API_KEY: sttCredentials.api_key},
|
||||
};
|
||||
}
|
||||
else if ('verbio' === vendor) {
|
||||
const {verbioOptions = {}} = rOpts;
|
||||
opts = {
|
||||
|
||||
@@ -4,37 +4,50 @@ const {
|
||||
TtsStreamingEvents,
|
||||
TtsStreamingConnectionStatus
|
||||
} = require('../utils/constants');
|
||||
|
||||
const MAX_CHUNK_SIZE = 1800;
|
||||
const HIGH_WATER_BUFFER_SIZE = 1000;
|
||||
const LOW_WATER_BUFFER_SIZE = 200;
|
||||
const TIMEOUT_RETRY_MSECS = 3000;
|
||||
|
||||
|
||||
const isWhitespace = (str) => /^\s*$/.test(str);
|
||||
|
||||
/**
|
||||
* Each queue item is an object:
|
||||
* - { type: 'text', value: '…' } for text tokens.
|
||||
* - { type: 'flush' } for a flush command.
|
||||
*/
|
||||
class TtsStreamingBuffer extends Emitter {
|
||||
constructor(cs) {
|
||||
super();
|
||||
this.cs = cs;
|
||||
this.logger = cs.logger;
|
||||
|
||||
this.tokens = '';
|
||||
// Use an array to hold our structured items.
|
||||
this.queue = [];
|
||||
// Track total number of characters in text items.
|
||||
this.bufferedLength = 0;
|
||||
this.eventHandlers = [];
|
||||
this._isFull = false;
|
||||
this._connectionStatus = TtsStreamingConnectionStatus.NotConnected;
|
||||
this._flushPending = false;
|
||||
this.timer = null;
|
||||
// Record the last time the text buffer was updated.
|
||||
this.lastUpdateTime = 0;
|
||||
}
|
||||
|
||||
get isEmpty() {
|
||||
return this.tokens.length === 0;
|
||||
return this.queue.length === 0;
|
||||
}
|
||||
|
||||
get size() {
|
||||
return this.bufferedLength;
|
||||
}
|
||||
|
||||
get isFull() {
|
||||
return this._isFull;
|
||||
}
|
||||
|
||||
get size() {
|
||||
return this.tokens.length;
|
||||
}
|
||||
|
||||
get ep() {
|
||||
return this.cs?.ep;
|
||||
}
|
||||
@@ -42,7 +55,8 @@ class TtsStreamingBuffer extends Emitter {
|
||||
async start() {
|
||||
assert.ok(
|
||||
this._connectionStatus === TtsStreamingConnectionStatus.NotConnected,
|
||||
'TtsStreamingBuffer:start already started, or has failed');
|
||||
'TtsStreamingBuffer:start already started, or has failed'
|
||||
);
|
||||
|
||||
this.vendor = this.cs.getTsStreamingVendor();
|
||||
if (!this.vendor) {
|
||||
@@ -55,9 +69,9 @@ class TtsStreamingBuffer extends Emitter {
|
||||
this._connectionStatus = TtsStreamingConnectionStatus.Connecting;
|
||||
try {
|
||||
if (this.eventHandlers.length === 0) this._initHandlers(this.ep);
|
||||
await this._api(this.ep, [this.ep.uuid, 'connect']);
|
||||
await this._api(this.ep, [this.ep.uuid, 'connect']);
|
||||
} catch (err) {
|
||||
this.logger.info({err}, 'TtsStreamingBuffer:start Error connecting to TTS streaming');
|
||||
this.logger.info({ err }, 'TtsStreamingBuffer:start Error connecting to TTS streaming');
|
||||
this._connectionStatus = TtsStreamingConnectionStatus.Failed;
|
||||
}
|
||||
}
|
||||
@@ -67,204 +81,319 @@ class TtsStreamingBuffer extends Emitter {
|
||||
this.removeCustomEventListeners();
|
||||
if (this.ep) {
|
||||
this._api(this.ep, [this.ep.uuid, 'close'])
|
||||
.catch((err) => this.logger.info({err}, 'TtsStreamingBuffer:kill Error closing TTS streaming'));
|
||||
.catch((err) =>
|
||||
this.logger.info({ err }, 'TtsStreamingBuffer:stop Error closing TTS streaming')
|
||||
);
|
||||
}
|
||||
this.timer = null;
|
||||
this.tokens = '';
|
||||
this.queue = [];
|
||||
this.bufferedLength = 0;
|
||||
this._connectionStatus = TtsStreamingConnectionStatus.NotConnected;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add tokens to the buffer and start feeding them to the endpoint if necessary.
|
||||
* Buffer new text tokens.
|
||||
*/
|
||||
async bufferTokens(tokens) {
|
||||
|
||||
if (this._connectionStatus === TtsStreamingConnectionStatus.Failed) {
|
||||
this.logger.info('TtsStreamingBuffer:bufferTokens TTS streaming connection failed, rejecting request');
|
||||
return {status: 'failed', reason: `connection to ${this.vendor} failed`};
|
||||
return { status: 'failed', reason: `connection to ${this.vendor} failed` };
|
||||
}
|
||||
|
||||
if (0 === this.bufferedLength && isWhitespace(tokens)) {
|
||||
this.logger.debug({tokens}, 'TtsStreamingBuffer:bufferTokens discarded whitespace tokens');
|
||||
return { status: 'ok' };
|
||||
}
|
||||
|
||||
const displayedTokens = tokens.length <= 40 ? tokens : tokens.substring(0, 40);
|
||||
const totalLength = tokens.length;
|
||||
|
||||
/* if we crossed the high water mark, reject the request */
|
||||
if (this.tokens.length + totalLength > HIGH_WATER_BUFFER_SIZE) {
|
||||
if (this.bufferedLength + totalLength > HIGH_WATER_BUFFER_SIZE) {
|
||||
this.logger.info(
|
||||
`TtsStreamingBuffer throttling: buffer is full, rejecting request to buffer ${totalLength} tokens`);
|
||||
|
||||
`TtsStreamingBuffer throttling: buffer is full, rejecting request to buffer ${totalLength} tokens`
|
||||
);
|
||||
if (!this._isFull) {
|
||||
this._isFull = true;
|
||||
this.emit(TtsStreamingEvents.Pause);
|
||||
}
|
||||
return {status: 'failed', reason: 'full'};
|
||||
return { status: 'failed', reason: 'full' };
|
||||
}
|
||||
|
||||
this.logger.debug(
|
||||
`TtsStreamingBuffer:bufferTokens "${displayedTokens}" (length: ${totalLength}), starting? ${this.isEmpty}`
|
||||
`TtsStreamingBuffer:bufferTokens "${displayedTokens}" (length: ${totalLength})`
|
||||
);
|
||||
this.tokens += (tokens || '');
|
||||
this.queue.push({ type: 'text', value: tokens });
|
||||
this.bufferedLength += totalLength;
|
||||
// Update the last update time each time new text is buffered.
|
||||
this.lastUpdateTime = Date.now();
|
||||
|
||||
await this._feedTokens();
|
||||
|
||||
return {status: 'ok'};
|
||||
await this._feedQueue();
|
||||
return { status: 'ok' };
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert a flush command. If no text is queued, flush immediately.
|
||||
* Otherwise, append a flush marker so that all text preceding it will be sent
|
||||
* (regardless of sentence boundaries) before the flush is issued.
|
||||
*/
|
||||
flush() {
|
||||
this.logger.debug('TtsStreamingBuffer:flush');
|
||||
if (this._connectionStatus === TtsStreamingConnectionStatus.Connecting) {
|
||||
this.logger.debug('TtsStreamingBuffer:flush TTS stream is not quite ready - wait for connect');
|
||||
this._flushPending = true;
|
||||
if (this.queue.length === 0 || this.queue[this.queue.length - 1].type !== 'flush') {
|
||||
this.queue.push({ type: 'flush' });
|
||||
}
|
||||
return;
|
||||
}
|
||||
else if (this._connectionStatus === TtsStreamingConnectionStatus.Connected) {
|
||||
|
||||
if (this.size === 0) {
|
||||
if (this.isEmpty) {
|
||||
this._doFlush();
|
||||
}
|
||||
else {
|
||||
/* we have tokens queued, so flush after they have been sent */
|
||||
this._pendingFlush = true;
|
||||
if (this.queue[this.queue.length - 1].type !== 'flush') {
|
||||
this.queue.push({ type: 'flush' });
|
||||
this.logger.debug('TtsStreamingBuffer:flush added flush marker to queue');
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
this.logger.debug(
|
||||
`TtsStreamingBuffer:flush TTS stream is not connected, status: ${this._connectionStatus}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
clear() {
|
||||
this.logger.debug('TtsStreamingBuffer:clear');
|
||||
|
||||
if (this._connectionStatus !== TtsStreamingConnectionStatus.Connected) return;
|
||||
clearTimeout(this.timer);
|
||||
this._api(this.ep, [this.ep.uuid, 'clear'])
|
||||
.catch((err) => this.logger.info({err}, 'TtsStreamingBuffer:clear Error clearing TTS streaming'));
|
||||
this.tokens = '';
|
||||
this._api(this.ep, [this.ep.uuid, 'clear']).catch((err) =>
|
||||
this.logger.info({ err }, 'TtsStreamingBuffer:clear Error clearing TTS streaming')
|
||||
);
|
||||
this.queue = [];
|
||||
this.bufferedLength = 0;
|
||||
this.timer = null;
|
||||
this._isFull = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send tokens to the TTS engine in sentence chunks for best playout
|
||||
* Process the queue in two phases.
|
||||
*
|
||||
* Phase 1: Look for flush markers. When a flush marker is found (even if not at the very front),
|
||||
* send all text tokens that came before it immediately (ignoring sentence boundaries)
|
||||
* and then send the flush command. Repeat until there are no flush markers left.
|
||||
*
|
||||
* Phase 2: With the remaining queue (now containing only text items), accumulate text
|
||||
* up to MAX_CHUNK_SIZE and use sentence-boundary logic to determine a chunk.
|
||||
* Then, remove the exact tokens (or portions thereof) that were consumed.
|
||||
*/
|
||||
async _feedTokens(handlingTimeout = false) {
|
||||
this.logger.debug({tokens: this.tokens}, '_feedTokens');
|
||||
|
||||
async _feedQueue(handlingTimeout = false) {
|
||||
this.logger.debug({ queue: this.queue }, 'TtsStreamingBuffer:_feedQueue');
|
||||
try {
|
||||
|
||||
/* are we in a state where we can feed tokens to the TTS? */
|
||||
if (!this.cs.isTtsStreamOpen || !this.ep || !this.tokens) {
|
||||
this.logger.debug('TTS stream is not open or no tokens to send');
|
||||
return this.tokens?.length || 0;
|
||||
if (!this.cs.isTtsStreamOpen || !this.ep) {
|
||||
this.logger.debug('TtsStreamingBuffer:_feedQueue TTS stream is not open or no endpoint available');
|
||||
return;
|
||||
}
|
||||
|
||||
if (this._connectionStatus === TtsStreamingConnectionStatus.NotConnected ||
|
||||
this._connectionStatus === TtsStreamingConnectionStatus.Failed) {
|
||||
this.logger.debug('TtsStreamingBuffer:_feedTokens TTS stream is not connected');
|
||||
if (
|
||||
this._connectionStatus === TtsStreamingConnectionStatus.NotConnected ||
|
||||
this._connectionStatus === TtsStreamingConnectionStatus.Failed
|
||||
) {
|
||||
this.logger.debug('TtsStreamingBuffer:_feedQueue TTS stream is not connected');
|
||||
return;
|
||||
}
|
||||
|
||||
if (this._connectionStatus === TtsStreamingConnectionStatus.Connecting) {
|
||||
this.logger.debug('TtsStreamingBuffer:_feedTokens TTS stream is not ready, waiting for connect');
|
||||
// --- Phase 1: Process flush markers ---
|
||||
// Process any flush marker that isn’t in the very first position.
|
||||
let flushIndex = this.queue.findIndex((item, idx) => item.type === 'flush' && idx > 0);
|
||||
while (flushIndex !== -1) {
|
||||
let flushText = '';
|
||||
// Accumulate all text tokens preceding the flush marker.
|
||||
for (let i = 0; i < flushIndex; i++) {
|
||||
if (this.queue[i].type === 'text') {
|
||||
flushText += this.queue[i].value;
|
||||
}
|
||||
}
|
||||
// Remove those text items.
|
||||
for (let i = 0; i < flushIndex; i++) {
|
||||
const item = this.queue.shift();
|
||||
if (item.type === 'text') {
|
||||
this.bufferedLength -= item.value.length;
|
||||
}
|
||||
}
|
||||
// Remove the flush marker (now at the front).
|
||||
if (this.queue.length > 0 && this.queue[0].type === 'flush') {
|
||||
this.queue.shift();
|
||||
}
|
||||
// Immediately send all accumulated text (ignoring sentence boundaries).
|
||||
if (flushText.length > 0) {
|
||||
const modifiedFlushText = flushText.replace(/\n\n/g, '\n \n');
|
||||
try {
|
||||
await this._api(this.ep, [this.ep.uuid, 'send', modifiedFlushText]);
|
||||
} catch (err) {
|
||||
this.logger.info({ err, flushText }, 'TtsStreamingBuffer:_feedQueue Error sending TTS chunk');
|
||||
}
|
||||
}
|
||||
// Send the flush command.
|
||||
await this._doFlush();
|
||||
|
||||
flushIndex = this.queue.findIndex((item, idx) => item.type === 'flush' && idx > 0);
|
||||
}
|
||||
|
||||
// If a flush marker is at the very front, process it.
|
||||
while (this.queue.length > 0 && this.queue[0].type === 'flush') {
|
||||
this.queue.shift();
|
||||
await this._doFlush();
|
||||
}
|
||||
|
||||
// --- Phase 2: Process remaining text tokens ---
|
||||
if (this.queue.length === 0) {
|
||||
this._removeTimer();
|
||||
return;
|
||||
}
|
||||
|
||||
/* must send at least one sentence */
|
||||
const limit = Math.min(MAX_CHUNK_SIZE, this.tokens.length);
|
||||
let chunkEnd = findSentenceBoundary(this.tokens, limit);
|
||||
// Accumulate contiguous text tokens (from the front) up to MAX_CHUNK_SIZE.
|
||||
let combinedText = '';
|
||||
for (const item of this.queue) {
|
||||
if (item.type !== 'text') break;
|
||||
combinedText += item.value;
|
||||
if (combinedText.length >= MAX_CHUNK_SIZE) break;
|
||||
}
|
||||
if (combinedText.length === 0) {
|
||||
this._removeTimer();
|
||||
return;
|
||||
}
|
||||
|
||||
const limit = Math.min(MAX_CHUNK_SIZE, combinedText.length);
|
||||
let chunkEnd = findSentenceBoundary(combinedText, limit);
|
||||
if (chunkEnd <= 0) {
|
||||
if (handlingTimeout) {
|
||||
/* on a timeout we've left some tokens sitting around, so be more aggressive now in sending them */
|
||||
chunkEnd = findWordBoundary(this.tokens, limit);
|
||||
chunkEnd = findWordBoundary(combinedText, limit);
|
||||
if (chunkEnd <= 0) {
|
||||
this.logger.debug('TtsStreamingBuffer:_feedTokens: no word boundary found');
|
||||
this._setTimerIfNeeded();
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* if we just received tokens, we wont send unless we have at least a full sentence */
|
||||
this.logger.debug('TtsStreamingBuffer:_feedTokens: no sentence boundary found');
|
||||
} else {
|
||||
this._setTimerIfNeeded();
|
||||
return;
|
||||
}
|
||||
}
|
||||
const chunk = combinedText.slice(0, chunkEnd);
|
||||
|
||||
const chunk = this.tokens.slice(0, chunkEnd);
|
||||
this.tokens = this.tokens.slice(chunkEnd);
|
||||
// Now we iterate over the queue items
|
||||
// and deduct their lengths until we've accounted for chunkEnd characters.
|
||||
let remaining = chunkEnd;
|
||||
let tokensProcessed = 0;
|
||||
for (let i = 0; i < this.queue.length; i++) {
|
||||
const token = this.queue[i];
|
||||
if (token.type !== 'text') break;
|
||||
if (remaining >= token.value.length) {
|
||||
remaining -= token.value.length;
|
||||
tokensProcessed = i + 1;
|
||||
} else {
|
||||
// Partially consumed token: update its value to remove the consumed part.
|
||||
token.value = token.value.slice(remaining);
|
||||
tokensProcessed = i;
|
||||
remaining = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Remove the fully consumed tokens from the front of the queue.
|
||||
this.queue.splice(0, tokensProcessed);
|
||||
this.bufferedLength -= chunkEnd;
|
||||
|
||||
/* freeswitch looks for sequence of 2 newlines to determine end of message, so insert a space */
|
||||
const modifiedChunk = chunk.replace(/\n\n/g, '\n \n');
|
||||
await this._api(this.ep, [this.ep.uuid, 'send', modifiedChunk]);
|
||||
this.logger.debug(`TtsStreamingBuffer:_feedTokens: sent ${chunk.length}, remaining: ${this.tokens.length}`);
|
||||
this.logger.debug(`TtsStreamingBuffer:_feedQueue sending chunk to tts: ${modifiedChunk}`);
|
||||
|
||||
if (this._pendingFlush) {
|
||||
this._doFlush();
|
||||
this._pendingFlush = false;
|
||||
try {
|
||||
await this._api(this.ep, [this.ep.uuid, 'send', modifiedChunk]);
|
||||
} catch (err) {
|
||||
this.logger.info({ err, chunk }, 'TtsStreamingBuffer:_feedQueue Error sending TTS chunk');
|
||||
}
|
||||
|
||||
if (this.isFull && this.tokens.length <= LOW_WATER_BUFFER_SIZE) {
|
||||
this.logger.info('TtsStreamingBuffer throttling: TTS streaming buffer is no longer full - resuming');
|
||||
if (this._isFull && this.bufferedLength <= LOW_WATER_BUFFER_SIZE) {
|
||||
this.logger.info('TtsStreamingBuffer throttling: buffer is no longer full - resuming');
|
||||
this._isFull = false;
|
||||
this.emit(TtsStreamingEvents.Resume);
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.info({err}, 'TtsStreamingBuffer:_feedTokens Error sending TTS chunk');
|
||||
this.tokens = '';
|
||||
}
|
||||
|
||||
return;
|
||||
return this._feedQueue();
|
||||
} catch (err) {
|
||||
this.logger.info({ err }, 'TtsStreamingBuffer:_feedQueue Error sending TTS chunk');
|
||||
this.queue = [];
|
||||
this.bufferedLength = 0;
|
||||
}
|
||||
}
|
||||
|
||||
async _api(ep, args) {
|
||||
const apiCmd = `uuid_${this.vendor.startsWith('custom:') ? 'custom' : this.vendor}_tts_streaming`;
|
||||
const res = await ep.api(apiCmd, `^^|${args.join('|')}`);
|
||||
if (!res.body?.startsWith('+OK')) {
|
||||
this.logger.info({args}, `Error calling ${apiCmd}: ${res.body}`);
|
||||
this.logger.info({ args }, `Error calling ${apiCmd}: ${res.body}`);
|
||||
throw new Error(`Error calling ${apiCmd}: ${res.body}`);
|
||||
}
|
||||
}
|
||||
|
||||
_onConnectFailure(vendor) {
|
||||
this.logger.info(`streaming tts connection failed to ${vendor}`);
|
||||
this._connectionStatus = TtsStreamingConnectionStatus.Failed;
|
||||
this.tokens = '';
|
||||
this.emit(TtsStreamingEvents.ConnectFailure, {vendor});
|
||||
}
|
||||
|
||||
_doFlush() {
|
||||
this._api(this.ep, [this.ep.uuid, 'flush'])
|
||||
.catch((err) => this.logger.info({err},
|
||||
`TtsStreamingBuffer:_doFlush Error flushing TTS streaming: ${JSON.stringify(err)}`));
|
||||
return this._api(this.ep, [this.ep.uuid, 'flush'])
|
||||
.then(() => this.logger.debug('TtsStreamingBuffer:_doFlush sent flush command'))
|
||||
.catch((err) =>
|
||||
this.logger.info(
|
||||
{ err },
|
||||
`TtsStreamingBuffer:_doFlush Error flushing TTS streaming: ${JSON.stringify(err)}`
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
async _onConnect(vendor) {
|
||||
this.logger.info(`streaming tts connection made to ${vendor}`);
|
||||
this.logger.info(`TtsStreamingBuffer:_onConnect streaming tts connection made to ${vendor} successful`);
|
||||
this._connectionStatus = TtsStreamingConnectionStatus.Connected;
|
||||
if (this.tokens.length > 0) {
|
||||
await this._feedTokens();
|
||||
}
|
||||
if (this._flushPending) {
|
||||
this.flush();
|
||||
this._flushPending = false;
|
||||
if (this.queue.length > 0) {
|
||||
await this._feedQueue();
|
||||
}
|
||||
}
|
||||
|
||||
_onConnectFailure(vendor) {
|
||||
this.logger.info(`TtsStreamingBuffer:_onConnectFailure streaming tts connection failed to ${vendor}`);
|
||||
this._connectionStatus = TtsStreamingConnectionStatus.Failed;
|
||||
this.queue = [];
|
||||
this.bufferedLength = 0;
|
||||
this.emit(TtsStreamingEvents.ConnectFailure, { vendor });
|
||||
}
|
||||
|
||||
_setTimerIfNeeded() {
|
||||
if (this.tokens.length > 0 && !this.timer) {
|
||||
if (this.bufferedLength > 0 && !this.timer) {
|
||||
this.logger.debug({queue: this.queue},
|
||||
`TtsStreamingBuffer:_setTimerIfNeeded setting timer because ${this.bufferedLength} buffered`);
|
||||
this.timer = setTimeout(this._onTimeout.bind(this), TIMEOUT_RETRY_MSECS);
|
||||
}
|
||||
}
|
||||
|
||||
_removeTimer() {
|
||||
if (this.timer) {
|
||||
this.logger.debug('TtsStreamingBuffer:_removeTimer clearing timer');
|
||||
clearTimeout(this.timer);
|
||||
this.timer = null;
|
||||
}
|
||||
}
|
||||
|
||||
_onTimeout() {
|
||||
this.logger.info('TtsStreamingBuffer:_onTimeout');
|
||||
this.logger.debug('TtsStreamingBuffer:_onTimeout Timeout waiting for sentence boundary');
|
||||
// Check if new text has been added since the timer was set.
|
||||
const now = Date.now();
|
||||
if (now - this.lastUpdateTime < TIMEOUT_RETRY_MSECS) {
|
||||
this.logger.debug('TtsStreamingBuffer:_onTimeout New text received recently; postponing flush.');
|
||||
this._setTimerIfNeeded();
|
||||
return;
|
||||
}
|
||||
this.timer = null;
|
||||
this._feedTokens(true);
|
||||
this._feedQueue(true);
|
||||
}
|
||||
|
||||
_onTtsEmpty(vendor) {
|
||||
this.emit(TtsStreamingEvents.Empty, {vendor});
|
||||
this.emit(TtsStreamingEvents.Empty, { vendor });
|
||||
}
|
||||
|
||||
addCustomEventListener(ep, event, handler) {
|
||||
this.eventHandlers.push({ep, event, handler});
|
||||
this.eventHandlers.push({ ep, event, handler });
|
||||
ep.addCustomEventListener(event, handler);
|
||||
}
|
||||
|
||||
@@ -274,7 +403,6 @@ class TtsStreamingBuffer extends Emitter {
|
||||
|
||||
_initHandlers(ep) {
|
||||
[
|
||||
// DH: add other vendors here as modules are added
|
||||
'deepgram',
|
||||
'cartesia',
|
||||
'elevenlabs',
|
||||
@@ -293,23 +421,21 @@ class TtsStreamingBuffer extends Emitter {
|
||||
}
|
||||
|
||||
const findSentenceBoundary = (text, limit) => {
|
||||
// Match traditional sentence boundaries or double newlines
|
||||
// Look for punctuation or double newline that signals sentence end.
|
||||
const sentenceEndRegex = /[.!?](?=\s|$)|\n\n/g;
|
||||
let lastSentenceBoundary = -1;
|
||||
let match;
|
||||
|
||||
while ((match = sentenceEndRegex.exec(text)) && match.index < limit) {
|
||||
const precedingText = text.slice(0, match.index).trim(); // Extract text before the match and trim whitespace
|
||||
if (precedingText.length > 0) { // Check if there's actual content
|
||||
const precedingText = text.slice(0, match.index).trim();
|
||||
if (precedingText.length > 0) {
|
||||
if (
|
||||
match[0] === '\n\n' || // It's a double newline
|
||||
(match.index === 0 || !/\d$/.test(text[match.index - 1])) // Standard punctuation rules
|
||||
match[0] === '\n\n' ||
|
||||
(match.index === 0 || !/\d$/.test(text[match.index - 1]))
|
||||
) {
|
||||
lastSentenceBoundary = match.index + (match[0] === '\n\n' ? 2 : 1); // Include the boundary
|
||||
lastSentenceBoundary = match.index + (match[0] === '\n\n' ? 2 : 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return lastSentenceBoundary;
|
||||
};
|
||||
|
||||
@@ -317,7 +443,6 @@ const findWordBoundary = (text, limit) => {
|
||||
const wordBoundaryRegex = /\s+/g;
|
||||
let lastWordBoundary = -1;
|
||||
let match;
|
||||
|
||||
while ((match = wordBoundaryRegex.exec(text)) && match.index < limit) {
|
||||
lastWordBoundary = match.index;
|
||||
}
|
||||
|
||||
@@ -431,6 +431,21 @@ class WsRequestor extends BaseRequestor {
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.info({err, content}, 'WsRequestor:_onMessage - invalid incoming message');
|
||||
const params = {
|
||||
msg: 'InvalidMessage',
|
||||
details: err.message,
|
||||
content: Buffer.from(content).toString('utf-8')
|
||||
};
|
||||
const {writeAlerts, AlertType} = this.Alerter;
|
||||
writeAlerts({
|
||||
account_sid: this.account_sid,
|
||||
alert_type: AlertType.INVALID_APP_PAYLOAD,
|
||||
target_sid: this.call_sid,
|
||||
message: err.message,
|
||||
|
||||
}).catch((err) => this.logger.info({err}, 'Error generating alert for invalid message'));
|
||||
this.request('jambonz:error', '/error', params)
|
||||
.catch((err) => this.logger.debug({err}, 'WsRequestor:_onMessage - Error sending'));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
9683
package-lock.json
generated
9683
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -31,9 +31,9 @@
|
||||
"@jambonz/http-health-check": "^0.0.1",
|
||||
"@jambonz/mw-registrar": "^0.2.7",
|
||||
"@jambonz/realtimedb-helpers": "^0.8.8",
|
||||
"@jambonz/speech-utils": "^0.2.1",
|
||||
"@jambonz/speech-utils": "^0.2.3",
|
||||
"@jambonz/stats-collector": "^0.1.10",
|
||||
"@jambonz/verb-specifications": "^0.0.94",
|
||||
"@jambonz/verb-specifications": "^0.0.97",
|
||||
"@jambonz/time-series": "^0.2.13",
|
||||
"@opentelemetry/api": "^1.8.0",
|
||||
"@opentelemetry/exporter-jaeger": "^1.23.0",
|
||||
@@ -48,7 +48,7 @@
|
||||
"debug": "^4.3.4",
|
||||
"deepcopy": "^2.1.0",
|
||||
"drachtio-fsmrf": "^4.0.1",
|
||||
"drachtio-srf": "^5.0.1",
|
||||
"drachtio-srf": "^5.0.2",
|
||||
"express": "^4.19.2",
|
||||
"express-validator": "^7.0.1",
|
||||
"moment": "^2.30.1",
|
||||
|
||||
Reference in New Issue
Block a user