Compare commits

..

7 Commits

Author SHA1 Message Date
Dave Horton
e396b6aa98 fix #1466: (#1467)
* fix #1466:

* do not send tts streaming events when we are not doing tts streaming
2025-12-09 09:43:53 -05:00
Vinod Dharashive
9104ebb603 Add configurable say chunk size (#1461) 2025-12-08 10:54:27 -05:00
Vinod Dharashive
1ad0261336 Enhance TTS sentence boundary detection for Arabic and Japanese (#1464)
Update sentenceEndRegex to treat the following as sentence boundaries: ASCII .!? followed by whitespace or end-of-text; Arabic question mark (؟) and full stop (۔) with the same rule; Japanese 。, ！, ？ (full-width forms) treated as boundaries regardless of following character; and double newlines (\n\n). This improves streaming chunking for mixed-language content.
2025-12-08 10:44:20 -05:00
Hoan Luu Huu
7802822773 fixed: dial verb could not bridge the 2 leg endpoints due to transcoding (#1457)
* fixed: dial verb could not bridge the 2 leg endpoints due to transcoding

* wip
2025-12-03 07:16:25 -05:00
Hoan Luu Huu
edb4d21ce1 fixed undefined issue when setting tts streaming channel vars (#1456) 2025-12-02 19:46:28 -05:00
Dave Horton
8048e9cf88 when dialing the B leg we check whether the A leg is using opus and, if so, outdial B with opus first; however, we were incorrectly checking the SDP in the A leg INVITE rather than the SDP in the 200 OK we send back (#1455) 2025-12-02 19:22:20 -05:00
Sam Machin
451feafed4 use timeout on HTTP requests (#1453) 2025-12-02 07:41:47 -05:00
14 changed files with 41 additions and 82 deletions

View File

@@ -119,7 +119,7 @@ const ENCRYPTION_SECRET = process.env.ENCRYPTION_SECRET;
const HTTP_POOL = process.env.HTTP_POOL && parseInt(process.env.HTTP_POOL);
const HTTP_POOLSIZE = parseInt(process.env.HTTP_POOLSIZE, 10) || 10;
const HTTP_PIPELINING = parseInt(process.env.HTTP_PIPELINING, 10) || 1;
const HTTP_TIMEOUT = 10000;
const HTTP_TIMEOUT = parseInt(process.env.JAMBONES_HTTP_TIMEOUT, 10) || 10000;
const HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
const HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
const HTTP_PROXY_PROTOCOL = process.env.JAMBONES_HTTP_PROXY_PROTOCOL || 'http';
@@ -139,6 +139,10 @@ const JAMBONES_USE_FREESWITCH_TIMER_FD = process.env.JAMBONES_USE_FREESWITCH_TIM
const JAMBONES_DIAL_SBC_FOR_REGISTERED_USER = process.env.JAMBONES_DIAL_SBC_FOR_REGISTERED_USER || false;
const JAMBONES_MEDIA_TIMEOUT_MS = process.env.JAMBONES_MEDIA_TIMEOUT_MS || 0;
const JAMBONES_MEDIA_HOLD_TIMEOUT_MS = process.env.JAMBONES_MEDIA_HOLD_TIMEOUT_MS || 0;
/* say / tts */
const JAMBONES_SAY_CHUNK_SIZE = parseInt(process.env.JAMBONES_SAY_CHUNK_SIZE, 10) || 900;
// jambonz
const JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS =
process.env.JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS;
@@ -231,5 +235,6 @@ module.exports = {
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER,
JAMBONES_MEDIA_TIMEOUT_MS,
JAMBONES_MEDIA_HOLD_TIMEOUT_MS,
JAMBONES_SAY_CHUNK_SIZE,
JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS,
};

View File

@@ -291,7 +291,7 @@ router.post('/',
}, {
...(account.enable_debug_log && {level: 'debug'})
});
app.requestor.logger = app.notifier.logger = sipLogger;
app.requestor.logger = app.notifier.logger = restDial.logger = sipLogger;
const callInfo = new CallInfo({
direction: CallDirection.Outbound,
req: inviteReq,

View File

@@ -927,7 +927,7 @@ class CallSession extends Emitter {
this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream enabled');
} else {
this.logger.debug(
'CallSession:enableBackgroundTtsStream - ignoring request as call does not have required conditions');
'CallSession:enableBackgroundTtsStream - ignoring request; conditions not met (probably not using ws api)');
}
} catch (err) {
this.logger.info({err, say}, 'CallSession:enableBackgroundTtsStream - Error creating background tts stream task');
@@ -941,9 +941,11 @@ class CallSession extends Emitter {
}
}
clearTtsStream() {
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
.catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
this.ttsStreamingBuffer?.clear();
if (this.isTtsStreamEnabled) {
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
.catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
this.ttsStreamingBuffer?.clear();
}
}
startTtsStream() {
@@ -951,7 +953,7 @@ class CallSession extends Emitter {
}
stopTtsStream() {
if (this.appIsUsingWebsockets) {
if (this.isTtsStreamEnabled) {
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_closed'})
.catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
this.ttsStreamingBuffer?.stop();
@@ -1141,7 +1143,6 @@ class CallSession extends Emitter {
return {
api_key: credential.api_key,
model_id: credential.model_id,
stt_model_id: credential.stt_model_id,
api_uri: credential.api_uri,
options: credential.options
};

View File

@@ -158,7 +158,7 @@ class TaskDial extends Task {
get canReleaseMedia() {
const keepAnchor = this.data.anchorMedia ||
this.weAreTranscoding ||
this.isTranscoding ||
this.cs.isBackGroundListen ||
this.cs.onHoldMusic ||
ANCHOR_MEDIA_ALWAYS ||
@@ -576,7 +576,7 @@ class TaskDial extends Task {
proxy: `sip:${sbcAddress}`,
callingNumber: this.callerId || fromUri.user,
...(this.callerName && {callingName: this.callerName}),
opusFirst: isOpusFirst(this.cs.ep.remote.sdp),
opusFirst: isOpusFirst(this.cs.ep.local.sdp),
isVideoCall: this.cs.ep.remote.sdp.includes('m=video')
};
@@ -773,6 +773,15 @@ class TaskDial extends Task {
}
async _connectSingleDial(cs, sd) {
// start connect with dialed leg, this is the soonest we can identify transcoding
if (this.epOther && sd.ep) {
const codecA = getLeadingCodec(this.epOther.local.sdp);
const codecB = getLeadingCodec(sd.ep.remote.sdp);
this.isTranscoding = (codecA !== codecB);
if (this.isTranscoding) {
this.logger.info(`Dial:_connectSingleDial - transcoding from ${codecA} (A leg) to ${codecB} (B leg)`);
}
}
if (!this.bridged && !this.canReleaseMedia) {
this.logger.debug('Dial:_connectSingleDial bridging endpoints');
if (this.epOther) {
@@ -930,13 +939,6 @@ class TaskDial extends Task {
this.logger.info({err}, 'Dial:_selectSingleDial - Error boosting audio signal');
}
}
/* basic determination to see if call is being transcoded */
const codecA = getLeadingCodec(this.epOther.local.sdp);
const codecB = getLeadingCodec(this.ep.remote.sdp);
this.weAreTranscoding = (codecA !== codecB);
if (this.weAreTranscoding) {
this.logger.info(`Dial:_selectSingleDial - transcoding from ${codecA} (A leg) to ${codecB} (B leg)`);
}
/* if we can release the media back to the SBC, do so now */
if (this.canReleaseMedia || this.shouldExitMediaPathEntirely) {
setTimeout(this._releaseMedia.bind(this, cs, sd, this.shouldExitMediaPathEntirely), 200);

View File

@@ -5,7 +5,6 @@ const {
AwsTranscriptionEvents,
AzureTranscriptionEvents,
DeepgramTranscriptionEvents,
ElevenlabsTranscriptionEvents,
GladiaTranscriptionEvents,
SonioxTranscriptionEvents,
CobaltTranscriptionEvents,
@@ -493,17 +492,6 @@ class TaskGather extends SttTask {
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'elevenlabs':
this.bugname = `${this.bugname_prefix}elevenlabs_transcribe`;
this.addCustomEventListener(
ep, ElevenlabsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(
ep, ElevenlabsTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, ElevenlabsTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, ElevenlabsTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
break;
case 'gladia':
this.bugname = `${this.bugname_prefix}gladia_transcribe`;
this.addCustomEventListener(
@@ -893,7 +881,7 @@ class TaskGather extends SttTask {
this._fillerNoiseOn = false; // in a race, if we just started audio it may sneak through here
this.ep.api('uuid_break', this.ep.uuid)
.catch((err) => this.logger.info(err, 'Error killing audio'));
cs.clearTtsStream();
if (cs.isTtsStreamEnabled) cs.clearTtsStream();
}
return;
}

View File

@@ -1,6 +1,7 @@
const assert = require('assert');
const TtsTask = require('./tts-task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
const {JAMBONES_SAY_CHUNK_SIZE} = require('../config');
const pollySSMLSplit = require('polly-ssml-split');
const { SpeechCredentialError, NonFatalTaskError } = require('../utils/error');
const { sleepFor } = require('../utils/helpers');
@@ -31,7 +32,7 @@ const isMatchingEvent = (logger, filename, playbackId, evt) => {
const breakLengthyTextIfNeeded = (logger, text) => {
// As the text can be used for tts streaming, we need to break lengthy text into smaller chunks
// HIGH_WATER_BUFFER_SIZE defined in tts-streaming-buffer.js
const chunkSize = 900;
const chunkSize = JAMBONES_SAY_CHUNK_SIZE;
const isSSML = text.startsWith('<speak>');
const options = {
softLimit: 100,

View File

@@ -89,8 +89,9 @@ class TtsTask extends Task {
// api_key, model_id, api_uri, custom_tts_streaming_url, and auth_token are encoded in the credentials
// allow them to be overridden via config, using options
// give preference to options passed in via config
const local_options = {...JSON.parse(options), ...this.options};
const local_voice_settings = {...JSON.parse(options).voice_settings, ...this.options.voice_settings};
const parsed_options = options ? JSON.parse(options) : {};
const local_options = {...parsed_options, ...this.options};
const local_voice_settings = {...(parsed_options.voice_settings || {}), ...(this.options.voice_settings || {})};
const local_api_key = local_options.api_key ?? api_key;
const local_model_id = local_options.model_id ?? model_id;
const local_api_uri = local_options.api_uri ?? api_uri;

View File

@@ -103,12 +103,6 @@
"Connect": "deepgramflux_transcribe::connect",
"Error": "deepgramflux_transcribe::error"
},
"ElevenlabsTranscriptionEvents": {
"Transcription": "elevenlabs_transcribe::transcription",
"ConnectFailure": "elevenlabs_transcribe::connect_failed",
"Connect": "elevenlabs_transcribe::connect",
"Error": "elevenlabs_transcribe::error"
},
"GladiaTranscriptionEvents": {
"Transcription": "gladia_transcribe::transcription",
"ConnectFailure": "gladia_transcribe::connect_failed",

View File

@@ -106,7 +106,6 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
obj.stt_model_id = o.stt_model_id;
obj.api_uri = o.api_uri;
obj.options = o.options;
}

View File

@@ -191,7 +191,7 @@ class HttpRequestor extends BaseRequestor {
method,
headers: hdrs,
...('POST' === method && {body: JSON.stringify(payload)}),
timeout: HTTP_TIMEOUT,
headersTimeout: HTTP_TIMEOUT,
followRedirects: false
};

View File

@@ -127,7 +127,6 @@ class SttLatencyCalculator extends Emitter {
calculateLatency() {
if (!this.isRunning) {
this.logger.debug('Latency calculator is not running, cannot calculate latency, returning default values');
return null;
}

View File

@@ -545,23 +545,6 @@ const normalizeVerbio = (evt, channel, language) => {
};
};
/**
 * Reshape an ElevenLabs transcription event into the common transcript
 * structure (language_code / channel_tag / is_final / alternatives / vendor).
 *
 * @param {object} evt - raw vendor event; `message_type` and `text` are read from it
 * @param {*} channel - value reported back as `channel_tag`
 * @param {*} language - value reported back as `language_code`
 * @returns {object} normalized transcript carrying a JSON round-tripped copy
 *   of the vendor event under `vendor.evt`
 */
const normalizeElevenlabs = (evt, channel, language) => {
  // Snapshot the event via a JSON round-trip so the returned payload does not
  // share references with the caller's object.
  const snapshot = JSON.parse(JSON.stringify(evt));
  const {message_type: messageType, text} = evt;

  // A single alternative is always reported, with a fixed confidence of 1.0.
  const alternatives = [{confidence: 1.0, transcript: text}];
  const vendor = {name: 'elevenlabs', evt: snapshot};

  return {
    language_code: language,
    channel_tag: channel,
    // only 'committed_transcript' messages are flagged as final
    is_final: messageType === 'committed_transcript',
    alternatives,
    vendor
  };
};
const normalizeMicrosoft = (evt, channel, language, punctuation = true) => {
const copy = JSON.parse(JSON.stringify(evt));
const nbest = evt.NBest;
@@ -787,8 +770,6 @@ module.exports = (logger) => {
return normalizeGladia(evt, channel, language, shortUtterance);
case 'deepgramflux':
return normalizeDeepgramFlux(evt, channel, language, shortUtterance);
case 'elevenlabs':
return normalizeElevenlabs(evt, channel, language);
case 'microsoft':
return normalizeMicrosoft(evt, channel, language, punctuation);
case 'google':
@@ -1104,24 +1085,6 @@ module.exports = (logger) => {
...(keyterms && keyterms.length > 0 && {DEEPGRAMFLUX_SPEECH_KEYTERMS: keyterms.join(',')}),
};
}
else if ('elevenlabs' === vendor) {
const {api_key, stt_model_id} = sttCredentials;
const {includeTimestamps, commitStrategy, vadSilenceThresholdSecs, vadThreshold,
minSpeechDurationMs, minSilenceDurationMs, enableLogging} = rOpts.elevenlabsOptions || {};
opts = {
...opts,
ELEVENLABS_API_KEY: api_key,
ELEVENLABS_MODEL_ID: stt_model_id,
ELEVENLABS_INCLUDE_TIMESTAMPS: includeTimestamps === true ? true : false,
...(commitStrategy && {ELEVENLABS_COMMIT_STRATEGY: commitStrategy}),
...(vadSilenceThresholdSecs && {ELEVENLABS_VAD_SILENCE_THRESHOLD_SECS: vadSilenceThresholdSecs}),
...(vadThreshold && {ELEVENLABS_VAD_THRESHOLD: vadThreshold}),
...(minSpeechDurationMs && {ELEVENLABS_MIN_SPEECH_DURATION_MS: minSpeechDurationMs}),
...(minSilenceDurationMs && {ELEVENLABS_MIN_SILENCE_DURATION_MS: minSilenceDurationMs}),
...(enableLogging && {ELEVENLABS_ENABLE_LOGGING: enableLogging ? 1 : 0}),
};
}
else if ('gladia' === vendor) {
const {host, path} = sttCredentials;
opts = {

View File

@@ -163,7 +163,6 @@ class TtsStreamingBuffer extends Emitter {
}
clear() {
this.logger.debug('TtsStreamingBuffer:clear');
if (this._connectionStatus !== TtsStreamingConnectionStatus.Connected) return;
clearTimeout(this.timer);
this._api(this.ep, [this.ep.uuid, 'clear']).catch((err) =>
@@ -437,7 +436,15 @@ class TtsStreamingBuffer extends Emitter {
const findSentenceBoundary = (text, limit) => {
// Look for punctuation or double newline that signals sentence end.
const sentenceEndRegex = /[.!?](?=\s|$)|\n\n/g;
// Includes:
// - ASCII: . ! ?
// - Arabic: ؟ (question mark), ۔ (full stop)
// - Japanese: 。 (full stop), ！ and ？ (full-width exclamation/question)
//
// For languages that use spaces between sentences, we still require
// whitespace or end-of-string after the mark. For Japanese (no spaces),
// we treat the punctuation itself as a boundary regardless of following char.
const sentenceEndRegex = /[.!?؟۔](?=\s|$)|[。!?]|\n\n/g;
let lastSentenceBoundary = -1;
let match;
while ((match = sentenceEndRegex.exec(text)) && match.index < limit) {

1
package-lock.json generated
View File

@@ -8752,7 +8752,6 @@
},
"node_modules/unix-dgram": {
"version": "2.0.6",
"hasInstallScript": true,
"license": "ISC",
"optional": true,
"dependencies": {