mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2026-02-12 09:19:34 +00:00
Compare commits
7 Commits
feat/eleve
...
v0.9.5-7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e396b6aa98 | ||
|
|
9104ebb603 | ||
|
|
1ad0261336 | ||
|
|
7802822773 | ||
|
|
edb4d21ce1 | ||
|
|
8048e9cf88 | ||
|
|
451feafed4 |
@@ -119,7 +119,7 @@ const ENCRYPTION_SECRET = process.env.ENCRYPTION_SECRET;
|
||||
const HTTP_POOL = process.env.HTTP_POOL && parseInt(process.env.HTTP_POOL);
|
||||
const HTTP_POOLSIZE = parseInt(process.env.HTTP_POOLSIZE, 10) || 10;
|
||||
const HTTP_PIPELINING = parseInt(process.env.HTTP_PIPELINING, 10) || 1;
|
||||
const HTTP_TIMEOUT = 10000;
|
||||
const HTTP_TIMEOUT = parseInt(process.env.JAMBONES_HTTP_TIMEOUT, 10) || 10000;
|
||||
const HTTP_PROXY_IP = process.env.JAMBONES_HTTP_PROXY_IP;
|
||||
const HTTP_PROXY_PORT = process.env.JAMBONES_HTTP_PROXY_PORT;
|
||||
const HTTP_PROXY_PROTOCOL = process.env.JAMBONES_HTTP_PROXY_PROTOCOL || 'http';
|
||||
@@ -139,6 +139,10 @@ const JAMBONES_USE_FREESWITCH_TIMER_FD = process.env.JAMBONES_USE_FREESWITCH_TIM
|
||||
const JAMBONES_DIAL_SBC_FOR_REGISTERED_USER = process.env.JAMBONES_DIAL_SBC_FOR_REGISTERED_USER || false;
|
||||
const JAMBONES_MEDIA_TIMEOUT_MS = process.env.JAMBONES_MEDIA_TIMEOUT_MS || 0;
|
||||
const JAMBONES_MEDIA_HOLD_TIMEOUT_MS = process.env.JAMBONES_MEDIA_HOLD_TIMEOUT_MS || 0;
|
||||
|
||||
/* say / tts */
|
||||
const JAMBONES_SAY_CHUNK_SIZE = parseInt(process.env.JAMBONES_SAY_CHUNK_SIZE, 10) || 900;
|
||||
|
||||
// jambonz
|
||||
const JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS =
|
||||
process.env.JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS;
|
||||
@@ -231,5 +235,6 @@ module.exports = {
|
||||
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER,
|
||||
JAMBONES_MEDIA_TIMEOUT_MS,
|
||||
JAMBONES_MEDIA_HOLD_TIMEOUT_MS,
|
||||
JAMBONES_SAY_CHUNK_SIZE,
|
||||
JAMBONES_TRANSCRIBE_EP_DESTROY_DELAY_MS,
|
||||
};
|
||||
|
||||
@@ -291,7 +291,7 @@ router.post('/',
|
||||
}, {
|
||||
...(account.enable_debug_log && {level: 'debug'})
|
||||
});
|
||||
app.requestor.logger = app.notifier.logger = sipLogger;
|
||||
app.requestor.logger = app.notifier.logger = restDial.logger = sipLogger;
|
||||
const callInfo = new CallInfo({
|
||||
direction: CallDirection.Outbound,
|
||||
req: inviteReq,
|
||||
|
||||
@@ -927,7 +927,7 @@ class CallSession extends Emitter {
|
||||
this.logger.debug('CallSession:enableBackgroundTtsStream - ttsStream enabled');
|
||||
} else {
|
||||
this.logger.debug(
|
||||
'CallSession:enableBackgroundTtsStream - ignoring request as call does not have required conditions');
|
||||
'CallSession:enableBackgroundTtsStream - ignoring request; conditions not met (probably not using ws api)');
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.info({err, say}, 'CallSession:enableBackgroundTtsStream - Error creating background tts stream task');
|
||||
@@ -941,9 +941,11 @@ class CallSession extends Emitter {
|
||||
}
|
||||
}
|
||||
clearTtsStream() {
|
||||
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
|
||||
.catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
|
||||
this.ttsStreamingBuffer?.clear();
|
||||
if (this.isTtsStreamEnabled) {
|
||||
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'user_interruption'})
|
||||
.catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
|
||||
this.ttsStreamingBuffer?.clear();
|
||||
}
|
||||
}
|
||||
|
||||
startTtsStream() {
|
||||
@@ -951,7 +953,7 @@ class CallSession extends Emitter {
|
||||
}
|
||||
|
||||
stopTtsStream() {
|
||||
if (this.appIsUsingWebsockets) {
|
||||
if (this.isTtsStreamEnabled) {
|
||||
this.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_closed'})
|
||||
.catch((err) => this.logger.info({err}, 'CallSession:clearTtsStream - Error sending user_interruption'));
|
||||
this.ttsStreamingBuffer?.stop();
|
||||
@@ -1141,7 +1143,6 @@ class CallSession extends Emitter {
|
||||
return {
|
||||
api_key: credential.api_key,
|
||||
model_id: credential.model_id,
|
||||
stt_model_id: credential.stt_model_id,
|
||||
api_uri: credential.api_uri,
|
||||
options: credential.options
|
||||
};
|
||||
|
||||
@@ -158,7 +158,7 @@ class TaskDial extends Task {
|
||||
|
||||
get canReleaseMedia() {
|
||||
const keepAnchor = this.data.anchorMedia ||
|
||||
this.weAreTranscoding ||
|
||||
this.isTranscoding ||
|
||||
this.cs.isBackGroundListen ||
|
||||
this.cs.onHoldMusic ||
|
||||
ANCHOR_MEDIA_ALWAYS ||
|
||||
@@ -576,7 +576,7 @@ class TaskDial extends Task {
|
||||
proxy: `sip:${sbcAddress}`,
|
||||
callingNumber: this.callerId || fromUri.user,
|
||||
...(this.callerName && {callingName: this.callerName}),
|
||||
opusFirst: isOpusFirst(this.cs.ep.remote.sdp),
|
||||
opusFirst: isOpusFirst(this.cs.ep.local.sdp),
|
||||
isVideoCall: this.cs.ep.remote.sdp.includes('m=video')
|
||||
};
|
||||
|
||||
@@ -773,6 +773,15 @@ class TaskDial extends Task {
|
||||
}
|
||||
|
||||
async _connectSingleDial(cs, sd) {
|
||||
// start connect with dialed leg, this is the soonest we can identify transcoding
|
||||
if (this.epOther && sd.ep) {
|
||||
const codecA = getLeadingCodec(this.epOther.local.sdp);
|
||||
const codecB = getLeadingCodec(sd.ep.remote.sdp);
|
||||
this.isTranscoding = (codecA !== codecB);
|
||||
if (this.isTranscoding) {
|
||||
this.logger.info(`Dial:_connectSingleDial - transcoding from ${codecA} (A leg) to ${codecB} (B leg)`);
|
||||
}
|
||||
}
|
||||
if (!this.bridged && !this.canReleaseMedia) {
|
||||
this.logger.debug('Dial:_connectSingleDial bridging endpoints');
|
||||
if (this.epOther) {
|
||||
@@ -930,13 +939,6 @@ class TaskDial extends Task {
|
||||
this.logger.info({err}, 'Dial:_selectSingleDial - Error boosting audio signal');
|
||||
}
|
||||
}
|
||||
/* basic determination to see if call is being transcoded */
|
||||
const codecA = getLeadingCodec(this.epOther.local.sdp);
|
||||
const codecB = getLeadingCodec(this.ep.remote.sdp);
|
||||
this.weAreTranscoding = (codecA !== codecB);
|
||||
if (this.weAreTranscoding) {
|
||||
this.logger.info(`Dial:_selectSingleDial - transcoding from ${codecA} (A leg) to ${codecB} (B leg)`);
|
||||
}
|
||||
/* if we can release the media back to the SBC, do so now */
|
||||
if (this.canReleaseMedia || this.shouldExitMediaPathEntirely) {
|
||||
setTimeout(this._releaseMedia.bind(this, cs, sd, this.shouldExitMediaPathEntirely), 200);
|
||||
|
||||
@@ -5,7 +5,6 @@ const {
|
||||
AwsTranscriptionEvents,
|
||||
AzureTranscriptionEvents,
|
||||
DeepgramTranscriptionEvents,
|
||||
ElevenlabsTranscriptionEvents,
|
||||
GladiaTranscriptionEvents,
|
||||
SonioxTranscriptionEvents,
|
||||
CobaltTranscriptionEvents,
|
||||
@@ -493,17 +492,6 @@ class TaskGather extends SttTask {
|
||||
this.addCustomEventListener(ep, DeepgramfluxTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'elevenlabs':
|
||||
this.bugname = `${this.bugname_prefix}elevenlabs_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
ep, ElevenlabsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
this.addCustomEventListener(
|
||||
ep, ElevenlabsTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, ElevenlabsTranscriptionEvents.ConnectFailure,
|
||||
this._onVendorConnectFailure.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, ElevenlabsTranscriptionEvents.Error, this._onVendorError.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'gladia':
|
||||
this.bugname = `${this.bugname_prefix}gladia_transcribe`;
|
||||
this.addCustomEventListener(
|
||||
@@ -893,7 +881,7 @@ class TaskGather extends SttTask {
|
||||
this._fillerNoiseOn = false; // in a race, if we just started audio it may sneak through here
|
||||
this.ep.api('uuid_break', this.ep.uuid)
|
||||
.catch((err) => this.logger.info(err, 'Error killing audio'));
|
||||
cs.clearTtsStream();
|
||||
if (cs.isTtsStreamEnabled) cs.clearTtsStream();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
const assert = require('assert');
|
||||
const TtsTask = require('./tts-task');
|
||||
const {TaskName, TaskPreconditions} = require('../utils/constants');
|
||||
const {JAMBONES_SAY_CHUNK_SIZE} = require('../config');
|
||||
const pollySSMLSplit = require('polly-ssml-split');
|
||||
const { SpeechCredentialError, NonFatalTaskError } = require('../utils/error');
|
||||
const { sleepFor } = require('../utils/helpers');
|
||||
@@ -31,7 +32,7 @@ const isMatchingEvent = (logger, filename, playbackId, evt) => {
|
||||
const breakLengthyTextIfNeeded = (logger, text) => {
|
||||
// As The text can be used for tts streaming, we need to break lengthy text into smaller chunks
|
||||
// HIGH_WATER_BUFFER_SIZE defined in tts-streaming-buffer.js
|
||||
const chunkSize = 900;
|
||||
const chunkSize = JAMBONES_SAY_CHUNK_SIZE;
|
||||
const isSSML = text.startsWith('<speak>');
|
||||
const options = {
|
||||
softLimit: 100,
|
||||
|
||||
@@ -89,8 +89,9 @@ class TtsTask extends Task {
|
||||
// api_key, model_id, api_uri, custom_tts_streaming_url, and auth_token are encoded in the credentials
|
||||
// allow them to be overriden via config, using options
|
||||
// give preference to options passed in via config
|
||||
const local_options = {...JSON.parse(options), ...this.options};
|
||||
const local_voice_settings = {...JSON.parse(options).voice_settings, ...this.options.voice_settings};
|
||||
const parsed_options = options ? JSON.parse(options) : {};
|
||||
const local_options = {...parsed_options, ...this.options};
|
||||
const local_voice_settings = {...(parsed_options.voice_settings || {}), ...(this.options.voice_settings || {})};
|
||||
const local_api_key = local_options.api_key ?? api_key;
|
||||
const local_model_id = local_options.model_id ?? model_id;
|
||||
const local_api_uri = local_options.api_uri ?? api_uri;
|
||||
|
||||
@@ -103,12 +103,6 @@
|
||||
"Connect": "deepgramflux_transcribe::connect",
|
||||
"Error": "deepgramflux_transcribe::error"
|
||||
},
|
||||
"ElevenlabsTranscriptionEvents": {
|
||||
"Transcription": "elevenlabs_transcribe::transcription",
|
||||
"ConnectFailure": "elevenlabs_transcribe::connect_failed",
|
||||
"Connect": "elevenlabs_transcribe::connect",
|
||||
"Error": "elevenlabs_transcribe::error"
|
||||
},
|
||||
"GladiaTranscriptionEvents": {
|
||||
"Transcription": "gladia_transcribe::transcription",
|
||||
"ConnectFailure": "gladia_transcribe::connect_failed",
|
||||
|
||||
@@ -106,7 +106,6 @@ const speechMapper = (cred) => {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = o.api_key;
|
||||
obj.model_id = o.model_id;
|
||||
obj.stt_model_id = o.stt_model_id;
|
||||
obj.api_uri = o.api_uri;
|
||||
obj.options = o.options;
|
||||
}
|
||||
|
||||
@@ -191,7 +191,7 @@ class HttpRequestor extends BaseRequestor {
|
||||
method,
|
||||
headers: hdrs,
|
||||
...('POST' === method && {body: JSON.stringify(payload)}),
|
||||
timeout: HTTP_TIMEOUT,
|
||||
headersTimeout: HTTP_TIMEOUT,
|
||||
followRedirects: false
|
||||
};
|
||||
|
||||
|
||||
@@ -127,7 +127,6 @@ class SttLatencyCalculator extends Emitter {
|
||||
|
||||
calculateLatency() {
|
||||
if (!this.isRunning) {
|
||||
this.logger.debug('Latency calculator is not running, cannot calculate latency, returning default values');
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@@ -545,23 +545,6 @@ const normalizeVerbio = (evt, channel, language) => {
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeElevenlabs = (evt, channel, language) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
return {
|
||||
language_code: language,
|
||||
channel_tag: channel,
|
||||
is_final: evt.message_type === 'committed_transcript',
|
||||
alternatives: [{
|
||||
confidence: 1.0,
|
||||
transcript: evt.text,
|
||||
}],
|
||||
vendor: {
|
||||
name: 'elevenlabs',
|
||||
evt: copy
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
const normalizeMicrosoft = (evt, channel, language, punctuation = true) => {
|
||||
const copy = JSON.parse(JSON.stringify(evt));
|
||||
const nbest = evt.NBest;
|
||||
@@ -787,8 +770,6 @@ module.exports = (logger) => {
|
||||
return normalizeGladia(evt, channel, language, shortUtterance);
|
||||
case 'deepgramflux':
|
||||
return normalizeDeepgramFlux(evt, channel, language, shortUtterance);
|
||||
case 'elevenlabs':
|
||||
return normalizeElevenlabs(evt, channel, language);
|
||||
case 'microsoft':
|
||||
return normalizeMicrosoft(evt, channel, language, punctuation);
|
||||
case 'google':
|
||||
@@ -1104,24 +1085,6 @@ module.exports = (logger) => {
|
||||
...(keyterms && keyterms.length > 0 && {DEEPGRAMFLUX_SPEECH_KEYTERMS: keyterms.join(',')}),
|
||||
};
|
||||
}
|
||||
else if ('elevenlabs' === vendor) {
|
||||
const {api_key, stt_model_id} = sttCredentials;
|
||||
const {includeTimestamps, commitStrategy, vadSilenceThresholdSecs, vadThreshold,
|
||||
minSpeechDurationMs, minSilenceDurationMs, enableLogging} = rOpts.elevenlabsOptions || {};
|
||||
|
||||
opts = {
|
||||
...opts,
|
||||
ELEVENLABS_API_KEY: api_key,
|
||||
ELEVENLABS_MODEL_ID: stt_model_id,
|
||||
ELEVENLABS_INCLUDE_TIMESTAMPS: includeTimestamps === true ? true : false,
|
||||
...(commitStrategy && {ELEVENLABS_COMMIT_STRATEGY: commitStrategy}),
|
||||
...(vadSilenceThresholdSecs && {ELEVENLABS_VAD_SILENCE_THRESHOLD_SECS: vadSilenceThresholdSecs}),
|
||||
...(vadThreshold && {ELEVENLABS_VAD_THRESHOLD: vadThreshold}),
|
||||
...(minSpeechDurationMs && {ELEVENLABS_MIN_SPEECH_DURATION_MS: minSpeechDurationMs}),
|
||||
...(minSilenceDurationMs && {ELEVENLABS_MIN_SILENCE_DURATION_MS: minSilenceDurationMs}),
|
||||
...(enableLogging && {ELEVENLABS_ENABLE_LOGGING: enableLogging ? 1 : 0}),
|
||||
};
|
||||
}
|
||||
else if ('gladia' === vendor) {
|
||||
const {host, path} = sttCredentials;
|
||||
opts = {
|
||||
|
||||
@@ -163,7 +163,6 @@ class TtsStreamingBuffer extends Emitter {
|
||||
}
|
||||
|
||||
clear() {
|
||||
this.logger.debug('TtsStreamingBuffer:clear');
|
||||
if (this._connectionStatus !== TtsStreamingConnectionStatus.Connected) return;
|
||||
clearTimeout(this.timer);
|
||||
this._api(this.ep, [this.ep.uuid, 'clear']).catch((err) =>
|
||||
@@ -437,7 +436,15 @@ class TtsStreamingBuffer extends Emitter {
|
||||
|
||||
const findSentenceBoundary = (text, limit) => {
|
||||
// Look for punctuation or double newline that signals sentence end.
|
||||
const sentenceEndRegex = /[.!?](?=\s|$)|\n\n/g;
|
||||
// Includes:
|
||||
// - ASCII: . ! ?
|
||||
// - Arabic: ؟ (question mark), ۔ (full stop)
|
||||
// - Japanese: 。 (full stop), !, ? (full-width exclamation/question)
|
||||
//
|
||||
// For languages that use spaces between sentences, we still require
|
||||
// whitespace or end-of-string after the mark. For Japanese (no spaces),
|
||||
// we treat the punctuation itself as a boundary regardless of following char.
|
||||
const sentenceEndRegex = /[.!?؟۔](?=\s|$)|[。!?]|\n\n/g;
|
||||
let lastSentenceBoundary = -1;
|
||||
let match;
|
||||
while ((match = sentenceEndRegex.exec(text)) && match.index < limit) {
|
||||
|
||||
1
package-lock.json
generated
1
package-lock.json
generated
@@ -8752,7 +8752,6 @@
|
||||
},
|
||||
"node_modules/unix-dgram": {
|
||||
"version": "2.0.6",
|
||||
"hasInstallScript": true,
|
||||
"license": "ISC",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
|
||||
Reference in New Issue
Block a user