mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
support mod_vad_detect (#762)
* support mod_vad_detect * wip * update verb spec and drachtio fsmrf * Update example-voicemail-greetings.json (#761) Update voicemail english greetings * wip * stopvad if playdone --------- Co-authored-by: Vinod Dharashive <vdharashive@gmail.com>
This commit is contained in:
@@ -338,6 +338,17 @@ class CallSession extends Emitter {
|
||||
this.application.fallback_speech_recognizer_language = language;
|
||||
}
|
||||
|
||||
/**
|
||||
* Voice activity detection (VAD) settings for this call session
|
||||
*/
|
||||
get vad() {
|
||||
return this._vad;
|
||||
}
|
||||
|
||||
set vad(v) {
|
||||
this._vad = v;
|
||||
}
|
||||
|
||||
/**
|
||||
* indicates whether the call currently in progress
|
||||
*/
|
||||
|
||||
@@ -15,7 +15,8 @@ class TaskConfig extends Task {
|
||||
'transcribe',
|
||||
'fillerNoise',
|
||||
'actionHookDelayAction',
|
||||
'boostAudioSignal'
|
||||
'boostAudioSignal',
|
||||
'vad'
|
||||
].forEach((k) => this[k] = this.data[k] || {});
|
||||
|
||||
if ('notifyEvents' in this.data) {
|
||||
@@ -70,6 +71,7 @@ class TaskConfig extends Task {
|
||||
get hasListen() { return Object.keys(this.listen).length; }
|
||||
get hasTranscribe() { return Object.keys(this.transcribe).length; }
|
||||
get hasDub() { return Object.keys(this.dub).length; }
|
||||
get hasVad() { return Object.keys(this.vad).length; }
|
||||
get hasFillerNoise() { return Object.keys(this.fillerNoise).length; }
|
||||
|
||||
get summary() {
|
||||
@@ -287,6 +289,16 @@ class TaskConfig extends Task {
|
||||
cs.enableFillerNoise(opts);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Publish session-level voice activity detection settings on the call session.
 * Any property the application did not supply falls back to the default below.
 */
if (this.hasVad) {
  cs.vad = {
    enable: this.vad.enable || false,
    voiceMs: this.vad.voiceMs || 250,
    silenceMs: this.vad.silenceMs || 150,
    strategy: this.vad.strategy || 'one-shot',
    // read the `mode` property (previously misspelled as `mod`, so the configured
    // value was ignored); use ?? so a numeric mode of 0 is not replaced by the default
    mode: this.vad.mode ?? 2
  };
}
|
||||
}
|
||||
|
||||
async kill(cs) {
|
||||
|
||||
@@ -10,7 +10,8 @@ const {
|
||||
IbmTranscriptionEvents,
|
||||
NvidiaTranscriptionEvents,
|
||||
JambonzTranscriptionEvents,
|
||||
AssemblyAiTranscriptionEvents
|
||||
AssemblyAiTranscriptionEvents,
|
||||
VadDetection
|
||||
} = require('../utils/constants.json');
|
||||
const {
|
||||
JAMBONES_GATHER_EARLY_HINTS_MATCH,
|
||||
@@ -27,7 +28,7 @@ class TaskGather extends SttTask {
|
||||
[
|
||||
'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
|
||||
'interDigitTimeout', 'partialResultHook', 'bargein', 'dtmfBargein',
|
||||
'speechTimeout', 'timeout', 'say', 'play', 'actionHookDelayAction', 'fillerNoise'
|
||||
'speechTimeout', 'timeout', 'say', 'play', 'actionHookDelayAction', 'fillerNoise', 'vad'
|
||||
].forEach((k) => this[k] = this.data[k]);
|
||||
|
||||
// gather default input is digits
|
||||
@@ -41,7 +42,8 @@ class TaskGather extends SttTask {
|
||||
this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
|
||||
this.interim = !!this.partialResultHook || this.bargein || (this.timeout > 0);
|
||||
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
|
||||
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
|
||||
this.minBargeinWordCount = this.data.minBargeinWordCount !== undefined ? this.data.minBargeinWordCount : 1;
|
||||
this._vadEnabled = this.minBargeinWordCount === 0;
|
||||
if (this.data.recognizer) {
|
||||
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
|
||||
this.asrTimeout = typeof this.data.recognizer.asrTimeout === 'number' ?
|
||||
@@ -128,6 +130,11 @@ class TaskGather extends SttTask {
|
||||
...(this.fillerNoise || {})
|
||||
};
|
||||
|
||||
this.vad = {
|
||||
...(cs.vad || {}),
|
||||
...(this.vad || {})
|
||||
};
|
||||
|
||||
if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
|
||||
const {hints, hintsBoost} = cs.globalSttHints;
|
||||
const setOfHints = new Set((this.data.recognizer.hints || [])
|
||||
@@ -178,6 +185,8 @@ class TaskGather extends SttTask {
|
||||
retries: this._hookDelayRetries
|
||||
};
|
||||
|
||||
this._startVad();
|
||||
|
||||
const startListening = async(cs, ep) => {
|
||||
this._startTimer();
|
||||
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
|
||||
@@ -201,6 +210,7 @@ class TaskGather extends SttTask {
|
||||
const {span, ctx} = this.startChildSpan(`nested:${this.sayTask.summary}`);
|
||||
const process = () => {
|
||||
this.logger.debug('Gather: nested say task completed');
|
||||
this._stopVad();
|
||||
if (!this.killed) {
|
||||
startListening(cs, ep);
|
||||
if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
|
||||
@@ -227,6 +237,7 @@ class TaskGather extends SttTask {
|
||||
const {span, ctx} = this.startChildSpan(`nested:${this.playTask.summary}`);
|
||||
const process = () => {
|
||||
this.logger.debug('Gather: nested play task completed');
|
||||
this._stopVad();
|
||||
if (!this.killed) {
|
||||
startListening(cs, ep);
|
||||
if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
|
||||
@@ -291,6 +302,7 @@ class TaskGather extends SttTask {
|
||||
this._clearAsrTimer();
|
||||
this.playTask?.span.end();
|
||||
this.sayTask?.span.end();
|
||||
this._stopVad();
|
||||
this._resolve('killed');
|
||||
}
|
||||
|
||||
@@ -368,15 +380,12 @@ class TaskGather extends SttTask {
|
||||
ep, GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
this.addCustomEventListener(
|
||||
ep, GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
||||
this.addCustomEventListener(
|
||||
ep, GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
break;
|
||||
|
||||
case 'aws':
|
||||
case 'polly':
|
||||
this.bugname = `${this.bugname_prefix}aws_transcribe`;
|
||||
this.addCustomEventListener(ep, AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
break;
|
||||
case 'microsoft':
|
||||
this.bugname = `${this.bugname_prefix}azure_transcribe`;
|
||||
@@ -384,7 +393,6 @@ class TaskGather extends SttTask {
|
||||
ep, AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
//this.addCustomEventListener(ep, AzureTranscriptionEvents.NoSpeechDetected,
|
||||
//this._onNoSpeechDetected.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
break;
|
||||
case 'nuance':
|
||||
this.bugname = `${this.bugname_prefix}nuance_transcribe`;
|
||||
@@ -394,8 +402,6 @@ class TaskGather extends SttTask {
|
||||
this._onStartOfSpeech.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, NuanceTranscriptionEvents.TranscriptionComplete,
|
||||
this._onTranscriptionComplete.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, NuanceTranscriptionEvents.VadDetected,
|
||||
this._onVadDetected.bind(this, cs, ep));
|
||||
|
||||
/* stall timers until prompt finishes playing */
|
||||
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
|
||||
@@ -465,8 +471,6 @@ class TaskGather extends SttTask {
|
||||
this._onStartOfSpeech.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, NvidiaTranscriptionEvents.TranscriptionComplete,
|
||||
this._onTranscriptionComplete.bind(this, cs, ep));
|
||||
this.addCustomEventListener(ep, NvidiaTranscriptionEvents.VadDetected,
|
||||
this._onVadDetected.bind(this, cs, ep));
|
||||
|
||||
/* I think nvidia has this (??) - stall timers until prompt finishes playing */
|
||||
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
|
||||
@@ -704,6 +708,25 @@ class TaskGather extends SttTask {
|
||||
this._finalAsrTimer = null;
|
||||
}
|
||||
|
||||
|
||||
_startVad() {
|
||||
if (!this._vadStarted && this._vadEnabled) {
|
||||
this.logger.debug('_startVad');
|
||||
this.addCustomEventListener(this.ep, VadDetection.Detection, this._onVadDetected.bind(this, this.cs, this.ep));
|
||||
this.ep?.startVadDetection(this.vad);
|
||||
this._vadStarted = true;
|
||||
}
|
||||
}
|
||||
|
||||
_stopVad() {
|
||||
if (this._vadStarted) {
|
||||
this.logger.debug('_stopVad');
|
||||
this.ep?.stopVadDetection(this.vad);
|
||||
this.ep?.removeCustomEventListener(VadDetection.Detection, this._onVadDetected);
|
||||
this._vadStarted = false;
|
||||
}
|
||||
}
|
||||
|
||||
_startFillerNoise() {
|
||||
this.logger.debug('Gather:_startFillerNoise - playing filler noise');
|
||||
this.ep?.play(this.fillerNoise.url);
|
||||
@@ -1039,6 +1062,10 @@ class TaskGather extends SttTask {
|
||||
this._killAudio(cs);
|
||||
this.emit('vad');
|
||||
}
|
||||
if (this.vad?.strategy === 'one-shot') {
|
||||
this.ep?.removeCustomEventListener(VadDetection.Detection, this._onVadDetected);
|
||||
this._vadStarted = false;
|
||||
}
|
||||
}
|
||||
|
||||
_onNoSpeechDetected(cs, ep, evt, fsEvent) {
|
||||
|
||||
@@ -134,6 +134,9 @@
|
||||
"ConnectFailure": "assemblyai_transcribe::connect_failed",
|
||||
"Connect": "assemblyai_transcribe::connect"
|
||||
},
|
||||
"VadDetection": {
|
||||
"Detection": "vad_detect:detection"
|
||||
},
|
||||
"ListenEvents": {
|
||||
"Connect": "mod_audio_fork::connect",
|
||||
"ConnectFailure": "mod_audio_fork::connect_failed",
|
||||
|
||||
@@ -474,18 +474,8 @@ module.exports = (logger) => {
|
||||
|
||||
const setChannelVarsForStt = (task, sttCredentials, language, rOpts = {}) => {
|
||||
let opts = {};
|
||||
const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
|
||||
const vad = {enable, voiceMs, mode};
|
||||
const vendor = rOpts.vendor;
|
||||
|
||||
/* voice activity detection works across vendors */
|
||||
opts = {
|
||||
...opts,
|
||||
...(vad.enable && {START_RECOGNIZING_ON_VAD: 1}),
|
||||
...(vad.enable && vad.voiceMs && {RECOGNIZER_VAD_VOICE_MS: vad.voiceMs}),
|
||||
...(vad.enable && typeof vad.mode === 'number' && {RECOGNIZER_VAD_MODE: vad.mode}),
|
||||
};
|
||||
|
||||
if ('google' === vendor) {
|
||||
const useV2 = rOpts.googleOptions?.serviceVersion === 'v2';
|
||||
const model = task.name === TaskName.Gather ?
|
||||
|
||||
Reference in New Issue
Block a user