mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
Kill audio on VAD (voice activity detection) when bargein is true and minBargeinWordCount is zero
This commit is contained in:
@@ -37,6 +37,7 @@ class TaskGather extends Task {
|
|||||||
this.hints = recognizer.hints || [];
|
this.hints = recognizer.hints || [];
|
||||||
this.hintsBoost = recognizer.hintsBoost;
|
this.hintsBoost = recognizer.hintsBoost;
|
||||||
this.altLanguages = recognizer.altLanguages || [];
|
this.altLanguages = recognizer.altLanguages || [];
|
||||||
|
this.punctuation = !!recognizer.punctuation;
|
||||||
|
|
||||||
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
||||||
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
||||||
@@ -156,9 +157,12 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
|
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
|
||||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
|
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
|
||||||
|
ep.removeCustomEventListener(GoogleTranscriptionEvents.VadDetected);
|
||||||
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
|
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
|
||||||
|
ep.removeCustomEventListener(AwsTranscriptionEvents.VadDetected);
|
||||||
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
|
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
|
||||||
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
|
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
|
||||||
|
ep.removeCustomEventListener(AzureTranscriptionEvents.VadDetected);
|
||||||
}
|
}
|
||||||
|
|
||||||
kill(cs) {
|
kill(cs) {
|
||||||
@@ -214,7 +218,8 @@ class TaskGather extends Task {
|
|||||||
Object.assign(opts, {
|
Object.assign(opts, {
|
||||||
GOOGLE_SPEECH_USE_ENHANCED: true,
|
GOOGLE_SPEECH_USE_ENHANCED: true,
|
||||||
GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
|
GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
|
||||||
GOOGLE_SPEECH_MODEL: 'command_and_search'
|
GOOGLE_SPEECH_MODEL: 'command_and_search',
|
||||||
|
GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: this.punctuation
|
||||||
});
|
});
|
||||||
if (this.hints && this.hints.length > 1) {
|
if (this.hints && this.hints.length > 1) {
|
||||||
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
||||||
@@ -230,6 +235,7 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||||
}
|
}
|
||||||
else if (['aws', 'polly'].includes(this.vendor)) {
|
else if (['aws', 'polly'].includes(this.vendor)) {
|
||||||
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
|
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
|
||||||
@@ -245,6 +251,7 @@ class TaskGather extends Task {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||||
}
|
}
|
||||||
else if ('microsoft' === this.vendor) {
|
else if ('microsoft' === this.vendor) {
|
||||||
if (this.sttCredentials) {
|
if (this.sttCredentials) {
|
||||||
@@ -257,13 +264,14 @@ class TaskGather extends Task {
|
|||||||
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
||||||
}
|
}
|
||||||
if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
|
if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
|
||||||
if (this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
|
if (this.profanityOption && this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
|
||||||
if (this.azureServiceEndpoint) opts.AZURE_SERVICE_ENDPOINT = this.azureServiceEndpoint;
|
if (this.azureServiceEndpoint) opts.AZURE_SERVICE_ENDPOINT = this.azureServiceEndpoint;
|
||||||
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
|
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
|
||||||
opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
|
opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
|
||||||
|
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoSpeechDetected.bind(this, cs, ep));
|
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoSpeechDetected.bind(this, cs, ep));
|
||||||
|
ep.addCustomEventListener(AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||||
}
|
}
|
||||||
await ep.set(opts)
|
await ep.set(opts)
|
||||||
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
||||||
@@ -375,7 +383,7 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
_onEndOfUtterance(cs, ep) {
|
_onEndOfUtterance(cs, ep) {
|
||||||
this.logger.info('TaskGather:_onEndOfUtterance');
|
this.logger.debug('TaskGather:_onEndOfUtterance');
|
||||||
if (this.bargein && this.minBargeinWordCount === 0) {
|
if (this.bargein && this.minBargeinWordCount === 0) {
|
||||||
this._killAudio(cs);
|
this._killAudio(cs);
|
||||||
}
|
}
|
||||||
@@ -385,6 +393,13 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_onVadDetected(cs, ep) {
|
||||||
|
if (this.bargein && this.minBargeinWordCount === 0) {
|
||||||
|
this.logger.debug('TaskGather:_onVadDetected');
|
||||||
|
this._killAudio(cs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_onNoSpeechDetected(cs, ep) {
|
_onNoSpeechDetected(cs, ep) {
|
||||||
this._resolve('timeout');
|
this._resolve('timeout');
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,19 +59,22 @@
|
|||||||
"Transcription": "google_transcribe::transcription",
|
"Transcription": "google_transcribe::transcription",
|
||||||
"EndOfUtterance": "google_transcribe::end_of_utterance",
|
"EndOfUtterance": "google_transcribe::end_of_utterance",
|
||||||
"NoAudioDetected": "google_transcribe::no_audio_detected",
|
"NoAudioDetected": "google_transcribe::no_audio_detected",
|
||||||
"MaxDurationExceeded": "google_transcribe::max_duration_exceeded"
|
"MaxDurationExceeded": "google_transcribe::max_duration_exceeded",
|
||||||
|
"VadDetected": "google_transcribe::vad_detected"
|
||||||
},
|
},
|
||||||
"AwsTranscriptionEvents": {
|
"AwsTranscriptionEvents": {
|
||||||
"Transcription": "aws_transcribe::transcription",
|
"Transcription": "aws_transcribe::transcription",
|
||||||
"EndOfTranscript": "aws_transcribe::end_of_transcript",
|
"EndOfTranscript": "aws_transcribe::end_of_transcript",
|
||||||
"NoAudioDetected": "aws_transcribe::no_audio_detected",
|
"NoAudioDetected": "aws_transcribe::no_audio_detected",
|
||||||
"MaxDurationExceeded": "aws_transcribe::max_duration_exceeded"
|
"MaxDurationExceeded": "aws_transcribe::max_duration_exceeded",
|
||||||
|
"VadDetected": "aws_transcribe::vad_detected"
|
||||||
},
|
},
|
||||||
"AzureTranscriptionEvents": {
|
"AzureTranscriptionEvents": {
|
||||||
"Transcription": "azure_transcribe::transcription",
|
"Transcription": "azure_transcribe::transcription",
|
||||||
"StartOfUtterance": "azure_transcribe::start_of_utterance",
|
"StartOfUtterance": "azure_transcribe::start_of_utterance",
|
||||||
"EndOfUtterance": "azure_transcribe::end_of_utterance",
|
"EndOfUtterance": "azure_transcribe::end_of_utterance",
|
||||||
"NoSpeechDetected": "azure_transcribe::no_speech_detected"
|
"NoSpeechDetected": "azure_transcribe::no_speech_detected",
|
||||||
|
"VadDetected": "azure_transcribe::vad_detected"
|
||||||
},
|
},
|
||||||
"ListenEvents": {
|
"ListenEvents": {
|
||||||
"Connect": "mod_audio_fork::connect",
|
"Connect": "mod_audio_fork::connect",
|
||||||
|
|||||||
Reference in New Issue
Block a user