Kill audio on VAD (voice activity detection) when bargein is true and minBargeinWordCount is zero

This commit is contained in:
Dave Horton
2022-03-17 08:51:44 -04:00
parent 8c5cdd374b
commit 3dd703411c
2 changed files with 24 additions and 6 deletions

View File

@@ -37,6 +37,7 @@ class TaskGather extends Task {
this.hints = recognizer.hints || []; this.hints = recognizer.hints || [];
this.hintsBoost = recognizer.hintsBoost; this.hintsBoost = recognizer.hintsBoost;
this.altLanguages = recognizer.altLanguages || []; this.altLanguages = recognizer.altLanguages || [];
this.punctuation = !!recognizer.punctuation;
/* vad: if provided, we dont connect to recognizer until voice activity is detected */ /* vad: if provided, we dont connect to recognizer until voice activity is detected */
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {}; const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
@@ -156,9 +157,12 @@ class TaskGather extends Task {
} }
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription); ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance); ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
ep.removeCustomEventListener(GoogleTranscriptionEvents.VadDetected);
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription); ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
ep.removeCustomEventListener(AwsTranscriptionEvents.VadDetected);
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription); ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected); ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
ep.removeCustomEventListener(AzureTranscriptionEvents.VadDetected);
} }
kill(cs) { kill(cs) {
@@ -214,7 +218,8 @@ class TaskGather extends Task {
Object.assign(opts, { Object.assign(opts, {
GOOGLE_SPEECH_USE_ENHANCED: true, GOOGLE_SPEECH_USE_ENHANCED: true,
GOOGLE_SPEECH_SINGLE_UTTERANCE: true, GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
GOOGLE_SPEECH_MODEL: 'command_and_search' GOOGLE_SPEECH_MODEL: 'command_and_search',
GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: this.punctuation
}); });
if (this.hints && this.hints.length > 1) { if (this.hints && this.hints.length > 1) {
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(','); opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
@@ -230,6 +235,7 @@ class TaskGather extends Task {
} }
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep)); ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
} }
else if (['aws', 'polly'].includes(this.vendor)) { else if (['aws', 'polly'].includes(this.vendor)) {
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName; if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
@@ -245,6 +251,7 @@ class TaskGather extends Task {
}); });
} }
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
} }
else if ('microsoft' === this.vendor) { else if ('microsoft' === this.vendor) {
if (this.sttCredentials) { if (this.sttCredentials) {
@@ -257,13 +264,14 @@ class TaskGather extends Task {
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(','); opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
} }
if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1; if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
if (this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption; if (this.profanityOption && this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
if (this.azureServiceEndpoint) opts.AZURE_SERVICE_ENDPOINT = this.azureServiceEndpoint; if (this.azureServiceEndpoint) opts.AZURE_SERVICE_ENDPOINT = this.azureServiceEndpoint;
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs; if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1; opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep)); ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoSpeechDetected.bind(this, cs, ep)); ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoSpeechDetected.bind(this, cs, ep));
ep.addCustomEventListener(AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
} }
await ep.set(opts) await ep.set(opts)
.catch((err) => this.logger.info(err, 'Error setting channel variables')); .catch((err) => this.logger.info(err, 'Error setting channel variables'));
@@ -375,7 +383,7 @@ class TaskGather extends Task {
} }
} }
_onEndOfUtterance(cs, ep) { _onEndOfUtterance(cs, ep) {
this.logger.info('TaskGather:_onEndOfUtterance'); this.logger.debug('TaskGather:_onEndOfUtterance');
if (this.bargein && this.minBargeinWordCount === 0) { if (this.bargein && this.minBargeinWordCount === 0) {
this._killAudio(cs); this._killAudio(cs);
} }
@@ -385,6 +393,13 @@ class TaskGather extends Task {
} }
} }
_onVadDetected(cs, ep) {
if (this.bargein && this.minBargeinWordCount === 0) {
this.logger.debug('TaskGather:_onVadDetected');
this._killAudio(cs);
}
}
_onNoSpeechDetected(cs, ep) { _onNoSpeechDetected(cs, ep) {
this._resolve('timeout'); this._resolve('timeout');
} }

View File

@@ -59,19 +59,22 @@
"Transcription": "google_transcribe::transcription", "Transcription": "google_transcribe::transcription",
"EndOfUtterance": "google_transcribe::end_of_utterance", "EndOfUtterance": "google_transcribe::end_of_utterance",
"NoAudioDetected": "google_transcribe::no_audio_detected", "NoAudioDetected": "google_transcribe::no_audio_detected",
"MaxDurationExceeded": "google_transcribe::max_duration_exceeded" "MaxDurationExceeded": "google_transcribe::max_duration_exceeded",
"VadDetected": "google_transcribe::vad_detected"
}, },
"AwsTranscriptionEvents": { "AwsTranscriptionEvents": {
"Transcription": "aws_transcribe::transcription", "Transcription": "aws_transcribe::transcription",
"EndOfTranscript": "aws_transcribe::end_of_transcript", "EndOfTranscript": "aws_transcribe::end_of_transcript",
"NoAudioDetected": "aws_transcribe::no_audio_detected", "NoAudioDetected": "aws_transcribe::no_audio_detected",
"MaxDurationExceeded": "aws_transcribe::max_duration_exceeded" "MaxDurationExceeded": "aws_transcribe::max_duration_exceeded",
"VadDetected": "aws_transcribe::vad_detected"
}, },
"AzureTranscriptionEvents": { "AzureTranscriptionEvents": {
"Transcription": "azure_transcribe::transcription", "Transcription": "azure_transcribe::transcription",
"StartOfUtterance": "azure_transcribe::start_of_utterance", "StartOfUtterance": "azure_transcribe::start_of_utterance",
"EndOfUtterance": "azure_transcribe::end_of_utterance", "EndOfUtterance": "azure_transcribe::end_of_utterance",
"NoSpeechDetected": "azure_transcribe::no_speech_detected" "NoSpeechDetected": "azure_transcribe::no_speech_detected",
"VadDetected": "azure_transcribe::vad_detected"
}, },
"ListenEvents": { "ListenEvents": {
"Connect": "mod_audio_fork::connect", "Connect": "mod_audio_fork::connect",