mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
kill audio on vad when bargein is true and minBargeinWordCount is zero
This commit is contained in:
@@ -37,6 +37,7 @@ class TaskGather extends Task {
|
||||
this.hints = recognizer.hints || [];
|
||||
this.hintsBoost = recognizer.hintsBoost;
|
||||
this.altLanguages = recognizer.altLanguages || [];
|
||||
this.punctuation = !!recognizer.punctuation;
|
||||
|
||||
/* vad: if provided, we don't connect to the recognizer until voice activity is detected */
|
||||
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
||||
@@ -156,9 +157,12 @@ class TaskGather extends Task {
|
||||
}
|
||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
|
||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.VadDetected);
|
||||
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(AwsTranscriptionEvents.VadDetected);
|
||||
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
|
||||
ep.removeCustomEventListener(AzureTranscriptionEvents.VadDetected);
|
||||
}
|
||||
|
||||
kill(cs) {
|
||||
@@ -214,7 +218,8 @@ class TaskGather extends Task {
|
||||
Object.assign(opts, {
|
||||
GOOGLE_SPEECH_USE_ENHANCED: true,
|
||||
GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
|
||||
GOOGLE_SPEECH_MODEL: 'command_and_search'
|
||||
GOOGLE_SPEECH_MODEL: 'command_and_search',
|
||||
GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: this.punctuation
|
||||
});
|
||||
if (this.hints && this.hints.length > 1) {
|
||||
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
||||
@@ -230,6 +235,7 @@ class TaskGather extends Task {
|
||||
}
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
}
|
||||
else if (['aws', 'polly'].includes(this.vendor)) {
|
||||
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
|
||||
@@ -245,6 +251,7 @@ class TaskGather extends Task {
|
||||
});
|
||||
}
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
}
|
||||
else if ('microsoft' === this.vendor) {
|
||||
if (this.sttCredentials) {
|
||||
@@ -257,13 +264,14 @@ class TaskGather extends Task {
|
||||
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
||||
}
|
||||
if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
|
||||
if (this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
|
||||
if (this.profanityOption && this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
|
||||
if (this.azureServiceEndpoint) opts.AZURE_SERVICE_ENDPOINT = this.azureServiceEndpoint;
|
||||
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
|
||||
opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
|
||||
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoSpeechDetected.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||
}
|
||||
await ep.set(opts)
|
||||
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
||||
@@ -375,7 +383,7 @@ class TaskGather extends Task {
|
||||
}
|
||||
}
|
||||
_onEndOfUtterance(cs, ep) {
|
||||
this.logger.info('TaskGather:_onEndOfUtterance');
|
||||
this.logger.debug('TaskGather:_onEndOfUtterance');
|
||||
if (this.bargein && this.minBargeinWordCount === 0) {
|
||||
this._killAudio(cs);
|
||||
}
|
||||
@@ -385,6 +393,13 @@ class TaskGather extends Task {
|
||||
}
|
||||
}
|
||||
|
||||
_onVadDetected(cs, ep) {
|
||||
if (this.bargein && this.minBargeinWordCount === 0) {
|
||||
this.logger.debug('TaskGather:_onVadDetected');
|
||||
this._killAudio(cs);
|
||||
}
|
||||
}
|
||||
|
||||
_onNoSpeechDetected(cs, ep) {
|
||||
this._resolve('timeout');
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user