add support for vad to gather and transcribe (#67)

This commit is contained in:
Dave Horton
2022-02-10 08:45:16 -05:00
committed by GitHub
parent bac1b7f2c6
commit 30ed5b6a02
3 changed files with 31 additions and 0 deletions

View File

@@ -30,6 +30,10 @@ class TaskGather extends Task {
this.hints = recognizer.hints || [];
this.altLanguages = recognizer.altLanguages || [];
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
this.vad = {enable, voiceMs, mode};
/* aws options */
this.vocabularyName = recognizer.vocabularyName;
this.vocabularyFilterName = recognizer.vocabularyFilterName;
@@ -137,6 +141,12 @@ class TaskGather extends Task {
async _initSpeech(cs, ep) {
const opts = {};
if (this.vad.enable) {
opts.START_RECOGNIZING_ON_VAD = 1;
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
}
if ('google' === this.vendor) {
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
Object.assign(opts, {

View File

@@ -389,6 +389,7 @@
"enum": ["google", "aws", "microsoft", "default"]
},
"language": "string",
"vad": "#vad",
"hints": "array",
"altLanguages": "array",
"profanityFilter": "boolean",
@@ -457,5 +458,15 @@
"required": [
"name"
]
},
"vad": {
"properties": {
"enable": "boolean",
"voiceMs": "number",
"mode": "number"
},
"required": [
"enable"
]
}
}

View File

@@ -22,6 +22,10 @@ class TaskTranscribe extends Task {
this.interim = !!recognizer.interim;
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
this.vad = {enable, voiceMs, mode};
/* google-specific options */
this.hints = recognizer.hints || [];
this.profanityFilter = recognizer.profanityFilter;
@@ -105,6 +109,12 @@ class TaskTranscribe extends Task {
async _startTranscribing(cs, ep) {
const opts = {};
if (this.vad.enable) {
opts.START_RECOGNIZING_ON_VAD = 1;
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
}
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,