mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2026-02-13 09:49:30 +00:00
add support for vad to gather and transcribe (#67)
This commit is contained in:
@@ -30,6 +30,10 @@ class TaskGather extends Task {
|
|||||||
this.hints = recognizer.hints || [];
|
this.hints = recognizer.hints || [];
|
||||||
this.altLanguages = recognizer.altLanguages || [];
|
this.altLanguages = recognizer.altLanguages || [];
|
||||||
|
|
||||||
|
/* vad: if provided, we don't connect to the recognizer until voice activity is detected */
|
||||||
|
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
||||||
|
this.vad = {enable, voiceMs, mode};
|
||||||
|
|
||||||
/* aws options */
|
/* aws options */
|
||||||
this.vocabularyName = recognizer.vocabularyName;
|
this.vocabularyName = recognizer.vocabularyName;
|
||||||
this.vocabularyFilterName = recognizer.vocabularyFilterName;
|
this.vocabularyFilterName = recognizer.vocabularyFilterName;
|
||||||
@@ -137,6 +141,12 @@ class TaskGather extends Task {
|
|||||||
async _initSpeech(cs, ep) {
|
async _initSpeech(cs, ep) {
|
||||||
const opts = {};
|
const opts = {};
|
||||||
|
|
||||||
|
if (this.vad.enable) {
|
||||||
|
opts.START_RECOGNIZING_ON_VAD = 1;
|
||||||
|
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
|
||||||
|
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
|
||||||
|
}
|
||||||
|
|
||||||
if ('google' === this.vendor) {
|
if ('google' === this.vendor) {
|
||||||
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
||||||
Object.assign(opts, {
|
Object.assign(opts, {
|
||||||
|
|||||||
@@ -389,6 +389,7 @@
|
|||||||
"enum": ["google", "aws", "microsoft", "default"]
|
"enum": ["google", "aws", "microsoft", "default"]
|
||||||
},
|
},
|
||||||
"language": "string",
|
"language": "string",
|
||||||
|
"vad": "#vad",
|
||||||
"hints": "array",
|
"hints": "array",
|
||||||
"altLanguages": "array",
|
"altLanguages": "array",
|
||||||
"profanityFilter": "boolean",
|
"profanityFilter": "boolean",
|
||||||
@@ -457,5 +458,15 @@
|
|||||||
"required": [
|
"required": [
|
||||||
"name"
|
"name"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"vad": {
|
||||||
|
"properties": {
|
||||||
|
"enable": "boolean",
|
||||||
|
"voiceMs": "number",
|
||||||
|
"mode": "number"
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"enable"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,6 +22,10 @@ class TaskTranscribe extends Task {
|
|||||||
this.interim = !!recognizer.interim;
|
this.interim = !!recognizer.interim;
|
||||||
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
|
this.separateRecognitionPerChannel = recognizer.separateRecognitionPerChannel;
|
||||||
|
|
||||||
|
/* vad: if provided, we don't connect to the recognizer until voice activity is detected */
|
||||||
|
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
||||||
|
this.vad = {enable, voiceMs, mode};
|
||||||
|
|
||||||
/* google-specific options */
|
/* google-specific options */
|
||||||
this.hints = recognizer.hints || [];
|
this.hints = recognizer.hints || [];
|
||||||
this.profanityFilter = recognizer.profanityFilter;
|
this.profanityFilter = recognizer.profanityFilter;
|
||||||
@@ -105,6 +109,12 @@ class TaskTranscribe extends Task {
|
|||||||
async _startTranscribing(cs, ep) {
|
async _startTranscribing(cs, ep) {
|
||||||
const opts = {};
|
const opts = {};
|
||||||
|
|
||||||
|
if (this.vad.enable) {
|
||||||
|
opts.START_RECOGNIZING_ON_VAD = 1;
|
||||||
|
if (this.vad.voiceMs) opts.RECOGNIZER_VAD_VOICE_MS = this.vad.voiceMs;
|
||||||
|
if (this.vad.mode >= 0 && this.vad.mode <= 3) opts.RECOGNIZER_VAD_MODE = this.vad.mode;
|
||||||
|
}
|
||||||
|
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.NoAudioDetected, this._onNoAudio.bind(this, cs, ep));
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
|
ep.addCustomEventListener(GoogleTranscriptionEvents.MaxDurationExceeded,
|
||||||
|
|||||||
Reference in New Issue
Block a user