expose model and singleUtterance to gather/transcribe when using google

This commit is contained in:
Dave Horton
2022-05-08 12:29:55 -04:00
parent 036accab44
commit 182ad8c716
3 changed files with 39 additions and 20 deletions

View File

@@ -36,8 +36,18 @@ class TaskGather extends Task {
this.language = recognizer.language;
this.hints = recognizer.hints || [];
this.hintsBoost = recognizer.hintsBoost;
this.altLanguages = recognizer.altLanguages || [];
this.profanityFilter = recognizer.profanityFilter;
this.punctuation = !!recognizer.punctuation;
this.enhancedModel = !!recognizer.enhancedModel;
this.model = recognizer.model || 'command_and_search';
this.words = !!recognizer.words;
this.singleUtterance = recognizer.singleUtterance || true;
this.diarization = !!recognizer.diarization;
this.diarizationMinSpeakers = recognizer.diarizationMinSpeakers || 0;
this.diarizationMaxSpeakers = recognizer.diarizationMaxSpeakers || 0;
this.interactionType = recognizer.interactionType || 'unspecified';
this.naicsCode = recognizer.naicsCode || 0;
this.altLanguages = recognizer.altLanguages || [];
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
@@ -232,24 +242,35 @@ class TaskGather extends Task {
if ('google' === this.vendor) {
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
Object.assign(opts, {
GOOGLE_SPEECH_USE_ENHANCED: true,
GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
GOOGLE_SPEECH_MODEL: 'command_and_search',
GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: !!this.punctuation
[
['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
['singleUtterance', 'GOOGLE_SPEECH_SINGLE_UTTERANCE'],
['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
].forEach((arr) => {
if (this[arr[0]]) opts[arr[1]] = true;
});
if (this.hints && this.hints.length > 1) {
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
if (this.hints.length > 1) {
opts.GOOGLE_SPEECH_HINTS = this.hints.join(',');
if (typeof this.hintsBoost === 'number') {
opts.GOOGLE_SPEECH_HINTS_BOOST = this.hintsBoost;
}
}
if (this.altLanguages && this.altLanguages.length > 0) {
opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
if (this.altLanguages.length > 1) opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
if ('unspecified' !== this.interactionType) {
opts.GOOGLE_SPEECH_METADATA_INTERACTION_TYPE = this.interactionType;
}
if (this.profanityFilter === true) {
Object.assign(opts, {'GOOGLE_SPEECH_PROFANITY_FILTER': true});
opts.GOOGLE_SPEECH_MODEL = this.model;
if (this.diarization && this.diarizationMinSpeakers > 0) {
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT = this.diarizationMinSpeakers;
}
if (this.diarization && this.diarizationMaxSpeakers > 0) {
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT = this.diarizationMaxSpeakers;
}
if (this.naicsCode > 0) opts.GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE = this.naicsCode;
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));