mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
Expose the `model` and `singleUtterance` recognizer options to the gather and transcribe tasks when using Google speech recognition
This commit is contained in:
@@ -36,8 +36,18 @@ class TaskGather extends Task {
|
|||||||
this.language = recognizer.language;
|
this.language = recognizer.language;
|
||||||
this.hints = recognizer.hints || [];
|
this.hints = recognizer.hints || [];
|
||||||
this.hintsBoost = recognizer.hintsBoost;
|
this.hintsBoost = recognizer.hintsBoost;
|
||||||
this.altLanguages = recognizer.altLanguages || [];
|
this.profanityFilter = recognizer.profanityFilter;
|
||||||
this.punctuation = !!recognizer.punctuation;
|
this.punctuation = !!recognizer.punctuation;
|
||||||
|
this.enhancedModel = !!recognizer.enhancedModel;
|
||||||
|
this.model = recognizer.model || 'command_and_search';
|
||||||
|
this.words = !!recognizer.words;
|
||||||
|
this.singleUtterance = recognizer.singleUtterance || true;
|
||||||
|
this.diarization = !!recognizer.diarization;
|
||||||
|
this.diarizationMinSpeakers = recognizer.diarizationMinSpeakers || 0;
|
||||||
|
this.diarizationMaxSpeakers = recognizer.diarizationMaxSpeakers || 0;
|
||||||
|
this.interactionType = recognizer.interactionType || 'unspecified';
|
||||||
|
this.naicsCode = recognizer.naicsCode || 0;
|
||||||
|
this.altLanguages = recognizer.altLanguages || [];
|
||||||
|
|
||||||
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
/* vad: if provided, we dont connect to recognizer until voice activity is detected */
|
||||||
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
const {enable, voiceMs = 0, mode = -1} = recognizer.vad || {};
|
||||||
@@ -232,24 +242,35 @@ class TaskGather extends Task {
|
|||||||
|
|
||||||
if ('google' === this.vendor) {
|
if ('google' === this.vendor) {
|
||||||
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
if (this.sttCredentials) opts.GOOGLE_APPLICATION_CREDENTIALS = JSON.stringify(this.sttCredentials.credentials);
|
||||||
Object.assign(opts, {
|
[
|
||||||
GOOGLE_SPEECH_USE_ENHANCED: true,
|
['enhancedModel', 'GOOGLE_SPEECH_USE_ENHANCED'],
|
||||||
GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
|
['separateRecognitionPerChannel', 'GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL'],
|
||||||
GOOGLE_SPEECH_MODEL: 'command_and_search',
|
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
|
||||||
GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: !!this.punctuation
|
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
|
||||||
|
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
|
||||||
|
['singleUtterance', 'GOOGLE_SPEECH_SINGLE_UTTERANCE'],
|
||||||
|
['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
|
||||||
|
].forEach((arr) => {
|
||||||
|
if (this[arr[0]]) opts[arr[1]] = true;
|
||||||
});
|
});
|
||||||
if (this.hints && this.hints.length > 1) {
|
if (this.hints.length > 1) {
|
||||||
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
opts.GOOGLE_SPEECH_HINTS = this.hints.join(',');
|
||||||
if (typeof this.hintsBoost === 'number') {
|
if (typeof this.hintsBoost === 'number') {
|
||||||
opts.GOOGLE_SPEECH_HINTS_BOOST = this.hintsBoost;
|
opts.GOOGLE_SPEECH_HINTS_BOOST = this.hintsBoost;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (this.altLanguages && this.altLanguages.length > 0) {
|
if (this.altLanguages.length > 1) opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
||||||
opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
if ('unspecified' !== this.interactionType) {
|
||||||
|
opts.GOOGLE_SPEECH_METADATA_INTERACTION_TYPE = this.interactionType;
|
||||||
}
|
}
|
||||||
if (this.profanityFilter === true) {
|
opts.GOOGLE_SPEECH_MODEL = this.model;
|
||||||
Object.assign(opts, {'GOOGLE_SPEECH_PROFANITY_FILTER': true});
|
if (this.diarization && this.diarizationMinSpeakers > 0) {
|
||||||
|
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT = this.diarizationMinSpeakers;
|
||||||
}
|
}
|
||||||
|
if (this.diarization && this.diarizationMaxSpeakers > 0) {
|
||||||
|
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT = this.diarizationMaxSpeakers;
|
||||||
|
}
|
||||||
|
if (this.naicsCode > 0) opts.GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE = this.naicsCode;
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
||||||
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
ep.addCustomEventListener(GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
|
||||||
|
|||||||
@@ -419,6 +419,7 @@
|
|||||||
"separateRecognitionPerChannel": "boolean",
|
"separateRecognitionPerChannel": "boolean",
|
||||||
"punctuation": "boolean",
|
"punctuation": "boolean",
|
||||||
"enhancedModel": "boolean",
|
"enhancedModel": "boolean",
|
||||||
|
"singleUtterance": "boolean",
|
||||||
"words": "boolean",
|
"words": "boolean",
|
||||||
"diarization": "boolean",
|
"diarization": "boolean",
|
||||||
"diarizationMinSpeakers": "number",
|
"diarizationMinSpeakers": "number",
|
||||||
@@ -448,6 +449,7 @@
|
|||||||
"tag"
|
"tag"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"model": "string",
|
||||||
"outputFormat": {
|
"outputFormat": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": [
|
||||||
|
|||||||
@@ -32,7 +32,9 @@ class TaskTranscribe extends Task {
|
|||||||
this.profanityFilter = recognizer.profanityFilter;
|
this.profanityFilter = recognizer.profanityFilter;
|
||||||
this.punctuation = !!recognizer.punctuation;
|
this.punctuation = !!recognizer.punctuation;
|
||||||
this.enhancedModel = !!recognizer.enhancedModel;
|
this.enhancedModel = !!recognizer.enhancedModel;
|
||||||
|
this.model = recognizer.model || 'phone_call';
|
||||||
this.words = !!recognizer.words;
|
this.words = !!recognizer.words;
|
||||||
|
this.singleUtterance = recognizer.singleUtterance || false;
|
||||||
this.diarization = !!recognizer.diarization;
|
this.diarization = !!recognizer.diarization;
|
||||||
this.diarizationMinSpeakers = recognizer.diarizationMinSpeakers || 0;
|
this.diarizationMinSpeakers = recognizer.diarizationMinSpeakers || 0;
|
||||||
this.diarizationMaxSpeakers = recognizer.diarizationMaxSpeakers || 0;
|
this.diarizationMaxSpeakers = recognizer.diarizationMaxSpeakers || 0;
|
||||||
@@ -136,6 +138,7 @@ class TaskTranscribe extends Task {
|
|||||||
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
|
['profanityFilter', 'GOOGLE_SPEECH_PROFANITY_FILTER'],
|
||||||
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
|
['punctuation', 'GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION'],
|
||||||
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
|
['words', 'GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS'],
|
||||||
|
['singleUtterance', 'GOOGLE_SPEECH_SINGLE_UTTERANCE'],
|
||||||
['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
|
['diarization', 'GOOGLE_SPEECH_PROFANITY_FILTER']
|
||||||
].forEach((arr) => {
|
].forEach((arr) => {
|
||||||
if (this[arr[0]]) opts[arr[1]] = true;
|
if (this[arr[0]]) opts[arr[1]] = true;
|
||||||
@@ -149,15 +152,8 @@ class TaskTranscribe extends Task {
|
|||||||
if (this.altLanguages.length > 1) opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
if (this.altLanguages.length > 1) opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
||||||
if ('unspecified' !== this.interactionType) {
|
if ('unspecified' !== this.interactionType) {
|
||||||
opts.GOOGLE_SPEECH_METADATA_INTERACTION_TYPE = this.interactionType;
|
opts.GOOGLE_SPEECH_METADATA_INTERACTION_TYPE = this.interactionType;
|
||||||
|
|
||||||
// additionally set model if appropriate
|
|
||||||
if ('phone_call' === this.interactionType) opts.GOOGLE_SPEECH_MODEL = 'phone_call';
|
|
||||||
else if (['voice_search', 'voice_command'].includes(this.interactionType)) {
|
|
||||||
opts.GOOGLE_SPEECH_MODEL = 'command_and_search';
|
|
||||||
}
|
}
|
||||||
else opts.GOOGLE_SPEECH_MODEL = 'phone_call';
|
opts.GOOGLE_SPEECH_MODEL = this.model;
|
||||||
}
|
|
||||||
else opts.GOOGLE_SPEECH_MODEL = 'phone_call';
|
|
||||||
if (this.diarization && this.diarizationMinSpeakers > 0) {
|
if (this.diarization && this.diarizationMinSpeakers > 0) {
|
||||||
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT = this.diarizationMinSpeakers;
|
opts.GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT = this.diarizationMinSpeakers;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user