mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2026-02-15 02:39:35 +00:00
google stt fixes, including defaulting to phone_call model based on c… (#288)
* google stt fixes, including defaulting to phone_call model based on comparative model testing * lint error
This commit is contained in:
@@ -98,14 +98,15 @@ class TaskGather extends Task {
|
|||||||
this._sonioxTranscripts = [];
|
this._sonioxTranscripts = [];
|
||||||
|
|
||||||
this.parentTask = parentTask;
|
this.parentTask = parentTask;
|
||||||
|
this.partialTranscriptsCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
get name() { return TaskName.Gather; }
|
get name() { return TaskName.Gather; }
|
||||||
|
|
||||||
get needsStt() { return this.input.includes('speech'); }
|
get needsStt() { return this.input.includes('speech'); }
|
||||||
|
|
||||||
get wantsMultipleUtterances() {
|
get wantsSingleUtterance() {
|
||||||
return this.data.recognizer?.singleUtterance !== true;
|
return this.data.recognizer?.singleUtterance === true;
|
||||||
}
|
}
|
||||||
|
|
||||||
get earlyMedia() {
|
get earlyMedia() {
|
||||||
@@ -565,7 +566,8 @@ class TaskGather extends Task {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.earlyHintsMatch && evt.is_final === false) {
|
/* fast path: our first partial transcript exactly matches an early hint */
|
||||||
|
if (this.earlyHintsMatch && evt.is_final === false && this.partialTranscriptsCount++ === 0) {
|
||||||
const transcript = evt.alternatives[0].transcript?.toLowerCase();
|
const transcript = evt.alternatives[0].transcript?.toLowerCase();
|
||||||
const hints = this.data.recognizer?.hints || [];
|
const hints = this.data.recognizer?.hints || [];
|
||||||
if (hints.find((h) => h.toLowerCase() === transcript)) {
|
if (hints.find((h) => h.toLowerCase() === transcript)) {
|
||||||
@@ -670,15 +672,15 @@ class TaskGather extends Task {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* By default, Gather asks google for a single utterance. On getting an end of utterance event,
|
* By default, Gather asks google for multiple utterances.
|
||||||
* the mod_google_transcribe plugin will send a WritesDone to the grpc stream, which will usually
|
* The reason is that we can sometimes get an 'end_of_utterance' event without
|
||||||
* cause google to return a final transcript. So even though we have not received a final
|
* getting a transcription. This can happen if someone coughs or mumbles.
|
||||||
* transcript at this point (because otherwise resolved() would be true), we do not need to
|
* For that reason we don't ask for a single utterance; instead we terminate the transcribe operation
|
||||||
* restart the recognizer - we should get the final transcript shortly.
|
* once we get a final transcript.
|
||||||
* The exception is if the Gather was specifically configured to listen
|
* However, if the user has specified singleUtterance, then we need to restart here
|
||||||
* to multiple utterances, in which case we need to restart the recognizer.
|
* since we don't have a final transcript yet.
|
||||||
*/
|
*/
|
||||||
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsMultipleUtterances) {
|
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsSingleUtterance) {
|
||||||
this._startTranscribing(ep);
|
this._startTranscribing(ep);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
const {
|
const {
|
||||||
TaskName,
|
|
||||||
AzureTranscriptionEvents,
|
AzureTranscriptionEvents,
|
||||||
GoogleTranscriptionEvents,
|
GoogleTranscriptionEvents,
|
||||||
AwsTranscriptionEvents,
|
AwsTranscriptionEvents,
|
||||||
@@ -343,16 +342,13 @@ module.exports = (logger) => {
|
|||||||
...(rOpts.profanityFilter && {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
|
...(rOpts.profanityFilter && {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
|
||||||
...(rOpts.punctuation && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
|
...(rOpts.punctuation && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
|
||||||
...(rOpts.words && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
|
...(rOpts.words && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
|
||||||
...((rOpts.singleUtterance /*|| task.name === TaskName.Gather*/) &&
|
...(rOpts.singleUtterance && {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
|
||||||
{GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
|
|
||||||
...(rOpts.singleUtterance === false && {GOOGLE_SPEECH_SINGLE_UTTERANCE: 0}),
|
|
||||||
...(rOpts.diarization && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
|
...(rOpts.diarization && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
|
||||||
...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 &&
|
...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 &&
|
||||||
{GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}),
|
{GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}),
|
||||||
...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 &&
|
...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 &&
|
||||||
{GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}),
|
{GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}),
|
||||||
...(rOpts.enhancedModel === false && {GOOGLE_SPEECH_USE_ENHANCED: 0}),
|
...(rOpts.enhancedModel && {GOOGLE_SPEECH_USE_ENHANCED: 1}),
|
||||||
...(rOpts.enhancedModel !== false && {GOOGLE_SPEECH_USE_ENHANCED: 1}),
|
|
||||||
...(rOpts.profanityFilter === false && {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
|
...(rOpts.profanityFilter === false && {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
|
||||||
...(rOpts.punctuation === false && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
|
...(rOpts.punctuation === false && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
|
||||||
...(rOpts.words == false && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
|
...(rOpts.words == false && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
|
||||||
@@ -366,7 +362,7 @@ module.exports = (logger) => {
|
|||||||
{GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
|
{GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
|
||||||
...(rOpts.interactionType &&
|
...(rOpts.interactionType &&
|
||||||
{GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}),
|
{GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}),
|
||||||
...{GOOGLE_SPEECH_MODEL: rOpts.model || (task.name === TaskName.Gather ? 'command_and_search' : 'latest_long')},
|
...{GOOGLE_SPEECH_MODEL: rOpts.model || 'phone_call'},
|
||||||
...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
|
...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
|
||||||
GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line',
|
GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line',
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user