google STT: default to command_and_search for Gather, as latest_short seems to have issues; various other fixes (#285)

This commit is contained in:
Dave Horton
2023-03-26 12:20:03 -04:00
committed by GitHub
parent e1bd075ebc
commit 63ab554908
2 changed files with 33 additions and 40 deletions

View File

@@ -104,6 +104,10 @@ class TaskGather extends Task {
get needsStt() { return this.input.includes('speech'); } get needsStt() { return this.input.includes('speech'); }
/**
 * Whether this Gather was explicitly configured to listen for multiple utterances.
 *
 * Evaluates to true only when `recognizer.singleUtterance` is strictly `false`;
 * a missing recognizer, or a `singleUtterance` that is unset or any other value,
 * yields false.
 */
get wantsMultipleUtterances() {
return this.data.recognizer?.singleUtterance === false;
}
get earlyMedia() { get earlyMedia() {
return (this.sayTask && this.sayTask.earlyMedia) || return (this.sayTask && this.sayTask.earlyMedia) ||
(this.playTask && this.playTask.earlyMedia); (this.playTask && this.playTask.earlyMedia);
@@ -661,14 +665,18 @@ class TaskGather extends Task {
this._killAudio(cs); this._killAudio(cs);
} }
// DCH: commenting out because my experience is that the google STT engine /**
// will keep listening after it detects end of utterance, and will return a final transcript * By default, Gather asks google for a single utterance. On getting an end of utterance event,
// My earlier understanding that we needed to stop and restart the recognizer appears incorrect. * the mod_google_transcribe plugin will send a WritesDone to the grpc stream, which will usually
/* * cause google to return a final transcript. So even though we have not received a final
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length) { * transcript at this point (because otherwise resolved() would be true), we do not need to
* restart the recognizer - we should get the final transcript shortly.
* The exception is if the Gather was specifically configured to listen
* to multiple utterances, in which case we need to restart the recognizer.
*/
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsMultipleUtterances) {
this._startTranscribing(ep); this._startTranscribing(ep);
} }
*/
} }
_onStartOfSpeech(cs, ep) { _onStartOfSpeech(cs, ep) {

View File

@@ -337,53 +337,38 @@ module.exports = (logger) => {
if ('google' === vendor) { if ('google' === vendor) {
opts = { opts = {
...opts, ...opts,
...(sttCredentials && ...(sttCredentials && {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
{GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}), ...(rOpts.separateRecognitionPerChannel && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}),
...(rOpts.enhancedModel && ...(rOpts.separateRecognitionPerChanne === false && {GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}),
{GOOGLE_SPEECH_USE_ENHANCED: 1}), ...(rOpts.profanityFilter && {GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
...(rOpts.separateRecognitionPerChannel && ...(rOpts.punctuation && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
{GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 1}), ...(rOpts.words && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
...(rOpts.profanityFilter &&
{GOOGLE_SPEECH_PROFANITY_FILTER: 1}),
...(rOpts.punctuation &&
{GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1}),
...(rOpts.words &&
{GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 1}),
...((rOpts.singleUtterance || task.name === TaskName.Gather) && ...((rOpts.singleUtterance || task.name === TaskName.Gather) &&
{GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}), {GOOGLE_SPEECH_SINGLE_UTTERANCE: 1}),
...(rOpts.diarization && ...(rOpts.singleUtterance === false && {GOOGLE_SPEECH_SINGLE_UTTERANCE: 0}),
{GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}), ...(rOpts.diarization && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 1}),
...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 && ...(rOpts.diarization && rOpts.diarizationMinSpeakers > 0 &&
{GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}), {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MIN_SPEAKER_COUNT: rOpts.diarizationMinSpeakers}),
...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 && ...(rOpts.diarization && rOpts.diarizationMaxSpeakers > 0 &&
{GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}), {GOOGLE_SPEECH_SPEAKER_DIARIZATION_MAX_SPEAKER_COUNT: rOpts.diarizationMaxSpeakers}),
...(rOpts.enhancedModel === false && ...(rOpts.enhancedModel === false && {GOOGLE_SPEECH_USE_ENHANCED: 0}),
{GOOGLE_SPEECH_USE_ENHANCED: 0}), ...(rOpts.enhancedModel !== false && {GOOGLE_SPEECH_USE_ENHANCED: 1}),
...(rOpts.separateRecognitionPerChannel === false && ...(rOpts.profanityFilter === false && {GOOGLE_SPEECH_PROFANITY_FILTER: 0}),
{GOOGLE_SPEECH_SEPARATE_RECOGNITION_PER_CHANNEL: 0}), ...(rOpts.punctuation === false && {GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
...(rOpts.profanityFilter === false && ...(rOpts.words == false && {GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
{GOOGLE_SPEECH_PROFANITY_FILTER: 0}), ...(rOpts.diarization === false && {GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
...(rOpts.punctuation === false &&
{GOOGLE_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 0}),
...(rOpts.words == false &&
{GOOGLE_SPEECH_ENABLE_WORD_TIME_OFFSETS: 0}),
...((rOpts.singleUtterance === false || task.name === TaskName.Transcribe) &&
{GOOGLE_SPEECH_SINGLE_UTTERANCE: 0}),
...(rOpts.diarization === false &&
{GOOGLE_SPEECH_SPEAKER_DIARIZATION: 0}),
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' && ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'string' &&
{GOOGLE_SPEECH_HINTS: rOpts.hints.join(',')}), {GOOGLE_SPEECH_HINTS: rOpts.hints.join(',')}),
...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' && ...(rOpts.hints.length > 0 && typeof rOpts.hints[0] === 'object' &&
{GOOGLE_SPEECH_HINTS: JSON.stringify(rOpts.hints)}), {GOOGLE_SPEECH_HINTS: JSON.stringify(rOpts.hints)}),
...(typeof rOpts.hintsBoost === 'number' && ...(typeof rOpts.hintsBoost === 'number' && {GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
{GOOGLE_SPEECH_HINTS_BOOST: rOpts.hintsBoost}),
...(rOpts.altLanguages.length > 0 && ...(rOpts.altLanguages.length > 0 &&
{GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}), {GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES: [...new Set(rOpts.altLanguages)].join(',')}),
...(rOpts.interactionType && ...(rOpts.interactionType &&
{GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}), {GOOGLE_SPEECH_METADATA_INTERACTION_TYPE: rOpts.interactionType}),
...{GOOGLE_SPEECH_MODEL: rOpts.model || (task.name === TaskName.Gather ? 'latest_short' : 'phone_call')}, ...{GOOGLE_SPEECH_MODEL: rOpts.model || (task.name === TaskName.Gather ? 'command_and_search' : 'latest_long')},
...(rOpts.naicsCode > 0 && ...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
{GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}), GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line',
}; };
} }
else if (['aws', 'polly'].includes(vendor)) { else if (['aws', 'polly'].includes(vendor)) {