google stt fixes, including defaulting to phone_call model based on c… (#288)

* google stt fixes, including defaulting to phone_call model based on comparative model testing

* lint error
This commit is contained in:
Dave Horton
2023-03-28 10:02:03 -04:00
committed by GitHub
parent efdea3e514
commit 6e945dde9a
2 changed files with 16 additions and 18 deletions

View File

@@ -98,14 +98,15 @@ class TaskGather extends Task {
this._sonioxTranscripts = [];
this.parentTask = parentTask;
this.partialTranscriptsCount = 0;
}
get name() { return TaskName.Gather; }
get needsStt() { return this.input.includes('speech'); }
get wantsMultipleUtterances() {
return this.data.recognizer?.singleUtterance !== true;
get wantsSingleUtterance() {
return this.data.recognizer?.singleUtterance === true;
}
get earlyMedia() {
@@ -565,7 +566,8 @@ class TaskGather extends Task {
return;
}
if (this.earlyHintsMatch && evt.is_final === false) {
/* fast path: our first partial transcript exactly matches an early hint */
if (this.earlyHintsMatch && evt.is_final === false && this.partialTranscriptsCount++ === 0) {
const transcript = evt.alternatives[0].transcript?.toLowerCase();
const hints = this.data.recognizer?.hints || [];
if (hints.find((h) => h.toLowerCase() === transcript)) {
@@ -670,15 +672,15 @@ class TaskGather extends Task {
}
/**
* By default, Gather asks google for a single utterance. On getting an end of utterance event,
* the mod_google_transcribe plugin will send a WritesDone to the grpc stream, which will usually
* cause google to return a final transcript. So even though we have not received a final
* transcript at this point (because otherwise resolved() would be true), we do not need to
* restart the recognizer - we should get the final transcript shortly.
* The exception is if the Gather was specifically configured to listen
* to multiple utterances, in which case we need to restart the recognizer.
* By default, Gather asks google for multiple utterances.
* The reason is that we can sometimes get an 'end_of_utterance' event without
* getting a transcription. This can happen if someone coughs or mumbles.
* For that reason we don't ask for a single utterance; instead we terminate the transcribe
* operation once we get a final transcript.
* However, if the user has specified singleUtterance, then we need to restart here
* since we don't have a final transcript yet.
*/
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsMultipleUtterances) {
if (!this.resolved && !this.killed && !this._bufferedTranscripts.length && this.wantsSingleUtterance) {
this._startTranscribing(ep);
}
}