mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 16:50:39 +00:00
Feature/azure recognition (#46)
* add support for Microsoft speech recognition * update to a drachtio-fsmrf version that supports Microsoft STT * gather and transcribe now support Microsoft
This commit is contained in:
@@ -3,7 +3,8 @@ const {
|
||||
TaskName,
|
||||
TaskPreconditions,
|
||||
GoogleTranscriptionEvents,
|
||||
AwsTranscriptionEvents
|
||||
AwsTranscriptionEvents,
|
||||
AzureTranscriptionEvents
|
||||
} = require('../utils/constants');
|
||||
|
||||
const makeTask = require('./make_task');
|
||||
@@ -33,6 +34,12 @@ class TaskGather extends Task {
|
||||
this.vocabularyName = recognizer.vocabularyName;
|
||||
this.vocabularyFilterName = recognizer.vocabularyFilterName;
|
||||
this.filterMethod = recognizer.filterMethod;
|
||||
|
||||
/* microsoft options */
|
||||
this.outputFormat = recognizer.outputFormat || 'simple';
|
||||
this.profanityOption = recognizer.profanityOption || 'raw';
|
||||
this.requestSnr = recognizer.requestSnr || false;
|
||||
this.initialSpeechTimeoutMs = recognizer.initialSpeechTimeoutMs || 0;
|
||||
}
|
||||
|
||||
this.digitBuffer = '';
|
||||
@@ -63,7 +70,7 @@ class TaskGather extends Task {
|
||||
this.sttCredentials = cs.getSpeechCredentials(this.vendor, 'stt');
|
||||
if (this.needsStt && !this.sttCredentials) {
|
||||
const {writeAlerts, AlertType} = cs.srf.locals;
|
||||
this.logger.info(`TaskGather:exec - ERROR stt using ${this.vendor} requested but not creds supplied`);
|
||||
this.logger.info(`TaskGather:exec - ERROR stt using ${this.vendor} requested but creds not supplied`);
|
||||
writeAlerts({
|
||||
account_sid: cs.accountSid,
|
||||
alert_type: AlertType.STT_NOT_PROVISIONED,
|
||||
@@ -106,6 +113,8 @@ class TaskGather extends Task {
|
||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance);
|
||||
ep.removeCustomEventListener(AwsTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(AzureTranscriptionEvents.Transcription);
|
||||
ep.removeCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected);
|
||||
}
|
||||
|
||||
kill(cs) {
|
||||
@@ -135,7 +144,9 @@ class TaskGather extends Task {
|
||||
GOOGLE_SPEECH_SINGLE_UTTERANCE: true,
|
||||
GOOGLE_SPEECH_MODEL: 'command_and_search'
|
||||
});
|
||||
if (this.hints && this.hints.length > 1) opts.GOOGLE_SPEECH_HINTS = this.hints.join(',');
|
||||
if (this.hints && this.hints.length > 1) {
|
||||
opts.GOOGLE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
||||
}
|
||||
if (this.altLanguages && this.altLanguages.length > 1) {
|
||||
opts.GOOGLE_SPEECH_ALTERNATIVE_LANGUAGE_CODES = this.altLanguages.join(',');
|
||||
}
|
||||
@@ -145,22 +156,41 @@ class TaskGather extends Task {
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
|
||||
}
|
||||
else {
|
||||
else if (['aws', 'polly'].includes(this.vendor)) {
|
||||
if (this.vocabularyName) opts.AWS_VOCABULARY_NAME = this.vocabularyName;
|
||||
if (this.vocabularyFilterName) {
|
||||
opts.AWS_VOCABULARY_NAME = this.vocabularyFilterName;
|
||||
opts.AWS_VOCABULARY_FILTER_METHOD = this.filterMethod || 'mask';
|
||||
}
|
||||
Object.assign(opts, {
|
||||
AWS_ACCESS_KEY_ID: this.sttCredentials.accessKeyId,
|
||||
AWS_SECRET_ACCESS_KEY: this.sttCredentials.secretAccessKey,
|
||||
AWS_REGION: this.sttCredentials.region
|
||||
});
|
||||
if (this.sttCredentials) {
|
||||
Object.assign(opts, {
|
||||
AWS_ACCESS_KEY_ID: this.sttCredentials.accessKeyId,
|
||||
AWS_SECRET_ACCESS_KEY: this.sttCredentials.secretAccessKey,
|
||||
AWS_REGION: this.sttCredentials.region
|
||||
});
|
||||
}
|
||||
ep.addCustomEventListener(AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
}
|
||||
else if ('microsoft' === this.vendor) {
|
||||
if (this.sttCredentials) {
|
||||
Object.assign(opts, {
|
||||
'AZURE_SUBSCRIPTION_KEY': this.sttCredentials.api_key,
|
||||
'AZURE_REGION': this.sttCredentials.region
|
||||
});
|
||||
}
|
||||
if (this.hints && this.hints.length > 1) {
|
||||
opts.AZURE_SPEECH_HINTS = this.hints.map((h) => h.trim()).join(',');
|
||||
}
|
||||
//if (this.requestSnr) opts.AZURE_REQUEST_SNR = 1;
|
||||
//if (this.profanityOption !== 'raw') opts.AZURE_PROFANITY_OPTION = this.profanityOption;
|
||||
if (this.initialSpeechTimeoutMs > 0) opts.AZURE_INITIAL_SPEECH_TIMEOUT_MS = this.initialSpeechTimeoutMs;
|
||||
opts.AZURE_USE_OUTPUT_FORMAT_DETAILED = 1;
|
||||
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
|
||||
ep.addCustomEventListener(AzureTranscriptionEvents.NoSpeechDetected, this._onNoSpeechDetected.bind(this, cs, ep));
|
||||
}
|
||||
await ep.set(opts)
|
||||
.catch((err) => this.logger.info(err, 'Error setting channel variables'));
|
||||
|
||||
}
|
||||
|
||||
_startTranscribing(ep) {
|
||||
@@ -208,11 +238,21 @@ class TaskGather extends Task {
|
||||
|
||||
_onTranscription(cs, ep, evt) {
|
||||
if ('aws' === this.vendor && Array.isArray(evt) && evt.length > 0) evt = evt[0];
|
||||
this.logger.debug(evt, 'TaskGather:_onTranscription');
|
||||
const final = evt.is_final;
|
||||
if (final) {
|
||||
this._resolve('speech', evt);
|
||||
if ('microsoft' === this.vendor) {
|
||||
const nbest = evt.NBest;
|
||||
const newEvent = {
|
||||
is_final: evt.RecognitionStatus === 'Success',
|
||||
alternatives: [
|
||||
{
|
||||
confidence: nbest[0].Confidence,
|
||||
transcript: nbest[0].Display
|
||||
}
|
||||
]
|
||||
};
|
||||
evt = newEvent;
|
||||
}
|
||||
this.logger.debug(evt, 'TaskGather:_onTranscription');
|
||||
if (evt.is_final) this._resolve('speech', evt);
|
||||
else if (this.partialResultHook) {
|
||||
this.cs.requestor.request(this.partialResultHook, Object.assign({speech: evt}, this.cs.callInfo))
|
||||
.catch((err) => this.logger.info(err, 'GatherTask:_onTranscription error'));
|
||||
@@ -225,6 +265,10 @@ class TaskGather extends Task {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handler registered for AzureTranscriptionEvents.NoSpeechDetected when the
 * vendor is 'microsoft'. Resolves the gather with reason 'timeout'.
 * The bound `cs` and `ep` arguments are accepted but not used here.
 */
_onNoSpeechDetected(cs, ep) {
|
||||
this._resolve('timeout');
|
||||
}
|
||||
|
||||
async _resolve(reason, evt) {
|
||||
if (this.resolved) return;
|
||||
this.resolved = true;
|
||||
|
||||
Reference in New Issue
Block a user