Compare commits

...

7 Commits

Author SHA1 Message Date
Guilherme Rauen
0fc3c95cc5 update node image to the latest and most secure (#81)
Co-authored-by: Guilherme Rauen <g.rauen@cognigy.com>
2022-11-11 17:44:40 -05:00
Dave Horton
aeebf6aee0 add real testing of microsoft stt 2022-11-10 18:11:04 -05:00
Dave Horton
8026f45467 update deps 2022-11-05 10:44:19 -04:00
Dave Horton
505884e68e update deps 2022-11-04 08:31:41 -04:00
Dave Horton
9e4d6eb88b update deps 2022-11-02 13:39:33 -04:00
Dave Horton
e1cfbe5010 update db-helpers 2022-11-01 21:21:38 -04:00
Dave Horton
b8dc0b2f05 update google speech libs 2022-10-31 11:15:40 -04:00
6 changed files with 2054 additions and 2402 deletions

1
.gitignore vendored
View File

@@ -1,6 +1,7 @@
# Logs
logs
*.log
run-tests.sh
# Runtime data
pids

View File

@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 node:18.9.0-alpine3.16 as base
FROM --platform=linux/amd64 node:18.12.1-alpine3.16 as base
RUN apk --update --no-cache add --virtual .builds-deps build-base python3

View File

@@ -2,6 +2,8 @@ const ttsGoogle = require('@google-cloud/text-to-speech');
const sttGoogle = require('@google-cloud/speech').v1p1beta1;
const Polly = require('aws-sdk/clients/polly');
const AWS = require('aws-sdk');
const { Deepgram } = require('@deepgram/sdk');
const sdk = require('microsoft-cognitiveservices-speech-sdk');
const bent = require('bent');
const fs = require('fs');
@@ -32,6 +34,65 @@ const testGoogleStt = async(logger, credentials) => {
}
};
const testDeepgramStt = async(logger, credentials) => {
const {api_key} = credentials;
const deepgram = new Deepgram(api_key);
const mimetype = 'audio/wav';
const source = {
buffer: fs.readFileSync(`${__dirname}/../../data/test_audio.wav`),
mimetype: mimetype
};
return new Promise((resolve, reject) => {
// Send the audio to Deepgram and get the response
deepgram.transcription
.preRecorded(source, {punctuate: true})
.then((response) => {
//logger.debug({response}, 'got transcript');
if (response?.results?.channels[0]?.alternatives?.length > 0) resolve(response);
else reject(new Error('no transcript returned'));
return;
})
.catch((err) => {
logger.info({err}, 'failed to get deepgram transcript');
reject(err);
});
});
};
const testMicrosoftStt = async(logger, credentials) => {
const {api_key, region} = credentials;
const speechConfig = sdk.SpeechConfig.fromSubscription(api_key, region);
const audioConfig = sdk.AudioConfig.fromWavFileInput(fs.readFileSync(`${__dirname}/../../data/test_audio.wav`));
speechConfig.speechRecognitionLanguage = 'en-US';
const speechRecognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
return new Promise((resolve, reject) => {
speechRecognizer.recognizeOnceAsync((result) => {
switch (result.reason) {
case sdk.ResultReason.RecognizedSpeech:
resolve();
break;
case sdk.ResultReason.NoMatch:
reject('Speech could not be recognized.');
break;
case sdk.ResultReason.Canceled:
const cancellation = sdk.CancellationDetails.fromResult(result);
logger.info(`CANCELED: Reason=${cancellation.reason}`);
if (cancellation.reason == sdk.CancellationReason.Error) {
logger.info(`CANCELED: ErrorCode=${cancellation.ErrorCode}`);
logger.info(`CANCELED: ErrorDetails=${cancellation.errorDetails}`);
}
reject(cancellation.reason);
break;
}
speechRecognizer.close();
});
});
};
const testAwsTts = (logger, credentials) => {
const polly = new Polly(credentials);
return new Promise((resolve, reject) => {
@@ -89,11 +150,6 @@ const testMicrosoftTts = async(logger, credentials) => {
}
};
const testMicrosoftStt = async(logger, credentials) => {
//TODO
return true;
};
const testWellSaidTts = async(logger, credentials) => {
const {api_key} = credentials;
try {
@@ -127,4 +183,5 @@ module.exports = {
testMicrosoftTts,
testMicrosoftStt,
testWellSaidStt,
testDeepgramStt
};

4321
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -18,10 +18,11 @@
"url": "https://github.com/jambonz/jambonz-api-server.git"
},
"dependencies": {
"@google-cloud/speech": "^4.10.2",
"@google-cloud/text-to-speech": "^3.4.0",
"@jambonz/db-helpers": "^0.6.19",
"@jambonz/realtimedb-helpers": "^0.4.35",
"@deepgram/sdk": "^1.10.2",
"@google-cloud/speech": "^5.1.0",
"@google-cloud/text-to-speech": "^4.0.3",
"@jambonz/db-helpers": "^0.7.3",
"@jambonz/realtimedb-helpers": "^0.5.9",
"@jambonz/time-series": "^0.2.5",
"argon2-ffi": "^2.0.0",
"aws-sdk": "^2.1152.0",
@@ -35,6 +36,7 @@
"helmet": "^5.1.0",
"jsonwebtoken": "^8.5.1",
"mailgun.js": "^3.7.3",
"microsoft-cognitiveservices-speech-sdk": "^1.24.1",
"mysql2": "^2.3.3",
"passport": "^0.6.0",
"passport-http-bearer": "^1.0.1",

View File

@@ -30,7 +30,9 @@ test('speech credentials tests', async(t) => {
json: true,
body: {
vendor: 'google',
service_key: jsonKey
service_key: jsonKey,
use_for_tts: true,
use_for_stt: true
}
});
t.ok(result.statusCode === 201, 'successfully added a speech credential to service provider');
@@ -61,7 +63,9 @@ test('speech credentials tests', async(t) => {
json: true,
body: {
vendor: 'google',
service_key: jsonKey
service_key: jsonKey,
use_for_tts: true,
use_for_stt: true
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential');
@@ -110,20 +114,20 @@ test('speech credentials tests', async(t) => {
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential');
/* add a credential for microsoft */
if (process.env.MICROSOFT_API_KEY && process.env.MICROSOFT_REGION) {
/* add / test a credential for google */
if (process.env.GCP_JSON_KEY) {
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'microsoft',
vendor: 'google',
use_for_tts: true,
api_key: process.env.MICROSOFT_API_KEY,
region: process.env.MICROSOFT_REGION
use_for_stt: true,
service_key: process.env.GCP_JSON_KEY
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential');
t.ok(result.statusCode === 201, 'successfully added speech credential for google');
const ms_sid = result.body.sid;
/* test the speech credential */
@@ -132,7 +136,37 @@ test('speech credentials tests', async(t) => {
auth: authUser,
json: true,
});
console.log(JSON.stringify(result));
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for google tts');
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for google stt');
}
/* add / test a credential for microsoft */
if (process.env.MICROSOFT_API_KEY && process.env.MICROSOFT_REGION) {
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'microsoft',
use_for_tts: true,
use_for_stt: true,
api_key: process.env.MICROSOFT_API_KEY,
region: process.env.MICROSOFT_REGION
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for microsoft');
const ms_sid = result.body.sid;
/* test the speech credential */
result = await request.get(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}/test`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
});
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for microsoft tts');
t.ok(result.statusCode === 200 && result.body.stt.status === 'ok', 'successfully tested speech credential for microsoft stt');
}
/* add a credential for wellsaid */
@@ -156,7 +190,8 @@ test('speech credentials tests', async(t) => {
auth: authUser,
json: true,
});
console.log(JSON.stringify(result));
//console.log(JSON.stringify(result));
t.ok(result.statusCode === 200 && result.body.tts.status === 'ok', 'successfully tested speech credential for wellsaid');
/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${ms_sid}`, {