Feat/deepgram tts onprem (#338)

* support Deepgram on-premise TTS

* wip

* update speech utils version

* install Docker Compose in CI
This commit is contained in:
Hoan Luu Huu
2024-08-07 18:24:58 +07:00
committed by GitHub
parent 7553e2b617
commit 2d2b98dab5
6 changed files with 93 additions and 17 deletions

View File

@@ -7,6 +7,11 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Install Docker Compose
run: |
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
docker-compose --version
- uses: actions/setup-node@v3 - uses: actions/setup-node@v3
with: with:
node-version: lts/* node-version: lts/*

View File

@@ -124,6 +124,7 @@ const encryptCredential = (obj) => {
nuance_stt_uri, nuance_stt_uri,
deepgram_stt_uri, deepgram_stt_uri,
deepgram_stt_use_tls, deepgram_stt_use_tls,
deepgram_tts_uri,
use_custom_tts, use_custom_tts,
custom_tts_endpoint, custom_tts_endpoint,
custom_tts_endpoint_url, custom_tts_endpoint_url,
@@ -204,10 +205,10 @@ const encryptCredential = (obj) => {
case 'deepgram': case 'deepgram':
// API key is optional if onprem // API key is optional if onprem
if (!deepgram_stt_uri) { if (!deepgram_stt_uri || !deepgram_tts_uri) {
assert(api_key, 'invalid deepgram speech credential: api_key is required'); assert(api_key, 'invalid deepgram speech credential: api_key is required');
} }
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls}); const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri});
return encrypt(deepgramData); return encrypt(deepgramData);
case 'ibm': case 'ibm':
@@ -458,6 +459,7 @@ router.put('/:sid', async(req, res) => {
options, options,
deepgram_stt_uri, deepgram_stt_uri,
deepgram_stt_use_tls, deepgram_stt_use_tls,
deepgram_tts_uri,
engine_version engine_version
} = req.body; } = req.body;
@@ -485,6 +487,7 @@ router.put('/:sid', async(req, res) => {
options, options,
deepgram_stt_uri, deepgram_stt_uri,
deepgram_stt_use_tls, deepgram_stt_use_tls,
deepgram_tts_uri,
engine_version engine_version
}; };
logger.info({o, newCred}, 'updating speech credential with this new credential'); logger.info({o, newCred}, 'updating speech credential with this new credential');

View File

@@ -92,8 +92,8 @@ const testGoogleStt = async(logger, credentials) => {
}; };
const testDeepgramStt = async(logger, credentials) => { const testDeepgramStt = async(logger, credentials) => {
const {api_key} = credentials; const {api_key, deepgram_stt_uri, deepgram_stt_use_tls} = credentials;
const deepgram = new Deepgram(api_key); const deepgram = new Deepgram(api_key, deepgram_stt_uri, deepgram_stt_uri && deepgram_stt_use_tls);
const mimetype = 'audio/wav'; const mimetype = 'audio/wav';
const source = { const source = {
@@ -272,7 +272,8 @@ const testPlayHT = async(logger, synthAudio, credentials) => {
credentials, credentials,
language: 'en-US', language: 'en-US',
voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json', voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
text: 'Hi there and welcome to jambones!' text: 'Hi there and welcome to jambones!',
renderForCaching: true
} }
); );
// Test if playHT can fetch voices // Test if playHT can fetch voices
@@ -295,7 +296,8 @@ const testRimelabs = async(logger, synthAudio, credentials) => {
credentials, credentials,
language: 'en-US', language: 'en-US',
voice: 'amber', voice: 'amber',
text: 'Hi there and welcome to jambones!' text: 'Hi there and welcome to jambones!',
renderForCaching: true
} }
); );
} catch (err) { } catch (err) {
@@ -312,7 +314,8 @@ const testWhisper = async(logger, synthAudio, credentials) => {
credentials, credentials,
language: 'en-US', language: 'en-US',
voice: 'alloy', voice: 'alloy',
text: 'Hi there and welcome to jambones!' text: 'Hi there and welcome to jambones!',
renderForCaching: true
} }
); );
} catch (err) { } catch (err) {
@@ -328,7 +331,8 @@ const testDeepgramTTS = async(logger, synthAudio, credentials) => {
vendor: 'deepgram', vendor: 'deepgram',
credentials, credentials,
model: 'aura-asteria-en', model: 'aura-asteria-en',
text: 'Hi there and welcome to jambones!' text: 'Hi there and welcome to jambones!',
renderForCaching: true
} }
); );
} catch (err) { } catch (err) {
@@ -383,7 +387,8 @@ const testVerbioTts = async(logger, synthAudio, credentials) => {
credentials, credentials,
language: 'en-US', language: 'en-US',
voice: 'tommy_en-us', voice: 'tommy_en-us',
text: 'Hi there and welcome to jambones!' text: 'Hi there and welcome to jambones!',
renderForCaching: true
} }
); );
} catch (err) { } catch (err) {
@@ -509,6 +514,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key; obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.deepgram_stt_uri = o.deepgram_stt_uri; obj.deepgram_stt_uri = o.deepgram_stt_uri;
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls; obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
obj.deepgram_tts_uri = o.deepgram_tts_uri;
} }
else if ('ibm' === obj.vendor) { else if ('ibm' === obj.vendor) {
const o = JSON.parse(decrypt(credential)); const o = JSON.parse(decrypt(credential));

68
package-lock.json generated
View File

@@ -19,7 +19,7 @@
"@jambonz/lamejs": "^1.2.2", "@jambonz/lamejs": "^1.2.2",
"@jambonz/mw-registrar": "^0.2.7", "@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.9", "@jambonz/realtimedb-helpers": "^0.8.9",
"@jambonz/speech-utils": "^0.1.11", "@jambonz/speech-utils": "^0.1.13",
"@jambonz/time-series": "^0.2.8", "@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.72", "@jambonz/verb-specifications": "^0.0.72",
"@soniox/soniox-node": "^1.2.2", "@soniox/soniox-node": "^1.2.2",
@@ -2027,9 +2027,10 @@
} }
}, },
"node_modules/@jambonz/speech-utils": { "node_modules/@jambonz/speech-utils": {
"version": "0.1.11", "version": "0.1.13",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.11.tgz", "resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.13.tgz",
"integrity": "sha512-VgljBLUF871adib/3yWpzd7kv26ioxiLVkAIxm94CSk9WeZuzX1lVcE2SohojW3mjCYdYY6+B8FRyzlTD+en3g==", "integrity": "sha512-QeVmNFLtJGPGQfmp7jXpy742AyJIv2EteelDmNTqWGFEwTBj88q8GLP51hUsIR2ZbE5n/ZmZb/ytT6Y6LIQSDg==",
"license": "MIT",
"dependencies": { "dependencies": {
"@aws-sdk/client-polly": "^3.496.0", "@aws-sdk/client-polly": "^3.496.0",
"@aws-sdk/client-sts": "^3.496.0", "@aws-sdk/client-sts": "^3.496.0",
@@ -2041,7 +2042,7 @@
"form-urlencoded": "^6.1.4", "form-urlencoded": "^6.1.4",
"google-protobuf": "^3.21.2", "google-protobuf": "^3.21.2",
"ibm-watson": "^8.0.0", "ibm-watson": "^8.0.0",
"microsoft-cognitiveservices-speech-sdk": "1.36.0", "microsoft-cognitiveservices-speech-sdk": "1.38.0",
"openai": "^4.25.0", "openai": "^4.25.0",
"undici": "^6.4.0" "undici": "^6.4.0"
} }
@@ -2051,6 +2052,28 @@
"resolved": "https://registry.npmjs.org/@types/node/-/node-13.13.52.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-13.13.52.tgz",
"integrity": "sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ==" "integrity": "sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ=="
}, },
"node_modules/@jambonz/speech-utils/node_modules/https-proxy-agent": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz",
"integrity": "sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==",
"license": "MIT",
"dependencies": {
"agent-base": "5",
"debug": "4"
},
"engines": {
"node": ">= 6.0.0"
}
},
"node_modules/@jambonz/speech-utils/node_modules/https-proxy-agent/node_modules/agent-base": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-5.1.1.tgz",
"integrity": "sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==",
"license": "MIT",
"engines": {
"node": ">= 6.0.0"
}
},
"node_modules/@jambonz/speech-utils/node_modules/ibm-watson": { "node_modules/@jambonz/speech-utils/node_modules/ibm-watson": {
"version": "8.0.0", "version": "8.0.0",
"resolved": "https://registry.npmjs.org/ibm-watson/-/ibm-watson-8.0.0.tgz", "resolved": "https://registry.npmjs.org/ibm-watson/-/ibm-watson-8.0.0.tgz",
@@ -2072,6 +2095,41 @@
"node": ">=16.0.0" "node": ">=16.0.0"
} }
}, },
"node_modules/@jambonz/speech-utils/node_modules/microsoft-cognitiveservices-speech-sdk": {
"version": "1.38.0",
"resolved": "https://registry.npmjs.org/microsoft-cognitiveservices-speech-sdk/-/microsoft-cognitiveservices-speech-sdk-1.38.0.tgz",
"integrity": "sha512-NA6J4eIDkeR9iN83rcn77Kn5AWQcizDEn1tLMjzRvSovUNB1FrZe0mWYO0fsGltUwMl3Ns5OZ3lGw42PU4fEYA==",
"license": "MIT",
"dependencies": {
"@types/webrtc": "^0.0.37",
"agent-base": "^6.0.1",
"bent": "^7.3.12",
"https-proxy-agent": "^4.0.0",
"uuid": "^9.0.0",
"ws": "^7.5.6"
}
},
"node_modules/@jambonz/speech-utils/node_modules/ws": {
"version": "7.5.10",
"resolved": "https://registry.npmjs.org/ws/-/ws-7.5.10.tgz",
"integrity": "sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==",
"license": "MIT",
"engines": {
"node": ">=8.3.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": "^5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/@jambonz/time-series": { "node_modules/@jambonz/time-series": {
"version": "0.2.8", "version": "0.2.8",
"resolved": "https://registry.npmjs.org/@jambonz/time-series/-/time-series-0.2.8.tgz", "resolved": "https://registry.npmjs.org/@jambonz/time-series/-/time-series-0.2.8.tgz",

View File

@@ -29,7 +29,7 @@
"@jambonz/lamejs": "^1.2.2", "@jambonz/lamejs": "^1.2.2",
"@jambonz/mw-registrar": "^0.2.7", "@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.9", "@jambonz/realtimedb-helpers": "^0.8.9",
"@jambonz/speech-utils": "^0.1.11", "@jambonz/speech-utils": "^0.1.13",
"@jambonz/time-series": "^0.2.8", "@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.72", "@jambonz/verb-specifications": "^0.0.72",
"@soniox/soniox-node": "^1.2.2", "@soniox/soniox-node": "^1.2.2",

View File

@@ -371,7 +371,8 @@ test('speech credentials tests', async(t) => {
vendor: 'deepgram', vendor: 'deepgram',
use_for_stt: true, use_for_stt: true,
deepgram_stt_uri: "127.0.0.1:50002", deepgram_stt_uri: "127.0.0.1:50002",
deepgram_stt_use_tls: true deepgram_stt_use_tls: true,
deepgram_tts_uri: 'https://server.com'
} }
}); });
t.ok(result.statusCode === 201, 'successfully added speech credential for deepgram'); t.ok(result.statusCode === 201, 'successfully added speech credential for deepgram');
@@ -386,6 +387,7 @@ test('speech credentials tests', async(t) => {
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram'); t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram');
t.ok(result.body.deepgram_stt_uri === '127.0.0.1:50002', "deepgram_stt_uri is correct for deepgram"); t.ok(result.body.deepgram_stt_uri === '127.0.0.1:50002', "deepgram_stt_uri is correct for deepgram");
t.ok(result.body.deepgram_stt_use_tls === true, "deepgram_stt_use_tls is correct for deepgram"); t.ok(result.body.deepgram_stt_use_tls === true, "deepgram_stt_use_tls is correct for deepgram");
t.ok(result.body.deepgram_tts_uri === 'https://server.com', "deepgram_tts_uri is correct for deepgram")
result = await request.put(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, { result = await request.put(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
resolveWithFullResponse: true, resolveWithFullResponse: true,
@@ -395,7 +397,8 @@ test('speech credentials tests', async(t) => {
vendor: 'deepgram', vendor: 'deepgram',
use_for_stt: true, use_for_stt: true,
deepgram_stt_uri: "127.0.0.2:50002", deepgram_stt_uri: "127.0.0.2:50002",
deepgram_stt_use_tls: false deepgram_stt_use_tls: false,
deepgram_tts_uri: 'https://server2.com'
} }
}); });
t.ok(result.statusCode === 204, 'successfully updated speech credential for deepgram onprem'); t.ok(result.statusCode === 204, 'successfully updated speech credential for deepgram onprem');
@@ -409,6 +412,7 @@ test('speech credentials tests', async(t) => {
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram onprem'); t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram onprem');
t.ok(result.body.deepgram_stt_uri === '127.0.0.2:50002', "deepgram_stt_uri is correct for deepgram onprem"); t.ok(result.body.deepgram_stt_uri === '127.0.0.2:50002', "deepgram_stt_uri is correct for deepgram onprem");
t.ok(result.body.deepgram_stt_use_tls === false, "deepgram_stt_use_tls is correct for deepgram onprem"); t.ok(result.body.deepgram_stt_use_tls === false, "deepgram_stt_use_tls is correct for deepgram onprem");
t.ok(result.body.deepgram_tts_uri === 'https://server2.com', "deepgram_tts_uri is correct for deepgram onprem");
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, { result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
auth: authUser, auth: authUser,