mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2025-12-19 05:47:46 +00:00
Feat/deepgram tts onprem (#338)
* support deepgram onpremise * wip * update speech utils version * install docker in ci
This commit is contained in:
5
.github/workflows/ci.yml
vendored
5
.github/workflows/ci.yml
vendored
@@ -7,6 +7,11 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install Docker Compose
|
||||
run: |
|
||||
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
|
||||
sudo chmod +x /usr/local/bin/docker-compose
|
||||
docker-compose --version
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: lts/*
|
||||
|
||||
@@ -124,6 +124,7 @@ const encryptCredential = (obj) => {
|
||||
nuance_stt_uri,
|
||||
deepgram_stt_uri,
|
||||
deepgram_stt_use_tls,
|
||||
deepgram_tts_uri,
|
||||
use_custom_tts,
|
||||
custom_tts_endpoint,
|
||||
custom_tts_endpoint_url,
|
||||
@@ -204,10 +205,10 @@ const encryptCredential = (obj) => {
|
||||
|
||||
case 'deepgram':
|
||||
// API key is optional if onprem
|
||||
if (!deepgram_stt_uri) {
|
||||
if (!deepgram_stt_uri || !deepgram_tts_uri) {
|
||||
assert(api_key, 'invalid deepgram speech credential: api_key is required');
|
||||
}
|
||||
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls});
|
||||
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri});
|
||||
return encrypt(deepgramData);
|
||||
|
||||
case 'ibm':
|
||||
@@ -458,6 +459,7 @@ router.put('/:sid', async(req, res) => {
|
||||
options,
|
||||
deepgram_stt_uri,
|
||||
deepgram_stt_use_tls,
|
||||
deepgram_tts_uri,
|
||||
engine_version
|
||||
} = req.body;
|
||||
|
||||
@@ -485,6 +487,7 @@ router.put('/:sid', async(req, res) => {
|
||||
options,
|
||||
deepgram_stt_uri,
|
||||
deepgram_stt_use_tls,
|
||||
deepgram_tts_uri,
|
||||
engine_version
|
||||
};
|
||||
logger.info({o, newCred}, 'updating speech credential with this new credential');
|
||||
|
||||
@@ -92,8 +92,8 @@ const testGoogleStt = async(logger, credentials) => {
|
||||
};
|
||||
|
||||
const testDeepgramStt = async(logger, credentials) => {
|
||||
const {api_key} = credentials;
|
||||
const deepgram = new Deepgram(api_key);
|
||||
const {api_key, deepgram_stt_uri, deepgram_stt_use_tls} = credentials;
|
||||
const deepgram = new Deepgram(api_key, deepgram_stt_uri, deepgram_stt_uri && deepgram_stt_use_tls);
|
||||
|
||||
const mimetype = 'audio/wav';
|
||||
const source = {
|
||||
@@ -272,7 +272,8 @@ const testPlayHT = async(logger, synthAudio, credentials) => {
|
||||
credentials,
|
||||
language: 'en-US',
|
||||
voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
|
||||
text: 'Hi there and welcome to jambones!'
|
||||
text: 'Hi there and welcome to jambones!',
|
||||
renderForCaching: true
|
||||
}
|
||||
);
|
||||
// Test if playHT can fetch voices
|
||||
@@ -295,7 +296,8 @@ const testRimelabs = async(logger, synthAudio, credentials) => {
|
||||
credentials,
|
||||
language: 'en-US',
|
||||
voice: 'amber',
|
||||
text: 'Hi there and welcome to jambones!'
|
||||
text: 'Hi there and welcome to jambones!',
|
||||
renderForCaching: true
|
||||
}
|
||||
);
|
||||
} catch (err) {
|
||||
@@ -312,7 +314,8 @@ const testWhisper = async(logger, synthAudio, credentials) => {
|
||||
credentials,
|
||||
language: 'en-US',
|
||||
voice: 'alloy',
|
||||
text: 'Hi there and welcome to jambones!'
|
||||
text: 'Hi there and welcome to jambones!',
|
||||
renderForCaching: true
|
||||
}
|
||||
);
|
||||
} catch (err) {
|
||||
@@ -328,7 +331,8 @@ const testDeepgramTTS = async(logger, synthAudio, credentials) => {
|
||||
vendor: 'deepgram',
|
||||
credentials,
|
||||
model: 'aura-asteria-en',
|
||||
text: 'Hi there and welcome to jambones!'
|
||||
text: 'Hi there and welcome to jambones!',
|
||||
renderForCaching: true
|
||||
}
|
||||
);
|
||||
} catch (err) {
|
||||
@@ -383,7 +387,8 @@ const testVerbioTts = async(logger, synthAudio, credentials) => {
|
||||
credentials,
|
||||
language: 'en-US',
|
||||
voice: 'tommy_en-us',
|
||||
text: 'Hi there and welcome to jambones!'
|
||||
text: 'Hi there and welcome to jambones!',
|
||||
renderForCaching: true
|
||||
}
|
||||
);
|
||||
} catch (err) {
|
||||
@@ -509,6 +514,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
obj.deepgram_stt_uri = o.deepgram_stt_uri;
|
||||
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
|
||||
obj.deepgram_tts_uri = o.deepgram_tts_uri;
|
||||
}
|
||||
else if ('ibm' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
|
||||
68
package-lock.json
generated
68
package-lock.json
generated
@@ -19,7 +19,7 @@
|
||||
"@jambonz/lamejs": "^1.2.2",
|
||||
"@jambonz/mw-registrar": "^0.2.7",
|
||||
"@jambonz/realtimedb-helpers": "^0.8.9",
|
||||
"@jambonz/speech-utils": "^0.1.11",
|
||||
"@jambonz/speech-utils": "^0.1.13",
|
||||
"@jambonz/time-series": "^0.2.8",
|
||||
"@jambonz/verb-specifications": "^0.0.72",
|
||||
"@soniox/soniox-node": "^1.2.2",
|
||||
@@ -2027,9 +2027,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/speech-utils": {
|
||||
"version": "0.1.11",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.11.tgz",
|
||||
"integrity": "sha512-VgljBLUF871adib/3yWpzd7kv26ioxiLVkAIxm94CSk9WeZuzX1lVcE2SohojW3mjCYdYY6+B8FRyzlTD+en3g==",
|
||||
"version": "0.1.13",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.13.tgz",
|
||||
"integrity": "sha512-QeVmNFLtJGPGQfmp7jXpy742AyJIv2EteelDmNTqWGFEwTBj88q8GLP51hUsIR2ZbE5n/ZmZb/ytT6Y6LIQSDg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-polly": "^3.496.0",
|
||||
"@aws-sdk/client-sts": "^3.496.0",
|
||||
@@ -2041,7 +2042,7 @@
|
||||
"form-urlencoded": "^6.1.4",
|
||||
"google-protobuf": "^3.21.2",
|
||||
"ibm-watson": "^8.0.0",
|
||||
"microsoft-cognitiveservices-speech-sdk": "1.36.0",
|
||||
"microsoft-cognitiveservices-speech-sdk": "1.38.0",
|
||||
"openai": "^4.25.0",
|
||||
"undici": "^6.4.0"
|
||||
}
|
||||
@@ -2051,6 +2052,28 @@
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-13.13.52.tgz",
|
||||
"integrity": "sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ=="
|
||||
},
|
||||
"node_modules/@jambonz/speech-utils/node_modules/https-proxy-agent": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz",
|
||||
"integrity": "sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"agent-base": "5",
|
||||
"debug": "4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/speech-utils/node_modules/https-proxy-agent/node_modules/agent-base": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-5.1.1.tgz",
|
||||
"integrity": "sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/speech-utils/node_modules/ibm-watson": {
|
||||
"version": "8.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ibm-watson/-/ibm-watson-8.0.0.tgz",
|
||||
@@ -2072,6 +2095,41 @@
|
||||
"node": ">=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/speech-utils/node_modules/microsoft-cognitiveservices-speech-sdk": {
|
||||
"version": "1.38.0",
|
||||
"resolved": "https://registry.npmjs.org/microsoft-cognitiveservices-speech-sdk/-/microsoft-cognitiveservices-speech-sdk-1.38.0.tgz",
|
||||
"integrity": "sha512-NA6J4eIDkeR9iN83rcn77Kn5AWQcizDEn1tLMjzRvSovUNB1FrZe0mWYO0fsGltUwMl3Ns5OZ3lGw42PU4fEYA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/webrtc": "^0.0.37",
|
||||
"agent-base": "^6.0.1",
|
||||
"bent": "^7.3.12",
|
||||
"https-proxy-agent": "^4.0.0",
|
||||
"uuid": "^9.0.0",
|
||||
"ws": "^7.5.6"
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/speech-utils/node_modules/ws": {
|
||||
"version": "7.5.10",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-7.5.10.tgz",
|
||||
"integrity": "sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8.3.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": "^5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@jambonz/time-series": {
|
||||
"version": "0.2.8",
|
||||
"resolved": "https://registry.npmjs.org/@jambonz/time-series/-/time-series-0.2.8.tgz",
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
"@jambonz/lamejs": "^1.2.2",
|
||||
"@jambonz/mw-registrar": "^0.2.7",
|
||||
"@jambonz/realtimedb-helpers": "^0.8.9",
|
||||
"@jambonz/speech-utils": "^0.1.11",
|
||||
"@jambonz/speech-utils": "^0.1.13",
|
||||
"@jambonz/time-series": "^0.2.8",
|
||||
"@jambonz/verb-specifications": "^0.0.72",
|
||||
"@soniox/soniox-node": "^1.2.2",
|
||||
|
||||
@@ -371,7 +371,8 @@ test('speech credentials tests', async(t) => {
|
||||
vendor: 'deepgram',
|
||||
use_for_stt: true,
|
||||
deepgram_stt_uri: "127.0.0.1:50002",
|
||||
deepgram_stt_use_tls: true
|
||||
deepgram_stt_use_tls: true,
|
||||
deepgram_tts_uri: 'https://server.com'
|
||||
}
|
||||
});
|
||||
t.ok(result.statusCode === 201, 'successfully added speech credential for deepgram');
|
||||
@@ -386,6 +387,7 @@ test('speech credentials tests', async(t) => {
|
||||
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram');
|
||||
t.ok(result.body.deepgram_stt_uri === '127.0.0.1:50002', "deepgram_stt_uri is correct for deepgram");
|
||||
t.ok(result.body.deepgram_stt_use_tls === true, "deepgram_stt_use_tls is correct for deepgram");
|
||||
t.ok(result.body.deepgram_tts_uri === 'https://server.com', "deepgram_tts_uri is correct for deepgram")
|
||||
|
||||
result = await request.put(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
|
||||
resolveWithFullResponse: true,
|
||||
@@ -395,7 +397,8 @@ test('speech credentials tests', async(t) => {
|
||||
vendor: 'deepgram',
|
||||
use_for_stt: true,
|
||||
deepgram_stt_uri: "127.0.0.2:50002",
|
||||
deepgram_stt_use_tls: false
|
||||
deepgram_stt_use_tls: false,
|
||||
deepgram_tts_uri: 'https://server2.com'
|
||||
}
|
||||
});
|
||||
t.ok(result.statusCode === 204, 'successfully updated speech credential for deepgram onprem');
|
||||
@@ -409,6 +412,7 @@ test('speech credentials tests', async(t) => {
|
||||
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram onprem');
|
||||
t.ok(result.body.deepgram_stt_uri === '127.0.0.2:50002', "deepgram_stt_uri is correct for deepgram onprem");
|
||||
t.ok(result.body.deepgram_stt_use_tls === false, "deepgram_stt_use_tls is correct for deepgram onprem");
|
||||
t.ok(result.body.deepgram_tts_uri === 'https://server2.com', "deepgram_tts_uri is correct for deepgram onprem");
|
||||
|
||||
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
|
||||
auth: authUser,
|
||||
|
||||
Reference in New Issue
Block a user