mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2025-12-19 05:47:46 +00:00
Feat/deepgram tts onprem (#338)
* support deepgram on-premise
* wip
* update speech utils version
* install docker in ci
This commit is contained in:
5
.github/workflows/ci.yml
vendored
5
.github/workflows/ci.yml
vendored
@@ -7,6 +7,11 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
- name: Install Docker Compose
|
||||||
|
run: |
|
||||||
|
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
|
||||||
|
sudo chmod +x /usr/local/bin/docker-compose
|
||||||
|
docker-compose --version
|
||||||
- uses: actions/setup-node@v3
|
- uses: actions/setup-node@v3
|
||||||
with:
|
with:
|
||||||
node-version: lts/*
|
node-version: lts/*
|
||||||
|
|||||||
@@ -124,6 +124,7 @@ const encryptCredential = (obj) => {
|
|||||||
nuance_stt_uri,
|
nuance_stt_uri,
|
||||||
deepgram_stt_uri,
|
deepgram_stt_uri,
|
||||||
deepgram_stt_use_tls,
|
deepgram_stt_use_tls,
|
||||||
|
deepgram_tts_uri,
|
||||||
use_custom_tts,
|
use_custom_tts,
|
||||||
custom_tts_endpoint,
|
custom_tts_endpoint,
|
||||||
custom_tts_endpoint_url,
|
custom_tts_endpoint_url,
|
||||||
@@ -204,10 +205,10 @@ const encryptCredential = (obj) => {
|
|||||||
|
|
||||||
case 'deepgram':
|
case 'deepgram':
|
||||||
// API key is optional if onprem
|
// API key is optional if onprem
|
||||||
if (!deepgram_stt_uri) {
|
if (!deepgram_stt_uri || !deepgram_tts_uri) {
|
||||||
assert(api_key, 'invalid deepgram speech credential: api_key is required');
|
assert(api_key, 'invalid deepgram speech credential: api_key is required');
|
||||||
}
|
}
|
||||||
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls});
|
const deepgramData = JSON.stringify({api_key, deepgram_stt_uri, deepgram_stt_use_tls, deepgram_tts_uri});
|
||||||
return encrypt(deepgramData);
|
return encrypt(deepgramData);
|
||||||
|
|
||||||
case 'ibm':
|
case 'ibm':
|
||||||
@@ -458,6 +459,7 @@ router.put('/:sid', async(req, res) => {
|
|||||||
options,
|
options,
|
||||||
deepgram_stt_uri,
|
deepgram_stt_uri,
|
||||||
deepgram_stt_use_tls,
|
deepgram_stt_use_tls,
|
||||||
|
deepgram_tts_uri,
|
||||||
engine_version
|
engine_version
|
||||||
} = req.body;
|
} = req.body;
|
||||||
|
|
||||||
@@ -485,6 +487,7 @@ router.put('/:sid', async(req, res) => {
|
|||||||
options,
|
options,
|
||||||
deepgram_stt_uri,
|
deepgram_stt_uri,
|
||||||
deepgram_stt_use_tls,
|
deepgram_stt_use_tls,
|
||||||
|
deepgram_tts_uri,
|
||||||
engine_version
|
engine_version
|
||||||
};
|
};
|
||||||
logger.info({o, newCred}, 'updating speech credential with this new credential');
|
logger.info({o, newCred}, 'updating speech credential with this new credential');
|
||||||
|
|||||||
@@ -92,8 +92,8 @@ const testGoogleStt = async(logger, credentials) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const testDeepgramStt = async(logger, credentials) => {
|
const testDeepgramStt = async(logger, credentials) => {
|
||||||
const {api_key} = credentials;
|
const {api_key, deepgram_stt_uri, deepgram_stt_use_tls} = credentials;
|
||||||
const deepgram = new Deepgram(api_key);
|
const deepgram = new Deepgram(api_key, deepgram_stt_uri, deepgram_stt_uri && deepgram_stt_use_tls);
|
||||||
|
|
||||||
const mimetype = 'audio/wav';
|
const mimetype = 'audio/wav';
|
||||||
const source = {
|
const source = {
|
||||||
@@ -272,7 +272,8 @@ const testPlayHT = async(logger, synthAudio, credentials) => {
|
|||||||
credentials,
|
credentials,
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
|
voice: 's3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json',
|
||||||
text: 'Hi there and welcome to jambones!'
|
text: 'Hi there and welcome to jambones!',
|
||||||
|
renderForCaching: true
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
// Test if playHT can fetch voices
|
// Test if playHT can fetch voices
|
||||||
@@ -295,7 +296,8 @@ const testRimelabs = async(logger, synthAudio, credentials) => {
|
|||||||
credentials,
|
credentials,
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
voice: 'amber',
|
voice: 'amber',
|
||||||
text: 'Hi there and welcome to jambones!'
|
text: 'Hi there and welcome to jambones!',
|
||||||
|
renderForCaching: true
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -312,7 +314,8 @@ const testWhisper = async(logger, synthAudio, credentials) => {
|
|||||||
credentials,
|
credentials,
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
voice: 'alloy',
|
voice: 'alloy',
|
||||||
text: 'Hi there and welcome to jambones!'
|
text: 'Hi there and welcome to jambones!',
|
||||||
|
renderForCaching: true
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -328,7 +331,8 @@ const testDeepgramTTS = async(logger, synthAudio, credentials) => {
|
|||||||
vendor: 'deepgram',
|
vendor: 'deepgram',
|
||||||
credentials,
|
credentials,
|
||||||
model: 'aura-asteria-en',
|
model: 'aura-asteria-en',
|
||||||
text: 'Hi there and welcome to jambones!'
|
text: 'Hi there and welcome to jambones!',
|
||||||
|
renderForCaching: true
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -383,7 +387,8 @@ const testVerbioTts = async(logger, synthAudio, credentials) => {
|
|||||||
credentials,
|
credentials,
|
||||||
language: 'en-US',
|
language: 'en-US',
|
||||||
voice: 'tommy_en-us',
|
voice: 'tommy_en-us',
|
||||||
text: 'Hi there and welcome to jambones!'
|
text: 'Hi there and welcome to jambones!',
|
||||||
|
renderForCaching: true
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -509,6 +514,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
|||||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||||
obj.deepgram_stt_uri = o.deepgram_stt_uri;
|
obj.deepgram_stt_uri = o.deepgram_stt_uri;
|
||||||
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
|
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
|
||||||
|
obj.deepgram_tts_uri = o.deepgram_tts_uri;
|
||||||
}
|
}
|
||||||
else if ('ibm' === obj.vendor) {
|
else if ('ibm' === obj.vendor) {
|
||||||
const o = JSON.parse(decrypt(credential));
|
const o = JSON.parse(decrypt(credential));
|
||||||
|
|||||||
68
package-lock.json
generated
68
package-lock.json
generated
@@ -19,7 +19,7 @@
|
|||||||
"@jambonz/lamejs": "^1.2.2",
|
"@jambonz/lamejs": "^1.2.2",
|
||||||
"@jambonz/mw-registrar": "^0.2.7",
|
"@jambonz/mw-registrar": "^0.2.7",
|
||||||
"@jambonz/realtimedb-helpers": "^0.8.9",
|
"@jambonz/realtimedb-helpers": "^0.8.9",
|
||||||
"@jambonz/speech-utils": "^0.1.11",
|
"@jambonz/speech-utils": "^0.1.13",
|
||||||
"@jambonz/time-series": "^0.2.8",
|
"@jambonz/time-series": "^0.2.8",
|
||||||
"@jambonz/verb-specifications": "^0.0.72",
|
"@jambonz/verb-specifications": "^0.0.72",
|
||||||
"@soniox/soniox-node": "^1.2.2",
|
"@soniox/soniox-node": "^1.2.2",
|
||||||
@@ -2027,9 +2027,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@jambonz/speech-utils": {
|
"node_modules/@jambonz/speech-utils": {
|
||||||
"version": "0.1.11",
|
"version": "0.1.13",
|
||||||
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.11.tgz",
|
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.1.13.tgz",
|
||||||
"integrity": "sha512-VgljBLUF871adib/3yWpzd7kv26ioxiLVkAIxm94CSk9WeZuzX1lVcE2SohojW3mjCYdYY6+B8FRyzlTD+en3g==",
|
"integrity": "sha512-QeVmNFLtJGPGQfmp7jXpy742AyJIv2EteelDmNTqWGFEwTBj88q8GLP51hUsIR2ZbE5n/ZmZb/ytT6Y6LIQSDg==",
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@aws-sdk/client-polly": "^3.496.0",
|
"@aws-sdk/client-polly": "^3.496.0",
|
||||||
"@aws-sdk/client-sts": "^3.496.0",
|
"@aws-sdk/client-sts": "^3.496.0",
|
||||||
@@ -2041,7 +2042,7 @@
|
|||||||
"form-urlencoded": "^6.1.4",
|
"form-urlencoded": "^6.1.4",
|
||||||
"google-protobuf": "^3.21.2",
|
"google-protobuf": "^3.21.2",
|
||||||
"ibm-watson": "^8.0.0",
|
"ibm-watson": "^8.0.0",
|
||||||
"microsoft-cognitiveservices-speech-sdk": "1.36.0",
|
"microsoft-cognitiveservices-speech-sdk": "1.38.0",
|
||||||
"openai": "^4.25.0",
|
"openai": "^4.25.0",
|
||||||
"undici": "^6.4.0"
|
"undici": "^6.4.0"
|
||||||
}
|
}
|
||||||
@@ -2051,6 +2052,28 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-13.13.52.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-13.13.52.tgz",
|
||||||
"integrity": "sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ=="
|
"integrity": "sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ=="
|
||||||
},
|
},
|
||||||
|
"node_modules/@jambonz/speech-utils/node_modules/https-proxy-agent": {
|
||||||
|
"version": "4.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz",
|
||||||
|
"integrity": "sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"agent-base": "5",
|
||||||
|
"debug": "4"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 6.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@jambonz/speech-utils/node_modules/https-proxy-agent/node_modules/agent-base": {
|
||||||
|
"version": "5.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-5.1.1.tgz",
|
||||||
|
"integrity": "sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 6.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@jambonz/speech-utils/node_modules/ibm-watson": {
|
"node_modules/@jambonz/speech-utils/node_modules/ibm-watson": {
|
||||||
"version": "8.0.0",
|
"version": "8.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/ibm-watson/-/ibm-watson-8.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/ibm-watson/-/ibm-watson-8.0.0.tgz",
|
||||||
@@ -2072,6 +2095,41 @@
|
|||||||
"node": ">=16.0.0"
|
"node": ">=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@jambonz/speech-utils/node_modules/microsoft-cognitiveservices-speech-sdk": {
|
||||||
|
"version": "1.38.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/microsoft-cognitiveservices-speech-sdk/-/microsoft-cognitiveservices-speech-sdk-1.38.0.tgz",
|
||||||
|
"integrity": "sha512-NA6J4eIDkeR9iN83rcn77Kn5AWQcizDEn1tLMjzRvSovUNB1FrZe0mWYO0fsGltUwMl3Ns5OZ3lGw42PU4fEYA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/webrtc": "^0.0.37",
|
||||||
|
"agent-base": "^6.0.1",
|
||||||
|
"bent": "^7.3.12",
|
||||||
|
"https-proxy-agent": "^4.0.0",
|
||||||
|
"uuid": "^9.0.0",
|
||||||
|
"ws": "^7.5.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@jambonz/speech-utils/node_modules/ws": {
|
||||||
|
"version": "7.5.10",
|
||||||
|
"resolved": "https://registry.npmjs.org/ws/-/ws-7.5.10.tgz",
|
||||||
|
"integrity": "sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=8.3.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bufferutil": "^4.0.1",
|
||||||
|
"utf-8-validate": "^5.0.2"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bufferutil": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"utf-8-validate": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@jambonz/time-series": {
|
"node_modules/@jambonz/time-series": {
|
||||||
"version": "0.2.8",
|
"version": "0.2.8",
|
||||||
"resolved": "https://registry.npmjs.org/@jambonz/time-series/-/time-series-0.2.8.tgz",
|
"resolved": "https://registry.npmjs.org/@jambonz/time-series/-/time-series-0.2.8.tgz",
|
||||||
|
|||||||
@@ -29,7 +29,7 @@
|
|||||||
"@jambonz/lamejs": "^1.2.2",
|
"@jambonz/lamejs": "^1.2.2",
|
||||||
"@jambonz/mw-registrar": "^0.2.7",
|
"@jambonz/mw-registrar": "^0.2.7",
|
||||||
"@jambonz/realtimedb-helpers": "^0.8.9",
|
"@jambonz/realtimedb-helpers": "^0.8.9",
|
||||||
"@jambonz/speech-utils": "^0.1.11",
|
"@jambonz/speech-utils": "^0.1.13",
|
||||||
"@jambonz/time-series": "^0.2.8",
|
"@jambonz/time-series": "^0.2.8",
|
||||||
"@jambonz/verb-specifications": "^0.0.72",
|
"@jambonz/verb-specifications": "^0.0.72",
|
||||||
"@soniox/soniox-node": "^1.2.2",
|
"@soniox/soniox-node": "^1.2.2",
|
||||||
|
|||||||
@@ -371,7 +371,8 @@ test('speech credentials tests', async(t) => {
|
|||||||
vendor: 'deepgram',
|
vendor: 'deepgram',
|
||||||
use_for_stt: true,
|
use_for_stt: true,
|
||||||
deepgram_stt_uri: "127.0.0.1:50002",
|
deepgram_stt_uri: "127.0.0.1:50002",
|
||||||
deepgram_stt_use_tls: true
|
deepgram_stt_use_tls: true,
|
||||||
|
deepgram_tts_uri: 'https://server.com'
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
t.ok(result.statusCode === 201, 'successfully added speech credential for deepgram');
|
t.ok(result.statusCode === 201, 'successfully added speech credential for deepgram');
|
||||||
@@ -386,6 +387,7 @@ test('speech credentials tests', async(t) => {
|
|||||||
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram');
|
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram');
|
||||||
t.ok(result.body.deepgram_stt_uri === '127.0.0.1:50002', "deepgram_stt_uri is correct for deepgram");
|
t.ok(result.body.deepgram_stt_uri === '127.0.0.1:50002', "deepgram_stt_uri is correct for deepgram");
|
||||||
t.ok(result.body.deepgram_stt_use_tls === true, "deepgram_stt_use_tls is correct for deepgram");
|
t.ok(result.body.deepgram_stt_use_tls === true, "deepgram_stt_use_tls is correct for deepgram");
|
||||||
|
t.ok(result.body.deepgram_tts_uri === 'https://server.com', "deepgram_tts_uri is correct for deepgram")
|
||||||
|
|
||||||
result = await request.put(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
|
result = await request.put(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
|
||||||
resolveWithFullResponse: true,
|
resolveWithFullResponse: true,
|
||||||
@@ -395,7 +397,8 @@ test('speech credentials tests', async(t) => {
|
|||||||
vendor: 'deepgram',
|
vendor: 'deepgram',
|
||||||
use_for_stt: true,
|
use_for_stt: true,
|
||||||
deepgram_stt_uri: "127.0.0.2:50002",
|
deepgram_stt_uri: "127.0.0.2:50002",
|
||||||
deepgram_stt_use_tls: false
|
deepgram_stt_use_tls: false,
|
||||||
|
deepgram_tts_uri: 'https://server2.com'
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
t.ok(result.statusCode === 204, 'successfully updated speech credential for deepgram onprem');
|
t.ok(result.statusCode === 204, 'successfully updated speech credential for deepgram onprem');
|
||||||
@@ -409,6 +412,7 @@ test('speech credentials tests', async(t) => {
|
|||||||
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram onprem');
|
t.ok(result.statusCode === 200, 'successfully get speech credential for deepgram onprem');
|
||||||
t.ok(result.body.deepgram_stt_uri === '127.0.0.2:50002', "deepgram_stt_uri is correct for deepgram onprem");
|
t.ok(result.body.deepgram_stt_uri === '127.0.0.2:50002', "deepgram_stt_uri is correct for deepgram onprem");
|
||||||
t.ok(result.body.deepgram_stt_use_tls === false, "deepgram_stt_use_tls is correct for deepgram onprem");
|
t.ok(result.body.deepgram_stt_use_tls === false, "deepgram_stt_use_tls is correct for deepgram onprem");
|
||||||
|
t.ok(result.body.deepgram_tts_uri === 'https://server2.com', "deepgram_tts_uri is correct for deepgram onprem");
|
||||||
|
|
||||||
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
|
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${dg_sid}`, {
|
||||||
auth: authUser,
|
auth: authUser,
|
||||||
|
|||||||
Reference in New Issue
Block a user