mirror of
https://github.com/jambonz/jambonz-api-server.git
synced 2025-12-19 05:47:46 +00:00
support mod cartesia transcribe (#463)
This commit is contained in:
@@ -135,6 +135,8 @@ const encryptCredential = (obj) => {
|
||||
custom_tts_endpoint,
|
||||
custom_tts_endpoint_url,
|
||||
use_custom_stt,
|
||||
use_for_stt,
|
||||
use_for_tts,
|
||||
custom_stt_endpoint,
|
||||
custom_stt_endpoint_url,
|
||||
tts_api_key,
|
||||
@@ -148,7 +150,10 @@ const encryptCredential = (obj) => {
|
||||
custom_tts_streaming_url,
|
||||
auth_token = '',
|
||||
cobalt_server_uri,
|
||||
// For most vendors, model_id is used for TTS, STT, or both.
|
||||
// For Cartesia, model_id is used for TTS only, so stt_model_id is introduced for STT.
|
||||
model_id,
|
||||
stt_model_id,
|
||||
user_id,
|
||||
voice_engine,
|
||||
engine_version,
|
||||
@@ -259,8 +264,17 @@ const encryptCredential = (obj) => {
|
||||
|
||||
case 'cartesia':
|
||||
assert(api_key, 'invalid cartesia speech credential: api_key is required');
|
||||
if (use_for_tts) {
|
||||
assert(model_id, 'invalid cartesia speech credential: model_id is required');
|
||||
const cartesiaData = JSON.stringify({api_key, model_id, options});
|
||||
}
|
||||
if (use_for_stt) {
|
||||
assert(stt_model_id, 'invalid cartesia speech credential: stt_model_id is required');
|
||||
}
|
||||
const cartesiaData = JSON.stringify({
|
||||
api_key,
|
||||
...(model_id && {model_id}),
|
||||
...(stt_model_id && {stt_model_id}),
|
||||
options});
|
||||
return encrypt(cartesiaData);
|
||||
|
||||
case 'rimelabs':
|
||||
@@ -487,6 +501,7 @@ router.put('/:sid', async(req, res) => {
|
||||
custom_tts_streaming_url,
|
||||
cobalt_server_uri,
|
||||
model_id,
|
||||
stt_model_id,
|
||||
voice_engine,
|
||||
options,
|
||||
deepgram_stt_uri,
|
||||
@@ -518,6 +533,7 @@ router.put('/:sid', async(req, res) => {
|
||||
custom_tts_streaming_url,
|
||||
cobalt_server_uri,
|
||||
model_id,
|
||||
stt_model_id,
|
||||
voice_engine,
|
||||
options,
|
||||
deepgram_stt_uri,
|
||||
@@ -833,17 +849,28 @@ router.get('/:sid/test', async(req, res) => {
|
||||
}
|
||||
}
|
||||
} else if (cred.vendor === 'cartesia') {
|
||||
if (cred.use_for_tts) {
|
||||
if (cred.use_for_tts || cred.use_for_stt) {
|
||||
try {
|
||||
// Cartesia does not have an API for testing STT; the same key is used for both TTS and STT.
|
||||
await testCartesia(logger, synthAudio, credential);
|
||||
if (cred.use_for_tts) {
|
||||
results.tts.status = 'ok';
|
||||
}
|
||||
if (cred.use_for_stt) {
|
||||
results.stt.status = 'ok';
|
||||
}
|
||||
SpeechCredential.ttsTestResult(sid, true);
|
||||
} catch (err) {
|
||||
let reason = err.message;
|
||||
try {
|
||||
reason = await err.text();
|
||||
} catch {}
|
||||
if (cred.use_for_tts) {
|
||||
results.tts = {status: 'fail', reason};
|
||||
}
|
||||
if (cred.use_for_stt) {
|
||||
results.stt = {status: 'fail', reason};
|
||||
}
|
||||
SpeechCredential.ttsTestResult(sid, false);
|
||||
}
|
||||
}
|
||||
|
||||
4
lib/utils/speech-data/stt-model-cartesia.js
Normal file
4
lib/utils/speech-data/stt-model-cartesia.js
Normal file
@@ -0,0 +1,4 @@
|
||||
module.exports = [
|
||||
{ name: 'Ink-whisper', value: 'ink-whisper' },
|
||||
];
|
||||
|
||||
@@ -49,6 +49,7 @@ const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
|
||||
|
||||
const SttModelOpenai = require('./speech-data/stt-model-openai');
|
||||
const sttModelDeepgram = require('./speech-data/stt-model-deepgram');
|
||||
const sttModelCartesia = require('./speech-data/stt-model-cartesia');
|
||||
|
||||
function capitalizeFirst(str) {
|
||||
if (!str) return str;
|
||||
@@ -680,6 +681,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
|
||||
obj.model_id = o.model_id;
|
||||
obj.stt_model_id = o.stt_model_id;
|
||||
obj.options = o.options;
|
||||
} else if ('rimelabs' === obj.vendor) {
|
||||
const o = JSON.parse(decrypt(credential));
|
||||
@@ -1403,9 +1405,23 @@ async function getLanguagesVoicesForCartesia(credential) {
|
||||
return acc;
|
||||
}, []);
|
||||
|
||||
return tranform(ttsVoices, undefined, TtsModelCartesia);
|
||||
return tranform(
|
||||
ttsVoices,
|
||||
ttsVoices.map((voice) => ({
|
||||
name: voice.name,
|
||||
value: voice.value,
|
||||
})),
|
||||
TtsModelCartesia,
|
||||
sttModelCartesia);
|
||||
}
|
||||
return tranform(ttsCartesia, undefined, TtsModelCartesia);
|
||||
return tranform(
|
||||
ttsCartesia,
|
||||
ttsCartesia.map((voice) => ({
|
||||
name: voice.name,
|
||||
value: voice.value,
|
||||
})),
|
||||
TtsModelCartesia,
|
||||
sttModelCartesia);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user