support mod cartesia transcribe (#463)

This commit is contained in:
Hoan Luu Huu
2025-06-18 01:53:22 +07:00
committed by GitHub
parent fcff3d4b32
commit e2fc0216e1
3 changed files with 54 additions and 7 deletions

View File

@@ -135,6 +135,8 @@ const encryptCredential = (obj) => {
custom_tts_endpoint,
custom_tts_endpoint_url,
use_custom_stt,
use_for_stt,
use_for_tts,
custom_stt_endpoint,
custom_stt_endpoint_url,
tts_api_key,
@@ -148,7 +150,10 @@ const encryptCredential = (obj) => {
custom_tts_streaming_url,
auth_token = '',
cobalt_server_uri,
// For most vendors, model_id is used for TTS, STT, or both.
// For Cartesia, model_id is used for TTS only, so stt_model_id is introduced for STT.
model_id,
stt_model_id,
user_id,
voice_engine,
engine_version,
@@ -259,8 +264,17 @@ const encryptCredential = (obj) => {
case 'cartesia':
assert(api_key, 'invalid cartesia speech credential: api_key is required');
if (use_for_tts) {
assert(model_id, 'invalid cartesia speech credential: model_id is required');
const cartesiaData = JSON.stringify({api_key, model_id, options});
}
if (use_for_stt) {
assert(stt_model_id, 'invalid cartesia speech credential: stt_model_id is required');
}
const cartesiaData = JSON.stringify({
api_key,
...(model_id && {model_id}),
...(stt_model_id && {stt_model_id}),
options});
return encrypt(cartesiaData);
case 'rimelabs':
@@ -487,6 +501,7 @@ router.put('/:sid', async(req, res) => {
custom_tts_streaming_url,
cobalt_server_uri,
model_id,
stt_model_id,
voice_engine,
options,
deepgram_stt_uri,
@@ -518,6 +533,7 @@ router.put('/:sid', async(req, res) => {
custom_tts_streaming_url,
cobalt_server_uri,
model_id,
stt_model_id,
voice_engine,
options,
deepgram_stt_uri,
@@ -833,17 +849,28 @@ router.get('/:sid/test', async(req, res) => {
}
}
} else if (cred.vendor === 'cartesia') {
if (cred.use_for_tts) {
if (cred.use_for_tts || cred.use_for_stt) {
try {
// Cartesia has no API for testing STT; the same key is used for both TTS and STT, so one test covers both.
await testCartesia(logger, synthAudio, credential);
if (cred.use_for_tts) {
results.tts.status = 'ok';
}
if (cred.use_for_stt) {
results.stt.status = 'ok';
}
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
let reason = err.message;
try {
reason = await err.text();
} catch {}
if (cred.use_for_tts) {
results.tts = {status: 'fail', reason};
}
if (cred.use_for_stt) {
results.stt = {status: 'fail', reason};
}
SpeechCredential.ttsTestResult(sid, false);
}
}

View File

@@ -0,0 +1,4 @@
// Cartesia speech-to-text (STT) model list exported as a plain array.
// Each entry has a `name` and a `value`.
// NOTE(review): presumably `name` is the display label and `value` is the
// model identifier stored as stt_model_id — confirm against the consumers.
module.exports = [
{ name: 'Ink-whisper', value: 'ink-whisper' },
];

View File

@@ -49,6 +49,7 @@ const SttOpenaiLanguagesVoices = require('./speech-data/stt-openai');
const SttModelOpenai = require('./speech-data/stt-model-openai');
const sttModelDeepgram = require('./speech-data/stt-model-deepgram');
const sttModelCartesia = require('./speech-data/stt-model-cartesia');
function capitalizeFirst(str) {
if (!str) return str;
@@ -680,6 +681,7 @@ function decryptCredential(obj, credential, logger, isObscureKey = true) {
const o = JSON.parse(decrypt(credential));
obj.api_key = isObscureKey ? obscureKey(o.api_key) : o.api_key;
obj.model_id = o.model_id;
obj.stt_model_id = o.stt_model_id;
obj.options = o.options;
} else if ('rimelabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
@@ -1403,9 +1405,23 @@ async function getLanguagesVoicesForCartesia(credential) {
return acc;
}, []);
return tranform(ttsVoices, undefined, TtsModelCartesia);
return tranform(
ttsVoices,
ttsVoices.map((voice) => ({
name: voice.name,
value: voice.value,
})),
TtsModelCartesia,
sttModelCartesia);
}
return tranform(ttsCartesia, undefined, TtsModelCartesia);
return tranform(
ttsCartesia,
ttsCartesia.map((voice) => ({
name: voice.name,
value: voice.value,
})),
TtsModelCartesia,
sttModelCartesia);
}
module.exports = {