mirror of
https://github.com/jambonz/jambonz-feature-server.git
synced 2025-12-20 08:40:38 +00:00
Feat/tts streaming (#994)
* wip * add TtsStreamingBuffer class to abstract handling of streaming tokens * wip * add throttling support * support background ttsStream (#995) * wip * add TtsStreamingBuffer class to abstract handling of streaming tokens * wip * support background ttsStream * wip --------- Co-authored-by: Dave Horton <daveh@beachdognet.com> * wip * dont send if we have nothing to send * initial testing with cartesia * wip --------- Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
This commit is contained in:
@@ -13,11 +13,11 @@ class TtsTask extends Task {
|
||||
|
||||
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
|
||||
/**
|
||||
* Task use taskInlcudeSynthesizer to identify
|
||||
* if taskInlcudeSynthesizer === true, use label from verb.synthesizer, even it's empty
|
||||
* if taskInlcudeSynthesizer === false, use label from application.synthesizer
|
||||
* Task use taskIncludeSynthesizer to identify
|
||||
* if taskIncludeSynthesizer === true, use label from verb.synthesizer, even if it's empty
|
||||
* if taskIncludeSynthesizer === false, use label from application.synthesizer
|
||||
*/
|
||||
this.taskInlcudeSynthesizer = !!this.data.synthesizer;
|
||||
this.taskIncludeSynthesizer = !!this.data.synthesizer;
|
||||
this.synthesizer = this.data.synthesizer || {};
|
||||
this.disableTtsCache = this.data.disableTtsCache;
|
||||
this.options = this.synthesizer.options || {};
|
||||
@@ -44,6 +44,47 @@ class TtsTask extends Task {
|
||||
}
|
||||
}
|
||||
|
||||
getTtsVendorData(cs) {
|
||||
const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
|
||||
this.synthesizer.vendor :
|
||||
cs.speechSynthesisVendor;
|
||||
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
|
||||
this.synthesizer.language :
|
||||
cs.speechSynthesisLanguage ;
|
||||
const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
|
||||
this.synthesizer.voice :
|
||||
cs.speechSynthesisVoice;
|
||||
const label = this.taskIncludeSynthesizer ? this.synthesizer.label : cs.speechSynthesisLabel;
|
||||
return {vendor, language, voice, label};
|
||||
}
|
||||
|
||||
async setTtsStreamingChannelVars(vendor, language, voice, credentials, ep) {
|
||||
const {api_key, cartesia_model_id, cartesia_voice_id} = credentials;
|
||||
let obj;
|
||||
|
||||
switch (vendor) {
|
||||
case 'deepgram':
|
||||
obj = {
|
||||
DEEPGRAM_API_KEY: api_key,
|
||||
DEEPGRAM_TTS_STREAMING_MODEL: voice
|
||||
};
|
||||
break;
|
||||
case 'cartesia':
|
||||
obj = {
|
||||
CARTESIA_API_KEY: api_key,
|
||||
CARTESIA_TTS_STREAMING_MODEL_ID: cartesia_model_id,
|
||||
CARTESIA_TTS_STREAMING_VOICE_ID: cartesia_voice_id,
|
||||
CARTESIA_TTS_STREAMING_LANGUAGE: language || 'en'
|
||||
};
|
||||
break;
|
||||
default:
|
||||
throw new Error(`vendor ${vendor} is not supported for tts streaming yet`);
|
||||
}
|
||||
this.logger.info({vendor, credentials, obj}, 'setTtsStreamingChannelVars');
|
||||
|
||||
await ep.set(obj);
|
||||
}
|
||||
|
||||
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
|
||||
const {srf, accountSid:account_sid} = cs;
|
||||
const {writeAlerts, AlertType, stats} = srf.locals;
|
||||
|
||||
Reference in New Issue
Block a user