Support caching TTS audio with model/model_id (#1062)

* Support caching TTS audio with model/model_id

* Update @jambonz/speech-utils version from 0.2.1 to 0.2.2
This commit is contained in:
Hoan Luu Huu
2025-02-03 20:47:44 +07:00
committed by GitHub
parent 8487a4be68
commit 7105453d81
4 changed files with 12 additions and 8 deletions

View File

@@ -240,6 +240,7 @@ class TaskSay extends TtsTask {
language, language,
voice, voice,
engine, engine,
model: this.model || this.model_id,
text text
}).catch((err) => this.logger.info({err}, 'Error adding file to cache')); }).catch((err) => this.logger.info({err}, 'Error adding file to cache'));
} }

View File

@@ -143,16 +143,16 @@ class TtsTask extends Task {
`No text-to-speech service credentials for ${vendor} with labels: ${label} have been configured`); `No text-to-speech service credentials for ${vendor} with labels: ${label} have been configured`);
} }
/* parse Nuance voices into name and model */ /* parse Nuance voices into name and model */
let model;
if (vendor === 'nuance' && voice) { if (vendor === 'nuance' && voice) {
const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice); const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
if (arr) { if (arr) {
voice = arr[1]; voice = arr[1];
model = arr[2]; this.model = arr[2];
} }
} else if (vendor === 'deepgram') { } else if (vendor === 'deepgram') {
model = voice; this.model = voice;
} }
this.model_id = credentials.model_id;
/* allow for microsoft custom region voice and api_key to be specified as an override */ /* allow for microsoft custom region voice and api_key to be specified as an override */
if (vendor === 'microsoft' && this.options.deploymentId) { if (vendor === 'microsoft' && this.options.deploymentId) {
@@ -215,7 +215,8 @@ class TtsTask extends Task {
// If vendor is changed from the previous one, then reset the cache_speech_handles flag // If vendor is changed from the previous one, then reset the cache_speech_handles flag
//cs.currentTtsVendor = vendor; //cs.currentTtsVendor = vendor;
if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec'); if (!preCache && !this._disableTracing)
this.logger.info({vendor, language, voice, model: this.model}, 'TaskSay:exec');
try { try {
if (!credentials) { if (!credentials) {
writeAlerts({ writeAlerts({
@@ -250,7 +251,7 @@ class TtsTask extends Task {
language, language,
voice, voice,
engine, engine,
model, model: this.model,
salt, salt,
credentials, credentials,
options: this.options, options: this.options,

6
package-lock.json generated
View File

@@ -15,7 +15,7 @@
"@jambonz/http-health-check": "^0.0.1", "@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.7", "@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.8", "@jambonz/realtimedb-helpers": "^0.8.8",
"@jambonz/speech-utils": "^0.2.1", "@jambonz/speech-utils": "^0.2.2",
"@jambonz/stats-collector": "^0.1.10", "@jambonz/stats-collector": "^0.1.10",
"@jambonz/time-series": "^0.2.13", "@jambonz/time-series": "^0.2.13",
"@jambonz/verb-specifications": "^0.0.95", "@jambonz/verb-specifications": "^0.0.95",
@@ -1512,7 +1512,9 @@
} }
}, },
"node_modules/@jambonz/speech-utils": { "node_modules/@jambonz/speech-utils": {
"version": "0.2.1", "version": "0.2.2",
"resolved": "https://registry.npmjs.org/@jambonz/speech-utils/-/speech-utils-0.2.2.tgz",
"integrity": "sha512-+O+5Ej6RhQZjbZLRbJSA4UT1Es2JcDSDJT24kGRSVWCf8SuG5B3TqKzZP0aaVA297I12b7ztNG9ShWjY0iR7Fg==",
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@aws-sdk/client-polly": "^3.496.0", "@aws-sdk/client-polly": "^3.496.0",

View File

@@ -31,7 +31,7 @@
"@jambonz/http-health-check": "^0.0.1", "@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.7", "@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.8", "@jambonz/realtimedb-helpers": "^0.8.8",
"@jambonz/speech-utils": "^0.2.1", "@jambonz/speech-utils": "^0.2.2",
"@jambonz/stats-collector": "^0.1.10", "@jambonz/stats-collector": "^0.1.10",
"@jambonz/verb-specifications": "^0.0.95", "@jambonz/verb-specifications": "^0.0.95",
"@jambonz/time-series": "^0.2.13", "@jambonz/time-series": "^0.2.13",