mirror of
https://github.com/jambonz/freeswitch-modules.git
synced 2025-12-19 06:47:43 +00:00
fix elevenlabs and whisper for multiple codecs (#14)
* fix elevenlabs and whisper for multiple codecs

  Signed-off-by: Quan HL <quan.luuhoang8@gmail.com>

* fix review comments

  Signed-off-by: Quan HL <quan.luuhoang8@gmail.com>

---------

Signed-off-by: Quan HL <quan.luuhoang8@gmail.com>
This commit is contained in:
@@ -31,6 +31,7 @@
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include "mod_elevenlabs_tts.h"
|
||||
#include <speex/speex_resampler.h>
|
||||
|
||||
#define TXNID_LEN (255)
|
||||
#define URL_LEN (1024)
|
||||
@@ -820,6 +821,7 @@ extern "C" {
|
||||
CURL* easy = createEasyHandle();
|
||||
|
||||
el->conn = (void *) conn ;
|
||||
el->sample_rate = 0;
|
||||
conn->elevenlabs = el;
|
||||
conn->easy = easy;
|
||||
conn->global = &global;
|
||||
@@ -830,6 +832,23 @@ extern "C" {
|
||||
|
||||
el->circularBuffer = (void *) new CircularBuffer_t(8192);
|
||||
|
||||
if (el->session_id) {
|
||||
int err;
|
||||
switch_codec_implementation_t read_impl;
|
||||
switch_core_session_t *psession = switch_core_session_locate(el->session_id);
|
||||
switch_core_session_get_read_impl(psession, &read_impl);
|
||||
uint32_t samples_per_second = !strcasecmp(read_impl.iananame, "g722") ? read_impl.actual_samples_per_second : read_impl.samples_per_second;
|
||||
el->sample_rate = samples_per_second;
|
||||
// elevenlabs output is PCMU 8000
|
||||
if (samples_per_second != 8000 /*Hz*/) {
|
||||
el->resampler = speex_resampler_init(1, 8000, samples_per_second, SWITCH_RESAMPLE_QUALITY, &err);
|
||||
if (0 != err) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error initializing resampler: %s.\n", speex_resampler_strerror(err));
|
||||
return SWITCH_STATUS_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::ostringstream api_key_stream;
|
||||
api_key_stream << "xi-api-key: " << el->api_key;
|
||||
|
||||
@@ -881,7 +900,6 @@ extern "C" {
|
||||
{
|
||||
switch_mutex_lock(el->mutex);
|
||||
ConnInfo_t *conn = (ConnInfo_t *) el->conn;
|
||||
|
||||
if (el->response_code > 0 && el->response_code != 200) {
|
||||
switch_mutex_unlock(el->mutex);
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "elevenlabs_speech_read_tts, returning failure\n") ;
|
||||
@@ -901,14 +919,35 @@ extern "C" {
|
||||
switch_mutex_unlock(el->mutex);
|
||||
return SWITCH_STATUS_SUCCESS;
|
||||
}
|
||||
size_t size = std::min((*datalen/2), cBuffer->size());
|
||||
size_t size = el->sample_rate ?
|
||||
std::min((*datalen/(2 * el->sample_rate / 8000)), cBuffer->size()) :
|
||||
std::min((*datalen/2), cBuffer->size());
|
||||
pcm_data.insert(pcm_data.end(), cBuffer->begin(), cBuffer->begin() + size);
|
||||
cBuffer->erase(cBuffer->begin(), cBuffer->begin() + size);
|
||||
switch_mutex_unlock(el->mutex);
|
||||
}
|
||||
|
||||
memcpy(data, pcm_data.data(), pcm_data.size() * sizeof(uint16_t));
|
||||
*datalen = pcm_data.size() * sizeof(uint16_t);
|
||||
size_t data_size = pcm_data.size();
|
||||
|
||||
if (el->resampler) {
|
||||
std::vector<int16_t> in(pcm_data.begin(), pcm_data.end());
|
||||
|
||||
std::vector<int16_t> out((*datalen));
|
||||
spx_uint32_t in_len = data_size;
|
||||
spx_uint32_t out_len = out.size();
|
||||
speex_resampler_process_interleaved_int(el->resampler, in.data(), &in_len, out.data(), &out_len);
|
||||
|
||||
if (out_len > out.size()) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Resampler output exceeded maximum buffer size!\n");
|
||||
return SWITCH_STATUS_FALSE;
|
||||
}
|
||||
|
||||
memcpy(data, out.data(), out_len * sizeof(int16_t));
|
||||
*datalen = out_len * sizeof(int16_t);
|
||||
} else {
|
||||
memcpy(data, pcm_data.data(), pcm_data.size() * sizeof(uint16_t));
|
||||
*datalen = pcm_data.size() * sizeof(uint16_t);
|
||||
}
|
||||
|
||||
return SWITCH_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -922,8 +961,15 @@ extern "C" {
|
||||
delete cBuffer;
|
||||
el->circularBuffer = nullptr ;
|
||||
|
||||
// destroy resampler
|
||||
if (el->resampler) {
|
||||
speex_resampler_destroy(el->resampler);
|
||||
el->resampler = NULL;
|
||||
}
|
||||
|
||||
if (conn) {
|
||||
conn->flushed = true;
|
||||
|
||||
if (!download_complete) {
|
||||
if (conn->file) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "closing audio cache file %s because download was interrupted\n", el->cache_filename);
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <speex/speex_resampler.h>
|
||||
|
||||
struct elevenlabs_data {
|
||||
char *session_id;
|
||||
@@ -30,6 +31,7 @@ struct elevenlabs_data {
|
||||
char *cache_filename;
|
||||
|
||||
int rate;
|
||||
uint32_t sample_rate;
|
||||
|
||||
void *conn;
|
||||
FILE *file;
|
||||
@@ -38,6 +40,7 @@ struct elevenlabs_data {
|
||||
int draining;
|
||||
int reads;
|
||||
int cache_audio;
|
||||
SpeexResamplerState *resampler;
|
||||
};
|
||||
|
||||
typedef struct elevenlabs_data elevenlabs_t;
|
||||
|
||||
@@ -786,10 +786,10 @@ extern "C" {
|
||||
return SWITCH_STATUS_FALSE;
|
||||
}
|
||||
|
||||
if (mpg123_param(mh, MPG123_FORCE_RATE, 8000 /*Hz*/, 0) != MPG123_OK) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error mpg123_param!\n");
|
||||
if (mpg123_param(mh, MPG123_FLAGS, MPG123_MONO_MIX, 0) != MPG123_OK) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error forcing single channel!\n");
|
||||
return SWITCH_STATUS_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
CURL* easy = createEasyHandle();
|
||||
w->conn = (void *) conn ;
|
||||
@@ -800,10 +800,23 @@ extern "C" {
|
||||
conn->hdr_list = NULL ;
|
||||
conn->file = w->file;
|
||||
conn->body = json;
|
||||
conn->flushed = false;
|
||||
conn->flushed = false;
|
||||
|
||||
|
||||
w->circularBuffer = (void *) new CircularBuffer_t(8192);
|
||||
|
||||
if (w->session_id) {
|
||||
int err;
|
||||
switch_codec_implementation_t read_impl;
|
||||
switch_core_session_t *psession = switch_core_session_locate(w->session_id);
|
||||
switch_core_session_get_read_impl(psession, &read_impl);
|
||||
uint32_t samples_per_second = !strcasecmp(read_impl.iananame, "g722") ? read_impl.actual_samples_per_second : read_impl.samples_per_second;
|
||||
if (mpg123_param(mh, MPG123_FORCE_RATE, samples_per_second /*Hz*/, 0) != MPG123_OK) {
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error mpg123_param!\n");
|
||||
return SWITCH_STATUS_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
std::ostringstream api_key_stream;
|
||||
api_key_stream << "Authorization: Bearer " << w->api_key;
|
||||
|
||||
@@ -851,7 +864,6 @@ extern "C" {
|
||||
{
|
||||
switch_mutex_lock(w->mutex);
|
||||
ConnInfo_t *conn = (ConnInfo_t *) w->conn;
|
||||
|
||||
if (w->response_code > 0 && w->response_code != 200) {
|
||||
switch_mutex_unlock(w->mutex);
|
||||
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "whisper_speech_read_tts, returning failure\n") ;
|
||||
|
||||
Reference in New Issue
Block a user