mirror of
https://github.com/jambonz/freeswitch-modules.git
synced 2025-12-19 08:57:44 +00:00
mod_dub support sayOnTrack Deepgram (#35)
* mod_dub support sayOnTrack Deepgram Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * mod_dub sayOnTrack support azure Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * wip Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * wip Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * wip Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * wip Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * wip Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> * support whisper * wip Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com> --------- Signed-off-by: Hoan HL <quan.luuhoang8@gmail.com>
This commit is contained in:
@@ -142,6 +142,8 @@ void AudioProducerHttp::start(std::function<void(bool, const std::string&)> call
|
||||
curl_easy_setopt(_easy, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)31415);
|
||||
/*Add request body*/
|
||||
if (!_body.empty()) curl_easy_setopt(_easy, CURLOPT_POSTFIELDS, _body.c_str());
|
||||
/*Add request proxy*/
|
||||
if (!_proxy.empty()) curl_easy_setopt(_easy, CURLOPT_PROXY, _proxy.c_str());
|
||||
|
||||
/*Add request headers*/
|
||||
struct curl_slist *hdr_list = nullptr;
|
||||
@@ -167,11 +169,12 @@ void AudioProducerHttp::queueHttpPostAudio(const std::string& url, int gain, boo
|
||||
_gain = gain;
|
||||
_loop = loop;
|
||||
}
|
||||
void AudioProducerHttp::queueHttpPostAudio(const std::string& url, const std::string& body, std::vector<std::string>& headers, int gain, bool loop) {
|
||||
void AudioProducerHttp::queueHttpPostAudio(const std::string& url, const std::string& body, std::vector<std::string>& headers, const std::string& proxy, int gain, bool loop) {
|
||||
_method = HttpMethod_t::HTTP_METHOD_POST;
|
||||
_url = url;
|
||||
_body = body;
|
||||
_headers = headers;
|
||||
_proxy = proxy;
|
||||
_gain = gain;
|
||||
_loop = loop;
|
||||
}
|
||||
|
||||
@@ -68,7 +68,7 @@ public:
|
||||
|
||||
void queueHttpGetAudio(const std::string& url, int gain = 0, bool loop = false);
|
||||
void queueHttpPostAudio(const std::string& url, int gain = 0, bool loop = false);
|
||||
void queueHttpPostAudio(const std::string& url, const std::string& body, std::vector<std::string>& headers, int gain = 0, bool loop = false);
|
||||
void queueHttpPostAudio(const std::string& url, const std::string& body, std::vector<std::string>& headers, const std::string& proxy, int gain = 0, bool loop = false);
|
||||
|
||||
Status_t getStatus() const { return _status; }
|
||||
void setStatus(Status_t status) { _status = status; }
|
||||
@@ -121,6 +121,7 @@ private:
|
||||
HttpMethod_t _method;
|
||||
std::string _url;
|
||||
std::string _body;
|
||||
std::string _proxy;
|
||||
std::vector<std::string> _headers;
|
||||
Status_t _status;
|
||||
mpg123_handle *_mh;
|
||||
|
||||
@@ -98,18 +98,18 @@ extern "C" {
|
||||
|
||||
/**
 * Queue text-to-speech audio on a named dub track.
 *
 * The text is handed to tts_vendor_parse_text(), which fills in the URL,
 * body, headers and optional proxy of the HTTP request; that request is then
 * queued on the track as an HTTP POST audio source.
 *
 * @param cb         capture callback state holding the tracks
 * @param trackName  name of the track to queue the audio on
 * @param text       text to speak, including the vendor parameters
 * @param gain       gain passed through to the queued audio
 * @return SWITCH_STATUS_SUCCESS when the request was queued,
 *         SWITCH_STATUS_FALSE if the track is unknown, the text is missing
 *         or the text cannot be parsed
 */
switch_status_t say_dub_track(struct cap_cb* cb, char* trackName, char* text, int gain) {
  std::vector<std::string> headers;
  std::string url, body, proxy;
  Track* track = find_track_by_name(cb->tracks, trackName);

  if (!track) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "say_dub_track: track %s not found\n", trackName);
    return SWITCH_STATUS_FALSE;
  }
  /* a null char* must never be converted to std::string (undefined behaviour) */
  if (!text) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "say_dub_track: no text provided\n");
    return SWITCH_STATUS_FALSE;
  }
  if (tts_vendor_parse_text(text, url, body, headers, proxy) != SWITCH_STATUS_SUCCESS) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "say_dub_track: failed to parse text\n");
    return SWITCH_STATUS_FALSE;
  }
  track->queueHttpPostAudio(url, body, headers, proxy, gain);
  return SWITCH_STATUS_SUCCESS;
}
|
||||
|
||||
|
||||
@@ -114,11 +114,11 @@ void Track::queueHttpPostAudio(const std::string& url, int gain, bool loop) {
|
||||
}
|
||||
}
|
||||
|
||||
void Track::queueHttpPostAudio(const std::string& url, const std::string& body, std::vector<std::string>& headers, int gain, bool loop) {
|
||||
void Track::queueHttpPostAudio(const std::string& url, const std::string& body, std::vector<std::string>& headers, const std::string& proxy, int gain, bool loop) {
|
||||
bool startIt = false;
|
||||
if (_stopping) return;
|
||||
auto ap = std::make_shared<AudioProducerHttp>(_mutex, _buffer, _sampleRate);
|
||||
ap->queueHttpPostAudio(url, body, headers, gain, loop);
|
||||
ap->queueHttpPostAudio(url, body, headers, proxy, gain, loop);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
_apQueue.push(ap);
|
||||
|
||||
@@ -14,7 +14,7 @@ public:
|
||||
/* audio production methods */
|
||||
void queueHttpGetAudio(const std::string& url, int gain = 0, bool loop = false);
|
||||
void queueHttpPostAudio(const std::string& url, int gain = 0, bool loop = false);
|
||||
void queueHttpPostAudio(const std::string& url, const std::string& body, std::vector<std::string>& headers, int gain = 0, bool loop = false);
|
||||
void queueHttpPostAudio(const std::string& url, const std::string& body, std::vector<std::string>& headers, const std::string& proxy, int gain = 0, bool loop = false);
|
||||
void queueFileAudio(const std::string& path, int gain = 0, bool loop = false);
|
||||
void removeAllAudio();
|
||||
|
||||
|
||||
@@ -4,6 +4,186 @@
|
||||
#include <switch_json.h>
|
||||
#include <map>
|
||||
|
||||
/**
 * Build the HTTP request for an OpenAI ("whisper" vendor) speech synthesis call.
 *
 * Recognised params: "api_key" (required), "model_id" (required), "voice",
 * "speed". Unknown keys are ignored.
 *
 * @param params   vendor parameters parsed from the say text
 * @param text     text to synthesize
 * @param url      [out] request URL
 * @param body     [out] JSON request body
 * @param headers  [out] request headers are appended to this vector
 * @return SWITCH_STATUS_SUCCESS on success, SWITCH_STATUS_FALSE if a required
 *         parameter is missing or the JSON body cannot be built
 */
switch_status_t whisper_parse_text(const std::map<std::string, std::string>& params, const std::string& text,
  std::string& url, std::string& body, std::vector<std::string>& headers) {
  std::string api_key;
  std::string voice_name;
  std::string model_id;
  std::string speed;

  for (const auto& pair : params) {
    if (pair.first == "api_key") {
      api_key = pair.second;
    } else if (pair.first == "voice") {
      voice_name = pair.second;
    } else if (pair.first == "model_id") {
      model_id = pair.second;
    } else if (pair.first == "speed") {
      speed = pair.second;
    }
  }

  if (api_key.empty()) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "whisper_parse_text: no api_key provided\n");
    return SWITCH_STATUS_FALSE;
  }
  if (model_id.empty()) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "whisper_parse_text: no model_id provided\n");
    return SWITCH_STATUS_FALSE;
  }

  url = "https://api.openai.com/v1/audio/speech";

  /* create the JSON body */
  cJSON * jResult = cJSON_CreateObject();
  if (!jResult) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "whisper_parse_text: failed to allocate JSON body\n");
    return SWITCH_STATUS_FALSE;
  }
  cJSON_AddStringToObject(jResult, "model", model_id.c_str());
  cJSON_AddStringToObject(jResult, "input", text.c_str());
  cJSON_AddStringToObject(jResult, "voice", voice_name.c_str());
  cJSON_AddStringToObject(jResult, "response_format", "mp3");
  if (!speed.empty()) {
    /* the API documents "speed" as a JSON number, so send it as one rather than as a string */
    char* parse_end = nullptr;
    const double speed_value = strtod(speed.c_str(), &parse_end);
    if (parse_end != speed.c_str() && *parse_end == '\0') {
      cJSON_AddNumberToObject(jResult, "speed", speed_value);
    } else {
      /* best effort: drop an unusable speed instead of sending a request that cannot be valid */
      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "whisper_parse_text: ignoring non-numeric speed %s\n", speed.c_str());
    }
  }
  char* _body = cJSON_PrintUnformatted(jResult);
  cJSON_Delete(jResult);

  /* assigning a null char* to std::string is undefined behaviour, so check first */
  if (!_body) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "whisper_parse_text: failed to serialize JSON body\n");
    return SWITCH_STATUS_FALSE;
  }
  body = _body;
  free(_body);

  // Create headers
  headers.push_back("Authorization: Bearer " + api_key);
  headers.push_back("Content-Type: application/json");

  return SWITCH_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Build the HTTP request for a Microsoft Azure speech synthesis call.
 *
 * Recognised params: "language" and "voice" (both required), "api_key",
 * "region" (defaults to "westus"), "endpoint" (replaces the region based
 * URL), "endpointId", "http_proxy_ip" and "http_proxy_port".
 *
 * Text that already starts with "<speak" is sent unchanged; any other text is
 * wrapped in a <speak>/<voice> SSML envelope.
 *
 * @param params   vendor parameters parsed from the say text
 * @param text     text or SSML document to synthesize
 * @param url      [out] request URL
 * @param body     [out] SSML request body
 * @param headers  [out] request headers are appended to this vector
 * @param proxy    [out] "http://ip[:port]" when a proxy ip is configured, otherwise empty
 * @return SWITCH_STATUS_SUCCESS on success, SWITCH_STATUS_FALSE if language or voice is missing
 */
switch_status_t azure_parse_text(const std::map<std::string, std::string>& params, const std::string& text,
  std::string& url, std::string& body, std::vector<std::string>& headers, std::string& proxy) {

  /* fetch a parameter by key; a missing key yields an empty string */
  auto param = [&params](const char* key) -> std::string {
    const auto found = params.find(key);
    return found == params.end() ? std::string() : found->second;
  };

  const std::string api_key = param("api_key");
  const std::string voice_name = param("voice");
  const std::string language = param("language");
  std::string region = param("region");
  const std::string endpoint = param("endpoint");
  const std::string endpointId = param("endpointId");
  const std::string http_proxy_ip = param("http_proxy_ip");
  const std::string http_proxy_port = param("http_proxy_port");

  if (language.empty()) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "azure_parse_text: no language provided\n");
    return SWITCH_STATUS_FALSE;
  }
  if (voice_name.empty()) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "azure_parse_text: no voice_name provided\n");
    return SWITCH_STATUS_FALSE;
  }

  if (region.empty()) {
    region = "westus";
  }

  /* an explicit endpoint wins over the region based URL */
  if (endpoint.empty()) {
    url = "https://" + region + ".tts.speech.microsoft.com/cognitiveservices/v1";
  } else {
    url = endpoint;
  }

  /* body: pass a complete SSML document through untouched, wrap anything else */
  if (text.compare(0, 6, "<speak") == 0) {
    body = text;
  } else {
    std::string ssml("<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xmlns:mstts=\"https://www.w3.org/2001/mstts\" xml:lang=\"");
    ssml += language;
    ssml += "\">";
    ssml += "<voice name=\"";
    ssml += voice_name;
    ssml += "\">";
    ssml += text;
    ssml += "</voice>";
    ssml += "</speak>";
    body = ssml;
  }

  /* headers: the optional ones come first, then the fixed content and output format */
  if (!api_key.empty()) {
    headers.push_back("Ocp-Apim-Subscription-Key: " + api_key);
  }
  if (!endpointId.empty()) {
    headers.push_back("X-Microsoft-EndpointId: " + endpointId);
  }
  headers.push_back("Content-Type: application/ssml+xml");
  headers.push_back("X-Microsoft-OutputFormat: audio-16khz-32kbitrate-mono-mp3");

  /* proxy: always assigned, left empty when no proxy ip is configured */
  std::string proxy_url;
  if (!http_proxy_ip.empty()) {
    proxy_url = "http://" + http_proxy_ip;
    if (!http_proxy_port.empty()) {
      proxy_url += ":" + http_proxy_port;
    }
  }
  proxy = proxy_url;

  return SWITCH_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Build the HTTP request for a Deepgram speech synthesis call.
 *
 * Recognised params: "api_key" and "voice" (both required). The voice is sent
 * as the "model" query parameter of the request URL.
 *
 * @param params   vendor parameters parsed from the say text
 * @param text     text to synthesize
 * @param url      [out] request URL
 * @param body     [out] JSON request body
 * @param headers  [out] request headers are appended to this vector
 * @return SWITCH_STATUS_SUCCESS on success, SWITCH_STATUS_FALSE if a required
 *         parameter is missing or the JSON body cannot be built
 */
switch_status_t deepgram_parse_text(const std::map<std::string, std::string>& params, const std::string& text,
  std::string& url, std::string& body, std::vector<std::string>& headers) {

  std::string api_key;
  std::string voice_name;

  for (const auto& pair : params) {
    if (pair.first == "api_key") {
      api_key = pair.second;
    } else if (pair.first == "voice") {
      voice_name = pair.second;
    }
  }

  if (api_key.empty()) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "deepgram_parse_text: no api_key provided\n");
    return SWITCH_STATUS_FALSE;
  }
  if (voice_name.empty()) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "deepgram_parse_text: no voice_name provided\n");
    return SWITCH_STATUS_FALSE;
  }

  /* format url */
  url = "https://api.deepgram.com/v1/speak?model=" + voice_name + "&encoding=mp3";

  /* create the JSON body */
  cJSON * jResult = cJSON_CreateObject();
  if (!jResult) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "deepgram_parse_text: failed to allocate JSON body\n");
    return SWITCH_STATUS_FALSE;
  }
  cJSON_AddStringToObject(jResult, "text", text.c_str());

  char* _body = cJSON_PrintUnformatted(jResult);
  cJSON_Delete(jResult);

  /* assigning a null char* to std::string is undefined behaviour, so check first */
  if (!_body) {
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "deepgram_parse_text: failed to serialize JSON body\n");
    return SWITCH_STATUS_FALSE;
  }
  body = _body;
  free(_body);

  // Create headers
  headers.push_back("Authorization: Token " + api_key);
  headers.push_back("Content-Type: application/json");

  return SWITCH_STATUS_SUCCESS;
}
|
||||
|
||||
switch_status_t elevenlabs_parse_text(const std::map<std::string, std::string>& params, const std::string& text,
|
||||
std::string& url, std::string& body, std::vector<std::string>& headers) {
|
||||
|
||||
@@ -87,7 +267,7 @@ switch_status_t elevenlabs_parse_text(const std::map<std::string, std::string>&
|
||||
return SWITCH_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
switch_status_t tts_vendor_parse_text(const std::string& say, std::string& url, std::string& body, std::vector<std::string>& headers) {
|
||||
switch_status_t tts_vendor_parse_text(const std::string& say, std::string& url, std::string& body, std::vector<std::string>& headers, std::string& proxy) {
|
||||
size_t start = say.find("{") + 1;
|
||||
size_t end = say.find("}");
|
||||
|
||||
@@ -111,8 +291,14 @@ switch_status_t tts_vendor_parse_text(const std::string& say, std::string& url,
|
||||
|
||||
/* hand the request off to the builder matching the "vendor" parameter */
if (params["vendor"] == "elevenlabs") {
  return elevenlabs_parse_text(params, text, url, body, headers);
} else if (params["vendor"] == "deepgram") {
  return deepgram_parse_text(params, text, url, body, headers);
} else if (params["vendor"] == "microsoft") {
  /* the only vendor builder that can also produce a proxy */
  return azure_parse_text(params, text, url, body, headers, proxy);
} else if (params["vendor"] == "whisper") {
  return whisper_parse_text(params, text, url, body, headers);
} else {
  /* %s needs a C string: a std::string object must not be passed through the varargs call */
  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "tts_vendor_parse_text: There is no available parser for vendor %s\n", params["vendor"].c_str());
  return SWITCH_STATUS_FALSE;
}
|
||||
}
|
||||
@@ -7,6 +7,6 @@
|
||||
#include "common.h"
|
||||
|
||||
|
||||
switch_status_t tts_vendor_parse_text(const std::string& say, std::string& url, std::string& body, std::vector<std::string>& headers);
|
||||
switch_status_t tts_vendor_parse_text(const std::string& say, std::string& url, std::string& body, std::vector<std::string>& headers, std::string& proxy);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user