/* freeswitch-modules/mod_whisper_tts/mod_whisper_tts.c */

#include "mod_whisper_tts.h"
#include "whisper_glue.h"

/*
 * Declare the module's load and shutdown entry points (both defined at the
 * bottom of this file) and register them under the module name
 * mod_whisper_tts.
 * NOTE(review): the trailing NULL is presumably the optional runtime entry
 * point -- confirm against FreeSWITCH's SWITCH_MODULE_DEFINITION macro.
 */
SWITCH_MODULE_LOAD_FUNCTION(mod_whisper_tts_load);
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_whisper_tts_shutdown);
SWITCH_MODULE_DEFINITION(mod_whisper_tts, mod_whisper_tts_load, mod_whisper_tts_shutdown, NULL);

/* Free one heap-owned field of a whisper_t and reset it to NULL.
 * free(NULL) is a no-op, so fields that were never set need no guard. */
#define WHISPER_RELEASE(handle, field) \
  do { free((handle)->field); (handle)->field = NULL; } while (0)

/**
 * Release the heap-allocated string fields of a whisper_t and leave each of
 * them NULL, so the structure can be filled in again or cleared a second time
 * without a double free.
 *
 * @param w        handle whose fields are released; must not be NULL
 * @param freeAll  non-zero also releases voice_name and session_id; zero
 *                 leaves those two untouched
 */
static void clearWhisper(whisper_t* w, int freeAll) {
  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "clearWhisper\n");

  /* fields set through w_text_param_tts */
  WHISPER_RELEASE(w, api_key);
  WHISPER_RELEASE(w, model_id);
  WHISPER_RELEASE(w, speed);

  /* remaining string fields; none of them is assigned in this file */
  WHISPER_RELEASE(w, request_id);
  WHISPER_RELEASE(w, reported_latency);
  WHISPER_RELEASE(w, reported_organization);
  WHISPER_RELEASE(w, reported_ratelimit_requests);
  WHISPER_RELEASE(w, reported_ratelimit_remaining_requests);
  WHISPER_RELEASE(w, reported_ratelimit_reset_requests);
  WHISPER_RELEASE(w, ct);
  WHISPER_RELEASE(w, err_msg);
  WHISPER_RELEASE(w, name_lookup_time_ms);
  WHISPER_RELEASE(w, connect_time_ms);
  WHISPER_RELEASE(w, final_response_time_ms);
  WHISPER_RELEASE(w, cache_filename);

  if (!freeAll) {
    return;
  }

  /* released only when the caller asks for everything (freeAll != 0) */
  WHISPER_RELEASE(w, voice_name);
  WHISPER_RELEASE(w, session_id);
}

#undef WHISPER_RELEASE

/**
 * Return the whisper_t attached to a speech handle, creating it on first use.
 *
 * On the first call for a handle the structure is allocated from the handle's
 * memory pool, stored in sh->private_info, zero-filled, and given a mutex
 * created with SWITCH_MUTEX_NESTED from the same pool. Later calls return the
 * stored pointer unchanged.
 *
 * @param sh  speech handle owning the private data
 * @return the handle's whisper_t
 */
static whisper_t * createOrRetrievePrivateData(switch_speech_handle_t *sh) {
  whisper_t *state = (whisper_t *) sh->private_info;

  /* Already attached: nothing to set up. */
  if (state) {
    return state;
  }

  state = switch_core_alloc(sh->memory_pool, sizeof(*state));
  sh->private_info = state;
  memset(state, 0, sizeof(*state));
  switch_mutex_init(&state->mutex, SWITCH_MUTEX_NESTED, sh->memory_pool);
  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "allocated whisper_t\n");

  return state;
}

/**
 * Speech-interface "open" callback: records the requested voice and sample
 * rate on the handle's whisper_t, then delegates to whisper_speech_open().
 *
 * @param sh          speech handle supplied by the caller
 * @param voice_name  requested voice; may be NULL, in which case no voice is
 *                    stored and w->voice_name is left NULL
 * @param rate        sample rate, stored in w->rate
 * @param channels    channel count; only logged here
 * @param flags       not used by this function
 * @return the status returned by whisper_speech_open()
 */
switch_status_t w_speech_open(switch_speech_handle_t *sh, const char *voice_name, int rate, int channels, switch_speech_flag_t *flags)
{
  whisper_t *w = createOrRetrievePrivateData(sh);

  /* Release any voice already stored on this handle (an earlier open or a
   * "voice" text param) so that overwriting the pointer does not leak it. */
  free(w->voice_name);

  /* strdup(NULL) is undefined behaviour, so copy only when a voice was given. */
  w->voice_name = voice_name ? strdup(voice_name) : NULL;
  w->rate = rate;

  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "w_speech_open voice: %s, rate %d, channels %d\n",
                    voice_name ? voice_name : "(null)", rate, channels);
  return whisper_speech_open(w);
}

/**
 * Speech-interface "close" callback: shuts down the glue layer for this
 * handle, destroys the handle's mutex and releases every string owned by the
 * whisper_t (including voice_name and session_id).
 *
 * @param sh     speech handle being closed
 * @param flags  not used by this function
 * @return the status returned by whisper_speech_close()
 */
static switch_status_t w_speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags)
{
  switch_status_t rc;
  whisper_t *w = createOrRetrievePrivateData(sh);
  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_close\n");

  /* Close the glue layer first: it receives the whole whisper_t, so the mutex
   * must still be valid in case it needs to lock it while shutting down. */
  rc = whisper_speech_close(w);

  /* Only now tear down the synchronisation primitive and the owned strings. */
  switch_mutex_destroy(w->mutex);
  clearWhisper(w, 1);
  return rc;
}

/**
 * Speech-interface "feed" callback: FreeSWITCH calls this to hand us the text
 * to speak.
 *
 * Resets the per-request state on the whisper_t (draining, reads,
 * response_code, err_msg, playback_start_sent) and then delegates to
 * whisper_speech_feed_tts().
 *
 * @param sh     speech handle
 * @param text   text to synthesize; passed through unchanged
 * @param flags  passed through unchanged
 * @return the status returned by whisper_speech_feed_tts()
 */
static switch_status_t w_speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags)
{
  whisper_t *w = createOrRetrievePrivateData(sh);
  w->draining = 0;
  w->reads = 0;
  w->response_code = 0;

  /* err_msg is heap-owned (clearWhisper releases it with free()), so release
   * any message left over from a request that was never flushed instead of
   * just dropping the pointer. free(NULL) is a no-op. */
  free(w->err_msg);
  w->err_msg = NULL;

  w->playback_start_sent = 0;

  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_feed_tts\n");

  return whisper_speech_feed_tts(w, text, flags);
}

/**
 * Speech-interface "read" callback: forwards the request for rendered audio
 * to whisper_speech_read_tts() using the handle's whisper_t.
 *
 * Original author's note: FreeSWITCH calls this periodically to fetch audio
 * in L16 format, and up to 8k of audio can be provided per call. Neither
 * detail is verifiable from this file.
 *
 * @param sh       speech handle
 * @param data     output buffer, passed through unchanged
 * @param datalen  in/out length, passed through unchanged
 * @param flags    passed through unchanged
 * @return the status returned by whisper_speech_read_tts()
 */
static switch_status_t w_speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *datalen, switch_speech_flag_t *flags)
{
  return whisper_speech_read_tts(createOrRetrievePrivateData(sh), data, datalen, flags);
}

/**
 * Speech-interface "flush" callback (the original author notes it is called
 * at the end of a request).
 *
 * Lets the glue layer flush through whisper_speech_flush_tts(), then releases
 * the per-request strings with clearWhisper(w, 0), which keeps voice_name and
 * session_id for the next request on the same handle.
 *
 * @param sh  speech handle
 */
static void w_speech_flush_tts(switch_speech_handle_t *sh)
{
  whisper_t *state = createOrRetrievePrivateData(sh);

  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_speech_flush_tts\n");

  whisper_speech_flush_tts(state);
  clearWhisper(state, 0);
}

/**
 * Speech-interface text-parameter callback: stores a private copy of the
 * recognised string parameters on the handle's whisper_t.
 *
 * Recognised names: "api_key", "voice", "model_id", "speed", "session-uuid"
 * (each replaces the previously stored string) and "write_cache_file" (sets
 * cache_audio to 1 when the value is true according to switch_true()). Any
 * other name is ignored.
 *
 * @param sh     speech handle
 * @param param  parameter name; a NULL name is ignored
 * @param val    parameter value; a NULL value is ignored
 */
static void w_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val)
{
  whisper_t *w = createOrRetrievePrivateData(sh);

  /* strcmp()/strdup() on NULL is undefined behaviour; nothing useful can be
   * stored without both a name and a value. */
  if (!param || !val) {
    return;
  }

  /* Never write the API key to the log: it is a credential. */
  switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "w_text_param_tts: %s=%s\n",
                    param, (0 == strcmp(param, "api_key")) ? "<redacted>" : val);

  /* free(NULL) is a no-op, so the previous value needs no guard. */
  if (0 == strcmp(param, "api_key")) {
    free(w->api_key);
    w->api_key = strdup(val);
  } else if (0 == strcmp(param, "voice")) {
    free(w->voice_name);
    w->voice_name = strdup(val);
  } else if (0 == strcmp(param, "model_id")) {
    free(w->model_id);
    w->model_id = strdup(val);
  } else if (0 == strcmp(param, "speed")) {
    free(w->speed);
    w->speed = strdup(val);
  } else if (0 == strcmp(param, "session-uuid")) {
    free(w->session_id);
    w->session_id = strdup(val);
  } else if (0 == strcmp(param, "write_cache_file") && switch_true(val)) {
    w->cache_audio = 1;
  }
}
/**
 * Speech-interface numeric-parameter callback. Deliberately a no-op: this
 * module does not act on any numeric parameter.
 */
static void w_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val)
{
  (void) sh;
  (void) param;
  (void) val;
}
/**
 * Speech-interface float-parameter callback. Deliberately a no-op: this
 * module does not act on any floating-point parameter.
 */
static void w_float_param_tts(switch_speech_handle_t *sh, char *param, double val)
{
  (void) sh;
  (void) param;
  (void) val;
}

/**
 * Module load entry point: creates the module interface, registers a speech
 * interface named "whisper" wired to the w_* callbacks in this file, and then
 * initialises the glue layer.
 *
 * module_interface, pool and modname are not declared here; they are
 * presumably supplied by the SWITCH_MODULE_LOAD_FUNCTION macro.
 *
 * @return the status returned by whisper_speech_load()
 */
SWITCH_MODULE_LOAD_FUNCTION(mod_whisper_tts_load)
{
  switch_speech_interface_t *iface;

  *module_interface = switch_loadable_module_create_module_interface(pool, modname);

  iface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE);
  iface->interface_name = "whisper";

  /* lifecycle callbacks */
  iface->speech_open = w_speech_open;
  iface->speech_close = w_speech_close;

  /* synthesis callbacks */
  iface->speech_feed_tts = w_speech_feed_tts;
  iface->speech_read_tts = w_speech_read_tts;
  iface->speech_flush_tts = w_speech_flush_tts;

  /* parameter callbacks */
  iface->speech_text_param_tts = w_text_param_tts;
  iface->speech_numeric_param_tts = w_numeric_param_tts;
  iface->speech_float_param_tts = w_float_param_tts;

  return whisper_speech_load();
}

SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_whisper_tts_shutdown)
{
  return whisper_speech_unload();
}