eliminate support for multiple lws threads as part of fixing valgrind errors

Signed-off-by: Dave Horton <daveh@beachdognet.com>
2025-12-19 08:27:44 +00:00 · 2023-12-26 10:57:15 -05:00
parent a2324972eb
commit 420e51eac7
140 changed files with 19851 additions and 0 deletions
--- a/mod_cobalt_transcribe/.DS_Store
+++ b/mod_cobalt_transcribe/.DS_Store
--- a/mod_cobalt_transcribe/LICENSE
+++ b/mod_cobalt_transcribe/LICENSE
@@ -0,0 +1,8 @@
+Copyright 2023, Drachtio Communications Services, LLC
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
--- a/mod_cobalt_transcribe/Makefile.am
+++ b/mod_cobalt_transcribe/Makefile.am
@@ -0,0 +1,11 @@
+# Automake input for the mod_cobalt_transcribe module.
+include $(top_srcdir)/build/modmake.rulesam
+MODNAME=mod_cobalt_transcribe
+
+mod_LTLIBRARIES = mod_cobalt_transcribe.la
+# One C source plus the C++ source cobalt_glue.cpp, which uses gRPC.
+mod_cobalt_transcribe_la_SOURCES  = mod_cobalt_transcribe.c cobalt_glue.cpp 
+mod_cobalt_transcribe_la_CFLAGS   = $(AM_CFLAGS)
+# C++ sources are built as C++17 with the googleapis and cobalt-asr-grpc-api stub directories on the include path.
+mod_cobalt_transcribe_la_CXXFLAGS =  -I $(top_srcdir)/libs/googleapis/gens -I $(top_srcdir)/libs/cobalt-asr-grpc-api/stubs $(AM_CXXFLAGS) -std=c++17
+
+mod_cobalt_transcribe_la_LIBADD   = $(switch_builddir)/libfreeswitch.la
+# The gRPC libraries to link against are obtained from pkg-config.
+mod_cobalt_transcribe_la_LDFLAGS  = -avoid-version -module -no-undefined -shared `pkg-config --libs grpc++ grpc`
+
--- a/mod_cobalt_transcribe/README.md
+++ b/mod_cobalt_transcribe/README.md
@@ -0,0 +1,53 @@
+# mod_cobalt_transcribe
+
+A Freeswitch module that generates real-time transcriptions on a Freeswitch channel by using the [streaming transcription API](https://docs-v2.cobaltspeech.com/docs/asr/) from [Cobalt Speech](https://www.cobaltspeech.com/).  Cobalt Speech provides a speech recognition product that can be run on-prem on a Linux server.
+
+## API
+
+### Commands
+The freeswitch module exposes the following API commands:
+
+```
+uuid_cobalt_get_version <uuid> <hostport>
+```
+Returns version information about the Cobalt server listening at the specified IP address and port.
+
+```
+uuid_cobalt_list_models <uuid> <hostport> 
+```
+Lists the available models for a Cobalt speech server.
+
+```
+uuid_cobalt_compile_context <uuid> <hostport> <model> <token> <phrases>
+```
+Compiles a list of hint phrases into a context string that can later be used in a transcribe command.  The context string is returned as a base64-encoded string.  Hints must be compiled within the context of a single model, thus it is required to provide the model name.  Hints must also be associated with a "token"; the default token that you may generally use is "unk:default".  See [here](https://docs-v2.cobaltspeech.com/docs/asr/transcribe/recognition_context/) for more details.
+
+```
+uuid_cobalt_transcribe <uuid> hostport start model [interim|full] [stereo|mono] [bug-name]
+```
+Attaches a media bug to the channel and performs a streaming recognize request.
+
+```
+uuid_cobalt_transcribe <uuid> hostport stop model
+```
+Stops transcription on a channel.
+
+
+### Channel Variables
+
+| variable | Description |
+| --- | ----------- |
+| COBALT_ENABLE_CONFUSION_NETWORK | if true, enable [confusion network](https://docs-v2.cobaltspeech.com/docs/asr/transcribe/#confusion-network) |
+| COBALT_METADATA | custom metadata to send with a transcribe request  |
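+| COBALT_COMPILED_CONTEXT_DATA | base64-encoded compiled context hints to include with the transcribe request |
+| COBALT_ENABLE_WORD_TIME_OFFSETS | if true, enable word-level details (per-word confidence, start time and duration) in the transcription results |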
+
+
+### Events
+`cobalt_speech::transcription` - returns an interim or final transcription.  The event contains a JSON body describing the transcription result.
+
+`cobalt_speech::version_response` - returns the response to a `uuid_cobalt_get_version` request. The event contains a JSON body describing the version.
+
+`cobalt_speech::model_list_response` - returns the response to a `uuid_cobalt_list_models` request. The event contains a JSON body describing the available models.
+
+`cobalt_speech::compile_context_response` - returns the response to a `uuid_cobalt_compile_context` request. The event contains a JSON body containing the base64-encoded context.
+
--- a/mod_cobalt_transcribe/cobalt_glue.cpp
+++ b/mod_cobalt_transcribe/cobalt_glue.cpp
@@ -0,0 +1,775 @@
+#include <cstdlib>
+#include <algorithm>
+#include <future>
+#include <string>
+#include <vector>
+#include <sstream>
+
+#include <switch.h>
+#include <switch_json.h>
+#include <grpc++/grpc++.h>
+
+#include "cobaltspeech/transcribe/v5/transcribe.grpc.pb.h"
+
+namespace cobalt_asr = cobaltspeech::transcribe::v5;
+
+#include "mod_cobalt_transcribe.h"
+#include "simple_buffer.h"
+
+#define CHUNKSIZE (320)
+#define DEFAULT_CONTEXT_TOKEN "unk:default"
+
+namespace {
+  /**
+   * Compare two strings for equality while ignoring letter case.
+   *
+   * Both arguments are taken by value so they can be lower-cased in place.
+   *
+   * @return 1 when the lower-cased strings are identical, 0 otherwise.
+   */
+  int case_insensitive_match(std::string s1, std::string s2) {
+    // tolower() is only defined for values representable as unsigned char (or EOF),
+    // so convert each character before handing it over; a plain (possibly negative)
+    // char would be undefined behaviour for bytes >= 0x80.
+    auto lower = [](unsigned char c) { return static_cast<char>(::tolower(c)); };
+    std::transform(s1.begin(), s1.end(), s1.begin(), lower);
+    std::transform(s2.begin(), s2.end(), s2.begin(), lower);
+    return s1 == s2 ? 1 : 0;
+  }
+  /**
+   * Return a copy of `str` with leading and trailing spaces, tabs, newlines and
+   * carriage returns removed.  A string made only of those characters (or an
+   * empty string) yields "".
+   */
+  std::string trim(const std::string& str) {
+    const char* const blanks = " \t\n\r";
+    const size_t first = str.find_first_not_of(blanks);
+    if (first == std::string::npos) {
+      return std::string();
+    }
+    const size_t last = str.find_last_not_of(blanks);
+    const size_t length = last - first + 1;
+    return str.substr(first, length);
+  }
+
+  /**
+   * Split `input` on `delimiter` and return the pieces with surrounding
+   * whitespace stripped (via trim()).  Pieces are returned in input order.
+   *
+   * @param input      NUL-terminated text to split.
+   * @param delimiter  single character separating the pieces.
+   */
+  std::vector<std::string> splitAndTrim(const char* input, char delimiter) {
+    std::vector<std::string> pieces;
+    std::stringstream stream{std::string(input)};
+
+    for (std::string piece; getline(stream, piece, delimiter); ) {
+      pieces.push_back(trim(piece));
+    }
+
+    return pieces;
+  }
+  /**
+   * Encode arbitrary bytes as standard base64 (alphabet A-Z a-z 0-9 + /),
+   * padded with '=' to a multiple of four characters.
+   *
+   * @param input  bytes to encode; may contain embedded NULs.
+   * @return the base64 text ("" for empty input).
+   */
+  std::string base64_encode(const std::string &input) {
+    const std::string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+    std::string encoded;
+    // The accumulator is shifted left once per input byte and never reset, so it
+    // must be unsigned: with a signed int the shift overflows (undefined behaviour)
+    // as soon as the input is longer than a few bytes.  Only the low bits are read.
+    unsigned int val = 0;
+    int valb = -6;  // number of pending bits in val, minus 6
+    for (unsigned char c : input) {
+      val = (val << 8) + c;
+      valb += 8;
+      while (valb >= 0) {
+        encoded.push_back(chars[(val >> valb) & 0x3F]);
+        valb -= 6;
+      }
+    }
+    // Flush the remaining 2 or 4 bits, left-aligned in a final sextet.
+    if (valb > -6) encoded.push_back(chars[((val << 8) >> (valb + 8)) & 0x3F]);
+    while (encoded.size() % 4) encoded.push_back('=');
+    return encoded;
+  }
+
+  /**
+   * Decode standard base64 text back into bytes.
+   *
+   * Decoding stops at the first character that is not part of the base64
+   * alphabet, which includes the '=' padding; anything after it is ignored.
+   *
+   * @param input  base64 text.
+   * @return the decoded bytes (may contain embedded NULs).
+   */
+  std::string base64_decode(const std::string &input) {
+    const std::string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+    // Reverse lookup: byte value -> sextet, or -1 for bytes outside the alphabet.
+    std::vector<int> T(256, -1);
+    for (int i = 0; i < 64; i++) T[static_cast<unsigned char>(chars[i])] = i;
+
+    std::string decoded;
+    // Unsigned accumulator: it is shifted left for every input character and never
+    // reset, which would overflow a signed int (undefined behaviour) on longer input.
+    unsigned int val = 0;
+    int valb = -8;  // number of pending bits in val, minus 8
+    for (unsigned char c : input) {
+      if (T[c] == -1) break;
+      val = (val << 6) + static_cast<unsigned int>(T[c]);
+      valb += 6;
+      while (valb >= 0) {
+          decoded.push_back(char((val >> valb) & 0xFF));
+          valb -= 8;
+      }
+    }
+    return decoded;
+  }
+
+  /**
+   * Send a CompileContext request to the Cobalt server at `hostport` and publish
+   * the outcome as a TRANSCRIBE_EVENT_COMPILE_CONTEXT_RESPONSE custom event on the
+   * session's channel.
+   *
+   * `phrases` is either a JSON array of objects carrying a "phrase" string and an
+   * optional numeric "boost", or a plain comma-separated list of phrases.
+   *
+   * The event body is a JSON object with "has_context", the base64-encoded
+   * "compiled_context" and the original "phrases" text.
+   *
+   * @param session   session whose channel the event is attributed to.
+   * @param hostport  address of the Cobalt server.
+   * @param model     model id the context is compiled for.
+   * @param token     context token the phrases are associated with.
+   * @param phrases   hint phrases, in one of the two formats above.
+   * @return a non-null pointer when the server returned a compiled context,
+   *         nullptr otherwise.  The pointer is only a success marker and does not
+   *         point at the compiled data.
+   */
+  const char* compile_context_phrases(switch_core_session_t *session, const char* hostport, const char* model, const char* token, const char* phrases) {
+    switch_channel_t *channel = switch_core_session_get_channel(session);
+    switch_event_t *event = nullptr;
+
+    grpc::ClientContext context;
+    std::shared_ptr<grpc::Channel> grpcChannel = grpc::CreateChannel(hostport, grpc::InsecureChannelCredentials());
+
+    if (!grpcChannel) {
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "failed creating grpc channel\n");
+      return nullptr;
+    }
+
+    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "compile context, model: %s, token: %s, phrases: %s\n", model, token, phrases);
+
+    std::unique_ptr<cobalt_asr::TranscribeService::Stub> stub = cobalt_asr::TranscribeService::NewStub(grpcChannel);
+
+    cobalt_asr::CompileContextRequest request;
+    cobalt_asr::CompileContextResponse response;
+
+    request.set_model_id(model);
+    request.set_token(token);
+
+    // hints are either a simple comma-separated list of phrases, or a json array of objects
+    // containing a phrase and a boost value
+    request.clear_phrases();
+    auto *jPhrases = cJSON_Parse(const_cast<char *>(phrases));
+    if (jPhrases) {
+      int added = 0;
+      cJSON *jPhrase = nullptr;
+      // Iterate over the parsed array itself (iterating over the still-null cursor
+      // would never enter the loop and silently drop every hint).
+      cJSON_ArrayForEach(jPhrase, jPhrases) {
+        cJSON *jItem = cJSON_GetObjectItem(jPhrase, "phrase");
+        const char *text = jItem ? cJSON_GetStringValue(jItem) : nullptr;
+        if (!text) {
+          // no usable "phrase" string on this element: do not send an empty hint
+          continue;
+        }
+        auto* contextPhrase = request.add_phrases();
+        contextPhrase->set_text(text);
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "added text: %s\n", text);
+
+        cJSON *jBoost = cJSON_GetObjectItem(jPhrase, "boost");
+        if (jBoost) {
+          float boost = (float) jBoost->valuedouble;
+          contextPhrase->set_boost(boost);
+          switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "added boost value: %f\n", boost);
+        }
+        added++;
+      }
+      cJSON_Delete(jPhrases);
+      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "added %d hints\n", added);
+    }
+    else {
+      std::vector<std::string> hints = splitAndTrim(phrases, ',');
+      for (const std::string& hint : hints) {
+        auto* contextPhrase = request.add_phrases();
+        contextPhrase->set_text(hint);
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "added: %s\n", hint.c_str());
+      }
+      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "added %d hints\n", request.phrases_size());
+    }
+
+    grpc::Status status = stub->CompileContext(&context, request, &response);
+    if (!status.ok()) {
+      // The event below is still fired; it will report has_context=false.
+      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "compile context request failed: %s (%d)\n",
+        status.error_message().c_str(), (int) status.error_code());
+    }
+
+    const bool hasContext = response.has_context();
+
+    cJSON * jResult = cJSON_CreateObject();
+    cJSON_AddBoolToObject(jResult, "has_context", hasContext);
+    auto& c = response.context();
+    auto data = base64_encode(c.data());
+    cJSON_AddItemToObject(jResult, "compiled_context", cJSON_CreateString(data.c_str()));
+    cJSON_AddItemToObject(jResult, "phrases", cJSON_CreateString(phrases));
+
+    char* json = cJSON_PrintUnformatted(jResult);
+
+    if (json && switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_COMPILE_CONTEXT_RESPONSE) == SWITCH_STATUS_SUCCESS) {
+      switch_channel_event_set_data(channel, event);
+      switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "cobalt");
+      switch_event_add_body(event, "%s", json);
+      switch_event_fire(&event);
+    }
+    else {
+      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "unable to create compile context response event\n");
+    }
+
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "compile context response for cobalt speech: %s\n", json ? json : "");
+
+    free(json);
+    cJSON_Delete(jResult);
+
+    // The compiled context lives inside `response`, which is destroyed when this
+    // function returns, so a pointer into it would dangle.  The caller in this file
+    // only compares the result with nullptr, so hand back a pointer with static
+    // storage duration as the success marker instead.
+    static const char kCompiled[] = "";
+    return hasContext ? kCompiled : nullptr;
+  }
+
+}
+
+/**
+ * Owns one StreamingRecognize call to a Cobalt server on behalf of a single
+ * FreeSWITCH session.
+ *
+ * Audio passed to write() before connect() has run is parked in an internal
+ * SimpleBuffer and flushed once the stream is open.  A reader thread is expected
+ * to block in waitForConnect() until either connect() or cancelConnect() (via
+ * writesDone()) releases it.
+ */
+class GStreamer {
+public:
+  /**
+   * @param session   session this stream belongs to.
+   * @param hostport  address of the Cobalt server.
+   * @param model     model id sent in the recognition config.
+   * @param channels  number of audio channels announced to the server.
+   * @param interim   stored as a flag; not otherwise used inside this class.
+   */
+  GStreamer(
+    switch_core_session_t *session, const char* hostport, const char* model, uint32_t channels, int interim) :
+      m_session(session),
+      m_writesDone(false),
+      m_connected(false),
+      m_interim(interim),
+      m_hostport(hostport),
+      m_model(model),
+      // SimpleBuffer arguments: presumably chunk size and chunk capacity - TODO confirm
+      m_audioBuffer(CHUNKSIZE, 15),
+      m_channelCount(channels) {
+
+    // strncpy does not terminate the destination when the source fills it, so
+    // copy one byte less and terminate explicitly.
+    strncpy(m_sessionId, switch_core_session_get_uuid(session), sizeof(m_sessionId) - 1);
+    m_sessionId[sizeof(m_sessionId) - 1] = '\0';
+  }
+
+  ~GStreamer() {
+    //switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(m_session), SWITCH_LOG_INFO, "GStreamer::~GStreamer - deleting channel and stub: %p\n", (void*)this);
+  }
+
+  /**
+   * Create an insecure gRPC channel to m_hostport and a TranscribeService stub on it.
+   *
+   * @return the channel that was created.
+   * @throws std::runtime_error when the channel cannot be created.
+   */
+  std::shared_ptr<grpc::Channel> createGrpcConnection() {
+    std::shared_ptr<grpc::Channel> grpcChannel = grpc::CreateChannel(m_hostport, grpc::InsecureChannelCredentials());
+
+    if (!grpcChannel) {
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "GStreamer %p failed creating grpc channel\n", this);
+      throw std::runtime_error(std::string("Error creating grpc channel"));
+    }
+
+    m_stub = cobalt_asr::TranscribeService::NewStub(grpcChannel);
+    return grpcChannel;
+  }
+
+  /**
+   * Open the streaming call, send the recognition config as the first message and
+   * then flush any audio that was buffered while disconnected.
+   *
+   * Must be called at most once, and only while not connected.
+   * May throw whatever createGrpcConnection() throws.
+   */
+  void connect() {
+    const char* var;
+    switch_channel_t *channel = switch_core_session_get_channel(m_session);
+
+    assert(!m_connected);
+    // Begin a stream.
+
+    std::shared_ptr<grpc::Channel> grpcChannel = createGrpcConnection();
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p creating streamer\n", this);
+    m_streamer = m_stub->StreamingRecognize(&m_context);
+    m_connected = true;
+
+    /* set configuration parameters which are carried in the RecognitionInitMessage */
+    auto config = m_request.mutable_config();
+    auto format = config->mutable_audio_format_raw();
+    config->set_model_id(m_model);
+    format->set_encoding(cobalt_asr::AudioEncoding::AUDIO_ENCODING_SIGNED);
+    format->set_bit_depth(16);
+    format->set_sample_rate(8000);
+    format->set_channels(m_channelCount);
+    format->set_byte_order(cobalt_asr::ByteOrder::BYTE_ORDER_LITTLE_ENDIAN);
+
+    // confusion network
+    if (switch_true(switch_channel_get_variable(channel, "COBALT_ENABLE_CONFUSION_NETWORK"))) {
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p set_enable_confusion_network true\n", this);
+      config->set_enable_confusion_network(true);
+    }
+    // metadata
+    if ((var = switch_channel_get_variable(channel, "COBALT_METADATA"))) {
+      auto metadata = config->mutable_metadata();
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p cobalt metadata %s\n", this, var);
+      metadata->set_custom_metadata(var);
+    }
+
+    // set_enable_word_details
+    if (switch_true(switch_channel_get_variable(channel, "COBALT_ENABLE_WORD_TIME_OFFSETS"))) {
+      config->set_enable_word_details(true);
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p enable word-level details\n", this);
+    }
+
+    // compiled context data (the channel variable carries it base64-encoded)
+    if ((var = switch_channel_get_variable(channel, "COBALT_COMPILED_CONTEXT_DATA"))) {
+      auto data = base64_decode(var);
+      config->mutable_context()->add_compiled()->set_data(data);
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p set compiled context %s\n", this, var);
+    }
+
+    // read thread is waiting on this
+    m_promise.set_value();
+
+    // Write the first request, containing the config only.
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p sending initial message\n", this);
+    m_streamer->Write(m_request);
+    m_request.clear_config();
+
+    // send any buffered audio
+    int nFrames = m_audioBuffer.getNumItems();
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "GStreamer %p got stream ready, %d buffered frames\n", this, nFrames);
+    if (nFrames) {
+      char *p;
+      do {
+        p = m_audioBuffer.getNextChunk();
+        if (p) {
+          write(p, CHUNKSIZE);
+        }
+      } while (p);
+    }
+  }
+
+  /**
+   * Send one block of audio to the server, or buffer it while not yet connected.
+   *
+   * While disconnected, only blocks whose length is a multiple of CHUNKSIZE are
+   * buffered; other blocks are dropped.
+   *
+   * @return true while disconnected; otherwise the result of the stream write.
+   */
+  bool write(void* data, uint32_t datalen) {
+    if (!m_connected) {
+      if (datalen % CHUNKSIZE == 0) {
+        m_audioBuffer.add(data, datalen);
+      }
+      return true;
+    }
+    m_request.clear_audio();
+    m_request.mutable_audio()->set_data(data, datalen);
+    bool ok = m_streamer->Write(m_request);
+    return ok;
+  }
+
+  /** Size reported by the stream for the next message (0 if none was reported). */
+  uint32_t nextMessageSize(void) {
+    uint32_t size = 0;
+    m_streamer->NextMessageSize(&size);
+    return size;
+  }
+
+  /** Read the next response from the stream; returns false when nothing more can be read. */
+  bool read(cobalt_asr::StreamingRecognizeResponse* response) {
+    return m_streamer->Read(response);
+  }
+
+  /** Finish the call and return its final status. */
+  grpc::Status finish() {
+    return m_streamer->Finish();
+  }
+
+  /**
+   * Signal that no more audio will be written.  When the stream was never
+   * opened this releases the waiting reader instead.
+   */
+  void writesDone() {
+    // grpc crashes if we call this twice on a stream
+    if (!m_connected) {
+      cancelConnect();
+    }
+    else if (!m_writesDone) {
+      m_streamer->WritesDone();
+      m_writesDone = true;
+    }
+  }
+
+  /**
+   * Block until connect() or cancelConnect() fulfils the promise.
+   * Takes the promise's future, so it can only be called once per instance.
+   *
+   * @return true when the stream was opened, false when connecting was cancelled.
+   */
+  bool waitForConnect() {
+    std::shared_future<void> sf(m_promise.get_future());
+    sf.wait();
+    return m_connected;
+  }
+
+  /** Release a reader blocked in waitForConnect() without ever connecting. */
+  void cancelConnect() {
+    assert(!m_connected);
+    m_promise.set_value();
+  }
+
+  bool isConnected() {
+    return m_connected;
+  }
+
+private:
+  switch_core_session_t* m_session;
+  grpc::ClientContext m_context;
+  std::shared_ptr<grpc::Channel> m_channel;
+  std::unique_ptr<cobalt_asr::TranscribeService::Stub> m_stub;
+  cobalt_asr::StreamingRecognizeRequest m_request;
+  std::unique_ptr< grpc::ClientReaderWriterInterface<cobalt_asr::StreamingRecognizeRequest, cobalt_asr::StreamingRecognizeResponse> > m_streamer;
+  bool m_writesDone;   // WritesDone() has been sent on the stream
+  bool m_connected;    // the streaming call has been opened
+  bool m_interim;
+  std::string m_hostport;
+  std::string m_model;
+  std::promise<void> m_promise;   // fulfilled by connect() or cancelConnect()
+  SimpleBuffer m_audioBuffer;     // audio received before the stream was opened
+  uint32_t m_channelCount;
+  char m_sessionId[256];
+};
+
+/**
+ * Thread entry point that reads recognition responses for one session.
+ *
+ * Waits until the streamer has either connected or been cancelled, then reads
+ * responses until the stream ends.  Every response that carries a result is
+ * converted to JSON and handed to cb->responseHandler.  When the stream ends,
+ * finish() is called and its status logged (only if the session still exists).
+ *
+ * @param thread  unused.
+ * @param obj     the session's struct cap_cb.
+ * @return always nullptr.
+ */
+static void *SWITCH_THREAD_FUNC grpc_read_thread(switch_thread_t *thread, void *obj) {
+  struct cap_cb *cb = (struct cap_cb *) obj;
+  GStreamer* streamer = (GStreamer *) cb->streamer;
+
+  bool connected = streamer->waitForConnect();
+  if (!connected) {
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "cobalt transcribe grpc read thread exiting since we didnt connect\n") ;
+    return nullptr;
+  }
+
+  // Read responses.
+  cobalt_asr::StreamingRecognizeResponse response;
+  while (streamer->read(&response)) {  // Returns false when no more to read.
+    // Look the session up (and read-lock it) for the duration of this response.
+    switch_core_session_t* session = switch_core_session_locate(cb->sessionId);
+    if (!session) {
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "grpc_read_thread: session %s is gone!\n", cb->sessionId) ;
+      return nullptr;
+    }
+    if (response.has_error()) {
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "grpc_read_thread: error: %s\n", response.error().message().c_str()) ;
+    }
+    if (!response.has_result()) {
+      switch_core_session_rwunlock(session);
+      continue;
+    }
+
+    const auto& result = response.result();
+    auto is_final = !result.is_partial();
+    auto audio_channel = result.audio_channel();
+
+    cJSON * jResult = cJSON_CreateObject();
+    cJSON * jAlternatives = cJSON_CreateArray();
+    cJSON_AddItemToObject(jResult, "is_final", cJSON_CreateBool(is_final));
+    cJSON_AddItemToObject(jResult, "channel", cJSON_CreateNumber(audio_channel));
+    cJSON_AddItemToObject(jResult, "alternatives", jAlternatives);
+
+    for (int a = 0; a < result.alternatives_size(); ++a) {
+      // Bind by reference: copying the message for every alternative is wasted work.
+      const auto& alternative = result.alternatives(a);
+      cJSON* jAlt = cJSON_CreateObject();
+
+      cJSON_AddItemToObject(jAlt, "confidence", cJSON_CreateNumber(alternative.confidence()));
+      cJSON_AddItemToObject(jAlt, "transcript_formatted", cJSON_CreateString(alternative.transcript_formatted().c_str()));
+      cJSON_AddItemToObject(jAlt, "transcript_raw", cJSON_CreateString(alternative.transcript_raw().c_str()));
+      cJSON_AddItemToObject(jAlt, "start_time_ms", cJSON_CreateNumber(alternative.start_time_ms()));
+      cJSON_AddItemToObject(jAlt, "duration_ms", cJSON_CreateNumber(alternative.duration_ms()));
+
+      if (alternative.has_word_details()) {
+        cJSON * jWords = cJSON_CreateArray();
+        cJSON * jWordsRaw = cJSON_CreateArray();
+        auto& word_details = alternative.word_details();
+        for (int b = 0; b < word_details.formatted_size(); ++b) {
+          cJSON* jWord = cJSON_CreateObject();
+          auto& word_info = word_details.formatted(b);
+          cJSON_AddItemToObject(jWord, "word", cJSON_CreateString(word_info.word().c_str()));
+          cJSON_AddItemToObject(jWord, "confidence", cJSON_CreateNumber(word_info.confidence()));
+          cJSON_AddItemToObject(jWord, "start_time_ms", cJSON_CreateNumber(word_info.start_time_ms()));
+          cJSON_AddItemToObject(jWord, "duration_ms", cJSON_CreateNumber(word_info.duration_ms()));
+
+          cJSON_AddItemToArray(jWords, jWord);
+        }
+        cJSON_AddItemToObject(jAlt, "formatted_words", jWords);
+
+        for (int c = 0; c < word_details.raw_size(); ++c) {
+          cJSON* jWord = cJSON_CreateObject();
+          auto& word_info = word_details.raw(c);
+          cJSON_AddItemToObject(jWord, "word", cJSON_CreateString(word_info.word().c_str()));
+          cJSON_AddItemToObject(jWord, "confidence", cJSON_CreateNumber(word_info.confidence()));
+          cJSON_AddItemToObject(jWord, "start_time_ms", cJSON_CreateNumber(word_info.start_time_ms()));
+          cJSON_AddItemToObject(jWord, "duration_ms", cJSON_CreateNumber(word_info.duration_ms()));
+
+          cJSON_AddItemToArray(jWordsRaw, jWord);
+        }
+        cJSON_AddItemToObject(jAlt, "raw_words", jWordsRaw);
+
+      }
+      cJSON_AddItemToArray(jAlternatives, jAlt);
+    }
+    char* json = cJSON_PrintUnformatted(jResult);
+    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "cobalt transcription: %s\n", json) ;
+    cb->responseHandler(session, (const char *) json, cb->bugname, NULL);
+    free(json);
+
+    // jResult owns every node attached above, so one delete releases the whole tree.
+    cJSON_Delete(jResult);
+
+    switch_core_session_rwunlock(session);
+  }
+
+  {
+    switch_core_session_t* session = switch_core_session_locate(cb->sessionId);
+    if (session) {
+      grpc::Status status = streamer->finish();
+      switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "grpc_read_thread: finish() status %s (%d)\n", status.error_message().c_str(), status.error_code()) ;
+      switch_core_session_rwunlock(session);
+    }
+  }
+
+  return nullptr;
+}
+
+extern "C" {
+
+    /**
+     * Query the Cobalt server at `hostport` for its version and publish it as a
+     * TRANSCRIBE_EVENT_VERSION_RESPONSE custom event whose body is a JSON object
+     * of the form {"version": "..."}.
+     *
+     * @param session   session whose channel the event is attributed to.
+     * @param hostport  address of the Cobalt server.
+     * @return SWITCH_STATUS_SUCCESS when the server answered and the event was
+     *         built; SWITCH_STATUS_FALSE when the channel could not be created,
+     *         the request failed, or the event could not be created.
+     */
+    switch_status_t cobalt_speech_get_version(switch_core_session_t *session, char* hostport) {
+      switch_channel_t *channel = switch_core_session_get_channel(session);
+      switch_event_t *event = nullptr;
+
+      grpc::ClientContext context;
+      std::shared_ptr<grpc::Channel> grpcChannel = grpc::CreateChannel(hostport, grpc::InsecureChannelCredentials());
+
+      if (!grpcChannel) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "failed creating grpc channel\n");
+        return SWITCH_STATUS_FALSE;
+      }
+
+      std::unique_ptr<cobalt_asr::TranscribeService::Stub> stub = cobalt_asr::TranscribeService::NewStub(grpcChannel);
+
+      cobalt_asr::VersionResponse response;
+      grpc::Status status = stub->Version(&context, cobalt_asr::VersionRequest(), &response);
+      if (!status.ok()) {
+        // Do not report an empty version as if the server had answered.
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "cobalt version request failed: %s (%d)\n",
+          status.error_message().c_str(), (int) status.error_code());
+        return SWITCH_STATUS_FALSE;
+      }
+
+      cJSON * jResult = cJSON_CreateObject();
+      cJSON_AddItemToObject(jResult, "version", cJSON_CreateString(response.version().c_str()));
+
+      char* json = cJSON_PrintUnformatted(jResult);
+      switch_status_t rc = SWITCH_STATUS_FALSE;
+
+      if (json && switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_VERSION_RESPONSE) == SWITCH_STATUS_SUCCESS) {
+        switch_channel_event_set_data(channel, event);
+        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "cobalt");
+        switch_event_add_body(event, "%s", json);
+        switch_event_fire(&event);
+        // Kept from the original sequence: releases the event if it is still set after firing.
+        switch_event_destroy(&event);
+
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "retrieved version for cobalt speech: %s\n", json);
+        rc = SWITCH_STATUS_SUCCESS;
+      }
+      else {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "unable to create version response event\n");
+      }
+
+      free(json);
+      cJSON_Delete(jResult);
+
+      return rc;
+    }
+
+    /**
+     * C-callable wrapper around compile_context_phrases().
+     *
+     * @param session   session the request is made on behalf of.
+     * @param hostport  address of the Cobalt server.
+     * @param model     model id the context is compiled for.
+     * @param token     context token the phrases are associated with.
+     * @param phrases   hint phrases to compile.
+     * @return SWITCH_STATUS_SUCCESS when compile_context_phrases() returns a
+     *         non-null result, SWITCH_STATUS_FALSE otherwise.
+     */
+    switch_status_t cobalt_speech_compile_context(switch_core_session_t *session, char* hostport, char* model, char* token, char* phrases) {
+      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "compile context, model: %s, token: %s, phrases: %s\n", model, token, phrases);
+
+      const bool compiled = compile_context_phrases(session, hostport, model, token, phrases) != nullptr;
+      return compiled ? SWITCH_STATUS_SUCCESS : SWITCH_STATUS_FALSE;
+    }
+
+
+    /**
+     * Ask the Cobalt server at `hostport` for its models and publish them as a
+     * TRANSCRIBE_EVENT_MODEL_LIST_RESPONSE custom event.
+     *
+     * The event body is a JSON array with one object per model holding "id",
+     * "name" and an "attributes" array.  When the model has attributes, that array
+     * holds one object with "supported_sample_rates", "sample_rate" and
+     * "context_info" ("allowed_context_tokens", "supports_context").
+     *
+     * @param session   session whose channel the event is attributed to.
+     * @param hostport  address of the Cobalt server.
+     * @return SWITCH_STATUS_SUCCESS when the server answered and the event was
+     *         built; SWITCH_STATUS_FALSE when the channel could not be created,
+     *         the request failed, or the event could not be created.
+     */
+    switch_status_t cobalt_speech_list_models(switch_core_session_t *session, char* hostport) {
+      switch_channel_t *channel = switch_core_session_get_channel(session);
+      switch_event_t *event = nullptr;
+
+      grpc::ClientContext context;
+      std::shared_ptr<grpc::Channel> grpcChannel = grpc::CreateChannel(hostport, grpc::InsecureChannelCredentials());
+
+      if (!grpcChannel) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "failed creating grpc channel\n");
+        return SWITCH_STATUS_FALSE;
+      }
+
+      std::unique_ptr<cobalt_asr::TranscribeService::Stub> stub = cobalt_asr::TranscribeService::NewStub(grpcChannel);
+
+      cobalt_asr::ListModelsResponse response;
+      grpc::Status status = stub->ListModels(&context, cobalt_asr::ListModelsRequest(), &response);
+      if (!status.ok()) {
+        // Do not report an empty model list as if the server had answered.
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "cobalt list models request failed: %s (%d)\n",
+          status.error_message().c_str(), (int) status.error_code());
+        return SWITCH_STATUS_FALSE;
+      }
+
+      cJSON * jModels = cJSON_CreateArray();
+      for (int i = 0; i < response.models_size(); i++) {
+        // Bind by reference rather than copying each model message.
+        const auto& model = response.models(i);
+        cJSON* jModel = cJSON_CreateObject();
+        cJSON * jAttributes = cJSON_CreateArray();
+
+        cJSON_AddItemToArray(jModels, jModel);
+        cJSON_AddItemToObject(jModel, "attributes", jAttributes);
+        cJSON_AddItemToObject(jModel, "id", cJSON_CreateString(model.id().c_str()));
+        cJSON_AddItemToObject(jModel, "name", cJSON_CreateString(model.name().c_str()));
+
+        if (model.has_attributes()) {
+          auto& attributes = model.attributes();
+          cJSON* jAttr = cJSON_CreateObject();
+          cJSON_AddItemToArray(jAttributes, jAttr);
+
+          /* supported sample rates */
+          cJSON * jSupportedSampleRates = cJSON_CreateArray();
+          cJSON_AddItemToObject(jAttr, "supported_sample_rates", jSupportedSampleRates);
+          for (int j = 0; j < attributes.supported_sample_rates_size(); j++) {
+            // The container is an array, so its elements are added without a key.
+            cJSON_AddItemToArray(jSupportedSampleRates, cJSON_CreateNumber(attributes.supported_sample_rates(j)));
+          }
+
+          /* sample rate */
+          cJSON_AddItemToObject(jAttr, "sample_rate", cJSON_CreateNumber(attributes.sample_rate()));
+
+          /* context info */
+          auto& context_info = attributes.context_info();
+          cJSON * jContextInfo = cJSON_CreateObject();
+          cJSON* jAllowedContextTokens = cJSON_CreateArray();
+          cJSON_AddItemToObject(jAttr, "context_info", jContextInfo);
+          cJSON_AddItemToObject(jContextInfo, "allowed_context_tokens", jAllowedContextTokens);
+          for (int j = 0; j < context_info.allowed_context_tokens_size(); j++) {
+            cJSON_AddItemToArray(jAllowedContextTokens, cJSON_CreateString(context_info.allowed_context_tokens(j).c_str()));
+          }
+
+          cJSON_AddBoolToObject(jContextInfo, "supports_context", context_info.supports_context());
+        }
+      }
+
+      char* json = cJSON_PrintUnformatted(jModels);
+      switch_status_t rc = SWITCH_STATUS_FALSE;
+
+      if (json && switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, TRANSCRIBE_EVENT_MODEL_LIST_RESPONSE) == SWITCH_STATUS_SUCCESS) {
+        switch_channel_event_set_data(channel, event);
+        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "cobalt");
+        switch_event_add_body(event, "%s", json);
+        switch_event_fire(&event);
+        // Kept from the original sequence: releases the event if it is still set after firing.
+        switch_event_destroy(&event);
+
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "retrieved %d models for cobalt speech: %s\n", response.models_size(), json);
+        rc = SWITCH_STATUS_SUCCESS;
+      }
+      else {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "unable to create model list response event\n");
+      }
+
+      free(json);
+      cJSON_Delete(jModels);
+
+      return rc;
+    }
+
+
+    switch_status_t cobalt_speech_init() { /* initialisation hook: performs no work */
+      return SWITCH_STATUS_SUCCESS; /* unconditionally reports success */
+    }
+
+    switch_status_t cobalt_speech_cleanup() { /* cleanup hook: performs no work */
+      return SWITCH_STATUS_SUCCESS; /* unconditionally reports success */
+    }
+    /**
+     * Prepare a session for transcription: allocate the per-bug cap_cb, set up an
+     * optional resampler (to 8000 Hz) and optional VAD, create the GStreamer and
+     * start the thread that reads responses.
+     *
+     * When the channel variable START_RECOGNIZING_ON_VAD is true a VAD is created
+     * and the connection to Cobalt is NOT opened here; otherwise the streamer
+     * connects immediately.
+     *
+     * @param session             session to transcribe.
+     * @param responseHandler     callback invoked with each JSON result.
+     * @param hostport            address of the Cobalt server.
+     * @param samples_per_second  not used here; the rate is taken from the
+     *                            session's read codec.
+     * @param channels            number of audio channels.
+     * @param model               model id to recognize with.
+     * @param interim             passed through to the GStreamer.
+     * @param bugname             name of the media bug, stored in the cap_cb.
+     * @param ppUserData          receives the allocated cap_cb on success.
+     * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when the resampler or
+     *         the streamer/connection could not be set up.
+     */
+    switch_status_t cobalt_speech_session_init(switch_core_session_t *session, responseHandler_t responseHandler, char* hostport,
+      uint32_t samples_per_second, uint32_t channels, char* model, int interim, char *bugname, void **ppUserData) {
+
+      switch_channel_t *channel = switch_core_session_get_channel(session);
+      auto read_codec = switch_core_session_get_read_codec(session);
+      uint32_t sampleRate = read_codec->implementation->actual_samples_per_second;
+      struct cap_cb *cb;
+      int err = 0;
+
+      // NOTE(review): the code below tests cb->vad / cb->resampler without setting
+      // them first, so it relies on this allocation being zero-filled - confirm.
+      cb =(struct cap_cb *) switch_core_session_alloc(session, sizeof(*cb));
+      strncpy(cb->sessionId, switch_core_session_get_uuid(session), MAX_SESSION_ID);
+      strncpy(cb->bugname, bugname, MAX_BUG_LEN);
+      cb->end_of_utterance = 0;
+
+      switch_mutex_init(&cb->mutex, SWITCH_MUTEX_NESTED, switch_core_session_get_pool(session));
+      if (sampleRate != 8000) {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cobalt_speech_session_init:  initializing resampler\n");
+        cb->resampler = speex_resampler_init(channels, sampleRate, 8000, SWITCH_RESAMPLE_QUALITY, &err);
+        if (0 != err) {
+          switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error initializing resampler: %s.\n",
+                                 switch_channel_get_name(channel), speex_resampler_strerror(err));
+          return SWITCH_STATUS_FALSE;
+        }
+      } else {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "%s: no resampling needed for this call (bug: %s)\n", switch_channel_get_name(channel), bugname);
+      }
+      cb->responseHandler = responseHandler;
+
+      // allocate vad if we are delaying connecting to the recognizer until we detect speech
+      if (switch_channel_var_true(channel, "START_RECOGNIZING_ON_VAD")) {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cobalt_speech_session_init:  initializing vad\n");
+        cb->vad = switch_vad_init(sampleRate, channels);
+        if (cb->vad) {
+          const char* var;
+          int mode = 2;
+          int silence_ms = 150;
+          int voice_ms = 250;
+          int debug = 0;
+
+          if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_MODE"))) {
+            mode = atoi(var);
+          }
+          if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_SILENCE_MS"))) {
+            silence_ms = atoi(var);
+          }
+          if ((var = switch_channel_get_variable(channel, "RECOGNIZER_VAD_VOICE_MS"))) {
+            voice_ms = atoi(var);
+          }
+          switch_vad_set_mode(cb->vad, mode);
+          switch_vad_set_param(cb->vad, "silence_ms", silence_ms);
+          switch_vad_set_param(cb->vad, "voice_ms", voice_ms);
+          switch_vad_set_param(cb->vad, "debug", debug);
+        }
+      }
+
+      GStreamer *streamer = NULL;
+      try {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cobalt_speech_session_init:  allocating streamer\n");
+        streamer = new GStreamer(session, hostport, model, channels, interim);
+        cb->streamer = streamer;
+
+        // connect() can throw (channel creation failure), so it has to stay inside
+        // the try block: an exception must not escape this extern "C" function.
+        if (!cb->vad) {
+          switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cobalt_speech_session_init:  no vad so connecting to cobalt immediately\n");
+          streamer->connect();
+        }
+      } catch (const std::exception& e) {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error initializing gstreamer: %s.\n",
+          switch_channel_get_name(channel), e.what());
+
+        // Release everything acquired above; the caller never receives cb on failure.
+        delete streamer;
+        cb->streamer = NULL;
+        if (cb->resampler) {
+          speex_resampler_destroy(cb->resampler);
+          cb->resampler = NULL;
+        }
+        if (cb->vad) {
+          switch_vad_destroy(&cb->vad);
+          cb->vad = nullptr;
+        }
+        return SWITCH_STATUS_FALSE;
+      }
+
+      // create the read thread
+      switch_threadattr_t *thd_attr = NULL;
+      switch_memory_pool_t *pool = switch_core_session_get_pool(session);
+
+      switch_threadattr_create(&thd_attr, pool);
+      switch_threadattr_stacksize_set(thd_attr, SWITCH_THREAD_STACKSIZE);
+      switch_thread_create(&cb->thread, thd_attr, grpc_read_thread, cb, pool);
+
+      *ppUserData = cb;
+      return SWITCH_STATUS_SUCCESS;
+    }
+
+    /**
+     * Tear down the transcription state attached to a media bug.
+     *
+     * @param session           session owning the bug
+     * @param channelIsClosing  when zero, the bug is also removed from the session here
+     * @param bug               media bug whose user data is the struct cap_cb to release
+     * @return SWITCH_STATUS_SUCCESS once the stream is closed; SWITCH_STATUS_FALSE when
+     *         no bug was supplied or the bug is no longer registered on the channel.
+     */
+    switch_status_t cobalt_speech_session_cleanup(switch_core_session_t *session, int channelIsClosing, switch_media_bug_t *bug) {
+      switch_channel_t *channel = switch_core_session_get_channel(session);
+
+      if (!bug) {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "%s Bug is not attached.\n", switch_channel_get_name(channel));
+        return SWITCH_STATUS_FALSE;
+      }
+
+      struct cap_cb *cb = (struct cap_cb *) switch_core_media_bug_get_user_data(bug);
+      switch_mutex_lock(cb->mutex);
+
+      // the channel private doubles as the "still attached" marker; if it is already
+      // gone another caller got here first
+      if (!switch_channel_get_private(channel, cb->bugname)) {
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "%s Bug %s is not attached (race).\n", switch_channel_get_name(channel), cb->bugname);
+        switch_mutex_unlock(cb->mutex);
+        return SWITCH_STATUS_FALSE;
+      }
+      switch_channel_set_private(channel, cb->bugname, NULL);
+
+      // signal end of audio, wait for the read thread to drain, then free the streamer
+      GStreamer* pStreamer = (GStreamer *) cb->streamer;
+      if (pStreamer) {
+        switch_status_t joinStatus;
+
+        pStreamer->writesDone();
+
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cobalt_speech_session_cleanup: GStreamer (%p) waiting for read thread to complete\n", (void*)pStreamer);
+        switch_thread_join(&joinStatus, cb->thread);
+        switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cobalt_speech_session_cleanup:  GStreamer (%p) read thread completed\n", (void*)pStreamer);
+
+        delete pStreamer;
+        cb->streamer = NULL;
+      }
+
+      if (cb->resampler) {
+        speex_resampler_destroy(cb->resampler);
+      }
+
+      if (cb->vad) {
+        switch_vad_destroy(&cb->vad);
+        cb->vad = nullptr;
+      }
+
+      // on channel close the bug is not removed here
+      if (!channelIsClosing) {
+        switch_core_media_bug_remove(session, &bug);
+      }
+
+      switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "cobalt_speech_session_cleanup: Closed stream\n");
+      switch_mutex_unlock(cb->mutex);
+
+      return SWITCH_STATUS_SUCCESS;
+    }
+
+    /**
+     * Media bug read handler: drains the audio queued on the bug and forwards it
+     * to the streamer, resampling first when a resampler is configured.
+     *
+     * When VAD is enabled and the streamer is not yet connected, each frame is run
+     * through the VAD; on START_TALKING the streamer is connected and a
+     * "vad_detected" notification is sent through cb->responseHandler.
+     *
+     * @param bug        media bug to read frames from
+     * @param user_data  the struct cap_cb registered with the bug
+     * @return always SWITCH_TRUE
+     */
+    switch_bool_t cobalt_speech_frame(switch_media_bug_t *bug, void* user_data) {
+      switch_core_session_t *session = switch_core_media_bug_get_session(bug);
+      struct cap_cb *cb = (struct cap_cb *) user_data;
+
+      // cheap unlocked pre-check; nothing to do without a streamer or after end of utterance
+      if (!cb->streamer || cb->end_of_utterance) return SWITCH_TRUE;
+
+      // never block here: if cleanup currently holds the mutex, skip this round
+      if (switch_mutex_trylock(cb->mutex) != SWITCH_STATUS_SUCCESS) return SWITCH_TRUE;
+
+      // fetch the streamer only after the lock is held: cleanup deletes it and clears
+      // cb->streamer under this same mutex, so a pointer read earlier may be stale
+      GStreamer* streamer = (GStreamer *) cb->streamer;
+      if (streamer) {
+        uint8_t data[SWITCH_RECOMMENDED_BUFFER_SIZE];
+        switch_frame_t frame = {};
+        frame.data = data;
+        frame.buflen = SWITCH_RECOMMENDED_BUFFER_SIZE;
+
+        while (switch_core_media_bug_read(bug, &frame, SWITCH_TRUE) == SWITCH_STATUS_SUCCESS && !switch_test_flag((&frame), SFF_CNG)) {
+          if (!frame.datalen) continue;
+
+          if (cb->vad && !streamer->isConnected()) {
+            switch_vad_state_t state = switch_vad_process(cb->vad, (int16_t*) frame.data, frame.samples);
+            if (state == SWITCH_VAD_STATE_START_TALKING) {
+              switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "detected speech, connect to cobalt speech now\n");
+              streamer->connect();
+              cb->responseHandler(session, "vad_detected", cb->bugname, NULL);
+            }
+          }
+
+          if (cb->resampler) {
+            spx_int16_t out[SWITCH_RECOMMENDED_BUFFER_SIZE];
+            spx_uint32_t out_len = SWITCH_RECOMMENDED_BUFFER_SIZE;
+            spx_uint32_t in_len = frame.samples;
+
+            speex_resampler_process_interleaved_int(cb->resampler,
+              (const spx_int16_t *) frame.data,
+              (spx_uint32_t *) &in_len,
+              &out[0],
+              &out_len);
+            streamer->write( &out[0], sizeof(spx_int16_t) * out_len);
+          }
+          else {
+            streamer->write( frame.data, sizeof(spx_int16_t) * frame.samples);
+          }
+        }
+      }
+
+      switch_mutex_unlock(cb->mutex);
+      return SWITCH_TRUE;
+    }
+}
--- a/mod_cobalt_transcribe/cobalt_glue.h
+++ b/mod_cobalt_transcribe/cobalt_glue.h
@@ -0,0 +1,14 @@
+#ifndef __COBALT_GLUE_H__
+#define __COBALT_GLUE_H__
+
+/*
+ * Entry points of the glue layer called from mod_cobalt_transcribe.c.
+ *
+ * This header includes nothing itself: the switch_* types and responseHandler_t
+ * must already be declared by the includer (mod_cobalt_transcribe.c includes
+ * mod_cobalt_transcribe.h before this file).
+ */
+
+/* Called once from the module load function. */
+switch_status_t cobalt_speech_init();
+/* Called once from the module shutdown function. */
+switch_status_t cobalt_speech_cleanup();
+/*
+ * Set up per-call transcription state. On success *ppUserData receives the
+ * state pointer, which the caller then registers as the media bug's user data.
+ * The caller passes the model name in the `lang` parameter.
+ */
+switch_status_t cobalt_speech_session_init(switch_core_session_t *session, responseHandler_t responseHandler, char* hostport, 
+		uint32_t samples_per_second, uint32_t channels, char* lang, int interim, char *bugname, void **ppUserData);
+/*
+ * Release the state attached to `bug`. When channelIsClosing is zero the bug
+ * is also removed from the session.
+ */
+switch_status_t cobalt_speech_session_cleanup(switch_core_session_t *session, int channelIsClosing, switch_media_bug_t *bug);
+/* Media bug read handler; user_data is the pointer returned via ppUserData. */
+switch_bool_t cobalt_speech_frame(switch_media_bug_t *bug, void* user_data);
+/* Backs the uuid_cobalt_list_models API command. */
+switch_status_t cobalt_speech_list_models(switch_core_session_t *session, char* hostport);
+/* Backs the uuid_cobalt_get_version API command. */
+switch_status_t cobalt_speech_get_version(switch_core_session_t *session, char* hostport);
+/* Backs the uuid_cobalt_compile_context API command. */
+switch_status_t cobalt_speech_compile_context(switch_core_session_t *session, char* hostport, char* model, char* token, char* phrases);
+
+#endif
--- a/mod_cobalt_transcribe/mod_cobalt_transcribe.c
+++ b/mod_cobalt_transcribe/mod_cobalt_transcribe.c
@@ -0,0 +1,368 @@
+/* 
+ *
+ * mod_cobalt_transcribe.c -- Freeswitch module for real-time transcription using cobalt's gRPC interface
+ *
+ */
+#include "mod_cobalt_transcribe.h"
+#include "cobalt_glue.h"
+#include <stdlib.h>
+#include <switch.h>
+#include <switch_curl.h>
+
+
+/* Prototypes for the module entry points */
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_transcribe_shutdown);
+/* NOTE(review): a runtime function is declared here, but no definition appears in
+   this file and SWITCH_MODULE_DEFINITION below receives NULL as its last
+   argument -- confirm whether this declaration is still needed. */
+SWITCH_MODULE_RUNTIME_FUNCTION(mod_transcribe_runtime);
+SWITCH_MODULE_LOAD_FUNCTION(mod_transcribe_load);
+
+/* Registers the module under the name mod_cobalt_transcribe with its load and shutdown functions. */
+SWITCH_MODULE_DEFINITION(mod_cobalt_transcribe, mod_transcribe_load, mod_transcribe_shutdown, NULL);
+
+
+/* Forward declaration: start_capture() calls do_stop() to replace an existing bug. */
+static switch_status_t do_stop(switch_core_session_t *session, char* bugname);
+
+/*
+ * Callback handed to the glue layer: turns a notification into a FreeSWITCH
+ * custom event and fires it.
+ *
+ *   json    - "vad_detected", "error", or otherwise a transcription payload that
+ *             becomes the event body
+ *   bugname - optional; added as the "media-bugname" header when non-NULL
+ *   details - body of the error event when json is "error" (may be NULL)
+ *
+ * Nothing is fired when json is NULL or when the event cannot be created.
+ */
+static void responseHandler(switch_core_session_t* session, const char * json, const char* bugname, 
+	const char* details) {
+	switch_event_t *event = NULL;
+	switch_channel_t *channel = switch_core_session_get_channel(session);
+	const char *subclass = NULL;
+	const char *body = NULL;
+
+	if (!json) {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "%s responseHandler called without a payload.\n", bugname ? bugname : "cobalt_transcribe");
+		return;
+	}
+
+	if (0 == strcmp("vad_detected", json)) {
+		subclass = TRANSCRIBE_EVENT_VAD_DETECTED;
+	}
+	else if (0 == strcmp("error", json)) {
+		subclass = TRANSCRIBE_EVENT_ERROR;
+		/* never hand a NULL pointer to the "%s" body format */
+		body = details ? details : "";
+	}
+	else {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "%s json payload: %s.\n", bugname ? bugname : "cobalt_transcribe", json);
+		subclass = TRANSCRIBE_EVENT_RESULTS;
+		body = json;
+	}
+
+	/* event stays NULL on failure; firing it would dereference an invalid pointer */
+	if (switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, subclass) != SWITCH_STATUS_SUCCESS || !event) {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "%s unable to create event %s.\n", bugname ? bugname : "cobalt_transcribe", subclass);
+		return;
+	}
+
+	switch_channel_event_set_data(channel, event);
+	switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "transcription-vendor", "cobalt");
+	if (body) switch_event_add_body(event, "%s", body);
+	if (bugname) switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "media-bugname", bugname);
+	switch_event_fire(&event);
+}
+
+/*
+ * Media bug callback. READ events are handed to cobalt_speech_frame() and its
+ * result is returned; CLOSE tears the transcription session down; every other
+ * event type is ignored. All paths other than READ return SWITCH_TRUE.
+ */
+static switch_bool_t capture_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type)
+{
+	switch_core_session_t *session = switch_core_media_bug_get_session(bug);
+
+	if (type == SWITCH_ABC_TYPE_READ) {
+		return cobalt_speech_frame(bug, user_data);
+	}
+
+	if (type == SWITCH_ABC_TYPE_INIT) {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Got SWITCH_ABC_TYPE_INIT.\n");
+	}
+	else if (type == SWITCH_ABC_TYPE_CLOSE) {
+		/* channelIsClosing=1: cleanup must not remove the bug itself */
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Got SWITCH_ABC_TYPE_CLOSE, calling cobalt_speech_session_cleanup.\n");
+		cobalt_speech_session_cleanup(session, 1, bug);
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Finished SWITCH_ABC_TYPE_CLOSE.\n");
+	}
+
+	return SWITCH_TRUE;
+}
+
+/*
+ * Stop the transcription registered on the session under `bugname`.
+ * Returns SWITCH_STATUS_SUCCESS when no such bug is registered, otherwise the
+ * status reported by cobalt_speech_session_cleanup().
+ */
+static switch_status_t do_stop(switch_core_session_t *session, char *bugname)
+{
+	switch_channel_t *channel = switch_core_session_get_channel(session);
+	switch_media_bug_t *bug = switch_channel_get_private(channel, bugname);
+	switch_status_t result;
+
+	if (!bug) {
+		return SWITCH_STATUS_SUCCESS;
+	}
+
+	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "Received user command command, calling cobalt_speech_session_cleanup (possibly to stop prev transcribe)\n");
+	/* channelIsClosing=0: cleanup also removes the bug from the session */
+	result = cobalt_speech_session_cleanup(session, 0, bug);
+	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "stopped transcription.\n");
+
+	return result;
+}
+
+/*
+ * Begin transcribing a session: stop any transcription already registered
+ * under `bugname`, initialise the cobalt speech session, attach a media bug
+ * that feeds it, and remember the bug as a channel private keyed by `bugname`.
+ *
+ * Returns SWITCH_STATUS_SUCCESS when the bug is attached; SWITCH_STATUS_FALSE
+ * when pre-answer or session initialisation fails; otherwise the failure
+ * status of switch_core_media_bug_add().
+ */
+static switch_status_t start_capture(switch_core_session_t *session, switch_media_bug_flag_t flags, 
+  char* hostport, char* model, int interim, char* bugname)
+{
+	switch_channel_t *channel = switch_core_session_get_channel(session);
+	switch_codec_implementation_t read_impl = { 0 };
+	switch_media_bug_t *bug;
+	switch_status_t status;
+	void *pUserData;
+	uint32_t samples_per_second;
+	uint32_t channels;
+
+	/* only one bug per name: replace a transcription that is still running */
+	if (switch_channel_get_private(channel, bugname)) {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "removing bug from previous transcribe\n");
+		do_stop(session, bugname);
+	}
+
+	switch_core_session_get_read_impl(session, &read_impl);
+
+	if (switch_channel_pre_answer(channel) != SWITCH_STATUS_SUCCESS) {
+		return SWITCH_STATUS_FALSE;
+	}
+
+	/* g722 is special-cased to use the codec's actual sampling rate */
+	if (!strcasecmp(read_impl.iananame, "g722")) {
+		samples_per_second = read_impl.actual_samples_per_second;
+	} else {
+		samples_per_second = read_impl.samples_per_second;
+	}
+	channels = flags & SMBF_STEREO ? 2 : 1;
+
+	if (SWITCH_STATUS_FALSE == cobalt_speech_session_init(session, responseHandler, hostport, samples_per_second, channels, model, interim, bugname, &pUserData)) {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Error initializing cobalt speech session.\n");
+		return SWITCH_STATUS_FALSE;
+	}
+
+	/* NOTE(review): if adding the bug fails, whatever cobalt_speech_session_init
+	   set up is not released on this path -- confirm whether that can leak. */
+	status = switch_core_media_bug_add(session, bugname, NULL, capture_callback, pUserData, 0, flags, &bug);
+	if (status != SWITCH_STATUS_SUCCESS) {
+		return status;
+	}
+
+	switch_channel_set_private(channel, bugname, bug);
+
+	return SWITCH_STATUS_SUCCESS;
+}
+
+#define TRANSCRIBE_API_SYNTAX "<uuid> start hostport [model] [interim|full] [stereo|mono] [bug-name] | <uuid> stop [bug-name]"
+/*
+ * API command uuid_cobalt_transcribe.
+ *
+ * Arguments are space separated and parsed positionally:
+ *   argv[0] session uuid
+ *   argv[1] "start" or "stop"
+ *   stop:   argv[2] optional bug name (defaults to MY_BUG_NAME)
+ *   start:  argv[2] hostport, argv[3] model, argv[4] "interim" for interim
+ *           results, argv[5] "stereo" to capture both legs, argv[6] optional
+ *           bug name (defaults to MY_BUG_NAME)
+ *
+ * Writes "-USAGE: ..." when the arguments are insufficient, otherwise
+ * "+OK Success" or "-ERR Operation Failed". The function itself always
+ * returns SWITCH_STATUS_SUCCESS.
+ */
+SWITCH_STANDARD_API(transcribe_function)
+{
+	char *mycmd = NULL, *argv[7] = { 0 };
+	int argc = 0;
+	switch_status_t status = SWITCH_STATUS_FALSE;
+	switch_media_bug_flag_t flags = SMBF_READ_STREAM;
+	switch_core_session_t *lsession = NULL;
+
+	if (!zstr(cmd) && (mycmd = strdup(cmd))) {
+		argc = switch_separate_string(mycmd, ' ', argv, (sizeof(argv) / sizeof(argv[0])));
+	}
+
+	/* argc < 2 must be tested before argv[1] is touched: with only a uuid
+	   supplied argv[1] is NULL and strcasecmp() on it would crash */
+	if (argc < 2 ||
+      zstr(argv[0]) ||
+      (!strcasecmp(argv[1], "start") && argc < 3)) {
+		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s %s %s.\n",
+			cmd ? cmd : "", argv[0] ? argv[0] : "", argv[1] ? argv[1] : "");
+		stream->write_function(stream, "-USAGE: %s\n", TRANSCRIBE_API_SYNTAX);
+		goto done;
+	}
+
+	if ((lsession = switch_core_session_locate(argv[0]))) {
+		if (!strcasecmp(argv[1], "stop")) {
+			char *bugname = argc > 2 ? argv[2] : MY_BUG_NAME;
+			switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "stop transcribing (bug=%s)\n", bugname);
+			status = do_stop(lsession, bugname);
+		}
+		else if (!strcasecmp(argv[1], "start")) {
+			char *hostport = argv[2];
+			char *model = argv[3];
+			int interim = argc > 4 && !strcmp(argv[4], "interim");
+			char *bugname = argc > 6 ? argv[6] : MY_BUG_NAME;
+			if (argc > 5 && !strcmp(argv[5], "stereo")) {
+				flags |= SMBF_WRITE_STREAM ;
+				flags |= SMBF_STEREO;
+			}
+			/* model is NULL when only three arguments were given */
+			switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "(bug=%s) (hostport=%s) start transcribing %s %s\n", bugname, hostport, model ? model : "", interim ? "interim": "complete");
+			status = start_capture(lsession, flags, hostport, model, interim, bugname);
+		}
+		switch_core_session_rwunlock(lsession);
+	}
+
+	if (status == SWITCH_STATUS_SUCCESS) {
+		stream->write_function(stream, "+OK Success\n");
+	} else {
+		stream->write_function(stream, "-ERR Operation Failed\n");
+	}
+
+  done:
+
+	switch_safe_free(mycmd);
+	return SWITCH_STATUS_SUCCESS;
+}
+
+#define TRANSCRIBE_API_MODELS_SYNTAX "<uuid> hostport"
+/*
+ * API command uuid_cobalt_list_models: "<uuid> hostport".
+ * Locates the session by uuid and calls cobalt_speech_list_models() on it.
+ * Writes "-USAGE: ..." when fewer than two arguments are given, otherwise
+ * "+OK Success" or "-ERR Operation Failed"; always returns SWITCH_STATUS_SUCCESS.
+ */
+SWITCH_STANDARD_API(list_models_function)
+{
+	char *args[2] = { 0 };
+	char *cmd_copy = NULL;
+	int nargs = 0;
+	switch_status_t result = SWITCH_STATUS_FALSE;
+	switch_core_session_t *target = NULL;
+
+	if (!zstr(cmd)) {
+		cmd_copy = strdup(cmd);
+		if (cmd_copy) {
+			nargs = switch_separate_string(cmd_copy, ' ', args, (sizeof(args) / sizeof(args[0])));
+		}
+	}
+
+	if (zstr(cmd) || nargs < 2) {
+		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s\n", cmd);
+		stream->write_function(stream, "-USAGE: %s\n", TRANSCRIBE_API_MODELS_SYNTAX);
+	} else {
+		target = switch_core_session_locate(args[0]);
+		if (target) {
+			result = cobalt_speech_list_models(target, args[1]);
+			switch_core_session_rwunlock(target);
+		}
+
+		if (result == SWITCH_STATUS_SUCCESS) {
+			stream->write_function(stream, "+OK Success\n");
+		} else {
+			stream->write_function(stream, "-ERR Operation Failed\n");
+		}
+	}
+
+	switch_safe_free(cmd_copy);
+	return SWITCH_STATUS_SUCCESS;
+}
+
+#define TRANSCRIBE_API_VERSION_SYNTAX "<uuid> hostport"
+/*
+ * API command uuid_cobalt_get_version: "<uuid> hostport".
+ * Locates the session by uuid and calls cobalt_speech_get_version() on it.
+ * Writes "-USAGE: ..." when fewer than two arguments are given, otherwise
+ * "+OK Success" or "-ERR Operation Failed"; always returns SWITCH_STATUS_SUCCESS.
+ */
+SWITCH_STANDARD_API(version_function)
+{
+	char *args[2] = { 0 };
+	char *cmd_copy = NULL;
+	int nargs = 0;
+	switch_status_t result = SWITCH_STATUS_FALSE;
+	switch_core_session_t *target = NULL;
+
+	if (!zstr(cmd)) {
+		cmd_copy = strdup(cmd);
+		if (cmd_copy) {
+			nargs = switch_separate_string(cmd_copy, ' ', args, (sizeof(args) / sizeof(args[0])));
+		}
+	}
+
+	if (zstr(cmd) || nargs < 2) {
+		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s\n", cmd);
+		stream->write_function(stream, "-USAGE: %s\n", TRANSCRIBE_API_VERSION_SYNTAX);
+	} else {
+		target = switch_core_session_locate(args[0]);
+		if (target) {
+			result = cobalt_speech_get_version(target, args[1]);
+			switch_core_session_rwunlock(target);
+		}
+
+		if (result == SWITCH_STATUS_SUCCESS) {
+			stream->write_function(stream, "+OK Success\n");
+		} else {
+			stream->write_function(stream, "-ERR Operation Failed\n");
+		}
+	}
+
+	switch_safe_free(cmd_copy);
+	return SWITCH_STATUS_SUCCESS;
+}
+
+#define TRANSCRIBE_API_COMPILE_CONTEXT_SYNTAX "<uuid> hostport model token phrases"
+/*
+ * API command uuid_cobalt_compile_context: "<uuid> hostport model token phrases".
+ * Locates the session by uuid and calls cobalt_speech_compile_context() with the
+ * remaining four arguments. Writes "-USAGE: ..." when fewer than five arguments
+ * are given, otherwise "+OK Success" or "-ERR Operation Failed"; always returns
+ * SWITCH_STATUS_SUCCESS.
+ */
+SWITCH_STANDARD_API(compile_context_function)
+{
+	char *args[5] = { 0 };
+	char *cmd_copy = NULL;
+	int nargs = 0;
+	switch_status_t result = SWITCH_STATUS_FALSE;
+	switch_core_session_t *target = NULL;
+
+	if (!zstr(cmd)) {
+		cmd_copy = strdup(cmd);
+		if (cmd_copy) {
+			nargs = switch_separate_string(cmd_copy, ' ', args, (sizeof(args) / sizeof(args[0])));
+		}
+	}
+
+	if (zstr(cmd) || nargs < 5) {
+		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s\n", cmd);
+		stream->write_function(stream, "-USAGE: %s\n", TRANSCRIBE_API_COMPILE_CONTEXT_SYNTAX);
+	} else {
+		target = switch_core_session_locate(args[0]);
+		if (target) {
+			result = cobalt_speech_compile_context(target, args[1], args[2], args[3], args[4]);
+			switch_core_session_rwunlock(target);
+		}
+
+		if (result == SWITCH_STATUS_SUCCESS) {
+			stream->write_function(stream, "+OK Success\n");
+		} else {
+			stream->write_function(stream, "-ERR Operation Failed\n");
+		}
+	}
+
+	switch_safe_free(cmd_copy);
+	return SWITCH_STATUS_SUCCESS;
+}
+
+/*
+ * Module load hook: reserves the custom event subclasses, initialises the
+ * cobalt speech interface and registers the uuid_cobalt_* API commands.
+ *
+ * Returns SWITCH_STATUS_TERM when a required event subclass cannot be
+ * reserved (subclasses reserved up to that point are released again),
+ * otherwise SWITCH_STATUS_SUCCESS. A failure of cobalt_speech_init() is
+ * logged at CRIT level but does not abort loading.
+ */
+SWITCH_MODULE_LOAD_FUNCTION(mod_transcribe_load)
+{
+	switch_api_interface_t *api_interface;
+	/* subclasses whose reservation must succeed for the module to load */
+	static const char *required_subclasses[] = {
+		TRANSCRIBE_EVENT_RESULTS,
+		TRANSCRIBE_EVENT_VAD_DETECTED,
+		TRANSCRIBE_EVENT_VERSION_RESPONSE,
+		TRANSCRIBE_EVENT_MODEL_LIST_RESPONSE,
+		TRANSCRIBE_EVENT_COMPILE_CONTEXT_RESPONSE
+	};
+	const unsigned int num_required = sizeof(required_subclasses) / sizeof(required_subclasses[0]);
+	unsigned int i;
+
+	/* create/register custom event message types */
+	for (i = 0; i < num_required; i++) {
+		if (switch_event_reserve_subclass(required_subclasses[i]) != SWITCH_STATUS_SUCCESS) {
+			switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Couldn't register subclass %s!\n", required_subclasses[i]);
+			/* undo the reservations made so far so a later load attempt can succeed */
+			while (i > 0) {
+				switch_event_free_subclass(required_subclasses[--i]);
+			}
+			return SWITCH_STATUS_TERM;
+		}
+	}
+	/* result deliberately ignored, as before -- presumably because this subclass
+	   name is not specific to this module; confirm */
+	switch_event_reserve_subclass(TRANSCRIBE_EVENT_ERROR);
+
+	/* connect my internal structure to the blank pointer passed to me */
+	*module_interface = switch_loadable_module_create_module_interface(pool, modname);
+
+	switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "Cobalt Speech Transcription API loading..\n");
+
+	if (SWITCH_STATUS_FALSE == cobalt_speech_init()) {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "Failed initializing cobalt speech interface\n");
+	} else {
+		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "Cobalt Speech Transcription API successfully loaded\n");
+	}
+
+	SWITCH_ADD_API(api_interface, "uuid_cobalt_transcribe", "Cobalt Speech Transcription API", transcribe_function, TRANSCRIBE_API_SYNTAX);
+	switch_console_set_complete("add uuid_cobalt_transcribe hostport start model");
+	switch_console_set_complete("add uuid_cobalt_transcribe hostport stop ");
+
+	SWITCH_ADD_API(api_interface, "uuid_cobalt_list_models", "Cobalt Speech Transcription API", list_models_function, TRANSCRIBE_API_MODELS_SYNTAX);
+	switch_console_set_complete("add uuid_cobalt_list_models hostport");
+
+	SWITCH_ADD_API(api_interface, "uuid_cobalt_compile_context", "Cobalt Speech Transcription API", compile_context_function, TRANSCRIBE_API_COMPILE_CONTEXT_SYNTAX);
+	switch_console_set_complete("add uuid_cobalt_compile_context hostport token phrases");
+
+	SWITCH_ADD_API(api_interface, "uuid_cobalt_get_version", "Cobalt Speech Transcription API", version_function, TRANSCRIBE_API_VERSION_SYNTAX);
+	switch_console_set_complete("add uuid_cobalt_get_version hostport");
+
+	/* indicate that the module should continue to be loaded */
+	return SWITCH_STATUS_SUCCESS;
+}
+
+/*
+  Module shutdown hook (registered through SWITCH_MODULE_DEFINITION).
+  Shuts down the cobalt speech interface, then releases the event subclasses
+  listed below. TRANSCRIBE_EVENT_ERROR is not released here. */
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_transcribe_shutdown)
+{
+	static const char *reserved_subclasses[] = {
+		TRANSCRIBE_EVENT_RESULTS,
+		TRANSCRIBE_EVENT_VAD_DETECTED,
+		TRANSCRIBE_EVENT_VERSION_RESPONSE,
+		TRANSCRIBE_EVENT_MODEL_LIST_RESPONSE,
+		TRANSCRIBE_EVENT_COMPILE_CONTEXT_RESPONSE
+	};
+	unsigned int idx;
+
+	cobalt_speech_cleanup();
+
+	for (idx = 0; idx < sizeof(reserved_subclasses) / sizeof(reserved_subclasses[0]); idx++) {
+		switch_event_free_subclass(reserved_subclasses[idx]);
+	}
+
+	return SWITCH_STATUS_SUCCESS;
+}
+
--- a/mod_cobalt_transcribe/mod_cobalt_transcribe.h
+++ b/mod_cobalt_transcribe/mod_cobalt_transcribe.h
@@ -0,0 +1,39 @@
+#ifndef __MOD_COBALT_TRANSCRIBE_H__
+#define __MOD_COBALT_TRANSCRIBE_H__
+
+#include <switch.h>
+#include <speex/speex_resampler.h>
+
+#include <unistd.h>
+
+/* capacity (excluding the terminating NUL) of cap_cb.sessionId and cap_cb.bugname */
+#define MAX_SESSION_ID (256)
+#define MAX_BUG_LEN (64)
+/* bug name used by the API commands when the caller does not supply one */
+#define MY_BUG_NAME "cobalt_speech"
+/* custom event subclass names reserved at module load and fired by the module */
+#define TRANSCRIBE_EVENT_RESULTS "cobalt_speech::transcription"
+#define TRANSCRIBE_EVENT_ERROR      "jambonz::error"
+#define TRANSCRIBE_EVENT_VAD_DETECTED "cobalt_speech::vad_detected"
+#define TRANSCRIBE_EVENT_MODEL_LIST_RESPONSE "cobalt_speech::model_list_response"
+#define TRANSCRIBE_EVENT_VERSION_RESPONSE "cobalt_speech::version_response"
+#define TRANSCRIBE_EVENT_COMPILE_CONTEXT_RESPONSE "cobalt_speech::compile_context_response"
+
+
+/*
+ * Callback through which the glue layer reports back to the module.
+ * `json` is either one of the keywords "vad_detected" / "error" or a
+ * transcription payload; `details` carries the error text for "error".
+ */
+typedef void (*responseHandler_t)(switch_core_session_t* session, 
+	const char* json, const char* bugname, 
+	const char* details);
+
+/* per-channel data: registered as the media bug's user data */
+struct cap_cb {
+	/* held by session cleanup; the frame handler only try-locks it */
+	switch_mutex_t *mutex;
+	/* media bug name; also the key of the channel private that holds the bug */
+	char bugname[MAX_BUG_LEN+1];
+	char sessionId[MAX_SESSION_ID+1];
+	char *base;
+	/* when non-NULL, audio is resampled through it before being written to the streamer */
+  SpeexResamplerState *resampler;
+	/* opaque pointer to the C++ GStreamer object; cleared to NULL by session cleanup */
+	void* streamer;
+	/* invoked by the frame handler with "vad_detected" when VAD detects speech */
+	responseHandler_t responseHandler;
+	/* read thread; joined by session cleanup */
+	switch_thread_t* thread;
+	/* when non-zero the frame handler stops forwarding audio */
+	int end_of_utterance;
+	/* optional voice activity detector; while set, connecting is deferred until speech starts */
+	switch_vad_t * vad;
+	uint32_t samples_per_second;
+};
+
+#endif
--- a/mod_cobalt_transcribe/simple_buffer.h
+++ b/mod_cobalt_transcribe/simple_buffer.h
@@ -0,0 +1,51 @@
+/**
+ * (very) simple and limited circular buffer of fixed-size chunks.
+ *
+ * Intended use: do all of the adds and then subsequently retrieve the
+ * buffered chunks, oldest first. Once numChunks chunks are stored, each
+ * further add overwrites the oldest one.
+ */
+class SimpleBuffer {
+  public:
+    /**
+     * @param chunkSize  size in bytes of one chunk
+     * @param numChunks  maximum number of chunks retained
+     */
+    SimpleBuffer(uint32_t chunkSize, uint32_t numChunks) :
+      m_pData(new char[static_cast<size_t>(chunkSize) * numChunks]),
+      numItems(0), m_chunkSize(chunkSize), m_numChunks(numChunks),
+      m_pNextWrite(m_pData) {
+    }
+    ~SimpleBuffer() {
+      delete [] m_pData;
+    }
+
+    // the buffer owns a raw array, so a member-wise copy would free it twice
+    SimpleBuffer(const SimpleBuffer&) = delete;
+    SimpleBuffer& operator=(const SimpleBuffer&) = delete;
+
+    /**
+     * Append data consisting of a whole number of chunks.
+     * The call is ignored when data is null, when the buffer has no capacity,
+     * or when datalen is not a multiple of the chunk size.
+     */
+    void add(void *data, uint32_t datalen) {
+      if (!data || m_chunkSize == 0 || m_numChunks == 0) return;
+      if (datalen % m_chunkSize != 0) return;
+
+      const char *src = static_cast<const char*>(data);
+      const uint32_t chunksToAdd = datalen / m_chunkSize;
+      for (uint32_t i = 0; i < chunksToAdd; i++) {
+        memcpy(m_pNextWrite, src, m_chunkSize);
+        src += m_chunkSize;
+        if (numItems < m_numChunks) numItems++;
+
+        // advance the write cursor, wrapping after the last slot
+        const uint32_t slot = static_cast<uint32_t>((m_pNextWrite - m_pData) / m_chunkSize);
+        if (slot + 1 >= m_numChunks) m_pNextWrite = m_pData;
+        else m_pNextWrite += m_chunkSize;
+      }
+    }
+
+    /**
+     * @return pointer to the oldest chunk not yet retrieved, or nullptr when
+     *         the buffer is empty. The pointer refers to internal storage.
+     */
+    char* getNextChunk() {
+      // test before decrementing: decrementing at zero would wrap the unsigned counter
+      if (numItems == 0) return nullptr;
+
+      // the oldest unread chunk sits numItems slots behind the write cursor,
+      // which is slot 0 while the buffer has not yet wrapped
+      const uint32_t writeSlot = static_cast<uint32_t>((m_pNextWrite - m_pData) / m_chunkSize);
+      const uint32_t readSlot = (writeSlot + m_numChunks - numItems) % m_numChunks;
+      numItems--;
+      return m_pData + static_cast<size_t>(readSlot) * m_chunkSize;
+    }
+
+    /** @return number of chunks currently stored and not yet retrieved */
+    uint32_t getNumItems() const { return numItems;}
+
+  private:
+    char *m_pData;
+    uint32_t numItems;
+    uint32_t m_chunkSize;
+    uint32_t m_numChunks;
+    char* m_pNextWrite;
+};