From d90844845757798f21a882c01b98ad383d9e7ea2 Mon Sep 17 00:00:00 2001
From: Dave Horton
Date: Tue, 22 Oct 2024 15:38:52 -0400
Subject: [PATCH] wip (#98)

* wip

* 0.9.2 release notes
---
 data/docs.yml                        |  6 ++
 markdown/docs/release-notes/0.9.2.md | 62 +++++++++++++++++++
 markdown/docs/webhooks/listen.md     |  2 +-
 markdown/docs/webhooks/llm.md        | 89 ++++++++++++++++++++++++++++
 markdown/docs/webhooks/message.md    |  2 +-
 markdown/docs/ws/llm-tool-call.md    |  0
 markdown/docs/ws/llm-tool-output.md  |  0
 markdown/docs/ws/llm-update.md       |  0
 markdown/docs/ws/overview.md         |  4 ++
 9 files changed, 163 insertions(+), 2 deletions(-)
 create mode 100644 markdown/docs/release-notes/0.9.2.md
 create mode 100644 markdown/docs/webhooks/llm.md
 create mode 100644 markdown/docs/ws/llm-tool-call.md
 create mode 100644 markdown/docs/ws/llm-tool-output.md
 create mode 100644 markdown/docs/ws/llm-update.md

diff --git a/data/docs.yml b/data/docs.yml
index 7e86cb6..e270a0c 100644
--- a/data/docs.yml
+++ b/data/docs.yml
@@ -51,6 +51,9 @@ navi:
   -
     path: listen
     title: listen
+  -
+    path: llm
+    title: llm
   -
     path: message
     title: message
@@ -168,6 +171,9 @@ navi:
   path: release-notes
   title: Release Notes
   pages:
+    -
+      path: 0.9.2
+      title: 0.9.2
     -
       path: 0.9.1
      title: 0.9.1
diff --git a/markdown/docs/release-notes/0.9.2.md b/markdown/docs/release-notes/0.9.2.md
new file mode 100644
index 0000000..f22239b
--- /dev/null
+++ b/markdown/docs/release-notes/0.9.2.md
@@ -0,0 +1,62 @@
+# Release 0.9.2
+
+#### Info
+- Release Date: Oct 21, 2024
+
+#### New Features
+- added support for the OpenAI Realtime API
+- added new [llm](/docs/webhooks/llm) verb for integration with LLMs
+- added support for [Speechmatics STT](https://www.speechmatics.com/)
+- added support for PlayHT 3.0 TTS
+- support Deepgram on-prem
+- log levels of applications can now be changed at runtime without a restart
+- include the network SIP Call-ID in call detail records in influxdb
+- added support for aws language model name when transcribing
+- support configuring a referHook
+- loop dial music
+- added support for configuring the IP address that is advertised to the API server
+- send callSid in the custom-stt start message
+- support wait hook for conf:participant-action hold
+- added notify speech-bargein-detected and dtmf-bargein-detected events
+- added callSid for STT and TTS alerts
+- check the confidence level of a transcript against minConfidence
+- added private_network_cidr to the system_information table
+- sip gateways support inbound pad crypto
+
+#### Bug fixes
+- allow say, gather, and transcribe to finish if there is an error with the speech credential
+- fixed gather digits not working without a nested say/play
+- fixed race condition where the call has just ended when an action hook play completes
+- fixed issues with labels on speech vendors
+- fixed adulting call session not sending a status callback when hangup is used
+- enable bargeIn when minBargeinWordCount is 0
+- tts: allow setting vendor model or engine at runtime
+- fixed REST create call missing target headers on outdial
+- fixed audio issue when kicking a conference participant
+- fixed long amd hints preventing the freeswitch module from connecting to the vendor
+- fixed support for precached audio with tts streaming
+- fixed rest call session not handling re-INVITE
+- fixed conference in a cluster to have the correct direction in callInfo
+- fix: support _lccMuteStatus for conference
+- fix: do not apply snake case to customer data
+- clear gather timeout if an interim result is received
+- support jambonz transcribe sampling rate
+- fixed conference end event not being sent when the moderator leaves the conference
+- fixed pad_crypto not working when the call is not an incoming call to a sip realm
+- handle cases where a gateway query at the account level returns more than one gateway
+- sip scheme in the contact header of a re-INVITE 200 OK should be the same as in the initial 200 OK
+- fixed sbc crash when making an outbound call to a user
+
+#### SQL changes
+```
+ALTER TABLE system_information ADD COLUMN private_network_cidr VARCHAR(8192);
+ALTER TABLE system_information ADD COLUMN log_level ENUM('info', 'debug') NOT NULL DEFAULT 'info';
+ALTER TABLE accounts ADD COLUMN enable_debug_log BOOLEAN NOT NULL DEFAULT false;
+```
+
+#### Availability
+- Available now on jambonz.cloud
+- Available now with devops scripts for subscription customers
+
+**Questions?** Contact us at support@jambonz.org
\ No newline at end of file
diff --git a/markdown/docs/webhooks/listen.md b/markdown/docs/webhooks/listen.md
index 2d07062..dafc2de 100644
--- a/markdown/docs/webhooks/listen.md
+++ b/markdown/docs/webhooks/listen.md
@@ -188,5 +188,5 @@ This command clears (removes) and audio marks that are being tracked. When you

 Prev: lex
-Next: message
+Next: message

diff --git a/markdown/docs/webhooks/llm.md b/markdown/docs/webhooks/llm.md
new file mode 100644
index 0000000..e4e92ae
--- /dev/null
+++ b/markdown/docs/webhooks/llm.md
@@ -0,0 +1,89 @@
+# llm

+The `llm` verb connects a call to an AI language model.
+
+> In release 0.9.2, OpenAI's Realtime API is the only supported LLM. Support for other LLMs will be rolling out shortly.
+
+[Here is an example](https://github.com/jambonz/openai-s2s-example) showing how to connect to the OpenAI Realtime API.
+
+```js
+session.llm({
+  vendor: 'openai',
+  model: 'gpt-4o-realtime-preview-2024-10-01',
+  auth: {
+    apiKey
+  },
+  actionHook: '/final',
+  eventHook: '/event',
+  toolHook: '/toolCall',
+  events: [
+    'conversation.item.*',
+    'response.audio_transcript.done',
+    'input_audio_buffer.committed'
+  ],
+  llmOptions: {
+    response_create: {
+      modalities: ['text', 'audio'],
+      instructions: 'Please assist the user with their request.',
+      voice: 'alloy',
+      output_audio_format: 'pcm16',
+      temperature: 0.8,
+      max_output_tokens: 4096,
+    },
+    session_update: {
+      tools: [
+        {
+          name: 'get_weather',
+          type: 'function',
+          description: 'Get the weather at a given location',
+          parameters: {
+            type: 'object',
+            properties: {
+              location: {
+                type: 'string',
+                description: 'Location to get the weather from',
+              },
+              scale: {
+                type: 'string',
+                enum: ['fahrenheit', 'celsius'],
+              },
+            },
+            required: ['location', 'scale'],
+          },
+        },
+      ],
+      tool_choice: 'auto',
+      input_audio_transcription: {
+        model: 'whisper-1',
+      },
+      turn_detection: {
+        type: 'server_vad',
+        threshold: 0.8,
+        prefix_padding_ms: 300,
+        silence_duration_ms: 500,
+      }
+    }
+  }
+});
+```
+
+You can use the following options in the `llm` verb:
+
+| option | description | required |
+| ------------- |-------------| -----|
+| vendor | name of the LLM vendor | yes |
+| model | name of the LLM model | yes |
+| auth | object containing authentication credentials; format according to the model (see below) | no |
+| connectOptions | object containing information such as the URI to connect to | no |
+| actionHook | webhook that will be called when the LLM session ends | no |
+| eventHook | webhook that will be called when a requested LLM event happens (e.g. a transcript) | no |
+| toolHook | webhook that will be called when the LLM wants to call a function | no |
+| events | array of event names listing the events requested (wildcards allowed) | no |
+| llmOptions | object containing instructions for the LLM; format dependent on the LLM model | no |
+
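+Below is a minimal sketch (not taken from the jambonz source) of what an HTTP `toolHook` handler for the `get_weather` function defined above might look like. The request and response property names used here (`name`, `args`, and `data`) are assumptions for illustration only; check the llm tool-call documentation for the exact payload format.
+
+```js
+const express = require('express');
+const app = express();
+app.use(express.json());
+
+app.post('/toolCall', async (req, res) => {
+  // assumed payload shape: {name, args} describing the requested function call
+  const {name, args} = req.body;
+  if (name === 'get_weather') {
+    const {location, scale} = args || {};
+    // a real application would call a weather service here;
+    // a canned result is returned for brevity
+    const temperature = scale === 'celsius' ? 19 : 66;
+    return res.json({data: {location, scale, temperature, conditions: 'partly cloudy'}});
+  }
+  res.json({data: {error: `unknown function: ${name}`}});
+});
+
+app.listen(3000);
+```
+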

+Prev: leave
+Next: listen
+

diff --git a/markdown/docs/webhooks/message.md b/markdown/docs/webhooks/message.md
index 8330e5b..e3efbf0 100644
--- a/markdown/docs/webhooks/message.md
+++ b/markdown/docs/webhooks/message.md
@@ -37,6 +37,6 @@ The actionHook that is invoked when the `message` command completes will include
 | carrier_message_id | the message identifier assigned by the carrier for this SMS|

-Prev: listen
+Prev: listen
 Next: pause

diff --git a/markdown/docs/ws/llm-tool-call.md b/markdown/docs/ws/llm-tool-call.md
new file mode 100644
index 0000000..e69de29
diff --git a/markdown/docs/ws/llm-tool-output.md b/markdown/docs/ws/llm-tool-output.md
new file mode 100644
index 0000000..e69de29
diff --git a/markdown/docs/ws/llm-update.md b/markdown/docs/ws/llm-update.md
new file mode 100644
index 0000000..e69de29
diff --git a/markdown/docs/ws/overview.md b/markdown/docs/ws/overview.md
index 4ab4e8b..bdd6f82 100644
--- a/markdown/docs/ws/overview.md
+++ b/markdown/docs/ws/overview.md
@@ -47,9 +47,13 @@ In the sections that follow, we will describe each of the message types in detai
 |call:status|jambonz|sent any time the call status changes.|
 |verb:hook|jambonz| sent when an action hook or event hook configured for a verb has been triggered (e.g. a “gather” verb has collected an utterance from the user).|
 |verb:status|jambonz|sent when a verb has just started or completed executing. See “command” below; this message is only sent if the app includes “id” properties on the verbs provided.|
+|llm:event|jambonz|sent when the LLM generates an event that the app has requested (e.g. a transcript).|
 |jambonz:error|jambonz| if jambonz encounters some sort of fatal error (i.e. something that would necessitate ending the call unexpectedly) jambonz will send an error event to the far end app describing the problem.|
 |ack|websocket server|the ws server will respond to any `session:new` or `verb:hook` message with an `ack` message indicating that the provided content in the message has been processed. The ack message may optionally contain a payload of new instructions for jambonz.|
 |command|websocket server|the ws server will send this message when it wants to asynchronously provide a new set of instructions to jambonz. The app **may** include an `id` property in each of the verbs included in the command; if so, jambonz will send `verb:status` notifications back to the app when the verb is executed. The `id` property is a string value that is assigned by the app and is meaningful only to the app (i.e. to jambonz it is simply an opaque piece of tracking data).|
+|llm:tool-call|jambonz|sent when an LLM agent makes a tool or function call that the app needs to invoke.|
+|llm:tool-output|websocket server|sent by the ws server when a tool has been invoked and results are available.|
+|llm:update|websocket server|sent by the ws server application when it wants to asynchronously provide new instructions or session state to the LLM.|
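+
+As a rough illustration of the llm message flow, the sketch below shows a bare-bones websocket server (using the plain `ws` package rather than a jambonz SDK) that reacts to an `llm:tool-call` message by returning an `llm:tool-output` message. The envelope and payload property names used here (`type`, `data`, `name`, `args`, `tool_call_id`) are assumptions for illustration only; see the individual message pages for the exact schemas.
+
+```js
+const { WebSocketServer } = require('ws');
+
+const wss = new WebSocketServer({ port: 3001 });
+
+wss.on('connection', (ws) => {
+  ws.on('message', (raw) => {
+    const msg = JSON.parse(raw.toString());
+    if (msg.type === 'llm:tool-call') {
+      // assumed payload shape: the function name, its arguments, and a call id
+      const { name, args, tool_call_id } = msg.data || {};
+      // invoke the requested tool, then hand the result back to the LLM
+      ws.send(JSON.stringify({
+        type: 'llm:tool-output',
+        tool_call_id,
+        data: { result: `output of ${name}(${JSON.stringify(args)})` }
+      }));
+    }
+  });
+});
+```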