From d90844845757798f21a882c01b98ad383d9e7ea2 Mon Sep 17 00:00:00 2001
From: Dave Horton
Date: Tue, 22 Oct 2024 15:38:52 -0400
Subject: [PATCH] wip (#98)

* wip

* 0.9.2 release notes
---
 data/docs.yml                        |  6 ++
 markdown/docs/release-notes/0.9.2.md | 62 +++++++++++++++++++
 markdown/docs/webhooks/listen.md     |  2 +-
 markdown/docs/webhooks/llm.md        | 89 ++++++++++++++++++++++++++++
 markdown/docs/webhooks/message.md    |  2 +-
 markdown/docs/ws/llm-tool-call.md    |  0
 markdown/docs/ws/llm-tool-output.md  |  0
 markdown/docs/ws/llm-update.md       |  0
 markdown/docs/ws/overview.md         |  4 ++
 9 files changed, 163 insertions(+), 2 deletions(-)
 create mode 100644 markdown/docs/release-notes/0.9.2.md
 create mode 100644 markdown/docs/webhooks/llm.md
 create mode 100644 markdown/docs/ws/llm-tool-call.md
 create mode 100644 markdown/docs/ws/llm-tool-output.md
 create mode 100644 markdown/docs/ws/llm-update.md

diff --git a/data/docs.yml b/data/docs.yml
index 7e86cb6..e270a0c 100644
--- a/data/docs.yml
+++ b/data/docs.yml
@@ -51,6 +51,9 @@ navi:
   -
     path: listen
     title: listen
+  -
+    path: llm
+    title: llm
   -
     path: message
     title: message
@@ -168,6 +171,9 @@ navi:
   path: release-notes
   title: Release Notes
   pages:
+    -
+      path: 0.9.2
+      title: 0.9.2
     -
       path: 0.9.1
      title: 0.9.1
diff --git a/markdown/docs/release-notes/0.9.2.md b/markdown/docs/release-notes/0.9.2.md
new file mode 100644
index 0000000..f22239b
--- /dev/null
+++ b/markdown/docs/release-notes/0.9.2.md
@@ -0,0 +1,62 @@
+# Release 0.9.2
+
+#### Info
+- Release Date: Oct 21, 2024
+
+#### New Features
+- added support for the OpenAI Realtime API
+- added new [llm](/docs/webhooks/llm) verb for integration with LLMs
+- added support for [Speechmatics STT](https://www.speechmatics.com/)
+- added support for PlayHT 3.0 TTS
+- support Deepgram on-prem
+- log levels of applications can now be changed at runtime without a restart
+- include the network SIP Call-ID in call detail records in influxdb
+- added support for aws language model name when transcribing
+- support configuring a referHook
+- loop dial music
+- added support for configuring the IP address that is advertised to the API server
+- send callSid in the custom-stt start message
+- support wait hook for conf:participant-action hold
+- added notify speech-bargein-detected and dtmf-bargein-detected events
+- added callSid for STT and TTS alerts
+- check the confidence level of a transcript against minConfidence
+- added private_network_cidr to the system_information table
+- sip gateways support inbound pad crypto
+
+#### Bug fixes
+- allow say, gather, and transcribe to finish if there is an error with the speech credential
+- fixed gather digits not working without a nested say/play
+- fixed race condition where the call has just ended when an action hook play completes
+- fixed issues with labels on speech vendors
+- fixed adulting call session not sending a status callback when hangup is used
+- enable bargeIn when minBargeinWordCount is 0
+- tts: allow setting vendor model or engine at runtime
+- fixed REST create call missing target headers on outdial
+- fixed audio issue when kicking a conference participant
+- fixed long amd hints preventing the freeswitch module from connecting to the vendor
+- fixed support for precached audio with tts streaming
+- fixed rest call session not handling re-INVITE
+- fixed conference in a cluster to have the correct direction in callInfo
+- fix: support _lccMuteStatus for conference
+- fix: do not apply snake case to customer data
+- clear gather timeout if an interim result is received
+- support jambonz transcribe sampling rate
+- fixed conference end event not being sent when the moderator leaves the conference
+- fixed pad_crypto not working when the call is not an incoming call to a sip realm
+- handle cases where a gateway query at the account level returns more than one gateway
+- sip scheme in the contact header of a re-INVITE 200 OK should be the same as in the initial 200 OK
+- fixed sbc crash when making an outbound call to a user
+
+#### SQL changes
+```
+ALTER TABLE system_information ADD COLUMN private_network_cidr VARCHAR(8192);
+ALTER TABLE system_information ADD COLUMN log_level ENUM('info', 'debug') NOT NULL DEFAULT 'info';
+ALTER TABLE accounts ADD COLUMN enable_debug_log BOOLEAN NOT NULL DEFAULT false;
+```
+
+#### Availability
+- Available now on jambonz.cloud
+- Available now with devops scripts for subscription customers
+
+**Questions?** Contact us at support@jambonz.org
\ No newline at end of file
diff --git a/markdown/docs/webhooks/listen.md b/markdown/docs/webhooks/listen.md
index 2d07062..dafc2de 100644
--- a/markdown/docs/webhooks/listen.md
+++ b/markdown/docs/webhooks/listen.md
@@ -188,5 +188,5 @@ This command clears (removes) and audio marks that are being tracked. When you

 Prev: lex
-Next: message
+Next: message

diff --git a/markdown/docs/webhooks/llm.md b/markdown/docs/webhooks/llm.md
new file mode 100644
index 0000000..e4e92ae
--- /dev/null
+++ b/markdown/docs/webhooks/llm.md
@@ -0,0 +1,89 @@
+# llm

+The `llm` verb connects a call to an AI language model.
+
+> In release 0.9.2, OpenAI's Realtime API is the only supported LLM. Support for other LLMs will be rolling out shortly.
+
+[Here is an example](https://github.com/jambonz/openai-s2s-example) showing how to connect to the OpenAI Realtime API.
+
+```js
+session.llm({
+  vendor: 'openai',
+  model: 'gpt-4o-realtime-preview-2024-10-01',
+  auth: {
+    apiKey
+  },
+  actionHook: '/final',
+  eventHook: '/event',
+  toolHook: '/toolCall',
+  events: [
+    'conversation.item.*',
+    'response.audio_transcript.done',
+    'input_audio_buffer.committed'
+  ],
+  llmOptions: {
+    response_create: {
+      modalities: ['text', 'audio'],
+      instructions: 'Please assist the user with their request.',
+      voice: 'alloy',
+      output_audio_format: 'pcm16',
+      temperature: 0.8,
+      max_output_tokens: 4096,
+    },
+    session_update: {
+      tools: [
+        {
+          name: 'get_weather',
+          type: 'function',
+          description: 'Get the weather at a given location',
+          parameters: {
+            type: 'object',
+            properties: {
+              location: {
+                type: 'string',
+                description: 'Location to get the weather from',
+              },
+              scale: {
+                type: 'string',
+                enum: ['fahrenheit', 'celsius'],
+              },
+            },
+            required: ['location', 'scale'],
+          },
+        },
+      ],
+      tool_choice: 'auto',
+      input_audio_transcription: {
+        model: 'whisper-1',
+      },
+      turn_detection: {
+        type: 'server_vad',
+        threshold: 0.8,
+        prefix_padding_ms: 300,
+        silence_duration_ms: 500,
+      }
+    }
+  }
+});
+```
+
+You can use the following options in the `llm` verb:
+
+| option | description | required |
+| ------------- |-------------| -----|
+| vendor | name of the LLM vendor | yes |
+| model | name of the LLM model | yes |
+| auth | object containing authentication credentials; format according to the model (see below) | no |
+| connectOptions | object containing information such as the URI to connect to | no |
+| actionHook | webhook that will be called when the LLM session ends | no |
+| eventHook | webhook that will be called when a requested LLM event happens (e.g. a transcript) | no |
+| toolHook | webhook that will be called when the LLM wants to call a function | no |
+| events | array of event names listing the events requested (wildcards allowed) | no |
+| llmOptions | object containing instructions for the LLM; format dependent on the LLM model | no |
+
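+Below is a minimal sketch (not taken from the jambonz source) of what an HTTP `toolHook` handler for the `get_weather` function defined above might look like. The request and response property names used here (`name`, `args`, and `data`) are assumptions for illustration only; check the llm tool-call documentation for the exact payload format.
+
+```js
+const express = require('express');
+const app = express();
+app.use(express.json());
+
+app.post('/toolCall', async (req, res) => {
+  // assumed payload shape: {name, args} describing the requested function call
+  const {name, args} = req.body;
+  if (name === 'get_weather') {
+    const {location, scale} = args || {};
+    // a real application would call a weather service here;
+    // a canned result is returned for brevity
+    const temperature = scale === 'celsius' ? 19 : 66;
+    return res.json({data: {location, scale, temperature, conditions: 'partly cloudy'}});
+  }
+  res.json({data: {error: `unknown function: ${name}`}});
+});
+
+app.listen(3000);
+```
+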

+Prev: leave
+Next: listen
+

diff --git a/markdown/docs/webhooks/message.md b/markdown/docs/webhooks/message.md
index 8330e5b..e3efbf0 100644
--- a/markdown/docs/webhooks/message.md
+++ b/markdown/docs/webhooks/message.md
@@ -37,6 +37,6 @@ The actionHook that is invoked when the `message` command completes will include
 | carrier_message_id | the message identifier assigned by the carrier for this SMS|

-Prev: listen
+Prev: listen
 Next: pause

diff --git a/markdown/docs/ws/llm-tool-call.md b/markdown/docs/ws/llm-tool-call.md
new file mode 100644
index 0000000..e69de29
diff --git a/markdown/docs/ws/llm-tool-output.md b/markdown/docs/ws/llm-tool-output.md
new file mode 100644
index 0000000..e69de29
diff --git a/markdown/docs/ws/llm-update.md b/markdown/docs/ws/llm-update.md
new file mode 100644
index 0000000..e69de29
diff --git a/markdown/docs/ws/overview.md b/markdown/docs/ws/overview.md
index 4ab4e8b..bdd6f82 100644
--- a/markdown/docs/ws/overview.md
+++ b/markdown/docs/ws/overview.md
@@ -47,9 +47,13 @@ In the sections that follow, we will describe each of the message types in detai
 |call:status|jambonz|sent any time the call status changes.|
 |verb:hook|jambonz| sent when an action hook or event hook configured for a verb has been triggered (e.g. a “gather” verb has collected an utterance from the user).|
 |verb:status|jambonz|sent when a verb has just started or completed executing. See “command” below; this message is only sent if the app includes “id” properties on the verbs provided.|
+|llm:event|jambonz|sent when the LLM generates an event that the app has requested (e.g. a transcript).|
 |jambonz:error|jambonz| if jambonz encounters some sort of fatal error (i.e. something that would necessitate ending the call unexpectedly) jambonz will send an error event to the far end app describing the problem.|
 |ack|websocket server|the ws server will respond to any `session:new` or `verb:hook` message with an `ack` message indicating that the provided content in the message has been processed. The ack message may optionally contain a payload of new instructions for jambonz.|
 |command|websocket server|the ws server will send this message when it wants to asynchronously provide a new set of instructions to jambonz. The app **may** include an `id` property in each of the verbs included in the command; if so, jambonz will send `verb:status` notifications back to the app when the verb is executed. The `id` property is a string value that is assigned by the app and is meaningful only to the app (i.e. to jambonz it is simply an opaque piece of tracking data).|
+|llm:tool-call|jambonz|sent when an LLM agent makes a tool or function call that the app needs to invoke.|
+|llm:tool-output|websocket server|sent by the ws server when a tool has been invoked and results are available.|
+|llm:update|websocket server|sent by the ws server application when it wants to asynchronously provide new instructions or session state to the LLM.|
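+
+As a rough illustration of the llm message flow, the sketch below shows a bare-bones websocket server (using the plain `ws` package rather than a jambonz SDK) that reacts to an `llm:tool-call` message by returning an `llm:tool-output` message. The envelope and payload property names used here (`type`, `data`, `name`, `args`, `tool_call_id`) are assumptions for illustration only; see the individual message pages for the exact schemas.
+
+```js
+const { WebSocketServer } = require('ws');
+
+const wss = new WebSocketServer({ port: 3001 });
+
+wss.on('connection', (ws) => {
+  ws.on('message', (raw) => {
+    const msg = JSON.parse(raw.toString());
+    if (msg.type === 'llm:tool-call') {
+      // assumed payload shape: the function name, its arguments, and a call id
+      const { name, args, tool_call_id } = msg.data || {};
+      // invoke the requested tool, then hand the result back to the LLM
+      ws.send(JSON.stringify({
+        type: 'llm:tool-output',
+        tool_call_id,
+        data: { result: `output of ${name}(${JSON.stringify(args)})` }
+      }));
+    }
+  });
+});
+```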