From c79a6aaf8ae57c01408884fe56f581dabf880ab5 Mon Sep 17 00:00:00 2001 From: Dave Horton Date: Wed, 16 Oct 2024 09:27:51 -0400 Subject: [PATCH] Feat/llm update (#936) * add support for llm:update during LLM session * make sure to end openai session when Llm task is killed * wip * wip * wip * wip * wip * wip * wip --- lib/session/call-session.js | 26 ++++++++++++++-- lib/tasks/llm/index.js | 13 +++++++- lib/tasks/llm/llms/openai_s2s.js | 53 +++++++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/lib/session/call-session.js b/lib/session/call-session.js index 080e1ddc..a2ec9d32 100644 --- a/lib/session/call-session.js +++ b/lib/session/call-session.js @@ -1590,17 +1590,29 @@ Duration=${duration} ` } _lccToolOutput(tool_call_id, opts, callSid) { - // this whole thing requires us to be in a Dial verb + // only valid if we are in an LLM verb const task = this.currentTask; if (!task || !task.name.startsWith('Llm')) { return this.logger.info('CallSession:_lccToolOutput - invalid command since we are not in an llm'); } - task.processToolOutput(tool_call_id, opts) + task.processToolOutput(tool_call_id, opts, callSid) .catch((err) => this.logger.error(err, 'CallSession:_lccToolOutput')); } + _lccLlmUpdate(opts, callSid) { + // only valid if we are in an LLM verb + const task = this.currentTask; + if (!task || !task.name.startsWith('Llm')) { + return this.logger.info('CallSession:_lccLlmUpdate - invalid command since we are not in an llm'); + } + + task.processLlmUpdate(opts, callSid) + .catch((err) => this.logger.error(err, 'CallSession:_lccLlmUpdate')); + } + + /** * perform call hangup by jambonz */ @@ -1660,6 +1672,12 @@ Duration=${duration} ` else if (opts.boostAudioSignal) { return this._lccBoostAudioSignal(opts, callSid); } + else if (opts.llm_tool_output) { + return this._lccToolOutput(opts.tool_call_id, opts.llm_tool_output, callSid); + } + else if (opts.llm_update) { + return this._lccLlmUpdate(opts.llm_update, callSid); + } 
// whisper may be the only thing we are asked to do, or it may that // we are doing a whisper after having muted, paused recording etc.. @@ -1961,6 +1979,10 @@ Duration=${duration} ` this._lccToolOutput(tool_call_id, data, call_sid); break; + case 'llm:update': + this._lccLlmUpdate(data, call_sid); + break; + default: this.logger.info(`CallSession:_onCommand - invalid command ${command}`); } diff --git a/lib/tasks/llm/index.js b/lib/tasks/llm/index.js index a1c16e33..8fbe3eac 100644 --- a/lib/tasks/llm/index.js +++ b/lib/tasks/llm/index.js @@ -79,7 +79,18 @@ class TaskLlm extends Task { this.llm.processToolOutput(this.ep, tool_call_id, data); } - + async processLlmUpdate(data, callSid) { + if (this.ep.connected) { + if (typeof this.llm.processLlmUpdate === 'function') { + this.llm.processLlmUpdate(this.ep, data, callSid); + } + else { + const {vendor, model} = this.llm; + this.logger.info({data, callSid}, + `TaskLlm:_processLlmUpdate: LLM ${vendor}:${model} does not support llm:update`); + } + } + } } module.exports = TaskLlm; diff --git a/lib/tasks/llm/llms/openai_s2s.js b/lib/tasks/llm/llms/openai_s2s.js index 9c716a65..6b6ae03e 100644 --- a/lib/tasks/llm/llms/openai_s2s.js +++ b/lib/tasks/llm/llms/openai_s2s.js @@ -2,6 +2,7 @@ const Task = require('../../task'); const TaskName = 'Llm_OpenAI_s2s'; const {LlmEvents_OpenAI} = require('../../../utils/constants'); const ClientEvent = 'client.event'; +const SessionDelete = 'session.delete'; const openai_server_events = [ 'error', @@ -125,6 +126,13 @@ class TaskLlmOpenAI_S2S extends Task { } } + async _api(ep, args) { + const res = await ep.api('uuid_openai_s2s', `^^|${args.join('|')}`); + if (!res.body?.startsWith('+OK')) { + throw new Error(`Error calling uuid_openai_s2s: ${res.body}`); + } + } + async exec(cs, {ep}) { await super.exec(cs); @@ -140,26 +148,57 @@ class TaskLlmOpenAI_S2S extends Task { async kill(cs) { super.kill(cs); + + this._api(cs.ep, [cs.ep.uuid, SessionDelete]) + .catch((err) => 
this.logger.info({err}, 'TaskLlmOpenAI_S2S:kill - error deleting session')); + this.notifyTaskDone(); } + /** + * Send function call output to the OpenAI server in the form of conversation.item.create + * per https://platform.openai.com/docs/guides/realtime/function-calls + */ async processToolOutput(ep, tool_call_id, data) { try { this.logger.debug({tool_call_id, data}, 'TaskLlmOpenAI_S2S:processToolOutput'); - await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(data)]); + if (!data.type || data.type !== 'conversation.item.create') { + this.logger.info({data}, + 'TaskLlmOpenAI_S2S:processToolOutput - invalid tool output, must be conversation.item.create'); + } + else { + await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(data)]); - // send immediate response.create per https://platform.openai.com/docs/guides/realtime/function-calls - await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify({type: 'response.create'})]); + // spec also recommends to send immediate response.create + await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify({type: 'response.create'})]); + } } catch (err) { this.logger.info({err}, 'TaskLlmOpenAI_S2S:processToolOutput'); } } - async _api(ep, args) { - const res = await ep.api('uuid_openai_s2s', `^^|${args.join('|')}`); - if (!res.body?.startsWith('+OK')) { - throw new Error({args}, `Error calling uuid_openai_s2s: ${res.body}`); + /** + * Send a session.update to the OpenAI server + * Note: creating and deleting conversation items also supported as well as interrupting the assistant + */ + async processLlmUpdate(ep, data, _callSid) { + try { + this.logger.debug({data, _callSid}, 'TaskLlmOpenAI_S2S:processLlmUpdate'); + + if (!data.type || ![ + 'session.update', + 'conversation.item.create', + 'conversation.item.delete', + 'response.cancel' + ].includes(data.type)) { + this.logger.info({data}, 'TaskLlmOpenAI_S2S:processLlmUpdate - invalid mid-call request'); + } + else { + await this._api(ep, [ep.uuid, ClientEvent, 
JSON.stringify(data)]); + } + } catch (err) { + this.logger.info({err}, 'TaskLlmOpenAI_S2S:processLlmUpdate'); } }