Compare commits

...

234 Commits

Author SHA1 Message Date
Sam Machin
f85b5f9a01 Update call-session.js 2025-01-10 13:56:43 +00:00
Dave Horton
6bad1a22f3 fix #1025 (#1026)
* fix #1025

* redirect verb should be able to redirect to a new websocket endpoint
2025-01-09 15:45:20 -05:00
Hoan Luu Huu
fcefa1ff31 fix inband dtmf does not work in dial verb (#1018) 2025-01-08 18:29:43 -05:00
Hoan Luu Huu
67cd53c930 rest:dial support timeLimit (#1024)
* rest:dial support timeLimit

* wip

* wip

* clear maxCallDuration timer
2025-01-07 12:21:09 -05:00
Dave Horton
a59784b8ab update base image to node:20-alpine (#1022) 2025-01-04 16:38:25 -05:00
Dave Horton
a2581eaeb4 tts throttling and send user_interruption event (#1019)
* tts throttling and send user_interruption event

* tts streaming: if we get a flush with tokens pending, send the flush after the tokens

* wip
2025-01-04 16:34:01 -05:00
Dave Horton
3706aa4d98 #1020 - fix for sticky bargein (#1021) 2025-01-03 10:41:35 -05:00
Dave Horton
25f1e65f63 feed TTS in sentence chunks when streaming (#1013)
* feed TTS in sentence chunks when streaming

* tts streaming: treat a paragraph as a chunk of text, even if not ending with a line end character

* wip
2024-12-31 15:16:25 -05:00
rammohan-y
c9f0481ca6 feat/1009, sending reason in X-Reason header when AHD processor giveup (#1014)
* feat/1009, sending reason in X-Reason header when AHD processor giveup is executed

* fixed jslint error

* added an alert
2024-12-31 15:09:23 -05:00
Hoan Luu Huu
564f6c9e55 support kill dial if sd ep is media timeout (#1001)
* support kill dial if sd ep is media timeout

* support kill dial if sd ep is media timeout

* support kill dial if sd ep is media timeout

* add media timeout reason header to bye message

* wip

* wip

* make configuration for freeswitch media timeout

* make configuration for freeswitch media timeout

* wip
2024-12-23 07:19:41 -05:00
Dave Horton
02f25f8343 fix cartesia channel vars for streaming (#1012) 2024-12-20 16:48:20 -05:00
Hoan Luu Huu
13ef89d605 support elevenlabs tts stream (#1011)
* support elevenlabs tts stream

* wip

* wip
2024-12-20 09:50:13 -05:00
Dave Horton
d05e470867 remove hardcoding of openai model 2024-12-19 18:42:57 -05:00
Hoan Luu Huu
17250f8386 support cartesia tts (#1008)
* support cartesia tts

* update speech util version

* update speech utils version
2024-12-19 07:35:47 -05:00
Dave Horton
ba3f46df64 Feat/tts streaming (#994)
* wip

* add TtsStreamingBuffer class to abstract handling of streaming tokens

* wip

* add throttling support

* support background ttsStream (#995)

* wip

* add TtsStreamingBuffer class to abstract handling of streaming tokens

* wip

* support background ttsStream

* wip

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>

* wip

* dont send if we have nothing to send

* initial testing with cartesia

* wip

---------

Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
2024-12-18 14:44:37 -05:00
RJ Burnham
f37e1540ee Make voicemail hints case insensitive (#1007) 2024-12-13 13:42:29 -05:00
Dave Horton
5e04db82bf Feat/deepgram voice agent (#1006)
* wip

* wip

* wip
2024-12-13 10:05:23 -05:00
Dave Horton
0aa37a83ae Feat/handle 3pcc invite (#1005)
* wip

* wip

* linting
2024-12-12 18:39:15 -05:00
Hoan Luu Huu
c29ab0d858 support referBy display name (#1000)
* support referBy display name

* wip

* update verb specification
2024-12-11 12:46:29 -05:00
Sam Machin
71d4c90cbc catch error (#1002)
* catch error

* remove notifyTaskDone
2024-12-11 12:34:44 -05:00
Hoan Luu Huu
a929a649f9 fix ConfirmCallSession cannot be played (#993)
* fix ConfirmCallSession cannot be played

* fix review comments

* fix review comments
2024-12-10 19:36:42 -05:00
Dave Horton
3bb4f1a29f fix #998 incorrectly sending final transcript with is_final=false (#999) 2024-12-10 18:48:02 -05:00
Hoan Luu Huu
54cc76606b fix cannot replace endpoint for adulting session (#992)
* fix cannot replace endpoint for adulting session

* fix cannot replace endpoint for adulting session
2024-12-06 07:51:24 -05:00
rammohan-y
0458bb7d6c Feat/884: Capture system_alert when feature-server is online or offline (#950)
* writing alerts during startup and shutdown of feature-server

* feat/884: created constants for system component name and state

* feat/884: added 0.2.11 version of time-series

* feat/884: renamed constant, and added GracefulShutdownInProgress system alert
2024-12-05 09:23:03 -05:00
Sam Machin
dce4fe1f82 Fix/986 (#990)
* throw new NonFatalTask error on play file not found

* linting

* make SpeechCredentialError subclass of NonFatalTask error

* cleanup

* Update action-hook-delay.js

* bump fsmrf version

* linting and package-lock

* Update package-lock.json

* update error

* only throw on fs error "File not found"

* add alert

* update time-series dep

* Update package-lock.json

* linting

* Update play.js

* remove stack trace from error message

* fix error formatting
2024-12-04 05:47:49 -05:00
Hoan Luu Huu
e96c35d571 fixed iamrole from sessionToken to securityToken (#988)
* fixed iamrole from sessionToken to securityToken

* wip

* support get aws credential from instance profile
2024-11-29 21:58:42 -05:00
Hoan Luu Huu
070671a3fb support send refer custom header to referhook (#981) 2024-11-28 08:34:34 -05:00
rammohan-y
efdb56f0a0 feat/971 - fix to allow hints objects array (#987) 2024-11-28 07:25:10 -05:00
Hoan Luu Huu
e2edbb4a5b support enable dtmf tone (#976)
* support enable dtmf tone

* wip

* wip
2024-11-26 20:25:48 -05:00
Markus Frindt
3a6d63a1c6 Fix the issue for outbound calls that always the None credentials wer… (#984)
* Fix the issue for outbound calls that always the None credentials were used. session:new for rest dial did not contain recognizer.label and synthesizer.label

* update comment

---------

Co-authored-by: mfrindt <m.frindt@cognigy.com>
2024-11-26 10:26:20 -05:00
rammohan-y
c874ab8100 feat/975: fixed continuous asr not stopping when asrDtmfTerminationDi… (#977)
* feat/975: fixed continuous asr not stopping when asrDtmfTerminationDigit is configured

* feat/975: giving first preference to asrDtmfTerminationDigit if there is already ASR happened
2024-11-26 08:23:11 -05:00
Dave Horton
24a66fed64 wip (#979) 2024-11-19 09:37:00 -05:00
Hoan Luu Huu
c8c3738ae8 custom stt vendor ws connection should not be closed in asrTimeout (#973) 2024-11-18 10:17:31 -05:00
Dave Horton
c1330d4651 fix transcribe fixes for speechmatics (#978)
* fix transcribe fixes for speechmatics

* update to verb-specs with fixes for speechmatics

* add support for speechmatics translation

* add handlers for receiving translations

* call translation hooks

* gather: no need to restart speechmatics after a final transcript during continuous asr

* graceful shutdown

* wip

* wip

* wip

* wip

* wip
2024-11-16 10:21:04 -05:00
Hoan Luu Huu
27f3a4b520 support SIP Privacy (#970) 2024-11-15 07:11:47 -05:00
Hoan Luu Huu
594c867192 unbridge dial ep with caller ep to avoid media release when referHook (#972) 2024-11-14 19:30:49 -05:00
Hoan Luu Huu
71c475e758 allow dub as http updateCall request (#974) 2024-11-14 07:20:33 -05:00
RJ Burnham
22ef201360 Add support to export to more than one otel platform. (#969)
* Add support to export to more than one otel platform.

This is helpful for if you want to keep using the bundled jaeger
support in the web console AND send to external OTLP based platform
(such as Axiom.co!).

* Lint issues and cleanup.
2024-11-13 10:25:02 -05:00
Hoan Luu Huu
5be3a910ad fix google custom voice can not be used without voice cloning key (#968) 2024-11-11 07:24:40 -05:00
Dave Horton
7615509e0b update test to use drachtio/drachtio-freeswitch-mrf:0.9.2-4 with aws_transcribe_ws fix (#964) 2024-11-08 09:52:26 -05:00
Dave Horton
851c071345 fix for #962 (#963) 2024-11-08 07:12:08 -05:00
rammohan-y
7911459c8c feat/940 stopped calling updateSpeechCredentialLastUsed (#944) 2024-11-05 15:19:08 -05:00
Hoan Luu Huu
be258950b0 feature server should send USER call to the sbc sip that is connect with the user (#949)
* feature server should send USER call to the sbc sip that is connect with the user

* feature server should send USER call to the sbc sip that is connect with the user

* feature server should send USER call to the sbc sip that is connect with the user

* fix review comment

* add env variable to enable the feature

* add env variable to enable the feature

* add env variable to enable the feature

* minor test update

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-11-05 15:14:04 -05:00
Hoan Luu Huu
0520386a1e fixed dial verb should use calling id from From header (#958)
* fixed dial verb should use calling id from From header

* fix review comment

* wip
2024-11-05 13:48:35 -05:00
Hoan Luu Huu
a4b1b22324 update speech utils version (#957) 2024-11-04 08:04:19 -05:00
Hoan Luu Huu
e800cca961 support google voice cloning (#956)
* support google voice cloning

* wip
2024-11-04 07:10:52 -05:00
Dave Horton
1efb198f72 Dial: fix error when receiving a REFER without a Referred-By header (#954) 2024-10-30 13:01:36 -04:00
rammohan-y
4b5df855e1 feat/952: removed unnecessary condition which is not logging the target_sid (#953) 2024-10-29 07:34:49 -04:00
Hoan Luu Huu
24126ef1ec fixed feature server kill currenttask if jambonz hangup the call (#948) 2024-10-26 10:21:16 -04:00
Dave Horton
8e4995ec02 fix bug where middleware produces a cached app.tasks with an empty array (#947) 2024-10-24 20:43:27 -04:00
Dave Horton
a005253a9f update to latest speech-utils 2024-10-18 12:27:29 -04:00
rammohan-y
10efc5d608 feat/942: updated optimal google models (#943) 2024-10-18 10:03:56 -04:00
Hoan Luu Huu
1c48c40496 Support sip_parent_callid for sbc-outbound (#939)
* include X-CID for dial outbound if the call-session is outbound

* include X-CID for dial outbound if the call-session is outbound

* include X-CID for dial outbound if the call-session is outbound

* include X-CID for dial outbound if the call-session is outbound
2024-10-17 07:18:58 -04:00
Dave Horton
c79a6aaf8a Feat/llm update (#936)
* add support for llm:update during LLM session

* make sure to end openai session when Llm task is killed

* wip

* wip

* wip

* wip

* wip

* wip

* wip
2024-10-16 09:27:51 -04:00
Hoan Luu Huu
da5f51e8e0 update speech utils version (#937) 2024-10-16 08:26:06 -04:00
Hoan Luu Huu
e7fd40e297 support sbcCallId in calling/status hook (#934)
* support sbcCallId in calling/status hook

* support sbcCallId in calling/status hook

* support sbcCallId in calling/status hook

* wip

* wip

* wip
2024-10-14 18:00:09 -04:00
Dave Horton
f541ff1a15 add support for aws language model name when transcribing (#890)
* add support for aws language model name when transcribing

* wip - from prev branch

* wip

* support both aws grpc and ws api - detect based on transcription payload

* update to drachtio-fsmrf@4.0.0

* fix for grpc compatibility, requires JAMBONES_AWS_TRANSCRIBE_USE_GRPC env

* back out major update to drachtio-srf and fsmrf; that should come in a separate PR

* update drachtio-srf
2024-10-12 19:46:31 -04:00
Dave Horton
98b968d61f update test db (#933) 2024-10-12 19:34:40 -04:00
Dave Horton
f09722a5b5 Feat/llm verb (#931)
* wip

* working version for openai realtime beta

* lint

* tests: update db to latest 0.9.2 schema
2024-10-12 19:26:27 -04:00
Dave Horton
f84b3793e1 Feat/speechmatics (#932)
* wip

* initial working version of speechmatics

* linting
2024-10-12 18:42:53 -04:00
Dave Horton
84b7456c2d add support for speechmatics asr (#920)
* update to verb specs with speechmatics support

* discover local ip using os module
2024-10-11 09:24:36 -04:00
Hoan Luu Huu
c67499e38b update speech version 0.1.18 (#930) 2024-10-11 08:59:33 -04:00
Hoan Luu Huu
e372a3cdfb update speech version (#927) 2024-10-09 19:46:44 -04:00
rammohan-y
ea303caa1c feat/924: made actions as optional when there is no noResponseTimeout (#925) 2024-10-08 08:06:12 -04:00
Hoan Luu Huu
2af67d8f05 support changing log level runtime (#926)
* support changing log level runtime

* support changing log level runtime

* support changing log level runtime
2024-10-07 09:51:51 -04:00
Hoan Luu Huu
96b3b0fe07 Allow Say, Gather, Transcribe is able to finished if there is error for speech credential (#910)
* allow move to next task if say verb is failed because of speech credential

* allow move to next task if say verb is failed because of speech credential

* allow move to next task if say verb is failed because of speech credential

* wip

* wip
2024-10-01 13:40:41 -04:00
Hoan Luu Huu
b898b70520 support config referHook (#915) 2024-09-30 08:13:48 -04:00
Hoan Luu Huu
b9ef00dfc7 Fixed Gather digits does not work without nested say/play (#914)
* Fixed Gather digits does not work without nested say/play

* fix review comment

* add assert to make sure we don't register dtmf twice in gather verb
2024-09-30 07:46:21 -04:00
rammohan-y
68fa3c013d Feat/902: executing giveUpAction when noResponseGiveupTimeout is reached (#908)
* feat/893: made noResponseTimeout and noResponseGiveUpTimeout independent

* support for giveUpActions implemented

* feat/902: using makeTask and exec of task to execute the giveUpActions

* feat/902: changed version of verb-specifications and speech-utils

* feat/902: fixed jslint errors

* feat/902: modified log

* feat/902: using a new event giveupWithTasks for processing giveUpActions

* feat/902: removed check for wakeupResolver and replaceApplication already taking care of wakeupResolver, also updated the verb-specifications version

* feat/902: removed sync for _onNoResponseGiveUpTimer function
2024-09-26 09:40:30 -04:00
Dave Horton
7c24208067 fix #916: race condition where call just ended when action hook play completes (#917) 2024-09-25 20:17:22 -04:00
Dave Horton
7f7c26e982 fix for https://github.com/jambonz/freeswitch-modules/issues/117 (#912) 2024-09-25 20:13:56 -04:00
Markus Frindt
402adc2098 add label to tts stt spans (#909)
Co-authored-by: Markus Frindt <m.frindt@cognigy.com>
2024-09-25 16:44:15 -04:00
rammohan-y
724d4fb713 Feat/893: Made noResponseTimeout and noResponseGiveUpTimeout independent (#896)
* feat/893: made noResponseTimeout and noResponseGiveUpTimeout independent

* feat/893: not assuming 0 if noResponseTimeout is not specified
2024-09-19 10:29:51 -04:00
Hoan Luu Huu
673827cce3 fixed adulting call session does not send status callback if hangup is used (#907) 2024-09-19 10:08:15 -04:00
Anton Voylenko
c4c5ad33d8 feat: loop dial music (#769) 2024-09-17 13:51:01 -04:00
rammohan-y
7bbfc01cb0 feat/864: enable bargeIn when minBargeinWordCount is 0 (#900)
* feat/864: checking for undefined, because 0 is a valid value for minBargeinWordCount

* feat/864: checking for undefined and null

* feat/864: corrected spelling of mode and added check for undefined as 0 is a valid value for vad.mode
2024-09-17 13:26:29 -04:00
Hoan Luu Huu
7daf056d6b allow set vendor model or engine in runtime (#897) 2024-09-12 09:03:15 +01:00
Hoan Luu Huu
e69afc4be4 fix recognizer/synthesizer label wrongly select between verb and app (#881)
* fix recognizer/synthesizer label wrongly select between verb and application

* fix jslint

* fix ASR cannot fallback

* update tts fallback does not send notification
2024-09-11 09:34:52 +01:00
rammohan-y
3a7cc27d0a feat/891: getting the options from customOptions in case of custom stt (#892) 2024-09-10 09:38:33 +01:00
Dave Horton
c4a6057fc6 bump version 2024-09-04 13:31:05 +01:00
rammohan-y
174438bb01 Feat/882: default model setting for en-IN language (#888)
* feat/882: default model setting for en-IN language

* feat/882: refactored if into ||
2024-09-03 13:22:38 +01:00
Antony Jukes
4348615b75 Create Call Rest is missing target headers on outdial (#874)
* add target headers for rest create-call

* rebased

---------

Co-authored-by: ajukes <ajukes@callable.io>
2024-09-02 21:48:09 +01:00
Hoan Luu Huu
d365883bfe fix #883 that after kicked from conference, no longer receive freeswitch CUSTOM event (#886)
* fix #883 that after kicked from conference, no longer receive freeswitch CUSTOM event

* fix #883 that after kicked from conference, no longer receive freeswitch CUSTOM event

* reset Esl Custom event after conference.

* update drachtio fsmrf version
2024-08-31 14:47:39 +01:00
Dave Horton
c0ab936b76 wip (#830)
* wip

* wip

* wip

* wip

* update deps

* update test to use latest freeswitch image
2024-08-29 15:23:49 -04:00
Dave Horton
600ff763fa fix #840 (#880) 2024-08-26 10:14:59 -04:00
Hoan Luu Huu
4d077e990f Fix/audio issue kick conference (#878)
* rest call session does not handle for RE-INVITE

* fixed audio is bad after kicked from conference

* fix review comment
2024-08-23 09:28:39 -04:00
RJ Burnham
eccef54b04 Add support for configuring the IP address that is advertised to the API server. (#875)
This is needed when running in fargate as ip.address() will return the wrong ip address.
2024-08-23 08:33:16 -04:00
Dave Horton
2790e6d9ad fix linting error from PR 2024-08-20 08:36:24 -04:00
rammohan-y
f95d8639be Feat/868: Use global synthesizer config properties for say verb (#869)
* feat/868: Use the properties from global config in verb for TTS

* feat/868: setting this.options to combination of cs.synthesizer.options and this.options

* feat/868: Move the logic of copying cs properties to parent class tts-task.js

* feat/868: add empty line that was removed, say.js restored to original version

* feat/868: moved _synthesizeWithSpecificVendor to tts-task.js

---------

Co-authored-by: Rammohan Yadavalli <rammohan.yadavalli@kore.com>
2024-08-20 08:31:44 -04:00
Hoan Luu Huu
fc838512b6 Fixed long amd hints make freeswitch module cannot connect the vendor (#872)
* rest call session does not handle for RE-INVITE

* fixed long amd hints make freeswitch module cannot connect the vendor
2024-08-20 07:30:32 -04:00
Dave Horton
68992bccf6 fix #866 (#867) 2024-08-16 14:54:22 -04:00
Anton Voylenko
c131fceea7 fix: misleading log on call creation (#865) 2024-08-15 09:15:08 -04:00
Hoan Luu Huu
12174359f2 fix support precache audio with tts stream (#855)
* fix support precache audio with tts stream

* update speech util
2024-08-15 08:22:00 -04:00
Hoan Luu Huu
020c84d2df rest call session does not handle for RE-INVITE (#863) 2024-08-14 07:19:00 -04:00
Hoan Luu Huu
62d71d2504 fix conference in cluster have correct direction in callInfo (#842)
* fix conference in cluster have correct direction

* update github action
2024-08-13 20:02:50 -04:00
Anton Voylenko
c594797cb0 fix: support _lccMuteStatus for conference (#853) 2024-08-13 09:57:26 -04:00
Anton Voylenko
bae96a6752 fix: do not run snake case for customer data (#861) 2024-08-13 09:45:58 -04:00
rammohan-kore
ee68575ea4 Feat/844 Sending callSid in the custom-stt start message (#848)
* https://github.com/jambonz/jambonz-feature-server/issues/844

sending callSid in options, so that the callSid is sent to stt websocket in case of custom websocket

* feat/844: checking for existence of task.cs.callSid

* feat/844: changed the condition to task.cs?.callSid
2024-08-13 09:28:19 -04:00
rammohan-kore
6d0aeff6e2 feat/859: updated verb-specifications to 0.0.76 (#860) 2024-08-13 08:56:56 -04:00
rammohan-kore
d2a5d483d0 feat/856: sending DEEPGRAM_SPEECH_MODEL_VERSION to deepgram (#858) 2024-08-12 09:34:23 -04:00
Hoan Luu Huu
d3eb106d5d clear gather timeout if interim result received (#800)
* clear gather timeout if interim result received

* fix to reset timeout timer if receive interim result

* fix to reset timeout timer if receive interim result
2024-08-08 07:50:46 -04:00
Hoan Luu Huu
689e55bdf0 support wait hook for conf:participant-action hold (#851) 2024-08-08 07:42:11 -04:00
Hoan Luu Huu
ed7e036890 support jambonz transcribe sampling rate (#847)
* support jambonz transcribe sampling rate

* fix review comment

* update verb specification version
2024-08-07 10:39:58 -04:00
Hoan Luu Huu
f90fcdf57b Feat/deepgram tts onprem (#846)
* support deepgram tts onprem

* update speech utils version
2024-08-07 07:25:28 -04:00
rammohan-kore
c2a1819cbb feat/813 - notify speech-bargein-detected and dtmf-bargein-detected events (#823)
* feat/813 - notify speech-bargein-detected and dtmf-bargein-detected events

* fix for #826 race condition in say (#827)

* fix for #826 race condition in say

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* Update transcription-utils.js (#802)

* Check the confidence levels of a transcript  with minConfidence (#808)

* https://github.com/jambonz/jambonz-feature-server/issues/807

* feat/807: Using minConfidence from recognizer settings

* feat/807: new reason stt-min-confidence-error

* feat/807: sending stt-min-confidence instead of  stt-min-confidence-error

* feat/807: sending stt-low-confidence instead of  stt-min-confidence-error

* feat/807 - removed ? for this.data

* fix conference end is not sent when moderator leave conference (#825)

* fix conference end is not sent when moderator leave conference

* wip

* fix review comment

* feat/813: checking for playComplete before sending dtmf-bargein-detected event

* feat/813: added this.playComplete=true at the end of _killAudio method

* feat/813: removed empty line

* feat/813: removed nested if and added condition to main if

* feat/813: notifyStatus called when not playComplete

* feat/813: referring to time-series 0.2.9 version

* feat/813: generated package-lock.json

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
Co-authored-by: Vinod Dharashive <vdharashive@gmail.com>
Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
2024-08-06 09:34:15 -04:00
rammohan-kore
4259a24fa0 feat/758: for google getting the language_code from evt (#843) 2024-08-06 07:45:08 -04:00
rammohan-kore
e4e37d5697 feat/836: capturing callSid for STT and TTS alerts (#838)
* feat/836: capturing callSid for STT and TTS alerts

* feat/836: corrected assignment of callSid and added target_sid at few more alerts

* update github action

---------

Co-authored-by: Quan HL <quan.luuhoang8@gmail.com>
2024-08-05 12:14:08 -04:00
Markus Frindt
b7a3c2970a Bug/fix missing arg reconnect alert (#835)
* Add url as argument to a webhook connection failure alert after reconnect error

* npm audit fix to remove 15 high vulnerabilities

---------

Co-authored-by: Markus Frindt <m.frindt@cognigy.com>
2024-07-31 09:25:31 -04:00
Hoan Luu Huu
cc33ac1d51 fix conference end is not sent when moderator leave conference (#825)
* fix conference end is not sent when moderator leave conference

* wip

* fix review comment
2024-07-30 07:32:07 -04:00
rammohan-kore
4b4807e4cf Check the confidence levels of a transcript with minConfidence (#808)
* https://github.com/jambonz/jambonz-feature-server/issues/807

* feat/807: Using minConfidence from recognizer settings

* feat/807: new reason stt-min-confidence-error

* feat/807: sending stt-min-confidence instead of  stt-min-confidence-error

* feat/807: sending stt-low-confidence instead of  stt-min-confidence-error

* feat/807 - removed ? for this.data
2024-07-25 12:22:42 -04:00
Vinod Dharashive
9a3c731389 Update transcription-utils.js (#802) 2024-07-24 15:20:26 -04:00
Dave Horton
edd8f20642 fix for #826 race condition in say (#827)
* fix for #826 race condition in say

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip
2024-07-24 12:56:03 -04:00
Hoan Luu Huu
ee24041cba Allow joining conference as muted (#821)
* allow entering conference as muted

* allow entering conference as muted
2024-07-20 12:31:25 -04:00
Hoan Luu Huu
83f7abcd89 Kick member out conference (#820) 2024-07-20 12:11:36 -04:00
Hoan Luu Huu
c9194168d2 support restDial.referhook (#812)
* support restDial.referhook

* support restDial.referhook

* wip
2024-07-19 10:22:29 -04:00
Hoan Luu Huu
83191487cf fix config.transcribe should not override config.transcribe.recognizer (#817) 2024-07-19 07:26:49 -04:00
Hoan Luu Huu
65ef4e6d64 fix conference in feature server cluster join, leave, end events are … (#803)
* fix conference in feature server cluster join, leave, end events are missing original data

* wip
2024-07-12 08:36:43 -06:00
Hoan Luu Huu
ddb4719220 Merge pull request #806 from jambonz/feat/fd_269
support disable/enable listen DTMF in prompt
2024-07-11 20:09:40 +07:00
Quan HL
f514a65f63 support disable/enable listen DTMF in prompt 2024-07-10 08:37:02 -06:00
Hoan Luu Huu
5ccea65b7f stt/tts label can be empty, should not assign application level label… (#804)
* stt/tts label can be empty, should not assign application level label as default value

* wip
2024-07-10 08:36:00 -06:00
Dave Horton
8672152873 fix for #765 (#785) 2024-06-28 09:05:05 -04:00
Dave Horton
425b88f930 fix: package.json & package-lock.json to reduce vulnerabilities (#792)
The following vulnerabilities are fixed with an upgrade:
- https://snyk.io/vuln/SNYK-JS-UNDICI-7361667

Co-authored-by: snyk-bot <snyk-bot@snyk.io>
2024-06-28 09:04:25 -04:00
Dave Horton
111976bea5 bug: clear asr timer when gather resolves with timeout (#788) 2024-06-28 08:54:36 -04:00
Dave Horton
ec6d7b3f42 persistent connection for custom stt vendors in transcribe (#794) 2024-06-28 08:33:10 -04:00
Vinod Dharashive
5e1b826da4 Aws polly engine fix (#789)
* Aws polly engine fix  

engine parameter was  not able to change using synthesizer

* WIP

code correction and set default engine to Neural

* WIP

* WIP

Updated  tts-task.js

* WIP
2024-06-25 13:29:28 -04:00
Dave Horton
be9c3406c1 fix bug where play incorrectly plays again after response received (#786)
* fix bug where play incorrectly plays again after response received

* wip

* fix race condition where bot delay audio kicks off same instant we receive commands
2024-06-25 12:25:55 -04:00
Dave Horton
2f3ef1654a fix: package.json & package-lock.json to reduce vulnerabilities (#787)
The following vulnerabilities are fixed with an upgrade:
- https://snyk.io/vuln/SNYK-JS-WS-7266574

Co-authored-by: snyk-bot <snyk-bot@snyk.io>
2024-06-24 15:07:55 -04:00
Hoan Luu Huu
0baa080a1e update getAwsAuthToken use parameters in an object (#784)
* update getAwsAuthToken use parameters in an object

* wip

* update speech utils
2024-06-15 08:11:31 -04:00
Dave Horton
f5cbd26c9f update to speech-utils with support for JAMBONES_DISABLE_AZURE_TTS_STREAMING (#776) 2024-06-14 09:31:28 -04:00
Dave Horton
d9fd82fa60 major refactor and simplification of actionHookDelay feature (#771)
* major refactor and simplification of actionHookDelay feature

* wip for #765

* wip

* testing

* wip

* added validity checks for actionHookDelay properties

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* fix bug where config happens before endpoint is established

* wip

* hangup and clear ws connection if nogiveuptimer expires

* wip

* wip

* wip
2024-06-14 09:24:26 -04:00
Dave Horton
76a3aa7f42 send end of utterance events if using deepgram, interim events are enabled, and utterance_end_ms option is set (#772) (#782) 2024-06-13 13:18:32 -04:00
Hoan Luu Huu
cafe149bdf fix wrong vad notification to background bargein (#781)
* fix wrong vad notification to background bargein

* wip

* wip

* reset bargeinHandled every on reset
2024-06-12 10:52:53 -04:00
Anton Voylenko
9969e39e7e set valid terminatedBy for rest call (#779) 2024-06-08 17:39:40 -04:00
Hoan Luu Huu
8eea212df2 Fix/verbio stt (#770)
* fix verbio stt normalization

* wip
2024-06-01 07:38:36 -04:00
Hoan Luu Huu
e8e356ea3a update speech util version to fix verbio cache (#766) 2024-05-31 06:42:22 -04:00
Hoan Luu Huu
c5e19bf775 support verbio speech (#757)
* support verbio speech

* wip

* wip

* wip

* update speech utils

* update verb specification
2024-05-29 07:55:46 -04:00
Hoan Luu Huu
498dd64025 support mod_vad_detect (#762)
* support mod_vad_detect

* wip

* update verb spec and drachtio fsmrf

* Update example-voicemail-greetings.json (#761)

Update voicemail english greetings

* wip

* stopvad if playdone

---------

Co-authored-by: Vinod Dharashive <vdharashive@gmail.com>
2024-05-29 07:31:59 -04:00
Dave Horton
24b6d2464b update speech-utils and fsmrf (#764) 2024-05-28 18:24:51 -04:00
Dave Horton
cd5421120f fix race condition with filler noise and also play filler noise when idle and waiting for commands (#763) 2024-05-28 12:45:29 -04:00
Hoan Luu Huu
d7c3a4a632 support mod_custom_tts (#731) 2024-05-28 12:30:25 -04:00
Hoan Luu Huu
c53ad89154 support direct call to conference (#746)
* support direct call to conference

* wip

* wip

* wip
2024-05-28 10:30:52 -04:00
Vinod Dharashive
10b98630d3 Update example-voicemail-greetings.json (#761)
Update voicemail english greetings
2024-05-27 21:13:48 -04:00
Dave Horton
d132bdb92b fix gather race condition (#759) 2024-05-22 14:03:15 -04:00
Hoan Luu Huu
6be3fd9b64 say verb should not print speech credentials in log when tts stream API is used (#756) 2024-05-21 08:38:18 -04:00
Dave Horton
844b0cb05d log endpoint uuid for cross referencing with freeswitch logs 2024-05-20 11:04:15 -04:00
Dave Horton
c0b56d4fc6 per email from microsoft, do not restart STT connection when we get a no audio event (#754) 2024-05-17 11:19:01 -04:00
Dave Horton
d27de284e7 update to drachtio-srf@4.5.35 (#750) 2024-05-09 08:32:52 -04:00
Hoan Luu Huu
5e97847a2f fix fs keep looping forever if there is no fallback TTS (#749) 2024-05-09 06:15:57 -04:00
Hoan Luu Huu
17c379df47 update stats collector version (#744) 2024-05-06 20:06:04 -04:00
Hoan Luu Huu
e7bc0b0737 fix dead lock in say verb while waiting playback-stop and say verb is killed (#742) 2024-05-05 08:12:29 -04:00
Dave Horton
dfe623e78a Fix/google race condition gather (#743)
* lint

* logging

* wip
2024-05-03 12:53:26 -04:00
Dave Horton
56b8f0623b limit utterance_end_ms to (1000,5000) per discussion with Deepgram (#740) 2024-05-02 13:19:14 -04:00
Hoan Luu Huu
7bcbab5b74 feat tts stream fallback (#736)
* feat tts stream fallback

* wip

* wip

* wip

* wip

* wip

* wip

* fix review comment
2024-05-02 08:43:41 -04:00
Hoan Luu Huu
44e6a3513d support speech aws polly by role_arn (#729)
* support speech aws polly by role_arn

* support aws stt assume role

* wip

* update speech utils version
2024-05-02 07:59:21 -04:00
Dave Horton
fad16144b9 update undici and ws (#739) 2024-05-01 14:20:43 -04:00
Dave Horton
6523a861c0 fix asr error notify wrong vendor name (#728) (#738)
Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
2024-05-01 13:48:23 -04:00
Dave Horton
cff67f5e4c dial race where caller hangs up while dial is starting (#737) 2024-05-01 13:38:59 -04:00
Dave Horton
c77bd84e0e we should restart asr timer after a partial transcript (#735) 2024-04-30 14:53:08 -04:00
Dave Horton
3cd7a619ad ignore transcriptions from previous turns of conversation (#734) 2024-04-30 08:21:27 -04:00
Dave Horton
59cf02bd04 wait for session:reconnect ack to send queued msgs (#723) (#732)
Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
2024-04-25 11:22:15 -04:00
Dave Horton
a18d55e9ab minor fix for leaving coach mode in conferencing 2024-04-22 12:46:34 -04:00
Dave Horton
d474b9d604 Feat/advanced conferencing features (#730)
* update drachtio-fsmrf and fixes to setCoachMode

* wip

* wip

* wip

* wip

* wip

* update gh actions
2024-04-22 11:00:05 -04:00
Dave Horton
8d2b60c284 minor 2024-04-21 09:51:05 -04:00
Dave Horton
9cf9d4f587 Fix/0.8.5 cherries (#724)
* kill play task if bot responds verbs while actionHook delay is enabled (#712)

* kill play task if bot responds verbs while actionHook delay is enabled

* fix actionHook delay continues even the bot already responded verbs

* wip

* wip

* wip

* gather is hang if listenDuringPrompt = false and say/play task throw exception (#717)

* merge fix for Support ASR TTS fallback (#713)

---------

Co-authored-by: Hoan Luu Huu <110280845+xquanluu@users.noreply.github.com>
2024-04-17 11:01:21 -04:00
Dave Horton
bd002ede48 ignore google errors with error_code 0 2024-04-16 20:06:26 -04:00
Dave Horton
1a2aa91973 proper fix for precache (#721)
* proper fix for precache

* wip
2024-04-15 16:25:12 -04:00
Dave Horton
e322b7d8d3 be more cautious about pre-caching prompts; in particular, a Config verb will not give us time to precache so avoid in that scenario (#720) 2024-04-15 15:38:10 -04:00
Hoan Luu Huu
7da11df88e default DEEPGRAM_SPEECH_UTTERANCE_END_MS is 1000 (#719) 2024-04-14 19:39:07 -04:00
Hoan Luu Huu
09cf1345f6 tts span for whisper (#718)
* tts span for whisper

* support deepgram tts span

* support playht tts span

* support rimelabs tts span

* wip
2024-04-14 09:14:49 -04:00
Dave Horton
2595f527ff gather: fix bug where empty deepgram transcript saved incorrectly 2024-04-13 09:59:02 -04:00
Dave Horton
1d77c0cd20 bugfx: bargein after first when config bargein with sticky=true fails 2024-04-12 20:08:21 -04:00
Hoan Luu Huu
9eab81268b support mod_rimelabs_tts (#716)
* support mod_rimelabs_tts

* update speech utils
2024-04-12 07:28:45 -04:00
Dave Horton
ecf3d140d6 fix #714 (#715) 2024-04-10 16:23:22 -04:00
Hoan Luu Huu
4a52be9171 support mod_playht_tts (#711)
* support mod_playht_tts

* update speech utils version
2024-04-08 10:21:54 -04:00
Dave Horton
9b722ae36d update deps (#709)
* update deps

* version
2024-04-07 18:22:31 -04:00
Dave Horton
370b046fac update to speech utils with azure 1.36.0 2024-04-07 12:16:35 -04:00
Hoan Luu Huu
fca391c32e support listen verb support bidirectionalAudioSampleRate (#695)
* support listen verb support bidirectionalAudioSampleRate

* wip

* update verb spec and drachtio fsmrf

* fix listen failing testcase

* fix review comment

* update freeswitch test image

* update freeswitch test image

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-04-06 13:20:01 -04:00
Dave Horton
043860c4a3 update to speech utils supporting deepgram tts (#708) 2024-04-06 12:35:03 -04:00
Dave Horton
a021ee3112 update undici (#707) 2024-04-05 17:23:21 -04:00
Dave Horton
8999c85a71 Fixes/ws testing dh (#704)
* fixes from testing with translator app

* more updates

* linting

* update gh actions to node 20

* add support for google v2 preconfigured recognizer

* add support for google voice activity events

* update to speech-utils@0.0.45

* update speech-utils to support caching azure tts

* transcribe must buffer transcripts for channel 1 and 2 separately

* further fix for accumulating transcripts

* linting

* deepgram sends transcripts with empty alternatives array

* fix deepgram returning an empty array
2024-04-03 14:30:49 -04:00
Hoan Luu Huu
72147a8110 support google v2 enableVoiceActivityEvents (#703)
* support google v2 enableVoiceActivityEvents

* support google v2 enableVoiceActivityEvents
2024-04-02 10:14:54 -04:00
Hoan Luu Huu
93d0e41e31 support google version 2 (#699)
* support google version 2

* update new parameters for google v2
2024-04-02 07:33:22 -04:00
Hoan Luu Huu
5b1d8a8ff3 Feat/ambient sounds (#678)
* initial support for coaching mode in conference

* wip

* wip

* add support for answer verb

* wip

* wip

* wip

* wip

* wip

* updates to rename option to dub

* wip

* wip

* wip

* update verb-specs

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* add option to boost audio signal in main channel

* wip

* wip

* wip

* wip

* wip

* wip

* for now, bypass use of streaming apis when generating tts audio for dub tracks

* add nested dub to dial

* wip

* add support for filler noise

* kill filler noise when gather killed

* wip

* wip

* while using sayOnTrack, we have to enclose the say command in double quotes

* disableTtsStreaming = false

* allow transcribe of b leg only on dial verb

* dub.say can either be text or object like say verb with text and synthesizer

* remove loop for sayOnTrack

* update speech-utils

* fixes for testing transcribe verb and support for dub and boostAudioSignal in lcc commands

* add dial.boostAudioSignal

* fix bug where session-level recognizer settings incorrectly overwrite verb-level settings

* update verb specs

* update dial to support array of dub verbs

* fix bug setting gain

* lint

* wip

* update speech-utils

* use new endpoint methods for mod_dub

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-03-23 16:23:57 -04:00
Dave Horton
ec58232b61 Fix/replace application issue (#692)
* fix scenario where ws replace application from gather while awaiting command and no tasks on execution stack

* lint

* remove some debug logging
2024-03-23 16:14:16 -04:00
Hoan Luu Huu
65c241bcd1 gather verb should clean up dtmf listener even when dtmfBargein=false (#686) 2024-03-23 16:01:41 -04:00
Hoan Luu Huu
75b6f89e0c add log to get more detail for AMD issue (#687)
* add log to check issue

* update drachtio-fsmrf 3.0.38
2024-03-21 09:14:32 -04:00
Hoan Luu Huu
b80d39d205 fix asrtimer always return vendor=deepgram (#682) 2024-03-13 12:57:55 -04:00
Hoan Luu Huu
40f70e3531 update speech utils version 0.0.63 (#681) 2024-03-12 09:12:18 -04:00
Hoan Luu Huu
1914b88af9 support azure language id mode (#674) 2024-03-12 08:35:01 -04:00
Hoan Luu Huu
c946a5d14d fix actionHookDelay feature is not working properly if there is no de… (#679)
* fix actionHookDelayAction when no actions are defined

* terminated by jambonz for giveuptimeout
2024-03-12 08:33:03 -04:00
Hoan Luu Huu
878578fe0f Fix/issue 676 (#680)
* fix bargein is not working

* fix bargein is not working
2024-03-11 08:46:38 -04:00
Hoan Luu Huu
9b3be6c0b9 allow custom header on pause, resume recording (#670)
* allow custom header on pause, resume recording

* fix review comments
2024-03-05 18:01:32 -05:00
Hoan Luu Huu
4ae661daea remove unnecessary code for cleanup disableBotMode (#673) 2024-03-04 18:03:32 -05:00
Dave Horton
dbd3b59901 fix #666 2024-02-26 09:39:49 -05:00
Hoan Luu Huu
06b066a3f2 update speech util to support whisper stream (#657)
* update speech util to support whisper stream

* minor editing of span attributes

* more span attrs cleanup

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-02-22 14:17:29 -05:00
Dave Horton
fc3655c9bd fixes for confirm session (#663)
* fixes for confirm session

* allow empty dialconfirm array
2024-02-22 12:33:35 -05:00
dependabot[bot]
1b5f801830 Bump undici from 5.26.2 to 5.28.3 (#647)
Bumps [undici](https://github.com/nodejs/undici) from 5.26.2 to 5.28.3.
- [Release notes](https://github.com/nodejs/undici/releases)
- [Commits](https://github.com/nodejs/undici/compare/v5.26.2...v5.28.3)

---
updated-dependencies:
- dependency-name: undici
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-22 10:13:19 -05:00
Dave Horton
d0ebe3f99f fix possible undefined reference in precache audio (#662)
* fix possible undefined reference in precache audio

* fix parsing of JAMBONES_EAGERLY_PRE_CACHE_AUDIO
2024-02-22 07:58:41 -05:00
Dave Horton
51a379998f fix #655 (#658)
* fix #655

* fix race condition
2024-02-22 07:46:53 -05:00
dependabot[bot]
c2ae42a456 Bump ip from 1.1.8 to 1.1.9 (#660)
Bumps [ip](https://github.com/indutny/node-ip) from 1.1.8 to 1.1.9.
- [Commits](https://github.com/indutny/node-ip/compare/v1.1.8...v1.1.9)

---
updated-dependencies:
- dependency-name: ip
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-22 07:31:01 -05:00
Hoan Luu Huu
c187685054 feat actionHook delay action (#470)
* feat actionHook delay action

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip

* wip
2024-02-20 21:09:19 -05:00
Hoan Luu Huu
81234a583c support update record from application ws connection (#645) 2024-02-19 07:53:39 -05:00
Hoan Luu Huu
206849fa25 create outbound dial from webhook ws (#581)
* wip, create outbound dial from webhook ws

* wip, create outbound dial from webhook ws

* clean
2024-02-13 07:58:39 -05:00
Dave Horton
662b6d3d95 fix: elevenlabs caching with streaming 2024-02-12 21:08:41 -05:00
Anton Voylenko
5c070597cf tag outdial session (#643) 2024-02-12 13:16:43 -05:00
Dave Horton
42be9ff1ca update to speech-utils with env to disable elevenlabs streaming (default is on) 2024-02-12 12:49:45 -05:00
Dave Horton
f0533c881b deepgram gather: if both endpointing and utterance_end_ms are set (bu… (#644)
* deepgram gather: if both endpointing and utterance_end_ms are set (but not continuous asr) return either when we get speech_final or UtteranceEnd.  This is the belt-and-suspenders approach deepgram is recommending

* include verb id in action hook if one was provided in the verb set

* minor
2024-02-12 12:32:43 -05:00
Hoan Luu Huu
c894369a13 fix pause resume background transcribe (#586)
* fix pause resume background transcribe

* fix review comments
2024-02-12 10:38:07 -05:00
Dave Horton
565478cc0a #573 address race condition in pause/resume recording (#584) 2024-02-12 10:26:34 -05:00
Hoan Luu Huu
cdd25ca33d Fix/gather timeout (#594)
* fix gather verb timeout does not work

* wip

* wip

* wip

* wip

* fix review comments
2024-02-12 10:13:02 -05:00
Markus Frindt
ef2306e558 Improve Deepgram default model by language (#641)
Co-authored-by: Markus Frindt <m.frindt@cognigy.com>
2024-02-12 09:53:14 -05:00
Dave Horton
9c33a790bd update to latest speech-utils (#639) 2024-02-08 15:54:45 -05:00
Dave Horton
9f9a9ec598 initial changes for deepgram on-prem (#636)
* initial changes for deepgram on-prem

* typo

* fixes for selecting deepgram model

* update some property names

* wip

* wip

* wip
2024-02-07 14:21:05 -05:00
Dave Horton
75566bb268 bump to start 0.8.6 2024-02-07 08:51:05 -05:00
Hoan Luu Huu
a55f81676b Tts/elevenlabs streaming (#629)
* update to fsmrf with fix

* changes to support elevenlabs tts streaming

* say: add vendor data to span

* bug: tts spans must include cached property

* add env for JAMBONES_USE_FREESWITCH_TIMER_FD

* fix bug in prev commit

* wip

* linting

* wip - caching files generating by streaming tts

* wip caching

* cleanup some logs

* handle tts streaming failure, write alert

* update node version dependency

* set timerfd on outbound call scenarios

* default model to nova-2-phonecall when using deepgram

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-02-07 08:49:36 -05:00
Hoan Luu Huu
48a81072e8 fix gather should not play audio if gather already resolved (#638) 2024-02-06 07:42:44 -05:00
Hoan Luu Huu
74ede31cd3 fix ws reconnect does not send verb:hook data (#633) 2024-01-31 07:20:57 -08:00
Anton Voylenko
048229f019 fix(dequeue): retrieve by callsid (#630) 2024-01-31 07:06:08 -08:00
Hoan Luu Huu
71e266ae32 Merge pull request #632 from jambonz/fix/issue_631
fix default gather input is digits and gather dtmf should not require speech
2024-01-31 12:01:36 +07:00
Quan HL
5b607693dc fix default gather input is digits and gather dtmf should not require speech 2024-01-31 11:46:29 +07:00
Dave Horton
0491c5ce25 minor logging changes 2024-01-27 12:59:23 -05:00
Vinod Dharashive
a7fa2f95dd Change regex to have fqdn and IP (#625) 2024-01-25 09:13:30 -05:00
Dave Horton
901e412343 fix bug where final transcript with finished header results in timeout (#624) 2024-01-25 08:48:22 -05:00
Dave Horton
e57c7ba90a fix for #627 (#628) 2024-01-25 08:46:57 -05:00
Hoan Luu Huu
b867395d87 fix adulting call does not send status callback on hangup (#623) 2024-01-23 07:12:43 -05:00
Hoan Luu Huu
1a80910f91 fix pause transcribe cannot close transcription on 2nd leg (#621) 2024-01-18 11:21:25 -05:00
Hoan Luu Huu
5d4f25622d fixed call hangup as call is await for new task and received ws command (#619)
* fixed call hangup as call is await for new task and received ws command

* wi
2024-01-18 11:12:50 -05:00
Dave Horton
aabf37e269 update db-helpers 2024-01-17 13:23:21 -05:00
Hoan Luu Huu
b45275789b verbhook on ws connection should be ended in next redirect command (#616)
* verbhook on ws connection should be ended in next redirect command

* wip

* wip

* minor change for readability

---------

Co-authored-by: Dave Horton <daveh@beachdognet.com>
2024-01-17 12:37:03 -05:00
Dave Horton
6d5ef6a215 gather: dont resolve if deepgram sends final/empty transcript with no transcripts previously buffered (#618) 2024-01-17 10:59:37 -05:00
Hoan Luu Huu
b423a51638 feat: allow update azure endpoint ID from recognizer property (#612) 2024-01-17 07:34:02 -05:00
Hoan Luu Huu
b4ff2ea702 fix onholdHOok (#540)
* fix onholdHOok

* wip

* wip

* wip

* wip

* adding more debug log

* wip

* wip

* wip
2024-01-15 08:34:45 -05:00
Dave Horton
f22d66dfd6 set default deepgram model by language and task (gather vs transcribe) (#610)
* set default deepgram model by language and task (gather vs transcribe)

* wip
2024-01-14 10:38:14 -05:00
Dave Horton
09a83e3a31 Feature/precache audio (#609)
* wip

* fix for establishing vendor etc

* more fixes

* avoid a pre-caching attempt if synth settings change
2024-01-13 12:51:25 -05:00
61 changed files with 10890 additions and 10521 deletions

View File

@@ -6,12 +6,17 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 18
node-version: 20
- run: npm ci
- run: npm run jslint
- name: Install Docker Compose
run: |
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
docker-compose --version
- run: docker pull drachtio/sipp
- run: npm test
env:

2
.gitignore vendored
View File

@@ -42,3 +42,5 @@ ecosystem.config.js
test/credentials/*.json
run-tests.sh
run-coverage.sh
.vscode
.env

17
.vscode/launch.json vendored
View File

@@ -1,17 +0,0 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
"program": "${workspaceFolder}/test/index.js",
"env": {
"NODE_ENV": "test"
}
}
]
}

View File

@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 node:18.15-alpine3.16 as base
FROM --platform=linux/amd64 node:20-alpine as base
RUN apk --update --no-cache add --virtual .builds-deps build-base python3

View File

@@ -1,6 +1,6 @@
MIT License
Copyright (c) 2021 Drachtio Communications Services, LLC
Copyright (c) 2018-2024 FirstFive8, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@@ -21,6 +21,7 @@ Configuration is provided via environment variables:
|ENCRYPTION_SECRET| secret for credential encryption(JWT_SECRET is deprecated) |yes|
|GOOGLE_APPLICATION_CREDENTIALS| path to gcp service key file|yes|
|HTTP_PORT| tcp port to listen on for API requests from jambonz-api-server|yes|
|HTTP_IP| IP Address for API requests from jambonz-api-server |no|
|JAMBONES_GATHER_EARLY_HINTS_MATCH| if true and hints are provided, gather will opportunistically review interim transcripts if possible to reduce ASR latency |no|
|JAMBONES_FREESWITCH| IP:port:secret for Freeswitch server (e.g. '127.0.0.1:8021:JambonzR0ck$'|yes|
|JAMBONES_LOGLEVEL| log level for application, 'info' or 'debug'|no|

44
app.js
View File

@@ -25,10 +25,22 @@ const opts = {
};
const pino = require('pino');
const logger = pino(opts, pino.destination({sync: false}));
const {LifeCycleEvents, FS_UUID_SET_NAME} = require('./lib/utils/constants');
const {LifeCycleEvents, FS_UUID_SET_NAME, SystemState, FEATURE_SERVER} = require('./lib/utils/constants');
const installSrfLocals = require('./lib/utils/install-srf-locals');
installSrfLocals(srf, logger);
const writeSystemAlerts = srf.locals?.writeSystemAlerts;
if (writeSystemAlerts) {
writeSystemAlerts({
system_component: FEATURE_SERVER,
state : SystemState.Online,
fields : {
detail: `feature-server with process_id ${process.pid} started`,
host: srf.locals?.ipv4
}
});
}
const {
initLocals,
createRootSpan,
@@ -100,8 +112,20 @@ createHttpListener(logger, srf)
});
setInterval(() => {
const monInterval = setInterval(async() => {
srf.locals.stats.gauge('fs.sip.calls.count', sessionTracker.count);
try {
const systemInformation = await srf.locals.dbHelpers.lookupSystemInformation();
if (systemInformation && systemInformation.log_level) {
logger.level = systemInformation.log_level;
}
} catch (err) {
if (process.env.NODE_ENV === 'test') {
clearInterval(monInterval);
logger.error('all tests complete');
}
else logger.error({err}, 'Error checking system log level in database');
}
}, 20000);
const disconnect = () => {
@@ -112,13 +136,25 @@ const disconnect = () => {
srf.locals.mediaservers?.forEach((ms) => ms.disconnect());
});
};
process.on('SIGTERM', handle);
process.on('SIGINT', handle);
function handle(signal) {
async function handle(signal) {
const {removeFromSet} = srf.locals.dbHelpers;
srf.locals.disabled = true;
logger.info(`got signal ${signal}`);
const writeSystemAlerts = srf.locals?.writeSystemAlerts;
if (writeSystemAlerts) {
// this call must be awaited; otherwise the app may terminate before the alert is saved
await writeSystemAlerts({
system_component: FEATURE_SERVER,
state : SystemState.Offline,
fields : {
detail: `feature-server with process_id ${process.pid} stopped, signal ${signal}`,
host: srf.locals?.ipv4
}
});
}
const setName = `${(JAMBONES_CLUSTER_ID || 'default')}:active-fs`;
const fsServiceUrlSetName = `${(JAMBONES_CLUSTER_ID || 'default')}:fs-service-url`;
if (setName && srf.locals.localSipAddress) {

View File

@@ -9,7 +9,112 @@
"can't take your call",
"will get back to you",
"I'll get back to you",
"we are unable"
"we are unable",
"Unable to take your call now",
"I'll reply soon",
"I'll call back",
"I'll reach out to you as soon as possible",
"Leave a message",
"Away from phone",
"Not available now",
"I'll return call",
"On another call",
"Currently on another call",
"I will return call later",
"Busy please leave message",
"Message will be returned promptly",
"Currently unavailable to answer",
"Planning to return your call soon",
"Apologies for missing your call",
"Not by the phone at the moment",
"Expecting to return your call",
"Currently not accessible",
"Intend to call back",
"Appreciate your patience!",
"Engaged in another conversation",
"I Will respond promptly",
"Kindly leave a message",
"Currently occupied leave a message",
"Unfortunately unable to answer right now",
"Occupied at the moment",
"Not present leave a message",
"Regrettably unavailable kindly leave a message",
"Will ensure a prompt response to your message",
"Currently engaged",
"Will return your call at the earliest opportunity",
"Your message will receive my prompt attention",
"I'll respond as soon as I can",
"Your message is important please leave it after the beep",
"Away from the phone at the moment",
"Unable to answer right now",
"Engaged in another task",
"Not by the phone presently",
"I'll respond at my earliest convenience",
"Away from the phone momentarily",
"I'll return your call shortly",
"Currently not able to answer",
"Your message is important please leave it after the tone",
"I'm unable to take your call right now",
"Please leave your message for me",
"I'll get back to you soon",
"Your call has been missed",
"Please leave a detailed message for me to respond to",
"Leave a message I'll make sure to respond",
"Feel free to leave a message",
"Your call is important to me",
"I'll get back to you shortly",
"Your message will be attended to promptly",
"Not available at the moment",
"I'll be sure to get back to you",
"I'll call you back soon",
"I'll ensure a prompt response",
"Sorry for the inconvenience",
"I'll return your call",
"I'll make sure to get back to you",
"I'll call you back shortly",
"I'll return your call as soon as possible",
"Apologies for the inconvenience leave your message",
"Your call is appreciated",
"I'm unavailable to answer",
"I'm currently away",
"I'll return your call as soon as I can",
"I'm away from the phone",
"I'm currently unavailable to take your call",
"Sorry for missing your call",
"I'll ensure it receives my immediate attention",
"I'm away from the phone momentarily",
"I'll reach out to you shortly",
"Apologies for the inconvenience",
"Currently occupied",
"Unable to answer your call at the moment",
"I'll make sure to follow up with you",
"Sorry for not being available",
"I'll reach out to you as soon as I can",
"I'm currently engaged",
"I'm currently busy",
"I'm currently unavailable",
"I'll respond to you at my earliest convenience",
"Your message is appreciated",
"I'll get back to you promptly",
"I'll get back to you without delay",
"Currently away from the phone",
"I'll return your call at my earliest opportunity",
"Sorry for the missed call",
"I'll make sure to address your concerns",
"Please provide your details for a callback",
"I'll make every effort to respond promptly",
"I'll ensure it's attended to promptly",
"Away from the phone temporarily",
"I'll get back to you as soon as I return",
"Currently not in a position to answer your call",
"Your call cannot be answered at the moment",
"I'll ensure to respond as soon as I'm able",
"Your call is important please leave a message",
"Unable to answer right now please leave your message",
"Currently not accessible intending to return your call",
"I'll respond promptly to your message",
"leave a memo",
"please leave a memo"
],
"es-ES": [
"le pasamos la llamada",

View File

@@ -73,6 +73,7 @@ const JAMBONES_LOGLEVEL = process.env.JAMBONES_LOGLEVEL || 'info';
const JAMBONES_INJECT_CONTENT = process.env.JAMBONES_INJECT_CONTENT;
const PORT = parseInt(process.env.HTTP_PORT, 10) || 3000;
const HTTP_IP = process.env.HTTP_IP;
const HTTP_PORT_MAX = parseInt(process.env.HTTP_PORT_MAX, 10);
const K8S = process.env.K8S;
@@ -107,6 +108,8 @@ const DEEPGRAM_API_KEY = process.env.DEEPGRAM_API_KEY;
const ANCHOR_MEDIA_ALWAYS = process.env.ANCHOR_MEDIA_ALWAYS;
const VMD_HINTS_FILE = process.env.VMD_HINTS_FILE;
const JAMBONES_AWS_TRANSCRIBE_USE_GRPC = process.env.JAMBONES_AWS_TRANSCRIBE_USE_GRPC;
/* security, secrets */
const LEGACY_CRYPTO = !!process.env.LEGACY_CRYPTO;
const JWT_SECRET = process.env.JWT_SECRET;
@@ -130,6 +133,13 @@ const JAMBONZ_RECORD_WS_PASSWORD = process.env.JAMBONZ_RECORD_WS_PASSWORD || pro
const JAMBONZ_DISABLE_DIAL_PAI_HEADER = process.env.JAMBONZ_DISABLE_DIAL_PAI_HEADER || false;
const JAMBONES_DISABLE_DIRECT_P2P_CALL = process.env.JAMBONES_DISABLE_DIRECT_P2P_CALL || false;
const JAMBONES_EAGERLY_PRE_CACHE_AUDIO = parseInt(process.env.JAMBONES_EAGERLY_PRE_CACHE_AUDIO, 10) || 0;
const JAMBONES_USE_FREESWITCH_TIMER_FD = process.env.JAMBONES_USE_FREESWITCH_TIMER_FD;
const JAMBONES_DIAL_SBC_FOR_REGISTERED_USER = process.env.JAMBONES_DIAL_SBC_FOR_REGISTERED_USER || false;
const JAMBONES_MEDIA_TIMEOUT_MS = process.env.JAMBONES_MEDIA_TIMEOUT_MS || 0;
const JAMBONES_MEDIA_HOLD_TIMEOUT_MS = process.env.JAMBONES_MEDIA_HOLD_TIMEOUT_MS || 0;
module.exports = {
JAMBONES_MYSQL_HOST,
JAMBONES_MYSQL_USER,
@@ -152,6 +162,7 @@ module.exports = {
JAMBONES_API_BASE_URL,
JAMBONES_TIME_SERIES_HOST,
JAMBONES_INJECT_CONTENT,
JAMBONES_EAGERLY_PRE_CACHE_AUDIO,
JAMBONES_ESL_LISTEN_ADDRESS,
JAMBONES_SBCS,
JAMBONES_OTEL_ENABLED,
@@ -165,6 +176,7 @@ module.exports = {
JAMBONES_CLUSTER_ID,
PORT,
HTTP_PORT_MAX,
HTTP_IP,
K8S,
K8S_SBC_SIP_SERVICE_NAME,
JAMBONES_SUBNET,
@@ -183,6 +195,7 @@ module.exports = {
ANCHOR_MEDIA_ALWAYS,
VMD_HINTS_FILE,
JAMBONES_FREESWITCH_MAX_CALL_DURATION_MINS,
JAMBONES_AWS_TRANSCRIBE_USE_GRPC,
LEGACY_CRYPTO,
JWT_SECRET,
@@ -210,5 +223,9 @@ module.exports = {
JAMBONZ_RECORD_WS_USERNAME,
JAMBONZ_RECORD_WS_PASSWORD,
JAMBONZ_DISABLE_DIAL_PAI_HEADER,
JAMBONES_DISABLE_DIRECT_P2P_CALL
JAMBONES_DISABLE_DIRECT_P2P_CALL,
JAMBONES_USE_FREESWITCH_TIMER_FD,
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER,
JAMBONES_MEDIA_TIMEOUT_MS,
JAMBONES_MEDIA_HOLD_TIMEOUT_MS
};

View File

@@ -30,6 +30,20 @@ const appsMap = {
}
]
}]
},
conference: {
// Dummy hook to follow later feature server logic.
call_hook: {
url: 'https://jambonz.org',
method: 'GET'
},
account_sid: '',
app_json: [{
verb: 'conference',
name: '',
beep: false,
startConferenceOnEnter: true
}]
}
};
@@ -38,6 +52,7 @@ const createJambonzApp = (type, {account_sid, name, caller_id}) => {
app.account_sid = account_sid;
switch (type) {
case 'queue':
case 'conference':
app.app_json[0].name = name;
break;
case 'user':

View File

@@ -14,6 +14,8 @@ const RootSpan = require('../../utils/call-tracer');
const dbUtils = require('../../utils/db-utils');
const { mergeSdpMedia, extractSdpMedia } = require('../../utils/sdp-utils');
const { createCallSchema, customSanitizeFunction } = require('../schemas/create-call');
const { selectHostPort } = require('../../utils/network');
const { JAMBONES_DIAL_SBC_FOR_REGISTERED_USER } = require('../../config');
const removeNullProperties = (obj) => (Object.keys(obj).forEach((key) => obj[key] === null && delete obj[key]), obj);
const removeNulls = (req, res, next) => {
@@ -65,7 +67,7 @@ router.post('/',
lookupAppBySid
} = srf.locals.dbHelpers;
const {getSBC, getFreeswitch} = srf.locals;
const sbcAddress = getSBC();
let sbcAddress = getSBC();
if (!sbcAddress) throw new Error('no available SBCs for outbound call creation');
const target = restDial.to;
const opts = {
@@ -97,7 +99,8 @@ router.post('/',
'X-Trace-ID': rootSpan.traceId,
...(req.body?.application_sid && {'X-Application-Sid': req.body.application_sid}),
...(restDial.fromHost && {'X-Preferred-From-Host': restDial.fromHost}),
...(record_all_calls && {'X-Record-All-Calls': recordOutputFormat})
...(record_all_calls && {'X-Record-All-Calls': recordOutputFormat}),
...target.headers
};
switch (target.type) {
@@ -139,6 +142,16 @@ router.post('/',
}
}
// find handling sbc sip for called user
if (JAMBONES_DIAL_SBC_FOR_REGISTERED_USER && target.type === 'user') {
const { registrar } = srf.locals.dbHelpers;
const reg = await registrar.query(target.name);
if (reg) {
sbcAddress = selectHostPort(logger, reg.sbcAddress, 'tcp')[1];
}
//sbc outbound returns 404 Not Found to handle the case where the called user is not registered.
}
/**
* trunk isn't specified,
* check if from-number matches any existing numbers on Jambonz
@@ -195,10 +208,13 @@ router.post('/',
/**
* create our application object -
* not from the database as per an inbound call,
* but from the provided params in the request
* we merge the inbound call application,
* with the provided app params from the request body
*/
const app = req.body;
const app = {
...application,
...req.body
};
/**
* attach our requestor and notifier objects
@@ -218,7 +234,7 @@ router.post('/',
}
if (!app.notifier && app.call_status_hook) {
app.notifier = new HttpRequestor(logger, account.account_sid, app.call_status_hook, account.webhook_secret);
logger.debug({call_hook: app.call_hook}, 'creating http client for call status hook');
logger.debug({call_status_hook: app.call_status_hook}, 'creating http client for call status hook');
}
else if (!app.notifier) {
logger.debug('creating null call status hook');
@@ -257,6 +273,8 @@ router.post('/',
callId: inviteReq.get('Call-ID'),
accountSid,
traceId: rootSpan.traceId
}, {
...(account.enable_debug_log && {level: 'debug'})
});
app.requestor.logger = app.notifier.logger = sipLogger;
const callInfo = new CallInfo({
@@ -290,6 +308,8 @@ router.post('/',
},
cbProvisional: (prov) => {
const callStatus = prov.body ? CallStatus.EarlyMedia : CallStatus.Ringing;
// Update call-id for sbc outbound INVITE
cs.callInfo.sbcCallid = prov.get('X-CID');
if ([180, 183].includes(prov.status) && prov.body) connectStream(prov.body);
restDial.emit('callStatus', prov.status, !!prov.body);
cs.emit('callStatusChange', {callStatus, sipStatus: prov.status});

View File

@@ -75,13 +75,19 @@ module.exports = function(srf, logger) {
req.locals.application_sid = application_sid;
}
// check for call to queue
if (uri.user?.startsWith('queue-') && req.locals.originatingUser && clientDb?.allow_direct_queue_calling) {
else if (uri.user?.startsWith('queue-') && req.locals.originatingUser && clientDb?.allow_direct_queue_calling) {
const queue_name = uri.user.match(/queue-(.*)/)[1];
logger.debug(`got Queue from Request URI header: ${queue_name}`);
req.locals.queue_name = queue_name;
}
// check for call to conference
else if (uri.user?.startsWith('conference-') && req.locals.originatingUser && clientDb?.allow_direct_app_calling) {
const conference_id = uri.user.match(/conference-(.*)/)[1];
logger.debug(`got Conference from Request URI header: ${conference_id}`);
req.locals.conference_id = conference_id;
}
// check for call to registered user
if (!JAMBONES_DISABLE_DIRECT_P2P_CALL && req.locals.originatingUser && clientDb?.allow_direct_user_calling) {
else if (!JAMBONES_DISABLE_DIRECT_P2P_CALL && req.locals.originatingUser && clientDb?.allow_direct_user_calling) {
const arr = /^(.*)@(.*)/.exec(req.locals.originatingUser);
if (arr) {
const sipRealm = arr[2];
@@ -97,7 +103,7 @@ module.exports = function(srf, logger) {
if (req.has('X-MS-Teams-Tenant-FQDN')) req.locals.msTeamsTenant = req.get('X-MS-Teams-Tenant-FQDN');
if (req.has('X-Cisco-Recording-Participant')) {
const ciscoParticipants = req.get('X-Cisco-Recording-Participant');
const regex = /sip:[\d]+@[\d]+\.[\d]+\.[\d]+\.[\d]+/g;
const regex = /sip:[a-zA-Z0-9]+@[a-zA-Z0-9.-_]+/g;
const sipURIs = ciscoParticipants.match(regex);
logger.info(`X-Cisco-Recording-Participant : ${sipURIs} `);
if (sipURIs && sipURIs.length > 0) {
@@ -181,14 +187,20 @@ module.exports = function(srf, logger) {
const {span} = rootSpan.startChildSpan('lookupAccountDetails');
try {
req.locals.accountInfo = await lookupAccountDetails(account_sid);
req.locals.service_provider_sid = req.locals.accountInfo?.account?.service_provider_sid;
const accountDetail = await lookupAccountDetails(account_sid);
const account = accountDetail?.account;
req.locals.accountInfo = accountDetail;
req.locals.service_provider_sid = account?.service_provider_sid;
span.end();
if (!req.locals.accountInfo.account.is_active) {
if (!account?.is_active) {
logger.info(`Account is inactive or suspended ${account_sid}`);
// TODO: alert
return res.send(503, {headers: {'X-Reason': 'Account exists but is inactive'}});
}
// Change the default log level to debug
if (account?.enable_debug_log) {
req.locals.logger.level = 'debug';
}
logger.debug({accountInfo: req.locals?.accountInfo?.account}, `retrieved account info for ${account_sid}`);
next();
} catch (err) {
@@ -237,6 +249,9 @@ module.exports = function(srf, logger) {
logger.debug(`calling to registered user ${req.locals.called_user}, generating dial app`);
app = createJambonzApp('user',
{account_sid, name: req.locals.called_user, caller_id: req.locals.callingNumber});
} else if (req.locals.conference_id) {
logger.debug(`calling to conference ${req.locals.conference_id}, generating conference app`);
app = createJambonzApp('conference', {account_sid, name: req.locals.conference_id});
} else if (req.locals.application_sid) {
app = await lookupAppBySid(req.locals.application_sid);
} else if (req.locals.originatingUser) {
@@ -321,7 +336,9 @@ module.exports = function(srf, logger) {
if (arr) {
const google_custom_voice_sid = arr[1];
const [custom_voice] = await lookupGoogleCustomVoice(google_custom_voice_sid);
if (custom_voice) {
//a google voice cloning key is about 200kb in size; jambonz should not resolve the voice here, otherwise the app's calling
//webhook will receive a big payload. The tts-task should resolve the voice later.
if (!custom_voice.use_voice_cloning_key) {
app2.speech_synthesis_voice = {
reportedUsage: custom_voice.reported_usage,
model: custom_voice.model
@@ -343,6 +360,17 @@ module.exports = function(srf, logger) {
direction: CallDirection.Inbound,
traceId: rootSpan.traceId
});
// if a transferred call contains callInfo, copy the original data into the newly created callInfo in this instance.
if (app.transferredCall && app.callInfo) {
const {direction, callerName, from, to, originatingSipIp, originatingSipTrunkName} = app.callInfo;
req.locals.callInfo.direction = direction;
req.locals.callInfo.callerName = callerName;
req.locals.callInfo.from = from;
req.locals.callInfo.to = to;
req.locals.callInfo.originatingSipIp = originatingSipIp;
req.locals.callInfo.originatingSipTrunkName = originatingSipTrunkName;
delete app.callInfo;
}
next();
} catch (err) {
span.end();
@@ -359,7 +387,7 @@ module.exports = function(srf, logger) {
const {rootSpan, siprec, application:app} = req.locals;
let span;
try {
if (app.tasks && !JAMBONES_MYSQL_REFRESH_TTL) {
if (app.tasks && app.tasks?.length > 0 && !JAMBONES_MYSQL_REFRESH_TTL) {
app.tasks = normalizeJambones(logger, app.tasks).map((tdata) => makeTask(logger, tdata));
if (0 === app.tasks.length) throw new Error('no application provided');
return next();

View File

@@ -45,24 +45,34 @@ class AdultingCallSession extends CallSession {
return this.sd.ep;
}
/* see note above */
set ep(newEp) {}
// When adulting session kicked from conference, replaceEndpoint is a must
set ep(newEp) {
this.sd.ep = newEp;
}
get callSid() {
return this.callInfo.callSid;
}
_callerHungup() {
this._hangup('caller');
}
_jambonzHangup() {
this._hangup();
}
_hangup(terminatedBy = 'jambonz') {
if (this.dlg.connectTime) {
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.rootSpan.setAttributes({'call.termination': 'hangup by caller'});
this.callInfo.callTerminationBy = 'caller';
this.rootSpan.setAttributes({'call.termination': `hangup by ${terminatedBy}`});
this.callInfo.callTerminationBy = terminatedBy;
this.emit('callStatusChange', {
callStatus: CallStatus.Completed,
duration
});
}
this.logger.info('InboundCallSession: caller hung up');
this.logger.info(`InboundCallSession: ${terminatedBy} hung up`);
this._callReleased();
this.req.removeAllListeners('cancel');
}

View File

@@ -32,6 +32,7 @@ class CallInfo {
this.sipStatus = 100;
this.sipReason = 'Trying';
this.callStatus = CallStatus.Trying;
this.sbcCallid = req.get('X-CID');
this.originatingSipIp = req.get('X-Forwarded-For');
this.originatingSipTrunkName = req.get('X-Originating-Carrier');
const {siprec} = req.locals;
@@ -129,6 +130,7 @@ class CallInfo {
from: this.from,
to: this.to,
callId: this.callId,
sbcCallid: this.sbcCallid,
sipStatus: this.sipStatus,
sipReason: this.sipReason,
callStatus: this.callStatus,

File diff suppressed because it is too large Load Diff

View File

@@ -8,7 +8,7 @@ const CallSession = require('./call-session');
*/
class ConfirmCallSession extends CallSession {
constructor({logger, application, dlg, ep, tasks, callInfo, accountInfo, memberId, confName, rootSpan}) {
constructor({logger, application, dlg, ep, tasks, callInfo, accountInfo, memberId, confName, rootSpan, req}) {
super({
logger,
application,
@@ -23,6 +23,7 @@ class ConfirmCallSession extends CallSession {
});
this.dlg = dlg;
this.ep = ep;
this.req = req;
}
/**
@@ -34,6 +35,9 @@ class ConfirmCallSession extends CallSession {
_callerHungup() {
}
_jambonzHangup() {
}
}

View File

@@ -67,15 +67,33 @@ class InboundCallSession extends CallSession {
* This is invoked when the caller hangs up, in order to calculate the call duration.
*/
_callerHungup() {
this._hangup('caller');
}
_jambonzHangup(reason) {
this.dlg?.destroy({
headers: {
...(reason && {'X-Reason': reason})
}
});
// kill current task or wakeup the call session.
this._callReleased();
}
_hangup(terminatedBy = 'jambonz') {
if (this.dlg === null) {
this.logger.info('InboundCallSession:_hangup - race condition, dlg cleared by app hangup');
return;
}
this.logger.info(`InboundCallSession: ${terminatedBy} hung up`);
assert(this.dlg.connectTime);
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.rootSpan.setAttributes({'call.termination': 'hangup by caller'});
this.callInfo.callTerminationBy = 'caller';
this.rootSpan.setAttributes({'call.termination': `hangup by ${terminatedBy}`});
this.callInfo.callTerminationBy = terminatedBy;
this.emit('callStatusChange', {
callStatus: CallStatus.Completed,
duration
});
this.logger.info('InboundCallSession: caller hung up');
this._callReleased();
this.req.removeAllListeners('cancel');
}

View File

@@ -1,7 +1,6 @@
const CallSession = require('./call-session');
const {CallStatus} = require('../utils/constants');
const moment = require('moment');
/**
* @classdesc Subclass of CallSession. This represents a CallSession that is
* created for an outbound call that is initiated via the REST API.
@@ -42,20 +41,29 @@ class RestCallSession extends CallSession {
setDialog(dlg) {
this.dlg = dlg;
dlg.on('destroy', this._callerHungup.bind(this));
dlg.on('refer', this._onRefer.bind(this));
dlg.on('modify', this._onReinvite.bind(this));
this.wrapDialog(dlg);
}
/**
* This is invoked when the called party hangs up, in order to calculate the call duration.
*/
_callerHungup() {
this._hangup('caller');
}
_jambonzHangup() {
this._hangup();
}
_hangup(terminatedBy = 'jambonz') {
if (this.restDialTask) {
this.restDialTask.turnOffAmd();
}
this.callInfo.callTerminationBy = 'caller';
this.callInfo.callTerminationBy = terminatedBy;
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});
this.logger.debug('RestCallSession: called party hung up');
this.logger.debug(`RestCallSession: called party hung up by ${terminatedBy}`);
this._callReleased();
}

22
lib/tasks/answer.js Normal file
View File

@@ -0,0 +1,22 @@
const Task = require('./task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
/**
* Answer the call.
* Note: This is rarely used, as the call is typically answered automatically when required by the app,
* but it can be useful to force an answer before a pause in some cases
*/
class TaskAnswer extends Task {
  /**
   * @param {object} logger - logger handed through to the Task base class
   * @param {object} opts - verb options handed through to the Task base class
   */
  constructor(logger, opts) {
    super(logger, opts);
    // NOTE(review): presumably the Endpoint precondition is what causes the call to be answered
    // before exec runs, which would explain why exec has no work of its own - confirm in the session code
    this.preconditions = TaskPreconditions.Endpoint;
  }

  get name() { return TaskName.Answer; }

  /**
   * Runs the base-class exec and nothing more.
   * @param {object} cs - the call session executing this task
   */
  async exec(cs) {
    // await the base implementation so any failure surfaces to our caller instead of a floating promise
    await super.exec(cs);
  }
}
module.exports = TaskAnswer;

View File

@@ -6,6 +6,7 @@ const { normalizeJambones } = require('@jambonz/verb-specifications');
const makeTask = require('./make_task');
const bent = require('bent');
const assert = require('assert');
const HttpRequestor = require('../utils/http-requestor');
const WAIT = 'wait';
const JOIN = 'join';
const START = 'start';
@@ -60,6 +61,8 @@ class Conference extends Task {
this.emitter = new Emitter();
this.results = {};
this.coaching = [];
this.speakOnlyTo = this.data.speakOnlyTo;
// transferred from another server in order to bridge to a local caller?
if (this.data._ && this.data._.connectTime) {
@@ -115,7 +118,9 @@ class Conference extends Task {
this.emitter.emit('kill');
await this._doFinalMemberCheck(cs);
if (this.ep && this.ep.connected) {
this.ep.conn.removeAllListeners('esl::event::CUSTOM::*');
// drachtio-fsmrf overrides the esl::event::CUSTOM listener with its conference join listener. Once the conference
// has finished, the application needs to reset esl::event::CUSTOM so it can be used again on the same endpoint
this.ep.resetEslCustomEvent();
this.ep.api(`conference ${this.confName} kick ${this.memberId}`)
.catch((err) => this.logger.info({err}, 'Error kicking participant'));
}
@@ -132,15 +137,10 @@ class Conference extends Task {
* @param {SipDialog} dlg
*/
async _init(cs, dlg) {
const friendlyName = this.confName;
const {createHash, retrieveHash} = cs.srf.locals.dbHelpers;
this.friendlyName = this.confName;
this.confName = `conf:${cs.accountSid}:${this.confName}`;
this.statusParams = Object.assign({
conferenceSid: this.confName,
friendlyName
}, cs.callInfo);
// check if conference is in progress
const obj = await retrieveHash(this.confName);
if (obj) {
@@ -348,16 +348,29 @@ class Conference extends Task {
Object.assign(opts, {flags: {
...(this.endConferenceOnExit && {endconf: true}),
...(this.startConferenceOnEnter && {moderator: true}),
...(this.joinMuted && {joinMuted: true}),
//https://developer.signalwire.com/freeswitch/FreeSWITCH-Explained/Modules/mod_conference_3965534/
// mute | Enter conference muted
...((this.joinMuted || this.speakOnlyTo) && {mute: true}),
}});
/**
* Note on the above: if we are joining in "coaching" mode (i.e. only going to be heard by a subset of
* participants) then we join muted temporarily, and then unmute ourselves once we have identified the
* subset of participants to whom we will be speaking.
*/
}
try {
const {memberId, confUuid} = await this.ep.join(this.confName, opts);
this.logger.debug({memberId, confUuid}, `Conference:_joinConference: successfully joined ${this.confName}`);
this.memberId = memberId;
this.memberId = parseInt(memberId, 10);
this.confUuid = confUuid;
// set a tag for this member, if provided
if (this.data.memberTag) {
this.setMemberTag(this.data.memberTag);
}
cs.setConferenceDetails(memberId, this.confName, confUuid);
const response = await this.ep.api('conference', [this.confName, 'get', 'count']);
if (response.body && /\d+/.test(response.body)) this.participantCount = parseInt(response.body);
@@ -384,6 +397,9 @@ class Conference extends Task {
.catch((err) => {});
}
if (this.speakOnlyTo) {
this.setCoachMode(this.speakOnlyTo);
}
} catch (err) {
this.logger.error(err, `Failed to join conference ${this.confName}`);
throw err;
@@ -428,7 +444,15 @@ class Conference extends Task {
}
}
async doConferenceHold(cs, opts) {
doConferenceMute(cs, opts) {
assert (cs.isInConference);
const mute = opts.conf_mute_status === 'mute';
this.ep.api(`conference ${this.confName} ${mute ? 'mute' : 'unmute'} ${this.memberId}`)
.catch((err) => this.logger.info({err}, 'Error muting or unmuting participant'));
}
doConferenceHold(cs, opts) {
assert (cs.isInConference);
const {conf_hold_status, wait_hook} = opts;
@@ -465,6 +489,46 @@ class Conference extends Task {
}
}
async doConferenceParticipantAction(cs, opts) {
const {action, tag, wait_hook } = opts;
switch (action) {
case 'tag':
await this.setMemberTag(tag);
break;
case 'untag':
await this.clearMemberTag();
break;
case 'coach':
await this.setCoachMode(tag);
break;
case 'uncoach':
await this.clearCoachMode();
break;
case 'hold':
this.doConferenceHold(cs, {
conf_hold_status: 'hold',
...(wait_hook && {wait_hook})
});
break;
case 'unhold':
this.doConferenceHold(cs, {conf_hold_status: 'unhold'});
break;
case 'mute':
this.doConferenceMute(cs, {conf_mute_status: 'mute'});
break;
case 'unmute':
this.doConferenceMute(cs, {conf_mute_status: 'unmute'});
break;
case 'kick':
this.kickMember(cs);
break;
default:
this.logger.info(`Conference:doConferenceParticipantAction - unhandled action ${action}`);
break;
}
}
async _doWaitHookWhileOnHold(cs, dlg, wait_hook) {
do {
try {
@@ -482,6 +546,13 @@ class Conference extends Task {
} while (!this.killed && this.conf_hold_status === 'hold');
}
/**
* mute or unmute side of the call
*/
mute(callSid, doMute) {
this.doConferenceMute(this.callSession, {conf_mute_status: doMute});
}
/**
* Add ourselves to the waitlist of sessions to be notified once
* the conference starts
@@ -511,7 +582,7 @@ class Conference extends Task {
_normalizeHook(cs, hook) {
if (typeof hook === 'object') return hook;
const url = hook.startsWith('/') ?
`${cs.application.requestor.baseUrl}${hook}` :
`${cs.application.requestor instanceof HttpRequestor ? cs.application.requestor.baseUrl : ''}${hook}` :
hook;
return { url } ;
@@ -530,7 +601,7 @@ class Conference extends Task {
const response = await this.ep.api('conference', [this.confName, 'get', 'count']);
if (response.body && confNoMatch(response.body)) this.participantCount = 0;
else if (response.body && /^\d+$/.test(response.body)) this.participantCount = parseInt(response.body) - 1;
this.logger.debug({response}, `Conference:_doFinalMemberCheck conference count ${this.participantCount}`);
this.logger.debug(`Conference:_doFinalMemberCheck conference count ${this.participantCount}`);
} catch (err) {
this.logger.info({err}, 'Conference:_doFinalMemberCheck error retrieving count (we were probably kicked');
}
@@ -540,7 +611,7 @@ class Conference extends Task {
* when we hang up as the last member, the current member count = 1
* when we are kicked out of the call when the moderator leaves, the member count = 0
*/
if (this.participantCount === 0) {
if (this.participantCount === 0 || this.endConferenceOnExit) {
const {deleteKey} = cs.srf.locals.dbHelpers;
try {
this._notifyConferenceEvent(cs, 'end');
@@ -548,7 +619,8 @@ class Conference extends Task {
this.logger.info(`conf ${this.confName} deprovisioned: ${removed ? 'success' : 'failure'}`);
}
catch (err) {
this.logger.error(err, `Error deprovisioning conference ${this.confName}`);
this.logger.error(err, `Error deprovisioning conference ${this.confName},
might be the conference already cleaned by another moderator`);
}
}
}
@@ -581,7 +653,8 @@ class Conference extends Task {
memberId: this.memberId,
confName: this.confName,
tasks,
rootSpan: cs.rootSpan
rootSpan: cs.rootSpan,
req: cs.req
});
await this._playSession.exec();
this._playSession = null;
@@ -625,8 +698,24 @@ class Conference extends Task {
if (!params.time) params.time = (new Date()).toISOString();
if (!params.members && typeof this.participantCount === 'number') params.members = this.participantCount;
cs.application.requestor
.request('verb:hook', this.statusHook, Object.assign(params, this.statusParams, httpHeaders))
.catch((err) => this.logger.info(err, 'Conference:notifyConferenceEvent - error'));
.request(
'verb:hook',
this.statusHook,
Object.assign(
params,
Object.assign(
{
conferenceSid: this.confName,
friendlyName: this.friendlyName,
},
cs.callInfo.toJSON()
),
httpHeaders
)
)
.catch((err) =>
this.logger.info(err, 'Conference:notifyConferenceEvent - error')
);
}
}
@@ -642,11 +731,19 @@ class Conference extends Task {
}
// conference event handlers
_onAddMember(logger, cs, evt) {
const memberId = parseInt(evt.getHeader('Member-ID')) ;
if (this.speakOnlyTo) {
logger.debug(`Conference:_onAddMember - member ${memberId} added to ${this.confName}, updating coaching mode`);
this.setCoachMode(this.speakOnlyTo).catch(() => {});
}
else logger.debug(`Conference:_onAddMember - member ${memberId} added to conference ${this.confName}`);
}
_onDelMember(logger, cs, evt) {
const memberId = parseInt(evt.getHeader('Member-ID')) ;
this.participantCount = parseInt(evt.getHeader('Conference-Size'));
if (memberId === this.memberId) {
this.logger.info(`Conference:_onDelMember - I was dropped from conference ${this.confName}, task is complete`);
logger.info(`Conference:_onDelMember - I was dropped from conference ${this.confName}, task is complete`);
this.replaceEndpointAndEnd(cs);
}
}
@@ -675,6 +772,99 @@ class Conference extends Task {
}
}
_onTag(logger, cs, evt) {
const memberId = parseInt(evt.getHeader('Member-ID')) ;
const tag = evt.getHeader('Tag') || '';
if (memberId !== this.memberId && this.speakOnlyTo) {
logger.info(`Conference:_onTag - member ${memberId} set tag to '${tag }'; updating coach mode accordingly`);
this.setCoachMode(this.speakOnlyTo).catch(() => {});
}
}
/**
* Set the conference to "coaching" mode, where the audio of the participant is only heard
* by a subset of the participants in the conference.
* We do this by first getting all of the members who do *not* have this tag, and then
* we configure this members audio to not be sent to them.
* @param {string} speakOnlyTo - tag of the members who should receive our audio
*
* N.B.: this feature requires jambonz patches to freeswitch mod_conference
*/
async setCoachMode(speakOnlyTo) {
this.speakOnlyTo = speakOnlyTo;
if (!this.memberId) {
this.logger.info('Conference:_setCoachMode: no member id yet');
return;
}
try {
const members = (await this.ep.getNonMatchingConfParticipants(this.confName, speakOnlyTo))
.filter((m) => m !== this.memberId);
if (members.length === 0) {
this.logger.info({members}, 'Conference:_setCoachMode: all participants have the tag, so all will hear me');
if (this.coaching.length) {
await this.ep.api('conference', [this.confName, 'relate', this.memberId, this.coaching.join(','), 'clear']);
this.coaching = [];
}
}
else {
const memberList = members.join(',');
this.logger.info(`Conference:_setCoachMode: my audio will NOT be sent to ${memberList}`);
await this.ep.api('conference', [this.confName, 'relate', this.memberId, memberList, 'nospeak']);
this.coaching = members;
}
} catch (err) {
this.logger.error({err, speakOnlyTo}, '_setCoachMode: Error');
}
}
async clearCoachMode() {
if (!this.memberId) return;
try {
if (this.coaching.length === 0) {
this.logger.info('Conference:_clearCoachMode: no coaching mode to clear');
}
else {
const memberList = this.coaching.join(',');
this.logger.info(`Conference:_clearCoachMode: now sending my audio to all, including ${memberList}`);
await this.ep.api('conference', [this.confName, 'relate', this.memberId, memberList, 'clear']);
}
this.speakOnlyTo = null;
this.coaching = [];
} catch (err) {
this.logger.error({err}, '_clearCoachMode: Error');
}
}
async setMemberTag(tag) {
try {
await this.ep.api('conference', [this.confName, 'tag', this.memberId, tag]);
this.logger.info(`Conference:setMemberTag: set tag for ${this.memberId} to ${tag}`);
this.memberTag = tag;
} catch (err) {
this.logger.error({err}, `Error setting tag for ${this.memberId} to ${tag}`);
}
}
async clearMemberTag() {
try {
await this.ep.api('conference', [this.confName, 'tag', this.memberId]);
this.logger.info(`Conference:setMemberTag: clearing tag for ${this.memberId}`);
this.memberTag = null;
} catch (err) {
this.logger.error({err}, `Error clearing tag for ${this.memberId}`);
}
}
async kickMember(cs) {
assert(cs.isInConference);
try {
await this.ep.api('conference', [this.confName, 'kick', this.memberId]);
this.logger.info(`Conference:kickMember: kick ${this.memberId} out of conference ${this.confName}`);
} catch (err) {
this.logger.error({err}, `Error kicking member out of conference for ${this.memberId}`);
}
}
}
module.exports = Conference;

View File

@@ -1,16 +1,23 @@
const Task = require('./task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
const parseDecibels = require('../utils/parse-decibels');
class TaskConfig extends Task {
constructor(logger, opts) {
super(logger, opts);
[
'synthesizer',
'recognizer',
'bargeIn',
'record',
'listen',
'transcribe'
'transcribe',
'fillerNoise',
'actionHookDelayAction',
'boostAudioSignal',
'vad',
'ttsStream'
].forEach((k) => this[k] = this.data[k] || {});
if ('notifyEvents' in this.data) {
@@ -28,7 +35,8 @@ class TaskConfig extends Task {
'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
'interDigitTimeout', 'bargein', 'dtmfBargein', 'minBargeinWordCount', 'actionHook'
].forEach((k) => {
if (this.bargeIn[k]) this.gatherOpts[k] = this.bargeIn[k];
const val = this.bargeIn[k];
if (val !== undefined && val !== null) this.gatherOpts[k] = val;
});
}
if (this.transcribe?.enable) {
@@ -38,6 +46,12 @@ class TaskConfig extends Task {
};
delete this.transcribeOpts.enable;
}
if (this.ttsStream.enable) {
this.sayOpts = {
verb: 'say',
stream: true
};
}
if (this.data.reset) {
if (typeof this.data.reset === 'string') this.data.reset = [this.data.reset];
@@ -49,6 +63,7 @@ class TaskConfig extends Task {
this.record?.action ||
this.listen?.url ||
this.data.amd ||
'boostAudioSignal' in this.data ||
this.transcribe?.enable) ?
TaskPreconditions.Endpoint :
TaskPreconditions.None;
@@ -63,6 +78,11 @@ class TaskConfig extends Task {
get hasRecording() { return Object.keys(this.record).length; }
get hasListen() { return Object.keys(this.listen).length; }
get hasTranscribe() { return Object.keys(this.transcribe).length; }
get hasDub() { return Object.keys(this.dub).length; }
get hasVad() { return Object.keys(this.vad).length; }
get hasFillerNoise() { return Object.keys(this.fillerNoise).length; }
get hasReferHook() { return Object.keys(this.data).includes('referHook'); }
get hasTtsStream() { return Object.keys(this.ttsStream).length; }
get summary() {
const phrase = [];
@@ -72,13 +92,13 @@ class TaskConfig extends Task {
if (this.bargeIn.enable) phrase.push('enable barge-in');
if (this.hasSynthesizer) {
const {vendor:v, language:l, voice} = this.synthesizer;
const s = `{${v},${l},${voice}}`;
const {vendor:v, language:l, voice, label} = this.synthesizer;
const s = `{${v},${l},${voice},${label || 'None'}}`;
phrase.push(`set synthesizer${s}`);
}
if (this.hasRecognizer) {
const {vendor:v, language:l} = this.recognizer;
const s = `{${v},${l}}`;
const {vendor:v, language:l, label} = this.recognizer;
const s = `{${v},${l},${label || 'None'}}`;
phrase.push(`set recognizer${s}`);
}
if (this.hasRecording) phrase.push(this.record.action);
@@ -88,9 +108,15 @@ class TaskConfig extends Task {
if (this.hasTranscribe) {
phrase.push(this.transcribe.enable ? `transcribe ${this.transcribe.transcriptionHook}` : 'stop transcribe');
}
if (this.hasFillerNoise) phrase.push(`fillerNoise ${this.fillerNoise.enable ? 'on' : 'off'}`);
if (this.data.amd) phrase.push('enable amd');
if (this.notifyEvents) phrase.push(`event notification ${this.notifyEvents ? 'on' : 'off'}`);
if (this.onHoldMusic) phrase.push(`onHoldMusic: ${this.onHoldMusic}`);
if ('boostAudioSignal' in this.data) phrase.push(`setGain ${this.data.boostAudioSignal}`);
if (this.hasReferHook) phrase.push('set referHook');
if (this.hasTtsStream) {
phrase.push(`${this.ttsStream.enable ? 'enable' : 'disable'} ttsStream`);
}
return `${this.name}{${phrase.join(',')}}`;
}
@@ -129,9 +155,8 @@ class TaskConfig extends Task {
cs.speechSynthesisVendor = this.synthesizer.vendor !== 'default'
? this.synthesizer.vendor
: cs.speechSynthesisVendor;
cs.speechSynthesisLabel = this.synthesizer.label !== 'default'
? this.synthesizer.label
: cs.speechSynthesisLabel;
cs.speechSynthesisLabel = this.synthesizer.label === 'default'
? cs.speechSynthesisLabel : this.synthesizer.label;
cs.speechSynthesisLanguage = this.synthesizer.language !== 'default'
? this.synthesizer.language
: cs.speechSynthesisLanguage;
@@ -143,15 +168,16 @@ class TaskConfig extends Task {
cs.fallbackSpeechSynthesisVendor = this.synthesizer.fallbackVendor !== 'default'
? this.synthesizer.fallbackVendor
: cs.fallbackSpeechSynthesisVendor;
cs.fallbackSpeechSynthesisLabel = this.synthesizer.fallbackLabel !== 'default'
? this.synthesizer.fallbackLabel
: cs.fallbackSpeechSynthesisLabel;
cs.fallbackSpeechSynthesisLabel = this.synthesizer.fallbackLabel === 'default'
? cs.fallbackSpeechSynthesisLabel : this.synthesizer.fallbackLabel;
cs.fallbackSpeechSynthesisLanguage = this.synthesizer.fallbackLanguage !== 'default'
? this.synthesizer.fallbackLanguage
: cs.fallbackSpeechSynthesisLanguage;
cs.fallbackSpeechSynthesisVoice = this.synthesizer.fallbackVoice !== 'default'
? this.synthesizer.fallbackVoice
: cs.fallbackSpeechSynthesisVoice;
// new vendor is set, reset fallback vendor
cs.hasFallbackTts = false;
this.logger.info({synthesizer: this.synthesizer}, 'Config: updated synthesizer');
}
if (this.hasRecognizer) {
@@ -159,9 +185,8 @@ class TaskConfig extends Task {
cs.speechRecognizerVendor = this.recognizer.vendor !== 'default'
? this.recognizer.vendor
: cs.speechRecognizerVendor;
cs.speechRecognizerLabel = this.recognizer.label !== 'default'
? this.recognizer.label
: cs.speechRecognizerLabel;
cs.speechRecognizerLabel = this.recognizer.label === 'default'
? cs.speechRecognizerLabel : this.recognizer.label;
cs.speechRecognizerLanguage = this.recognizer.language !== 'default'
? this.recognizer.language
: cs.speechRecognizerLanguage;
@@ -170,9 +195,9 @@ class TaskConfig extends Task {
cs.fallbackSpeechRecognizerVendor = this.recognizer.fallbackVendor !== 'default'
? this.recognizer.fallbackVendor
: cs.fallbackSpeechRecognizerVendor;
cs.fallbackSpeechRecognizerLabel = this.recognizer.fallbackLabel !== 'default'
? this.recognizer.fallbackLabel
: cs.fallbackSpeechRecognizerLabel;
cs.fallbackSpeechRecognizerLabel = this.recognizer.fallbackLabel === 'default' ?
cs.fallbackSpeechRecognizerLabel :
this.recognizer.fallbackLabel;
cs.fallbackSpeechRecognizerLanguage = this.recognizer.fallbackLanguage !== 'default'
? this.recognizer.fallbackLanguage
: cs.fallbackSpeechRecognizerLanguage;
@@ -196,6 +221,8 @@ class TaskConfig extends Task {
if ('punctuation' in this.recognizer) {
cs.globalSttPunctuation = this.recognizer.punctuation;
}
// new vendor is set, reset fallback vendor
cs.hasFallbackAsr = false;
this.logger.info({
recognizer: this.recognizer,
isContinuousAsr: cs.isContinuousAsr
@@ -236,12 +263,14 @@ class TaskConfig extends Task {
}
if (this.hasTranscribe) {
if (this.transcribe.enable) {
this.transcribeOpts.recognizer = this.hasRecognizer ?
this.recognizer :
{
vendor: cs.speechRecognizerVendor,
language: cs.speechRecognizerLanguage
};
if (!this.transcribeOpts.recognizer) {
this.transcribeOpts.recognizer = this.hasRecognizer ?
this.recognizer :
{
vendor: cs.speechRecognizerVendor,
language: cs.speechRecognizerLanguage
};
}
this.logger.debug(this.transcribeOpts, 'Config: enabling transcribe');
cs.startBackgroundTask('transcribe', this.transcribeOpts);
} else {
@@ -249,9 +278,60 @@ class TaskConfig extends Task {
cs.stopBackgroundTask('transcribe');
}
}
if (Object.keys(this.actionHookDelayAction).length !== 0) {
cs.actionHookDelayProperties = this.actionHookDelayAction;
}
if (this.data.sipRequestWithinDialogHook) {
cs.sipRequestWithinDialogHook = this.data.sipRequestWithinDialogHook;
}
if ('boostAudioSignal' in this.data) {
const db = parseDecibels(this.data.boostAudioSignal);
this.logger.info(`Config: boosting audio signal by ${db} dB`);
const args = [ep.uuid, 'setGain', db];
ep.api('uuid_dub', args).catch((err) => {
this.logger.error(err, 'Error boosting audio signal');
});
}
if (this.hasFillerNoise) {
const {enable, ...opts} = this.fillerNoise;
this.logger.info({fillerNoise: this.fillerNoise}, 'Config: fillerNoise');
if (!enable) cs.disableFillerNoise();
else {
cs.enableFillerNoise(opts);
}
}
if (this.hasVad) {
cs.vad = {
enable: this.vad.enable || false,
voiceMs: this.vad.voiceMs || 250,
silenceMs: this.vad.silenceMs || 150,
strategy: this.vad.strategy || 'one-shot',
mode: (this.vad.mode !== undefined && this.vad.mode !== null) ? this.vad.mode : 2
};
}
if (this.hasReferHook) {
cs.referHook = this.data.referHook;
}
if (this.ttsStream.enable && this.sayOpts) {
this.sayOpts.synthesizer = this.hasSynthesizer ? this.synthesizer : {
vendor: cs.speechSynthesisVendor,
language: cs.speechSynthesisLanguage,
voice: cs.speechSynthesisVoice,
...(cs.speechSynthesisLabel && {
label: cs.speechSynthesisLabel
})
};
this.logger.info({opts: this.gatherOpts}, 'Config: enabling ttsStream');
cs.enableBackgroundTtsStream(this.sayOpts);
} else if (!this.ttsStream.enable) {
this.logger.info('Config: disabling ttsStream');
cs.disableTtsStream();
}
}
async kill(cs) {

View File

@@ -73,7 +73,8 @@ class TaskDequeue extends Task {
try {
let url;
if (this.callSid) {
url = await retrieveByPatternSortedSet(this.queueName, `*${this.callSid}`);
const r = await retrieveByPatternSortedSet(this.queueName, `*${this.callSid}`);
url = r[0];
} else {
url = await retrieveFromSortedSet(this.queueName);
}

View File

@@ -6,6 +6,7 @@ const {
TaskName,
TaskPreconditions,
MAX_SIMRINGS,
MediaPath,
KillReason
} = require('../utils/constants');
const assert = require('assert');
@@ -14,11 +15,15 @@ const sessionTracker = require('../session/session-tracker');
const DtmfCollector = require('../utils/dtmf-collector');
const ConfirmCallSession = require('../session/confirm-call-session');
const dbUtils = require('../utils/db-utils');
const parseDecibels = require('../utils/parse-decibels');
const debug = require('debug')('jambonz:feature-server');
const {parseUri} = require('drachtio-srf');
const {ANCHOR_MEDIA_ALWAYS, JAMBONZ_DISABLE_DIAL_PAI_HEADER} = require('../config');
const {ANCHOR_MEDIA_ALWAYS,
JAMBONZ_DISABLE_DIAL_PAI_HEADER,
JAMBONES_DIAL_SBC_FOR_REGISTERED_USER} = require('../config');
const { isOnhold, isOpusFirst } = require('../utils/sdp-utils');
const { normalizeJambones } = require('@jambonz/verb-specifications');
const { selectHostPort } = require('../utils/network');
function parseDtmfOptions(logger, dtmfCapture) {
let parentDtmfCollector, childDtmfCollector;
@@ -81,6 +86,8 @@ function filterAndLimit(logger, tasks) {
return unique;
}
// Promise-based delay: resolves (with no value) once `ms` milliseconds have elapsed.
const sleepFor = (ms) => new Promise((done) => {
  setTimeout(done, ms);
});
class TaskDial extends Task {
constructor(logger, opts) {
super(logger, opts);
@@ -100,6 +107,9 @@ class TaskDial extends Task {
this.referHook = this.data.referHook;
this.dtmfHook = this.data.dtmfHook;
this.proxy = this.data.proxy;
this.tag = this.data.tag;
this.boostAudioSignal = this.data.boostAudioSignal;
this._mediaPath = MediaPath.FullMedia;
if (this.dtmfHook) {
const {parentDtmfCollector, childDtmfCollector} = parseDtmfOptions(logger, this.data.dtmfCapture || {});
@@ -117,6 +127,9 @@ class TaskDial extends Task {
if (this.data.transcribe) {
this.transcribeTask = makeTask(logger, {'transcribe' : this.data.transcribe}, this);
}
if (this.data.dub && Array.isArray(this.data.dub) && this.data.dub.length > 0) {
this.dubTasks = this.data.dub.map((d) => makeTask(logger, {'dub': d}, this));
}
this.results = {};
this.bridged = false;
@@ -138,21 +151,27 @@ class TaskDial extends Task {
get name() { return TaskName.Dial; }
get isOnHold() {
return this.isIncomingLegHold || this.isOutgoingLegHold;
get isOnHoldEnabled() {
return !!this.data.onHoldHook;
}
get canReleaseMedia() {
const keepAnchor = this.data.anchorMedia ||
this.cs.isBackGroundListen ||
ANCHOR_MEDIA_ALWAYS ||
this.listenTask ||
this.transcribeTask ||
this.startAmd;
this.cs.isBackGroundListen ||
this.cs.onHoldMusic ||
ANCHOR_MEDIA_ALWAYS ||
this.listenTask ||
this.dubTasks ||
this.transcribeTask ||
this.startAmd;
return !keepAnchor;
}
get shouldExitMediaPathEntirely() {
return this.data.exitMediaPath;
}
get summary() {
if (this.target.length === 1) {
const target = this.target[0];
@@ -173,6 +192,16 @@ class TaskDial extends Task {
async exec(cs) {
await super.exec(cs);
if (this.data.anchorMedia && this.data.exitMediaPath) {
this.logger.info('Dial:exec - incompatible anchorMedia and exitMediaPath are both set, will obey anchorMedia');
delete this.data.exitMediaPath;
}
if (!this.canReleaseMedia && this.data.exitMediaPath) {
this.logger.info(
'Dial:exec - exitMediaPath is set so features such as transcribe and record will not work on this call');
}
try {
if (this.listenTask) {
const {span, ctx} = this.startChildSpan(`nested:${this.listenTask.summary}`);
@@ -195,7 +224,16 @@ class TaskDial extends Task {
else {
this.epOther = cs.ep;
if (this.dialMusic && this.epOther && this.epOther.connected) {
this.epOther.play(this.dialMusic).catch((err) => {});
(async() => {
do {
try {
await this.epOther.play(this.dialMusic);
} catch (err) {
this.logger.error(err, `TaskDial:exec error playing ${this.dialMusic}`);
await sleepFor(1000);
}
} while (!this.killed || !this.bridged);
})();
}
}
if (!this.killed) await this._attemptCalls(cs);
@@ -235,7 +273,9 @@ class TaskDial extends Task {
this._removeDtmfDetection(this.dlg);
await this._killOutdials();
if (this.sd) {
this.sd.kill();
const byeReasonHeader = this.killReason === KillReason.MediaTimeout ? 'Media Timeout' : undefined;
this.sd.kill(byeReasonHeader);
this.sd.ep?.removeListener('destroy', this._handleMediaTimeout.bind(this));
this.sd.removeAllListeners();
this.sd = null;
}
@@ -281,7 +321,7 @@ class TaskDial extends Task {
if (!cs.callGone && this.epOther) {
/* if we can release the media back to the SBC, do so now */
if (this.canReleaseMedia) this._releaseMedia(cs, this.sd);
if (this.canReleaseMedia) this._releaseMedia(cs, this.sd, this.shouldExitMediaPathEntirely);
else this.epOther.bridge(this.ep);
}
} catch (err) {
@@ -321,17 +361,26 @@ class TaskDial extends Task {
const to = parseUri(req.getParsedHeader('Refer-To').uri);
const by = parseUri(req.getParsedHeader('Referred-By').uri);
const referredBy = req.get('Referred-By');
const userAgent = req.get('User-Agent');
const customHeaders = Object.keys(req.headers)
.filter((h) => h.toLowerCase().startsWith('x-'))
.reduce((acc, h) => {
acc[h] = req.get(h);
return acc;
}, {});
this.logger.info({to}, 'refer to parsed');
const json = await cs.requestor.request('verb:hook', this.referHook, {
...(callInfo.toJSON()),
refer_details: {
sip_refer_to: req.get('Refer-To'),
sip_referred_by: req.get('Referred-By'),
sip_user_agent: req.get('User-Agent'),
refer_to_user: to.scheme === 'tel' ? to.number : to.user,
referred_by_user: by.scheme === 'tel' ? by.number : by.user,
...(referredBy && {sip_referred_by: referredBy}),
...(userAgent && {sip_user_agent: userAgent}),
...(by && {referred_by_user: by.scheme === 'tel' ? by.number : by.user}),
referring_call_sid,
referred_call_sid
referred_call_sid,
...customHeaders
}
}, httpHeaders);
if (json && Array.isArray(json)) {
@@ -357,9 +406,13 @@ class TaskDial extends Task {
this.logger.info(err, 'Dial:handleRefer - error setting new application after receiving REFER');
}
}
// The caller and callee legs are bridged together; accepting the REFER with a 202 will release the
// callee leg endpoint, which makes freeswitch release the endpoint for the caller leg as well.
if (this.ep) this.ep.unbridge();
res.send(202);
this.logger.info('DialTask:handleRefer - sent 202 Accepted');
} catch (err) {
this.logger.info({err}, 'DialTask:handleRefer - error processing incoming REFER');
res.send(err.statusCode || 501);
}
}
@@ -465,11 +518,11 @@ class TaskDial extends Task {
}
async _attemptCalls(cs) {
const {req, srf} = cs;
const {req, callInfo, direction, srf} = cs;
const {getSBC} = srf.locals;
const {lookupTeamsByAccount, lookupAccountBySid} = srf.locals.dbHelpers;
const {lookupCarrier, lookupCarrierByPhoneNumber} = dbUtils(this.logger, cs.srf);
const sbcAddress = this.proxy || getSBC();
const {lookupCarrier, lookupCarrierByPhoneNumber, lookupVoipCarrierBySid} = dbUtils(this.logger, cs.srf);
let sbcAddress = this.proxy || getSBC();
const teamsInfo = {};
let fqdn;
@@ -477,17 +530,25 @@ class TaskDial extends Task {
this.headers = {
'X-Account-Sid': cs.accountSid,
...(req && req.has('X-CID') && {'X-CID': req.get('X-CID')}),
...(req && req.has('P-Asserted-Identity') && !JAMBONZ_DISABLE_DIAL_PAI_HEADER &&
{'P-Asserted-Identity': req.get('P-Asserted-Identity')}),
...(direction === 'outbound' && callInfo.sbcCallid && {'X-CID': callInfo.sbcCallid}),
...(!JAMBONZ_DISABLE_DIAL_PAI_HEADER && req && {
...(req.has('P-Asserted-Identity') && {'P-Asserted-Identity': req.get('P-Asserted-Identity')}),
...(req.has('Privacy') && {'Privacy': req.get('Privacy')}),
}),
...(req && req.has('X-Voip-Carrier-Sid') && {'X-Voip-Carrier-Sid': req.get('X-Voip-Carrier-Sid')}),
// Put headers at the end to make sure opt.headers override all default behavior.
...this.headers
};
// default to inband dtmf if not specified
this.inbandDtmfEnabled = cs.inbandDtmfEnabled;
// get calling user from From header
const parsedFrom = req.getParsedHeader('from');
const fromUri = parseUri(parsedFrom.uri);
const opts = {
headers: this.headers,
proxy: `sip:${sbcAddress}`,
callingNumber: this.callerId || req.callingNumber,
callingNumber: this.callerId || fromUri.user,
...(this.callerName && {callingName: this.callerName}),
opusFirst: isOpusFirst(this.cs.ep.remote.sdp)
};
@@ -533,6 +594,15 @@ class TaskDial extends Task {
this.logger.error({err}, 'Error looking up account by sid');
}
}
// find handling sbc sip for called user
if (JAMBONES_DIAL_SBC_FOR_REGISTERED_USER && t.type === 'user') {
const { registrar } = srf.locals.dbHelpers;
const reg = await registrar.query(t.name);
if (reg) {
sbcAddress = selectHostPort(this.logger, reg.sbcAddress, 'tcp')[1];
}
// sbc outbound returns 404 Not Found to handle the case where the called user is not registered.
}
if (t.type === 'phone' && t.trunk) {
const voip_carrier_sid = await lookupCarrier(cs.accountSid, t.trunk);
this.logger.info(`Dial:_attemptCalls: selected ${voip_carrier_sid} for requested carrier: ${t.trunk}`);
@@ -549,10 +619,17 @@ class TaskDial extends Task {
const str = this.callerId || req.callingNumber || '';
const callingNumber = str.startsWith('+') ? str.substring(1) : str;
const voip_carrier_sid = await lookupCarrierByPhoneNumber(cs.accountSid, callingNumber);
this.logger.info(
`Dial:_attemptCalls: selected ${voip_carrier_sid} for requested phone number: ${callingNumber}`);
const req_voip_carrier_sid = req.has('X-Voip-Carrier-Sid') ? req.get('X-Voip-Carrier-Sid') : null;
if (voip_carrier_sid) {
this.logger.info(
`Dial:_attemptCalls: selected voip_carrier_sid ${voip_carrier_sid} for callingNumber: ${callingNumber}`);
opts.headers['X-Requested-Carrier-Sid'] = voip_carrier_sid;
// Checking if outbound carrier is different from inbound carrier and has dtmf type tones
if (voip_carrier_sid !== req_voip_carrier_sid) {
const [voipCarrier] = await lookupVoipCarrierBySid(voip_carrier_sid);
this.inbandDtmfEnabled = voipCarrier?.dtmf_type === 'tones';
}
}
}
@@ -570,7 +647,8 @@ class TaskDial extends Task {
accountInfo: cs.accountInfo,
rootSpan: cs.rootSpan,
startSpan: this.startSpan.bind(this),
dialTask: this
dialTask: this,
onHoldMusic: this.cs.onHoldMusic
});
this.dials.set(sd.callSid, sd);
@@ -592,6 +670,7 @@ class TaskDial extends Task {
dialCallStatus: obj.callStatus,
dialSipStatus: obj.sipStatus,
dialCallSid: sd.callSid,
dialSbcCallid: sd.callInfo.sbcCallid
});
}
switch (obj.callStatus) {
@@ -627,6 +706,8 @@ class TaskDial extends Task {
await this._connectSingleDial(cs, sd);
} catch (err) {
this.logger.info({err}, 'Dial:_attemptCalls - Error calling _connectSingleDial ');
sd.removeAllListeners();
this.kill(cs);
}
})
.on('decline', () => {
@@ -679,22 +760,43 @@ class TaskDial extends Task {
async _onReinvite(req, res) {
try {
let isHandled = false;
if (this.cs.onHoldMusic) {
if (isOnhold(req.body) && !this.epOther && !this.ep) {
await this.cs.handleReinviteAfterMediaReleased(req, res);
// Onhold but media is already released
// reconnect A Leg and Response B leg
await this.reAnchorMedia(this.cs, this.sd);
this.isOutgoingLegHold = true;
if (this.isOnHoldEnabled) {
if (isOnhold(req.body)) {
this.logger.debug('Dial: _onReinvite receive hold Request');
if (!this.epOther && !this.ep) {
this.logger.debug(`Dial: _onReinvite receive hold Request,
media already released, reconnect media server`);
// update caller leg for new SDP from callee.
await this.cs.handleReinviteAfterMediaReleased(req, res);
// Freeswitch media is released, reconnect
await this.reAnchorMedia(this.cs, this.sd);
this.isOutgoingLegHold = true;
} else {
this.logger.debug('Dial: _onReinvite receive hold Request, update SDP');
const newSdp = await this.ep.modify(req.body);
res.send(200, {body: newSdp});
}
isHandled = true;
this._onHoldHook();
} else if (!isOnhold(req.body) && this.epOther && this.ep && this.isOutgoingLegHold && this.canReleaseMedia) {
// Offhold, time to release media
const newSdp = await this.ep.modify(req.body);
await res.send(200, {body: newSdp});
await this._releaseMedia(this.cs, this.sd);
// Media already connected, ask for onHoldHook
this._onHoldHook(req);
} else if (!isOnhold(req.body)) {
this.logger.debug('Dial: _onReinvite receive unhold Request');
if (this.epOther && this.ep && this.isOutgoingLegHold && this.canReleaseMedia) {
this.logger.debug('Dial: _onReinvite receive unhold Request, release media');
// Offhold, time to release media
const newSdp = await this.ep.modify(req.body);
await res.send(200, {body: newSdp});
await this._releaseMedia(this.cs, this.sd, this.shouldExitMediaPathEntirely);
this.isOutgoingLegHold = false;
} else {
this.logger.debug('Dial: _onReinvite receive unhold Request, update media server');
const newSdp = await this.ep.modify(req.body);
res.send(200, {body: newSdp});
}
if (this._onHoldSession) {
this._onHoldSession.kill();
}
isHandled = true;
this.isOutgoingLegHold = false;
}
}
if (!isHandled) {
@@ -755,6 +857,17 @@ class TaskDial extends Task {
dialCallSid: sd.callSid,
});
if (this.dubTasks) {
for (const dub of this.dubTasks) {
try {
await dub.exec(cs, {ep: sd.ep});
}
catch (err) {
this.logger.error({err}, 'Dial:_selectSingleDial - error executing dubTask');
}
}
}
if (this.parentDtmfCollector) this._installDtmfDetection(cs, cs.dlg);
if (this.childDtmfCollector) this._installDtmfDetection(cs, this.dlg);
if (cs.sipRequestWithinDialogHook) this._initSipIndialogRequestListener(cs, this.dlg);
@@ -769,8 +882,30 @@ class TaskDial extends Task {
}
}
/* boost audio signal if requested */
if (this.boostAudioSignal) {
try {
const db = parseDecibels(this.boostAudioSignal);
this.logger.info(`Dial: boosting audio signal by ${db} dB`);
const args = [this.ep.uuid, 'setGain', db];
await this.ep.api('uuid_dub', args);
} catch (err) {
this.logger.info({err}, 'Dial:_selectSingleDial - Error boosting audio signal');
}
}
/* if we can release the media back to the SBC, do so now */
if (this.canReleaseMedia) setTimeout(this._releaseMedia.bind(this, cs, sd), 200);
if (this.canReleaseMedia || this.shouldExitMediaPathEntirely) {
setTimeout(this._releaseMedia.bind(this, cs, sd, this.shouldExitMediaPathEntirely), 200);
}
this.sd.ep.once('destroy', this._handleMediaTimeout.bind(this));
}
_handleMediaTimeout(evt) {
if (evt.reason === 'MEDIA_TIMEOUT' && this.sd && this.bridged) {
this.kill(this.cs, KillReason.MediaTimeout);
}
}
_bridgeEarlyMedia(sd) {
@@ -782,22 +917,57 @@ class TaskDial extends Task {
}
}
/* public api */
async updateMediaPath(desiredPath) {
this.logger.info(`Dial:updateMediaPath - ${this._mediaPath} => ${desiredPath}`);
switch (desiredPath) {
case MediaPath.NoMedia:
assert(this._mediaPath !== MediaPath.NoMedia, 'updateMediaPath: already no-media');
await this._releaseMedia(this.cs, this.sd, true);
break;
case MediaPath.PartialMedia:
assert(this._mediaPath !== MediaPath.PartialMedia, 'updateMediaPath: already partial-media');
if (this._mediaPath === MediaPath.FullMedia) {
await this._releaseMedia(this.cs, this.sd, false);
}
else {
// to go from no-media to partial-media we need to go through full-media first
await this.reAnchorMedia(this.cs, this.sd);
await this._releaseMedia(this.cs, this.sd, false);
}
assert(!this.epOther, 'updateMediaPath: epOther should be null');
assert(!this.ep, 'updateMediaPath: ep should be null');
break;
case MediaPath.FullMedia:
assert(this._mediaPath !== MediaPath.FullMedia, 'updateMediaPath: already full-media');
await this.reAnchorMedia(this.cs, this.sd);
break;
default:
assert(false, `updateMediaPath: invalid path request ${desiredPath}`);
}
}
/**
* Release the media from freeswitch
* @param {*} cs
* @param {*} sd
*/
async _releaseMedia(cs, sd) {
async _releaseMedia(cs, sd, releaseEntirely = false) {
assert(cs.ep && sd.ep);
try {
// Wait until we have the new SDP from the B leg to offer to the A leg
const aLegSdp = cs.ep.remote.sdp;
await sd.releaseMediaToSBC(aLegSdp, cs.ep.local.sdp);
await sd.releaseMediaToSBC(aLegSdp, cs.ep.local.sdp, releaseEntirely);
const bLegSdp = sd.dlg.remote.sdp;
await cs.releaseMediaToSBC(bLegSdp);
await cs.releaseMediaToSBC(bLegSdp, releaseEntirely);
this.epOther = null;
this.logger.info('Dial:_releaseMedia - successfully released media from freewitch');
this._mediaPath = releaseEntirely ? MediaPath.NoMedia : MediaPath.PartialMedia;
this.logger.info(
`Dial:_releaseMedia - successfully released media from freewitch, media path is now ${this._mediaPath}`);
} catch (err) {
this.logger.info({err}, 'Dial:_releaseMedia error');
}
@@ -807,27 +977,45 @@ class TaskDial extends Task {
if (cs.ep && sd.ep) return;
this.logger.info('Dial:reAnchorMedia - re-anchoring media to freewitch');
await Promise.all([sd.reAnchorMedia(), cs.reAnchorMedia()]);
await Promise.all([sd.reAnchorMedia(this._mediaPath), cs.reAnchorMedia(this._mediaPath)]);
this.epOther = cs.ep;
this.epOther.bridge(this.ep);
this._mediaPath = MediaPath.FullMedia;
this.logger.info(
`Dial:_releaseMedia - successfully re-anchored media to freewitch, media path is now ${this._mediaPath}`);
}
// Handle RE-INVITE hold from caller leg.
async handleReinviteAfterMediaReleased(req, res) {
let isHandled = false;
if (isOnhold(req.body) && !this.epOther && !this.ep) {
const sdp = await this.dlg.modify(req.body);
res.send(200, {body: sdp});
// Onhold but media is already released
await this.reAnchorMedia(this.cs, this.sd);
isHandled = true;
this.isIncomingLegHold = true;
this._onHoldHook();
} else if (!isOnhold(req.body) && this.epOther && this.ep && this.isIncomingLegHold && this.canReleaseMedia) {
// Offhold, time to release media
const newSdp = await this.epOther.modify(req.body);
await res.send(200, {body: newSdp});
await this._releaseMedia(this.cs, this.sd);
isHandled = true;
this.isIncomingLegHold = false;
if (this.isOnHoldEnabled) {
if (isOnhold(req.body)) {
if (!this.epOther && !this.ep) {
// update callee leg for new SDP from caller.
const sdp = await this.dlg.modify(req.body);
res.send(200, {body: sdp});
// Onhold but media is already released, reconnect
await this.reAnchorMedia(this.cs, this.sd);
isHandled = true;
this.isIncomingLegHold = true;
}
this._onHoldHook(req);
} else if (!isOnhold(req.body)) {
if (this.epOther && this.ep && this.isIncomingLegHold &&
(this.canReleaseMedia || this.shouldExitMediaPathEntirely)) {
// Offhold, time to release media
const newSdp = await this.epOther.modify(req.body);
await res.send(200, {body: newSdp});
await this._releaseMedia(this.cs, this.sd, this.shouldExitMediaPathEntirely);
isHandled = true;
}
this.isIncomingLegHold = false;
if (this._onHoldSession) {
this._onHoldSession.kill();
}
}
}
if (!isHandled) {
@@ -846,7 +1034,7 @@ class TaskDial extends Task {
});
}
async _onHoldHook(allowed = [TaskName.Play, TaskName.Say, TaskName.Pause]) {
async _onHoldHook(req, allowed = [TaskName.Play, TaskName.Say, TaskName.Pause]) {
if (this.data.onHoldHook) {
// send silence for keep Voice quality
await this.epOther.play('silence_stream://500');
@@ -856,7 +1044,13 @@ class TaskDial extends Task {
const b3 = this.getTracingPropagation();
const httpHeaders = b3 && {b3};
const json = await this.cs.application.requestor.
request('verb:hook', this.data.onHoldHook, this.cs.callInfo.toJSON(), httpHeaders);
request('verb:hook', this.data.onHoldHook, {
...this.cs.callInfo.toJSON(),
hold_detail: {
from: req.get('From'),
to: req.get('To')
}
}, httpHeaders);
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
allowedTasks = tasks.filter((t) => allowed.includes(t.name));
if (tasks.length !== allowedTasks.length) {
@@ -865,7 +1059,7 @@ class TaskDial extends Task {
}
this.logger.debug(`DialTask:_onHoldHook: executing ${tasks.length} tasks`);
if (tasks.length) {
this._playSession = new ConfirmCallSession({
this._onHoldSession = new ConfirmCallSession({
logger: this.logger,
application: this.cs.application,
dlg: this.isIncomingLegHold ? this.dlg : this.cs.dlg,
@@ -873,14 +1067,15 @@ class TaskDial extends Task {
callInfo: this.cs.callInfo,
accountInfo: this.cs.accountInfo,
tasks,
rootSpan: this.cs.rootSpan
rootSpan: this.cs.rootSpan,
req: this.cs.req
});
await this._playSession.exec();
this._playSession = null;
await this._onHoldSession.exec();
this._onHoldSession = null;
}
} catch (error) {
this.logger.info(error, 'DialTask:_onHoldHook: failed retrieving waitHook');
this._playSession = null;
this._onHoldSession = null;
break;
}
} while (allowedTasks && allowedTasks.length > 0 && !this.killed && this.isOnHold);

144
lib/tasks/dub.js Normal file
View File

@@ -0,0 +1,144 @@
const {TaskName} = require('../utils/constants');
const TtsTask = require('./tts-task');
const assert = require('assert');
const parseDecibels = require('../utils/parse-decibels');
/**
 * Dub task: add or remove additional audio tracks into the call, and
 * play audio or synthesized speech on those tracks.
 *
 * Task data read by this class:
 *  - action (required): 'addTrack' | 'removeTrack' | 'silenceTrack' | 'playOnTrack' | 'sayOnTrack'
 *  - track (required): name of the track to operate on
 *  - play: audio to play on the track
 *  - say: text to speak, either a string or an object with `text` and optional `synthesizer`
 *  - loop: truthy to loop the played audio
 *  - gain: gain setting, converted with parseDecibels
 */
class TaskDub extends TtsTask {
  /**
   * @param {object} logger - logger instance
   * @param {object} opts - task options
   * @param {object} [parentTask] - enclosing task, if any
   * @throws {AssertionError} if action or track is missing
   */
  constructor(logger, opts, parentTask) {
    super(logger, opts, parentTask);

    this.logger.debug({opts: this.data}, 'TaskDub constructor');
    ['action', 'track', 'play', 'say', 'loop'].forEach((prop) => {
      this[prop] = this.data[prop];
    });
    this.gain = parseDecibels(this.data.gain);

    assert.ok(this.action, 'TaskDub: action is required');
    assert.ok(this.track, 'TaskDub: track is required');
  }

  get name() { return TaskName.Dub; }

  /**
   * Execute the configured action against the supplied endpoint.
   * Errors raised while performing the action are logged, not rethrown.
   * @param {object} cs - call session
   * @param {object} param1
   * @param {object} param1.ep - endpoint on which the dub action is performed
   */
  async exec(cs, {ep}) {
    // await the parent so its setup completes (and any rejection surfaces to the caller)
    // before we act on the endpoint, rather than leaving a floating promise
    await super.exec(cs);

    try {
      switch (this.action) {
        case 'addTrack':
          await this._addTrack(cs, ep);
          break;
        case 'removeTrack':
          await this._removeTrack(cs, ep);
          break;
        case 'silenceTrack':
          await this._silenceTrack(cs, ep);
          break;
        case 'playOnTrack':
          await this._playOnTrack(cs, ep);
          break;
        case 'sayOnTrack':
          await this._sayOnTrack(cs, ep);
          break;
        default:
          throw new Error(`TaskDub: unsupported action ${this.action}`);
      }
    } catch (err) {
      this.logger.error(err, 'Error executing dub task');
    }
  }

  /**
   * Add the track and, if `play` or `say` was provided, immediately start audio on it
   * (`play` takes precedence over `say`).
   */
  async _addTrack(cs, ep) {
    this.logger.info(`adding track: ${this.track}`);
    await ep.dub({
      action: 'addTrack',
      track: this.track
    });

    if (this.play) await this._playOnTrack(cs, ep);
    else if (this.say) await this._sayOnTrack(cs, ep);
  }

  /** Remove the track from the endpoint. */
  async _removeTrack(_cs, ep) {
    this.logger.info(`removing track: ${this.track}`);
    await ep.dub({
      action: 'removeTrack',
      track: this.track
    });
  }

  /** Silence the track on the endpoint. */
  async _silenceTrack(_cs, ep) {
    this.logger.info(`silencing track: ${this.track}`);
    await ep.dub({
      action: 'silenceTrack',
      track: this.track
    });
  }

  /** Play `this.play` on the track, looping when `this.loop` is truthy. */
  async _playOnTrack(_cs, ep) {
    this.logger.info(`playing on track: ${this.track}`);
    await ep.dub({
      action: 'playOnTrack',
      track: this.track,
      play: this.play,
      loop: this.loop ? 'loop' : 'once',
      gain: this.gain
    });
  }

  /**
   * Synthesize `this.say` and play the result on the track.
   * Synthesizer options that are missing or set to 'default' fall back to the
   * call session's speech synthesis settings.
   * @throws {AssertionError} if the synthesizer does not return exactly one result
   */
  async _sayOnTrack(cs, ep) {
    const text = this.say.text || this.say;
    this.synthesizer = this.say.synthesizer || {};

    if (Object.keys(this.synthesizer).length) {
      this.logger.info({synthesizer: this.synthesizer},
        `saying on track ${this.track}: ${text} with synthesizer options`);
    }
    else {
      this.logger.info(`saying on track ${this.track}: ${text}`);
    }
    this.text = [text];

    /* use the task-level option unless it is missing or 'default' */
    const pickOption = (key, sessionDefault) => {
      const value = this.synthesizer[key];
      return value && value !== 'default' ? value : sessionDefault;
    };
    const vendor = pickOption('vendor', cs.speechSynthesisVendor);
    const language = pickOption('language', cs.speechSynthesisLanguage);
    const voice = pickOption('voice', cs.speechSynthesisVoice);
    const label = pickOption('label', cs.speechSynthesisLabel);

    const disableTtsStreaming = false;
    // NOTE(review): _synthesizeWithSpecificVendor is presumably inherited from TtsTask - confirm
    const filepath = await this._synthesizeWithSpecificVendor(cs, ep, {
      vendor, language, voice, label, disableTtsStreaming
    });
    assert.ok(filepath.length === 1, 'TaskDub: no filepath returned from synthesizer');

    const path = filepath[0];
    if (!path.startsWith('say:{')) {
      /* we have a local file of mp3 or r8 of synthesized speech audio to play */
      this.logger.info(`playing synthesized speech from file on track ${this.track}: ${path}`);
      this.play = path;
      await this._playOnTrack(cs, ep);
    }
    else {
      this.logger.info(`doing actual text to speech file on track ${this.track}: ${path}`);
      await ep.dub({
        action: 'sayOnTrack',
        track: this.track,
        say: path,
        gain: this.gain
      });
    }
  }
}
module.exports = TaskDub;

View File

@@ -338,6 +338,7 @@ class TaskEnqueue extends Task {
this.logger.error({err}, `TaskEnqueue:_playHook error retrieving list info for queue ${this.queueName}`);
}
const json = await cs.application.requestor.request('verb:hook', hook, params, httpHeaders);
this.logger.debug({json}, 'TaskEnqueue:_playHook: received response from waitHook');
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
const allowedTasks = tasks.filter((t) => allowed.includes(t.name));
@@ -368,7 +369,8 @@ class TaskEnqueue extends Task {
callInfo: cs.callInfo,
accountInfo: cs.accountInfo,
tasks: tasksToRun,
rootSpan: cs.rootSpan
rootSpan: cs.rootSpan,
req: cs.req
});
await this._playSession.exec();
this._playSession = null;

View File

@@ -10,7 +10,10 @@ const {
IbmTranscriptionEvents,
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents,
AssemblyAiTranscriptionEvents
AssemblyAiTranscriptionEvents,
VadDetection,
VerbioTranscriptionEvents,
SpeechmaticsTranscriptionEvents
} = require('../utils/constants.json');
const {
JAMBONES_GATHER_EARLY_HINTS_MATCH,
@@ -20,6 +23,7 @@ const {
const makeTask = require('./make_task');
const assert = require('assert');
const SttTask = require('./stt-task');
const { SpeechCredentialError } = require('../utils/error');
class TaskGather extends SttTask {
constructor(logger, opts, parentTask) {
@@ -27,9 +31,13 @@ class TaskGather extends SttTask {
[
'finishOnKey', 'input', 'numDigits', 'minDigits', 'maxDigits',
'interDigitTimeout', 'partialResultHook', 'bargein', 'dtmfBargein',
'speechTimeout', 'timeout', 'say', 'play'
'speechTimeout', 'timeout', 'say', 'play', 'actionHookDelayAction', 'fillerNoise', 'vad'
].forEach((k) => this[k] = this.data[k]);
// gather default input is digits
if (!this.input) {
this.input = ['digits'];
}
/* when collecting dtmf, bargein on dtmf is true unless explicitly set to false */
if (this.dtmfBargein !== false && this.input.includes('digits')) this.dtmfBargein = true;
@@ -37,7 +45,8 @@ class TaskGather extends SttTask {
this.timeout = this.timeout === 0 ? 0 : (this.timeout || 15) * 1000;
this.interim = !!this.partialResultHook || this.bargein || (this.timeout > 0);
this.listenDuringPrompt = this.data.listenDuringPrompt === false ? false : true;
this.minBargeinWordCount = this.data.minBargeinWordCount || 1;
this.minBargeinWordCount = this.data.minBargeinWordCount !== undefined ? this.data.minBargeinWordCount : 1;
this._vadEnabled = this.minBargeinWordCount === 0;
if (this.data.recognizer) {
/* continuous ASR (i.e. compile transcripts until a special timeout or dtmf key) */
this.asrTimeout = typeof this.data.recognizer.asrTimeout === 'number' ?
@@ -87,6 +96,18 @@ class TaskGather extends SttTask {
(this.playTask && this.playTask.earlyMedia);
}
get hasFillerNoise() {
return Object.keys(this.fillerNoise).length > 0 && this.fillerNoise.enabled !== false;
}
get fillerNoiseUrl() {
return this.fillerNoise.url;
}
get fillerNoiseStartDelaySecs() {
return this.fillerNoise.startDelaySecs;
}
get summary() {
let s = `${this.name}{`;
if (this.input.length === 2) s += 'inputs=[speech,digits],';
@@ -98,20 +119,44 @@ class TaskGather extends SttTask {
}
if (this.sayTask) s += ',with nested say task';
if (this.playTask) s += ',with nested play task';
if (this.actionHookDelayAction) s += ',with actionHookDelayAction';
s += '}';
return s;
}
async exec(cs, {ep}) {
async exec(cs, obj) {
try {
await this.handling(cs, obj);
} catch (error) {
if (error instanceof SpeechCredentialError) {
this.logger.info('Gather failed due to SpeechCredentialError, finished!');
this.notifyTaskDone();
return;
}
throw error;
}
}
async handling(cs, {ep}) {
this.logger.debug({options: this.data}, 'Gather:exec');
await super.exec(cs, {ep});
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
this.fillerNoise = {
...(cs.fillerNoise || {}),
...(this.fillerNoise || {})
};
this.vad = {
...(cs.vad || {}),
...(this.vad || {})
};
if (cs.hasGlobalSttHints && !this.maskGlobalSttHints) {
const {hints, hintsBoost} = cs.globalSttHints;
const setOfHints = new Set((this.data.recognizer.hints || [])
.concat(hints)
.filter((h) => typeof h === 'string' && h.length > 0));
// allow for hints to be an array of object
.filter((h) => (typeof h === 'string' && h.length > 0) || (typeof h === 'object')));
this.data.recognizer.hints = [...setOfHints];
if (!this.data.recognizer.hintsBoost && hintsBoost) this.data.recognizer.hintsBoost = hintsBoost;
this.logger.debug({hints: this.data.recognizer.hints, hintsBoost: this.data.recognizer.hintsBoost},
@@ -134,6 +179,26 @@ class TaskGather extends SttTask {
this.interim = true;
this.logger.debug('Gather:exec - early hints match enabled');
}
// if we have actionHook delay, and the session does as well, stash the session config
if (this.actionHookDelayAction) {
if (cs.actionHookDelayProcessor) {
this.logger.debug('Gather:exec - stashing session-level ahd proprerties');
cs.stashActionHookDelayProperties();
}
cs.actionHookDelayProperties = this.actionHookDelayAction;
}
this._startVad();
const startDtmfListener = () => {
assert(!this._dtmfListenerStarted);
if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
this._dtmfListenerStarted = true;
}
};
const startListening = async(cs, ep) => {
this._startTimer();
if (this.isContinuousAsr && 0 === this.timeout) this._startAsrTimer();
@@ -145,14 +210,8 @@ class TaskGather extends SttTask {
return;
}
this._startTranscribing(ep);
return updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
} catch (e) {
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
await this._fallback();
startListening(cs, ep);
} else {
this.logger.error({error: e}, 'error in initSpeech');
}
await this._startFallback(cs, ep, {error: e});
}
}
};
@@ -160,13 +219,12 @@ class TaskGather extends SttTask {
try {
if (this.sayTask) {
const {span, ctx} = this.startChildSpan(`nested:${this.sayTask.summary}`);
this.sayTask.span = span;
this.sayTask.ctx = ctx;
this.sayTask.exec(cs, {ep}); // kicked off, _not_ waiting for it to complete
this.sayTask.on('playDone', (err) => {
span.end();
if (err) this.logger.error({err}, 'Gather:exec Error playing tts');
const process = () => {
this.logger.debug('Gather: nested say task completed');
if (!this.listenDuringPrompt) {
startDtmfListener();
}
this._stopVad();
if (!this.killed) {
startListening(cs, ep);
if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
@@ -176,17 +234,27 @@ class TaskGather extends SttTask {
});
}
}
};
this.sayTask.span = span;
this.sayTask.ctx = ctx;
this.sayTask.exec(cs, {ep}) // kicked off, _not_ waiting for it to complete
.catch((err) => {
process();
});
this.sayTask.on('playDone', (err) => {
span.end();
if (err) this.logger.error({err}, 'Gather:exec Error playing tts');
process();
});
}
else if (this.playTask) {
const {span, ctx} = this.startChildSpan(`nested:${this.playTask.summary}`);
this.playTask.span = span;
this.playTask.ctx = ctx;
this.playTask.exec(cs, {ep}); // kicked off, _not_ waiting for it to complete
this.playTask.on('playDone', (err) => {
span.end();
if (err) this.logger.error({err}, 'Gather:exec Error playing url');
const process = () => {
this.logger.debug('Gather: nested play task completed');
if (!this.listenDuringPrompt) {
startDtmfListener();
}
this._stopVad();
if (!this.killed) {
startListening(cs, ep);
if (this.input.includes('speech') && this.vendor === 'nuance' && this.listenDuringPrompt) {
@@ -196,6 +264,17 @@ class TaskGather extends SttTask {
});
}
}
};
this.playTask.span = span;
this.playTask.ctx = ctx;
this.playTask.exec(cs, {ep}) // kicked off, _not_ waiting for it to complete
.catch((err) => {
process();
});
this.playTask.on('playDone', (err) => {
span.end();
if (err) this.logger.error({err}, 'Gather:exec Error playing url');
process();
});
}
else {
@@ -208,16 +287,21 @@ class TaskGather extends SttTask {
if (this.input.includes('speech') && this.listenDuringPrompt) {
await this._setSpeechHandlers(cs, ep);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
if (!this.resolved && !this.killed) {
this._startTranscribing(ep);
}
else {
this.logger.info('Gather:exec - task was killed or resolved quickly, not starting transcription');
}
}
if (this.input.includes('digits') || this.dtmfBargein || this.asrDtmfTerminationDigit) {
ep.on('dtmf', this._onDtmf.bind(this, cs, ep));
// https://github.com/jambonz/jambonz-feature-server/issues/913
if (this.listenDuringPrompt || (!this.sayTask && !this.playTask)) {
startDtmfListener();
}
await this.awaitTaskDone();
this._killAudio(cs);
} catch (err) {
this.logger.error(err, 'TaskGather:exec error');
}
@@ -227,11 +311,13 @@ class TaskGather extends SttTask {
/**
 * Kill the gather task: stop audio, clear timers, remove the dtmf listeners
 * from the endpoint, end any nested play/say spans, stop vad and resolve
 * the task with reason 'killed'.
 * @param {object} cs - call session
 */
kill(cs) {
super.kill(cs);
this._killAudio(cs);
this._clearFillerNoiseTimer();
// drop every 'dtmf' listener on the endpoint
this.ep.removeAllListeners('dtmf');
clearTimeout(this.interDigitTimer);
this._clearAsrTimer();
// end the tracing spans of nested prompt tasks, if there are any
this.playTask?.span.end();
this.sayTask?.span.end();
this._stopVad();
this._resolve('killed');
}
@@ -251,9 +337,19 @@ class TaskGather extends SttTask {
clearTimeout(this.interDigitTimer);
let resolved = false;
if (this.dtmfBargein) {
if (!this.playComplete) {
this.notifyStatus({event: 'dtmf-bargein-detected', ...evt});
}
this._killAudio(cs);
this.emit('dtmf', evt);
}
if (this.isContinuousAsr && evt.dtmf === this.asrDtmfTerminationDigit && this._bufferedTranscripts.length > 0) {
this.logger.info(`continuousAsr triggered with dtmf ${this.asrDtmfTerminationDigit}`);
this._clearAsrTimer();
this._clearTimer();
this._startFinalAsrTimer();
return;
}
if (evt.dtmf === this.finishOnKey && this.input.includes('digits')) {
resolved = true;
this._resolve('dtmf-terminator-key');
@@ -276,13 +372,6 @@ class TaskGather extends SttTask {
this._resolve('dtmf-num-digits');
}
}
else if (this.isContinuousAsr && evt.dtmf === this.asrDtmfTerminationDigit) {
this.logger.info(`continuousAsr triggered with dtmf ${this.asrDtmfTerminationDigit}`);
this._clearAsrTimer();
this._clearTimer();
this._startFinalAsrTimer();
return;
}
if (!resolved && this.interDigitTimeout > 0 && this.digitBuffer.length >= this.minDigits) {
/* start interDigitTimer */
const ms = this.interDigitTimeout * 1000;
@@ -301,7 +390,7 @@ class TaskGather extends SttTask {
if (this.data.recognizer?.deepgramOptions?.shortUtterance) this.shortUtterance = true;
}
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
switch (this.vendor) {
case 'google':
this.bugname = `${this.bugname_prefix}google_transcribe`;
@@ -309,23 +398,19 @@ class TaskGather extends SttTask {
ep, GoogleTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(
ep, GoogleTranscriptionEvents.EndOfUtterance, this._onEndOfUtterance.bind(this, cs, ep));
this.addCustomEventListener(
ep, GoogleTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
break;
case 'aws':
case 'polly':
this.bugname = `${this.bugname_prefix}aws_transcribe`;
this.addCustomEventListener(ep, AwsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(ep, AwsTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
break;
case 'microsoft':
this.bugname = `${this.bugname_prefix}azure_transcribe`;
this.addCustomEventListener(
ep, AzureTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(ep, AzureTranscriptionEvents.NoSpeechDetected,
this._onNoSpeechDetected.bind(this, cs, ep));
this.addCustomEventListener(ep, AzureTranscriptionEvents.VadDetected, this._onVadDetected.bind(this, cs, ep));
//this.addCustomEventListener(ep, AzureTranscriptionEvents.NoSpeechDetected,
//this._onNoSpeechDetected.bind(this, cs, ep));
break;
case 'nuance':
this.bugname = `${this.bugname_prefix}nuance_transcribe`;
@@ -335,8 +420,6 @@ class TaskGather extends SttTask {
this._onStartOfSpeech.bind(this, cs, ep));
this.addCustomEventListener(ep, NuanceTranscriptionEvents.TranscriptionComplete,
this._onTranscriptionComplete.bind(this, cs, ep));
this.addCustomEventListener(ep, NuanceTranscriptionEvents.VadDetected,
this._onVadDetected.bind(this, cs, ep));
/* stall timers until prompt finishes playing */
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
@@ -351,9 +434,6 @@ class TaskGather extends SttTask {
this.addCustomEventListener(ep, DeepgramTranscriptionEvents.Connect, this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, DeepgramTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
/* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */
if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
break;
case 'soniox':
@@ -362,6 +442,12 @@ class TaskGather extends SttTask {
ep, SonioxTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'verbio':
this.bugname = `${this.bugname_prefix}verbio_transcribe`;
this.addCustomEventListener(
ep, VerbioTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'cobalt':
this.bugname = `${this.bugname_prefix}cobalt_transcribe`;
this.addCustomEventListener(
@@ -409,8 +495,6 @@ class TaskGather extends SttTask {
this._onStartOfSpeech.bind(this, cs, ep));
this.addCustomEventListener(ep, NvidiaTranscriptionEvents.TranscriptionComplete,
this._onTranscriptionComplete.bind(this, cs, ep));
this.addCustomEventListener(ep, NvidiaTranscriptionEvents.VadDetected,
this._onVadDetected.bind(this, cs, ep));
/* I think nvidia has this (??) - stall timers until prompt finishes playing */
if ((this.sayTask || this.playTask) && this.listenDuringPrompt) {
@@ -428,6 +512,24 @@ class TaskGather extends SttTask {
this.addCustomEventListener(ep, AssemblyAiTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
break;
case 'speechmatics':
this.bugname = `${this.bugname_prefix}speechmatics_transcribe`;
this.addCustomEventListener(
ep, SpeechmaticsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.Info,
this._onSpeechmaticsInfo.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.RecognitionStarted,
this._onSpeechmaticsRecognitionStarted.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.Error,
this._onSpeechmaticsErrror.bind(this, cs, ep));
break;
default:
if (this.vendor.startsWith('custom:')) {
this.bugname = `${this.bugname_prefix}${this.vendor}_transcribe`;
@@ -477,7 +579,8 @@ class TaskGather extends SttTask {
account_sid: this.cs.accountSid,
alert_type: AlertType.STT_FAILURE,
vendor: this.vendor,
detail: err.message
detail: err.message,
target_sid: this.cs.callSid
});
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
}
@@ -487,9 +590,8 @@ class TaskGather extends SttTask {
this._clearTimer();
this._timeoutTimer = setTimeout(() => {
if (this.isContinuousAsr) this._startAsrTimer();
else if (this.interDigitTimeout <= 0 ||
this.digitBuffer.length < this.minDigits ||
this.needsStt && this.digitBuffer.length === 0) {
if (this.interDigitTimer) return; // let the inter-digit timer complete
else {
this._resolve(this.digitBuffer.length >= this.minDigits ? 'dtmf-num-digits' : 'timeout');
}
}, this.timeout);
@@ -499,7 +601,9 @@ class TaskGather extends SttTask {
if (this._timeoutTimer) {
clearTimeout(this._timeoutTimer);
this._timeoutTimer = null;
return true;
}
return false;
}
_startAsrTimer() {
@@ -508,17 +612,25 @@ class TaskGather extends SttTask {
this._clearAsrTimer();
this._asrTimer = setTimeout(() => {
this.logger.debug('_startAsrTimer - asr timer went off');
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout', evt);
}, this.asrTimeout);
this.logger.debug(`_startAsrTimer: set for ${this.asrTimeout}ms`);
}
_clearAsrTimer() {
if (this._asrTimer) clearTimeout(this._asrTimer);
if (this._asrTimer) {
this.logger.debug('_clearAsrTimer: asrTimer cleared');
clearTimeout(this._asrTimer);
}
this._asrTimer = null;
}
/**
 * Log at debug level, then hang up the call through the call session (`this.cs`).
 */
_hangupCall() {
this.logger.debug('_hangupCall');
this.cs.hangup();
}
_startFastRecognitionTimer(evt) {
assert(this.fastRecognitionTimeout > 0);
this._clearFastRecognitionTimer();
@@ -539,7 +651,7 @@ class TaskGather extends SttTask {
this._clearFinalAsrTimer();
this._finalAsrTimer = setTimeout(() => {
this.logger.debug('_startFinalAsrTimer - final asr timer went off');
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout', evt);
}, 1000);
this.logger.debug('_startFinalAsrTimer: set for 1 second');
@@ -550,13 +662,68 @@ class TaskGather extends SttTask {
this._finalAsrTimer = null;
}
_startVad() {
if (!this._vadStarted && this._vadEnabled) {
this.logger.debug('_startVad');
this.addCustomEventListener(this.ep, VadDetection.Detection, this._onVadDetected.bind(this, this.cs, this.ep));
this.ep?.startVadDetection(this.vad);
this._vadStarted = true;
}
}
_stopVad() {
if (this._vadStarted) {
this.logger.debug('_stopVad');
this.ep?.stopVadDetection(this.vad);
this.ep?.removeCustomEventListener(VadDetection.Detection, this._onVadDetected);
this._vadStarted = false;
}
}
_startFillerNoise() {
this.logger.debug('Gather:_startFillerNoise - playing filler noise');
this.ep?.play(this.fillerNoise.url);
this._fillerNoiseOn = true;
this.ep.once('playback-start', (evt) => {
if (evt.file === this.fillerNoise.url && !this._fillerNoiseOn) {
this.logger.info({evt}, 'Gather:_startFillerNoise - race condition - kill filler noise here');
this.ep.api('uuid_break', this.ep.uuid)
.catch((err) => this.logger.info(err, 'Error killing filler noise'));
return;
} else this.logger.debug({evt}, 'Gather:_startFillerNoise - playback started');
});
}
_startFillerNoiseTimer() {
this._clearFillerNoiseTimer();
this._fillerNoiseTimer = setTimeout(() => {
this.logger.debug('Gather:_startFillerNoiseTimer - playing filler noise');
this._startFillerNoise();
}, this.fillerNoise.startDelaySecs * 1000);
}
_clearFillerNoiseTimer() {
if (this._fillerNoiseTimer) clearTimeout(this._fillerNoiseTimer);
this._fillerNoiseTimer = null;
}
_killFillerNoise() {
if (this._fillerNoiseTimer) {
this.logger.debug('Gather:_killFillerNoise');
this.ep?.api('uuid_break', this.ep.uuid);
}
}
_killAudio(cs) {
if (!this.sayTask && !this.playTask && this.bargein) {
if (this.ep?.connected && !this.playComplete) {
if (this.hasFillerNoise || (!this.sayTask && !this.playTask && this.bargein)) {
if (this.ep?.connected && (!this.playComplete || this.hasFillerNoise)) {
this.logger.debug('Gather:_killAudio: killing playback of any audio');
this.playComplete = true;
this._fillerNoiseOn = false; // in a race, if we just started audio it may sneak through here
this.ep.api('uuid_break', this.ep.uuid)
.catch((err) => this.logger.info(err, 'Error killing audio'));
cs.clearTtsStream();
}
return;
}
@@ -570,17 +737,18 @@ class TaskGather extends SttTask {
this.playTask.kill(cs);
this.playTask = null;
}
this.playComplete = true;
}
_onTranscription(cs, ep, evt, fsEvent) {
// make sure this is not a transcript from answering machine detection
const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished');
this.logger.debug({evt, bugname, finished}, `Gather:_onTranscription for vendor ${this.vendor}`);
this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription raw transcript');
if (bugname && this.bugname !== bugname) return;
if (finished === 'true') return;
if (this.vendor === 'ibm' && evt?.state === 'listening') return;
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
/* we will only get this when we have set utterance_end_ms */
if (this._bufferedTranscripts.length === 0) {
@@ -588,19 +756,35 @@ class TaskGather extends SttTask {
}
else {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
this._bufferedTranscripts = [];
this._resolve('speech', evt);
}
return;
}
if (this.vendor === 'deepgram' && evt.type === 'Metadata') {
this.logger.debug('Gather:_onTranscription - discarding Metadata event from deepgram');
return;
}
evt = this.normalizeTranscription(evt, this.vendor, 1, this.language,
this.shortUtterance, this.data.recognizer.punctuation);
this.logger.debug({evt, bugname, finished, vendor: this.vendor}, 'Gather:_onTranscription normalized transcript');
if (evt.alternatives.length === 0) {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
return;
}
const confidence = evt.alternatives[0].confidence;
const minConfidence = this.data.recognizer?.minConfidence;
this.logger.debug({evt},
`TaskGather:_onTranscription - confidence (${confidence}), minConfidence (${minConfidence})`);
if (confidence && minConfidence && confidence < minConfidence) {
this.logger.info({evt},
'TaskGather:_onTranscription - Transcript confidence ' +
`(${confidence}) < minConfidence (${minConfidence})`);
return this._resolve('stt-low-confidence', evt);
}
/* fast path: our first partial transcript exactly matches an early hint */
if (this.earlyHintsMatch && evt.is_final === false && this.partialTranscriptsCount++ === 0) {
@@ -622,7 +806,9 @@ class TaskGather extends SttTask {
if (evt.is_final) {
if (evt.alternatives[0].transcript === '' && !this.callSession.callGone && !this.killed) {
emptyTranscript = true;
if (finished === 'true' && ['microsoft', 'deepgram'].includes(this.vendor)) {
if (finished === 'true' &&
['microsoft', 'deepgram'].includes(this.vendor) &&
this._bufferedTranscripts.length === 0) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding');
return;
}
@@ -656,56 +842,72 @@ class TaskGather extends SttTask {
this._clearTimer();
if (this._finalAsrTimer) {
this._clearFinalAsrTimer();
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
return this._resolve(this._bufferedTranscripts.length > 0 ? 'speech' : 'timeout', evt);
}
this._startAsrTimer();
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'speechmatics'].includes(this.vendor) &&
!this.vendor.startsWith('custom')) {
this._startTranscribing(ep);
}
}
else {
/* this was removed to fix https://github.com/jambonz/jambonz-feature-server/issues/783 */
/*
if (this.bargein && (words + bufferedWords) < this.minBargeinWordCount) {
this.logger.debug({evt, words, bufferedWords},
'TaskGather:_onTranscription - final transcript but < min barge words');
this._bufferedTranscripts.push(evt);
this._startTranscribing(ep);
if (!emptyTranscript) this._bufferedTranscripts.push(evt);
if (!['soniox', 'aws', 'microsoft', 'deepgram'].includes(this.vendor)) this._startTranscribing(ep);
return;
}
else {
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
else if (this.vendor === 'deepgram') {
/* compile transcripts into one */
if (!emptyTranscript) this._bufferedTranscripts.push(evt);
if (this.data.recognizer?.deepgramOptions?.utteranceEndMs) {
this.logger.debug('TaskGather:_onTranscription - got speech_final waiting for UtteranceEnd event');
return;
}
this.logger.debug({evt}, 'TaskGather:_onTranscription - compiling deepgram transcripts');
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
this._bufferedTranscripts = [];
this.logger.debug({evt}, 'TaskGather:_onTranscription - compiled deepgram transcripts');
}
/* here is where we return a final transcript */
this._resolve('speech', evt);
*/
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
else if (this.vendor === 'deepgram') {
/* compile transcripts into one */
if (!emptyTranscript) this._bufferedTranscripts.push(evt);
/* deepgram can send an empty and final transcript; only if we have any buffered should we resolve */
if (this._bufferedTranscripts.length === 0) return;
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language, this.vendor);
this._bufferedTranscripts = [];
}
/* here is where we return a final transcript */
this._resolve('speech', evt);
/*}*/
}
}
else {
this._clearTimer();
this._startTimer();
if (this.bargein && (words + bufferedWords) >= this.minBargeinWordCount) {
if (!this.playComplete) {
this.logger.debug({transcript: evt.alternatives[0].transcript}, 'killing audio due to speech');
this.emit('vad');
/* deepgram can send a non-final transcript but with words that are final, so we need to buffer */
let emptyTranscript = false;
if (this.vendor === 'deepgram') {
const originalEvent = evt.vendor.evt;
if (originalEvent.is_final && evt.alternatives[0].transcript !== '') {
this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript');
this._bufferedTranscripts.push(evt);
}
if (evt.alternatives[0].transcript === '') emptyTranscript = true;
}
if (!emptyTranscript) {
if (this._clearTimer()) this._startTimer();
if (this.bargein && (words + bufferedWords) >= this.minBargeinWordCount) {
if (!this.playComplete) {
this.logger.debug({transcript: evt.alternatives[0].transcript}, 'killing audio due to speech');
this.emit('vad');
this.notifyStatus({event: 'speech-bargein-detected', ...evt});
}
this._killAudio(cs);
}
this._killAudio(cs);
}
if (this.fastRecognitionTimeout) {
this._startFastRecognitionTimer(evt);
@@ -717,20 +919,18 @@ class TaskGather extends SttTask {
this.cs.callInfo, httpHeaders));
}
if (this.vendor === 'soniox') {
this._clearTimer();
if (evt.vendor.finalWords.length) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - buffering soniox transcript');
this._sonioxTranscripts.push(evt.vendor.finalWords);
}
}
/* deepgram can send a non-final transcript but with words that are final, so we need to buffer */
if (this.vendor === 'deepgram') {
const originalEvent = evt.vendor.evt;
if (originalEvent.is_final && evt.alternatives[0].transcript !== '') {
this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript');
this._bufferedTranscripts.push(evt);
}
// If transcription received, reset timeout timer.
if (this._timeoutTimer && !emptyTranscript) {
this._startTimer();
}
/* restart asr timer if we get a partial transcript (only if the asr timer is already running) */
/* note: https://github.com/jambonz/jambonz-feature-server/issues/866 */
if (this.isContinuousAsr && this._asrTimer) this._startAsrTimer();
}
}
_onEndOfUtterance(cs, ep) {
@@ -762,50 +962,87 @@ class TaskGather extends SttTask {
/**
 * Handler for a vendor TranscriptionComplete event; it only writes a debug log entry.
 * Neither `cs` nor `ep` is used by the body.
 */
_onTranscriptionComplete(cs, ep) {
this.logger.debug('TaskGather:_onTranscriptionComplete');
}
async _onJambonzError(cs, ep, evt) {
this.logger.info({evt}, 'TaskGather:_onJambonzError');
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
async _startFallback(cs, ep, evt) {
if (this.canFallback) {
ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
try {
await this._fallback();
await this._initSpeech(cs, ep);
this.logger.debug('gather:_startFallback');
this.notifyError({ msg: 'ASR error',
details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'in progress'});
await this._initFallback();
this._speechHandlersSet = false;
await this._setSpeechHandlers(cs, ep);
this._startTranscribing(ep);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
return;
return true;
} catch (error) {
this.logger.info({error}, `There is error while falling back to ${this.fallbackVendor}`);
this.notifyError({ msg: 'ASR error',
details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'not available'});
}
} else {
this.logger.debug('gather:_startFallback no condition for falling back');
this.notifyError({ msg: 'ASR error',
details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'not available'});
}
const {writeAlerts, AlertType} = cs.srf.locals;
return false;
}
async _onJambonzError(cs, ep, evt) {
if (this.vendor === 'google' && evt.error_code === 0) {
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError - ignoring google error code 0');
return;
}
this.logger.info({evt}, 'TaskGather:_onJambonzError');
const {writeAlerts, AlertType} = cs.srf.locals;
if (this.vendor === 'nuance') {
const {code, error} = evt;
if (code === 404 && error === 'No speech') return this._resolve('timeout');
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskGather:_onJambonzError');
const errMessage = evt.error || evt.Message;
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
message: `Custom speech vendor ${this.vendor} error: ${errMessage}`,
vendor: this.vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
if (!(await this._startFallback(cs, ep, evt))) {
this.notifyTaskDone();
}
}
_onVendorConnectFailure(cs, _ep, evt) {
async _onVendorConnectFailure(cs, _ep, evt) {
super._onVendorConnectFailure(cs, _ep, evt);
this.notifyTaskDone();
if (!(await this._startFallback(cs, _ep, evt))) {
this.notifyTaskDone();
}
}
_onVendorError(cs, _ep, evt) {
/**
 * Handle a Speechmatics error event: drop the `message` field, serialize the remaining
 * fields to JSON and hand them to _onVendorError as `error`.
 * The call to _onVendorError is not awaited.
 */
async _onSpeechmaticsErrror(cs, _ep, evt) {
// `message` is destructured only to exclude it from the rest object `e`
// eslint-disable-next-line no-unused-vars
const {message, ...e} = evt;
this._onVendorError(cs, _ep, {error: JSON.stringify(e)});
}
async _onVendorError(cs, _ep, evt) {
super._onVendorError(cs, _ep, evt);
this._resolve('stt-error', evt);
if (!(await this._startFallback(cs, _ep, evt))) {
this._resolve('stt-error', evt);
}
}
/**
 * Handler for the Speechmatics RecognitionStarted event; logs the event at debug level.
 */
async _onSpeechmaticsRecognitionStarted(_cs, _ep, evt) {
this.logger.debug({evt}, 'TaskGather:_onSpeechmaticsRecognitionStarted');
}
/**
 * Handler for the Speechmatics Info event; logs the event at debug level.
 */
async _onSpeechmaticsInfo(_cs, _ep, evt) {
this.logger.debug({evt}, 'TaskGather:_onSpeechmaticsInfo');
}
_onVadDetected(cs, ep) {
@@ -814,6 +1051,10 @@ class TaskGather extends SttTask {
this._killAudio(cs);
this.emit('vad');
}
if (this.vad?.strategy === 'one-shot') {
this.ep?.removeCustomEventListener(VadDetection.Detection, this._onVadDetected);
this._vadStarted = false;
}
}
_onNoSpeechDetected(cs, ep, evt, fsEvent) {
@@ -832,29 +1073,39 @@ class TaskGather extends SttTask {
async _resolve(reason, evt) {
this.logger.debug(`TaskGather:resolve with reason ${reason}`);
if (this.resolved) return;
this.resolved = true;
// Clear dtmf event
if (this.dtmfBargein) {
this.ep.removeAllListeners('dtmf');
}
clearTimeout(this.interDigitTimer);
this._clearTimer();
this._clearFastRecognitionTimer();
this.span.setAttributes({
channel: 1,
'stt.resolve': reason,
'stt.result': JSON.stringify(evt)
});
if (this.needsStt && this.ep && this.ep.connected) {
this.ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, 'Error stopping transcription'));
.catch((err) => {
if (this.resolved) return;
this.logger.error({err}, 'Error stopping transcription');
});
}
if (this.resolved) {
this.logger.debug('TaskGather:_resolve - already resolved');
return;
}
this.resolved = true;
// If bargin is false and ws application return ack to verb:hook
// the gather should not play any audio
this._killAudio(this.cs);
// Clear dtmf events, to avoid any case can leak the listener, just clean it
this.ep.removeAllListeners('dtmf');
clearTimeout(this.interDigitTimer);
this._clearTimer();
this._clearFastRecognitionTimer();
this._clearAsrTimer();
this._clearFinalAsrTimer();
this.span.setAttributes({
channel: 1,
'stt.label': this.label || 'None',
'stt.resolve': reason,
'stt.result': JSON.stringify(evt)
});
if (this.callSession && this.callSession.callGone) {
this.logger.debug('TaskGather:_resolve - call is gone, not invoking web callback');
@@ -862,36 +1113,81 @@ class TaskGather extends SttTask {
return;
}
// action hook delay
if (this.cs.actionHookDelayProcessor) {
this.logger.debug('TaskGather:_resolve - actionHookDelayProcessor exists - starting it');
this.cs.actionHookDelayProcessor.start();
}
// TODO: remove and implement as actionHookDelay
if (this.hasFillerNoise && (reason.startsWith('dtmf') || reason.startsWith('speech'))) {
if (this.fillerNoiseStartDelaySecs > 0) {
this._startFillerNoiseTimer();
}
else {
this.logger.debug(`TaskGather:_resolve - playing filler noise: ${this.fillerNoiseUrl}`);
this._startFillerNoise();
}
}
let returnedVerbs = false;
try {
if (reason.startsWith('dtmf')) {
if (this.parentTask) this.parentTask.emit('dtmf', evt);
else {
this.emit('dtmf', evt);
await this.performAction({digits: this.digitBuffer, reason: 'dtmfDetected'});
returnedVerbs = await this.performAction({digits: this.digitBuffer, reason: 'dtmfDetected'});
}
}
else if (reason.startsWith('speech')) {
if (this.parentTask) this.parentTask.emit('transcription', evt);
else {
this.emit('transcription', evt);
await this.performAction({speech: evt, reason: 'speechDetected'});
this.logger.debug('TaskGather:_resolve - invoking performAction');
returnedVerbs = await this.performAction({speech: evt, reason: 'speechDetected'});
this.logger.debug({returnedVerbs}, 'TaskGather:_resolve - back from performAction');
}
}
else if (reason.startsWith('timeout')) {
if (this.parentTask) this.parentTask.emit('timeout', evt);
else {
this.emit('timeout', evt);
await this.performAction({reason: 'timeout'});
returnedVerbs = await this.performAction({reason: 'timeout'});
}
}
else if (reason.startsWith('stt-error')) {
if (this.parentTask) this.parentTask.emit('stt-error', evt);
else {
this.emit('stt-error', evt);
await this.performAction({reason: 'error', details: evt.error});
returnedVerbs = await this.performAction({reason: 'error', details: evt.error});
}
} else if (reason.startsWith('stt-low-confidence')) {
if (this.parentTask) this.parentTask.emit('stt-low-confidence', evt);
else {
this.emit('stt-low-confidence', evt);
returnedVerbs = await this.performAction({reason: 'stt-low-confidence'});
}
}
} catch (err) { /*already logged error*/ }
// Gather got response from hook, cancel actionHookDelay processing
if (this.cs.actionHookDelayProcessor) {
if (returnedVerbs) {
this.logger.debug('TaskGather:_resolve - got response from action hook, cancelling actionHookDelay');
await this.cs.actionHookDelayProcessor.stop();
if (this.actionHookDelayAction && !this.cs.popActionHookDelayProperties()) {
// no session level ahd was running when this task started, so clear it
this.cs.clearActionHookDelayProcessor();
this.logger.debug('TaskGather:_resolve - clear ahd');
}
}
else {
this.logger.debug('TaskGather:_resolve - no response from action hook, continue actionHookDelay');
}
}
this._clearFillerNoiseTimer();
this.notifyTaskDone();
}
}

View File

@@ -8,6 +8,10 @@ const DTMF_SPAN_NAME = 'dtmf';
class TaskListen extends Task {
constructor(logger, opts, parentTask) {
super(logger, opts);
/**
* @deprecated
* use bidirectionalAudio.enabled
*/
this.disableBidirectionalAudio = opts.disableBidirectionalAudio;
this.preconditions = TaskPreconditions.Endpoint;
@@ -25,6 +29,15 @@ class TaskListen extends Task {
this.results = {};
this.playAudioQueue = [];
this.isPlayingAudioFromQueue = false;
this.bidirectionalAudio = {
enabled: this.disableBidirectionalAudio === true ? false : true,
...(this.data['bidirectionalAudio']),
};
// From drachtio-version 3.0.40, forkAudioStart will send empty bugname, metadata together with
// bidirectionalAudio params that cause old version of freeswitch missunderstand between bugname and
// bidirectionalAudio params
this._bugname = 'audio_fork';
if (this.transcribe) this.transcribeTask = makeTask(logger, {'transcribe': opts.transcribe}, this);
}
@@ -123,8 +136,6 @@ class TaskListen extends Task {
ci,
this.metadata);
if (this.hook.auth) {
this.logger.debug({username: this.hook.auth.username, password: this.hook.auth.password},
'TaskListen:_startListening basic auth');
await this.ep.set({
'MOD_AUDIO_BASIC_AUTH_USERNAME': this.hook.auth.username,
'MOD_AUDIO_BASIC_AUTH_PASSWORD': this.hook.auth.password
@@ -135,7 +146,8 @@ class TaskListen extends Task {
mixType: this.mixType,
sampling: this.sampleRate,
...(this._bugname && {bugname: this._bugname}),
metadata
metadata,
bidirectionalAudio: this.bidirectionalAudio || {}
});
this.recordStartTime = moment();
if (this.maxLength) {
@@ -155,7 +167,7 @@ class TaskListen extends Task {
}
/* support bi-directional audio */
if (!this.disableBidirectionalAudio) {
if (this.bidirectionalAudio.enabled) {
ep.addCustomEventListener(ListenEvents.PlayAudio, this._onPlayAudio.bind(this, ep));
}
ep.addCustomEventListener(ListenEvents.KillAudio, this._onKillAudio.bind(this, ep));

101
lib/tasks/llm/index.js Normal file
View File

@@ -0,0 +1,101 @@
const Task = require('../task');
const {TaskPreconditions} = require('../../utils/constants');
const TaskLlmOpenAI_S2S = require('./llms/openai_s2s');
const TaskLlmVoiceAgent_S2S = require('./llms/voice_agent_s2s');
/**
 * The `llm` verb. This class is a thin front: it copies the connection settings from
 * the verb data, creates the vendor-specific implementation and forwards exec/kill,
 * tool output and mid-call updates to it. It also keeps track of custom event
 * listeners added on its behalf so they can all be removed together.
 */
class TaskLlm extends Task {
  /**
   * @param {object} logger
   * @param {object} opts - verb options
   * @throws {Error} when the vendor is not supported
   */
  constructor(logger, opts) {
    super(logger, opts);
    this.preconditions = TaskPreconditions.Endpoint;

    ['vendor', 'model', 'auth', 'connectOptions'].forEach((prop) => {
      this[prop] = this.data[prop];
    });

    this.eventHandlers = [];

    // delegate to the specific llm model
    this.llm = this.createSpecificLlm();
  }

  // The getters tolerate being read before `llm` / `cs` are assigned.
  get name() { return this.llm?.name; }

  get toolHook() { return this.llm?.toolHook; }

  get eventHook() { return this.llm?.eventHook; }

  get ep() { return this.cs?.ep; }

  /** Run the base task setup, then the vendor-specific implementation. */
  async exec(cs, {ep}) {
    await super.exec(cs, {ep});
    await this.llm.exec(cs, {ep});
  }

  /** Kill this task and the vendor-specific implementation. */
  async kill(cs) {
    super.kill(cs);
    await this.llm.kill(cs);
  }

  /**
   * Instantiate the implementation that matches `this.vendor`.
   * @returns {object} the vendor-specific task
   * @throws {Error} when the vendor is not supported
   */
  createSpecificLlm() {
    switch (this.vendor) {
      case 'openai':
      case 'microsoft':
        return new TaskLlmOpenAI_S2S(this.logger, this.data, this);

      case 'voiceagent':
      case 'deepgram':
        return new TaskLlmVoiceAgent_S2S(this.logger, this.data, this);

      default:
        throw new Error(`Unsupported vendor ${this.vendor} for LLM`);
    }
  }

  /** Add a custom event listener on `ep` and remember it for later removal. */
  addCustomEventListener(ep, event, handler) {
    this.eventHandlers.push({ep, event, handler});
    ep.addCustomEventListener(event, handler);
  }

  /** Remove every listener added through addCustomEventListener. */
  removeCustomEventListeners() {
    // take the list first so a second call does not try to remove the same handlers again
    const handlers = this.eventHandlers.splice(0);
    handlers.forEach((h) => h.ep.removeCustomEventListener(h.event, h.handler));
  }

  /** Send an `llm:event` request to the event hook. */
  async sendEventHook(data) {
    await this.cs?.requestor.request('llm:event', this.eventHook, data);
  }

  /** Send an `llm:tool-call` request to the tool hook. */
  async sendToolHook(tool_call_id, data) {
    await this.cs?.requestor.request('llm:tool-call', this.toolHook, {tool_call_id, ...data});
  }

  /** Forward tool output to the implementation; logged and ignored without a connected endpoint. */
  async processToolOutput(tool_call_id, data) {
    if (!this.ep?.connected) {
      this.logger.info('TaskLlm:processToolOutput - no connected endpoint');
      return;
    }
    await this.llm.processToolOutput(this.ep, tool_call_id, data);
  }

  /**
   * Forward a mid-call update to the implementation when it supports one.
   * Silently ignored when there is no connected endpoint.
   */
  async processLlmUpdate(data, callSid) {
    if (!this.ep?.connected) return;

    if (typeof this.llm.processLlmUpdate === 'function') {
      await this.llm.processLlmUpdate(this.ep, data, callSid);
    }
    else {
      const {vendor, model} = this.llm;
      this.logger.info({data, callSid},
        `TaskLlm:_processLlmUpdate: LLM ${vendor}:${model} does not support llm:update`);
    }
  }
}
module.exports = TaskLlm;

View File

@@ -0,0 +1,357 @@
const Task = require('../../task');
const TaskName = 'Llm_OpenAI_s2s';
const {LlmEvents_OpenAI} = require('../../../utils/constants');
const ClientEvent = 'client.event';
const SessionDelete = 'session.delete';
/* the server event names that can be selected, used as the pool for wildcard expansion */
const openai_server_events = [
  'error',
  'session.created',
  'session.updated',
  'conversation.created',
  'input_audio_buffer.committed',
  'input_audio_buffer.cleared',
  'input_audio_buffer.speech_started',
  'input_audio_buffer.speech_stopped',
  'conversation.item.created',
  'conversation.item.input_audio_transcription.completed',
  'conversation.item.input_audio_transcription.failed',
  'conversation.item.truncated',
  'conversation.item.deleted',
  'response.created',
  'response.done',
  'response.output_item.added',
  'response.output_item.done',
  'response.content_part.added',
  'response.content_part.done',
  'response.text.delta',
  'response.text.done',
  'response.audio_transcript.delta',
  'response.audio_transcript.done',
  'response.audio.delta',
  'response.audio.done',
  'response.function_call_arguments.delta',
  'response.function_call_arguments.done',
  'rate_limits.updated',
  'output_audio.playback_started',
  'output_audio.playback_stopped',
];

/**
 * Expand entries ending in ".*" into the matching names from openai_server_events.
 * Entries without a wildcard are passed through unchanged, in their original position.
 *
 * @param {string[]} events - event names, optionally with a trailing ".*"
 * @returns {string[]} the expanded list
 */
const expandWildcards = (events) => events.flatMap((evt) => {
  if (!evt.endsWith('.*')) return [evt];

  const stem = evt.slice(0, -2); // Remove the wildcard ".*"
  // Keep the dot in the prefix so the match stops at a segment boundary; otherwise
  // "response.audio.*" would also pull in "response.audio_transcript.*".
  // A bare ".*" keeps matching every event.
  const prefix = stem ? `${stem}.` : '';
  return openai_server_events.filter((e) => e.startsWith(prefix));
});
class TaskLlmOpenAI_S2S extends Task {
constructor(logger, opts, parentTask) {
super(logger, opts, parentTask);
this.parent = parentTask;
this.vendor = this.parent.vendor;
this.model = this.parent.model || 'gpt-4o-realtime-preview-2024-12-17';
this.auth = this.parent.auth;
this.connectionOptions = this.parent.connectOptions;
const {apiKey} = this.auth || {};
if (!apiKey) throw new Error('auth.apiKey is required for OpenAI S2S');
if (['openai', 'microsoft'].indexOf(this.vendor) === -1) {
throw new Error(`Invalid vendor ${this.vendor} for OpenAI S2S`);
}
if ('microsoft' === this.vendor && !this.connectionOptions?.host) {
throw new Error('connectionOptions.host is required for Microsoft OpenAI S2S');
}
this.apiKey = apiKey;
this.authType = 'microsoft' === this.vendor ? 'query' : 'bearer';
this.actionHook = this.data.actionHook;
this.eventHook = this.data.eventHook;
this.toolHook = this.data.toolHook;
const {response_create, session_update} = this.data.llmOptions;
if (typeof response_create !== 'object') {
throw new Error('llmOptions with an initial response.create is required for OpenAI S2S');
}
this.response_create = response_create;
this.session_update = session_update;
this.results = {
completionReason: 'normal conversation end'
};
/**
* only one of these will have items,
* if includeEvents, then these are the events to include
* if excludeEvents, then these are the events to exclude
*/
this.includeEvents = [];
this.excludeEvents = [];
/* default to all events if user did not specify */
this._populateEvents(this.data.events || openai_server_events);
this.addCustomEventListener = parentTask.addCustomEventListener.bind(parentTask);
this.removeCustomEventListeners = parentTask.removeCustomEventListeners.bind(parentTask);
}
/** Task name; always the module-level TaskName constant. */
get name() { return TaskName; }
get host() {
const {host} = this.connectionOptions || {};
return host || (this.vendor === 'openai' ? 'api.openai.com' : void 0);
}
get path() {
const {path} = this.connectionOptions || {};
if (path) return path;
switch (this.vendor) {
case 'openai':
return 'v1/realtime?model=${this.model}';
case 'microsoft':
return `openai/realtime?api-version=2024-10-01-preview&deployment=${this.model}`;
}
}
async _api(ep, args) {
const res = await ep.api('uuid_openai_s2s', `^^|${args.join('|')}`);
if (!res.body?.startsWith('+OK')) {
throw new Error({args}, `Error calling uuid_openai_s2s: ${res.body}`);
}
}
async exec(cs, {ep}) {
await super.exec(cs);
await this._startListening(cs, ep);
await this.awaitTaskDone();
/* note: the parent llm verb started the span, which is why this is necessary */
await this.parent.performAction(this.results);
this._unregisterHandlers();
}
async kill(cs) {
super.kill(cs);
this._api(cs.ep, [cs.ep.uuid, SessionDelete])
.catch((err) => this.logger.info({err}, 'TaskLlmOpenAI_S2S:kill - error deleting session'));
this.notifyTaskDone();
}
/**
* Send function call output to the OpenAI server in the form of conversation.item.create
* per https://platform.openai.com/docs/guides/realtime/function-calls
*/
async processToolOutput(ep, tool_call_id, data) {
try {
this.logger.debug({tool_call_id, data}, 'TaskLlmOpenAI_S2S:processToolOutput');
if (!data.type || data.type !== 'conversation.item.create') {
this.logger.info({data},
'TaskLlmOpenAI_S2S:processToolOutput - invalid tool output, must be conversation.item.create');
}
else {
await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(data)]);
// spec also recommends to send immediate response.create
await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify({type: 'response.create'})]);
}
} catch (err) {
this.logger.info({err}, 'TaskLlmOpenAI_S2S:processToolOutput');
}
}
/**
* Send a session.update to the OpenAI server
* Note: creating and deleting conversation items also supported as well as interrupting the assistant
*/
async processLlmUpdate(ep, data, _callSid) {
try {
this.logger.debug({data, _callSid}, 'TaskLlmOpenAI_S2S:processLlmUpdate');
if (!data.type || ![
'session.update',
'conversation.item.create',
'conversation.item.delete',
'response.cancel'
].includes(data.type)) {
this.logger.info({data}, 'TaskLlmOpenAI_S2S:processLlmUpdate - invalid mid-call request');
}
else {
await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(data)]);
}
} catch (err) {
this.logger.info({err}, 'TaskLlmOpenAI_S2S:processLlmUpdate');
}
}
async _startListening(cs, ep) {
this._registerHandlers(ep);
try {
const args = [ep.uuid, 'session.create', this.host, this.path, this.authType, this.apiKey];
await this._api(ep, args);
} catch (err) {
this.logger.error({err}, 'TaskLlmOpenAI_S2S:_startListening');
this.notifyTaskDone();
}
}
async _sendClientEvent(ep, obj) {
let ok = true;
this.logger.debug({obj}, 'TaskLlmOpenAI_S2S:_sendClientEvent');
try {
const args = [ep.uuid, ClientEvent, JSON.stringify(obj)];
await this._api(ep, args);
} catch (err) {
ok = false;
this.logger.error({err}, 'TaskLlmOpenAI_S2S:_sendClientEvent - Error');
}
return ok;
}
async _sendInitialMessage(ep) {
let obj = {type: 'response.create', response: this.response_create};
if (!await this._sendClientEvent(ep, obj)) {
this.notifyTaskDone();
}
/* send immediate session.update if present */
else if (this.session_update) {
obj = {type: 'session.update', session: this.session_update};
this.logger.debug({obj}, 'TaskLlmOpenAI_S2S:_sendInitialMessage - sending session.update');
if (!await this._sendClientEvent(ep, obj)) {
this.notifyTaskDone();
}
}
}
_registerHandlers(ep) {
this.addCustomEventListener(ep, LlmEvents_OpenAI.Connect, this._onConnect.bind(this, ep));
this.addCustomEventListener(ep, LlmEvents_OpenAI.ConnectFailure, this._onConnectFailure.bind(this, ep));
this.addCustomEventListener(ep, LlmEvents_OpenAI.Disconnect, this._onDisconnect.bind(this, ep));
this.addCustomEventListener(ep, LlmEvents_OpenAI.ServerEvent, this._onServerEvent.bind(this, ep));
}
_unregisterHandlers() {
this.removeCustomEventListeners();
}
_onError(ep, evt) {
this.logger.info({evt}, 'TaskLlmOpenAI_S2S:_onError');
this.notifyTaskDone();
}
_onConnect(ep) {
this.logger.debug('TaskLlmOpenAI_S2S:_onConnect');
this._sendInitialMessage(ep);
}
_onConnectFailure(_ep, evt) {
this.logger.info(evt, 'TaskLlmOpenAI_S2S:_onConnectFailure');
this.results = {completionReason: 'connection failure'};
this.notifyTaskDone();
}
_onDisconnect(_ep, evt) {
this.logger.info(evt, 'TaskLlmOpenAI_S2S:_onConnectFailure');
this.results = {completionReason: 'disconnect from remote end'};
this.notifyTaskDone();
}
async _onServerEvent(ep, evt) {
let endConversation = false;
const type = evt.type;
this.logger.info({evt}, 'TaskLlmOpenAI_S2S:_onServerEvent');
/* check for failures, such as rate limit exceeded, that should terminate the conversation */
if (type === 'response.done' && evt.response.status === 'failed') {
endConversation = true;
this.results = {
completionReason: 'server failure',
error: evt.response.status_details?.error
};
}
/* server errors of some sort */
else if (type === 'error') {
endConversation = true;
this.results = {
completionReason: 'server error',
error: evt.error
};
}
/* tool calls */
else if (type === 'response.output_item.done' && evt.item?.type === 'function_call') {
this.logger.debug({evt}, 'TaskLlmOpenAI_S2S:_onServerEvent - function_call');
if (!this.toolHook) {
this.logger.warn({evt}, 'TaskLlmOpenAI_S2S:_onServerEvent - no toolHook defined!');
}
else {
const {name, call_id} = evt.item;
const args = JSON.parse(evt.item.arguments);
try {
await this.parent.sendToolHook(call_id, {name, args});
} catch (err) {
this.logger.info({err, evt}, 'TaskLlmOpenAI - error calling function');
this.results = {
completionReason: 'client error calling function',
error: err
};
endConversation = true;
}
}
}
/* check whether we should notify on this event */
if (this.includeEvents.length > 0 ? this.includeEvents.includes(type) : !this.excludeEvents.includes(type)) {
this.parent.sendEventHook(evt)
.catch((err) => this.logger.info({err}, 'TaskLlmOpenAI_S2S:_onServerEvent - error sending event hook'));
}
if (endConversation) {
this.logger.info({results: this.results}, 'TaskLlmOpenAI_S2S:_onServerEvent - ending conversation due to error');
this.notifyTaskDone();
}
}
_populateEvents(events) {
if (events.includes('all')) {
/* work by excluding specific events */
const exclude = events
.filter((evt) => evt.startsWith('-'))
.map((evt) => evt.slice(1));
if (exclude.length === 0) this.includeEvents = openai_server_events;
else this.excludeEvents = expandWildcards(exclude);
}
else {
/* work by including specific events */
const include = events
.filter((evt) => !evt.startsWith('-'));
this.includeEvents = expandWildcards(include);
}
this.logger.debug({
includeEvents: this.includeEvents,
excludeEvents: this.excludeEvents
}, 'TaskLlmOpenAI_S2S:_populateEvents');
}
}
module.exports = TaskLlmOpenAI_S2S;

View File

@@ -0,0 +1,313 @@
const Task = require('../../task');
const TaskName = 'Llm_VoiceAgent_s2s';
const {LlmEvents_VoiceAgent} = require('../../../utils/constants');
const ClientEvent = 'client.event';
const SessionDelete = 'session.delete';
/**
 * Voice agent server event types known to this task.
 * Used as the default event list when the application does not specify
 * `events`, and as the include list when 'all' is requested with no exclusions.
 * NOTE(review): presumably mirrors the vendor's published message types - confirm it is complete.
 */
const va_server_events = [
'Error',
'Welcome',
'SettingsApplied',
'ConversationText',
'UserStartedSpeaking',
'EndOfThought',
'AgentThinking',
'FunctionCallRequest',
'FunctionCalling',
'AgentStartedSpeaking',
'AgentAudioDone',
];
/* no-op for deepgram: the list of event names is handed back unchanged */
const expandWildcards = (events) => events;
/**
 * Speech-to-speech LLM task for a voice agent service (default host
 * agent.deepgram.com). It opens a session through the endpoint's
 * 'uuid_voice_agent_s2s' api command, sends the settings configuration once
 * connected, forwards function call requests to the parent's tool hook and
 * relays server events to the parent's event hook.
 */
class TaskLlmVoiceAgent_S2S extends Task {
  /**
   * @param {object} logger
   * @param {object} opts - verb data; llmOptions.settingsConfiguration is required
   * @param {object} parentTask - parent llm verb supplying vendor, model, auth and connect options
   * @throws {Error} when the api key or a required settingsConfiguration field is missing
   */
  constructor(logger, opts, parentTask) {
    super(logger, opts, parentTask);
    this.parent = parentTask;

    this.vendor = this.parent.vendor;
    this.model = this.parent.model;
    this.auth = this.parent.auth;
    this.connectionOptions = this.parent.connectOptions;

    const {apiKey} = this.auth || {};
    if (!apiKey) throw new Error('auth.apiKey is required for VoiceAgent S2S');

    this.apiKey = apiKey;
    this.authType = 'bearer';
    this.actionHook = this.data.actionHook;
    this.eventHook = this.data.eventHook;
    this.toolHook = this.data.toolHook;

    /* tolerate a missing llmOptions and a null settingsConfiguration so the descriptive error below is thrown */
    const {settingsConfiguration} = this.data.llmOptions || {};
    if (!settingsConfiguration || typeof settingsConfiguration !== 'object') {
      throw new Error('llmOptions with an initial settingsConfiguration is required for VoiceAgent S2S');
    }

    /* any caller-supplied audio section is dropped; only the remainder is sent to the server */
    // eslint-disable-next-line no-unused-vars
    const {audio, ...rest} = settingsConfiguration;
    const cfg = this.settingsConfiguration = rest;

    if (!cfg.agent) throw new Error('llmOptions.settingsConfiguration.agent is required for VoiceAgent S2S');
    if (!cfg.agent.think) {
      throw new Error('llmOptions.settingsConfiguration.agent.think is required for VoiceAgent S2S');
    }
    if (!cfg.agent.think.model) {
      throw new Error('llmOptions.settingsConfiguration.agent.think.model is required for VoiceAgent S2S');
    }
    if (!cfg.agent.think.provider?.type) {
      throw new Error('llmOptions.settingsConfiguration.agent.think.provider.type is required for VoiceAgent S2S');
    }

    this.results = {
      completionReason: 'normal conversation end'
    };

    /**
     * only one of these will have items,
     * if includeEvents, then these are the events to include
     * if excludeEvents, then these are the events to exclude
     */
    this.includeEvents = [];
    this.excludeEvents = [];

    /* default to all events if user did not specify */
    this._populateEvents(this.data.events || va_server_events);

    this.addCustomEventListener = parentTask.addCustomEventListener.bind(parentTask);
    this.removeCustomEventListeners = parentTask.removeCustomEventListeners.bind(parentTask);
  }

  /** Returns the module-level TaskName constant that identifies this task. */
  get name() { return TaskName; }

  /** Host to connect to: connectionOptions.host when provided, else 'agent.deepgram.com'. */
  get host() {
    const {host} = this.connectionOptions || {};
    return host || 'agent.deepgram.com';
  }

  /** Request path: connectionOptions.path when provided, else '/agent'. */
  get path() {
    const {path} = this.connectionOptions || {};
    if (path) return path;

    return '/agent';
  }

  /**
   * Invoke the 'uuid_voice_agent_s2s' api command on the endpoint.
   * @param {object} ep - endpoint exposing an async api(command, argString) method
   * @param {Array} args - command arguments; joined with '|' and prefixed with '^^|'
   * @throws {Error} when the response body does not start with '+OK'
   */
  async _api(ep, args) {
    const res = await ep.api('uuid_voice_agent_s2s', `^^|${args.join('|')}`);
    if (!res.body?.startsWith('+OK')) {
      throw new Error(`Error calling uuid_voice_agent_s2s: ${JSON.stringify(res.body)}`);
    }
  }

  /**
   * Run the task: open the session, wait until the task is marked done, then
   * report the results through the parent llm verb.
   */
  async exec(cs, {ep}) {
    await super.exec(cs);

    try {
      await this._startListening(cs, ep);

      await this.awaitTaskDone();

      /* note: the parent llm verb started the span, which is why this is necessary */
      await this.parent.performAction(this.results);
    } finally {
      /* always drop the event listeners, even if the action hook rejects */
      this._unregisterHandlers();
    }
  }

  /**
   * Stop the task: request deletion of the remote session (fire-and-forget,
   * failures are only logged) and mark the task done.
   */
  async kill(cs) {
    super.kill(cs);

    this._api(cs.ep, [cs.ep.uuid, SessionDelete])
      .catch((err) => this.logger.info({err}, 'TaskLlmVoiceAgent_S2S:kill - error deleting session'));

    this.notifyTaskDone();
  }

  /**
   * Send function call response to the VoiceAgent server.
   * The response is read from data.data and must have type
   * 'FunctionCallResponse'; anything else is logged and dropped.
   * Errors are logged, never thrown.
   */
  async processToolOutput(ep, tool_call_id, data) {
    try {
      const {data:response} = data;
      this.logger.debug({tool_call_id, response}, 'TaskLlmVoiceAgent_S2S:processToolOutput');

      if (!response.type || response.type !== 'FunctionCallResponse') {
        this.logger.info({response},
          'TaskLlmVoiceAgent_S2S:processToolOutput - invalid tool output, must be FunctionCallResponse');
      }
      else {
        await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(response)]);
      }
    } catch (err) {
      this.logger.info({err}, 'TaskLlmVoiceAgent_S2S:processToolOutput');
    }
  }

  /**
   * Relay a mid-call request to the VoiceAgent server as a client event.
   * Accepted types: UpdateInstructions, UpdateSpeak and InjectAgentMessage;
   * anything else is logged and dropped. Errors are logged, never thrown.
   */
  async processLlmUpdate(ep, data, _callSid) {
    try {
      this.logger.debug({data, _callSid}, 'TaskLlmVoiceAgent_S2S:processLlmUpdate');

      if (!data.type || ![
        'UpdateInstructions',
        'UpdateSpeak',
        'InjectAgentMessage',
      ].includes(data.type)) {
        this.logger.info({data}, 'TaskLlmVoiceAgent_S2S:processLlmUpdate - invalid mid-call request');
      }
      else {
        await this._api(ep, [ep.uuid, ClientEvent, JSON.stringify(data)]);
      }
    } catch (err) {
      this.logger.info({err}, 'TaskLlmVoiceAgent_S2S:processLlmUpdate');
    }
  }

  /**
   * Register the event handlers, then issue session.create with the host,
   * path, auth type and api key. A failure is logged and ends the task.
   */
  async _startListening(cs, ep) {
    this._registerHandlers(ep);

    try {
      const args = [ep.uuid, 'session.create', this.host, this.path, this.authType, this.apiKey];
      await this._api(ep, args);
    } catch (err) {
      this.logger.error({err}, `TaskLlmVoiceAgent_S2S:_startListening: ${JSON.stringify(err)}`);
      this.notifyTaskDone();
    }
  }

  /**
   * Serialize obj as JSON and send it as a client event.
   * @returns {Promise<boolean>} true on success, false when the api call failed (logged, not thrown)
   */
  async _sendClientEvent(ep, obj) {
    let ok = true;
    this.logger.debug({obj}, 'TaskLlmVoiceAgent_S2S:_sendClientEvent');
    try {
      const args = [ep.uuid, ClientEvent, JSON.stringify(obj)];
      await this._api(ep, args);
    } catch (err) {
      ok = false;
      this.logger.error({err}, 'TaskLlmVoiceAgent_S2S:_sendClientEvent - Error');
    }
    return ok;
  }

  /** Send the settings configuration as the first client event; end the task if that fails. */
  async _sendInitialMessage(ep) {
    if (!await this._sendClientEvent(ep, this.settingsConfiguration)) {
      this.notifyTaskDone();
    }
  }

  /** Attach the connect / connect-failure / disconnect / server-event listeners for this endpoint. */
  _registerHandlers(ep) {
    this.addCustomEventListener(ep, LlmEvents_VoiceAgent.Connect, this._onConnect.bind(this, ep));
    this.addCustomEventListener(ep, LlmEvents_VoiceAgent.ConnectFailure, this._onConnectFailure.bind(this, ep));
    this.addCustomEventListener(ep, LlmEvents_VoiceAgent.Disconnect, this._onDisconnect.bind(this, ep));
    this.addCustomEventListener(ep, LlmEvents_VoiceAgent.ServerEvent, this._onServerEvent.bind(this, ep));
  }

  /** Remove the custom event listeners via the function bound from the parent task. */
  _unregisterHandlers() {
    this.removeCustomEventListeners();
  }

  /** Log the error event and end the task. */
  _onError(_ep, evt) {
    this.logger.info({evt}, 'TaskLlmVoiceAgent_S2S:_onError');
    this.notifyTaskDone();
  }

  /** Connection established: send the initial settings (not awaited; send failures are handled there). */
  _onConnect(ep) {
    this.logger.debug('TaskLlmVoiceAgent_S2S:_onConnect');
    this._sendInitialMessage(ep);
  }

  /** Connection attempt failed: record the completion reason and end the task. */
  _onConnectFailure(_ep, evt) {
    this.logger.info(evt, 'TaskLlmVoiceAgent_S2S:_onConnectFailure');
    this.results = {completionReason: 'connection failure'};
    this.notifyTaskDone();
  }

  /** Remote end disconnected: record the completion reason and end the task. */
  _onDisconnect(_ep, evt) {
    /* log under this handler's own name so disconnects are not mistaken for connect failures */
    this.logger.info(evt, 'TaskLlmVoiceAgent_S2S:_onDisconnect');
    this.results = {completionReason: 'disconnect from remote end'};
    this.notifyTaskDone();
  }

  /**
   * Handle an event received from the server: end the conversation on a
   * failure or error event, forward FunctionCallRequest to the tool hook, and
   * relay the event to the event hook when the include/exclude filters allow.
   * NOTE(review): the 'response.done' and lowercase 'error' checks below look
   * carried over from the OpenAI task, while va_server_events lists 'Error'
   * (capitalized) - confirm which error event type this server really sends.
   */
  async _onServerEvent(_ep, evt) {
    let endConversation = false;
    const type = evt.type;
    this.logger.info({evt}, 'TaskLlmVoiceAgent_S2S:_onServerEvent');

    /* check for failures, such as rate limit exceeded, that should terminate the conversation */
    if (type === 'response.done' && evt.response?.status === 'failed') {
      endConversation = true;
      this.results = {
        completionReason: 'server failure',
        error: evt.response.status_details?.error
      };
    }

    /* server errors of some sort */
    else if (type === 'error') {
      endConversation = true;
      this.results = {
        completionReason: 'server error',
        error: evt.error
      };
    }

    /* tool calls */
    else if (type === 'FunctionCallRequest') {
      this.logger.debug({evt}, 'TaskLlmVoiceAgent_S2S:_onServerEvent - function_call');
      if (!this.toolHook) {
        this.logger.warn({evt}, 'TaskLlmVoiceAgent_S2S:_onServerEvent - no toolHook defined!');
      }
      else {
        const {function_name:name, function_call_id:call_id} = evt;
        const args = evt.input;

        try {
          await this.parent.sendToolHook(call_id, {name, args});
        } catch (err) {
          this.logger.info({err, evt}, 'TaskLlmVoiceAgent - error calling function');
          this.results = {
            completionReason: 'client error calling function',
            error: err
          };
          endConversation = true;
        }
      }
    }

    /* check whether we should notify on this event */
    if (this.includeEvents.length > 0 ? this.includeEvents.includes(type) : !this.excludeEvents.includes(type)) {
      this.parent.sendEventHook(evt)
        .catch((err) => this.logger.info({err}, 'TaskLlmVoiceAgent_S2S:_onServerEvent - error sending event hook'));
    }

    if (endConversation) {
      this.logger.info({results: this.results},
        'TaskLlmVoiceAgent_S2S:_onServerEvent - ending conversation due to error');
      this.notifyTaskDone();
    }
  }

  /**
   * Build the include/exclude event filters from the requested list.
   *  - list contains 'all': entries prefixed with '-' are exclusions; with no
   *    exclusions every known server event is included
   *  - otherwise: every entry not prefixed with '-' is included
   * @param {string[]} events - requested event names
   */
  _populateEvents(events) {
    if (events.includes('all')) {
      /* work by excluding specific events */
      const exclude = events
        .filter((evt) => evt.startsWith('-'))
        .map((evt) => evt.slice(1));
      if (exclude.length === 0) this.includeEvents = va_server_events;
      else this.excludeEvents = expandWildcards(exclude);
    }
    else {
      /* work by including specific events */
      const include = events
        .filter((evt) => !evt.startsWith('-'));
      this.includeEvents = expandWildcards(include);
    }

    this.logger.debug({
      includeEvents: this.includeEvents,
      excludeEvents: this.excludeEvents
    }, 'TaskLlmVoiceAgent_S2S:_populateEvents');
  }
}
module.exports = TaskLlmVoiceAgent_S2S;

View File

@@ -14,6 +14,9 @@ function makeTask(logger, obj, parent) {
}
validateVerb(name, data, logger);
switch (name) {
case TaskName.Answer:
const TaskAnswer = require('./answer');
return new TaskAnswer(logger, data, parent);
case TaskName.SipDecline:
const TaskSipDecline = require('./sip_decline');
return new TaskSipDecline(logger, data, parent);
@@ -41,6 +44,9 @@ function makeTask(logger, obj, parent) {
case TaskName.Dtmf:
const TaskDtmf = require('./dtmf');
return new TaskDtmf(logger, data, parent);
case TaskName.Dub:
const TaskDub = require('./dub');
return new TaskDub(logger, data, parent);
case TaskName.Enqueue:
const TaskEnqueue = require('./enqueue');
return new TaskEnqueue(logger, data, parent);
@@ -56,6 +62,9 @@ function makeTask(logger, obj, parent) {
case TaskName.Message:
const TaskMessage = require('./message');
return new TaskMessage(logger, data, parent);
case TaskName.Llm:
const TaskLlm = require('./llm');
return new TaskLlm(logger, data, parent);
case TaskName.Rasa:
const TaskRasa = require('./rasa');
return new TaskRasa(logger, data, parent);

View File

@@ -1,5 +1,6 @@
const Task = require('./task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
const { PlayFileNotFoundError } = require('../utils/error');
class TaskPlay extends Task {
constructor(logger, opts) {
@@ -66,8 +67,20 @@ class TaskPlay extends Task {
}
}
} catch (err) {
if (timeout) clearTimeout(timeout);
this.logger.info(err, `TaskPlay:exec - error playing ${this.url}`);
this.logger.info(`TaskPlay:exec - error playing ${this.url}: ${err.message}`);
this.playComplete = true;
if (err.message === 'File Not Found') {
const {writeAlerts, AlertType} = cs.srf.locals;
await this.performAction({status: 'fail', reason: 'playFailed'}, !(this.parentTask || cs.isConfirmCallSession));
this.emit('playDone');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.PLAY_FILENOTFOUND,
url: this.url,
target_sid: cs.callSid
});
throw new PlayFileNotFoundError(this.url);
}
}
this.emit('playDone');
}

View File

@@ -1,5 +1,6 @@
const Task = require('./task');
const {TaskName} = require('../utils/constants');
const WsRequestor = require('../utils/ws-requestor');
/**
* Redirects to a new application
@@ -13,6 +14,17 @@ class TaskRedirect extends Task {
async exec(cs) {
await super.exec(cs);
if (cs.requestor instanceof WsRequestor && cs.application.requestor._isAbsoluteUrl(this.actionHook)) {
this.logger.info(`Task:performAction redirecting to ${this.actionHook}, requires new ws connection`);
try {
this.cs.requestor.close();
const requestor = new WsRequestor(this.logger, cs.accountSid, {url: this.actionHook}, this.webhook_secret) ;
this.cs.application.requestor = requestor;
} catch (err) {
this.logger.info(err, `Task:performAction error redirecting to ${this.actionHook}`);
}
}
await this.performAction();
}
}

View File

@@ -12,11 +12,13 @@ class TaskRestDial extends Task {
this.from = this.data.from;
this.callerName = this.data.callerName;
this.timeLimit = this.data.timeLimit;
this.fromHost = this.data.fromHost;
this.to = this.data.to;
this.call_hook = this.data.call_hook;
this.timeout = this.data.timeout || 60;
this.sipRequestWithinDialogHook = this.data.sipRequestWithinDialogHook;
this.referHook = this.data.referHook;
this.on('connect', this._onConnect.bind(this));
this.on('callStatus', this._onCallStatus.bind(this));
@@ -38,9 +40,9 @@ class TaskRestDial extends Task {
if (this.data.amd) {
this.startAmd = cs.startAmd;
this.stopAmd = cs.stopAmd;
this.on('amd', this._onAmdEvent.bind(this, cs));
}
this.stopAmd = cs.stopAmd;
this._setCallTimer();
await this.awaitTaskDone();
@@ -64,6 +66,10 @@ class TaskRestDial extends Task {
this.canCancel = false;
const cs = this.callSession;
cs.setDialog(dlg);
cs.referHook = this.referHook;
if (this.timeLimit) {
cs.startMaxCallDurationTimer(this.timeLimit);
}
this.logger.debug('TaskRestDial:_onConnect - call connected');
if (this.sipRequestWithinDialogHook) this._initSipRequestWithinDialogHandler(cs, dlg);
try {
@@ -75,11 +81,13 @@ class TaskRestDial extends Task {
synthesizer: {
vendor: cs.speechSynthesisVendor,
language: cs.speechSynthesisLanguage,
voice: cs.speechSynthesisVoice
voice: cs.speechSynthesisVoice,
label: cs.speechSynthesisLabel,
},
recognizer: {
vendor: cs.speechRecognizerVendor,
language: cs.speechRecognizerLanguage
language: cs.speechRecognizerLanguage,
label: cs.speechRecognizerLabel,
}
}
};

View File

@@ -1,6 +1,8 @@
const Task = require('./task');
const assert = require('assert');
const TtsTask = require('./tts-task');
const {TaskName, TaskPreconditions} = require('../utils/constants');
const pollySSMLSplit = require('polly-ssml-split');
const { SpeechCredentialError } = require('../utils/error');
const breakLengthyTextIfNeeded = (logger, text) => {
const chunkSize = 1000;
@@ -23,33 +25,51 @@ const breakLengthyTextIfNeeded = (logger, text) => {
}
};
class TaskSay extends Task {
/**
 * Return the portion of a say string that follows its first '}';
 * a string without any '}' is returned unchanged.
 */
const parseTextFromSayString = (text) => {
  const braceAt = text.indexOf('}');
  return braceAt < 0 ? text : text.slice(braceAt + 1);
};
class TaskSay extends TtsTask {
constructor(logger, opts, parentTask) {
super(logger, opts);
super(logger, opts, parentTask);
this.preconditions = TaskPreconditions.Endpoint;
this.text = (Array.isArray(this.data.text) ? this.data.text : [this.data.text])
.map((t) => breakLengthyTextIfNeeded(this.logger, t))
.flat();
assert.ok((typeof this.data.text === 'string' || Array.isArray(this.data.text)) || this.data.stream === true,
'Say: either text or stream:true is required');
this.loop = this.data.loop || 1;
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
this.synthesizer = this.data.synthesizer || {};
this.disableTtsCache = this.data.disableTtsCache;
this.options = this.synthesizer.options || {};
this.isHandledByPrimaryProvider = true;
if (this.data.stream === true) {
this._isStreamingTts = true;
this.closeOnStreamEmpty = this.data.closeOnStreamEmpty !== false;
}
else {
this._isStreamingTts = false;
this.text = (Array.isArray(this.data.text) ? this.data.text : [this.data.text])
.map((t) => breakLengthyTextIfNeeded(this.logger, t))
.flat();
this.loop = this.data.loop || 1;
this.isHandledByPrimaryProvider = true;
}
}
get name() { return TaskName.Say; }
get summary() {
for (let i = 0; i < this.text.length; i++) {
if (this.text[i].startsWith('silence_stream')) continue;
return `${this.name}{text=${this.text[i].slice(0, 15)}${this.text[i].length > 15 ? '...' : ''}}`;
if (this.isStreamingTts) return `${this.name} streaming`;
else {
for (let i = 0; i < this.text.length; i++) {
if (this.text[i].startsWith('silence_stream')) continue;
return `${this.name}{text=${this.text[i].slice(0, 15)}${this.text[i].length > 15 ? '...' : ''}}`;
}
return `${this.name}{${this.text[0]}}`;
}
return `${this.name}{${this.text[0]}}`;
}
get isStreamingTts() { return this._isStreamingTts; }
_validateURL(urlString) {
try {
new URL(urlString);
@@ -59,144 +79,75 @@ class TaskSay extends Task {
}
}
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label}) {
const {srf} = cs;
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, srf);
const {writeAlerts, AlertType, stats} = srf.locals;
const {synthAudio} = srf.locals.dbHelpers;
const engine = this.synthesizer.engine || 'standard';
const salt = cs.callSid;
let credentials = cs.getSpeechCredentials(vendor, 'tts', label);
/* parse Nuance voices into name and model */
let model;
if (vendor === 'nuance' && voice) {
const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
if (arr) {
voice = arr[1];
model = arr[2];
}
} else if (vendor === 'deepgram') {
model = voice;
async exec(cs, obj) {
if (this.isStreamingTts && !cs.appIsUsingWebsockets) {
throw new Error('Say: streaming say verb requires applications to use the websocket API');
}
/* allow for microsoft custom region voice and api_key to be specified as an override */
if (vendor === 'microsoft' && this.options.deploymentId) {
credentials = credentials || {};
credentials.use_custom_tts = true;
credentials.custom_tts_endpoint = this.options.deploymentId;
credentials.api_key = this.options.apiKey || credentials.apiKey;
credentials.region = this.options.region || credentials.region;
voice = this.options.voice || voice;
} else if (vendor === 'elevenlabs') {
credentials = credentials || {};
credentials.model_id = this.options.model_id || credentials.model_id;
credentials.voice_settings = this.options.voice_settings || {};
credentials.optimize_streaming_latency = this.options.optimize_streaming_latency
|| credentials.optimize_streaming_latency;
voice = this.options.voice_id || voice;
}
this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
try {
if (!credentials) {
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.TTS_NOT_PROVISIONED,
vendor
}).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
this.notifyError({
msg: 'TTS error',
details:`No speech credentials provisioned for selected vendor ${vendor}`
});
throw new Error('no provisioned speech credentials for TTS');
if (this.isStreamingTts) await this.handlingStreaming(cs, obj);
else await this.handling(cs, obj);
this.emit('playDone');
} catch (error) {
if (error instanceof SpeechCredentialError) {
// if say failed due to speech credentials, an alarm is written and an error notification is sent;
// finish this say so we can move on to the next task.
this.logger.info({error}, 'Say failed due to SpeechCredentialError, finished!');
this.emit('playDone');
return;
}
// synthesize all of the text elements
let lastUpdated = false;
/* produce an audio segment from the provided text */
const generateAudio = async(text) => {
if (this.killed) return;
if (text.startsWith('silence_stream://')) return text;
/* otel: trace time for tts */
const {span} = this.startChildSpan('tts-generation', {
'tts.vendor': vendor,
'tts.language': language,
'tts.voice': voice
});
try {
const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
account_sid: cs.accountSid,
text,
vendor,
language,
voice,
engine,
model,
salt,
credentials,
options: this.options,
disableTtsCache : this.disableTtsCache
});
this.logger.debug(`file ${filePath}, served from cache ${servedFromCache}`);
if (filePath) cs.trackTmpFile(filePath);
if (!servedFromCache && !lastUpdated) {
lastUpdated = true;
updateSpeechCredentialLastUsed(credentials.speech_credential_sid)
.catch(() => {/*already logged error */});
}
span.setAttributes({'tts.cached': servedFromCache});
span.end();
if (!servedFromCache && rtt) {
this.notifyStatus({
event: 'synthesized-audio',
vendor,
language,
characters: text.length,
elapsedTime: rtt
});
}
return filePath;
} catch (err) {
this.logger.info({err}, 'Error synthesizing tts');
span.end();
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.TTS_FAILURE,
vendor,
detail: err.message
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
this.notifyError({msg: 'TTS error', details: err.message || err});
throw err;
}
};
const arr = this.text.map((t) => (this._validateURL(t) ? t : generateAudio(t)));
return (await Promise.all(arr)).filter((fp) => fp && fp.length);
} catch (err) {
this.logger.info(err, 'TaskSay:exec error');
throw err;
throw error;
}
}
async exec(cs, {ep}) {
async handlingStreaming(cs, {ep}) {
const {vendor, language, voice, label} = this.getTtsVendorData(cs);
const credentials = cs.getSpeechCredentials(vendor, 'tts', label);
if (!credentials) {
throw new SpeechCredentialError(
`No text-to-speech service credentials for ${vendor} with labels: ${label} have been configured`);
}
try {
await this.setTtsStreamingChannelVars(vendor, language, voice, credentials, ep);
await cs.startTtsStream();
cs.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_open'})
.catch((err) => this.logger.info({err}, 'TaskSay:handlingStreaming - Error sending'));
} catch (err) {
this.logger.info({err}, 'TaskSay:handlingStreaming - Error setting channel vars');
cs.requestor?.request('tts:streaming-event', '/streaming-event', {event_type: 'stream_closed'})
.catch((err) => this.logger.info({err}, 'TaskSay:handlingStreaming - Error sending'));
//TODO: send tts:streaming-event with error?
this.notifyTaskDone();
}
await this.awaitTaskDone();
this.logger.info('TaskSay:handlingStreaming - done');
}
async handling(cs, {ep}) {
const {srf, accountSid:account_sid, callSid:target_sid} = cs;
const {writeAlerts, AlertType} = srf.locals;
const {addFileToCache} = srf.locals.dbHelpers;
const engine = this.synthesizer.engine || cs.synthesizer?.engine || 'neural';
await super.exec(cs);
this.ep = ep;
const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
let vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
this.synthesizer.vendor :
cs.speechSynthesisVendor;
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
let language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
this.synthesizer.language :
cs.speechSynthesisLanguage ;
const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
let voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
this.synthesizer.voice :
cs.speechSynthesisVoice;
const label = this.synthesizer.label && this.synthesizer.label !== 'default' ?
this.synthesizer.label :
cs.speechSynthesisLabel;
let label = this.taskIncludeSynthesizer ? this.synthesizer.label : cs.speechSynthesisLabel;
const fallbackVendor = this.synthesizer.fallbackVendor && this.synthesizer.fallbackVendor !== 'default' ?
this.synthesizer.fallbackVendor :
@@ -207,16 +158,22 @@ class TaskSay extends Task {
const fallbackVoice = this.synthesizer.fallbackVoice && this.synthesizer.fallbackVoice !== 'default' ?
this.synthesizer.fallbackVoice :
cs.fallbackSpeechSynthesisVoice;
const fallbackLabel = this.synthesizer.fallbackLabel && this.synthesizer.fallbackLabel !== 'default' ?
this.synthesizer.fallbackLabel :
cs.fallbackSpeechSynthesisLabel;
const fallbackLabel = this.taskIncludeSynthesizer ?
this.synthesizer.fallbackLabel : cs.fallbackSpeechSynthesisLabel;
let filepath;
try {
filepath = await this._synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label});
} catch (error) {
if (fallbackVendor && this.isHandledByPrimaryProvider) {
if (cs.hasFallbackTts) {
vendor = fallbackVendor;
language = fallbackLanguage;
voice = fallbackVoice;
label = fallbackLabel;
}
const startFallback = async(error) => {
if (fallbackVendor && this.isHandledByPrimaryProvider && !cs.hasFallbackTts) {
this.notifyError(
{ msg: 'TTS error', details:`TTS vendor ${vendor} error: ${error}`, failover: 'in progress'});
this.isHandledByPrimaryProvider = false;
cs.hasFallbackTts = true;
this.logger.info(`Synthesize error, fallback to ${fallbackVendor}`);
filepath = await this._synthesizeWithSpecificVendor(cs, ep,
{
@@ -226,32 +183,110 @@ class TaskSay extends Task {
label: fallbackLabel
});
} else {
throw error;
this.notifyError(
{ msg: 'TTS error', details:`TTS vendor ${vendor} error: ${error}`, failover: 'not available'});
throw new SpeechCredentialError(error.message);
}
};
let filepath;
try {
filepath = await this._synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label});
} catch (error) {
await startFallback(error);
}
this.notifyStatus({event: 'start-playback'});
while (!this.killed && (this.loop === 'forever' || this.loop--) && this.ep?.connected) {
while (!this.killed && (this.loop === 'forever' || this.loop--) && ep?.connected) {
let segment = 0;
while (!this.killed && segment < filepath.length) {
if (cs.isInConference) {
const {memberId, confName, confUuid} = cs;
await this.playToConfMember(this.ep, memberId, confName, confUuid, filepath[segment]);
await this.playToConfMember(ep, memberId, confName, confUuid, filepath[segment]);
}
else {
this.logger.debug(`Say:exec sending command to play file ${filepath[segment]}`);
await ep.play(filepath[segment]);
this.logger.debug(`Say:exec completed play file ${filepath[segment]}`);
const isStreaming = filepath[segment].startsWith('say:{');
if (isStreaming) {
const arr = /^say:\{.*\}\s*(.*)$/.exec(filepath[segment]);
if (arr) this.logger.debug(`Say:exec sending streaming tts request: ${arr[1].substring(0, 64)}..`);
}
else this.logger.debug(`Say:exec sending ${filepath[segment].substring(0, 64)}`);
ep.once('playback-start', (evt) => {
this.logger.debug({evt}, 'Say got playback-start');
if (this.otelSpan) {
this._addStreamingTtsAttributes(this.otelSpan, evt);
this.otelSpan.end();
this.otelSpan = null;
if (evt.variable_tts_cache_filename) {
cs.trackTmpFile(evt.variable_tts_cache_filename);
}
}
});
ep.once('playback-stop', (evt) => {
this.logger.debug({evt}, 'Say got playback-stop');
if (evt.variable_tts_error) {
writeAlerts({
account_sid,
alert_type: AlertType.TTS_FAILURE,
vendor,
detail: evt.variable_tts_error,
target_sid
}).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
}
if (evt.variable_tts_cache_filename && !this.killed) {
const text = parseTextFromSayString(this.text[segment]);
addFileToCache(evt.variable_tts_cache_filename, {
account_sid,
vendor,
language,
voice,
engine,
text
}).catch((err) => this.logger.info({err}, 'Error adding file to cache'));
}
if (this._playResolve) {
evt.variable_tts_error ? this._playReject(new Error(evt.variable_tts_error)) : this._playResolve();
}
});
// wait for playback-stop event received to confirm if the playback is successful
this._playPromise = new Promise((resolve, reject) => {
this._playResolve = resolve;
this._playReject = reject;
});
const r = await ep.play(filepath[segment]);
this.logger.debug({r}, 'Say:exec play result');
try {
// wait for playback-stop event received to confirm if the playback is successful
await this._playPromise;
} catch (err) {
try {
await startFallback(err);
continue;
} catch (err) {
this.logger.info({err}, 'Error waiting for playback-stop event');
throw err;
}
} finally {
this._playPromise = null;
this._playResolve = null;
this._playReject = null;
}
if (filepath[segment].startsWith('say:{')) {
const arr = /^say:\{.*\}\s*(.*)$/.exec(filepath[segment]);
if (arr) this.logger.debug(`Say:exec complete playing streaming tts request: ${arr[1].substring(0, 64)}..`);
} else {
// This log will print speech credentials in the say command for tts stream mode
this.logger.debug(`Say:exec completed play file ${filepath[segment]}`);
}
}
segment++;
}
}
this.emit('playDone');
}
async kill(cs) {
super.kill(cs);
if (this.ep.connected) {
if (this.ep?.connected) {
this.logger.debug('TaskSay:kill - killing audio');
if (cs.isInConference) {
const {memberId, confName} = cs;
@@ -261,8 +296,92 @@ class TaskSay extends Task {
this.notifyStatus({event: 'kill-playback'});
this.ep.api('uuid_break', this.ep.uuid);
}
this.ep.removeAllListeners('playback-start');
this.ep.removeAllListeners('playback-stop');
// if we are waiting for playback-stop event, resolve the promise
if (this._playResolve) {
this._playResolve();
this._playResolve = null;
}
}
this.notifyTaskDone();
}
_addStreamingTtsAttributes(span, evt) {
const attrs = {'tts.cached': false};
for (const [key, value] of Object.entries(evt)) {
if (key.startsWith('variable_tts_')) {
let newKey = key.substring('variable_tts_'.length)
.replace('whisper_', 'whisper.')
.replace('deepgram_', 'deepgram.')
.replace('playht_', 'playht.')
.replace('cartesia_', 'cartesia.')
.replace('rimelabs_', 'rimelabs.')
.replace('verbio_', 'verbio.')
.replace('elevenlabs_', 'elevenlabs.');
if (spanMapping[newKey]) newKey = spanMapping[newKey];
attrs[newKey] = value;
}
}
delete attrs['cache_filename']; //no value in adding this to the span
span.setAttributes(attrs);
}
notifyTtsStreamIsEmpty() {
if (this.isStreamingTts && this.closeOnStreamEmpty) {
this.logger.info('TaskSay:notifyTtsStreamIsEmpty - stream is empty, killing task');
this.notifyTaskDone();
}
}
}
// Lookup table from a vendor-qualified TTS attribute name to the (shorter) name
// that is recorded on the span. Names without an entry here are used unchanged.
const spanMapping = {
// IMPORTANT: the jambonz webapp displays a span attribute name perfectly only when
// it is at most 25 characters long, so keep every mapped name within that limit.
// Example: whisper.ratelim_reqs has length 20 <= 25, which is fine.
// Elevenlabs
'elevenlabs.reported_latency_ms': 'elevenlabs.latency_ms',
'elevenlabs.request_id': 'elevenlabs.req_id',
'elevenlabs.history_item_id': 'elevenlabs.item_id',
'elevenlabs.optimize_streaming_latency': 'elevenlabs.optimization',
'elevenlabs.name_lookup_time_ms': 'name_lookup_ms',
'elevenlabs.connect_time_ms': 'connect_ms',
'elevenlabs.final_response_time_ms': 'final_response_ms',
// Whisper
'whisper.reported_latency_ms': 'whisper.latency_ms',
'whisper.request_id': 'whisper.req_id',
'whisper.reported_organization': 'whisper.organization',
'whisper.reported_ratelimit_requests': 'whisper.ratelimit',
'whisper.reported_ratelimit_remaining_requests': 'whisper.ratelimit_remain',
'whisper.reported_ratelimit_reset_requests': 'whisper.ratelimit_reset',
'whisper.name_lookup_time_ms': 'name_lookup_ms',
'whisper.connect_time_ms': 'connect_ms',
'whisper.final_response_time_ms': 'final_response_ms',
// Deepgram
'deepgram.request_id': 'deepgram.req_id',
'deepgram.reported_model_name': 'deepgram.model_name',
'deepgram.reported_model_uuid': 'deepgram.model_uuid',
'deepgram.reported_char_count': 'deepgram.char_count',
'deepgram.name_lookup_time_ms': 'name_lookup_ms',
'deepgram.connect_time_ms': 'connect_ms',
'deepgram.final_response_time_ms': 'final_response_ms',
// Playht
'playht.request_id': 'playht.req_id',
'playht.name_lookup_time_ms': 'name_lookup_ms',
'playht.connect_time_ms': 'connect_ms',
'playht.final_response_time_ms': 'final_response_ms',
// Cartesia
'cartesia.request_id': 'cartesia.req_id',
'cartesia.name_lookup_time_ms': 'name_lookup_ms',
'cartesia.connect_time_ms': 'connect_ms',
'cartesia.final_response_time_ms': 'final_response_ms',
// Rimelabs
'rimelabs.name_lookup_time_ms': 'name_lookup_ms',
'rimelabs.connect_time_ms': 'connect_ms',
'rimelabs.final_response_time_ms': 'final_response_ms',
// Verbio
'verbio.name_lookup_time_ms': 'name_lookup_ms',
'verbio.connect_time_ms': 'connect_ms',
'verbio.final_response_time_ms': 'final_response_ms',
};
module.exports = TaskSay;

View File

@@ -12,6 +12,7 @@ class TaskSipRefer extends Task {
this.referTo = this.data.referTo;
this.referredBy = this.data.referredBy;
this.referredByDisplayName = this.data.referredByDisplayName;
this.headers = this.data.headers || {};
this.eventHook = this.data.eventHook;
}
@@ -94,7 +95,10 @@ class TaskSipRefer extends Task {
}
if (status >= 200) {
this.referSpan.setAttributes({'refer.finalNotify': status});
await this.performAction({refer_status: 202, final_referred_call_status: status});
await this.performAction({refer_status: 202, final_referred_call_status: status})
.catch((err) => {
this.logger.error(err, 'TaskSipRefer:exec - error performing action finalNotify');
});
this.notifyTaskDone();
}
}
@@ -102,7 +106,7 @@ class TaskSipRefer extends Task {
}
_normalizeReferHeaders(cs, dlg) {
let {referTo, referredBy} = this;
let {referTo, referredBy, referredByDisplayName} = this;
/* get IP address of the SBC to use as hostname if needed */
const {host} = parseUri(dlg.remote.uri);
@@ -117,9 +121,12 @@ class TaskSipRefer extends Task {
referredBy = cs.req?.callingNumber || dlg.local.uri;
this.logger.info({referredBy}, 'setting referredby');
}
if (!referredByDisplayName) {
referredByDisplayName = cs.req?.callingName;
}
if (!referredBy.startsWith('<') && !referredBy.startsWith('sip') && !referredBy.startsWith('"')) {
/* they may have only provided a phone number/user */
referredBy = `sip:${referredBy}@${host}`;
referredBy = `${referredByDisplayName ? `"${referredByDisplayName}"` : ''}<sip:${referredBy}@${host}>`;
}
return {referTo, referredBy};
}

View File

@@ -2,6 +2,8 @@ const Task = require('./task');
const assert = require('assert');
const crypto = require('crypto');
const { TaskPreconditions, CobaltTranscriptionEvents } = require('../utils/constants');
const { SpeechCredentialError } = require('../utils/error');
const {JAMBONES_AWS_TRANSCRIBE_USE_GRPC} = require('../config');
class SttTask extends Task {
@@ -16,14 +18,22 @@ class SttTask extends Task {
normalizeTranscription,
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts,
consolidateTranscripts
consolidateTranscripts,
updateSpeechmaticsPayload
} = require('../utils/transcription-utils')(logger);
this.setChannelVarsForStt = setChannelVarsForStt;
this.normalizeTranscription = normalizeTranscription;
this.compileSonioxTranscripts = compileSonioxTranscripts;
this.consolidateTranscripts = consolidateTranscripts;
this.updateSpeechmaticsPayload = updateSpeechmaticsPayload;
this.eventHandlers = [];
this.isHandledByPrimaryProvider = true;
/**
* taskIncludeRecognizer records whether this verb supplied its own recognizer:
* if taskIncludeRecognizer === true, use the label from verb.recognizer, even if it is empty
* if taskIncludeRecognizer === false, use the label from application.recognizer
*/
this.taskIncludeRecognizer = !!this.data.recognizer;
if (this.data.recognizer) {
const recognizer = this.data.recognizer;
this.vendor = recognizer.vendor;
@@ -33,7 +43,7 @@ class SttTask extends Task {
//fallback
this.fallbackVendor = recognizer.fallbackVendor || 'default';
this.fallbackLanguage = recognizer.fallbackLanguage || 'default';
this.fallbackLabel = recognizer.fallbackLabel || 'default';
this.fallbackLabel = recognizer.fallbackLabel;
/* let credentials be supplied in the recognizer object at runtime */
this.sttCredentials = setSpeechCredentialsAtRuntime(recognizer);
@@ -56,24 +66,20 @@ class SttTask extends Task {
super.exec(cs);
this.ep = ep;
this.ep2 = ep2;
// copy all value from config verb to this object.
// use session preferences if we don't have specific verb-level settings.
if (cs.recognizer) {
for (const k in cs.recognizer) {
if (Array.isArray(this.data.recognizer[k]) ||
Array.isArray(cs.recognizer[k])) {
this.data.recognizer[k] = [
...this.data.recognizer[k],
...cs.recognizer[k]
];
} else if (typeof this.data.recognizer[k] === 'object' ||
typeof cs.recognizer[k] === 'object'
) {
this.data.recognizer[k] = {
...this.data.recognizer[k],
...cs.recognizer[k]
};
const newValue = this.data.recognizer && this.data.recognizer[k] !== undefined ?
this.data.recognizer[k] :
cs.recognizer[k];
if (Array.isArray(newValue)) {
this.data.recognizer[k] = [...(this.data.recognizer[k] || []), ...cs.recognizer[k]];
} else if (typeof newValue === 'object' && newValue !== null) {
this.data.recognizer[k] = { ...(this.data.recognizer[k] || {}), ...cs.recognizer[k] };
} else {
this.data.recognizer[k] = cs.recognizer[k] || this.data.recognizer[k];
this.data.recognizer[k] = newValue;
}
}
}
@@ -85,7 +91,7 @@ class SttTask extends Task {
this.language = cs.speechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.language = this.language;
}
if ('default' === this.label || !this.label) {
if (!this.taskIncludeRecognizer) {
this.label = cs.speechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.label = this.label;
}
@@ -98,10 +104,22 @@ class SttTask extends Task {
this.fallbackLanguage = cs.fallbackSpeechRecognizerLanguage;
if (this.data.recognizer) this.data.recognizer.fallbackLanguage = this.fallbackLanguage;
}
if ('default' === this.fallbackLabel || !this.fallbackLabel) {
if (!this.taskIncludeRecognizer) {
this.fallbackLabel = cs.fallbackSpeechRecognizerLabel;
if (this.data.recognizer) this.data.recognizer.fallbackLabel = this.fallbackLabel;
}
if (cs.hasFallbackAsr) {
if (this.taskIncludeRecognizer) {
// reset fallback ASR from previous run if this verb contains data.recognizer.
cs.hasFallbackAsr = false;
} else {
this.logger.debug('Call session has fallback to 2nd ASR, use 2nd recognizer configuration');
this.vendor = this.fallbackVendor;
this.language = this.fallbackLanguage;
this.label = this.fallbackLabel;
}
}
if (!this.data.recognizer.vendor) {
this.data.recognizer.vendor = this.vendor;
}
@@ -110,13 +128,28 @@ class SttTask extends Task {
this.data.recognizer.model = cs.speechRecognizerLanguage;
}
if (!this.sttCredentials) {
if (
// not gather task, such as transcribe
(!this.input ||
// gather task with speech
this.input.includes('speech')) &&
!this.sttCredentials) {
try {
this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
} catch (error) {
if (this.fallbackVendor && this.isHandledByPrimaryProvider) {
await this._fallback();
if (this.canFallback) {
this.notifyError(
{
msg: 'ASR error', details:`Invalid vendor ${this.vendor}, Error: ${error}`,
failover: 'in progress'
});
await this._initFallback();
} else {
this.notifyError(
{
msg: 'ASR error', details:`Invalid vendor ${this.vendor}, Error: ${error}`,
failover: 'not available'
});
throw error;
}
}
@@ -148,7 +181,7 @@ class SttTask extends Task {
}
async _initSpeechCredentials(cs, vendor, label) {
const {getNuanceAccessToken, getIbmAccessToken} = cs.srf.locals.dbHelpers;
const {getNuanceAccessToken, getIbmAccessToken, getAwsAuthToken, getVerbioAccessToken} = cs.srf.locals.dbHelpers;
let credentials = cs.getSpeechCredentials(vendor, 'stt', label);
if (!credentials) {
@@ -157,15 +190,11 @@ class SttTask extends Task {
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_NOT_PROVISIONED,
vendor
vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for no stt'));
// Notify application that STT vender is wrong.
this.notifyError({
msg: 'ASR error',
details: `No speech-to-text service credentials for ${vendor} have been configured`
});
this.notifyTaskDone();
throw new Error(`No speech-to-text service credentials for ${vendor} have been configured`);
// the ASR might have a fallback configured, so do not mark the task done here.
throw new SpeechCredentialError(`No speech-to-text service credentials for ${vendor} have been configured`);
}
if (vendor === 'nuance' && credentials.client_id) {
@@ -181,21 +210,59 @@ class SttTask extends Task {
const {access_token, servedFromCache} = await getIbmAccessToken(stt_api_key);
this.logger.debug({stt_api_key}, `got ibm access token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...credentials, access_token, stt_region};
} else if (['aws', 'polly'].includes(vendor) && credentials.roleArn) {
/* get aws access token */
const {roleArn, region} = credentials;
const {accessKeyId, secretAccessKey, sessionToken, servedFromCache} =
await getAwsAuthToken({
region,
roleArn
});
this.logger.debug({roleArn}, `(roleArn) got aws access token ${servedFromCache ? 'from cache' : ''}`);
// from the role ARN we get a SessionToken, but the feature server uses it as securityToken.
credentials = {...credentials, accessKeyId, secretAccessKey, securityToken: sessionToken};
}
else if (vendor === 'verbio' && credentials.client_id && credentials.client_secret) {
const {access_token, servedFromCache} = await getVerbioAccessToken(credentials);
this.logger.debug({client_id: credentials.client_id},
`got verbio access token ${servedFromCache ? 'from cache' : ''}`);
credentials.access_token = access_token;
}
else if (vendor == 'aws' && !JAMBONES_AWS_TRANSCRIBE_USE_GRPC) {
/* get AWS access token */
const {speech_credential_sid, accessKeyId, secretAccessKey, securityToken, region } = credentials;
if (!securityToken) {
const { servedFromCache, ...newCredentials} = await getAwsAuthToken({
speech_credential_sid,
accessKeyId,
secretAccessKey,
region});
this.logger.debug({newCredentials}, `got aws security token ${servedFromCache ? 'from cache' : ''}`);
credentials = {...newCredentials, region};
}
}
return credentials;
}
async _fallback() {
get canFallback() {
return this.fallbackVendor && this.isHandledByPrimaryProvider && !this.cs.hasFallbackAsr;
}
async _initFallback() {
assert(this.fallbackVendor, 'fallback failed without fallbackVendor configuration');
this.isHandledByPrimaryProvider = false;
this.logger.info(`Failed to use primary STT provider, fallback to ${this.fallbackVendor}`);
this.vendor = this.fallbackVendor;
this.language = this.fallbackLanguage;
this.label = this.fallbackLabel;
this.isHandledByPrimaryProvider = false;
this.cs.hasFallbackAsr = true;
this.vendor = this.cs.fallbackSpeechRecognizerVendor = this.fallbackVendor;
this.language = this.cs.fallbackSpeechRecognizerLanguage = this.fallbackLanguage;
this.label = this.cs.fallbackSpeechRecognizerLabel = this.fallbackLabel;
this.data.recognizer.vendor = this.vendor;
this.data.recognizer.language = this.language;
this.data.recognizer.label = this.label;
this.sttCredentials = await this._initSpeechCredentials(this.cs, this.vendor, this.label);
// cleanup previous listener from previous vendor
this.removeCustomEventListeners();
}
async compileHintsForCobalt(ep, hostport, model, token, hints) {
@@ -239,6 +306,20 @@ class SttTask extends Task {
_doContinuousAsrWithDeepgram(asrTimeout) {
/* deepgram has an utterance_end_ms property that simplifies things */
assert(this.vendor === 'deepgram');
if (asrTimeout < 1000) {
this.notifyError({
msg: 'ASR error',
details:`asrTimeout ${asrTimeout} is too short for deepgram; setting it to 1000ms`
});
asrTimeout = 1000;
}
else if (asrTimeout > 5000) {
this.notifyError({
msg: 'ASR error',
details:`asrTimeout ${asrTimeout} is too long for deepgram; setting it to 5000ms`
});
asrTimeout = 5000;
}
this.logger.debug(`_doContinuousAsrWithDeepgram - setting utterance_end_ms to ${asrTimeout}`);
const dgOptions = this.data.recognizer.deepgramOptions = this.data.recognizer.deepgramOptions || {};
dgOptions.utteranceEndMs = dgOptions.utteranceEndMs || asrTimeout;
@@ -257,8 +338,8 @@ class SttTask extends Task {
message: 'STT failure reported by vendor',
detail: evt.error,
vendor: this.vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, `Error generating alert for ${this.vendor} connection failure`));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${evt.error}`});
}
_onVendorConnectFailure(cs, _ep, evt) {
@@ -270,8 +351,8 @@ class SttTask extends Task {
alert_type: AlertType.STT_FAILURE,
message: `Failed connecting to ${this.vendor} speech recognizer: ${reason}`,
vendor: this.vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, `Error generating alert for ${this.vendor} connection failure`));
this.notifyError({msg: 'ASR error', details:`Failed connecting to speech vendor ${this.vendor}: ${reason}`});
}
}

View File

@@ -45,6 +45,10 @@ class Task extends Emitter {
return this.name;
}
/**
 * Store the tracing-disabled flag for this task.
 * @param {*} val - value saved as-is in `_disableTracing`
 */
set disableTracing(val) {
this._disableTracing = val;
}
/**
 * JSON representation of the task: the `data` object it holds, returned by reference.
 */
toJSON() {
return this.data;
}
@@ -160,15 +164,33 @@ class Task extends Emitter {
const httpHeaders = b3 && {b3};
span.setAttributes({'http.body': JSON.stringify(params)});
try {
if (this.id) params.verb_id = this.id;
const json = await this.cs.requestor.request(type, this.actionHook, params, httpHeaders);
span.setAttributes({'http.statusCode': 200});
span.end();
const isWsConnection = this.cs.requestor instanceof WsRequestor;
if (!isWsConnection || (expectResponse && json && Array.isArray(json) && json.length)) {
span.end();
} else {
/** we use this span to measure application response latency,
* and with websocket connections we generally get the application's response
* in a subsequent message from the far end, so we terminate the span when the
* first new set of verbs arrive after sending a transcript
* */
this.emit('VerbHookSpanWaitForEnd', {span});
// If actionHook delay action is configured, and ws application have not responded yet any verb for actionHook
// We have to transfer the task to call-session to await on next ws command verbs, and also run action Hook
// delay actions
//if (this.hookDelayActionOpts) {
// this.emit('ActionHookDelayActionOptions', this.hookDelayActionOpts);
//}
}
if (expectResponse && json && Array.isArray(json)) {
const makeTask = require('./make_task');
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
if (tasks && tasks.length > 0) {
this.logger.info({tasks: tasks}, `${this.name} replacing application with ${tasks.length} tasks`);
this.callSession.replaceApplication(tasks);
return true;
}
}
} catch (err) {
@@ -176,6 +198,7 @@ class Task extends Emitter {
span.end();
throw err;
}
return false;
}
}
@@ -255,6 +278,7 @@ class Task extends Emitter {
delete obj.requestor;
delete obj.notifier;
obj.tasks = cs.getRemainingTaskData();
obj.callInfo = cs.callInfo.toJSON();
if (opts && obj.tasks.length > 0) {
const key = Object.keys(obj.tasks[0])[0];
Object.assign(obj.tasks[0][key], {_: opts});

View File

@@ -12,10 +12,13 @@ const {
NvidiaTranscriptionEvents,
JambonzTranscriptionEvents,
TranscribeStatus,
AssemblyAiTranscriptionEvents
AssemblyAiTranscriptionEvents,
VerbioTranscriptionEvents,
SpeechmaticsTranscriptionEvents
} = require('../utils/constants.json');
const { normalizeJambones } = require('@jambonz/verb-specifications');
const SttTask = require('./stt-task');
const { SpeechCredentialError } = require('../utils/error');
const STT_LISTEN_SPAN_NAME = 'stt-listen';
@@ -24,6 +27,7 @@ class TaskTranscribe extends SttTask {
super(logger, opts, parentTask);
this.transcriptionHook = this.data.transcriptionHook;
this.translationHook = this.data.translationHook;
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
if (this.data.recognizer) {
@@ -31,6 +35,25 @@ class TaskTranscribe extends SttTask {
this.separateRecognitionPerChannel = this.data.recognizer.separateRecognitionPerChannel;
}
/* for nested transcribe in dial, unless the app explicitly says so we want to transcribe both legs */
if (this.parentTask?.name === TaskName.Dial) {
if (this.data.channel === 1 || this.data.channel === 2) {
/* transcribe only the channel specified */
this.separateRecognitionPerChannel = false;
this.channel = this.data.channel;
logger.debug(`TaskTranscribe: transcribing only channel ${this.channel} in the Dial verb`);
}
else if (this.separateRecognitionPerChannel !== false) {
this.separateRecognitionPerChannel = true;
}
else {
this.channel = 1;
}
}
else {
this.channel = 1;
}
this.childSpan = [null, null];
// Continuous asr timeout
@@ -39,13 +62,35 @@ class TaskTranscribe extends SttTask {
this.isContinuousAsr = true;
}
/* buffer speech for continuous asr */
this._bufferedTranscripts = [];
this._bufferedTranscripts = [ [], [] ]; // for channel 1 and 2
this.bugname_prefix = 'transcribe_';
this.paused = false;
}
get name() { return TaskName.Transcribe; }
async exec(cs, {ep, ep2}) {
get transcribing1() {
return this.channel === 1 || this.separateRecognitionPerChannel;
}
get transcribing2() {
return this.channel === 2 || this.separateRecognitionPerChannel && this.ep2;
}
async exec(cs, obj) {
try {
await this.handling(cs, obj);
} catch (error) {
if (error instanceof SpeechCredentialError) {
this.logger.info('Transcribe failed due to SpeechCredentialError, finished!');
this.notifyTaskDone();
return;
}
throw error;
}
}
async handling(cs, {ep, ep2}) {
await super.exec(cs, {ep, ep2});
if (this.data.recognizer.vendor === 'nuance') {
@@ -56,7 +101,6 @@ class TaskTranscribe extends SttTask {
...this.data.recognizer.nuanceOptions
};
}
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
if (cs.hasGlobalSttHints) {
const {hints, hintsBoost} = cs.globalSttHints;
@@ -67,25 +111,27 @@ class TaskTranscribe extends SttTask {
}
try {
await this._startTranscribing(cs, ep, 1);
if (this.separateRecognitionPerChannel && ep2) {
if (this.transcribing1) {
await this._startTranscribing(cs, ep, 1);
}
if (this.transcribing2) {
await this._startTranscribing(cs, ep2, 2);
}
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid)
.catch(() => {/*already logged error */});
await this.awaitTaskDone();
} catch (err) {
this.logger.info(err, 'TaskTranscribe:exec - error');
this.parentTask && this.parentTask.emit('error', err);
if (!(await this._startFallback(cs, ep, {error: err}))) {
this.logger.info(err, 'TaskTranscribe:exec - error');
this.parentTask && this.parentTask.emit('error', err);
this.removeCustomEventListeners();
return;
}
}
await this.awaitTaskDone();
this.removeCustomEventListeners();
}
async _stopTranscription() {
let stopTranscription = false;
if (this.ep?.connected) {
if (this.transcribing1 && this.ep?.connected) {
stopTranscription = true;
this.ep.stopTranscription({
vendor: this.vendor,
@@ -93,9 +139,9 @@ class TaskTranscribe extends SttTask {
})
.catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
}
if (this.separateRecognitionPerChannel && this.ep2 && this.ep2.connected) {
if (this.transcribing2 && this.ep2?.connected) {
stopTranscription = true;
this.ep2.stopTranscription({vendor: this.vendor})
this.ep2.stopTranscription({vendor: this.vendor, bugname: this.bugname})
.catch((err) => this.logger.info(err, 'Error TaskTranscribe:kill'));
}
@@ -117,13 +163,13 @@ class TaskTranscribe extends SttTask {
this.logger.info(`TaskTranscribe:updateTranscribe status ${status}`);
switch (status) {
case TranscribeStatus.Pause:
this.paused = true;
await this._stopTranscription();
break;
case TranscribeStatus.Resume:
await this._startTranscribing(this.cs, this.ep, 1);
if (this.separateRecognitionPerChannel && this.ep2) {
await this._startTranscribing(this.cs, this.ep2, 2);
}
this.paused = false;
if (this.transcribing1) await this._startTranscribing(this.cs, this.ep, 1);
if (this.transcribing2) await this._startTranscribing(this.cs, this.ep2, 2);
break;
}
}
@@ -138,7 +184,7 @@ class TaskTranscribe extends SttTask {
if (this.isContinuousAsr) this._doContinuousAsrWithDeepgram(this.asrTimeout);
}
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.data.recognizer);
const opts = this.setChannelVarsForStt(this, this.sttCredentials, this.language, this.data.recognizer);
switch (this.vendor) {
case 'google':
this.bugname = `${this.bugname_prefix}google_transcribe`;
@@ -164,8 +210,8 @@ class TaskTranscribe extends SttTask {
this.bugname = `${this.bugname_prefix}azure_transcribe`;
this.addCustomEventListener(ep, AzureTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, AzureTranscriptionEvents.NoSpeechDetected,
this._onNoAudio.bind(this, cs, ep, channel));
//this.addCustomEventListener(ep, AzureTranscriptionEvents.NoSpeechDetected,
// this._onNoAudio.bind(this, cs, ep, channel));
break;
case 'nuance':
this.bugname = `${this.bugname_prefix}nuance_transcribe`;
@@ -182,7 +228,7 @@ class TaskTranscribe extends SttTask {
this._onVendorConnectFailure.bind(this, cs, ep, channel));
/* if app sets deepgramOptions.utteranceEndMs they essentially want continuous asr */
if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
//if (opts.DEEPGRAM_SPEECH_UTTERANCE_END_MS) this.isContinuousAsr = true;
break;
case 'soniox':
@@ -190,6 +236,13 @@ class TaskTranscribe extends SttTask {
this.addCustomEventListener(ep, SonioxTranscriptionEvents.Transcription,
this._onTranscription.bind(this, cs, ep, channel));
break;
case 'verbio':
this.bugname = `${this.bugname_prefix}verbio_transcribe`;
this.addCustomEventListener(
ep, VerbioTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep));
break;
case 'cobalt':
this.bugname = `${this.bugname_prefix}cobalt_transcribe`;
this.addCustomEventListener(ep, CobaltTranscriptionEvents.Transcription,
@@ -247,6 +300,24 @@ class TaskTranscribe extends SttTask {
this._onVendorConnectFailure.bind(this, cs, ep, channel));
break;
case 'speechmatics':
this.bugname = `${this.bugname_prefix}speechmatics_transcribe`;
this.addCustomEventListener(
ep, SpeechmaticsTranscriptionEvents.Transcription, this._onTranscription.bind(this, cs, ep, channel));
this.addCustomEventListener(
ep, SpeechmaticsTranscriptionEvents.Translation, this._onTranslation.bind(this, cs, ep, channel));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.Info,
this._onSpeechmaticsInfo.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.RecognitionStarted,
this._onSpeechmaticsRecognitionStarted.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.Connect,
this._onVendorConnect.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.ConnectFailure,
this._onVendorConnectFailure.bind(this, cs, ep));
this.addCustomEventListener(ep, SpeechmaticsTranscriptionEvents.Error,
this._onSpeechmaticsError.bind(this, cs, ep));
break;
default:
if (this.vendor.startsWith('custom:')) {
this.bugname = `${this.bugname_prefix}${this.vendor}_transcribe`;
@@ -286,7 +357,7 @@ class TaskTranscribe extends SttTask {
vendor: this.vendor,
interim: this.interim ? true : false,
locale: this.language,
channels: /*this.separateRecognitionPerChannel ? 2 : */ 1,
channels: 1,
bugname: this.bugname,
hostport: this.hostport
});
@@ -295,20 +366,32 @@ class TaskTranscribe extends SttTask {
async _onTranscription(cs, ep, channel, evt, fsEvent) {
// make sure this is not a transcript from answering machine detection
const bugname = fsEvent.getHeader('media-bugname');
const finished = fsEvent.getHeader('transcription-session-finished');
const bufferedTranscripts = this._bufferedTranscripts[channel - 1];
if (bugname && this.bugname !== bugname) return;
if (this.paused) {
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - paused, ignoring transcript');
}
if (this.vendor === 'ibm' && evt?.state === 'listening') return;
if (this.vendor === 'deepgram' && evt.type === 'UtteranceEnd') {
/* we will only get this when we have set utterance_end_ms */
if (this._bufferedTranscripts.length === 0) {
/* DH: send a speech event when we get UtteranceEnd if they want interim events */
if (this.interim) {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, sending speech event');
this._resolve(channel, evt);
}
if (bufferedTranscripts.length === 0) {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram but no buffered transcripts');
}
else {
this.logger.debug('Gather:_onTranscription - got UtteranceEnd event from deepgram, return buffered transcript');
evt = this.consolidateTranscripts(this._bufferedTranscripts, 1, this.language);
this._bufferedTranscripts = [];
this._resolve('speech', evt);
evt = this.consolidateTranscripts(bufferedTranscripts, channel, this.language, this.vendor);
evt.is_final = true;
this._bufferedTranscripts[channel - 1] = [];
this._resolve(channel, evt);
}
return;
}
@@ -322,55 +405,164 @@ class TaskTranscribe extends SttTask {
return;
}
if (evt.alternatives[0]?.transcript === '' && !cs.callGone && !this.killed) {
if (['microsoft', 'deepgram'].includes(this.vendor)) {
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, continue listening');
let emptyTranscript = false;
if (evt.is_final) {
if (evt.alternatives.length === 0 || evt.alternatives[0].transcript === '' && !cs.callGone && !this.killed) {
emptyTranscript = true;
if (finished === 'true' &&
['microsoft', 'deepgram'].includes(this.vendor) &&
bufferedTranscripts.length === 0) {
this.logger.debug({evt}, 'TaskGather:_onTranscription - got empty transcript from old gather, disregarding');
return;
}
else if (this.vendor !== 'deepgram') {
this.logger.info({evt}, 'TaskGather:_onTranscription - got empty transcript, continue listening');
return;
}
else if (this.isContinuousAsr) {
this.logger.info({evt},
'TaskGather:_onTranscription - got empty deepgram transcript during continous asr, continue listening');
return;
}
else if (this.vendor === 'deepgram' && bufferedTranscripts.length > 0) {
this.logger.info({evt},
'TaskGather:_onTranscription - got empty transcript from deepgram, return the buffered transcripts');
}
}
if (this.isContinuousAsr) {
/* append the transcript and start listening again for asrTimeout */
const t = evt.alternatives[0].transcript;
if (t) {
/* remove trailing punctuation */
if (/[,;:\.!\?]$/.test(t)) {
this.logger.debug('TaskGather:_onTranscription - removing trailing punctuation');
evt.alternatives[0].transcript = t.slice(0, -1);
}
}
this.logger.info({evt}, 'TaskGather:_onTranscription - got transcript during continous asr');
bufferedTranscripts.push(evt);
this._startAsrTimer(channel);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google', 'speechmatics']
.includes(this.vendor)) this._startTranscribing(cs, ep, channel);
}
else {
this.logger.info({evt}, 'TaskTranscribe:_onTranscription - got empty transcript, listen again');
this._transcribe(ep);
}
return;
}
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
}
else if (this.vendor === 'deepgram') {
/* compile transcripts into one */
if (!emptyTranscript) bufferedTranscripts.push(evt);
if (this.vendor === 'soniox') {
/* compile transcripts into one */
this._sonioxTranscripts.push(evt.vendor.finalWords);
if (evt.is_final) {
evt = this.compileSonioxTranscripts(this._sonioxTranscripts, 1, this.language);
this._sonioxTranscripts = [];
/* deepgram can send an empty and final transcript; only if we have any buffered should we resolve */
if (bufferedTranscripts.length === 0) return;
evt = this.consolidateTranscripts(bufferedTranscripts, channel, this.language);
this._bufferedTranscripts[channel - 1] = [];
}
/* here is where we return a final transcript */
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - sending final transcript');
this._resolve(channel, evt);
/* some STT engines will keep listening after a final response, so no need to restart */
if (!['soniox', 'aws', 'microsoft', 'deepgram', 'google', 'speechmatics'].includes(this.vendor) &&
!this.vendor.startsWith('custom:')) {
this.logger.debug('TaskTranscribe:_onTranscription - restarting transcribe');
this._startTranscribing(cs, ep, channel);
}
}
}
else {
/* interim transcript */
if (this.isContinuousAsr && evt.is_final) {
this._bufferedTranscripts.push(evt);
this._startAsrTimer(channel);
} else {
await this._resolve(channel, evt);
/* deepgram can send a non-final transcript but with words that are final, so we need to buffer */
if (this.vendor === 'deepgram') {
const originalEvent = evt.vendor.evt;
if (originalEvent.is_final && evt.alternatives[0].transcript !== '') {
this.logger.debug({evt}, 'Gather:_onTranscription - buffering a completed (partial) deepgram transcript');
bufferedTranscripts.push(evt);
}
}
if (this.interim) {
this.logger.debug({evt}, 'TaskTranscribe:_onTranscription - sending interim transcript');
this._resolve(channel, evt);
}
}
}
async _onTranslation(_cs, _ep, channel, evt, _fsEvent) {
this.logger.debug({evt}, 'TaskTranscribe:_onTranslation');
if (this.translationHook && evt.results?.length > 0) {
try {
const b3 = this.getTracingPropagation();
const httpHeaders = b3 && {b3};
const payload = {
...this.cs.callInfo,
...httpHeaders,
translation: {
channel,
language: evt.language,
translation: evt.results[0].content
}
};
this.logger.debug({payload}, 'sending translationHook');
const json = await this.cs.requestor.request('verb:hook', this.translationHook, payload);
this.logger.info({json}, 'completed translationHook');
if (json && Array.isArray(json) && !this.parentTask) {
const makeTask = require('./make_task');
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
if (tasks && tasks.length > 0) {
this.logger.info({tasks: tasks}, `${this.name} replacing application with ${tasks.length} tasks`);
this.cs.replaceApplication(tasks);
}
}
} catch (err) {
this.logger.info(err, 'TranscribeTask:_onTranslation error');
}
if (this.parentTask) {
this.parentTask.emit('translation', evt);
}
}
if (this.killed) {
this.logger.debug('TaskTranscribe:_onTranslation exiting after receiving final transcription');
this._clearTimer();
this.notifyTaskDone();
}
}
async _resolve(channel, evt) {
/* we've got a transcript, so end the otel child span for this channel */
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'transcript',
'stt.result': JSON.stringify(evt)
});
this.childSpan[channel - 1].span.end();
if (evt.is_final) {
/* we've got a final transcript, so end the otel child span for this channel */
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.label': this.label || 'None',
'stt.resolve': 'transcript',
'stt.result': JSON.stringify(evt)
});
this.childSpan[channel - 1].span.end();
}
}
if (this.transcriptionHook) {
const b3 = this.getTracingPropagation();
const httpHeaders = b3 && {b3};
const payload = {
...this.cs.callInfo,
...httpHeaders,
...(evt.alternatives && {speech: evt}),
...(evt.type && {speechEvent: evt})
};
try {
const json = await this.cs.requestor.request('verb:hook', this.transcriptionHook, {
...this.cs.callInfo,
...httpHeaders,
speech: evt
});
this.logger.info({json}, 'sent transcriptionHook');
this.logger.debug({payload}, 'sending transcriptionHook');
const json = await this.cs.requestor.request('verb:hook', this.transcriptionHook, payload);
this.logger.info({json}, 'completed transcriptionHook');
if (json && Array.isArray(json) && !this.parentTask) {
const makeTask = require('./make_task');
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
@@ -391,7 +583,7 @@ class TaskTranscribe extends SttTask {
this._clearTimer();
this.notifyTaskDone();
}
else {
else if (evt.is_final) {
/* start another child span for this channel */
const {span, ctx} = this.startChildSpan(`${STT_LISTEN_SPAN_NAME}:${channel}`);
this.childSpan[channel - 1] = {span, ctx};
@@ -399,11 +591,13 @@ class TaskTranscribe extends SttTask {
}
_onNoAudio(cs, ep, channel) {
this.logger.debug(`TaskTranscribe:_onNoAudio restarting transcription on channel ${channel}`);
this.logger.debug(`TaskTranscribe:_onNoAudio on channel ${channel}`);
if (this.paused) return;
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'timeout'
'stt.resolve': 'timeout',
'stt.label': this.label || 'None',
});
this.childSpan[channel - 1].span.end();
}
@@ -415,11 +609,13 @@ class TaskTranscribe extends SttTask {
}
_onMaxDurationExceeded(cs, ep, channel) {
this.logger.debug(`TaskTranscribe:_onMaxDurationExceeded restarting transcription on channel ${channel}`);
this.logger.debug(`TaskTranscribe:_onMaxDurationExceeded on channel ${channel}`);
if (this.paused) return;
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'max duration exceeded'
'stt.resolve': 'max duration exceeded',
'stt.label': this.label || 'None',
});
this.childSpan[channel - 1].span.end();
}
@@ -438,56 +634,91 @@ class TaskTranscribe extends SttTask {
}
}
async _onJambonzError(cs, _ep, evt) {
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
if (this.isHandledByPrimaryProvider && this.fallbackVendor) {
async _startFallback(cs, _ep, evt) {
if (this.canFallback) {
_ep.stopTranscription({
vendor: this.vendor,
bugname: this.bugname
})
.catch((err) => this.logger.error({err}, `Error stopping transcription for primary vendor ${this.vendor}`));
const {updateSpeechCredentialLastUsed} = require('../utils/db-utils')(this.logger, cs.srf);
try {
await this._fallback();
this.notifyError({ msg: 'ASR error',
details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'in progress'});
await this._initFallback();
let channel = 1;
if (this.ep !== _ep) {
channel = 2;
}
this[`_speechHandlersSet_${channel}`] = false;
this._startTranscribing(cs, _ep, channel);
updateSpeechCredentialLastUsed(this.sttCredentials.speech_credential_sid);
return;
return true;
} catch (error) {
this.notifyError({ msg: 'ASR error',
details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'not available'});
this.logger.info({error}, `There is error while falling back to ${this.fallbackVendor}`);
}
} else {
const {writeAlerts, AlertType} = cs.srf.locals;
this.logger.debug('transcribe:_startFallback no condition for falling back');
this.notifyError({ msg: 'ASR error',
details:`STT Vendor ${this.vendor} error: ${evt.error || evt.reason}`, failover: 'not available'});
}
return false;
}
if (this.vendor === 'nuance') {
const {code, error} = evt;
if (code === 404 && error === 'No speech') return this._resolve('timeout');
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
vendor: this.vendor,
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
this.notifyError({msg: 'ASR error', details:`Custom speech vendor ${this.vendor} error: ${evt.error}`});
async _onJambonzError(cs, _ep, evt) {
if (this.vendor === 'google' && evt.error_code === 0) {
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError - ignoring google error code 0');
return;
}
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
if (this.paused) return;
const {writeAlerts, AlertType} = cs.srf.locals;
if (this.vendor === 'nuance') {
const {code, error} = evt;
if (code === 404 && error === 'No speech') return this._resolve('timeout');
if (code === 413 && error === 'Too much speech') return this._resolve('timeout');
}
this.logger.info({evt}, 'TaskTranscribe:_onJambonzError');
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
message: `Custom speech vendor ${this.vendor} error: ${evt.error}`,
vendor: this.vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for jambonz custom connection failure'));
if (!(await this._startFallback(cs, _ep, evt))) {
this.notifyTaskDone();
}
}
_onVendorConnectFailure(cs, _ep, channel, evt) {
async _onVendorConnectFailure(cs, _ep, channel, evt) {
super._onVendorConnectFailure(cs, _ep, evt);
if (this.childSpan[channel - 1] && this.childSpan[channel - 1].span) {
this.childSpan[channel - 1].span.setAttributes({
channel,
'stt.resolve': 'connection failure'
'stt.resolve': 'connection failure',
'stt.label': this.label || 'None',
});
this.childSpan[channel - 1].span.end();
}
this.notifyTaskDone();
if (!(await this._startFallback(cs, _ep, evt))) {
this.notifyTaskDone();
}
}
async _onSpeechmaticsRecognitionStarted(_cs, _ep, evt) {
this.logger.debug({evt}, 'TaskGather:_onSpeechmaticsRecognitionStarted');
}
async _onSpeechmaticsInfo(_cs, _ep, evt) {
this.logger.debug({evt}, 'TaskGather:_onSpeechmaticsInfo');
}
async _onSpeechmaticsError(cs, _ep, evt) {
// eslint-disable-next-line no-unused-vars
const {message, ...e} = evt;
this._onVendorError(cs, _ep, {error: JSON.stringify(e)});
}
_startAsrTimer(channel) {
@@ -496,8 +727,9 @@ class TaskTranscribe extends SttTask {
this._clearAsrTimer(channel);
this._asrTimer = setTimeout(() => {
this.logger.debug(`TaskTranscribe:_startAsrTimer - asr timer went off for channel: ${channel}`);
const evt = this.consolidateTranscripts(this._bufferedTranscripts, channel, this.language);
this._bufferedTranscripts = [];
const evt = this.consolidateTranscripts(
this._bufferedTranscripts[channel - 1], channel, this.language, this.vendor);
this._bufferedTranscripts[channel - 1] = [];
this._resolve(channel, evt);
}, this.asrTimeout);
this.logger.debug(`TaskTranscribe:_startAsrTimer: set for ${this.asrTimeout}ms for channel ${channel}`);

289
lib/tasks/tts-task.js Normal file
View File

@@ -0,0 +1,289 @@
const Task = require('./task');
const { TaskPreconditions } = require('../utils/constants');
const { SpeechCredentialError } = require('../utils/error');
const dbUtils = require('../utils/db-utils');
class TtsTask extends Task {
constructor(logger, data, parentTask) {
super(logger, data);
this.parentTask = parentTask;
this.preconditions = TaskPreconditions.Endpoint;
this.earlyMedia = this.data.earlyMedia === true || (parentTask && parentTask.earlyMedia);
/**
* Task use taskIncludeSynthesizer to identify
* if taskIncludeSynthesizer === true, use label from verb.synthesizer, even it's empty
* if taskIncludeSynthesizer === false, use label from application.synthesizer
*/
this.taskIncludeSynthesizer = !!this.data.synthesizer;
this.synthesizer = this.data.synthesizer || {};
this.disableTtsCache = this.data.disableTtsCache;
this.options = this.synthesizer.options || {};
}
async exec(cs) {
super.exec(cs);
if (cs.synthesizer) {
this.options = {...cs.synthesizer.options, ...this.options};
this.data.synthesizer = this.data.synthesizer || {};
for (const k in cs.synthesizer) {
const newValue = this.data.synthesizer && this.data.synthesizer[k] !== undefined ?
this.data.synthesizer[k] :
cs.synthesizer[k];
if (Array.isArray(newValue)) {
this.data.synthesizer[k] = [...(this.data.synthesizer[k] || []), ...cs.synthesizer[k]];
} else if (typeof newValue === 'object' && newValue !== null) {
this.data.synthesizer[k] = { ...(this.data.synthesizer[k] || {}), ...cs.synthesizer[k] };
} else {
this.data.synthesizer[k] = newValue;
}
}
}
}
getTtsVendorData(cs) {
const vendor = this.synthesizer.vendor && this.synthesizer.vendor !== 'default' ?
this.synthesizer.vendor :
cs.speechSynthesisVendor;
const language = this.synthesizer.language && this.synthesizer.language !== 'default' ?
this.synthesizer.language :
cs.speechSynthesisLanguage ;
const voice = this.synthesizer.voice && this.synthesizer.voice !== 'default' ?
this.synthesizer.voice :
cs.speechSynthesisVoice;
const label = this.taskIncludeSynthesizer ? this.synthesizer.label : cs.speechSynthesisLabel;
return {vendor, language, voice, label};
}
async setTtsStreamingChannelVars(vendor, language, voice, credentials, ep) {
const {api_key, model_id} = credentials;
const {stability, similarity_boost, use_speaker_boost, style} = this.options;
let obj;
this.logger.debug({credentials},
`setTtsStreamingChannelVars: vendor: ${vendor}, language: ${language}, voice: ${voice}`);
switch (vendor) {
case 'deepgram':
obj = {
DEEPGRAM_API_KEY: api_key,
DEEPGRAM_TTS_STREAMING_MODEL: voice
};
break;
case 'cartesia':
obj = {
CARTESIA_API_KEY: api_key,
CARTESIA_TTS_STREAMING_MODEL_ID: model_id,
CARTESIA_TTS_STREAMING_VOICE_ID: voice,
CARTESIA_TTS_STREAMING_LANGUAGE: language || 'en',
};
break;
case 'elevenlabs':
obj = {
ELEVENLABS_API_KEY: api_key,
ELEVENLABS_TTS_STREAMING_MODEL_ID: model_id,
ELEVENLABS_TTS_STREAMING_VOICE_ID: voice,
// 20/12/2024 - only eleven_turbo_v2_5 support multiple language
...(['eleven_turbo_v2_5'].includes(model_id) && {ELEVENLABS_TTS_STREAMING_LANGUAGE: language}),
...(stability && {ELEVENLABS_TTS_STREAMING_VOICE_SETTINGS_STABILITY: stability}),
...(similarity_boost && {ELEVENLABS_TTS_STREAMING_VOICE_SETTINGS_SIMILARITY_BOOST: similarity_boost}),
...(use_speaker_boost && {ELEVENLABS_TTS_STREAMING_VOICE_SETTINGS_USE_SPEAKER_BOOST: use_speaker_boost}),
...(style && {ELEVENLABS_TTS_STREAMING_VOICE_SETTINGS_STYLE: style})
};
break;
default:
throw new Error(`vendor ${vendor} is not supported for tts streaming yet`);
}
this.logger.info({vendor, credentials, obj}, 'setTtsStreamingChannelVars');
await ep.set(obj);
}
async _synthesizeWithSpecificVendor(cs, ep, {vendor, language, voice, label, preCache = false}) {
const {srf, accountSid:account_sid} = cs;
const {writeAlerts, AlertType, stats} = srf.locals;
const {synthAudio} = srf.locals.dbHelpers;
const engine = this.synthesizer.engine || cs.synthesizer?.engine || 'neural';
const salt = cs.callSid;
let credentials = cs.getSpeechCredentials(vendor, 'tts', label);
if (!credentials) {
throw new SpeechCredentialError(
`No text-to-speech service credentials for ${vendor} with labels: ${label} have been configured`);
}
/* parse Nuance voices into name and model */
let model;
if (vendor === 'nuance' && voice) {
const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
if (arr) {
voice = arr[1];
model = arr[2];
}
} else if (vendor === 'deepgram') {
model = voice;
}
/* allow for microsoft custom region voice and api_key to be specified as an override */
if (vendor === 'microsoft' && this.options.deploymentId) {
credentials = credentials || {};
credentials.use_custom_tts = true;
credentials.custom_tts_endpoint = this.options.deploymentId;
credentials.api_key = this.options.apiKey || credentials.apiKey;
credentials.region = this.options.region || credentials.region;
voice = this.options.voice || voice;
} else if (vendor === 'elevenlabs') {
credentials = credentials || {};
credentials.model_id = this.options.model_id || credentials.model_id;
credentials.voice_settings = this.options.voice_settings || {};
credentials.optimize_streaming_latency = this.options.optimize_streaming_latency
|| credentials.optimize_streaming_latency;
voice = this.options.voice_id || voice;
} else if (vendor === 'rimelabs') {
credentials = credentials || {};
credentials.model_id = this.options.model_id || credentials.model_id;
} else if (vendor === 'whisper') {
credentials = credentials || {};
credentials.model_id = this.options.model_id || credentials.model_id;
} else if (vendor === 'verbio') {
credentials = credentials || {};
credentials.engine_version = this.options.engine_version || credentials.engine_version;
} else if (vendor === 'playht') {
credentials = credentials || {};
credentials.voice_engine = this.options.voice_engine || credentials.voice_engine;
} else if (vendor === 'google' && typeof voice === 'string' && voice.startsWith('custom_')) {
const {lookupGoogleCustomVoice} = dbUtils(this.logger, cs.srf);
const arr = /custom_(.*)/.exec(voice);
if (arr) {
const google_custom_voice_sid = arr[1];
const [custom_voice] = await lookupGoogleCustomVoice(google_custom_voice_sid);
if (custom_voice.use_voice_cloning_key) {
voice = {
voice_cloning_key: custom_voice.voice_cloning_key,
};
}
}
}
/**
* note on cache_speech_handles. This was found to be risky.
* It can cause a crash in the following sequence on a single call:
* 1. Stream tts on vendor A with cache_speech_handles=1, then
* 2. Stream tts on vendor B with cache_speech_handles=1
*
* we previously tried to track when vendors were switched and manage the flag accordingly,
* but it difficult to track all the scenarios and the benefit (slightly faster start to tts playout)
* is probably minimal. DH.
*/
ep.set({
tts_engine: vendor.startsWith('custom:') ? 'custom' : vendor,
tts_voice: voice,
//cache_speech_handles: !cs.currentTtsVendor || cs.currentTtsVendor === vendor ? 1 : 0,
cache_speech_handles: 0,
}).catch((err) => this.logger.info({err}, 'Error setting tts_engine on endpoint'));
// set the current vendor on the call session
// If vendor is changed from the previous one, then reset the cache_speech_handles flag
//cs.currentTtsVendor = vendor;
if (!preCache && !this._disableTracing) this.logger.info({vendor, language, voice, model}, 'TaskSay:exec');
try {
if (!credentials) {
writeAlerts({
account_sid,
alert_type: AlertType.TTS_NOT_PROVISIONED,
vendor,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for no tts'));
throw new SpeechCredentialError('no provisioned speech credentials for TTS');
}
/* produce an audio segment from the provided text */
const generateAudio = async(text) => {
if (this.killed) return;
if (text.startsWith('silence_stream://')) return text;
/* otel: trace time for tts */
if (!preCache && !this._disableTracing) {
const {span} = this.startChildSpan('tts-generation', {
'tts.vendor': vendor,
'tts.language': language,
'tts.voice': voice,
'tts.label': label || 'None',
});
this.otelSpan = span;
}
try {
const {filePath, servedFromCache, rtt} = await synthAudio(stats, {
account_sid,
text,
vendor,
language,
voice,
engine,
model,
salt,
credentials,
options: this.options,
disableTtsCache : this.disableTtsCache,
renderForCaching: preCache
});
if (!filePath.startsWith('say:')) {
this.logger.debug(`Say: file ${filePath}, served from cache ${servedFromCache}`);
if (filePath) cs.trackTmpFile(filePath);
if (this.otelSpan) {
this.otelSpan.setAttributes({'tts.cached': servedFromCache});
this.otelSpan.end();
this.otelSpan = null;
}
if (!servedFromCache && rtt && !preCache && !this._disableTracing) {
this.notifyStatus({
event: 'synthesized-audio',
vendor,
language,
characters: text.length,
elapsedTime: rtt
});
}
}
else {
this.logger.debug('Say: a streaming tts api will be used');
const modifiedPath = filePath.replace('say:{', `say:{session-uuid=${ep.uuid},`);
return modifiedPath;
}
return filePath;
} catch (err) {
this.logger.info({err}, 'Error synthesizing tts');
if (this.otelSpan) this.otelSpan.end();
writeAlerts({
account_sid: cs.accountSid,
alert_type: AlertType.TTS_FAILURE,
vendor,
detail: err.message,
target_sid: cs.callSid
}).catch((err) => this.logger.info({err}, 'Error generating alert for tts failure'));
throw err;
}
};
const arr = this.text.map((t) => (this._validateURL(t) ? t : generateAudio(t)));
return (await Promise.all(arr)).filter((fp) => fp && fp.length);
} catch (err) {
this.logger.info(err, 'TaskSay:exec error');
throw err;
}
}
_validateURL(urlString) {
try {
new URL(urlString);
return true;
} catch (e) {
return false;
}
}
}
module.exports = TtsTask;

View File

@@ -0,0 +1,187 @@
const makeTask = require('../tasks/make_task');
const Emitter = require('events');
const { normalizeJambones } = require('@jambonz/verb-specifications');
const {TaskName} = require('../utils/constants');
/**
 * ActionHookDelayProcessor
 * @extends Emitter
 *
 * @param {Object} logger - logger instance
 * @param {Object} opts - options
 * @param {Object} cs - call session (the endpoint is read from cs.ep)
 *
 * @emits {Event} 'giveup' - when the giveup timer expires and no giveUpActions are configured
 * @emits {Event} 'giveupWithTasks' - when the giveup timer expires and giveUpActions are configured
 *
 * Ref: https://www.jambonz.org/docs/supporting-articles/handling-action-hook-delays/
 */
class ActionHookDelayProcessor extends Emitter {
constructor(logger, opts, cs) {
super();
this.logger = logger;
this.cs = cs;
this._active = false;
const enabled = this.init(opts);
if (enabled && this.noResponseTimeout &&
(!this.actions || !Array.isArray(this.actions) || this.actions.length === 0)) {
throw new Error('ActionHookDelayProcessor: no actions specified');
}
else if (enabled && this.actions &&
this.actions.some((a) => !a.verb || ![TaskName.Say, TaskName.Play].includes(a.verb))) {
throw new Error(`ActionHookDelayProcessor: invalid actions specified: ${JSON.stringify(this.actions)}`);
}
}
get properties() {
return {
actions: this.actions,
retries: this.retries,
noResponseTimeout: this.noResponseTimeout,
noResponseGiveUpTimeout: this.noResponseGiveUpTimeout
};
}
get ep() {
return this.cs.ep;
}
init(opts) {
this.logger.debug({opts}, 'ActionHookDelayProcessor#init');
this.actions = opts.actions;
this.retries = opts.retries || 0;
this.noResponseTimeout = opts.noResponseTimeout;
this.noResponseGiveUpTimeout = opts.noResponseGiveUpTimeout;
this.giveUpActions = opts.giveUpActions;
// return false if these options actually disable the ahdp
return ('enable' in opts && opts.enable === true) ||
('enabled' in opts && opts.enabled === true) ||
(!('enable' in opts) && !('enabled' in opts));
}
start() {
this.logger.debug('ActionHookDelayProcessor#start');
if (this._active) {
this.logger.debug('ActionHookDelayProcessor#start: already started due to prior gather which is continuing');
return;
}
this._active = true;
this._retryCount = 0;
if (this.noResponseTimeout > 0) {
const timeoutMs = this.noResponseTimeout * 1000;
this._noResponseTimer = setTimeout(this._onNoResponseTimer.bind(this), timeoutMs);
} else {
this.logger.debug(
'ActionHookDelayProcessor#start: noResponseTimeout is 0 or undefined hence not calling _onNoResponseTimer'
);
}
if (this.noResponseGiveUpTimeout > 0) {
const timeoutMs = this.noResponseGiveUpTimeout * 1000;
this._noResponseGiveUpTimer = setTimeout(this._onNoResponseGiveUpTimer.bind(this), timeoutMs);
}
}
async stop() {
this._active = false;
if (this._noResponseTimer) {
clearTimeout(this._noResponseTimer);
this._noResponseTimer = null;
}
if (this._noResponseGiveUpTimer) {
clearTimeout(this._noResponseGiveUpTimer);
this._noResponseGiveUpTimer = null;
}
if (this._taskInProgress) {
this.logger.debug(`ActionHookDelayProcessor#stop: stopping ${this._taskInProgress.name}`);
this._sayResolver = () => {
this.logger.debug('ActionHookDelayProcessor#stop: play/say is done, continue on..');
//this._taskInProgress.kill(this.cs);
this._taskInProgress = null;
};
/* we let Say finish, but interrupt Play */
if (TaskName.Play === this._taskInProgress.name) {
await this._taskInProgress.kill(this.cs);
}
return new Promise((resolve) => this._sayResolver = resolve);
}
this.logger.debug('ActionHookDelayProcessor#stop returning');
}
_onNoResponseTimer() {
this.logger.debug('ActionHookDelayProcessor#_onNoResponseTimer');
this._noResponseTimer = null;
/* get the next play or say action */
const verb = this.actions[this._retryCount % this.actions.length];
const t = normalizeJambones(this.logger, [verb]);
this.logger.debug({verb}, 'ActionHookDelayProcessor#_onNoResponseTimer: starting action');
try {
this._taskInProgress = makeTask(this.logger, t[0]);
this._taskInProgress.disableTracing = true;
this._taskInProgress.exec(this.cs, {ep: this.ep}).catch((err) => {
this.logger.info(`ActionHookDelayProcessor#_onNoResponseTimer: error playing file: ${err.message}`);
this._taskInProgress = null;
this.ep.removeAllListeners('playback-start');
this.ep.removeAllListeners('playback-stop');
});
} catch (err) {
this.logger.info(err, 'ActionHookDelayProcessor#_onNoResponseTimer: error starting action');
this._taskInProgress = null;
return;
}
this.ep.once('playback-start', (evt) => {
this.logger.debug({evt}, 'got playback-start');
if (!this._active) {
this.logger.info({evt}, 'ActionHookDelayProcessor#_onNoResponseTimer: killing audio immediately');
/* note: in race condition we may have just hung up and cs.ep cleared */
this.ep?.api('uuid_break', this.ep?.uuid)
.catch((err) => this.logger.info(err,
'ActionHookDelayProcessor#_onNoResponseTimer Error killing audio'));
}
});
this.ep.once('playback-stop', (evt) => {
this._taskInProgress = null;
if (this._sayResolver) {
/* we were waiting for the play to finish before continuing to next task */
this.logger.debug({evt}, 'ActionHookDelayProcessor#_onNoResponseTimer got playback-stop');
this._sayResolver();
this._sayResolver = null;
}
else {
/* possibly start the no response timer again */
if (this._active && this.retries > 0 && this._retryCount < this.retries && this.noResponseTimeout > 0) {
this.logger.debug({evt}, 'ActionHookDelayProcessor#_onNoResponseTimer: playback-stop on play/say action');
const timeoutMs = this.noResponseTimeout * 1000;
this._noResponseTimer = setTimeout(this._onNoResponseTimer.bind(this), timeoutMs);
}
}
});
this._retryCount++;
}
_onNoResponseGiveUpTimer() {
this._active = false;
if (!this.giveUpActions) {
this.logger.info('ActionHookDelayProcessor#_onNoResponseGiveUpTimer');
this.stop().catch((err) => {});
this.emit('giveup');
} else {
this.logger.info('ActionHookDelayProcessor#_onNoResponseGiveUpTimer - giveUpActions');
this.emit('giveupWithTasks', this.giveUpActions);
}
}
}
module.exports = ActionHookDelayProcessor;

View File

@@ -153,7 +153,7 @@ class Amd extends Emitter {
const wordCount = t.alternatives[0].transcript.split(' ').length;
const final = t.is_final;
const foundHint = hints.find((h) => t.alternatives[0].transcript.includes(h));
const foundHint = hints.find((h) => t.alternatives[0].transcript.toLowerCase().includes(h.toLowerCase()));
if (foundHint) {
/* we detected a common voice mail greeting */
this.logger.debug(`Amd:evaluateTranscription: found hint ${foundHint}`);
@@ -210,7 +210,8 @@ module.exports = (logger) => {
account_sid: cs.accountSid,
alert_type: AlertType.STT_FAILURE,
vendor: vendor,
detail: err.message
detail: err.message,
target_sid: cs.callSid
});
}).catch((err) => logger.info({err}, 'Error generating alert for tts failure'));
@@ -245,7 +246,10 @@ module.exports = (logger) => {
const amd = ep.amd = new Amd(logger, cs, opts);
const {vendor, language} = amd;
let sttCredentials = amd.sttCredentials;
const hints = voicemailHints[language] || [];
// hints from configuration might be too long for specific language and vendor that make transcribe freeswitch
// modules cannot connect to the vendor. hints is used in next step to validate if the transcription
// matchs voice mail hints.
const hints = [];
if (vendor === 'nuance' && sttCredentials.client_id) {
/* get nuance access token */
@@ -266,7 +270,7 @@ module.exports = (logger) => {
/* set stt options */
logger.info(`starting amd for vendor ${vendor} and language ${language}`);
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, {
const sttOpts = amd.setChannelVarsForStt({name: TaskName.Gather}, sttCredentials, language, {
vendor,
hints,
enhancedModel: true,

View File

@@ -26,25 +26,28 @@ class BackgroundTaskManager extends Emitter {
return this.tasks.size;
}
async newTask(type, taskOpts) {
this.logger.info({taskOpts}, `initiating Background task ${type}`);
async newTask(type, opts, sticky = false) {
this.logger.info({opts}, `initiating Background task ${type}`);
if (this.tasks.has(type)) {
this.logger.info(`Background task ${type} is running, skiped`);
this.logger.info(`Background task ${type} is running, skipped`);
return;
}
let task;
switch (type) {
case 'listen':
task = await this._initListen(taskOpts);
task = await this._initListen(opts);
break;
case 'bargeIn':
task = await this._initBargeIn(taskOpts);
task = await this._initBargeIn(opts);
break;
case 'record':
task = await this._initRecord();
break;
case 'transcribe':
task = await this._initTranscribe(taskOpts);
task = await this._initTranscribe(opts);
break;
case 'ttsStream':
task = await this._initTtsStream(opts);
break;
default:
break;
@@ -52,6 +55,7 @@ class BackgroundTaskManager extends Emitter {
if (task) {
this.tasks.set(type, task);
}
if (task && sticky) task.sticky = true;
return task;
}
@@ -64,19 +68,16 @@ class BackgroundTaskManager extends Emitter {
task.kill();
// Remove task from managed List
this.tasks.delete(type);
} else {
this.logger.info(`stopping background task, ${type} is not running, skipped`);
}
}
stopAll() {
this.logger.info('BackgroundTaskManager:stopAll');
this.logger.debug('BackgroundTaskManager:stopAll');
for (const key of this.tasks.keys()) {
this.stop(key);
}
}
// Initiate Task
// Initiate Listen
async _initListen(opts, bugname = 'jambonz-background-listen', ignoreCustomerData = false, type = 'listen') {
let task;
@@ -102,6 +103,7 @@ class BackgroundTaskManager extends Emitter {
async _initBargeIn(opts) {
let task;
try {
const copy = JSON.parse(JSON.stringify(opts));
const t = normalizeJambones(this.logger, [opts]);
task = makeTask(this.logger, t[0]);
task
@@ -119,7 +121,8 @@ class BackgroundTaskManager extends Emitter {
this._taskCompleted('bargeIn', task);
if (task.sticky && !this.cs.callGone && !this.cs._stopping) {
this.logger.info('BackgroundTaskManager:_initBargeIn: restarting background bargeIn');
this.newTask('bargeIn', opts);
this._bargeInHandled = false;
this.newTask('bargeIn', copy, true);
}
return;
})
@@ -134,8 +137,7 @@ class BackgroundTaskManager extends Emitter {
async _initRecord() {
if (this.cs.accountInfo.account.record_all_calls || this.cs.application.record_all_calls) {
if (!JAMBONZ_RECORD_WS_BASE_URL || !this.cs.accountInfo.account.bucket_credential) {
this.logger.error(`_initRecord: invalid configuration,
missing JAMBONZ_RECORD_WS_BASE_URL or bucket configuration`);
this.logger.error('_initRecord: invalid cfg - missing JAMBONZ_RECORD_WS_BASE_URL or bucket config');
return undefined;
}
const listenOpts = {
@@ -175,8 +177,27 @@ class BackgroundTaskManager extends Emitter {
return task;
}
// Initiate Tts Stream
async _initTtsStream(opts) {
let task;
try {
const t = normalizeJambones(this.logger, [opts]);
task = makeTask(this.logger, t[0]);
const resources = await this.cs._evaluatePreconditions(task);
const {span, ctx} = this.rootSpan.startChildSpan(`background-ttsStream:${task.summary}`);
task.span = span;
task.ctx = ctx;
task.exec(this.cs, resources)
.then(this._taskCompleted.bind(this, 'ttsStream', task))
.catch(this._taskError.bind(this, 'ttsStream', task));
} catch (err) {
this.logger.info(err, 'BackgroundTaskManager:_initTtsStream - Error creating ttsStream task');
}
return task;
}
_taskCompleted(type, task) {
this.logger.info({type, task}, 'BackgroundTaskManager:_taskCompleted: task completed');
this.logger.debug({type, task}, `BackgroundTaskManager:_taskCompleted: task completed, sticky: ${task.sticky}`);
task.removeAllListeners();
task.span.end();
this.tasks.delete(type);
@@ -189,7 +210,10 @@ class BackgroundTaskManager extends Emitter {
}
_bargeInTaskCompleted(evt) {
this.logger.info({evt}, 'BackgroundTaskManager:_bargeInTaskCompleted on event from background bargeIn');
if (this._bargeInHandled) return;
this._bargeInHandled = true;
this.logger.debug({evt},
'BackgroundTaskManager:_bargeInTaskCompleted on event from background bargeIn, emitting bargein-done event');
this.emit('bargeIn-done', evt);
}
}

View File

@@ -1,18 +1,20 @@
{
"TaskName": {
"Cognigy": "cognigy",
"Answer": "answer",
"Conference": "conference",
"Config": "config",
"Dequeue": "dequeue",
"Dial": "dial",
"Dialogflow": "dialogflow",
"Dtmf": "dtmf",
"Dub": "dub",
"Enqueue": "enqueue",
"Gather": "gather",
"Hangup": "hangup",
"Leave": "leave",
"Lex": "lex",
"Listen": "listen",
"Llm": "llm",
"Message": "message",
"Pause": "pause",
"Play": "play",
@@ -29,7 +31,8 @@
"Tag": "tag",
"Transcribe": "transcribe"
},
"AllowedSipRecVerbs": ["config", "gather", "transcribe", "listen", "tag"],
"AllowedSipRecVerbs": ["answer", "config", "gather", "transcribe", "listen", "tag"],
"AllowedConfirmSessionVerbs": ["config", "gather", "plays", "say", "tag"],
"CallStatus": {
"Trying": "trying",
"Ringing": "ringing",
@@ -95,6 +98,10 @@
"Transcription": "soniox_transcribe::transcription",
"Error": "soniox_transcribe::error"
},
"VerbioTranscriptionEvents": {
"Transcription": "verbio_transcribe::transcription",
"Error": "verbio_transcribe::error"
},
"CobaltTranscriptionEvents": {
"Transcription": "cobalt_speech::transcription",
"CompileContext": "cobalt_speech::compile_context_response",
@@ -120,6 +127,15 @@
"NoSpeechDetected": "azure_transcribe::no_speech_detected",
"VadDetected": "azure_transcribe::vad_detected"
},
"SpeechmaticsTranscriptionEvents": {
"Transcription": "speechmatics_transcribe::transcription",
"Translation": "speechmatics_transcribe::translation",
"Info": "speechmatics_transcribe::info",
"RecognitionStarted": "speechmatics_transcribe::recognition_started",
"ConnectFailure": "speechmatics_transcribe::connect_failed",
"Connect": "speechmatics_transcribe::connect",
"Error": "speechmatics_transcribe::error"
},
"JambonzTranscriptionEvents": {
"Transcription": "jambonz_transcribe::transcription",
"ConnectFailure": "jambonz_transcribe::connect_failed",
@@ -132,6 +148,9 @@
"ConnectFailure": "assemblyai_transcribe::connect_failed",
"Connect": "assemblyai_transcribe::connect"
},
"VadDetection": {
"Detection": "vad_detect:detection"
},
"ListenEvents": {
"Connect": "mod_audio_fork::connect",
"ConnectFailure": "mod_audio_fork::connect_failed",
@@ -149,6 +168,20 @@
"StandbyEnter": "standby-enter",
"StandbyExit": "standby-exit"
},
"LlmEvents_OpenAI": {
"Error": "error",
"Connect": "openai_s2s::connect",
"ConnectFailure": "openai_s2s::connect_failed",
"Disconnect": "openai_s2s::disconnect",
"ServerEvent": "openai_s2s::server_event"
},
"LlmEvents_VoiceAgent": {
"Error": "error",
"Connect": "voice_agent_s2s::connect",
"ConnectFailure": "voice_agent_s2s::connect_failed",
"Disconnect": "voice_agent_s2s::disconnect",
"ServerEvent": "voice_agent_s2s::server_event"
},
"QueueResults": {
"Bridged": "bridged",
"Error": "error",
@@ -163,17 +196,23 @@
},
"KillReason": {
"Hangup": "hangup",
"Replaced": "replaced"
"Replaced": "replaced",
"MediaTimeout": "media_timeout"
},
"HookMsgTypes": [
"session:new",
"session:reconnect",
"session:redirect",
"session:adulting",
"call:status",
"queue:status",
"dial:confirm",
"verb:hook",
"verb:status",
"llm:event",
"llm:tool-call",
"tts:tokens-result",
"tts:streaming-event",
"jambonz:error"
],
"RecordState": {
@@ -192,7 +231,45 @@
"ToneTimeout": "amd_tone_timeout",
"Stopped": "amd_stopped"
},
"MediaPath": {
"NoMedia": "no-media",
"PartialMedia": "partial-media",
"FullMedia": "full-media"
},
"DeepgramTtsStreamingEvents": {
"Empty": "deepgram_tts_streaming::empty",
"ConnectFailure": "deepgram_tts_streaming::connect_failed",
"Connect": "deepgram_tts_streaming::connect"
},
"CartesiaTtsStreamingEvents": {
"Empty": "cartesia_tts_streaming::empty",
"ConnectFailure": "cartesia_tts_streaming::connect_failed",
"Connect": "cartesia_tts_streaming::connect"
},
"ElevenlabsTtsStreamingEvents": {
"Empty": "elevenlabs_tts_streaming::empty",
"ConnectFailure": "elevenlabs_tts_streaming::connect_failed",
"Connect": "elevenlabs_tts_streaming::connect"
},
"TtsStreamingEvents": {
"Empty": "tts_streaming::empty",
"Pause": "tts_streaming::pause",
"Resume": "tts_streaming::resume",
"ConnectFailure": "tts_streaming::connect_failed"
},
"TtsStreamingConnectionStatus": {
"NotConnected": "not_connected",
"Connected": "connected",
"Connecting": "connecting",
"Failed": "failed"
},
"MAX_SIMRINGS": 10,
"BONG_TONE": "tone_stream://v=-7;%(100,0,941.0,1477.0);v=-7;>=2;+=.1;%(1400,0,350,440)",
"FS_UUID_SET_NAME": "fsUUIDs"
"FS_UUID_SET_NAME": "fsUUIDs",
"SystemState" : {
"Online": "ONLINE",
"Offline": "OFFLINE",
"GracefulShutdownInProgress":"SHUTDOWN_IN_PROGRESS"
},
"FEATURE_SERVER" : "feature-server"
}

View File

@@ -41,6 +41,7 @@ const speechMapper = (cred) => {
const o = JSON.parse(decrypt(credential));
obj.access_key_id = o.access_key_id;
obj.secret_access_key = o.secret_access_key;
obj.role_arn = o.role_arn;
obj.aws_region = o.aws_region;
}
else if ('microsoft' === obj.vendor) {
@@ -75,6 +76,9 @@ const speechMapper = (cred) => {
else if ('deepgram' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.deepgram_stt_uri = o.deepgram_stt_uri;
obj.deepgram_tts_uri = o.deepgram_tts_uri;
obj.deepgram_stt_use_tls = o.deepgram_stt_use_tls;
}
else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
@@ -87,19 +91,54 @@ const speechMapper = (cred) => {
else if ('cobalt' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.cobalt_server_uri = o.cobalt_server_uri;
} else if ('elevenlabs' === obj.vendor) {
}
else if ('elevenlabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
obj.options = o.options;
} else if ('assemblyai' === obj.vendor) {
}
else if ('playht' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
} else if ('whisper' === obj.vendor) {
obj.user_id = o.user_id;
obj.voice_engine = o.voice_engine;
obj.options = o.options;
}
else if ('cartesia' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
} else if (obj.vendor.startsWith('custom:')) {
obj.embedding = o.embedding;
obj.options = o.options;
}
else if ('rimelabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
obj.options = o.options;
}
else if ('assemblyai' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
}
else if ('whisper' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.model_id = o.model_id;
}
else if ('verbio' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.client_id = o.client_id;
obj.client_secret = o.client_secret;
obj.engine_version = o.engine_version;
}
else if ('speechmatics' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = o.api_key;
obj.speechmatics_stt_uri = o.speechmatics_stt_uri;
}
else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = o.auth_token;
obj.custom_stt_url = o.custom_stt_url;
@@ -183,11 +222,23 @@ module.exports = (logger, srf) => {
}
};
/**
 * Query the voip_carriers table for the given voip_carrier_sid.
 *
 * @param {string} sid - value bound to the voip_carrier_sid placeholder
 * @returns {Promise<*>} first element of the query result (presumably the
 *   array of matching rows - TODO confirm against the pool driver), or
 *   undefined when the query fails (the failure is logged, not rethrown)
 */
const lookupVoipCarrierBySid = async(sid) => {
  const sql = 'SELECT * FROM voip_carriers WHERE voip_carrier_sid = ?';
  const promisePool = pool.promise();
  try {
    const result = await promisePool.query(sql, [sid]);
    const [rows] = result;
    return rows;
  } catch (err) {
    logger.error({err}, `lookupVoipCarrierBySid: Error ${sid}`);
  }
};
return {
lookupAccountDetails,
updateSpeechCredentialLastUsed,
lookupCarrier,
lookupCarrierByPhoneNumber,
lookupGoogleCustomVoice
lookupGoogleCustomVoice,
lookupVoipCarrierBySid
};
};

24
lib/utils/error.js Normal file
View File

@@ -0,0 +1,24 @@
/**
 * Base class for the task-level errors defined in this module.
 * Judging by the name it marks failures that should not be treated as
 * fatal - confirm against the catch sites.
 *
 * `name` is set to the concrete class name so that `err.name` and
 * `String(err)` identify the error type instead of the generic "Error".
 */
class NonFatalTaskError extends Error {
  /**
   * @param {string} msg - error message
   */
  constructor(msg) {
    super(msg);
    // this.constructor is the subclass when invoked through super()
    this.name = this.constructor.name;
  }
}

/**
 * Error for a speech-credential problem (semantics inferred from the
 * name - confirm at the throw sites). Inherits the constructor as-is.
 */
class SpeechCredentialError extends NonFatalTaskError {}

/**
 * Error carrying the url of a file that could not be found.
 * The message is always 'File not found'; the url is exposed as `url`.
 */
class PlayFileNotFoundError extends NonFatalTaskError {
  /**
   * @param {string} url - url of the missing file
   */
  constructor(url) {
    super('File not found');
    this.url = url;
  }
}
module.exports = {
SpeechCredentialError,
NonFatalTaskError,
PlayFileNotFoundError
};

View File

@@ -1,5 +1,5 @@
const Mrf = require('drachtio-fsmrf');
const ip = require('ip');
const os = require('os');
const {
JAMBONES_MYSQL_HOST,
JAMBONES_MYSQL_USER,
@@ -12,11 +12,25 @@ const {
JAMBONES_TIME_SERIES_HOST,
JAMBONES_ESL_LISTEN_ADDRESS,
PORT,
HTTP_IP,
NODE_ENV,
} = require('../config');
const Registrar = require('@jambonz/mw-registrar');
const assert = require('assert');
/**
 * Find the first non-internal IPv4 address among the host's network
 * interfaces, in the order reported by os.networkInterfaces().
 *
 * @returns {string} the discovered address, or '127.0.0.1' when no
 *   external IPv4 interface is found
 */
function getLocalIp() {
  for (const addresses of Object.values(os.networkInterfaces())) {
    for (const iface of addresses || []) {
      // some Node 18 releases reported family as the number 4 rather than 'IPv4'
      const isIPv4 = iface.family === 'IPv4' || iface.family === 4;
      if (isIPv4 && !iface.internal) {
        return iface.address;
      }
    }
  }
  return '127.0.0.1'; // Fallback to localhost if no suitable interface found
}
function initMS(logger, wrapper, ms) {
Object.assign(wrapper, {ms, active: true, connects: 1});
logger.info(`connected to freeswitch at ${ms.address}`);
@@ -138,7 +152,8 @@ function installSrfLocals(srf, logger) {
lookupAccountBySid,
lookupAccountCapacitiesBySid,
lookupSmppGateways,
lookupClientByAccountAndUsername
lookupClientByAccountAndUsername,
lookupSystemInformation
} = require('@jambonz/db-helpers')({
host: JAMBONES_MYSQL_HOST,
user: JAMBONES_MYSQL_USER,
@@ -171,17 +186,21 @@ function installSrfLocals(srf, logger) {
retrieveFromSortedSet,
retrieveByPatternSortedSet,
sortedSetLength,
sortedSetPositionByPattern
sortedSetPositionByPattern,
} = require('@jambonz/realtimedb-helpers')({}, logger, tracer);
const registrar = new Registrar(logger, client);
const {
synthAudio,
addFileToCache,
getNuanceAccessToken,
getIbmAccessToken,
getAwsAuthToken,
getVerbioAccessToken
} = require('@jambonz/speech-utils')({}, logger);
const {
writeAlerts,
AlertType
AlertType,
writeSystemAlerts
} = require('@jambonz/time-series')(logger, {
host: JAMBONES_TIME_SERIES_HOST,
commitSize: 50,
@@ -190,7 +209,8 @@ function installSrfLocals(srf, logger) {
let localIp;
try {
localIp = ip.address();
// Either use the configured IP address or discover it
localIp = HTTP_IP || getLocalIp();
} catch (err) {
logger.error({err}, 'installSrfLocals - error detecting local ipv4 address');
}
@@ -210,11 +230,14 @@ function installSrfLocals(srf, logger) {
lookupAccountCapacitiesBySid,
lookupSmppGateways,
lookupClientByAccountAndUsername,
lookupSystemInformation,
updateCallStatus,
retrieveCall,
listCalls,
deleteCall,
synthAudio,
getAwsAuthToken,
addFileToCache,
createHash,
retrieveHash,
deleteKey,
@@ -235,7 +258,8 @@ function installSrfLocals(srf, logger) {
retrieveFromSortedSet,
retrieveByPatternSortedSet,
sortedSetLength,
sortedSetPositionByPattern
sortedSetPositionByPattern,
getVerbioAccessToken
},
parentLogger: logger,
getSBC,
@@ -246,7 +270,8 @@ function installSrfLocals(srf, logger) {
getFreeswitch,
stats: stats,
writeAlerts,
AlertType
AlertType,
writeSystemAlerts
};
if (localIp) {

32
lib/utils/network.js Normal file
View File

@@ -0,0 +1,32 @@
/**
 * Parses a list of hostport entries and selects the first one that matches the specified protocol,
 * excluding any entries with the localhost IP address ('127.0.0.1').
 *
 * Each hostport entry should be in the format: 'protocol/ip:port'.
 * Whitespace around an entry is ignored and entries that do not follow
 * the format are skipped rather than raising.
 *
 * @param {Object} logger - A logging object with a 'debug' method for logging debug messages.
 * @param {string} hostport - A comma-separated string containing hostport entries.
 * @param {string} protocol - The protocol to match (e.g., 'udp', 'tcp').
 * @returns {Array|undefined} An array containing:
 *   0: protocol
 *   1: ip address
 *   2: port (as a string)
 * or undefined when no entry matches.
 */
const selectHostPort = (logger, hostport, protocol) => {
  logger.debug(`selectHostPort: ${hostport}, ${protocol}`);
  for (const entry of hostport.split(',')) {
    const parsed = /(.*)\/(.*):(.*)/.exec(entry.trim());
    // a malformed entry must not abort the whole selection
    if (!parsed) continue;
    const [, proto, ip, port] = parsed;
    if (proto === protocol && ip !== '127.0.0.1') {
      return [proto, ip, port];
    }
  }
  return undefined;
};
module.exports = {
selectHostPort
};

View File

@@ -0,0 +1,18 @@
/**
 * Convert a decibel setting to a number.
 *
 * - falsy input yields 0
 * - a number is returned unchanged
 * - a string containing '<number> db' (case-insensitive, optional sign and
 *   fraction) yields that number truncated toward zero
 * - anything else yields 0
 *
 * @param {number|string|*} db - decibel value to parse
 * @returns {number} parsed value
 */
const parseDecibels = (db) => {
  if (!db) return 0;

  switch (typeof db) {
    case 'number':
      return db;
    case 'string': {
      const found = db.match(/([+-]?\d+(\.\d+)?)\s*db/i);
      return found ? Math.trunc(parseFloat(found[1])) : 0;
    }
    default:
      return 0;
  }
};
module.exports = parseDecibels;

View File

@@ -1,5 +1,5 @@
const Emitter = require('events');
const {CallStatus} = require('./constants');
const {CallStatus, MediaPath} = require('./constants');
const SipError = require('drachtio-srf').SipError;
const {TaskPreconditions, CallDirection} = require('../utils/constants');
const CallInfo = require('../session/call-info');
@@ -16,9 +16,15 @@ const uuidv4 = require('uuid-random');
const HttpRequestor = require('./http-requestor');
const WsRequestor = require('./ws-requestor');
const {makeOpusFirst} = require('./sdp-utils');
const {
JAMBONES_USE_FREESWITCH_TIMER_FD,
JAMBONES_MEDIA_TIMEOUT_MS,
JAMBONES_MEDIA_HOLD_TIMEOUT_MS
} = require('../config');
class SingleDialer extends Emitter {
constructor({logger, sbcAddress, target, opts, application, callInfo, accountInfo, rootSpan, startSpan, dialTask}) {
constructor({logger, sbcAddress, target, opts, application, callInfo, accountInfo, rootSpan, startSpan, dialTask,
onHoldMusic}) {
super();
assert(target.type);
@@ -41,6 +47,7 @@ class SingleDialer extends Emitter {
this.callSid = uuidv4();
this.dialTask = dialTask;
this.onHoldMusic = onHoldMusic;
this.on('callStatusChange', this._notifyCallStatusChange.bind(this));
}
@@ -131,6 +138,7 @@ class SingleDialer extends Emitter {
this.serviceUrl = srf.locals.serviceUrl;
this.ep = await ms.createEndpoint();
this._configMsEndpoint();
this.logger.debug(`SingleDialer:exec - created endpoint ${this.ep.uuid}`);
/**
@@ -189,6 +197,10 @@ class SingleDialer extends Emitter {
callSid: this.callSid,
traceId: this.rootSpan.traceId
});
if (this.dialTask && this.dialTask.tag !== null &&
typeof this.dialTask.tag === 'object' && !Array.isArray(this.dialTask.tag)) {
this.callInfo.customerData = this.dialTask.tag;
}
this.logger = srf.locals.parentLogger.child({
callSid: this.callSid,
parentCallSid: this.parentCallInfo.callSid,
@@ -203,6 +215,8 @@ class SingleDialer extends Emitter {
},
cbProvisional: (prov) => {
const status = {sipStatus: prov.status, sipReason: prov.reason};
// Update call-id for sbc outbound INVITE
this.callInfo.sbcCallid = prov.get('X-CID');
if ([180, 183].includes(prov.status) && prov.body) {
if (status.callStatus !== CallStatus.EarlyMedia) {
status.callStatus = CallStatus.EarlyMedia;
@@ -253,7 +267,7 @@ class SingleDialer extends Emitter {
.on('modify', async(req, res) => {
try {
if (this.ep) {
if (this.dialTask && this.dialTask.isOnHold) {
if (this.dialTask && this.dialTask.isOnHoldEnabled) {
this.logger.info('dial is onhold, emit event');
this.emit('reinvite', req, res);
} else {
@@ -285,17 +299,17 @@ class SingleDialer extends Emitter {
if (err.status === 487) status.callStatus = CallStatus.NoAnswer;
else if ([486, 600].includes(err.status)) status.callStatus = CallStatus.Busy;
this.logger.info(`SingleDialer:exec outdial failure ${err.status}`);
inviteSpan.setAttributes({'invite.status_code': err.status});
inviteSpan.end();
inviteSpan?.setAttributes({'invite.status_code': err.status});
inviteSpan?.end();
}
else {
this.logger.error(err, 'SingleDialer:exec');
status.sipStatus = 500;
inviteSpan.setAttributes({
inviteSpan?.setAttributes({
'invite.status_code': 500,
'invite.err': err.message
});
inviteSpan.end();
inviteSpan?.end();
}
this.emit('callStatusChange', status);
if (this.ep) this.ep.destroy();
@@ -305,14 +319,19 @@ class SingleDialer extends Emitter {
/**
* kill the call in progress or the stable dialog, whichever we have
*/
async kill() {
async kill(Reason) {
this.killed = true;
if (this.inviteInProgress) await this.inviteInProgress.cancel();
else if (this.dlg && this.dlg.connected) {
const duration = moment().diff(this.dlg.connectTime, 'seconds');
this.logger.debug('SingleDialer:kill hanging up called party');
this.emit('callStatusChange', {callStatus: CallStatus.Completed, duration});
this.dlg.destroy();
const headers = {
...(Reason && {'X-Reason': Reason})
};
this.dlg.destroy({
headers
});
}
if (this.ep) {
this.logger.debug(`SingleDialer:kill - deleting endpoint ${this.ep.uuid}`);
@@ -320,6 +339,27 @@ class SingleDialer extends Emitter {
}
}
_configMsEndpoint() {
const opts = {
...(this.onHoldMusic && {holdMusic: `shout://${this.onHoldMusic.replace(/^https?:\/\//, '')}`}),
...(JAMBONES_USE_FREESWITCH_TIMER_FD && {timer_name: 'timerfd'}),
...(JAMBONES_MEDIA_TIMEOUT_MS && {media_timeout: JAMBONES_MEDIA_TIMEOUT_MS}),
...(JAMBONES_MEDIA_HOLD_TIMEOUT_MS && {media_hold_timeout: JAMBONES_MEDIA_HOLD_TIMEOUT_MS})
};
if (Object.keys(opts).length > 0) {
this.ep.set(opts);
}
if (this.dialTask?.inbandDtmfEnabled && !this.ep.inbandDtmfEnabled) {
// https://developer.signalwire.com/freeswitch/FreeSWITCH-Explained/Modules/mod-dptools/6587132/#0-about
try {
this.ep.execute('start_dtmf');
this.ep.inbandDtmfEnabled = true;
} catch (err) {
this.logger.info(err, 'place-outdial:_configMsEndpoint - error enable inband DTMF');
}
}
}
/**
* Run an application on the call after answer, e.g. call screening.
* Once the application completes in some fashion, emit an 'accepted' event
@@ -333,6 +373,7 @@ class SingleDialer extends Emitter {
const json = await this.requestor.request('dial:confirm', confirmHook, this.callInfo.toJSON());
if (!json || (Array.isArray(json) && json.length === 0)) {
this.logger.info('SingleDialer:_executeApp: no tasks returned from confirm hook');
this.emit('accept');
return;
}
const tasks = normalizeJambones(this.logger, json).map((tdata) => makeTask(this.logger, tdata));
@@ -358,7 +399,8 @@ class SingleDialer extends Emitter {
callInfo: this.callInfo,
accountInfo: this.accountInfo,
tasks,
rootSpan: this.rootSpan
rootSpan: this.rootSpan,
req: this.req
});
await cs.exec();
@@ -392,6 +434,7 @@ class SingleDialer extends Emitter {
const app = {...application};
if ('WS' === app.call_hook?.method ||
app.call_hook?.url.startsWith('ws://') || app.call_hook?.url.startsWith('wss://')) {
if (app.call_hook?.url) app.call_hook.url += '/adulting';
const requestor = new WsRequestor(logger, this.accountInfo.account.account_sid,
app.call_hook, this.accountInfo.account.webhook_secret);
app.requestor = requestor;
@@ -406,6 +449,8 @@ class SingleDialer extends Emitter {
this.accountInfo.account.webhook_secret);
else app.notifier = {request: () => {}, close: () => {}};
}
// Replace old application with new application.
this.application = app;
const cs = new AdultingCallSession({
logger: newLogger,
singleDialer: this,
@@ -415,32 +460,52 @@ class SingleDialer extends Emitter {
tasks,
rootSpan
});
app.requestor.request('session:adulting', '/adulting', {
...cs.callInfo.toJSON(),
parentCallInfo: this.parentCallInfo.toJSON()
}).catch((err) => {
newLogger.error({err}, 'doAdulting: error sending adulting request');
});
cs.req = this.req;
// fixed hangup an adulting session does not send status callback Completed
cs.wrapDialog(this.dlg);
cs.exec().catch((err) => newLogger.error({err}, 'doAdulting: error executing session'));
return cs;
}
async releaseMediaToSBC(remoteSdp, localSdp) {
async releaseMediaToSBC(remoteSdp, localSdp, releaseMediaEntirely) {
assert(this.dlg && this.dlg.connected && this.ep && typeof remoteSdp === 'string');
const sdp = stripCodecs(this.logger, remoteSdp, localSdp) || remoteSdp;
await this.dlg.modify(sdp, {
headers: {
'X-Reason': 'release-media'
'X-Reason': releaseMediaEntirely ? 'release-media-entirely' : 'release-media'
}
});
this.ep.destroy()
.then(() => this.ep = null)
.catch((err) => this.logger.error({err}, 'SingleDialer:releaseMediaToSBC: Error destroying endpoint'));
try {
await this.ep.destroy();
} catch (err) {
this.logger.error({err}, 'SingleDialer:releaseMediaToSBC: Error destroying endpoint');
}
this.ep = null;
}
async reAnchorMedia() {
async reAnchorMedia(currentMediaRoute = MediaPath.PartialMedia) {
assert(this.dlg && this.dlg.connected && !this.ep);
this.logger.debug('SingleDialer:reAnchorMedia: re-anchoring media after partial media');
this.ep = await this.ms.createEndpoint({remoteSdp: this.dlg.remote.sdp});
this._configMsEndpoint();
await this.dlg.modify(this.ep.local.sdp, {
headers: {
'X-Reason': 'anchor-media'
}
});
if (currentMediaRoute === MediaPath.NoMedia) {
this.logger.debug('SingleDialer:reAnchorMedia: repoint endpoint after no media');
await this.ep.modify(this.dlg.remote.sdp);
}
}
_notifyCallStatusChange({callStatus, sipStatus, sipReason, duration}) {
@@ -466,11 +531,12 @@ class SingleDialer extends Emitter {
}
function placeOutdial({
logger, srf, ms, sbcAddress, target, opts, application, callInfo, accountInfo, rootSpan, startSpan, dialTask
logger, srf, ms, sbcAddress, target, opts, application, callInfo, accountInfo, rootSpan, startSpan, dialTask,
onHoldMusic
}) {
const myOpts = deepcopy(opts);
const sd = new SingleDialer({
logger, sbcAddress, target, myOpts, application, callInfo, accountInfo, rootSpan, startSpan, dialTask
logger, sbcAddress, target, myOpts, application, callInfo, accountInfo, rootSpan, startSpan, dialTask, onHoldMusic
});
sd.exec(srf, ms, myOpts);
return sd;

View File

@@ -46,12 +46,24 @@ module.exports = (logger) => {
const {srf} = require('../..');
srf.locals.publicIp = publicIp;
})
.on(LifeCycleEvents.ScaleIn, () => {
.on(LifeCycleEvents.ScaleIn, async() => {
logger.info('AWS scale-in notification: begin drying up calls');
dryUpCalls = true;
lifecycleEmitter.operationalState = LifeCycleEvents.ScaleIn;
const {srf} = require('../..');
const {writeSystemAlerts} = srf.locals;
if (writeSystemAlerts) {
const {SystemState, FEATURE_SERVER} = require('./constants');
await writeSystemAlerts({
system_component: FEATURE_SERVER,
state : SystemState.GracefulShutdownInProgress,
fields : {
detail: `feature-server with process_id ${process.pid} shutdown in progress`,
host: srf.locals?.ipv4
}
});
}
pingProxies(srf);
// if we have zero calls, we can complete the scale-in right

View File

@@ -1,7 +1,4 @@
const {
TaskName,
} = require('./constants.json');
const {TaskName} = require('./constants.json');
const stickyVars = {
google: [
'GOOGLE_SPEECH_HINTS',
@@ -45,12 +42,19 @@ const stickyVars = {
'DEEPGRAM_SPEECH_ENDPOINTING',
'DEEPGRAM_SPEECH_UTTERANCE_END_MS',
'DEEPGRAM_SPEECH_VAD_TURNOFF',
'DEEPGRAM_SPEECH_TAG'
'DEEPGRAM_SPEECH_TAG',
'DEEPGRAM_SPEECH_MODEL_VERSION'
],
aws: [
'AWS_VOCABULARY_NAME',
'AWS_VOCABULARY_FILTER_METHOD',
'AWS_VOCABULARY_FILTER_NAME'
'AWS_VOCABULARY_FILTER_NAME',
'AWS_LANGUAGE_MODEL_NAME',
'AWS_ACCESS_KEY_ID',
'AWS_SECRET_ACCESS_KEY',
'AWS_REGION',
'AWS_SECURITY_TOKEN',
'AWS_PII_ENTITY_TYPES',
],
nuance: [
'NUANCE_ACCESS_TOKEN',
@@ -99,11 +103,92 @@ const stickyVars = {
assemblyai: [
'ASSEMBLYAI_API_KEY',
'ASSEMBLYAI_WORD_BOOST'
],
speechmatics: [
'SPEECHMATICS_API_KEY',
'SPEECHMATICS_HOST',
'SPEECHMATICS_PATH',
'SPEECHMATICS_SPEECH_HINTS',
'SPEECHMATICS_TRANSLATION_LANGUAGES',
'SPEECHMATICS_TRANSLATION_PARTIALS'
]
};
const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
if (bufferedTranscripts.length === 1) return bufferedTranscripts[0];
/**
 * Default Deepgram model to use per language code when the caller does
 * not supply a model.
 *
 * Each value is a two-element array read by selectDefaultDeepgramModel:
 * index 0 is the model for gather tasks, index 1 the model for any other
 * task. Languages that are not listed here fall back to 'base'.
 *
 * @see https://developers.deepgram.com/docs/models-languages-overview
 */
const optimalDeepramModels = {
zh: ['base', 'base'],
'zh-CN':['base', 'base'],
'zh-TW': ['base', 'base'],
da: ['enhanced', 'enhanced'],
en: ['nova-2-phonecall', 'nova-2'],
'en-US': ['nova-2-phonecall', 'nova-2'],
'en-AU': ['nova-2', 'nova-2'],
'en-GB': ['nova-2', 'nova-2'],
'en-IN': ['nova-2', 'nova-2'],
'en-NZ': ['nova-2', 'nova-2'],
nl: ['nova-2', 'nova-2'],
fr: ['nova-2', 'nova-2'],
'fr-CA': ['nova-2', 'nova-2'],
de: ['nova-2', 'nova-2'],
hi: ['nova-2', 'nova-2'],
'hi-Latn': ['nova-2', 'nova-2'],
id: ['base', 'base'],
it: ['nova-2', 'nova-2'],
ja: ['enhanced', 'enhanced'],
ko: ['nova-2', 'nova-2'],
no: ['nova-2', 'nova-2'],
pl: ['nova-2', 'nova-2'],
pt: ['nova-2', 'nova-2'],
'pt-BR': ['nova-2', 'nova-2'],
'pt-PT': ['nova-2', 'nova-2'],
ru: ['nova-2', 'nova-2'],
es: ['nova-2', 'nova-2'],
'es-419': ['nova-2', 'nova-2'],
'es-LATAM': ['enhanced', 'enhanced'],
sv: ['nova-2', 'nova-2'],
ta: ['enhanced', 'enhanced'],
taq: ['enhanced', 'enhanced'],
tr: ['nova-2', 'nova-2'],
uk: ['nova-2', 'nova-2']
};
/**
 * Pick the default Deepgram model for a task and language.
 *
 * Looks the language up in optimalDeepramModels and returns the gather
 * entry for gather tasks, the transcribe entry otherwise. Only the
 * table's own keys are considered, so a language value that happens to
 * name an inherited object property (e.g. 'constructor') falls through
 * to the default instead of throwing.
 *
 * @param {Object} task - task whose `name` is compared with TaskName.Gather
 * @param {string} language - language code to look up
 * @returns {string} model name, 'base' when the language is not listed
 */
const selectDefaultDeepgramModel = (task, language) => {
  if (Object.prototype.hasOwnProperty.call(optimalDeepramModels, language)) {
    const [gather, transcribe] = optimalDeepramModels[language];
    return task.name === TaskName.Gather ? gather : transcribe;
  }
  return 'base';
};
/**
 * Per-language overrides of the default Google speech model, keyed first
 * by service version ('v1' / 'v2') and then by language code.
 *
 * Each value is a two-element array read by selectDefaultGoogleModel:
 * index 0 is the model for gather tasks, index 1 the model for any other
 * task. Languages without an entry use that function's generic defaults.
 */
const optimalGoogleModels = {
'v1' : {
'en-IN':['telephony', 'telephony'],
'es-DO':['default', 'default'],
'es-MX':['default', 'default'],
'en-AU':['telephony', 'telephony'],
'en-GB':['telephony', 'telephony'],
'en-NZ':['telephony', 'telephony']
},
'v2' : {
'en-IN':['telephony', 'long']
}
};
/**
 * Pick the default Google speech model for a task, language and service
 * version.
 *
 * A per-language override from optimalGoogleModels wins when one exists
 * for the given version. Otherwise gather tasks get 'telephony_short'
 * (v2) or 'command_and_search' (v1) and all other tasks get 'long' (v2)
 * or 'latest_long' (v1). Any version other than 'v2' - including a
 * missing one - is treated as v1 for these generic defaults rather than
 * raising.
 *
 * @param {Object} task - task whose `name` is compared with TaskName.Gather
 * @param {string} language - language code to look up
 * @param {string} version - service version, 'v1' or 'v2'
 * @returns {string} model name
 */
const selectDefaultGoogleModel = (task, language, version) => {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const useV2 = version === 'v2';
  const isGather = task.name === TaskName.Gather;

  // only own keys count, so odd version/language strings cannot hit inherited properties
  const overrides = hasOwn(optimalGoogleModels, version) ? optimalGoogleModels[version] : null;
  if (overrides && hasOwn(overrides, language)) {
    const [gather, transcribe] = overrides[language];
    return isGather ? gather : transcribe;
  }

  if (isGather) {
    return useV2 ? 'telephony_short' : 'command_and_search';
  }
  return useV2 ? 'long' : 'latest_long';
};
const consolidateTranscripts = (bufferedTranscripts, channel, language, vendor) => {
if (bufferedTranscripts.length === 1) {
bufferedTranscripts[0].is_final = true;
return bufferedTranscripts[0];
}
let totalConfidence = 0;
const finalTranscript = bufferedTranscripts.reduce((acc, evt) => {
totalConfidence += evt.alternatives[0].confidence;
@@ -143,7 +228,7 @@ const consolidateTranscripts = (bufferedTranscripts, channel, language) => {
totalConfidence / bufferedTranscripts.length;
finalTranscript.alternatives[0].transcript = finalTranscript.alternatives[0].transcript.trim();
finalTranscript.vendor = {
name: 'deepgram',
name: vendor,
evt: bufferedTranscripts
};
return finalTranscript;
@@ -222,7 +307,7 @@ const normalizeDeepgram = (evt, channel, language, shortUtterance) => {
language_code: language,
channel_tag: channel,
is_final: shortUtterance ? evt.is_final : evt.speech_final,
alternatives: [alternatives[0]],
alternatives: alternatives.length ? [alternatives[0]] : [],
vendor: {
name: 'deepgram',
evt: copy
@@ -268,8 +353,10 @@ const normalizeIbm = (evt, channel, language) => {
const normalizeGoogle = (evt, channel, language) => {
const copy = JSON.parse(JSON.stringify(evt));
const language_code = evt.language_code || language;
return {
language_code: language,
language_code: language_code,
channel_tag: channel,
is_final: evt.is_final,
alternatives: [evt.alternatives[0]],
@@ -328,6 +415,20 @@ const normalizeNuance = (evt, channel, language) => {
};
};
/**
 * Wrap a Verbio transcription event in the common transcription shape.
 *
 * `is_final` and `alternatives` are passed through from the event as-is;
 * a JSON deep copy of the whole event is attached under `vendor.evt`.
 *
 * @param {Object} evt - event received from the vendor
 * @param {*} channel - value reported as channel_tag
 * @param {string} language - value reported as language_code
 * @returns {Object} normalized transcription
 */
const normalizeVerbio = (evt, channel, language) => {
  const snapshot = JSON.parse(JSON.stringify(evt));
  const {is_final, alternatives} = evt;
  const vendor = {name: 'verbio', evt: snapshot};

  return {
    language_code: language,
    channel_tag: channel,
    is_final,
    alternatives,
    vendor
  };
};
const normalizeMicrosoft = (evt, channel, language, punctuation = true) => {
const copy = JSON.parse(JSON.stringify(evt));
const nbest = evt.NBest;
@@ -359,16 +460,41 @@ const normalizeMicrosoft = (evt, channel, language, punctuation = true) => {
/**
 * Convert an AWS transcription event to the common transcription shape.
 *
 * Two payload layouts are handled:
 *  - an array (legacy grpc api): `is_final` and `alternatives` are taken
 *    from the first element unchanged;
 *  - an object (websocket api): alternatives are built from
 *    `Transcript.Results[0].Alternatives`, each with its `Transcript` text
 *    and a confidence equal to the mean `Confidence` of its
 *    'pronunciation' items (0 when no item carries a confidence). The
 *    event is final when `Results[0].IsPartial` is exactly false.
 *
 * A websocket event with no results yields `is_final: false` and an empty
 * `alternatives` array instead of throwing.
 *
 * @param {Array|Object} evt - event received from the vendor
 * @param {*} channel - value reported as channel_tag
 * @param {string} language - value reported as language_code
 * @returns {Object} normalized transcription; `vendor.evt` holds a JSON
 *   deep copy of the original event
 */
const normalizeAws = (evt, channel, language) => {
  const copy = JSON.parse(JSON.stringify(evt));
  const vendor = {
    name: 'aws',
    evt: copy
  };

  if (Array.isArray(evt)) {
    /* legacy grpc api */
    return {
      language_code: language,
      channel_tag: channel,
      is_final: evt[0].is_final,
      alternatives: evt[0].alternatives,
      vendor
    };
  }

  /* websocket api */
  const result = evt.Transcript?.Results?.[0];
  const alternatives = (result?.Alternatives || []).map((alt) => {
    const items = (alt.Items || []).filter((item) => item.Type === 'pronunciation' && 'Confidence' in item);
    // avoid 0/0 = NaN when no item carries a confidence
    const confidence = items.length > 0 ?
      items.reduce((acc, item) => acc + item.Confidence, 0) / items.length :
      0;
    return {
      transcript: alt.Transcript,
      confidence
    };
  });

  return {
    language_code: language,
    channel_tag: channel,
    is_final: result?.IsPartial === false,
    alternatives,
    vendor
  };
};
const normalizeAssemblyAi = (evt, channel, language) => {
@@ -384,12 +510,37 @@ const normalizeAssemblyAi = (evt, channel, language) => {
}
],
vendor: {
name: 'ASSEMBLYAI',
name: 'assemblyai',
evt: copy
}
};
};
/**
 * Convert a Speechmatics event to the common transcription shape.
 *
 * The event is final when its `message` is 'AddTranscript'. The single
 * alternative carries `metadata.transcript` and a confidence equal to the
 * mean confidence of the first alternative of every result of type
 * 'word' (0 when there are no word results).
 *
 * @param {Object} evt - event received from the vendor
 * @param {*} channel - value reported as channel_tag
 * @param {string} language - value reported as language_code
 * @returns {Object} normalized transcription; `vendor.evt` holds a JSON
 *   deep copy of the original event
 */
const normalizeSpeechmatics = (evt, channel, language) => {
  const snapshot = JSON.parse(JSON.stringify(evt));
  const finalized = evt.message === 'AddTranscript';
  const wordResults = evt.results?.filter((r) => r.type === 'word') || [];

  let confidence = 0;
  if (wordResults.length > 0) {
    let sum = 0;
    for (const word of wordResults) {
      sum += word.alternatives[0].confidence;
    }
    confidence = sum / wordResults.length;
  }

  return {
    language_code: language,
    channel_tag: channel,
    is_final: finalized,
    alternatives: [
      {
        confidence,
        transcript: evt.metadata?.transcript
      }
    ],
    vendor: {
      name: 'speechmatics',
      evt: snapshot
    }
  };
};
module.exports = (logger) => {
const normalizeTranscription = (evt, vendor, channel, language, shortUtterance, punctuation) => {
@@ -415,6 +566,10 @@ module.exports = (logger) => {
return normalizeCobalt(evt, channel, language);
case 'assemblyai':
return normalizeAssemblyAi(evt, channel, language, shortUtterance);
case 'verbio':
return normalizeVerbio(evt, channel, language);
case 'speechmatics':
return normalizeSpeechmatics(evt, channel, language);
default:
if (vendor.startsWith('custom:')) {
return normalizeCustom(evt, channel, language, vendor);
@@ -424,22 +579,15 @@ module.exports = (logger) => {
}
};
const setChannelVarsForStt = (task, sttCredentials, rOpts = {}) => {
const setChannelVarsForStt = (task, sttCredentials, language, rOpts = {}) => {
let opts = {};
const {enable, voiceMs = 0, mode = -1} = rOpts.vad || {};
const vad = {enable, voiceMs, mode};
const vendor = rOpts.vendor;
/* voice activity detection works across vendors */
opts = {
...opts,
...(vad.enable && {START_RECOGNIZING_ON_VAD: 1}),
...(vad.enable && vad.voiceMs && {RECOGNIZER_VAD_VOICE_MS: vad.voiceMs}),
...(vad.enable && typeof vad.mode === 'number' && {RECOGNIZER_VAD_MODE: vad.mode}),
};
if ('google' === vendor) {
const model = task.name === TaskName.Gather ? 'command_and_search' : 'latest_long';
const useV2 = rOpts.googleOptions?.serviceVersion === 'v2';
const version = useV2 ? 'v2' : 'v1';
let {model} = rOpts;
model = model || selectDefaultGoogleModel(task, language, version);
opts = {
...opts,
...(sttCredentials && {GOOGLE_APPLICATION_CREDENTIALS: JSON.stringify(sttCredentials.credentials)}),
@@ -472,19 +620,52 @@ module.exports = (logger) => {
...{GOOGLE_SPEECH_MODEL: rOpts.model || model},
...(rOpts.naicsCode > 0 && {GOOGLE_SPEECH_METADATA_INDUSTRY_NAICS_CODE: rOpts.naicsCode}),
GOOGLE_SPEECH_METADATA_RECORDING_DEVICE_TYPE: 'phone_line',
...(useV2 && {
GOOGLE_SPEECH_RECOGNIZER_PARENT: `projects/${sttCredentials.credentials.project_id}/locations/global`,
GOOGLE_SPEECH_CLOUD_SERVICES_VERSION: 'v2',
...(rOpts.googleOptions?.speechStartTimeoutMs && {
GOOGLE_SPEECH_START_TIMEOUT_MS: rOpts.googleOptions.speechStartTimeoutMs
}),
...(rOpts.googleOptions?.speechEndTimeoutMs && {
GOOGLE_SPEECH_END_TIMEOUT_MS: rOpts.googleOptions.speechEndTimeoutMs
}),
...(rOpts.googleOptions?.transcriptNormalization && {
GOOGLE_SPEECH_TRANSCRIPTION_NORMALIZATION: JSON.stringify(rOpts.googleOptions.transcriptNormalization)
}),
...(rOpts.googleOptions?.enableVoiceActivityEvents && {
GOOGLE_SPEECH_ENABLE_VOICE_ACTIVITY_EVENTS: rOpts.googleOptions.enableVoiceActivityEvents
}),
...(rOpts.sgoogleOptions?.recognizerId) && {GOOGLE_SPEECH_RECOGNIZER_ID: rOpts.googleOptions.recognizerId},
...(rOpts.googleOptions?.enableVoiceActivityEvents && {
GOOGLE_SPEECH_ENABLE_VOICE_ACTIVITY_EVENTS: rOpts.googleOptions.enableVoiceActivityEvents
}),
}),
};
}
else if (['aws', 'polly'].includes(vendor)) {
const {awsOptions = {}} = rOpts;
const vocabularyName = awsOptions.vocabularyName || rOpts.vocabularyName;
const vocabularyFilterName = awsOptions.vocabularyFilterName || rOpts.vocabularyFilterName;
const filterMethod = awsOptions.vocabularyFilterMethod || rOpts.filterMethod;
opts = {
...opts,
...(rOpts.vocabularyName && {AWS_VOCABULARY_NAME: rOpts.vocabularyName}),
...(rOpts.vocabularyFilterName && {AWS_VOCABULARY_FILTER_NAME: rOpts.vocabularyFilterName}),
...(rOpts.filterMethod && {AWS_VOCABULARY_FILTER_METHOD: rOpts.filterMethod}),
...(vocabularyName && {AWS_VOCABULARY_NAME: vocabularyName}),
...(vocabularyFilterName && {AWS_VOCABULARY_FILTER_NAME: vocabularyFilterName}),
...(filterMethod && {AWS_VOCABULARY_FILTER_METHOD: filterMethod}),
...(sttCredentials && {
AWS_ACCESS_KEY_ID: sttCredentials.accessKeyId,
AWS_SECRET_ACCESS_KEY: sttCredentials.secretAccessKey,
AWS_REGION: sttCredentials.region
AWS_REGION: sttCredentials.region,
AWS_SECURITY_TOKEN: sttCredentials.securityToken
}),
...(awsOptions.accessKey && {AWS_ACCESS_KEY_ID: awsOptions.accessKey}),
...(awsOptions.secretKey && {AWS_SECRET_ACCESS_KEY: awsOptions.secretKey}),
...(awsOptions.region && {AWS_REGION: awsOptions.region}),
...(awsOptions.securityToken && {AWS_SECURITY_TOKEN: awsOptions.securityToken}),
...(awsOptions.languageModelName && {AWS_LANGUAGE_MODEL_NAME: awsOptions.languageModelName}),
...(awsOptions.piiEntityTypes?.length && {AWS_PII_ENTITY_TYPES: awsOptions.piiEntityTypes.join(',')}),
...(awsOptions.piiIdentifyEntities && {AWS_PII_IDENTIFY_ENTITIES: true}),
...(awsOptions.languageModelName && {AWS_LANGUAGE_MODEL_NAME: awsOptions.languageModelName}),
};
}
else if ('microsoft' === vendor) {
@@ -510,12 +691,19 @@ module.exports = (logger) => {
...{AZURE_USE_OUTPUT_FORMAT_DETAILED: 1},
...(azureOptions.speechSegmentationSilenceTimeoutMs &&
{AZURE_SPEECH_SEGMENTATION_SILENCE_TIMEOUT_MS: azureOptions.speechSegmentationSilenceTimeoutMs}),
...(azureOptions.languageIdMode &&
{AZURE_LANGUAGE_ID_MODE: azureOptions.languageIdMode}),
...(azureOptions.postProcessing &&
{AZURE_POST_PROCESSING_OPTION: azureOptions.postProcessing}),
...(sttCredentials && {
...(sttCredentials.api_key && {AZURE_SUBSCRIPTION_KEY: sttCredentials.api_key}),
...(sttCredentials.region && {AZURE_REGION: sttCredentials.region}),
}),
...(sttCredentials.use_custom_stt && sttCredentials.custom_stt_endpoint &&
{AZURE_SERVICE_ENDPOINT_ID: sttCredentials.custom_stt_endpoint}),
//azureSttEndpointId overrides sttCredentials.custom_stt_endpoint
...(rOpts.azureSttEndpointId &&
{AZURE_SERVICE_ENDPOINT_ID: rOpts.azureSttEndpointId}),
};
}
else if ('nuance' === vendor) {
@@ -567,15 +755,24 @@ module.exports = (logger) => {
};
}
else if ('deepgram' === vendor) {
let {model} = rOpts;
const {deepgramOptions = {}} = rOpts;
const deepgramUri = deepgramOptions.deepgramSttUri || sttCredentials.deepgram_stt_uri;
const useTls = deepgramOptions.deepgramSttUseTls || sttCredentials.deepgram_stt_use_tls;
/* default to a sensible model if not supplied */
if (!model) {
model = selectDefaultDeepgramModel(task, language);
}
opts = {
...opts,
DEEPGRAM_SPEECH_MODEL: model,
...(deepgramUri && {DEEPGRAM_URI: deepgramUri}),
...(deepgramUri && useTls && {DEEPGRAM_USE_TLS: 1}),
...(sttCredentials.api_key) &&
{DEEPGRAM_API_KEY: sttCredentials.api_key},
...(deepgramOptions.tier) &&
{DEEPGRAM_SPEECH_TIER: deepgramOptions.tier},
...(deepgramOptions.model) &&
{DEEPGRAM_SPEECH_MODEL: deepgramOptions.model},
...(deepgramOptions.punctuate) &&
{DEEPGRAM_SPEECH_ENABLE_AUTOMATIC_PUNCTUATION: 1},
...(deepgramOptions.smartFormatting) &&
@@ -605,13 +802,17 @@ module.exports = (logger) => {
...(deepgramOptions.keywords) &&
{DEEPGRAM_SPEECH_KEYWORDS: deepgramOptions.keywords.join(',')},
...('endpointing' in deepgramOptions) &&
{DEEPGRAM_SPEECH_ENDPOINTING: deepgramOptions.endpointing === false ? 'false' : deepgramOptions.endpointing},
{DEEPGRAM_SPEECH_ENDPOINTING: deepgramOptions.endpointing === false ? 'false' : deepgramOptions.endpointing,
// DEEPGRAM_SPEECH_UTTERANCE_END_MS defaults to 1000; a user-supplied utteranceEndMs (below) overrides it.
DEEPGRAM_SPEECH_UTTERANCE_END_MS: 1000},
...(deepgramOptions.utteranceEndMs) &&
{DEEPGRAM_SPEECH_UTTERANCE_END_MS: deepgramOptions.utteranceEndMs},
...(deepgramOptions.vadTurnoff) &&
{DEEPGRAM_SPEECH_VAD_TURNOFF: deepgramOptions.vadTurnoff},
...(deepgramOptions.tag) &&
{DEEPGRAM_SPEECH_TAG: deepgramOptions.tag}
{DEEPGRAM_SPEECH_TAG: deepgramOptions.tag},
...(deepgramOptions.version) &&
{DEEPGRAM_SPEECH_MODEL_VERSION: deepgramOptions.version}
};
}
else if ('soniox' === vendor) {
@@ -710,7 +911,8 @@ module.exports = (logger) => {
...(cobaltOptions.enableConfusionNetwork && {COBALT_ENABLE_CONFUSION_NETWORK: 1}),
...(cobaltOptions.compiledContextData && {COBALT_COMPILED_CONTEXT_DATA: cobaltOptions.compiledContextData}),
};
} else if ('assemblyai' === vendor) {
}
else if ('assemblyai' === vendor) {
opts = {
...opts,
...(sttCredentials.api_key) &&
@@ -719,8 +921,44 @@ module.exports = (logger) => {
{ASSEMBLYAI_WORD_BOOST: JSON.stringify(rOpts.hints)})
};
}
else if ('verbio' === vendor) {
const {verbioOptions = {}} = rOpts;
opts = {
...opts,
...(sttCredentials.access_token && { VERBIO_ACCESS_TOKEN: sttCredentials.access_token}),
...(sttCredentials.engine_version && {VERBIO_ENGINE_VERSION: sttCredentials.engine_version}),
...(language && {VERBIO_LANGUAGE: language}),
...(verbioOptions.enable_formatting && {VERBIO_ENABLE_FORMATTING: verbioOptions.enable_formatting}),
...(verbioOptions.enable_diarization && {VERBIO_ENABLE_DIARIZATION: verbioOptions.enable_diarization}),
...(verbioOptions.topic && {VERBIO_TOPIC: verbioOptions.topic}),
...(verbioOptions.inline_grammar && {VERBIO_INLINE_GRAMMAR: verbioOptions.inline_grammar}),
...(verbioOptions.grammar_uri && {VERBIO_GRAMMAR_URI: verbioOptions.grammar_uri}),
...(verbioOptions.label && {VERBIO_LABEL: verbioOptions.label}),
...(verbioOptions.recognition_timeout && {VERBIO_RECOGNITION_TIMEOUT: verbioOptions.recognition_timeout}),
...(verbioOptions.speech_complete_timeout &&
{VERBIO_SPEECH_COMPLETE_TIMEOUT: verbioOptions.speech_complete_timeout}),
...(verbioOptions.speech_incomplete_timeout &&
{VERBIO_SPEECH_INCOMPLETE_TIMEOUT: verbioOptions.speech_incomplete_timeout}),
};
}
else if ('speechmatics' === vendor) {
const {speechmaticsOptions = {}} = rOpts;
opts = {
...opts,
...(sttCredentials.api_key) && {SPEECHMATICS_API_KEY: sttCredentials.api_key},
...(sttCredentials.speechmatics_stt_uri) && {SPEECHMATICS_HOST: sttCredentials.speechmatics_stt_uri},
...(rOpts.hints?.length > 0 && {SPEECHMATICS_SPEECH_HINTS: rOpts.hints.join(',')}),
...(speechmaticsOptions.translation_config &&
{
SPEECHMATICS_TRANSLATION_LANGUAGES: speechmaticsOptions.translation_config.target_languages.join(','),
SPEECHMATICS_TRANSLATION_PARTIALS: speechmaticsOptions.translation_config.enable_partials ? 1 : 0
}
),
};
}
else if (vendor.startsWith('custom:')) {
let {options = {}} = rOpts;
let {options = {}} = rOpts.customOptions || {};
const {sampleRate} = rOpts.customOptions || {};
const {auth_token, custom_stt_url} = sttCredentials;
options = {
...options,
@@ -728,14 +966,15 @@ module.exports = (logger) => {
{hints: rOpts.hints}),
...(rOpts.hints?.length > 0 && typeof rOpts.hints[0] === 'object' &&
{hints: JSON.stringify(rOpts.hints)}),
...(typeof rOpts.hintsBoost === 'number' && {hintsBoost: rOpts.hintsBoost})
...(typeof rOpts.hintsBoost === 'number' && {hintsBoost: rOpts.hintsBoost}),
...(task.cs?.callSid && {callSid: task.cs.callSid})
};
opts = {
...opts,
...(auth_token && {JAMBONZ_STT_API_KEY: auth_token}),
JAMBONZ_STT_URL: custom_stt_url,
...(Object.keys(options).length > 0 && {JAMBONZ_STT_OPTIONS: JSON.stringify(options)}),
...(sampleRate && {JAMBONZ_STT_SAMPLING: sampleRate})
};
}
@@ -785,6 +1024,6 @@ module.exports = (logger) => {
setChannelVarsForStt,
setSpeechCredentialsAtRuntime,
compileSonioxTranscripts,
consolidateTranscripts
consolidateTranscripts,
};
};

View File

@@ -0,0 +1,325 @@
const Emitter = require('events');
const assert = require('assert');
const {
TtsStreamingEvents,
TtsStreamingConnectionStatus
} = require('../utils/constants');
/* upper bound (in characters) on how far into the buffered text we look for a chunk boundary */
const MAX_CHUNK_SIZE = 1800;
/* a bufferTokens request that would push the buffered text past this many characters is rejected */
const HIGH_WATER_BUFFER_SIZE = 1000;
/* once throttled, the buffered text must drain to this many characters or fewer before Resume is emitted */
const LOW_WATER_BUFFER_SIZE = 200;
/* delay (msecs) before retrying a send when no usable boundary was found in the buffered text */
const TIMEOUT_RETRY_MSECS = 3000;
/**
 * Buffers text destined for a streaming TTS vendor and feeds it to the media
 * endpoint in sentence-sized chunks.
 *
 * Emits:
 *  - TtsStreamingEvents.Pause when a request is rejected because the buffer is full
 *  - TtsStreamingEvents.Resume when a full buffer has drained to the low water mark
 *  - TtsStreamingEvents.Empty when the endpoint reports the vendor's Empty event
 *  - TtsStreamingEvents.ConnectFailure when the vendor connection fails
 */
class TtsStreamingBuffer extends Emitter {
  /**
   * @param {object} cs - call session; this class reads cs.logger, cs.ep and
   * cs.isTtsStreamOpen, and calls cs.getTsStreamingVendor()
   */
  constructor(cs) {
    super();
    this.cs = cs;
    this.logger = cs.logger;
    this.tokens = '';
    this.eventHandlers = [];
    this._isFull = false;
    this._connectionStatus = TtsStreamingConnectionStatus.NotConnected;
    /* flush() arrived while still connecting; honored in _onConnect */
    this._flushPending = false;
    /* flush() arrived while tokens were queued; honored after the next send */
    this._pendingFlush = false;
    this.timer = null;
  }

  /** true when no text is buffered */
  get isEmpty() {
    return this.tokens.length === 0;
  }

  /** true while the buffer is throttled (high water mark was crossed) */
  get isFull() {
    return this._isFull;
  }

  /** number of buffered characters */
  get size() {
    return this.tokens.length;
  }

  /** the call session's current endpoint, if any */
  get ep() {
    return this.cs?.ep;
  }

  /**
   * Register the vendor event handlers (first time only) and ask the endpoint
   * to connect to the TTS streaming vendor.  A connect error is logged and
   * leaves the buffer in the Failed state rather than being rethrown.
   *
   * @throws {Error} if no TTS streaming vendor is configured, or (assertion)
   * if the buffer is not in the NotConnected state
   */
  async start() {
    assert.ok(
      this._connectionStatus === TtsStreamingConnectionStatus.NotConnected,
      'TtsStreamingBuffer:start already started, or has failed');

    this.vendor = this.cs.getTsStreamingVendor();
    if (!this.vendor) {
      this.logger.info('TtsStreamingBuffer:start No TTS streaming vendor configured');
      throw new Error('No TTS streaming vendor configured');
    }

    this.logger.info(`TtsStreamingBuffer:start Connecting to TTS streaming with vendor ${this.vendor}`);

    this._connectionStatus = TtsStreamingConnectionStatus.Connecting;
    try {
      if (this.eventHandlers.length === 0) this._initHandlers(this.ep);
      await this._api(this.ep, [this.ep.uuid, 'connect']);
    } catch (err) {
      this.logger.info({err}, 'TtsStreamingBuffer:start Error connecting to TTS streaming');
      this._connectionStatus = TtsStreamingConnectionStatus.Failed;
    }
  }

  /**
   * Tear down: cancel the retry timer, detach event handlers, ask the endpoint
   * to close the stream (errors are only logged) and return to NotConnected.
   */
  stop() {
    clearTimeout(this.timer);
    this.removeCustomEventListeners();
    if (this.ep) {
      this._api(this.ep, [this.ep.uuid, 'close'])
        .catch((err) => this.logger.info({err}, 'TtsStreamingBuffer:kill Error closing TTS streaming'));
    }
    this.timer = null;
    this.tokens = '';
    /* a flush requested for the stream we just closed must not leak into the next one */
    this._flushPending = false;
    this._pendingFlush = false;
    this._connectionStatus = TtsStreamingConnectionStatus.NotConnected;
  }

  /**
   * Add tokens to the buffer and start feeding them to the endpoint if necessary.
   *
   * @param {string} tokens - text to append; a null/undefined value is treated as ''
   * @returns {Promise<{status: string, reason?: string}>} {status: 'ok'} when buffered,
   * otherwise {status: 'failed', reason} (connection failed, or buffer 'full')
   */
  async bufferTokens(tokens) {
    if (this._connectionStatus === TtsStreamingConnectionStatus.Failed) {
      this.logger.info('TtsStreamingBuffer:bufferTokens TTS streaming connection failed, rejecting request');
      return {status: 'failed', reason: `connection to ${this.vendor} failed`};
    }

    /* normalize up front so a missing payload cannot throw on .length below */
    const text = tokens || '';
    const displayedTokens = text.length <= 40 ? text : text.substring(0, 40);
    const totalLength = text.length;

    /* if we crossed the high water mark, reject the request */
    if (this.tokens.length + totalLength > HIGH_WATER_BUFFER_SIZE) {
      this.logger.info(
        `TtsStreamingBuffer throttling: buffer is full, rejecting request to buffer ${totalLength} tokens`);

      /* only announce the transition into the full state once */
      if (!this._isFull) {
        this._isFull = true;
        this.emit(TtsStreamingEvents.Pause);
      }
      return {status: 'failed', reason: 'full'};
    }

    this.logger.debug(
      `TtsStreamingBuffer:bufferTokens "${displayedTokens}" (length: ${totalLength}), starting? ${this.isEmpty}`
    );
    this.tokens += text;

    await this._feedTokens();

    return {status: 'ok'};
  }

  /**
   * Request a flush.  While connecting it is deferred until the connection is
   * made; when connected it is sent immediately if nothing is buffered, or
   * deferred until after the next chunk has been sent.
   */
  flush() {
    this.logger.debug('TtsStreamingBuffer:flush');
    if (this._connectionStatus === TtsStreamingConnectionStatus.Connecting) {
      this.logger.debug('TtsStreamingBuffer:flush TTS stream is not quite ready - wait for connect');
      this._flushPending = true;
      return;
    }
    else if (this._connectionStatus === TtsStreamingConnectionStatus.Connected) {
      if (this.size === 0) {
        this._doFlush();
      }
      else {
        /* we have tokens queued, so flush after they have been sent */
        this._pendingFlush = true;
      }
    }
  }

  /**
   * Discard buffered text and ask the endpoint to clear the stream.
   * No-op unless connected.  Errors from the endpoint are only logged.
   */
  clear() {
    this.logger.debug('TtsStreamingBuffer:clear');

    if (this._connectionStatus !== TtsStreamingConnectionStatus.Connected) return;
    clearTimeout(this.timer);
    this._api(this.ep, [this.ep.uuid, 'clear'])
      .catch((err) => this.logger.info({err}, 'TtsStreamingBuffer:clear Error clearing TTS streaming'));
    this.tokens = '';
    this.timer = null;
    this._isFull = false;
    /* the tokens a deferred flush was waiting on are gone, so drop the flush as well */
    this._pendingFlush = false;
  }

  /**
   * Send tokens to the TTS engine in sentence chunks for best playout
   *
   * @param {boolean} [handlingTimeout=false] - true when invoked from the retry
   * timer, in which case a word boundary is accepted if no sentence boundary exists
   */
  async _feedTokens(handlingTimeout = false) {
    this.logger.debug({tokens: this.tokens}, '_feedTokens');

    try {

      /* are we in a state where we can feed tokens to the TTS? */
      if (!this.cs.isTtsStreamOpen || !this.ep || !this.tokens) {
        this.logger.debug('TTS stream is not open or no tokens to send');
        return this.tokens?.length || 0;
      }

      if (this._connectionStatus === TtsStreamingConnectionStatus.NotConnected ||
        this._connectionStatus === TtsStreamingConnectionStatus.Failed) {
        this.logger.debug('TtsStreamingBuffer:_feedTokens TTS stream is not connected');
        return;
      }

      if (this._connectionStatus === TtsStreamingConnectionStatus.Connecting) {
        this.logger.debug('TtsStreamingBuffer:_feedTokens TTS stream is not ready, waiting for connect');
        return;
      }

      /* must send at least one sentence */
      const limit = Math.min(MAX_CHUNK_SIZE, this.tokens.length);
      let chunkEnd = findSentenceBoundary(this.tokens, limit);

      if (chunkEnd <= 0) {
        if (handlingTimeout) {
          /* on a timeout we've left some tokens sitting around, so be more aggressive now in sending them */
          chunkEnd = findWordBoundary(this.tokens, limit);
          if (chunkEnd <= 0) {
            this.logger.debug('TtsStreamingBuffer:_feedTokens: no word boundary found');
            this._setTimerIfNeeded();
            return;
          }
        }
        else {
          /* if we just received tokens, we wont send unless we have at least a full sentence */
          this.logger.debug('TtsStreamingBuffer:_feedTokens: no sentence boundary found');
          this._setTimerIfNeeded();
          return;
        }
      }

      const chunk = this.tokens.slice(0, chunkEnd);
      this.tokens = this.tokens.slice(chunkEnd);

      /* freeswitch looks for sequence of 2 newlines to determine end of message, so insert a space */
      const modifiedChunk = chunk.replace(/\n\n/g, '\n \n');
      await this._api(this.ep, [this.ep.uuid, 'send', modifiedChunk]);
      this.logger.debug(`TtsStreamingBuffer:_feedTokens: sent ${chunk.length}, remaining: ${this.tokens.length}`);

      /* a flush that was waiting on queued tokens goes out right after the send */
      if (this._pendingFlush) {
        this._doFlush();
        this._pendingFlush = false;
      }

      if (this.isFull && this.tokens.length <= LOW_WATER_BUFFER_SIZE) {
        this.logger.info('TtsStreamingBuffer throttling: TTS streaming buffer is no longer full - resuming');
        this._isFull = false;
        this.emit(TtsStreamingEvents.Resume);
      }
    } catch (err) {
      this.logger.info({err}, 'TtsStreamingBuffer:_feedTokens Error sending TTS chunk');
      this.tokens = '';
    }

    return;
  }

  /**
   * Invoke the vendor-specific `uuid_<vendor>_tts_streaming` api on the endpoint.
   *
   * @param {object} ep - endpoint exposing an async api(cmd, args) method
   * @param {Array} args - arguments, sent joined by '|' after a '^^|' prefix
   * @throws {Error} if the response body does not start with '+OK'
   */
  async _api(ep, args) {
    const apiCmd = `uuid_${this.vendor}_tts_streaming`;
    const res = await ep.api(apiCmd, `^^|${args.join('|')}`);
    if (!res.body?.startsWith('+OK')) {
      this.logger.info({args}, `Error calling ${apiCmd}: ${res.body}`);
      throw new Error(`Error calling ${apiCmd}: ${res.body}`);
    }
  }

  /** Vendor reported a connect failure: mark Failed, drop buffered text, notify listeners. */
  _onConnectFailure(vendor) {
    this.logger.info(`streaming tts connection failed to ${vendor}`);
    this._connectionStatus = TtsStreamingConnectionStatus.Failed;
    this.tokens = '';
    this.emit(TtsStreamingEvents.ConnectFailure, {vendor});
  }

  /** Ask the endpoint to flush; errors are only logged. */
  _doFlush() {
    this._api(this.ep, [this.ep.uuid, 'flush'])
      .catch((err) => this.logger.info({err},
        `TtsStreamingBuffer:_doFlush Error flushing TTS streaming: ${JSON.stringify(err)}`));
  }

  /** Vendor connection is up: send anything buffered, then honor a flush deferred while connecting. */
  async _onConnect(vendor) {
    this.logger.info(`streaming tts connection made to ${vendor}`);
    this._connectionStatus = TtsStreamingConnectionStatus.Connected;
    if (this.tokens.length > 0) {
      await this._feedTokens();
    }
    if (this._flushPending) {
      this.flush();
      this._flushPending = false;
    }
  }

  /** Arm the retry timer if text is buffered and no timer is already running. */
  _setTimerIfNeeded() {
    if (this.tokens.length > 0 && !this.timer) {
      this.timer = setTimeout(this._onTimeout.bind(this), TIMEOUT_RETRY_MSECS);
    }
  }

  /** Retry timer fired: try again, this time accepting a word boundary. */
  _onTimeout() {
    this.logger.info('TtsStreamingBuffer:_onTimeout');
    this.timer = null;
    this._feedTokens(true);
  }

  /** Relay the vendor's Empty event to our listeners. */
  _onTtsEmpty(vendor) {
    this.emit(TtsStreamingEvents.Empty, {vendor});
  }

  /** Attach a handler to the endpoint and remember it so it can be detached later. */
  addCustomEventListener(ep, event, handler) {
    this.eventHandlers.push({ep, event, handler});
    ep.addCustomEventListener(event, handler);
  }

  /**
   * Detach every handler we attached.  The bookkeeping array is emptied too,
   * because start() only registers handlers when that array is empty; leaving
   * stale entries behind would mean a restarted buffer never hears any events.
   */
  removeCustomEventListeners() {
    this.eventHandlers.forEach((h) => h.ep.removeCustomEventListener(h.event, h.handler));
    this.eventHandlers = [];
  }

  /**
   * Register Connect / ConnectFailure / Empty handlers for each supported vendor.
   *
   * @throws {Error} if the constants module has no event class for a vendor
   */
  _initHandlers(ep) {
    [
      // DH: add other vendors here as modules are added
      'deepgram',
      'cartesia',
      'elevenlabs'
    ].forEach((vendor) => {
      /* e.g. 'deepgram' -> 'DeepgramTtsStreamingEvents' */
      const eventClassName = `${vendor.charAt(0).toUpperCase() + vendor.slice(1)}TtsStreamingEvents`;
      const eventClass = require('../utils/constants')[eventClassName];
      if (!eventClass) throw new Error(`Event class for vendor ${vendor} not found`);

      this.addCustomEventListener(ep, eventClass.Connect, this._onConnect.bind(this, vendor));
      this.addCustomEventListener(ep, eventClass.ConnectFailure, this._onConnectFailure.bind(this, vendor));
      this.addCustomEventListener(ep, eventClass.Empty, this._onTtsEmpty.bind(this, vendor));
    });
  }
}
/**
 * Find where the last complete sentence ends within the first `limit`
 * characters of `text`.
 *
 * A sentence terminator is '.', '!' or '?' followed by whitespace or the end
 * of the text, or a blank line ("\n\n").  A terminator is ignored when only
 * whitespace precedes it, and punctuation immediately following a digit is
 * not treated as a terminator.
 *
 * @param {string} text - buffered text to scan
 * @param {number} limit - only terminators starting before this index count
 * @returns {number} index just past the last accepted terminator, or -1 if none
 */
const findSentenceBoundary = (text, limit) => {
  const terminator = /[.!?](?=\s|$)|\n\n/g;
  let boundary = -1;

  for (
    let hit = terminator.exec(text);
    hit && hit.index < limit;
    hit = terminator.exec(text)
  ) {
    const {index} = hit;
    const isParagraphBreak = hit[0] === '\n\n';

    /* nothing but whitespace so far: there is no sentence to terminate */
    if (text.slice(0, index).trim().length === 0) continue;

    /* punctuation right after a digit is skipped */
    if (!isParagraphBreak && /\d$/.test(text[index - 1])) continue;

    /* the boundary sits just past the terminator itself */
    boundary = index + (isParagraphBreak ? 2 : 1);
  }

  return boundary;
};
/**
 * Find the start of the last run of whitespace that begins within the first
 * `limit` characters of `text`.
 *
 * @param {string} text - buffered text to scan
 * @param {number} limit - only whitespace runs starting before this index count
 * @returns {number} index where that whitespace run starts, or -1 if there is none
 */
const findWordBoundary = (text, limit) => {
  const whitespaceRun = /\s+/g;
  let boundary = -1;

  for (
    let hit = whitespaceRun.exec(text);
    hit !== null && hit.index < limit;
    hit = whitespaceRun.exec(text)
  ) {
    boundary = hit.index;
  }

  return boundary;
};
module.exports = TtsStreamingBuffer;

View File

@@ -12,6 +12,20 @@ const {
JAMBONES_WS_MAX_PAYLOAD,
HTTP_USER_AGENT_HEADER
} = require('../config');
const MTYPE_WANTS_ACK = [
'call:status',
'verb:status',
'jambonz:error',
'llm:event',
'llm:tool-call',
'tts:streaming-event',
'tts:tokens-result',
];
const MTYPE_NO_DATA = [
'llm:tool-output',
'tts:flush',
'tts:clear'
];
class WsRequestor extends BaseRequestor {
constructor(logger, account_sid, hook, secret) {
@@ -44,7 +58,7 @@ class WsRequestor extends BaseRequestor {
async request(type, hook, params, httpHeaders = {}) {
assert(HookMsgTypes.includes(type));
const url = hook.url || hook;
const wantsAck = !['call:status', 'verb:status', 'jambonz:error'].includes(type);
const wantsAck = !MTYPE_WANTS_ACK.includes(type);
if (this.maliciousClient) {
this.logger.info({url: this.url}, 'WsRequestor:request - discarding msg to malicious client');
@@ -56,6 +70,12 @@ class WsRequestor extends BaseRequestor {
}
if (type === 'session:new') this.call_sid = params.callSid;
if (type === 'session:reconnect') {
this._reconnectPromise = new Promise((resolve, reject) => {
this._reconnectResolve = resolve;
this._reconnectReject = reject;
});
}
/* if we have an absolute url, and it is http then do a standard webhook */
if (this._isAbsoluteUrl(url) && url.startsWith('http')) {
@@ -71,20 +91,23 @@ class WsRequestor extends BaseRequestor {
}
/* connect if necessary */
const queueMsg = () => {
this.logger.debug(
`WsRequestor:request(${this.id}) - queueing ${type} message since we are connecting`);
if (wantsAck) {
const p = new Promise((resolve, reject) => {
this.queuedMsg.push({type, hook, params, httpHeaders, promise: {resolve, reject}});
});
return p;
}
else {
this.queuedMsg.push({type, hook, params, httpHeaders});
}
return;
};
if (!this.ws) {
if (this.connectInProgress) {
this.logger.debug(
`WsRequestor:request(${this.id}) - queueing ${type} message since we are connecting`);
if (wantsAck) {
const p = new Promise((resolve, reject) => {
this.queuedMsg.push({type, hook, params, httpHeaders, promise: {resolve, reject}});
});
return p;
}
else {
this.queuedMsg.push({type, hook, params, httpHeaders});
}
return;
return queueMsg();
}
this.connectInProgress = true;
this.logger.debug(`WsRequestor:request(${this.id}) - connecting since we do not have a connection for ${type}`);
@@ -102,6 +125,10 @@ class WsRequestor extends BaseRequestor {
return Promise.reject(err);
}
}
// If we are still waiting for the ack to session:reconnect, queue this message until the reconnect is acked
if (type !== 'session:reconnect' && this._reconnectPromise) {
return queueMsg();
}
assert(this.ws);
/* prepare and send message */
@@ -119,7 +146,7 @@ class WsRequestor extends BaseRequestor {
type,
msgid,
call_sid: this.call_sid,
hook: type === 'verb:hook' ? url : undefined,
hook: ['verb:hook', 'session:redirect', 'llm:event', 'llm:tool-call'].includes(type) ? url : undefined,
data: {...payload},
...b3
};
@@ -139,6 +166,18 @@ class WsRequestor extends BaseRequestor {
}
};
const rejectQueuedMsgs = (err) => {
if (this.queuedMsg.length > 0) {
for (const {promise} of this.queuedMsg) {
this.logger.debug(`WsRequestor:request - preparing queued ${type} for rejectQueuedMsgs`);
if (promise) {
promise.reject(err);
}
}
this.queuedMsg.length = 0;
}
};
//this.logger.debug({obj}, `websocket: sending (${url})`);
/* special case: reconnecting before we received ack to session:new */
@@ -179,16 +218,37 @@ class WsRequestor extends BaseRequestor {
this.logger.debug({response}, `WsRequestor:request ${url} succeeded in ${rtt}ms`);
this.stats.histogram('app.hook.ws_response_time', rtt, ['hook_type:app']);
resolve(response);
if (this._reconnectResolve) {
this._reconnectResolve();
}
},
failure: (err) => {
if (this._reconnectReject) {
this._reconnectReject(err);
}
clearTimeout(timer);
reject(err);
}
});
/* send the message */
this.ws.send(JSON.stringify(obj), () => {
this.ws.send(JSON.stringify(obj), async() => {
this.logger.debug({obj}, `WsRequestor:request websocket: sent (${url})`);
// If session:reconnect is waiting for ack, hold here until ack to send queuedMsgs
if (this._reconnectPromise) {
try {
await this._reconnectPromise;
} catch (err) {
// session:reconnect failed, so reject everything that was queued behind it
rejectQueuedMsgs(err);
this.emit('reconnect-error');
return;
} finally {
this._reconnectPromise = null;
this._reconnectResolve = null;
this._reconnectReject = null;
}
}
sendQueuedMsgs();
});
});
@@ -326,7 +386,9 @@ class WsRequestor extends BaseRequestor {
'WsRequestor:_onSocketClosed time to reconnect');
if (!this.ws && !this.connectInProgress) {
this.connectInProgress = true;
this._connect().catch((err) => this.connectInProgress = false);
return this._connect()
.catch((err) => this.logger.error('WsRequestor:_onSocketClosed There is error while reconnect', err))
.finally(() => this.connectInProgress = false);
}
}, this.backoffMs);
this.backoffMs = this.backoffMs < 2000 ? this.backoffMs * 2 : (this.backoffMs + 2000);
@@ -344,7 +406,10 @@ class WsRequestor extends BaseRequestor {
/* messages must be JSON format */
try {
const obj = JSON.parse(content);
const {type, msgid, command, call_sid = this.call_sid, queueCommand = false, data} = obj;
this.logger.debug({obj}, 'WsRequestor:_onMessage - received message');
//const {type, msgid, command, call_sid = this.call_sid, queueCommand = false, data} = obj;
const {type, msgid, command, queueCommand = false, tool_call_id, data} = obj;
const call_sid = obj.callSid || this.call_sid;
//this.logger.debug({obj}, 'WsRequestor:request websocket: received');
assert.ok(type, 'type property not supplied');
@@ -357,8 +422,8 @@ class WsRequestor extends BaseRequestor {
case 'command':
assert.ok(command, 'command property not supplied');
assert.ok(data, 'data property not supplied');
this._recvCommand(msgid, command, call_sid, queueCommand, data);
assert.ok(data || MTYPE_NO_DATA.includes(command), 'data property not supplied');
this._recvCommand(msgid, command, call_sid, queueCommand, tool_call_id, data);
break;
default:
@@ -382,10 +447,10 @@ class WsRequestor extends BaseRequestor {
success && success(data);
}
_recvCommand(msgid, command, call_sid, queueCommand, data) {
_recvCommand(msgid, command, call_sid, queueCommand, tool_call_id, data) {
// TODO: validate command
this.logger.debug({msgid, command, call_sid, queueCommand, data}, 'received command');
this.emit('command', {msgid, command, call_sid, queueCommand, data});
this.emit('command', {msgid, command, call_sid, queueCommand, tool_call_id, data});
}
}

14742
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,9 +1,9 @@
{
"name": "jambonz-feature-server",
"version": "0.8.5",
"version": "0.9.2",
"main": "app.js",
"engines": {
"node": ">= 10.16.0"
"node": ">= 18.x"
},
"keywords": [
"sip",
@@ -25,57 +25,56 @@
"jslint:fix": "eslint app.js tracer.js lib --fix"
},
"dependencies": {
"@aws-sdk/client-auto-scaling": "^3.360.0",
"@aws-sdk/client-sns": "^3.360.0",
"@jambonz/db-helpers": "^0.9.1",
"@aws-sdk/client-auto-scaling": "^3.549.0",
"@aws-sdk/client-sns": "^3.549.0",
"@jambonz/db-helpers": "^0.9.6",
"@jambonz/http-health-check": "^0.0.1",
"@jambonz/mw-registrar": "^0.2.4",
"@jambonz/realtimedb-helpers": "^0.8.7",
"@jambonz/speech-utils": "^0.0.33",
"@jambonz/stats-collector": "^0.1.9",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.50",
"@opentelemetry/api": "^1.4.0",
"@opentelemetry/exporter-jaeger": "^1.9.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.35.0",
"@opentelemetry/exporter-zipkin": "^1.9.0",
"@opentelemetry/instrumentation": "^0.35.0",
"@opentelemetry/resources": "^1.9.0",
"@opentelemetry/sdk-trace-base": "^1.9.0",
"@opentelemetry/sdk-trace-node": "^1.9.0",
"@opentelemetry/semantic-conventions": "^1.9.0",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.8",
"@jambonz/speech-utils": "^0.2.1",
"@jambonz/stats-collector": "^0.1.10",
"@jambonz/verb-specifications": "^0.0.91",
"@jambonz/time-series": "^0.2.13",
"@opentelemetry/api": "^1.8.0",
"@opentelemetry/exporter-jaeger": "^1.23.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.50.0",
"@opentelemetry/exporter-zipkin": "^1.23.0",
"@opentelemetry/instrumentation": "^0.50.0",
"@opentelemetry/resources": "^1.23.0",
"@opentelemetry/sdk-trace-base": "^1.23.0",
"@opentelemetry/sdk-trace-node": "^1.23.0",
"@opentelemetry/semantic-conventions": "^1.23.0",
"bent": "^7.3.12",
"debug": "^4.3.4",
"deepcopy": "^2.1.0",
"drachtio-fsmrf": "^3.0.33",
"drachtio-srf": "^4.5.31",
"express": "^4.18.2",
"drachtio-fsmrf": "^3.0.46",
"drachtio-srf": "^4.5.35",
"express": "^4.19.2",
"express-validator": "^7.0.1",
"ip": "^1.1.8",
"moment": "^2.29.4",
"parse-url": "^8.1.0",
"pino": "^8.8.0",
"moment": "^2.30.1",
"parse-url": "^9.2.0",
"pino": "^8.20.0",
"polly-ssml-split": "^0.1.0",
"proxyquire": "^2.1.3",
"sdp-transform": "^2.14.1",
"short-uuid": "^4.2.2",
"sinon": "^15.0.1",
"sdp-transform": "^2.14.2",
"short-uuid": "^5.1.0",
"sinon": "^17.0.1",
"to-snake-case": "^1.0.0",
"undici": "^5.26.2",
"undici": "^6.20.0",
"uuid-random": "^1.3.2",
"verify-aws-sns-signature": "^0.1.0",
"ws": "^8.9.0",
"ws": "^8.18.0",
"xml2js": "^0.6.2"
},
"devDependencies": {
"clear-module": "^4.1.2",
"eslint": "^7.32.0",
"eslint-plugin-promise": "^4.3.1",
"eslint": "7.32.0",
"eslint-plugin-promise": "^6.1.1",
"nyc": "^15.1.0",
"tape": "^5.6.1"
"tape": "^5.7.5"
},
"optionalDependencies": {
"bufferutil": "^4.0.6",
"utf-8-validate": "^5.0.8"
"bufferutil": "^4.0.8",
"utf-8-validate": "^6.0.3"
}
}

View File

@@ -222,3 +222,62 @@ test('test create-call app_json', async(t) => {
t.error(err);
}
});
test('test create-call timeLimit', async(t) => {
clearModule.all();
const {srf, disconnect} = require('../app');
try {
await connect(srf);
// GIVEN
let from = 'create-call-app-json';
let account_sid = 'bb845d4b-83a9-4cde-a6e9-50f3743bab3f';
// Give UAS app time to come up
const p = sippUac('uas.xml', '172.38.0.10', from);
await waitFor(1000);
const startTime = Date.now();
const app_json = `[
{
"verb": "pause",
"length": 7
}
]`;
const post = bent('http://127.0.0.1:3000/', 'POST', 'json', 201);
post('v1/createCall', {
'account_sid':account_sid,
"call_hook": {
"url": "http://127.0.0.1:3100/",
"method": "POST",
"username": "username",
"password": "password"
},
app_json,
"from": from,
"to": {
"type": "phone",
"number": "15583084809"
},
"timeLimit": 1,
"speech_recognizer_vendor": "google",
"speech_recognizer_language": "en"
});
//THEN
await p;
const endTime = Date.now();
t.ok(endTime - startTime < 2000, 'create-call: timeLimit is respected');
disconnect();
} catch (err) {
console.log(`error received: ${err}`);
disconnect();
t.error(err);
}
});

View File

@@ -1,4 +1,5 @@
/* SQLEditor (MySQL (2))*/
SET FOREIGN_KEY_CHECKS=0;
DROP TABLE IF EXISTS account_static_ips;
@@ -53,6 +54,8 @@ DROP TABLE IF EXISTS signup_history;
DROP TABLE IF EXISTS smpp_addresses;
DROP TABLE IF EXISTS google_custom_voices;
DROP TABLE IF EXISTS speech_credentials;
DROP TABLE IF EXISTS system_information;
@@ -136,6 +139,9 @@ account_sid CHAR(36) NOT NULL,
is_active BOOLEAN NOT NULL DEFAULT 1,
username VARCHAR(64),
password VARCHAR(1024),
allow_direct_app_calling BOOLEAN NOT NULL DEFAULT 1,
allow_direct_queue_calling BOOLEAN NOT NULL DEFAULT 1,
allow_direct_user_calling BOOLEAN NOT NULL DEFAULT 1,
PRIMARY KEY (client_sid)
);
@@ -338,11 +344,25 @@ label VARCHAR(64),
PRIMARY KEY (speech_credential_sid)
);
CREATE TABLE google_custom_voices
(
google_custom_voice_sid CHAR(36) NOT NULL UNIQUE ,
speech_credential_sid CHAR(36) NOT NULL,
model VARCHAR(512) NOT NULL,
reported_usage ENUM('REPORTED_USAGE_UNSPECIFIED','REALTIME','OFFLINE') DEFAULT 'REALTIME',
name VARCHAR(64) NOT NULL,
voice_cloning_key MEDIUMTEXT,
use_voice_cloning_key BOOLEAN DEFAULT false,
PRIMARY KEY (google_custom_voice_sid)
);
CREATE TABLE system_information
(
domain_name VARCHAR(255),
sip_domain_name VARCHAR(255),
monitoring_domain_name VARCHAR(255)
monitoring_domain_name VARCHAR(255),
private_network_cidr VARCHAR(8192),
log_level ENUM('info', 'debug') NOT NULL DEFAULT 'info'
);
CREATE TABLE users
@@ -437,11 +457,14 @@ CREATE TABLE sip_gateways
sip_gateway_sid CHAR(36),
ipv4 VARCHAR(128) NOT NULL COMMENT 'ip address or DNS name of the gateway. For gateways providing inbound calling service, ip address is required.',
netmask INTEGER NOT NULL DEFAULT 32,
port INTEGER NOT NULL DEFAULT 5060 COMMENT 'sip signaling port',
port INTEGER COMMENT 'sip signaling port',
inbound BOOLEAN NOT NULL COMMENT 'if true, whitelist this IP to allow inbound calls from the gateway',
outbound BOOLEAN NOT NULL COMMENT 'if true, include in least-cost routing when placing calls to the PSTN',
voip_carrier_sid CHAR(36) NOT NULL,
is_active BOOLEAN NOT NULL DEFAULT 1,
send_options_ping BOOLEAN NOT NULL DEFAULT 0,
use_sips_scheme BOOLEAN NOT NULL DEFAULT 0,
pad_crypto BOOLEAN NOT NULL DEFAULT 0,
protocol ENUM('udp','tcp','tls', 'tls/srtp') DEFAULT 'udp' COMMENT 'Outbound call protocol',
PRIMARY KEY (sip_gateway_sid)
) COMMENT='A whitelisted sip gateway used for origination/termination';
@@ -478,11 +501,19 @@ messaging_hook_sid CHAR(36) COMMENT 'webhook to call for inbound SMS/MMS ',
app_json TEXT,
speech_synthesis_vendor VARCHAR(64) NOT NULL DEFAULT 'google',
speech_synthesis_language VARCHAR(12) NOT NULL DEFAULT 'en-US',
speech_synthesis_voice VARCHAR(64),
speech_synthesis_voice VARCHAR(256),
speech_synthesis_label VARCHAR(64),
speech_recognizer_vendor VARCHAR(64) NOT NULL DEFAULT 'google',
speech_recognizer_language VARCHAR(64) NOT NULL DEFAULT 'en-US',
speech_recognizer_label VARCHAR(64),
use_for_fallback_speech BOOLEAN DEFAULT false,
fallback_speech_synthesis_vendor VARCHAR(64),
fallback_speech_synthesis_language VARCHAR(12),
fallback_speech_synthesis_voice VARCHAR(256),
fallback_speech_synthesis_label VARCHAR(64),
fallback_speech_recognizer_vendor VARCHAR(64),
fallback_speech_recognizer_language VARCHAR(64),
fallback_speech_recognizer_label VARCHAR(64),
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
record_all_calls BOOLEAN NOT NULL DEFAULT false,
PRIMARY KEY (application_sid)
@@ -525,6 +556,7 @@ siprec_hook_sid CHAR(36),
record_all_calls BOOLEAN NOT NULL DEFAULT false,
record_format VARCHAR(16) NOT NULL DEFAULT 'mp3',
bucket_credential VARCHAR(8192) COMMENT 'credential used to authenticate with storage service',
enable_debug_log BOOLEAN NOT NULL DEFAULT false,
PRIMARY KEY (account_sid)
) COMMENT='An enterprise that uses the platform for comm services';
@@ -619,6 +651,10 @@ ALTER TABLE speech_credentials ADD FOREIGN KEY service_provider_sid_idxfk_5 (ser
CREATE INDEX account_sid_idx ON speech_credentials (account_sid);
ALTER TABLE speech_credentials ADD FOREIGN KEY account_sid_idxfk_8 (account_sid) REFERENCES accounts (account_sid);
CREATE INDEX google_custom_voice_sid_idx ON google_custom_voices (google_custom_voice_sid);
CREATE INDEX speech_credential_sid_idx ON google_custom_voices (speech_credential_sid);
ALTER TABLE google_custom_voices ADD FOREIGN KEY speech_credential_sid_idxfk (speech_credential_sid) REFERENCES speech_credentials (speech_credential_sid) ON DELETE CASCADE;
CREATE INDEX user_sid_idx ON users (user_sid);
CREATE INDEX email_idx ON users (email);
CREATE INDEX phone_idx ON users (phone);
@@ -704,4 +740,5 @@ ALTER TABLE accounts ADD FOREIGN KEY queue_event_hook_sid_idxfk (queue_event_hoo
ALTER TABLE accounts ADD FOREIGN KEY device_calling_application_sid_idxfk (device_calling_application_sid) REFERENCES applications (application_sid);
ALTER TABLE accounts ADD FOREIGN KEY siprec_hook_sid_idxfk (siprec_hook_sid) REFERENCES applications (application_sid);
SET FOREIGN_KEY_CHECKS=1;
SET FOREIGN_KEY_CHECKS=1;

View File

@@ -42,7 +42,7 @@ services:
ipv4_address: 172.38.0.7
drachtio:
image: drachtio/drachtio-server:0.8.24
image: drachtio/drachtio-server:0.8.26
restart: always
command: drachtio --contact "sip:*;transport=udp" --mtu 4096 --address 0.0.0.0 --port 9022
ports:
@@ -57,7 +57,7 @@ services:
condition: service_healthy
freeswitch:
image: drachtio/drachtio-freeswitch-mrf:0.6.1
image: drachtio/drachtio-freeswitch-mrf:0.9.2-4
restart: always
command: freeswitch --rtp-range-start 20000 --rtp-range-end 20100
environment:

View File

@@ -347,8 +347,7 @@ test('\'transcribe\' test - deepgram config options altLanguages', async(t) => {
"en-US"
],
"deepgramOptions": {
"model": "2-ea",
"tier": "nova",
"model": "nova-2",
"numerals": true,
"ner": true,
"vadTurnoff": 10,
@@ -408,8 +407,7 @@ test('\'transcribe\' test - deepgram config options altLanguages', async(t) => {
"en-US"
],
"deepgramOptions": {
"model": "2-ea",
"tier": "nova",
"model": "nova-2",
"numerals": true,
"ner": true,
"vadTurnoff": 10,

View File

@@ -25,29 +25,38 @@ module.exports = (serviceName) => {
}),
});
let exporter;
const exporters = [];
if (OTEL_EXPORTER_JAEGER_AGENT_HOST || OTEL_EXPORTER_JAEGER_ENDPOINT) {
exporter = new JaegerExporter();
}
else if (OTEL_EXPORTER_ZIPKIN_URL) {
exporter = new ZipkinExporter({url:OTEL_EXPORTER_ZIPKIN_URL});
}
else {
exporter = new OTLPTraceExporter({
url: OTEL_EXPORTER_COLLECTOR_URL
});
exporters.push(new JaegerExporter());
}
provider.addSpanProcessor(new BatchSpanProcessor(exporter, {
// The maximum queue size. After the size is reached spans are dropped.
maxQueueSize: 100,
// The maximum batch size of every export. It must be smaller or equal to maxQueueSize.
maxExportBatchSize: 10,
// The interval between two consecutive exports
scheduledDelayMillis: 500,
// How long the export can run before it is cancelled
exportTimeoutMillis: 30000,
}));
// Register the Zipkin exporter at most once. Each entry in `exporters` gets its
// own BatchSpanProcessor in the forEach below, so pushing the same exporter
// twice would attach two processors targeting the same Zipkin URL.
if (OTEL_EXPORTER_ZIPKIN_URL) {
  exporters.push(new ZipkinExporter({url:OTEL_EXPORTER_ZIPKIN_URL}));
}
if (OTEL_EXPORTER_COLLECTOR_URL) {
exporters.push(new OTLPTraceExporter({
url: OTEL_EXPORTER_COLLECTOR_URL
}));
}
exporters.forEach((element) => {
provider.addSpanProcessor(new BatchSpanProcessor(element, {
// The maximum queue size. After the size is reached spans are dropped.
maxQueueSize: 100,
// The maximum batch size of every export. It must be smaller or equal to maxQueueSize.
maxExportBatchSize: 10,
// The interval between two consecutive exports
scheduledDelayMillis: 500,
// How long the export can run before it is cancelled
exportTimeoutMillis: 30000,
}));
});
// Initialize the OpenTelemetry APIs to use the NodeTracerProvider bindings
provider.register();