From 8e96527f1c68ed0faaff4270b1c7b6da2fe4f1fd Mon Sep 17 00:00:00 2001
From: Hoan HL
Date: Mon, 17 Nov 2025 15:59:16 +0700
Subject: [PATCH] soniox transcription by node fetch, soniox sdk is deprecated and invalid

---
 lib/utils/speech-utils.js | 125 +++++++++++++++++++++++++++++++++-----
 package-lock.json         |  10 ----
 package.json              |   1 -
 3 files changed, 111 insertions(+), 25 deletions(-)

diff --git a/lib/utils/speech-utils.js b/lib/utils/speech-utils.js
index e27eaa2..b82b78c 100644
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -2,7 +2,6 @@ const sttGoogle = require('@google-cloud/speech').v1p1beta1;
 const { TranscribeClient, ListVocabulariesCommand } = require('@aws-sdk/client-transcribe');
 const { Deepgram } = require('@deepgram/sdk');
 const sdk = require('microsoft-cognitiveservices-speech-sdk');
-const { SpeechClient } = require('@soniox/soniox-node');
 const fs = require('fs');
 const { AssemblyAI } = require('assemblyai');
 const Houndify = require('houndify');
@@ -65,19 +64,117 @@ function capitalizeFirst(str) {
 
 
+/**
+ * Run a test transcription of data/test_audio.wav against the Soniox REST API
+ * using the supplied credentials: upload the file, create a transcription,
+ * poll its status and fetch the transcript. The uploaded file and the
+ * transcription are deleted again (best effort) on every exit path.
+ *
+ * @param {object} logger - logger exposing info()
+ * @param {object} credentials - object carrying the Soniox api_key
+ * @returns {Promise<object>} the parsed transcript response
+ * @throws {Error} on an HTTP error status, a transcription error or a timeout
+ */
 const testSonioxStt = async(logger, credentials) => {
-  const api_key = credentials;
-  const soniox = new SpeechClient(api_key);
-
-  return new Promise(async(resolve, reject) => {
-    try {
-      const result = await soniox.transcribeFileShort('data/test_audio.wav');
-      if (result.words.length > 0) resolve(result);
-      else reject(new Error('no transcript returned'));
-    } catch (error) {
-      logger.info({error}, 'failed to get soniox transcript');
-      reject(error);
-    }
-  });
+  const {api_key} = credentials;
+  const https = require('https');
+  const FormData = require('form-data');
+  const authHeaders = {'Authorization': `Bearer ${api_key}`};
+  const maxAttempts = 30; // status is polled once per second => roughly 30 seconds max wait
+
+  // Ids of the remote resources created below, so the finally block can delete them
+  let fileId;
+  let transcriptionId;
+
+  try {
+    // Upload the test audio as multipart/form-data, streamed into an https request
+    const form = new FormData();
+    form.append('file', fs.createReadStream('data/test_audio.wav'));
+
+    fileId = await new Promise((resolve, reject) => {
+      const req = https.request({
+        hostname: 'api.soniox.com',
+        path: '/v1/files',
+        method: 'POST',
+        headers: {...authHeaders, ...form.getHeaders()}
+      }, (res) => {
+        let data = '';
+        res.on('data', (chunk) => data += chunk);
+        res.on('error', reject);
+        res.on('end', () => {
+          if (res.statusCode >= 400) {
+            reject(new Error(`HTTP ${res.statusCode}: ${data}`));
+            return;
+          }
+          try {
+            resolve(JSON.parse(data).id);
+          } catch (e) {
+            reject(e);
+          }
+        });
+      });
+      req.on('error', reject);
+      // Reject (and abort the request) if the multipart stream itself fails,
+      // e.g. because the audio file cannot be read
+      form.on('error', (err) => {
+        reject(err);
+        req.destroy();
+      });
+      form.pipe(req);
+    });
+
+    // Create transcription
+    const transcriptionRes = await fetch('https://api.soniox.com/v1/transcriptions', {
+      method: 'POST',
+      headers: {...authHeaders, 'Content-Type': 'application/json'},
+      body: JSON.stringify({ model: 'stt-async-v3', file_id: fileId })
+    });
+    if (!transcriptionRes.ok) {
+      throw new Error(`HTTP ${transcriptionRes.status}: ${await transcriptionRes.text()}`);
+    }
+    transcriptionId = (await transcriptionRes.json()).id;
+
+    // Poll once per second until the transcription completes, fails or attempts run out
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+      const statusRes = await fetch(`https://api.soniox.com/v1/transcriptions/${transcriptionId}`, {
+        headers: authHeaders
+      });
+      if (!statusRes.ok) throw new Error(`HTTP ${statusRes.status}: ${await statusRes.text()}`);
+      const {status, error_message} = await statusRes.json();
+
+      if (status === 'completed') {
+        // Get the transcript
+        const transcriptRes = await fetch(
+          `https://api.soniox.com/v1/transcriptions/${transcriptionId}/transcript`,
+          {headers: authHeaders}
+        );
+        if (!transcriptRes.ok) throw new Error(`HTTP ${transcriptRes.status}: ${await transcriptRes.text()}`);
+        const transcript = await transcriptRes.json();
+        return transcript;
+      }
+      if (status === 'error') throw new Error(`Transcription error: ${error_message}`);
+
+      // Wait 1 second before next check
+      await new Promise((resolve) => setTimeout(resolve, 1000));
+    }
+
+    throw new Error(`Transcription timeout after ${maxAttempts} seconds`);
+  } catch (error) {
+    logger.info({error}, 'failed to get soniox transcript');
+    throw error;
+  } finally {
+    // Best-effort cleanup on every exit path (success, error and timeout): a failed
+    // delete is logged but never replaces the transcript or the original error
+    const leftovers = [];
+    if (fileId) leftovers.push(`https://api.soniox.com/v1/files/${fileId}`);
+    if (transcriptionId) leftovers.push(`https://api.soniox.com/v1/transcriptions/${transcriptionId}`);
+    for (const url of leftovers) {
+      try {
+        await fetch(url, {method: 'DELETE', headers: authHeaders});
+      } catch (err) {
+        logger.info({err}, 'failed to clean up soniox resource');
+      }
+    }
+  }
 };
 
 const testSpeechmaticsStt = async(logger, credentials) => {
diff --git a/package-lock.json b/package-lock.json
index 449e5f6..8b79789 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -24,7 +24,6 @@
         "@jambonz/speech-utils": "^0.2.26",
         "@jambonz/time-series": "^0.2.8",
         "@jambonz/verb-specifications": "^0.0.118",
-        "@soniox/soniox-node": "^1.2.2",
         "ajv": "^8.17.1",
         "argon2": "^0.40.1",
         "assemblyai": "^4.3.4",
@@ -5442,15 +5441,6 @@
         "node": ">=16.0.0"
       }
     },
-    "node_modules/@soniox/soniox-node": {
-      "version": "1.2.2",
-      "resolved": "https://registry.npmjs.org/@soniox/soniox-node/-/soniox-node-1.2.2.tgz",
-      "integrity": "sha512-AnY9eo0Ula627wrOb6u8ubN7zCvie6MI296pQDV4PzGvElQuqpzXRqHyum8PL/bGADbYmPvIP5ctCwTNu0AOTQ==",
-      "dependencies": {
-        "@grpc/grpc-js": "^1.6.10",
-        "@grpc/proto-loader": "^0.7.2"
-      }
-    },
     "node_modules/@tokenizer/token": {
       "version": "0.3.0",
       "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz",
diff --git a/package.json b/package.json
index a811a74..25b77f8 100644
--- a/package.json
+++ b/package.json
@@ -35,7 +35,6 @@
     "@jambonz/speech-utils": "^0.2.26",
     "@jambonz/time-series": "^0.2.8",
     "@jambonz/verb-specifications": "^0.0.118",
-    "@soniox/soniox-node": "^1.2.2",
     "ajv": "^8.17.1",
     "argon2": "^0.40.1",
     "assemblyai": "^4.3.4",