mirror of
https://github.com/jambonz/batch-speech-utils.git
synced 2025-12-19 06:07:43 +00:00
lib update: add lib/deepgram module (config, transcribe) and rework the redaction and transcription entry points
This commit is contained in:
24
lib/deepgram/config.js
Normal file
24
lib/deepgram/config.js
Normal file
@@ -0,0 +1,24 @@
|
||||
const transcriptionOptions = {
|
||||
model: 'nova-2',
|
||||
smart_format: true,
|
||||
detect_entities: true
|
||||
};
|
||||
|
||||
const redactionOptions = {
|
||||
model: 'nova-2',
|
||||
smart_format: true,
|
||||
redact: 'pii'
|
||||
};
|
||||
|
||||
const analysisOptions = {
|
||||
language: 'en',
|
||||
sentiment: true,
|
||||
intents: true,
|
||||
summarize: true
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
transcriptionOptions,
|
||||
redactionOptions,
|
||||
analysisOptions
|
||||
};
|
||||
4
lib/deepgram/index.js
Normal file
4
lib/deepgram/index.js
Normal file
@@ -0,0 +1,4 @@
|
||||
module.exports = {
|
||||
transcribe: require('./transcribe')
|
||||
|
||||
};
|
||||
36
lib/deepgram/transcribe.js
Normal file
36
lib/deepgram/transcribe.js
Normal file
@@ -0,0 +1,36 @@
|
||||
const fs = require('fs');
const { createClient } = require('@deepgram/sdk');
const { transcriptionOptions, redactionOptions, analysisOptions } = require('./config');

/**
 * Transcribe, redact, and analyze a local audio file with Deepgram.
 *
 * @param {object} logger - pino-style logger (only `.info` is used here)
 * @param {string} apiKey - Deepgram API key
 * @param {string} filePath - path to the audio file on disk
 * @returns {Promise<object>} { transcript, timestamps, redactionTimestamps,
 *   redacted, sentiment, sentimentScore, entities }
 */
const transcribe = async(logger, apiKey, filePath) => {
  logger.info(`Transcribing audio file: ${filePath}`);

  // Creating a Deepgram client.
  const client = createClient(apiKey);

  // Read the audio once, without blocking the event loop
  // (the original used readFileSync inside an async function).
  const fileBuffer = await fs.promises.readFile(filePath);

  // The plain and redacting transcriptions are independent requests,
  // so issue them in parallel instead of awaiting them one after the other.
  const [{ result: transcriptResult }, { result: redactionResult }] = await Promise.all([
    client.listen.prerecorded.transcribeFile(fileBuffer, transcriptionOptions),
    client.listen.prerecorded.transcribeFile(fileBuffer, redactionOptions)
  ]);

  // Deepgram nests the payload under results.channels[0].alternatives[0].
  const firstAlternative = (res) => res.results.channels[0].alternatives[0];

  const transcript = firstAlternative(transcriptResult).transcript;
  const timestamps = firstAlternative(transcriptResult).words;
  const entities = firstAlternative(transcriptResult).entities;

  const redacted = firstAlternative(redactionResult).transcript;
  const redactionTimestamps = firstAlternative(redactionResult).words;

  // Text intelligence (sentiment/intents/summary) runs on the clean transcript.
  const { result: analysisResult } = await client.read.analyzeText({ text: transcript }, analysisOptions);
  const sentimentSegment = analysisResult.results.sentiments.segments[0];
  const sentiment = sentimentSegment.sentiment;
  const sentimentScore = sentimentSegment.sentiment_score;

  return {
    transcript,
    timestamps,
    redactionTimestamps,
    redacted,
    sentiment,
    sentimentScore,
    entities
  };
};

module.exports = transcribe;
|
||||
@@ -1,59 +0,0 @@
|
||||
const fs = require('fs');
|
||||
|
||||
// Transcribe a local audio file with Deepgram's pre-recorded API,
// requesting smart formatting and entity detection.
async function transcribeFile(deepgram, filePath) {
  const audio = fs.readFileSync(filePath);
  const requestOptions = {
    model: 'nova-2',
    smart_format: true,
    detect_entities: true
  };
  const response = await deepgram.listen.prerecorded.transcribeFile(audio, requestOptions);
  return response.result;
}
|
||||
|
||||
// Transcribe a local audio file with Deepgram while masking PII
// in the returned transcript (redact: 'pii').
async function redactFile(deepgram, filePath) {
  const audio = fs.readFileSync(filePath);
  const requestOptions = {
    model: 'nova-2',
    smart_format: true,
    redact: 'pii'
  };
  const response = await deepgram.listen.prerecorded.transcribeFile(audio, requestOptions);
  return response.result;
}
|
||||
|
||||
// Run Deepgram text intelligence (sentiment, intents, summary) over a transcript.
async function analyzeText(deepgram, text) {
  const analysisRequest = {
    language: 'en',
    sentiment: true,
    intents: true,
    summarize: true
  };
  const response = await deepgram.read.analyzeText({ text }, analysisRequest);
  return response.result;
}
|
||||
|
||||
|
||||
/**
 * End-to-end Deepgram pipeline for one audio file: a plain transcription,
 * a PII-redacted transcription, and sentiment analysis of the transcript.
 *
 * @param {object} deepgram - Deepgram SDK client
 * @param {string} filePath - path to the audio file on disk
 * @returns {Promise<object>} { transcript, timestamps, redactionTimestamps,
 *   redacted, sentiment, sentimentScore, entities }
 * @throws rethrows any Deepgram/file error after logging it
 */
async function processAudio(deepgram, filePath) {
  try {
    // The plain and redacted transcriptions are independent requests —
    // run them in parallel instead of awaiting them one after the other.
    const [transcription, redaction] = await Promise.all([
      transcribeFile(deepgram, filePath),
      redactFile(deepgram, filePath)
    ]);

    // Deepgram nests the payload under results.channels[0].alternatives[0].
    const main = transcription.results.channels[0].alternatives[0];
    const masked = redaction.results.channels[0].alternatives[0];

    const transcript = main.transcript;
    const timestamps = main.words;
    const entities = main.entities;
    const redactionTimestamps = masked.words;
    const redacted = masked.transcript;

    // Sentiment is computed on the unredacted transcript.
    const analysisResult = await analyzeText(deepgram, transcript);
    const sentimentSegment = analysisResult.results.sentiments.segments[0];
    const sentiment = sentimentSegment.sentiment;
    const sentimentScore = sentimentSegment.sentiment_score;

    return {
      transcript,
      timestamps,
      redactionTimestamps,
      redacted,
      sentiment,
      sentimentScore,
      entities
    };
  } catch (error) {
    // Log for operators, then rethrow so callers can decide how to recover.
    console.error('Error processing audio:', error);
    throw error;
  }
}
|
||||
|
||||
module.exports = {
|
||||
transcribeFile,
|
||||
analyzeText,
|
||||
processAudio,
|
||||
redactFile
|
||||
};
|
||||
@@ -1,46 +0,0 @@
|
||||
const ffmpeg = require('fluent-ffmpeg');
|
||||
|
||||
|
||||
/**
 * Silence every redacted segment of an audio file and save the result as WAV.
 *
 * @param {Array<{word: string, start: number, end: number}>} transcriptionData
 *   Word-level timestamps from a Deepgram redacting transcription; redacted
 *   words arrive as bracketed placeholders (e.g. "[pii]") — TODO confirm exact
 *   placeholder format against the Deepgram response.
 * @param {string} audioPath - input audio file
 * @param {string} audioOutputPath - where the redacted WAV is written
 * @param {object} [options]
 * @param {number} [options.delta=0.05] - seconds of padding muted before each redacted word
 * @returns {Promise<void>} resolves when ffmpeg finishes writing the output
 */
async function redactAudioDeepgram(transcriptionData, audioPath, audioOutputPath, { delta = 0.05 } = {}) {
  return new Promise((resolve, reject) => {
    const command = ffmpeg(audioPath)
      .outputFormat('wav'); // Ensure output format is WAV

    // Iterate over transcription data to apply audio filters
    transcriptionData.forEach((data, i) => {
      const { word, start } = data;
      let end = data.end; // Default end time

      // Check if the word needs redaction
      if (word.startsWith('[') && word.endsWith(']')) {
        // Extend the mute window to the start of the next non-redacted word
        // so runs of consecutive redacted words leave no audible gaps.
        for (let j = i + 1; j < transcriptionData.length; j++) {
          if (!(transcriptionData[j].word.startsWith('[') && transcriptionData[j].word.endsWith(']'))) {
            end = transcriptionData[j].start;
            break;
          }
        }
        // Clamp so the padded start never goes negative for the first word.
        const muteFrom = Math.max(0, start - delta);
        command.audioFilters({
          filter: 'volume',
          options: `volume=0:enable='between(t,${muteFrom},${end})'` // Applying silence
        });

        // Log the redacted segments
        console.log(`Redacting from ${start}s to ${end}s: "${word}"`);
      }
    });

    // Handlers for command execution
    command.on('end', () => {
      console.log(`Redacted audio saved at ${audioOutputPath}`);
      resolve(); // Resolve the promise on successful completion
    }).on('error', (err, stdout, stderr) => {
      console.error('Error processing audio file:', err.message);
      console.error('ffmpeg stdout:', stdout);
      console.error('ffmpeg stderr:', stderr);
      reject(err); // Reject the promise on error
    }).saveToFile(audioOutputPath);
  });
}

module.exports = { redactAudioDeepgram };
|
||||
@@ -1,16 +1,49 @@
|
||||
const ffmpeg = require('fluent-ffmpeg');

/**
 * Mute the redacted segments of an audio file and save the result as WAV.
 *
 * @param {object} logger - pino-style logger (assumed `.info`/`.error` take a
 *   message string — TODO confirm against callers)
 * @param {object} opts
 * @param {Array<{word: string, start: number, end: number}>} opts.transcriptionData
 *   word-level timestamps; redacted words are bracketed placeholders like "[pii]"
 * @param {string} opts.audioPath - input audio file
 * @param {string} opts.audioOutputPath - where the redacted WAV is written
 * @param {number} [opts.delta=0.05] - seconds muted before each redacted word
 * @returns {Promise<void>} resolves when ffmpeg finishes writing the output
 */
const redact = async(logger, { transcriptionData, audioPath, audioOutputPath, delta = 0.05 }) => {
  logger.info(`Redacting audio file: ${audioPath}`);
  return new Promise((resolve, reject) => {
    const command = ffmpeg(audioPath)
      .outputFormat('wav'); // Ensure output format is WAV

    // Iterate over transcription data to apply audio filters
    transcriptionData.forEach((data, i) => {
      const { word, start } = data;
      let end = data.end; // Default end time

      // Check if the word needs redaction
      if (word.startsWith('[') && word.endsWith(']')) {
        // Extend the mute window to the start of the next non-redacted word
        // so runs of consecutive redacted words leave no audible gaps.
        for (let j = i + 1; j < transcriptionData.length; j++) {
          if (!(transcriptionData[j].word.startsWith('[') && transcriptionData[j].word.endsWith(']'))) {
            end = transcriptionData[j].start;
            break;
          }
        }
        command.audioFilters({
          filter: 'volume',
          options: `volume=0:enable='between(t,${start - delta},${end})'` // Applying silence
        });

        // Log the redacted segments through the injected logger
        // (was a bare console.log alongside leftover debug dumps).
        logger.info(`Redacting from ${start}s to ${end}s: "${word}"`);
      }
    });

    // Handlers for command execution
    command.on('end', () => {
      logger.info(`Redacted audio saved at ${audioOutputPath}`);
      resolve(); // Resolve the promise on successful completion
    }).on('error', (err, stdout, stderr) => {
      // Debug leftovers (cwd dump, typeof dump) removed; surface the
      // ffmpeg failure through the logger and reject for the caller.
      logger.error(`Error processing audio file: ${err.message}`);
      logger.error(`ffmpeg stdout: ${stdout}`);
      logger.error(`ffmpeg stderr: ${stderr}`);
      reject(err); // Reject the promise on error
    }).saveToFile(audioOutputPath);
  });
};

module.exports = redact;
|
||||
|
||||
@@ -1,19 +1,16 @@
|
||||
const assert = require('assert');
const { transcribe: dgTranscribe } = require('./deepgram');

/**
 * Vendor-dispatching transcription entry point.
 *
 * @param {object} logger - pino-style logger, passed through to the vendor impl
 * @param {object} credentials - { vendor, apiKey }
 * @param {string} filePath - audio file to transcribe
 * @returns {Promise<object>} the vendor's transcription result
 * @throws {AssertionError} when a Deepgram request is missing an API key
 * @throws {Error} when credentials.vendor is not supported
 */
const transcribe = async(logger, credentials, filePath) => {
  // Destructure once and use the same bindings throughout (the original
  // destructured `vendor` but still read `credentials.vendor` in the switch).
  const { vendor, apiKey } = credentials;

  switch (vendor) {
    case 'deepgram':
      assert.ok(apiKey, 'Deepgram API key is required');
      return dgTranscribe(logger, apiKey, filePath);
    default:
      throw new Error(`Unsupported vendor: ${vendor}`);
  }
};

module.exports = transcribe;
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
|
||||
const { createClient } = require('@deepgram/sdk');

// Thin factory over the Deepgram SDK so callers never import the SDK directly.
function createDeepGramClient(apiKey) {
  return createClient(apiKey);
}

module.exports = { createDeepGramClient };
|
||||
Reference in New Issue
Block a user