mirror of
https://github.com/jambonz/batch-speech-utils.git
synced 2026-05-06 08:16:57 +00:00
restructure
This commit is contained in:
@@ -1,7 +1,14 @@
|
||||
const { getTranscription } = require('./lib/transcribe');
|
||||
const { getRedactedAudio } = require('./lib/redact');
|
||||
|
||||
module.exports = {
|
||||
getTranscription,
|
||||
getRedactedAudio
|
||||
// Fallback logger used when the caller does not supply one: every
// level is a shared no-op so logging calls are always safe.
const noop = () => {};
const noopLogger = {
  info: noop,
  debug: noop,
  error: noop,
};
|
||||
|
||||
module.exports = (logger) => {
|
||||
logger = logger || noopLogger;
|
||||
|
||||
return {
|
||||
transcribe: require('./lib/transcribe').bind(null, logger),
|
||||
redact: require('./lib/redact').bind(null, logger),
|
||||
};
|
||||
};
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
module.exports = {
|
||||
transcribe: require('./transcribe')
|
||||
};
|
||||
@@ -0,0 +1,13 @@
|
||||
const fs = require('fs');
|
||||
const { createClient } = require('@deepgram/sdk');
|
||||
|
||||
/**
 * Transcribes an audio file with deepgram (nova-2 model, smart
 * formatting and entity detection enabled).
 *
 * @param {object} logger - pino-style logger (info/error)
 * @param {string} apiKey - deepgram API key
 * @param {string} filePath - path to the audio file to transcribe
 * @returns {Promise<object>} deepgram transcription result
 * @throws {Error} when deepgram reports an error for the request
 */
const transcribe = async(logger, apiKey, filePath) => {
  logger.info(`Transcribing audio file: ${filePath}`);
  const client = createClient(apiKey);
  const fileBuffer = fs.readFileSync(filePath);
  const options = { model: 'nova-2', smart_format: true, detect_entities: true };
  // The deepgram v3 SDK does not throw on API failures — it returns
  // { result, error }; ignoring error would silently return undefined.
  const { result, error } = await client.listen.prerecorded.transcribeFile(fileBuffer, options);
  if (error) {
    logger.error(`Deepgram transcription failed: ${error.message || error}`);
    throw error instanceof Error ? error : new Error(String(error));
  }
  return result;
};

module.exports = transcribe;
|
||||
@@ -1,59 +0,0 @@
|
||||
const fs = require('fs');
|
||||
|
||||
/**
 * Transcribes an audio file with deepgram (nova-2 model, smart
 * formatting and entity detection enabled).
 *
 * @param {object} deepgram - deepgram client
 * @param {string} filePath - path to the audio file
 * @returns {Promise<object>} the deepgram transcription result
 */
async function transcribeFile(deepgram, filePath) {
  const audio = fs.readFileSync(filePath);
  const requestOptions = { model: 'nova-2', smart_format: true, detect_entities: true };
  const response = await deepgram.listen.prerecorded.transcribeFile(audio, requestOptions);
  return response.result;
}
|
||||
|
||||
/**
 * Re-transcribes an audio file with deepgram's PII redaction enabled,
 * so redacted words come back as bracketed tags in the transcript.
 *
 * @param {object} deepgram - deepgram client
 * @param {string} filePath - path to the audio file
 * @returns {Promise<object>} the deepgram transcription result (redacted)
 */
async function redactFile(deepgram, filePath) {
  const audio = fs.readFileSync(filePath);
  const requestOptions = { model: 'nova-2', smart_format: true, redact: 'pii' };
  const response = await deepgram.listen.prerecorded.transcribeFile(audio, requestOptions);
  return response.result;
}
|
||||
|
||||
/**
 * Runs deepgram text intelligence (sentiment, intents, summary) over a
 * transcript.
 *
 * @param {object} deepgram - deepgram client
 * @param {string} text - the text to analyze
 * @returns {Promise<object>} the deepgram analysis result
 */
async function analyzeText(deepgram, text) {
  const requestOptions = { language: 'en', sentiment: true, intents: true, summarize: true };
  const response = await deepgram.read.analyzeText({ text }, requestOptions);
  return response.result;
}
|
||||
|
||||
|
||||
/**
 * Runs the full deepgram pipeline on an audio file: transcription,
 * PII redaction, and text analysis (sentiment).
 *
 * @param {object} deepgram - deepgram client
 * @param {string} filePath - path to the audio file
 * @returns {Promise<object>} transcript, word timestamps, redacted
 *   transcript and its timestamps, sentiment + score, and entities
 * @throws re-throws any deepgram/filesystem error after logging it
 */
async function processAudio(deepgram, filePath) {
  try {
    // The transcription and redaction requests are independent deepgram
    // calls — issue them in parallel instead of awaiting serially.
    const [transcription, redaction] = await Promise.all([
      transcribeFile(deepgram, filePath),
      redactFile(deepgram, filePath),
    ]);

    const alt = transcription.results.channels[0].alternatives[0];
    const redactedAlt = redaction.results.channels[0].alternatives[0];

    // Sentiment analysis needs the plain transcript, so it must run
    // after transcription completes.
    const analysisResult = await analyzeText(deepgram, alt.transcript);
    const sentimentSegment = analysisResult.results.sentiments.segments[0];

    return {
      transcript: alt.transcript,
      timestamps: alt.words,
      redactionTimestamps: redactedAlt.words,
      redacted: redactedAlt.transcript,
      sentiment: sentimentSegment.sentiment,
      sentimentScore: sentimentSegment.sentiment_score,
      entities: alt.entities,
    };
  } catch (error) {
    console.error('Error processing audio:', error);
    throw error;
  }
}
|
||||
|
||||
module.exports = {
|
||||
transcribeFile,
|
||||
analyzeText,
|
||||
processAudio,
|
||||
redactFile
|
||||
};
|
||||
@@ -1,46 +0,0 @@
|
||||
const ffmpeg = require('fluent-ffmpeg');
|
||||
|
||||
|
||||
/**
 * Silences (redacts) spans of an audio file using deepgram word-level
 * timestamps. A word is treated as redacted when deepgram replaced it
 * with a bracketed tag, e.g. "[pii]".
 *
 * @param {Array<{word:string,start:number,end:number}>} transcriptionData - word timestamps
 * @param {string} audioPath - input audio file
 * @param {string} audioOutputPath - where the redacted WAV is written
 * @param {object} [options]
 * @param {number} [options.delta=0.05] - seconds of padding before each muted span
 * @returns {Promise<void>} resolves when ffmpeg finishes writing the output
 */
async function redactAudioDeepgram(transcriptionData, audioPath, audioOutputPath, { delta = 0.05 } = {}) {
  const isRedacted = (w) => w.startsWith('[') && w.endsWith(']');

  return new Promise((resolve, reject) => {
    const job = ffmpeg(audioPath)
      .outputFormat('wav'); // Ensure output format is WAV

    for (let i = 0; i < transcriptionData.length; i++) {
      const { word, start } = transcriptionData[i];
      if (!isRedacted(word)) continue;

      // Mute runs until the start of the next non-redacted word, so a
      // run of consecutive redacted words is silenced as one span;
      // falls back to this word's own end time.
      let end = transcriptionData[i].end;
      for (let j = i + 1; j < transcriptionData.length; j++) {
        if (!isRedacted(transcriptionData[j].word)) {
          end = transcriptionData[j].start;
          break;
        }
      }

      job.audioFilters({
        filter: 'volume',
        options: `volume=0:enable='between(t,${start - delta},${end})'` // Applying silence
      });

      // Log the redacted segments
      console.log(`Redacting from ${start}s to ${end}s: "${word}"`);
    }

    // Handlers for command execution
    job
      .on('end', () => {
        console.log(`Redacted audio saved at ${audioOutputPath}`);
        resolve();
      })
      .on('error', (err, stdout, stderr) => {
        console.error('Error processing audio file:', err.message);
        console.error('ffmpeg stdout:', stdout);
        console.error('ffmpeg stderr:', stderr);
        reject(err);
      })
      .saveToFile(audioOutputPath);
  });
}

module.exports = { redactAudioDeepgram };
|
||||
+42
-12
@@ -1,16 +1,46 @@
|
||||
const { redactAudioDeepgram } = require('./make-redact-audio-deepgram');
|
||||
const ffmpeg = require('fluent-ffmpeg');
|
||||
|
||||
const assert = require('assert');
|
||||
/**
 * Mutes (redacts) PII segments of an audio file, using the word-level
 * timestamps returned by the transcription vendor. Redacted words are
 * the bracketed tags (e.g. "[pii]") deepgram substitutes for PII.
 *
 * @param {object} logger - pino-style logger (info/error)
 * @param {object} opts
 * @param {string} opts.vendor - transcription vendor; only 'deepgram' is supported
 * @param {Array<{word:string,start:number,end:number}>} opts.transcriptionData - word timestamps
 * @param {string} opts.audioPath - input audio file
 * @param {string} opts.audioOutputPath - where the redacted WAV is written
 * @param {number} [opts.delta=0.05] - seconds of padding before each muted span
 * @returns {Promise<void>} resolves when ffmpeg finishes writing the output
 */
const redact = async(logger, {vendor, transcriptionData, audioPath, audioOutputPath, delta = 0.05}) => {
  logger.info(`Redacting audio file: ${audioPath} vendor: ${vendor}`);

  // Only deepgram's bracketed-tag word format is handled; fail fast
  // rather than silently producing un-redacted audio for an unknown vendor.
  assert.ok(vendor === 'deepgram', `Unsupported vendor: ${vendor}`);

  return new Promise((resolve, reject) => {
    const command = ffmpeg(audioPath)
      .outputFormat('wav'); // Ensure output format is WAV

    // Iterate over transcription data to apply audio filters
    transcriptionData.forEach((data, i) => {
      const { word, start } = data;
      let end = data.end; // Default end time

      // Check if the word needs redaction
      if (word.startsWith('[') && word.endsWith(']')) {
        // Find the start of the next non-redacted word
        for (let j = i + 1; j < transcriptionData.length; j++) {
          const next = transcriptionData[j].word;
          if (!(next.startsWith('[') && next.endsWith(']'))) {
            end = transcriptionData[j].start;
            break;
          }
        }

        command.audioFilters({
          filter: 'volume',
          options: `volume=0:enable='between(t,${start - delta},${end})'` // Applying silence
        });

        // Use the injected logger (not console) so callers control output.
        logger.info(`Redacting from ${start}s to ${end}s: "${word}"`);
      }
    });

    // Handlers for command execution
    command.on('end', () => {
      logger.info(`Redacted audio saved at ${audioOutputPath}`);
      resolve(); // Resolve the promise on successful completion
    }).on('error', (err, stdout, stderr) => {
      logger.error(`Error processing audio file: ${err.message}`);
      logger.error(`ffmpeg stdout: ${stdout}`);
      logger.error(`ffmpeg stderr: ${stderr}`);
      reject(err); // Reject the promise on error
    }).saveToFile(audioOutputPath);
  });
};

module.exports = redact;
|
||||
|
||||
+12
-15
@@ -1,19 +1,16 @@
|
||||
const { createDeepGramClient } = require('./utils');
|
||||
const fs = require('fs');
|
||||
const { processAudio } = require('./get-transcription-deepgram');
|
||||
const assert = require('assert');
|
||||
const {transcribe:dgTranscribe} = require('./deepgram');
|
||||
|
||||
/**
 * Dispatches a transcription request to the vendor-specific
 * implementation selected by the credentials.
 *
 * @param {object} logger - pino-style logger
 * @param {object} credentials - must contain vendor; deepgram also needs apiKey
 * @param {string} filePath - path to the audio file to transcribe
 * @returns {Promise<object>} the vendor's transcription result
 * @throws {Error} for unsupported vendors or missing credentials
 */
const transcribe = async(logger, credentials, filePath) => {
  const { vendor, apiKey } = credentials;

  if (vendor === 'deepgram') {
    assert.ok(apiKey, 'Deepgram API key is required');
    return await dgTranscribe(logger, apiKey, filePath);
  }

  throw new Error(`Unsupported vendor: ${vendor}`);
};

module.exports = transcribe;
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
|
||||
const { createClient } = require('@deepgram/sdk');
|
||||
|
||||
/**
 * Thin wrapper around the deepgram SDK's createClient.
 *
 * @param {string} DEEPGRAM_API_KEY - deepgram API key
 * @returns {object} a deepgram client instance
 */
const createDeepGramClient = (DEEPGRAM_API_KEY) => createClient(DEEPGRAM_API_KEY);

module.exports = { createDeepGramClient };
|
||||
Reference in New Issue
Block a user