Updated Deepgram transcription - reduced one API call

2026-01-25 02:08:27 +00:00 · 2024-06-27 03:04:17 -04:00
parent 41c6384080
commit 0183f87064
1 changed file with 54 additions and 31 deletions
--- a/lib/deepgram/transcribe.js
+++ b/lib/deepgram/transcribe.js
@@ -1,37 +1,59 @@
 const fs = require('fs');
 const { createClient } = require('@deepgram/sdk');
-const { transcriptionOptions, redactionOptions, analysisOptions } = require('./config');
+const { redactionOptions, analysisOptions } = require('./config');

 function extractTranscript(data) {
-  // eslint-disable-next-line max-len
-  const paragraphs = data.results.channels.flatMap((channel) => channel.alternatives.flatMap((alt) => alt.paragraphs.paragraphs));
-  let ctr = 0;
-  // Use reduce to process each paragraph and sentence, consolidating transcripts by speaker
-  return  paragraphs.reduce((acc, paragraph) => {
-    paragraph.sentences.forEach((sentence) => {
-      const wordsDetails = data.results.channels
-        .find((channel) => channel.alternatives.some((alt) => alt.paragraphs.paragraphs.includes(paragraph)))
-        .alternatives[0].words
-        .filter((word) => word.start >= sentence.start && word.end <= sentence.end)
-        .map((word) => ({
-          word: word.word,
-          start: word.start,
-          end: word.end,
-          confidence: word.confidence
-        }));
-
-      acc.push({
-        timestamp: sentence.start,
-        duration: Math.round(1000 * (sentence.end - sentence.start)),
-        startTime: sentence.start,
-        endTime: sentence.end,
-        speaker: ctr++ % 2,
-        transcript: sentence.text,
-        words: wordsDetails
-      });
+  var words = [];
+  data.results.channels.forEach((d, idx) => {
+    d.alternatives.forEach((f) => {
+      f.words.forEach((w) => {w.channel = idx % 2; words.push(w); });
    });
-    return acc;
-  }, []);
+  });
+  words = words.sort((a, b) => a.start - b.start);
+  const aggregatedSentences = [];
+  let currentChannel = null;
+  let currentSentence = null;
+
+  words.forEach((word) => {
+    if (currentChannel === null) {
+      currentChannel = word.channel;
+      currentSentence = {
+        start: word.start,
+        end: word.end,
+        duration: word.end - word.start,
+        speaker: word.channel,
+        sentence: word.punctuated_word,
+        words: [{ word: word.word, start: word.start, end: word.end }]
+      };
+    } else if (word.channel === currentChannel) {
+      if (currentSentence.sentence) {
+        currentSentence.sentence += ' ' + (word.punctuated_word);
+      } else {
+        currentSentence.sentence = word.punctuated_word || word.word;
+      }
+      currentSentence.words.push({ word: word.word, start: word.start, end: word.end });
+      currentSentence.end = word.end; // Update end time of the current sentence
+      currentSentence.duration = currentSentence.end - currentSentence.start; // Calculate duration correctly
+    } else {
+      aggregatedSentences.push(currentSentence);
+      currentChannel = word.channel;
+      currentSentence = {
+        start: word.start,
+        end: word.end,
+        duration: word.end - word.start,
+        speaker: word.channel,
+        sentence: word.punctuated_word,
+        words: [{ word: word.word, start: word.start, end: word.end }]
+      };
+    }
+  });
+
+  // Push the last sentence
+  if (currentSentence !== null) {
+    aggregatedSentences.push(currentSentence);
+  }
+
+  return aggregatedSentences;
 }


@@ -52,14 +74,15 @@ const transcribe = async(logger, apiKey, filePath) => {
    'createdAt': redactionResult.metadata.created
  };
  data.speechEvents = extractTranscript(redactionResult);
-  const combinedTranscript = data.speechEvents.map(event => event.transcript).join(" ");
-  data.redactionTimestamps = data.speechEvents.flatMap(event => event.words);
+  const combinedTranscript = data.speechEvents.map((event) => event.sentence).join(' ');
+  data.redactionTimestamps = data.speechEvents.flatMap((event) => event.words);
  //analysis and sentiment
  const { result:analysisResult } = await client.read.analyzeText({ text:combinedTranscript }, analysisOptions);
  const sentimentSegment = analysisResult.results.sentiments.segments[0];
  data.sentiment = sentimentSegment.sentiment;
  data.sentimentScore = sentimentSegment.sentiment_score;
  data.totalDuration = Math.round(1000 * redactionResult.metadata.duration);
+
  return data;
 };