updating for transcription schema

surajshivakumar
2024-06-26 19:51:18 -04:00
parent 4a6cc284a7
commit 90ecd734b4
5 changed files with 189 additions and 156 deletions

View File

@@ -1,13 +1,15 @@
 const transcriptionOptions = {
   model: 'nova-2',
   smart_format: true,
-  detect_entities: true
+  detect_entities: true,
+  multichannel: true
 };
 const redactionOptions = {
   model: 'nova-2',
   smart_format: true,
-  redact: 'pii'
+  redact: 'pii',
+  multichannel: true
 };
 const analysisOptions = {
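
With multichannel: true, Deepgram transcribes each audio channel separately and returns one entry per channel under results.channels, which is what the reworked extractTranscript below walks over. A minimal sketch of reading the per-channel text out of a prerecorded response (the response variable is a placeholder for the object returned by transcribeFile):

  // List each channel's top transcript from a multichannel response.
  const perChannel = response.results.channels.map((channel, i) => ({
    channel: i,
    text: channel.alternatives[0].transcript
  }));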

View File

@@ -2,6 +2,39 @@ const fs = require('fs');
 const { createClient } = require('@deepgram/sdk');
 const { transcriptionOptions, redactionOptions, analysisOptions } = require('./config');
+function extractTranscript(data) {
+  // eslint-disable-next-line max-len
+  const paragraphs = data.results.channels.flatMap((channel) => channel.alternatives.flatMap((alt) => alt.paragraphs.paragraphs));
+  let ctr = 0;
+  // Use reduce to process each paragraph and sentence, consolidating transcripts by speaker
+  return paragraphs.reduce((acc, paragraph) => {
+    paragraph.sentences.forEach((sentence) => {
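+      // re-locate this paragraph's channel, then keep only the words inside the sentence's time window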
+      const wordsDetails = data.results.channels
+        .find((channel) => channel.alternatives.some((alt) => alt.paragraphs.paragraphs.includes(paragraph)))
+        .alternatives[0].words
+        .filter((word) => word.start >= sentence.start && word.end <= sentence.end)
+        .map((word) => ({
+          word: word.word,
+          start: word.start,
+          end: word.end,
+          confidence: word.confidence
+        }));
+      acc.push({
+        timestamp: sentence.start,
+        duration: Math.round(1000 * (sentence.end - sentence.start)),
+        startTime: sentence.start,
+        endTime: sentence.end,
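+        // crude diarization: the speaker index just alternates 0/1 per sentence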
+        speaker: ctr++ % 2,
+        transcript: sentence.text,
+        words: wordsDetails
+      });
+    });
+    return acc;
+  }, []);
+}
 const transcribe = async(logger, apiKey, filePath) => {
   logger.info(`Transcribing audio file: ${filePath}`);
   //creating a deepgram client
@@ -9,32 +42,25 @@ const transcribe = async(logger, apiKey, filePath) => {
   //audio file buffer
   const fileBuffer = fs.readFileSync(filePath);
   //transcription
-  const { result:transcriptResult } = await client.listen.prerecorded.transcribeFile(fileBuffer, transcriptionOptions);
-  const transcript = transcriptResult.results.channels[0].alternatives[0].transcript;
-  const timestamps = transcriptResult.results.channels[0].alternatives[0].words;
-  const entities = transcriptResult.results.channels[0].alternatives[0].entities;
-  const confidence = transcriptResult.results.channels[0].alternatives[0].confidence;
+  // const { result:transcriptResult } = await client.listen.prerecorded.transcribeFile(fileBuffer, transcriptionOptions);
   //redaction
   const { result:redactionResult } = await client.listen.prerecorded.transcribeFile(fileBuffer, redactionOptions);
-  const redactionTimestamps = redactionResult.results.channels[0].alternatives[0].words;
-  const redacted = redactionResult.results.channels[0].alternatives[0].transcript;
-  //analysis and sentiment
-  const { result:analysisResult } = await client.read.analyzeText({ text:transcript }, analysisOptions);
-  const sentimentSegment = analysisResult.results.sentiments.segments[0];
-  const sentiment = sentimentSegment.sentiment;
-  const sentimentScore = sentimentSegment.sentiment_score;
-  const vendor = 'deepgram';
-  return {
-    vendor,
-    transcript,
-    timestamps,
-    redactionTimestamps,
-    redacted,
-    sentiment,
-    sentimentScore,
-    entities,
-    confidence
+  const data = {
+    'vendor': 'deepgram',
+    'model': redactionResult.metadata.model_info[redactionResult.metadata.models[0]].arch,
+    'channels': redactionResult.metadata.channels,
+    'createdAt': redactionResult.metadata.created
   };
+  data.speechEvents = extractTranscript(redactionResult);
+  const combinedTranscript = data.speechEvents.map(event => event.transcript).join(" ");
+  data.redactionTimestamps = data.speechEvents.flatMap(event => event.words);
+  //analysis and sentiment
+  const { result:analysisResult } = await client.read.analyzeText({ text:combinedTranscript }, analysisOptions);
+  const sentimentSegment = analysisResult.results.sentiments.segments[0];
+  data.sentiment = sentimentSegment.sentiment;
+  data.sentimentScore = sentimentSegment.sentiment_score;
+  data.totalDuration = Math.round(1000 * redactionResult.metadata.duration);
+  return data;
 };
 module.exports = transcribe;
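
For context, a minimal sketch of calling the reworked module (the require path, the console-as-logger shortcut, the DEEPGRAM_API_KEY variable, and call.wav are placeholders, not part of the commit):

  const transcribe = require('./transcribe'); // hypothetical path

  (async () => {
    const logger = console; // anything exposing .info() works
    const data = await transcribe(logger, process.env.DEEPGRAM_API_KEY, './call.wav');
    console.log(`${data.speechEvents.length} speech events, sentiment: ${data.sentiment}`);
  })();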

View File

@@ -2,7 +2,7 @@ const ffmpeg = require('fluent-ffmpeg');
 const redact = async(logger, { transcriptionData, audioPath, audioOutputPath, delta = 0.05 }) => {
   logger.info(`Redacting audio file: ${audioPath}`);
-  console.log(transcriptionData);
+  // console.log(transcriptionData);
   return new Promise((resolve, reject) => {
     const command = ffmpeg(audioPath)
       .outputFormat('wav'); // Ensure output format is WAV
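
The hunk stops before the filter logic, so the committed redaction approach isn't visible here. For orientation only: one common way to silence PII spans with fluent-ffmpeg is a chain of ffmpeg volume filters gated by between(t,start,end); a sketch under that assumption (buildMuteFilter and piiSegments are hypothetical names, not necessarily the committed implementation):

  // Hypothetical: mute each [start, end] window (in seconds), padded by delta.
  const buildMuteFilter = (segments, delta) => segments
    .map(({ start, end }) => `volume=0:enable='between(t,${Math.max(0, start - delta)},${end + delta})'`)
    .join(',');
  // e.g. command.audioFilters(buildMuteFilter(piiSegments, delta));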

View File

@@ -1,24 +1,41 @@
 const audioRecordingTemplate = {
-  'parties': {
-    'N': 0,
-    'from': '',
-    'to': ''
-  },
-  'duration': 0,
-  'url': '',
-  'conversation': {
-    'as heard': {
-      'full transcript': '',
-      'confidence': '',
-      'transcription vendor': '',
-      'timestamps': []
-    },
-    'after the fact': {
-      'full transcript': '',
-      'confidence': '',
-      'transcription vendor': '',
-      'timestamps': []
-    }
+  'participants': [
+    {
+      'type': '',
+      'initiatedConversation': false,
+      'id': {
+        'name': null,
+        'phone': ''
+      }
+    }
+  ],
+  'duration': 0,
+  'transcript': {
+    'vendor': '',
+    'model': '',
+    'channels': 0,
+    'createdAt': '',
+    'speechEvents': [
+      {
+        'timestamp': 0,
+        'duration': 0,
+        'startTime': 0,
+        'endTime': 0,
+        'speaker': 0,
+        'transcript': '',
+        'words': [
+          {
+            'word': '',
+            'start': 0,
+            'end': 0,
+            'confidence': 0
+          }
+        ]
+      }
+    ],
+    'sentiment': '',
+    'sentimentScore': 0,
+    'totalDuration': 0
   }
 };
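
A note on intended use, inferred rather than shown in the commit: the template's transcript branch mirrors the object that transcribe() now returns, so a recording document could be assembled along these lines (structuredClone and the mapping are illustrative assumptions; redactionTimestamps is returned by transcribe() but has no slot in the template):

  // Hypothetical: merge the transcribe() result into a fresh copy of the template.
  const record = structuredClone(audioRecordingTemplate); // Node 17+
  const { redactionTimestamps, ...transcriptFields } = data; // data from transcribe()
  record.transcript = transcriptFields;
  record.duration = data.totalDuration;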

View File

@@ -2,127 +2,115 @@
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"parties": {
"type": "object",
"properties": {
"N": {
"type": "integer",
"description": "Number of parties"
"participants": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": ["human", "machine"]
},
"initiatedConversation": {
"type": "boolean"
},
"id": {
"type": "object",
"properties": {
"name": {
"type": ["string", "null"]
},
"phone": {
"type": "string",
"pattern": "^\\+\\d{11}$"
}
},
"required": ["phone"]
}
},
"from": {
"type": "string",
"description": "Identifier for the initiating party"
},
"to": {
"type": "string",
"description": "Identifier for the receiving party"
}
},
"required": ["type", "initiatedConversation", "id"]
}
},
"duration": {
"type": "integer",
"description": "Duration of recording in milliseconds"
"type": "integer"
},
"url": {
"type": "string",
"description": "Where recording is located",
"format": "uri"
},
"conversation": {
"transcript": {
"type": "object",
"properties": {
"as heard": {
"type": "object",
"properties": {
"full transcript": {
"type": "string",
"description": "Transcript as heard during the conversation"
},
"confidence": {
"type": "string",
"description": "confidence score for transcription as heard"
},
"transcription vendor": {
"type": "string",
"description": "transcription vendor realtime"
},
"timestamps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"word": {
"type": "string",
"description": "Word in the as heard transcript"
},
"startTime": {
"type": "string",
"description": "Start time of the word",
"format": "date-time"
},
"endTime": {
"type": "string",
"description": "End time of the word",
"format": "date-time"
},
"confidence": {
"type": "number",
"description": "Confidence level of the word"
}
},
"required": ["word", "startTime", "endTime", "confidence"]
}
}
},
"required": ["full transcript", "timestamps"]
"vendor": {
"type": "string"
},
"after the fact": {
"type": "object",
"properties": {
"full transcript": {
"type": "string",
"description": "Transcript generated after analyzing the conversation"
},
"confidence": {
"type": "string",
"description": "confidence score for transcription after the fact"
},
"transcription vendor": {
"type": "string",
"description": "transcription vendor used for after the fact processing"
},
"timestamps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"word": {
"type": "string",
"description": "Word in the after the fact transcript"
"model": {
"type": "string"
},
"channels": {
"type": "integer"
},
"createdAt": {
"type": "string",
"format": "date-time"
},
"speechEvents": {
"type": "array",
"items": {
"type": "object",
"properties": {
"timestamp": {
"type": "number"
},
"duration": {
"type": "number"
},
"startTime": {
"type": "number"
},
"endTime": {
"type": "number"
},
"speaker": {
"type": "integer"
},
"transcript": {
"type": "string"
},
"words": {
"type": "array",
"items": {
"type": "object",
"properties": {
"word": {
"type": "string"
},
"start": {
"type": "number"
},
"end": {
"type": "number"
},
"confidence": {
"type": "number"
}
},
"startTime": {
"type": "string",
"description": "Start time of the word",
"format": "date-time"
},
"endTime": {
"type": "string",
"description": "End time of the word",
"format": "date-time"
},
"confidence": {
"type": "number",
"description": "Confidence level of the word"
}
},
"required": ["word", "startTime", "endTime", "confidence"]
"required": ["word", "start", "end", "confidence"]
}
}
}
},
"required": ["full transcript", "timestamps"]
},
"required": ["timestamp", "duration", "startTime", "endTime", "speaker", "transcript", "words"]
}
},
"sentiment": {
"type": "string",
"enum": ["positive", "negative", "neutral"]
},
"sentimentScore": {
"type": "number"
},
"totalDuration": {
"type": "integer"
}
},
"required": ["as heard", "after the fact"]
"required": ["vendor", "model", "channels", "createdAt", "speechEvents", "sentiment", "sentimentScore", "totalDuration"]
}
},
"required": ["body", "parties", "duration", "url", "conversation"]
"required": ["participants", "duration", "transcript"]
}
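
Since the new schema targets draft-07 and leans on the date-time and uri formats, validating a document needs a formats plugin; a minimal sketch with Ajv (the ajv and ajv-formats packages and the schema path are assumptions about the toolchain, not part of this commit):

  const Ajv = require('ajv');
  const addFormats = require('ajv-formats');
  const schema = require('./audioRecording.schema.json'); // hypothetical path

  const ajv = new Ajv({ allErrors: true });
  addFormats(ajv); // enables the "date-time" and "uri" format checks
  const validate = ajv.compile(schema);

  const record = { participants: [], duration: 0, transcript: {} }; // deliberately incomplete
  if (!validate(record)) {
    console.error(validate.errors); // reports the transcript fields the schema requires
  }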