mirror of https://github.com/jambonz/batch-speech-utils.git
synced 2026-01-25 02:08:27 +00:00

updating for transcription schema
@@ -1,13 +1,15 @@
 const transcriptionOptions = {
   model: 'nova-2',
   smart_format: true,
-  detect_entities: true
+  detect_entities: true,
+  multichannel: true
 };
 
 const redactionOptions = {
   model: 'nova-2',
   smart_format: true,
-  redact: 'pii'
+  redact: 'pii',
+  multichannel: true
 };
 
 const analysisOptions = {
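Note: with multichannel enabled, Deepgram transcribes each audio channel independently and returns one entry per channel under results.channels, which is what the new extractTranscript helper in the next hunk walks. A minimal sketch of reading the per-channel transcripts with these options (the function name, API key, and file path are illustrative placeholders, not part of the repo):

const fs = require('fs');
const { createClient } = require('@deepgram/sdk');
const { transcriptionOptions } = require('./config');

// Illustrative only: apiKey and filePath are placeholders.
async function listChannelTranscripts(apiKey, filePath) {
  const client = createClient(apiKey);
  const { result } = await client.listen.prerecorded.transcribeFile(
    fs.readFileSync(filePath),
    transcriptionOptions
  );
  // With multichannel: true there is one entry per audio channel.
  return result.results.channels.map((channel, i) => ({
    channel: i,
    transcript: channel.alternatives[0].transcript
  }));
}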
@@ -2,6 +2,39 @@ const fs = require('fs');
 const { createClient } = require('@deepgram/sdk');
 const { transcriptionOptions, redactionOptions, analysisOptions } = require('./config');
 
+function extractTranscript(data) {
+  // eslint-disable-next-line max-len
+  const paragraphs = data.results.channels.flatMap((channel) => channel.alternatives.flatMap((alt) => alt.paragraphs.paragraphs));
+  let ctr = 0;
+  // Use reduce to process each paragraph and sentence, consolidating transcripts by speaker
+  return paragraphs.reduce((acc, paragraph) => {
+    paragraph.sentences.forEach((sentence) => {
+      const wordsDetails = data.results.channels
+        .find((channel) => channel.alternatives.some((alt) => alt.paragraphs.paragraphs.includes(paragraph)))
+        .alternatives[0].words
+        .filter((word) => word.start >= sentence.start && word.end <= sentence.end)
+        .map((word) => ({
+          word: word.word,
+          start: word.start,
+          end: word.end,
+          confidence: word.confidence
+        }));
+
+      acc.push({
+        timestamp: sentence.start,
+        duration: Math.round(1000 * (sentence.end - sentence.start)),
+        startTime: sentence.start,
+        endTime: sentence.end,
+        speaker: ctr++ % 2,
+        transcript: sentence.text,
+        words: wordsDetails
+      });
+    });
+    return acc;
+  }, []);
+}
+
+
 const transcribe = async(logger, apiKey, filePath) => {
   logger.info(`Transcribing audio file: ${filePath}`);
   //creating a deepgram client
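extractTranscript expects the paragraph/sentence structure Deepgram returns when smart_format is on. A rough sketch of the input shape it reads and the events it emits (the mock object is hypothetical and trimmed to only the fields the function touches):

// Hypothetical minimal Deepgram-style result: only the fields extractTranscript reads.
const mockResult = {
  results: {
    channels: [{
      alternatives: [{
        words: [{ word: 'hello', start: 0.0, end: 0.4, confidence: 0.98 }],
        paragraphs: {
          paragraphs: [{
            sentences: [{ text: 'Hello.', start: 0.0, end: 0.4 }]
          }]
        }
      }]
    }]
  }
};

// extractTranscript(mockResult) would produce something like:
// [{ timestamp: 0, duration: 400, startTime: 0, endTime: 0.4, speaker: 0,
//    transcript: 'Hello.', words: [{ word: 'hello', start: 0, end: 0.4, confidence: 0.98 }] }]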
@@ -9,32 +42,25 @@
   //audio file buffer
   const fileBuffer = fs.readFileSync(filePath);
   //transcription
-  const { result:transcriptResult } = await client.listen.prerecorded.transcribeFile(fileBuffer, transcriptionOptions);
-  const transcript = transcriptResult.results.channels[0].alternatives[0].transcript;
-  const timestamps = transcriptResult.results.channels[0].alternatives[0].words;
-  const entities = transcriptResult.results.channels[0].alternatives[0].entities;
-  const confidence = transcriptResult.results.channels[0].alternatives[0].confidence;
+  // const { result:transcriptResult } = await client.listen.prerecorded.transcribeFile(fileBuffer, transcriptionOptions);
   //redaction
   const { result:redactionResult } = await client.listen.prerecorded.transcribeFile(fileBuffer, redactionOptions);
-  const redactionTimestamps = redactionResult.results.channels[0].alternatives[0].words;
-  const redacted = redactionResult.results.channels[0].alternatives[0].transcript;
-  //analysis and sentiment
-  const { result:analysisResult } = await client.read.analyzeText({ text:transcript }, analysisOptions);
-  const sentimentSegment = analysisResult.results.sentiments.segments[0];
-  const sentiment = sentimentSegment.sentiment;
-  const sentimentScore = sentimentSegment.sentiment_score;
-  const vendor = 'deepgram';
-  return {
-    vendor,
-    transcript,
-    timestamps,
-    redactionTimestamps,
-    redacted,
-    sentiment,
-    sentimentScore,
-    entities,
-    confidence
+  const data = {
+    'vendor' : 'deepgram',
+    'model' : redactionResult.metadata.model_info[redactionResult.metadata.models[0]].arch,
+    'channels' : redactionResult.metadata.channels,
+    'createdAt': redactionResult.metadata.created
   };
+  data.speechEvents = extractTranscript(redactionResult);
+  const combinedTranscript = data.speechEvents.map(event => event.transcript).join(" ");
+  data.redactionTimestamps = data.speechEvents.flatMap(event => event.words);
+  //analysis and sentiment
+  const { result:analysisResult } = await client.read.analyzeText({ text:combinedTranscript }, analysisOptions);
+  const sentimentSegment = analysisResult.results.sentiments.segments[0];
+  data.sentiment = sentimentSegment.sentiment;
+  data.sentimentScore = sentimentSegment.sentiment_score;
+  data.totalDuration = Math.round(1000 * redactionResult.metadata.duration);
+  return data;
 };
 
 module.exports = transcribe;
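For reference, a minimal sketch of calling the reworked transcribe export and inspecting the reshaped result (the pino logger, the ./transcribe module path, and the audio path are assumptions for illustration, not confirmed by this commit):

const pino = require('pino');
const transcribe = require('./transcribe'); // assumed module path

const logger = pino();

transcribe(logger, process.env.DEEPGRAM_API_KEY, './call.wav')
  .then((data) => {
    // data now carries vendor, model, channels and createdAt plus
    // speechEvents, redactionTimestamps, sentiment, sentimentScore and totalDuration.
    logger.info({ events: data.speechEvents.length, sentiment: data.sentiment }, 'transcription complete');
  })
  .catch((err) => logger.error(err, 'transcription failed'));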
@@ -2,7 +2,7 @@ const ffmpeg = require('fluent-ffmpeg');
 
 const redact = async(logger, { transcriptionData, audioPath, audioOutputPath, delta = 0.05 }) => {
   logger.info(`Redacting audio file: ${audioPath}`);
-  console.log(transcriptionData);
+  // console.log(transcriptionData);
   return new Promise((resolve, reject) => {
     const command = ffmpeg(audioPath)
       .outputFormat('wav'); // Ensure output format is WAV
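The body of redact sits outside this hunk; purely as an illustration of how the redaction timestamps and the delta padding could drive fluent-ffmpeg (not necessarily how this repo builds its filter), one common approach is to mute each flagged word span:

const ffmpeg = require('fluent-ffmpeg');

// Illustrative only: mute each redacted span, padded by `delta` seconds on both sides.
// `timestamps` would come from something like transcriptionData.redactionTimestamps.
function buildMuteFilter(timestamps, delta = 0.05) {
  const spans = timestamps
    .map(({ start, end }) => `between(t,${Math.max(0, start - delta)},${end + delta})`)
    .join('+');
  return spans ? `volume=enable='${spans}':volume=0` : null;
}

function muteSpans(audioPath, audioOutputPath, timestamps) {
  return new Promise((resolve, reject) => {
    const filter = buildMuteFilter(timestamps);
    const command = ffmpeg(audioPath).outputFormat('wav');
    if (filter) command.audioFilters(filter);
    command.on('end', resolve).on('error', reject).save(audioOutputPath);
  });
}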
@@ -1,24 +1,41 @@
 const audioRecordingTemplate = {
-  'parties': {
-    'N': 0,
-    'from': '',
-    'to': ''
-  },
-  'duration': 0,
-  'url': '',
-  'conversation': {
-    'as heard': {
-      'full transcript': '',
-      'confidence': '',
-      'transcription vendor': '',
-      'timestamps': []
-    },
-    'after the fact': {
-      'full transcript': '',
-      'confidence': '',
-      'transcription vendor': '',
-      'timestamps': []
-    }
+  'participants': [
+    {
+      'type': '',
+      'initiatedConversation': false,
+      'id': {
+        'name': null,
+        'phone': ''
+      }
+    }
+  ],
+  'duration': 0,
+  'transcript': {
+    'vendor': '',
+    'model': '',
+    'channels': 0,
+    'createdAt': '',
+    'speechEvents': [
+      {
+        'timestamp': 0,
+        'duration': 0,
+        'startTime': 0,
+        'endTime': 0,
+        'speaker': 0,
+        'transcript': '',
+        'words': [
+          {
+            'word': '',
+            'start': 0,
+            'end': 0,
+            'confidence': 0
+          }
+        ]
+      }
+    ],
+    'sentiment': '',
+    'sentimentScore': 0,
+    'totalDuration': 0
   }
 };
 
@@ -2,127 +2,115 @@
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"parties": {
"type": "object",
"properties": {
"N": {
"type": "integer",
"description": "Number of parties"
"participants": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": ["human", "machine"]
},
"initiatedConversation": {
"type": "boolean"
},
"id": {
"type": "object",
"properties": {
"name": {
"type": ["string", "null"]
},
"phone": {
"type": "string",
"pattern": "^\\+\\d{11}$"
}
},
"required": ["phone"]
}
},
"from": {
"type": "string",
"description": "Identifier for the initiating party"
},
"to": {
"type": "string",
"description": "Identifier for the receiving party"
}
},
"required": ["type", "initiatedConversation", "id"]
}
},
"duration": {
"type": "integer",
"description": "Duration of recording in milliseconds"
"type": "integer"
},
"url": {
"type": "string",
"description": "Where recording is located",
"format": "uri"
},
"conversation": {
"transcript": {
"type": "object",
"properties": {
"as heard": {
"type": "object",
"properties": {
"full transcript": {
"type": "string",
"description": "Transcript as heard during the conversation"
},
"confidence": {
"type": "string",
"description": "confidence score for transcription as heard"
},
"transcription vendor": {
"type": "string",
"description": "transcription vendor realtime"
},
"timestamps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"word": {
"type": "string",
"description": "Word in the as heard transcript"
},
"startTime": {
"type": "string",
"description": "Start time of the word",
"format": "date-time"
},
"endTime": {
"type": "string",
"description": "End time of the word",
"format": "date-time"
},
"confidence": {
"type": "number",
"description": "Confidence level of the word"
}
},
"required": ["word", "startTime", "endTime", "confidence"]
}
}
},
"required": ["full transcript", "timestamps"]
"vendor": {
"type": "string"
},
"after the fact": {
"type": "object",
"properties": {
"full transcript": {
"type": "string",
"description": "Transcript generated after analyzing the conversation"
},
"confidence": {
"type": "string",
"description": "confidence score for transcription after the fact"
},
"transcription vendor": {
"type": "string",
"description": "transcription vendor used for after the fact processing"
},
"timestamps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"word": {
"type": "string",
"description": "Word in the after the fact transcript"
"model": {
"type": "string"
},
"channels": {
"type": "integer"
},
"createdAt": {
"type": "string",
"format": "date-time"
},
"speechEvents": {
"type": "array",
"items": {
"type": "object",
"properties": {
"timestamp": {
"type": "number"
},
"duration": {
"type": "number"
},
"startTime": {
"type": "number"
},
"endTime": {
"type": "number"
},
"speaker": {
"type": "integer"
},
"transcript": {
"type": "string"
},
"words": {
"type": "array",
"items": {
"type": "object",
"properties": {
"word": {
"type": "string"
},
"start": {
"type": "number"
},
"end": {
"type": "number"
},
"confidence": {
"type": "number"
}
},
"startTime": {
"type": "string",
"description": "Start time of the word",
"format": "date-time"
},
"endTime": {
"type": "string",
"description": "End time of the word",
"format": "date-time"
},
"confidence": {
"type": "number",
"description": "Confidence level of the word"
}
},
"required": ["word", "startTime", "endTime", "confidence"]
"required": ["word", "start", "end", "confidence"]
}
}
}
},
"required": ["full transcript", "timestamps"]
},
"required": ["timestamp", "duration", "startTime", "endTime", "speaker", "transcript", "words"]
}
},
"sentiment": {
"type": "string",
"enum": ["positive", "negative", "neutral"]
},
"sentimentScore": {
"type": "number"
},
"totalDuration": {
"type": "integer"
}
},
"required": ["as heard", "after the fact"]
"required": ["vendor", "model", "channels", "createdAt", "speechEvents", "sentiment", "sentimentScore", "totalDuration"]
}
},
"required": ["body", "parties", "duration", "url", "conversation"]
"required": ["participants", "duration", "transcript"]
}
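As a rough illustration of how the updated schema, template, and transcribe output fit together, a validation sketch using ajv (ajv/ajv-formats and the file paths below are assumptions for illustration, not dependencies or names confirmed by this commit):

const Ajv = require('ajv');
const addFormats = require('ajv-formats');

// Assumed paths; the repo's actual file names may differ.
const schema = require('./audioRecording.schema.json');
const transcribe = require('./transcribe');

const ajv = new Ajv({ allErrors: true });
addFormats(ajv); // needed for "format": "uri" and "date-time"
const validate = ajv.compile(schema);

// Build a record around the transcribe() result and check it against the schema.
async function buildAndValidate(logger, apiKey, filePath) {
  const transcript = await transcribe(logger, apiKey, filePath);
  const record = {
    participants: [
      { type: 'human', initiatedConversation: true, id: { name: null, phone: '+15551234567' } }
    ],
    duration: transcript.totalDuration,
    url: 'https://example.com/recordings/call.wav',
    transcript
  };
  if (!validate(record)) logger.info({ errors: validate.errors }, 'schema validation failed');
  return record;
}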