@@ -124,6 +124,80 @@ const formatTranscript = (data) => {
124124 storeData ( transcription ) ;
125125}
126126
127+
128+
129+
/**
 * formatTranscript with speakers: builds a speaker-labelled transcript from a
 * recognition response and passes the resulting string to storeData().
 *
 * Only the LAST entry of `data.results` is read. As the original author noted,
 * that last results object is different: it has one big alternatives array
 * with speaker tags for each word.
 *
 * Each wordInfo looks like this:
 *   {
 *     "startTime": { "seconds": "4", "nanos": 200000000 },
 *     "endTime":   { "seconds": "4", "nanos": 800000000 },
 *     "word": "Welcome",
 *     "speakerTag": 1
 *   }
 *
 * Whenever the speaker tag differs from the previous word's tag (the initial
 * "previous" tag is 0), the word is prefixed with a
 * "Speaker <tag> (<sec2time(start)>)" header; otherwise the bare word is
 * emitted. All pieces are joined with single spaces.
 *
 * @param {Object} data - parsed recognition response; its last `results`
 *   entry must expose `alternatives[0].words` as an array.
 * @throws {TypeError} if that words array cannot be found.
 */
const formatSpeakerTranscript = (data) => {
  // Locate the word list step by step so a malformed response fails with a
  // descriptive error instead of "cannot read property ... of undefined".
  const results = data && data.results;
  const lastResult = Array.isArray(results) ? results[results.length - 1] : undefined;
  const firstAlternative =
    lastResult && Array.isArray(lastResult.alternatives) ? lastResult.alternatives[0] : undefined;
  const words = firstAlternative && firstAlternative.words;

  if (!Array.isArray(words)) {
    throw new TypeError(
      'formatSpeakerTranscript: expected data.results[last].alternatives[0].words to be an array'
    );
  }

  // Renders a word's start time as a decimal "seconds.fraction" string.
  // nanos is a count of nanoseconds, so it is zero-padded to 9 digits before
  // being appended; dividing it by 1e8 and concatenating (the previous
  // approach) produced strings such as "4.2.5" for 250000000 and "4.NaN" when
  // nanos was absent. Missing fields are treated as 0.
  const startSecondsOf = (wordInfo) => {
    const startTime = wordInfo.startTime || {};
    const seconds = startTime.seconds != null ? startTime.seconds : 0;
    const nanos = startTime.nanos != null ? startTime.nanos : 0;
    // Trailing zeros are dropped so 200000000 still renders as "4.2"; an
    // all-zero fraction stays "0" so whole seconds still render as "4.0".
    const fraction = `000000000${nanos}`.slice(-9).replace(/0+$/, '') || '0';

    return `${seconds}.${fraction}`;
  };

  // Speaker tag of the previously emitted word; 0 means "no speaker yet".
  let lastSpeaker = 0;
  const pieces = [];

  for (const wordInfo of words) {
    const speaker = wordInfo.speakerTag;

    if (speaker === lastSpeaker) {
      // Same speaker keeps talking: just the word.
      pieces.push(wordInfo.word);
    } else {
      // Speaker changed: add speaker label and start time with the word.
      lastSpeaker = speaker;
      pieces.push(`\nSpeaker ${speaker} (${sec2time(startSecondsOf(wordInfo))}) \n\t${wordInfo.word}`);
    }
  }

  // Earlier versions tracked the speaker on `global.lastSpeaker`; the final
  // value is still published there in case other code reads it.
  // NOTE(review): drop this line once nothing depends on the global.
  global.lastSpeaker = lastSpeaker;

  storeData(pieces.join(' '));
};
200+
127201// // Detects speech in the audio file. This creates a recognition job that you
128202// // can wait for now, or get its result later.
129203// const [operation] = client.longRunningRecognize(request);
@@ -172,4 +246,4 @@ const formatTranscript = (data) => {
172246// // fs.unlink(name);
173247// });
174248
175- formatTranscript ( JSON . parse ( readData ( ) ) ) ;
// Script entry point: parse whatever readData() returns as JSON and pass it to
// formatSpeakerTranscript, which hands the formatted transcript to storeData().
249+ formatSpeakerTranscript ( JSON . parse ( readData ( ) ) ) ;
0 commit comments