nodejs-docs-samples/speech/betaFeatures.js at main · christopherdongo/nodejs-docs-samples

460 lines (396 loc) · 13.1 KB
// Copyright 2017 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//      http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
 * This application demonstrates how to perform basic recognize operations
 * with the Google Cloud Speech API.
 *
 * For more information, see the README.md under /speech and the documentation
 * at https://cloud.google.com/speech/docs.
 */
'use strict';
async function speechTranscribeDiarization(fileName) {
  // [START speech_transcribe_diarization_beta]
  const fs = require('fs');
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1p1beta1;
  // Creates a client
  const client = new speech.SpeechClient();
   * TODO(developer): Uncomment the following lines before running the sample.
  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';
  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 8000,
    languageCode: 'en-US',
    enableSpeakerDiarization: true,
    minSpeakerCount: 2,
    maxSpeakerCount: 2,
    model: 'phone_call',
  const audio = {
    content: fs.readFileSync(fileName).toString('base64'),
  const request = {
    config: config,
    audio: audio,
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
  console.log('Speaker Diarization:');
  const result = response.results[response.results.length - 1];
  const wordsInfo = result.alternatives[0].words;
  // Note: The transcript within each result is separate and sequential per result.
  // However, the words list within an alternative includes all the words
  // from all the results thus far. Thus, to get all the words with speaker
  // tags, you only have to take the words list from the last result:
  wordsInfo.forEach(a =>
    console.log(` word: ${a.word}, speakerTag: ${a.speakerTag}`)
  // [END speech_transcribe_diarization_beta]
async function asyncSpeechTranscribeDiarizationGCS(gcsUri) {
  // [START speech_transcribe_diarization_gcs_beta]
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1p1beta1;
  // Creates a client
  const client = new speech.SpeechClient();
   * TODO(developer): Uncomment the following line before running the sample.
  // const uri = path to GCS audio file e.g. `gs:/bucket/audio.wav`;
  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 8000,
    languageCode: 'en-US',
    enableSpeakerDiarization: true,
    minSpeakerCount: 2,
    maxSpeakerCount: 2,
    model: 'phone_call',
  const audio = {
    uri: gcsUri,
  const request = {
    config: config,
    audio: audio,
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
  console.log('Speaker Diarization:');
  const result = response.results[response.results.length - 1];
  const wordsInfo = result.alternatives[0].words;
  // Note: The transcript within each result is separate and sequential per result.
  // However, the words list within an alternative includes all the words
  // from all the results thus far. Thus, to get all the words with speaker
  // tags, you only have to take the words list from the last result:
  wordsInfo.forEach(a =>
    console.log(` word: ${a.word}, speakerTag: ${a.speakerTag}`)
  // [END speech_transcribe_diarization_gcs_beta]
async function speechTranscribeMultiChannel(fileName) {
  // [START speech_transcribe_multichannel_beta]
  const fs = require('fs');
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1p1beta1;
  // Creates a client
  const client = new speech.SpeechClient();
   * TODO(developer): Uncomment the following lines before running the sample.
  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';
  const config = {
    encoding: 'LINEAR16',
    languageCode: 'en-US',
    audioChannelCount: 2,
    enableSeparateRecognitionPerChannel: true,
  const audio = {
    content: fs.readFileSync(fileName).toString('base64'),
  const request = {
    config: config,
    audio: audio,
  const [response] = await client.recognize(request);
  const transcription = response.results
      result =>
        ` Channel Tag: ${result.channelTag} ${result.alternatives[0].transcript}`
    .join('\n');
  console.log(`Transcription: \n${transcription}`);
  // [END speech_transcribe_multichannel_beta]
async function speechTranscribeMultichannelGCS(gcsUri) {
  // [START speech_transcribe_multichannel_gcs_beta]
  const speech = require('@google-cloud/speech').v1p1beta1;
  // Creates a client
  const client = new speech.SpeechClient();
  const config = {
    encoding: 'LINEAR16',
    languageCode: 'en-US',
    audioChannelCount: 2,
    enableSeparateRecognitionPerChannel: true,
  const audio = {
    uri: gcsUri,
  const request = {
    config: config,
    audio: audio,
  const [response] = await client.recognize(request);
  const transcription = response.results
      result =>
        ` Channel Tag: ${result.channelTag} ${result.alternatives[0].transcript}`
    .join('\n');
  console.log(`Transcription: \n${transcription}`);
  // [END speech_transcribe_multichannel_gcs_beta]
async function speechTranscribeMultilang(fileName) {
  // [START speech_transcribe_multilanguage_beta]
  const fs = require('fs');
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1p1beta1;
  // Creates a client
  const client = new speech.SpeechClient();
   * TODO(developer): Uncomment the following lines before running the sample.
  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';
  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 44100,
    languageCode: 'en-US',
    alternativeLanguageCodes: ['es-ES', 'en-US'],
  const audio = {
    content: fs.readFileSync(fileName).toString('base64'),
  const request = {
    config: config,
    audio: audio,
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
  // [END speech_transcribe_multilanguage_beta]
async function speechTranscribeMultilangGCS(gcsUri) {
  // [START speech_transcribe_multilanguage_gcs_beta]
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1p1beta1;
  // Creates a client
  const client = new speech.SpeechClient();
   * TODO(developer): Uncomment the following line before running the sample.
  // const uri = path to GCS audio file e.g. `gs:/bucket/audio.wav`;
  const config = {
    encoding: 'LINEAR16',
    sampleRateHertz: 44100,
    languageCode: 'en-US',
    alternativeLanguageCodes: ['es-ES', 'en-US'],
  const audio = {
    uri: gcsUri,
  const request = {
    config: config,
    audio: audio,
  const [operation] = await client.longRunningRecognize(request);
  const [response] = await operation.promise();
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
  // [END speech_transcribe_multilanguage_gcs_beta]
async function speechTranscribeWordLevelConfidence(fileName) {
  // [START speech_transcribe_word_level_confidence_beta]
  const fs = require('fs');
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1p1beta1;
  // Creates a client
  const client = new speech.SpeechClient();
   * TODO(developer): Uncomment the following lines before running the sample.
  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';
  const config = {
    encoding: 'FLAC',
    sampleRateHertz: 16000,
    languageCode: 'en-US',
    enableWordConfidence: true,
  const audio = {
    content: fs.readFileSync(fileName).toString('base64'),
  const request = {
    config: config,
    audio: audio,
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  const confidence = response.results
    .map(result => result.alternatives[0].confidence)
    .join('\n');
  console.log(`Transcription: ${transcription} \n Confidence: ${confidence}`);
  console.log('Word-Level-Confidence:');
  const words = response.results.map(result => result.alternatives[0]);
  words[0].words.forEach(a => {
    console.log(` word: ${a.word}, confidence: ${a.confidence}`);
  // [END speech_transcribe_word_level_confidence_beta]
async function speechTranscribeWordLevelConfidenceGCS(gcsUri) {
  // [START speech_transcribe_word_level_confidence_gcs_beta]
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech').v1p1beta1;
  // Creates a client
  const client = new speech.SpeechClient();
   * TODO(developer): Uncomment the following line before running the sample.
  // const uri = path to GCS audio file e.g. `gs:/bucket/audio.wav`;
  const config = {
    encoding: 'FLAC',
    sampleRateHertz: 16000,
    languageCode: 'en-US',
    enableWordConfidence: true,
  const audio = {
    uri: gcsUri,
  const request = {
    config: config,
    audio: audio,
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  const confidence = response.results
    .map(result => result.alternatives[0].confidence)
    .join('\n');
  console.log(`Transcription: ${transcription} \n Confidence: ${confidence}`);
  console.log('Word-Level-Confidence:');
  const words = response.results.map(result => result.alternatives[0]);
  words[0].words.forEach(a => {
    console.log(` word: ${a.word}, confidence: ${a.confidence}`);
  // [END speech_transcribe_word_level_confidence_gcs_beta]
// Command-line entry point. Each sub-command maps to one sample function and
// reads its input from either --speechFile/-f (local file) or --gcsUri/-u
// (Cloud Storage URI).
//
// Every .command() call passes an explicit empty builder object ({}) as the
// third argument so that the arrow function is registered as the handler;
// a function in the third position would be treated as the builder instead.
require('yargs')
  .demand(1)
  .command(
    'Diarization',
    'Isolate distinct speakers in an audio file',
    {},
    opts => speechTranscribeDiarization(opts.speechFile)
  )
  .command(
    'DiarizationGCS',
    'Isolate distinct speakers in an audio file located in a Google Cloud Storage bucket.',
    {},
    opts => asyncSpeechTranscribeDiarizationGCS(opts.gcsUri)
  )
  .command(
    'multiChannelTranscribe',
    'Differentiates input by audio channel in local audio file.',
    {},
    opts => speechTranscribeMultiChannel(opts.speechFile)
  )
  .command(
    'multiChannelTranscribeGCS',
    'Differentiates input by audio channel in an audio file located in a Google Cloud Storage bucket.',
    {},
    opts => speechTranscribeMultichannelGCS(opts.gcsUri)
  )
  .command(
    'multiLanguageTranscribe',
    'Transcribes multiple languages from local audio file.',
    {},
    opts => speechTranscribeMultilang(opts.speechFile)
  )
  .command(
    'multiLanguageTranscribeGCS',
    'Transcribes multiple languages from GCS audio file.',
    {},
    opts => speechTranscribeMultilangGCS(opts.gcsUri)
  )
  .command(
    'wordLevelConfidence',
    'Detects word level confidence from local audio file.',
    {},
    opts => speechTranscribeWordLevelConfidence(opts.speechFile)
  )
  .command(
    'wordLevelConfidenceGCS',
    'Detects word level confidence from GCS audio file.',
    {},
    opts => speechTranscribeWordLevelConfidenceGCS(opts.gcsUri)
  )
  .options({
    speechFile: {
      alias: 'f',
      global: true,
      // Require a value, matching gcsUri, so a bare `-f` is rejected by the
      // parser instead of reaching fs.readFileSync with an empty path.
      requiresArg: true,
      type: 'string',
    },
    gcsUri: {
      alias: 'u',
      global: true,
      requiresArg: true,
      type: 'string',
    },
  })
  .example('node $0 Diarization -f ./resources/commercial_mono.wav')
  .example(
    'node $0 DiarizationGCS -u gs://cloud-samples-tests/speech/commercial_mono.wav'
  )
  .example(
    'node $0 multiChannelTranscribe -f ./resources/commercial_stereo.wav'
  )
  .example(
    'node $0 multiChannelTranscribeGCS -u gs://cloud-samples-tests/speech/commercial_stereo.wav'
  )
  .example('node $0 multiLanguageTranscribe -f ./resources/multi.wav')
  .example(
    'node $0 multiLanguageTranscribeGCS -u gs://nodejs-docs-samples/multi_mono.wav'
  )
  .example('node $0 wordLevelConfidence -f ./resources/brooklyn.flac')
  .example(
    'node $0 wordLevelConfidenceGCS -u gs://cloud-samples-tests/speech/brooklyn.flac'
  )
  .wrap(120)
  .recommendCommands()
  .epilogue('For more information, see https://cloud.google.com/speech/docs')
  .strict().argv;
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

betaFeatures.js

Latest commit

History

betaFeatures.js

File metadata and controls