|
23 | 23 |
|
24 | 24 | 'use strict'; |
25 | 25 |
|
26 | | -const Speech = require('@google-cloud/speech'); |
/**
 * Synchronously transcribes a local audio file with the Cloud Speech API.
 *
 * @param {string} filename - Path to the local audio file, e.g. /path/to/audio.raw.
 * @param {string} encoding - Audio encoding, e.g. 'LINEAR16'.
 * @param {number} sampleRate - Sample rate of the audio in Hz, e.g. 16000.
 * @returns {Promise} Resolves with the transcription string, or undefined on error.
 */
function syncRecognize (filename, encoding, sampleRate) {
  // [START speech_sync_recognize]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };

  // Detects speech in the audio file.
  // Return the promise so callers can await the result, and terminate the
  // chain with .catch() so an API failure doesn't become an unhandled rejection.
  return speech.recognize(filename, request)
    .then((results) => {
      const transcription = results[0];

      console.log(`Transcription: ${transcription}`);
      return transcription;
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
  // [END speech_sync_recognize]
}
| 57 | + |
/**
 * Synchronously transcribes an audio file stored in Google Cloud Storage.
 *
 * @param {string} gcsUri - GCS URI of the audio file, e.g. gs://my-bucket/audio.raw.
 * @param {string} encoding - Audio encoding, e.g. 'LINEAR16'.
 * @param {number} sampleRate - Sample rate of the audio in Hz, e.g. 16000.
 * @returns {Promise} Resolves with the transcription string, or undefined on error.
 */
function syncRecognizeGCS (gcsUri, encoding, sampleRate) {
  // [START speech_sync_recognize_gcs]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
  // const gcsUri = 'gs://my-bucket/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };

  // Detects speech in the audio file.
  // Return the promise so callers can await the result, and terminate the
  // chain with .catch() so an API failure doesn't become an unhandled rejection.
  return speech.recognize(gcsUri, request)
    .then((results) => {
      const transcription = results[0];

      console.log(`Transcription: ${transcription}`);
      return transcription;
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
  // [END speech_sync_recognize_gcs]
}
| 89 | + |
/**
 * Starts an asynchronous recognition job for a local audio file and waits
 * for it to complete.
 *
 * @param {string} filename - Path to the local audio file, e.g. /path/to/audio.raw.
 * @param {string} encoding - Audio encoding, e.g. 'LINEAR16'.
 * @param {number} sampleRate - Sample rate of the audio in Hz, e.g. 16000.
 * @returns {Promise} Resolves with the transcription string, or undefined on error.
 */
function asyncRecognize (filename, encoding, sampleRate) {
  // [START speech_async_recognize]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };

  // Detects speech in the audio file. This creates a recognition job that you
  // can wait for now, or get its result later.
  // Return the promise and terminate the chain with .catch() so an API
  // failure doesn't become an unhandled rejection.
  return speech.startRecognition(filename, request)
    .then((results) => {
      const operation = results[0];
      // Get a Promise representation of the final result of the job
      return operation.promise();
    })
    .then((transcription) => {
      console.log(`Transcription: ${transcription}`);
      return transcription;
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
  // [END speech_async_recognize]
}
47 | | -// [END speech_sync_recognize] |
48 | 125 |
|
49 | | -// [START speech_async_recognize] |
50 | | -function asyncRecognize (filename) { |
/**
 * Starts an asynchronous recognition job for an audio file stored in Google
 * Cloud Storage and waits for it to complete.
 *
 * @param {string} gcsUri - GCS URI of the audio file, e.g. gs://my-bucket/audio.raw.
 * @param {string} encoding - Audio encoding, e.g. 'LINEAR16'.
 * @param {number} sampleRate - Sample rate of the audio in Hz, e.g. 16000.
 * @returns {Promise} Resolves with the transcription string, or undefined on error.
 */
function asyncRecognizeGCS (gcsUri, encoding, sampleRate) {
  // [START speech_async_recognize_gcs]
  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
  // const gcsUri = 'gs://my-bucket/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    encoding: encoding,
    sampleRate: sampleRate
  };

  // Detects speech in the audio file. This creates a recognition job that you
  // can wait for now, or get its result later.
  // Return the promise and terminate the chain with .catch() so an API
  // failure doesn't become an unhandled rejection.
  return speech.startRecognition(gcsUri, request)
    .then((results) => {
      const operation = results[0];
      // Get a Promise representation of the final result of the job
      return operation.promise();
    })
    .then((transcription) => {
      console.log(`Transcription: ${transcription}`);
      return transcription;
    })
    .catch((err) => {
      console.error('ERROR:', err);
    });
  // [END speech_async_recognize_gcs]
}
74 | | -// [END speech_async_recognize] |
75 | 161 |
|
76 | | -// [START speech_streaming_recognize] |
77 | | -const fs = require('fs'); |
/**
 * Streams a local audio file to the Cloud Speech API and logs recognition
 * results as they arrive.
 *
 * @param {string} filename - Path to the local audio file, e.g. /path/to/audio.raw.
 * @param {string} encoding - Audio encoding, e.g. 'LINEAR16'.
 * @param {number} sampleRate - Sample rate of the audio in Hz, e.g. 16000.
 */
function streamingRecognize (filename, encoding, sampleRate) {
  // [START speech_streaming_recognize]
  const fs = require('fs');

  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
  // const filename = '/path/to/audio.raw';

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    config: {
      encoding: encoding,
      sampleRate: sampleRate
    }
  };

  // Stream the audio to the Google Cloud Speech API
  const recognizeStream = speech.createRecognizeStream(request)
    .on('error', console.error)
    .on('data', (data) => {
      console.log('Data received: %j', data);
    });

  // Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw"
  // Handle read errors explicitly: a missing or unreadable file would
  // otherwise emit an unhandled 'error' event and crash the process.
  fs.createReadStream(filename)
    .on('error', console.error)
    .pipe(recognizeStream);
  // [END speech_streaming_recognize]
}
102 | | -// [END speech_streaming_recognize] |
103 | 199 |
|
104 | | -// [START speech_streaming_mic_recognize] |
105 | | -const record = require('node-record-lpcm16'); |
/**
 * Streams microphone input to the Cloud Speech API and writes recognition
 * results to stdout until the process is interrupted.
 *
 * @param {string} encoding - Audio encoding, e.g. 'LINEAR16'.
 * @param {number} sampleRate - Sample rate of the audio in Hz, e.g. 16000.
 */
function streamingMicRecognize (encoding, sampleRate) {
  // [START speech_streaming_mic_recognize]
  const record = require('node-record-lpcm16');

  // Imports the Google Cloud client library
  const Speech = require('@google-cloud/speech');

  // Instantiates a client
  const speech = Speech();

  // The encoding of the audio file, e.g. 'LINEAR16'
  // const encoding = 'LINEAR16';

  // The sample rate of the audio file, e.g. 16000
  // const sampleRate = 16000;

  const request = {
    config: { encoding, sampleRate }
  };

  // Create a recognize stream and wire up its handlers
  const recognizeStream = speech.createRecognizeStream(request);
  recognizeStream.on('error', console.error);
  recognizeStream.on('data', (data) => process.stdout.write(data.results));

  // Start recording and send the microphone input to the Speech API
  const micStream = record.start({ sampleRate, threshold: 0 });
  micStream.pipe(recognizeStream);

  console.log('Listening, press Ctrl+C to stop.');
  // [END speech_streaming_mic_recognize]
}
132 | | -// [END speech_streaming_mic_recognize] |
133 | 237 |
|
134 | 238 | require(`yargs`) |
135 | 239 | .demand(1) |
136 | 240 | .command( |
137 | 241 | `sync <filename>`, |
138 | | - `Detects speech in an audio file.`, |
| 242 | + `Detects speech in a local audio file.`, |
139 | 243 | {}, |
140 | | - (opts) => syncRecognize(opts.filename) |
| 244 | + (opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRate) |
| 245 | + ) |
| 246 | + .command( |
| 247 | + `sync-gcs <gcsUri>`, |
| 248 | + `Detects speech in an audio file located in a Google Cloud Storage bucket.`, |
| 249 | + {}, |
| 250 | + (opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate) |
141 | 251 | ) |
142 | 252 | .command( |
143 | 253 | `async <filename>`, |
144 | | - `Creates a job to detect speech in an audio file, and waits for the job to complete.`, |
| 254 | + `Creates a job to detect speech in a local audio file, and waits for the job to complete.`, |
145 | 255 | {}, |
146 | | - (opts) => asyncRecognize(opts.filename) |
| 256 | + (opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRate) |
| 257 | + ) |
| 258 | + .command( |
| 259 | + `async-gcs <gcsUri>`, |
| 260 | + `Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`, |
| 261 | + {}, |
| 262 | + (opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate) |
147 | 263 | ) |
148 | 264 | .command( |
149 | 265 | `stream <filename>`, |
150 | | - `Detects speech in an audio file by streaming it to the Speech API.`, |
| 266 | + `Detects speech in a local audio file by streaming it to the Speech API.`, |
151 | 267 | {}, |
152 | | - (opts) => streamingRecognize(opts.filename, () => {}) |
| 268 | + (opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRate) |
153 | 269 | ) |
154 | 270 | .command( |
155 | 271 | `listen`, |
156 | 272 | `Detects speech in a microphone input stream.`, |
157 | 273 | {}, |
158 | | - streamingMicRecognize |
| 274 | + (opts) => streamingMicRecognize(opts.encoding, opts.sampleRate) |
159 | 275 | ) |
160 | | - .example(`node $0 sync ./resources/audio.raw`) |
161 | | - .example(`node $0 async ./resources/audio.raw`) |
162 | | - .example(`node $0 stream ./resources/audio.raw`) |
| 276 | + .options({ |
| 277 | + encoding: { |
| 278 | + alias: 'e', |
| 279 | + default: 'LINEAR16', |
| 280 | + global: true, |
| 281 | + requiresArg: true, |
| 282 | + type: 'string' |
| 283 | + }, |
| 284 | + sampleRate: { |
| 285 | + alias: 'r', |
| 286 | + default: 16000, |
| 287 | + global: true, |
| 288 | + requiresArg: true, |
| 289 | + type: 'number' |
| 290 | + } |
| 291 | + }) |
| 292 | + .example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`) |
| 293 | + .example(`node $0 async-gcs gs://my-bucket/audio.raw -e LINEAR16 -r 16000`) |
| 294 | + .example(`node $0 stream ./resources/audio.raw -e LINEAR16 -r 16000`) |
163 | 295 | .example(`node $0 listen`) |
164 | 296 | .wrap(120) |
165 | 297 | .recommendCommands() |
|
0 commit comments