Skip to content

Commit 3a50f30

Browse files
authored
Update Speech samples. (GoogleCloudPlatform#307)
1 parent 507a9a3 commit 3a50f30

File tree

7 files changed

+496
-116
lines changed

7 files changed

+496
-116
lines changed

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,8 @@
8181
"@google-cloud/monitoring": "0.1.4",
8282
"@google-cloud/pubsub": "0.7.0",
8383
"@google-cloud/resource": "0.5.1",
84-
"@google-cloud/speech": "0.5.0",
85-
"@google-cloud/storage": "0.6.0",
84+
"@google-cloud/speech": "0.6.0",
85+
"@google-cloud/storage": "0.6.1",
8686
"@google-cloud/translate": "0.6.0",
8787
"@google-cloud/vision": "0.7.0",
8888
"@google/cloud-debug": "0.9.1",

speech/README.md

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,9 @@
22

33
# Google Cloud Speech API Node.js Samples
44

5-
[Sign up for the Alpha][speech_signup].
6-
75
The [Cloud Speech API][speech_docs] enables easy integration of Google speech
86
recognition technologies into developer applications.
97

10-
[speech_signup]: https://services.google.com/fb/forms/speech-api-alpha/
118
[speech_docs]: https://cloud.google.com/speech/
129

1310
## Table of Contents
@@ -36,18 +33,23 @@ __Usage:__ `node recognize.js --help`
3633

3734
```
3835
Commands:
39-
sync <filename> Detects speech in an audio file.
40-
async <filename> Creates a job to detect speech in an audio file, and waits for the job to complete.
41-
stream <filename> Detects speech in an audio file by streaming it to the Speech API.
42-
listen Detects speech in a microphone input stream.
36+
sync <filename> Detects speech in a local audio file.
37+
sync-gcs <gcsUri> Detects speech in an audio file located in a Google Cloud Storage bucket.
38+
async <filename> Creates a job to detect speech in a local audio file, and waits for the job to complete.
39+
async-gcs <gcsUri> Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and
40+
waits for the job to complete.
41+
stream <filename> Detects speech in a local audio file by streaming it to the Speech API.
42+
listen Detects speech in a microphone input stream.
4343
4444
Options:
45-
--help Show help [boolean]
45+
--help Show help [boolean]
46+
--encoding, -e [string] [default: "LINEAR16"]
47+
--sampleRate, -r [number] [default: 16000]
4648
4749
Examples:
48-
node recognize.js sync ./resources/audio.raw
49-
node recognize.js async ./resources/audio.raw
50-
node recognize.js stream ./resources/audio.raw
50+
node recognize.js sync ./resources/audio.raw -e LINEAR16 -r 16000
51+
node recognize.js async-gcs gs://my-bucket/audio.raw -e LINEAR16 -r 16000
52+
node recognize.js stream ./resources/audio.raw -e LINEAR16 -r 16000
5153
node recognize.js listen
5254
5355
For more information, see https://cloud.google.com/speech/docs

speech/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
"test": "cd ..; npm run st -- --verbose speech/system-test/*.test.js"
99
},
1010
"dependencies": {
11-
"@google-cloud/speech": "0.5.0",
11+
"@google-cloud/speech": "0.6.0",
12+
"@google-cloud/storage": "0.6.1",
1213
"node-record-lpcm16": "0.2.0",
1314
"yargs": "6.6.0"
1415
},

speech/recognize.js

Lines changed: 188 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -23,143 +23,275 @@
2323

2424
'use strict';
2525

26-
const Speech = require('@google-cloud/speech');
26+
function syncRecognize (filename, encoding, sampleRate) {
27+
// [START speech_sync_recognize]
28+
// Imports the Google Cloud client library
29+
const Speech = require('@google-cloud/speech');
2730

28-
// [START speech_sync_recognize]
29-
function syncRecognize (filename) {
3031
// Instantiates a client
3132
const speech = Speech();
3233

33-
const config = {
34-
// Configure these settings based on the audio you're transcribing
35-
encoding: 'LINEAR16',
36-
sampleRate: 16000
34+
// The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
35+
// const filename = '/path/to/audio.raw';
36+
37+
// The encoding of the audio file, e.g. 'LINEAR16'
38+
// const encoding = 'LINEAR16';
39+
40+
// The sample rate of the audio file, e.g. 16000
41+
// const sampleRate = 16000;
42+
43+
const request = {
44+
encoding: encoding,
45+
sampleRate: sampleRate
3746
};
3847

39-
// Detects speech in the audio file, e.g. "./resources/audio.raw"
40-
return speech.recognize(filename, config)
48+
// Detects speech in the audio file
49+
speech.recognize(filename, request)
4150
.then((results) => {
4251
const transcription = results[0];
52+
53+
console.log(`Transcription: ${transcription}`);
54+
});
55+
// [END speech_sync_recognize]
56+
}
57+
58+
function syncRecognizeGCS (gcsUri, encoding, sampleRate) {
59+
// [START speech_sync_recognize_gcs]
60+
// Imports the Google Cloud client library
61+
const Speech = require('@google-cloud/speech');
62+
63+
// Instantiates a client
64+
const speech = Speech();
65+
66+
// The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
67+
// const gcsUri = 'gs://my-bucket/audio.raw';
68+
69+
// The encoding of the audio file, e.g. 'LINEAR16'
70+
// const encoding = 'LINEAR16';
71+
72+
// The sample rate of the audio file, e.g. 16000
73+
// const sampleRate = 16000;
74+
75+
const request = {
76+
encoding: encoding,
77+
sampleRate: sampleRate
78+
};
79+
80+
// Detects speech in the audio file
81+
speech.recognize(gcsUri, request)
82+
.then((results) => {
83+
const transcription = results[0];
84+
85+
console.log(`Transcription: ${transcription}`);
86+
});
87+
// [END speech_sync_recognize_gcs]
88+
}
89+
90+
function asyncRecognize (filename, encoding, sampleRate) {
91+
// [START speech_async_recognize]
92+
// Imports the Google Cloud client library
93+
const Speech = require('@google-cloud/speech');
94+
95+
// Instantiates a client
96+
const speech = Speech();
97+
98+
// The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
99+
// const filename = '/path/to/audio.raw';
100+
101+
// The encoding of the audio file, e.g. 'LINEAR16'
102+
// const encoding = 'LINEAR16';
103+
104+
// The sample rate of the audio file, e.g. 16000
105+
// const sampleRate = 16000;
106+
107+
const request = {
108+
encoding: encoding,
109+
sampleRate: sampleRate
110+
};
111+
112+
// Detects speech in the audio file. This creates a recognition job that you
113+
// can wait for now, or get its result later.
114+
speech.startRecognition(filename, request)
115+
.then((results) => {
116+
const operation = results[0];
117+
// Get a Promise representation of the final result of the job
118+
return operation.promise();
119+
})
120+
.then((transcription) => {
43121
console.log(`Transcription: ${transcription}`);
44-
return transcription;
45122
});
123+
// [END speech_async_recognize]
46124
}
47-
// [END speech_sync_recognize]
48125

49-
// [START speech_async_recognize]
50-
function asyncRecognize (filename) {
126+
function asyncRecognizeGCS (gcsUri, encoding, sampleRate) {
127+
// [START speech_async_recognize_gcs]
128+
// Imports the Google Cloud client library
129+
const Speech = require('@google-cloud/speech');
130+
51131
// Instantiates a client
52132
const speech = Speech();
53133

54-
const config = {
55-
// Configure these settings based on the audio you're transcribing
56-
encoding: 'LINEAR16',
57-
sampleRate: 16000
134+
// The Google Cloud Storage URI of the file on which to perform speech recognition, e.g. gs://my-bucket/audio.raw
135+
// const gcsUri = 'gs://my-bucket/audio.raw';
136+
137+
// The encoding of the audio file, e.g. 'LINEAR16'
138+
// const encoding = 'LINEAR16';
139+
140+
// The sample rate of the audio file, e.g. 16000
141+
// const sampleRate = 16000;
142+
143+
const request = {
144+
encoding: encoding,
145+
sampleRate: sampleRate
58146
};
59147

60-
// Detects speech in the audio file, e.g. "./resources/audio.raw"
61-
// This creates a recognition job that you can wait for now, or get its result
62-
// later.
63-
return speech.startRecognition(filename, config)
148+
// Detects speech in the audio file. This creates a recognition job that you
149+
// can wait for now, or get its result later.
150+
speech.startRecognition(gcsUri, request)
64151
.then((results) => {
65152
const operation = results[0];
66-
// Get a Promise represention the final result of the job
153+
// Get a Promise representation of the final result of the job
67154
return operation.promise();
68155
})
69156
.then((transcription) => {
70157
console.log(`Transcription: ${transcription}`);
71-
return transcription;
72158
});
159+
// [END speech_async_recognize_gcs]
73160
}
74-
// [END speech_async_recognize]
75161

76-
// [START speech_streaming_recognize]
77-
const fs = require('fs');
162+
function streamingRecognize (filename, encoding, sampleRate) {
163+
// [START speech_streaming_recognize]
164+
const fs = require('fs');
165+
166+
// Imports the Google Cloud client library
167+
const Speech = require('@google-cloud/speech');
78168

79-
function streamingRecognize (filename, callback) {
80169
// Instantiates a client
81170
const speech = Speech();
82171

83-
const options = {
172+
// The path to the local file on which to perform speech recognition, e.g. /path/to/audio.raw
173+
// const filename = '/path/to/audio.raw';
174+
175+
// The encoding of the audio file, e.g. 'LINEAR16'
176+
// const encoding = 'LINEAR16';
177+
178+
// The sample rate of the audio file, e.g. 16000
179+
// const sampleRate = 16000;
180+
181+
const request = {
84182
config: {
85-
// Configure these settings based on the audio you're transcribing
86-
encoding: 'LINEAR16',
87-
sampleRate: 16000
183+
encoding: encoding,
184+
sampleRate: sampleRate
88185
}
89186
};
90187

91-
// Create a recognize stream
92-
const recognizeStream = speech.createRecognizeStream(options)
93-
.on('error', callback)
188+
// Stream the audio to the Google Cloud Speech API
189+
const recognizeStream = speech.createRecognizeStream(request)
190+
.on('error', console.error)
94191
.on('data', (data) => {
95192
console.log('Data received: %j', data);
96-
callback();
97193
});
98194

99195
// Stream an audio file from disk to the Speech API, e.g. "./resources/audio.raw"
100196
fs.createReadStream(filename).pipe(recognizeStream);
197+
// [END speech_streaming_recognize]
101198
}
102-
// [END speech_streaming_recognize]
103199

104-
// [START speech_streaming_mic_recognize]
105-
const record = require('node-record-lpcm16');
200+
function streamingMicRecognize (encoding, sampleRate) {
201+
// [START speech_streaming_mic_recognize]
202+
const record = require('node-record-lpcm16');
203+
204+
// Imports the Google Cloud client library
205+
const Speech = require('@google-cloud/speech');
106206

107-
function streamingMicRecognize () {
108207
// Instantiates a client
109208
const speech = Speech();
110209

111-
const options = {
210+
// The encoding of the audio file, e.g. 'LINEAR16'
211+
// const encoding = 'LINEAR16';
212+
213+
// The sample rate of the audio file, e.g. 16000
214+
// const sampleRate = 16000;
215+
216+
const request = {
112217
config: {
113-
// Configure these settings based on the audio you're transcribing
114-
encoding: 'LINEAR16',
115-
sampleRate: 16000
218+
encoding: encoding,
219+
sampleRate: sampleRate
116220
}
117221
};
118222

119223
// Create a recognize stream
120-
const recognizeStream = speech.createRecognizeStream(options)
224+
const recognizeStream = speech.createRecognizeStream(request)
121225
.on('error', console.error)
122226
.on('data', (data) => process.stdout.write(data.results));
123227

124228
// Start recording and send the microphone input to the Speech API
125229
record.start({
126-
sampleRate: 16000,
230+
sampleRate: sampleRate,
127231
threshold: 0
128232
}).pipe(recognizeStream);
129233

130234
console.log('Listening, press Ctrl+C to stop.');
235+
// [END speech_streaming_mic_recognize]
131236
}
132-
// [END speech_streaming_mic_recognize]
133237

134238
require(`yargs`)
135239
.demand(1)
136240
.command(
137241
`sync <filename>`,
138-
`Detects speech in an audio file.`,
242+
`Detects speech in a local audio file.`,
139243
{},
140-
(opts) => syncRecognize(opts.filename)
244+
(opts) => syncRecognize(opts.filename, opts.encoding, opts.sampleRate)
245+
)
246+
.command(
247+
`sync-gcs <gcsUri>`,
248+
`Detects speech in an audio file located in a Google Cloud Storage bucket.`,
249+
{},
250+
(opts) => syncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
141251
)
142252
.command(
143253
`async <filename>`,
144-
`Creates a job to detect speech in an audio file, and waits for the job to complete.`,
254+
`Creates a job to detect speech in a local audio file, and waits for the job to complete.`,
145255
{},
146-
(opts) => asyncRecognize(opts.filename)
256+
(opts) => asyncRecognize(opts.filename, opts.encoding, opts.sampleRate)
257+
)
258+
.command(
259+
`async-gcs <gcsUri>`,
260+
`Creates a job to detect speech in an audio file located in a Google Cloud Storage bucket, and waits for the job to complete.`,
261+
{},
262+
(opts) => asyncRecognizeGCS(opts.gcsUri, opts.encoding, opts.sampleRate)
147263
)
148264
.command(
149265
`stream <filename>`,
150-
`Detects speech in an audio file by streaming it to the Speech API.`,
266+
`Detects speech in a local audio file by streaming it to the Speech API.`,
151267
{},
152-
(opts) => streamingRecognize(opts.filename, () => {})
268+
(opts) => streamingRecognize(opts.filename, opts.encoding, opts.sampleRate)
153269
)
154270
.command(
155271
`listen`,
156272
`Detects speech in a microphone input stream.`,
157273
{},
158-
streamingMicRecognize
274+
(opts) => streamingMicRecognize(opts.encoding, opts.sampleRate)
159275
)
160-
.example(`node $0 sync ./resources/audio.raw`)
161-
.example(`node $0 async ./resources/audio.raw`)
162-
.example(`node $0 stream ./resources/audio.raw`)
276+
.options({
277+
encoding: {
278+
alias: 'e',
279+
default: 'LINEAR16',
280+
global: true,
281+
requiresArg: true,
282+
type: 'string'
283+
},
284+
sampleRate: {
285+
alias: 'r',
286+
default: 16000,
287+
global: true,
288+
requiresArg: true,
289+
type: 'number'
290+
}
291+
})
292+
.example(`node $0 sync ./resources/audio.raw -e LINEAR16 -r 16000`)
293+
.example(`node $0 async-gcs gs://my-bucket/audio.raw -e LINEAR16 -r 16000`)
294+
.example(`node $0 stream ./resources/audio.raw -e LINEAR16 -r 16000`)
163295
.example(`node $0 listen`)
164296
.wrap(120)
165297
.recommendCommands()

0 commit comments

Comments
 (0)