2323import com .google .cloud .speech .v1beta1 .RecognitionConfig .AudioEncoding ;
2424import com .google .cloud .speech .v1beta1 .SpeechGrpc ;
2525import com .google .cloud .speech .v1beta1 .StreamingRecognitionConfig ;
26+ import com .google .cloud .speech .v1beta1 .StreamingRecognitionResult ;
2627import com .google .cloud .speech .v1beta1 .StreamingRecognizeRequest ;
2728import com .google .cloud .speech .v1beta1 .StreamingRecognizeResponse ;
2829import com .google .protobuf .ByteString ;
29- import com .google .protobuf .TextFormat ;
3030
3131import io .grpc .ManagedChannel ;
3232import io .grpc .ManagedChannelBuilder ;
4444import org .apache .log4j .Logger ;
4545import org .apache .log4j .SimpleLayout ;
4646
47- import java .io .File ;
48- import java .io .FileInputStream ;
4947import java .io .IOException ;
5048import java .util .Arrays ;
5149import java .util .List ;
5250import java .util .concurrent .CountDownLatch ;
5351import java .util .concurrent .Executors ;
5452import java .util .concurrent .TimeUnit ;
53+ import javax .sound .sampled .AudioFormat ;
54+ import javax .sound .sampled .AudioSystem ;
55+ import javax .sound .sampled .DataLine ;
56+ import javax .sound .sampled .LineUnavailableException ;
57+ import javax .sound .sampled .TargetDataLine ;
5558
5659
5760/**
5861 * Client that sends streaming audio to Speech.Recognize and returns streaming transcript.
5962 */
6063public class StreamingRecognizeClient {
6164
62- private final String file ;
63- private final int samplingRate ;
64-
6565 private static final Logger logger = Logger .getLogger (StreamingRecognizeClient .class .getName ());
6666
6767 private final ManagedChannel channel ;
68-
6968 private final SpeechGrpc .SpeechStub speechClient ;
70-
71- private static final int BYTES_PER_BUFFER = 3200 ; //buffer size in bytes
72- private static final int BYTES_PER_SAMPLE = 2 ; //bytes per sample for LINEAR16
73-
7469 private static final List <String > OAUTH2_SCOPES =
7570 Arrays .asList ("https://www.googleapis.com/auth/cloud-platform" );
7671
72+ static final int BYTES_PER_SAMPLE = 2 ; // bytes per sample for LINEAR16
73+
74+ private final int samplingRate ;
75+ final int bytesPerBuffer ; // buffer size in bytes
76+
77+ // Used for testing
78+ protected TargetDataLine mockDataLine = null ;
79+
7780 /**
7881 * Construct client connecting to Cloud Speech server at {@code host:port}.
7982 */
80- public StreamingRecognizeClient (ManagedChannel channel , String file , int samplingRate )
83+ public StreamingRecognizeClient (ManagedChannel channel , int samplingRate )
8184 throws IOException {
82- this .file = file ;
8385 this .samplingRate = samplingRate ;
8486 this .channel = channel ;
87+ this .bytesPerBuffer = samplingRate * BYTES_PER_SAMPLE / 10 ; // 100 ms
8588
8689 speechClient = SpeechGrpc .newStub (channel );
8790
8891 // Send log4j logs to Console
8992 // If you are going to run this on GCE, you might wish to integrate with
90- // google-cloud-java logging. See:
93+ // google-cloud-java logging. See:
9194 // https://github.com/GoogleCloudPlatform/google-cloud-java/blob/master/README.md#stackdriver-logging-alpha
92-
9395 ConsoleAppender appender = new ConsoleAppender (new SimpleLayout (), SYSTEM_OUT );
9496 logger .addAppender (appender );
9597 }
@@ -109,19 +111,73 @@ static ManagedChannel createChannel(String host, int port) throws IOException {
109111 return channel ;
110112 }
111113
114+ /**
115+ * Return a Line to the audio input device.
116+ */
117+ private TargetDataLine getAudioInputLine () {
118+ // For testing
119+ if (null != mockDataLine ) {
120+ return mockDataLine ;
121+ }
122+
123+ AudioFormat format = new AudioFormat (samplingRate , BYTES_PER_SAMPLE * 8 , 1 , true , false );
124+ DataLine .Info info = new DataLine .Info (TargetDataLine .class , format );
125+ if (!AudioSystem .isLineSupported (info )) {
126+ throw new RuntimeException (String .format (
127+ "Device doesn't support LINEAR16 mono raw audio format at {}Hz" , samplingRate ));
128+ }
129+ try {
130+ TargetDataLine line = (TargetDataLine ) AudioSystem .getLine (info );
131+ // Make sure the line buffer doesn't overflow while we're filling this thread's buffer.
132+ line .open (format , bytesPerBuffer * 5 );
133+ return line ;
134+ } catch (LineUnavailableException e ) {
135+ throw new RuntimeException (e );
136+ }
137+ }
138+
112139 /** Send streaming recognize requests to server. */
113140 public void recognize () throws InterruptedException , IOException {
114141 final CountDownLatch finishLatch = new CountDownLatch (1 );
115142 StreamObserver <StreamingRecognizeResponse > responseObserver =
116143 new StreamObserver <StreamingRecognizeResponse >() {
144+ private int sentenceLength = 1 ;
145+ /**
146+ * Prints the transcription results. Interim results are overwritten by subsequent
147+ * results, until a final one is returned, at which point we start a new line.
148+ *
149+ * Flags the program to exit when it hears "exit".
150+ */
117151 @ Override
118152 public void onNext (StreamingRecognizeResponse response ) {
119- logger .info ("Received response: " + TextFormat .printToString (response ));
153+ List <StreamingRecognitionResult > results = response .getResultsList ();
154+ if (results .size () < 1 ) {
155+ return ;
156+ }
157+
158+ StreamingRecognitionResult result = results .get (0 );
159+ String transcript = result .getAlternatives (0 ).getTranscript ();
160+
161+ // Print interim results with a carriage return, so subsequent transcriptions will
162+ // overwrite them. A final result is printed with a newline instead.
163+ String format = "%-" + this .sentenceLength + 's' ;
164+ if (result .getIsFinal ()) {
165+ format += '\n' ;
166+ this .sentenceLength = 1 ;
167+
168+ if (transcript .toLowerCase ().indexOf ("exit" ) >= 0 ) {
169+ finishLatch .countDown ();
170+ }
171+ } else {
172+ format += '\r' ;
173+ this .sentenceLength = transcript .length ();
174+ }
175+ System .out .print (String .format (format , transcript ));
120176 }
121177
122178 @ Override
123179 public void onError (Throwable error ) {
124- logger .log (Level .WARN , "recognize failed: {0}" , error );
180+ logger .log (Level .ERROR , "recognize failed: {0}" , error );
125181 finishLatch .countDown ();
126182 }
127183
@@ -146,33 +202,28 @@ public void onCompleted() {
146202 StreamingRecognitionConfig .newBuilder ()
147203 .setConfig (config )
148204 .setInterimResults (true )
149- .setSingleUtterance (true )
205+ .setSingleUtterance (false )
150206 .build ();
151207
152208 StreamingRecognizeRequest initial =
153209 StreamingRecognizeRequest .newBuilder ().setStreamingConfig (streamingConfig ).build ();
154210 requestObserver .onNext (initial );
155211
156- // Open audio file. Read and send sequential buffers of audio as additional RecognizeRequests.
157- FileInputStream in = new FileInputStream (new File (file ));
158- // For LINEAR16 at 16000 Hz sample rate, 3200 bytes corresponds to 100 milliseconds of audio.
159- byte [] buffer = new byte [BYTES_PER_BUFFER ];
212+ // Get a Line to the audio input device.
213+ TargetDataLine in = getAudioInputLine ();
214+ byte [] buffer = new byte [bytesPerBuffer ];
160215 int bytesRead ;
161- int totalBytes = 0 ;
162- int samplesPerBuffer = BYTES_PER_BUFFER / BYTES_PER_SAMPLE ;
163- int samplesPerMillis = samplingRate / 1000 ;
164216
165- while ((bytesRead = in .read (buffer )) != -1 ) {
166- totalBytes += bytesRead ;
217+ in .start ();
218+ // Read and send sequential buffers of audio as additional RecognizeRequests.
219+ while (finishLatch .getCount () > 0
220+ && (bytesRead = in .read (buffer , 0 , buffer .length )) != -1 ) {
167221 StreamingRecognizeRequest request =
168222 StreamingRecognizeRequest .newBuilder ()
169223 .setAudioContent (ByteString .copyFrom (buffer , 0 , bytesRead ))
170224 .build ();
171225 requestObserver .onNext (request );
172- // To simulate real-time audio, sleep after sending each audio buffer.
173- Thread .sleep (samplesPerBuffer / samplesPerMillis );
174226 }
175- logger .info ("Sent " + totalBytes + " bytes from audio file: " + file );
176227 } catch (RuntimeException e ) {
177228 // Cancel RPC.
178229 requestObserver .onError (e );
@@ -187,21 +238,13 @@ public void onCompleted() {
187238
188239 public static void main (String [] args ) throws Exception {
189240
190- String audioFile = "" ;
191- String host = "speech.googleapis.com" ;
192- Integer port = 443 ;
193- Integer sampling = 16000 ;
241+ String host = null ;
242+ Integer port = null ;
243+ Integer sampling = null ;
194244
195245 CommandLineParser parser = new DefaultParser ();
196246
197247 Options options = new Options ();
198- options .addOption (
199- Option .builder ()
200- .longOpt ("file" )
201- .desc ("path to audio file" )
202- .hasArg ()
203- .argName ("FILE_PATH" )
204- .build ());
205248 options .addOption (
206249 Option .builder ()
207250 .longOpt ("host" )
@@ -226,31 +269,14 @@ public static void main(String[] args) throws Exception {
226269
227270 try {
228271 CommandLine line = parser .parse (options , args );
229- if (line .hasOption ("file" )) {
230- audioFile = line .getOptionValue ("file" );
231- } else {
232- System .err .println ("An Audio file must be specified (e.g. /foo/baz.raw)." );
233- System .exit (1 );
234- }
235-
236- if (line .hasOption ("host" )) {
237- host = line .getOptionValue ("host" );
238- } else {
239- System .err .println ("An API enpoint must be specified (typically speech.googleapis.com)." );
240- System .exit (1 );
241- }
242272
243- if (line .hasOption ("port" )) {
244- port = Integer .parseInt (line .getOptionValue ("port" ));
245- } else {
246- System .err .println ("An SSL port must be specified (typically 443)." );
247- System .exit (1 );
248- }
273+ host = line .getOptionValue ("host" , "speech.googleapis.com" );
274+ port = Integer .parseInt (line .getOptionValue ("port" , "443" ));
249275
250276 if (line .hasOption ("sampling" )) {
251277 sampling = Integer .parseInt (line .getOptionValue ("sampling" ));
252278 } else {
253- System .err .println ("An Audio sampling rate must be specified." );
279+ System .err .println ("An Audio sampling rate (--sampling) must be specified. (e.g. 16000) " );
254280 System .exit (1 );
255281 }
256282 } catch (ParseException exp ) {
@@ -259,7 +285,7 @@ public static void main(String[] args) throws Exception {
259285 }
260286
261287 ManagedChannel channel = createChannel (host , port );
262- StreamingRecognizeClient client = new StreamingRecognizeClient (channel , audioFile , sampling );
288+ StreamingRecognizeClient client = new StreamingRecognizeClient (channel , sampling );
263289 try {
264290 client .recognize ();
265291 } finally {
0 commit comments