Skip to content

Commit f364416

Browse files
authored
Merge pull request googleapis#2714 from daspecster/add-stability-to-speech-results
Add stability information to streaming results.
2 parents 901bd7d + 38e0887 commit f364416

File tree

7 files changed

+121
-33
lines changed

7 files changed

+121
-33
lines changed

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@
178178
Client <speech-client>
179179
speech-encoding
180180
speech-operation
181+
speech-result
181182
speech-sample
182183
speech-alternative
183184

docs/speech-result.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Speech Result
2+
=============
3+
4+
.. automodule:: google.cloud.speech.result
5+
:members:
6+
:undoc-members:
7+
:show-inheritance:

docs/speech-usage.rst

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,10 @@ speech data to possible text alternatives on the fly.
171171
... sample = client.sample(content=stream,
172172
... encoding=speech.Encoding.LINEAR16,
173173
... sample_rate=16000)
174-
... alternatives = list(client.streaming_recognize(sample))
175-
>>> print(alternatives[0].transcript)
174+
... results = list(client.streaming_recognize(sample))
175+
>>> print(results[0].alternatives[0].transcript)
176176
'hello'
177-
>>> print(alternatives[0].confidence)
177+
>>> print(results[0].alternatives[0].confidence)
178178
0.973458576
179179
180180
@@ -196,10 +196,10 @@ See: `Single Utterance`_
196196
... sample_rate=16000)
197197
... responses = client.streaming_recognize(sample,
198198
... single_utterance=True)
199-
... alternatives = list(responses)
200-
>>> print(alternatives[0].transcript)
199+
... results = list(responses)
200+
>>> print(results[0].alternatives[0].transcript)
201201
hello
202-
>>> print(alternatives[0].confidence)
202+
>>> print(results[0].alternatives[0].confidence)
203203
0.96523453546
204204
205205
@@ -214,20 +214,28 @@ If ``interim_results`` is set to :data:`True`, interim results
214214
... sample = client.sample(content=stream,
215215
... encoding=speech.Encoding.LINEAR16,
216216
... sample_rate=16000)
217-
... for alternatives in client.streaming_recognize(sample,
218-
... interim_results=True):
217+
... for result in client.streaming_recognize(sample,
218+
... interim_results=True):
219219
... print('=' * 20)
220-
... print(alternatives[0].transcript)
221-
... print(alternatives[0].confidence)
220+
... print(result.alternatives[0].transcript)
221+
... print(result.alternatives[0].confidence)
222+
... print(result.is_final)
223+
... print(result.stability)
222224
====================
223225
'he'
224226
None
227+
False
228+
0.113245
225229
====================
226230
'hell'
227231
None
232+
False
233+
0.132454
228234
====================
229235
'hello'
230236
0.973458576
237+
True
238+
0.982345
231239
232240
233241
.. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig

speech/google/cloud/speech/client.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from google.cloud.speech.connection import Connection
2828
from google.cloud.speech.encoding import Encoding
2929
from google.cloud.speech.operation import Operation
30+
from google.cloud.speech.result import StreamingSpeechResult
3031
from google.cloud.speech.sample import Sample
3132

3233

@@ -170,7 +171,8 @@ def streaming_recognize(self, sample, language_code=None,
170171
Streaming recognition requests are limited to 1 minute of audio.
171172
See: https://cloud.google.com/speech/limits#content
172173
173-
Yields: list of :class:`~google.cloud.speech.alternative.Alternatives`
174+
Yields: Instance of
175+
:class:`~google.cloud.speech.result.StreamingSpeechResult`
174176
containing results and metadata from the streaming request.
175177
176178
:type sample: :class:`~google.cloud.speech.sample.Sample`
@@ -242,8 +244,7 @@ def streaming_recognize(self, sample, language_code=None,
242244
for response in responses:
243245
for result in response.results:
244246
if result.is_final or interim_results:
245-
yield [Alternative.from_pb(alternative)
246-
for alternative in result.alternatives]
247+
yield StreamingSpeechResult.from_pb(result)
247248

248249
def sync_recognize(self, sample, language_code=None,
249250
max_alternatives=None, profanity_filter=None,
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright 2016 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Speech result representations."""
16+
17+
from google.cloud.speech.alternative import Alternative
18+
19+
20+
class StreamingSpeechResult(object):
    """Streaming speech result representation.

    Groups the recognition alternatives of one streaming result together
    with the finality flag and stability score reported alongside them.

    :type alternatives: list
    :param alternatives: List of
                         :class:`~google.cloud.speech.alternative.Alternative`.

    :type is_final: bool
    :param is_final: Whether this result is final (``True``) or an interim
                     result (``False``). Defaults to ``False``.

    :type stability: float
    :param stability: 0.0-1.0 stability score for the results returned.
                      Defaults to ``0.0``.
    """
    def __init__(self, alternatives, is_final=False, stability=0.0):
        self.alternatives = alternatives
        self.is_final = is_final
        self.stability = stability

    @classmethod
    def from_pb(cls, response):
        """Factory: construct instance of ``StreamingSpeechResult``.

        Despite its name, ``response`` is a single result message (one
        entry of a streaming response's ``results``), not the whole
        streaming response.

        :type response: :class:`~google.cloud.grpc.speech.v1beta1\
                        .cloud_speech_pb2.StreamingRecognitionResult`
        :param response: Instance of ``StreamingRecognitionResult`` protobuf.

        :rtype: :class:`~google.cloud.speech.result.StreamingSpeechResult`
        :returns: Instance of ``StreamingSpeechResult``.
        """
        # Convert each protobuf alternative into the library's own
        # ``Alternative`` type; an empty ``alternatives`` field yields [].
        alternatives = [Alternative.from_pb(alternative)
                        for alternative in response.alternatives]
        is_final = response.is_final
        stability = response.stability
        return cls(alternatives=alternatives, is_final=is_final,
                   stability=stability)

speech/unit_tests/test_client.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def _make_result(alternatives=()):
2828
)
2929

3030

31-
def _make_streaming_result(alternatives=(), is_final=True):
31+
def _make_streaming_result(alternatives=(), is_final=True, stability=1.0):
3232
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2
3333

3434
return cloud_speech_pb2.StreamingRecognitionResult(
@@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True):
3939
) for alternative in alternatives
4040
],
4141
is_final=is_final,
42+
stability=stability,
4243
)
4344

4445

@@ -477,6 +478,7 @@ def test_stream_recognize_interim_results(self):
477478

478479
from google.cloud.speech import _gax
479480
from google.cloud.speech.encoding import Encoding
481+
from google.cloud.speech.client import StreamingSpeechResult
480482

481483
stream = BytesIO(b'Some audio data...')
482484
credentials = _Credentials()
@@ -492,11 +494,13 @@ def test_stream_recognize_interim_results(self):
492494
'confidence': 0.0123456,
493495
}]
494496
first_response = _make_streaming_response(
495-
_make_streaming_result([], is_final=False))
497+
_make_streaming_result([], is_final=False, stability=0.122435))
496498
second_response = _make_streaming_response(
497-
_make_streaming_result(alternatives, is_final=False))
499+
_make_streaming_result(alternatives, is_final=False,
500+
stability=0.1432343))
498501
last_response = _make_streaming_response(
499-
_make_streaming_result(alternatives, is_final=True))
502+
_make_streaming_result(alternatives, is_final=True,
503+
stability=0.9834534))
500504
responses = [first_response, second_response, last_response]
501505

502506
channel_args = []
@@ -522,15 +526,28 @@ def speech_api(channel=None):
522526

523527
results = list(client.streaming_recognize(sample,
524528
interim_results=True))
525-
self.assertEqual(results[0], [])
526-
self.assertEqual(results[1][0].transcript,
529+
530+
self.assertEqual(len(results), 3)
531+
self.assertIsInstance(results[0], StreamingSpeechResult)
532+
self.assertEqual(results[0].alternatives, [])
533+
self.assertFalse(results[0].is_final)
534+
self.assertEqual(results[0].stability, 0.122435)
535+
self.assertEqual(results[1].stability, 0.1432343)
536+
self.assertFalse(results[1].is_final)
537+
self.assertEqual(results[1].alternatives[0].transcript,
527538
alternatives[0]['transcript'])
528-
self.assertEqual(results[1][0].confidence,
539+
self.assertEqual(results[1].alternatives[0].confidence,
529540
alternatives[0]['confidence'])
530-
self.assertEqual(results[1][1].transcript,
541+
self.assertEqual(results[1].alternatives[1].transcript,
531542
alternatives[1]['transcript'])
532-
self.assertEqual(results[1][1].confidence,
543+
self.assertEqual(results[1].alternatives[1].confidence,
533544
alternatives[1]['confidence'])
545+
self.assertTrue(results[2].is_final)
546+
self.assertEqual(results[2].stability, 0.9834534)
547+
self.assertEqual(results[2].alternatives[0].transcript,
548+
alternatives[0]['transcript'])
549+
self.assertEqual(results[2].alternatives[0].confidence,
550+
alternatives[0]['confidence'])
534551

535552
def test_stream_recognize(self):
536553
from io import BytesIO
@@ -583,9 +600,9 @@ def speech_api(channel=None):
583600

584601
results = list(client.streaming_recognize(sample))
585602
self.assertEqual(len(results), 1)
586-
self.assertEqual(results[0][0].transcript,
603+
self.assertEqual(results[0].alternatives[0].transcript,
587604
alternatives[0]['transcript'])
588-
self.assertEqual(results[0][0].confidence,
605+
self.assertEqual(results[0].alternatives[0].confidence,
589606
alternatives[0]['confidence'])
590607

591608
def test_stream_recognize_no_results(self):

system_tests/speech.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,15 +128,15 @@ def _make_streaming_request(self, file_obj, single_utterance=True,
128128
interim_results=interim_results,
129129
speech_context=['hello', 'google'])
130130

131-
def _check_results(self, results, num_results=1):
132-
self.assertEqual(len(results), num_results)
133-
top_result = results[0]
131+
def _check_results(self, alternatives, num_results=1):
132+
self.assertEqual(len(alternatives), num_results)
133+
top_result = alternatives[0]
134134
self.assertIsInstance(top_result, Alternative)
135135
self.assertEqual(top_result.transcript,
136136
'hello ' + self.ASSERT_TEXT)
137137
self.assertGreater(top_result.confidence, 0.90)
138138
if num_results == 2:
139-
second_alternative = results[1]
139+
second_alternative = alternatives[1]
140140
self.assertIsInstance(second_alternative, Alternative)
141141
self.assertEqual(second_alternative.transcript, self.ASSERT_TEXT)
142142
self.assertIsNone(second_alternative.confidence)
@@ -193,7 +193,7 @@ def test_stream_recognize(self):
193193

194194
with open(AUDIO_FILE, 'rb') as file_obj:
195195
for results in self._make_streaming_request(file_obj):
196-
self._check_results(results)
196+
self._check_results(results.alternatives)
197197

198198
def test_stream_recognize_interim_results(self):
199199
if not Config.USE_GAX:
@@ -208,12 +208,12 @@ def test_stream_recognize_interim_results(self):
208208
interim_results=True)
209209
responses = list(recognize)
210210
for response in responses:
211-
if response[0].transcript:
212-
self.assertIn(response[0].transcript,
211+
if response.alternatives[0].transcript:
212+
self.assertIn(response.alternatives[0].transcript,
213213
extras + self.ASSERT_TEXT)
214214

215215
self.assertGreater(len(responses), 5)
216-
self._check_results(responses[-1])
216+
self._check_results(responses[-1].alternatives)
217217

218218
def test_stream_recognize_single_utterance(self):
219219
if not Config.USE_GAX:
@@ -222,4 +222,4 @@ def test_stream_recognize_single_utterance(self):
222222
with open(AUDIO_FILE, 'rb') as file_obj:
223223
for results in self._make_streaming_request(
224224
file_obj, single_utterance=False):
225-
self._check_results(results)
225+
self._check_results(results.alternatives)

0 commit comments

Comments
 (0)