Skip to content

Commit 9f2c34e

Browse files
committed
Add stability information to streaming results. Fixes #2702.
1 parent 1abfcbe commit 9f2c34e

File tree

4 files changed

+123
-32
lines changed

4 files changed

+123
-32
lines changed

docs/speech-usage.rst

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,10 @@ speech data to possible text alternatives on the fly.
171171
... sample = client.sample(content=stream,
172172
... encoding=speech.Encoding.LINEAR16,
173173
... sample_rate=16000)
174-
... alternatives = list(client.streaming_recognize(sample))
175-
>>> print(alternatives[0].transcript)
174+
... results = list(client.streaming_recognize(sample))
175+
>>> print(results[0].alternatives[0].transcript)
176176
'hello'
177-
>>> print(alternatives[0].confidence)
177+
>>> print(results[0].alternatives[0].confidence)
178178
0.973458576
179179
180180
@@ -196,10 +196,10 @@ See: `Single Utterance`_
196196
... sample_rate=16000)
197197
... responses = client.streaming_recognize(sample,
198198
... single_utterance=True)
199-
... alternatives = list(responses)
200-
>>> print(alternatives[0].transcript)
199+
... results = list(responses)
200+
>>> print(results[0].alternatives[0].transcript)
201201
hello
202-
>>> print(alternatives[0].confidence)
202+
>>> print(results[0].alternatives[0].confidence)
203203
0.96523453546
204204
205205
@@ -214,20 +214,28 @@ If ``interim_results`` is set to :data:`True`, interim results
214214
... sample = client.sample(content=stream,
215215
... encoding=speech.Encoding.LINEAR16,
216216
... sample_rate=16000)
217-
... for alternatives in client.streaming_recognize(sample,
218-
... interim_results=True):
217+
... for results in client.streaming_recognize(sample,
218+
... interim_results=True):
219219
... print('=' * 20)
220-
... print(alternatives[0].transcript)
221-
... print(alternatives[0].confidence)
220+
... print(results.alternatives[0].transcript)
221+
... print(results.alternatives[0].confidence)
222+
... print(results.is_final)
223+
... print(results.stability)
222224
====================
223225
'he'
224226
None
227+
False
228+
0.113245
225229
====================
226230
'hell'
227231
None
232+
False
233+
0.132454
228234
====================
229235
'hello'
230236
0.973458576
237+
True
238+
0.982345
231239
232240
233241
.. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig

speech/google/cloud/speech/client.py

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,7 @@ def streaming_recognize(self, sample, language_code=None,
242242
for response in responses:
243243
for result in response.results:
244244
if result.is_final or interim_results:
245-
yield [Alternative.from_pb(alternative)
246-
for alternative in result.alternatives]
245+
yield StreamingSpeechResult.from_pb(result)
247246

248247
def sync_recognize(self, sample, language_code=None,
249248
max_alternatives=None, profanity_filter=None,
@@ -299,6 +298,74 @@ def sync_recognize(self, sample, language_code=None,
299298
profanity_filter, speech_context)
300299

301300

301+
class StreamingSpeechResult(object):
302+
"""Streaming speech result representation.
303+
304+
:type alternatives: list
305+
:param alternatives: List of protobuf speech alternatives.
306+
307+
:type is_final: bool
308+
:param is_final: Boolean indicator of results finality.
309+
310+
:type stability: float
311+
:param stability: 0.0-1.0 stability score for the results returned.
312+
313+
:rtype: :class:`~google.cloud.speech.client.StreamingSpeechResult`
314+
:returns: Instance of ``StreamingSpeechResult``.
315+
"""
316+
def __init__(self, alternatives, is_final=False, stability=0.0):
317+
self._alternatives = [Alternative.from_pb(alternative)
318+
for alternative in alternatives]
319+
self._is_final = is_final
320+
self._stability = stability
321+
322+
@classmethod
323+
def from_pb(cls, response):
324+
"""Factory: construct instance of ``StreamingSpeechResult``.
325+
326+
:type response: :class:`~google.cloud.grpc.speech.v1beta1\
327+
.cloud_speech_pb2.StreamingRecognitionResult`
328+
:param response: Instance of ``StreamingRecognitionResult`` protobuf.
329+
330+
:rtype: :class:`~google.cloud.speech.client.StreamingSpeechResult`
331+
:returns: Instance of ``StreamingSpeechResult``.
332+
"""
333+
alternatives = response.alternatives
334+
is_final = response.is_final
335+
stability = response.stability
336+
return cls(alternatives=alternatives, is_final=is_final,
337+
stability=stability)
338+
339+
@property
340+
def alternatives(self):
341+
"""List of alternative transcripts.
342+
343+
:rtype: list of :class:`~google.cloud.speech.alternative.Alternative`
344+
:returns: List of ``Alternative`` instances.
345+
"""
346+
return self._alternatives
347+
348+
@property
349+
def is_final(self):
350+
"""Boolean indicator of result finality.
351+
352+
:rtype: bool
353+
:returns: True if this result is final and no more processing will
354+
occur. False if more processing will be done and results
355+
may change.
356+
"""
357+
return self._is_final
358+
359+
@property
360+
def stability(self):
361+
"""Result stability indicator.
362+
363+
:rtype: float
364+
:returns: 0.0-1.0 value indicating the stability of the current results.
365+
"""
366+
return self._stability
367+
368+
302369
class _JSONSpeechAPI(object):
303370
"""Speech API for interacting with the JSON/REST version of the API.
304371

speech/unit_tests/test_client.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def _make_result(alternatives=()):
2828
)
2929

3030

31-
def _make_streaming_result(alternatives=(), is_final=True):
31+
def _make_streaming_result(alternatives=(), is_final=True, stability=1.0):
3232
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2
3333

3434
return cloud_speech_pb2.StreamingRecognitionResult(
@@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True):
3939
) for alternative in alternatives
4040
],
4141
is_final=is_final,
42+
stability=stability,
4243
)
4344

4445

@@ -476,6 +477,7 @@ def test_stream_recognize_interim_results(self):
476477

477478
from google.cloud.speech import _gax
478479
from google.cloud.speech.encoding import Encoding
480+
from google.cloud.speech.client import StreamingSpeechResult
479481

480482
stream = BytesIO(b'Some audio data...')
481483
credentials = _Credentials()
@@ -491,11 +493,13 @@ def test_stream_recognize_interim_results(self):
491493
'confidence': 0.0123456,
492494
}]
493495
first_response = _make_streaming_response(
494-
_make_streaming_result([], is_final=False))
496+
_make_streaming_result([], is_final=False, stability=0.122435))
495497
second_response = _make_streaming_response(
496-
_make_streaming_result(alternatives, is_final=False))
498+
_make_streaming_result(alternatives, is_final=False,
499+
stability=0.1432343))
497500
last_response = _make_streaming_response(
498-
_make_streaming_result(alternatives, is_final=True))
501+
_make_streaming_result(alternatives, is_final=True,
502+
stability=0.9834534))
499503
responses = [first_response, second_response, last_response]
500504

501505
channel_args = []
@@ -521,15 +525,27 @@ def speech_api(channel=None):
521525

522526
results = list(client.streaming_recognize(sample,
523527
interim_results=True))
524-
self.assertEqual(results[0], [])
525-
self.assertEqual(results[1][0].transcript,
528+
529+
self.assertIsInstance(results[0], StreamingSpeechResult)
530+
self.assertEqual(results[0].alternatives, [])
531+
self.assertFalse(results[0].is_final)
532+
self.assertEqual(results[0].stability, 0.122435)
533+
self.assertEqual(results[1].stability, 0.1432343)
534+
self.assertFalse(results[1].is_final)
535+
self.assertEqual(results[1].alternatives[0].transcript,
526536
alternatives[0]['transcript'])
527-
self.assertEqual(results[1][0].confidence,
537+
self.assertEqual(results[1].alternatives[0].confidence,
528538
alternatives[0]['confidence'])
529-
self.assertEqual(results[1][1].transcript,
539+
self.assertEqual(results[1].alternatives[1].transcript,
530540
alternatives[1]['transcript'])
531-
self.assertEqual(results[1][1].confidence,
541+
self.assertEqual(results[1].alternatives[1].confidence,
532542
alternatives[1]['confidence'])
543+
self.assertTrue(results[2].is_final)
544+
self.assertEqual(results[2].stability, 0.9834534)
545+
self.assertEqual(results[2].alternatives[0].transcript,
546+
alternatives[0]['transcript'])
547+
self.assertEqual(results[2].alternatives[0].confidence,
548+
alternatives[0]['confidence'])
533549

534550
def test_stream_recognize(self):
535551
from io import BytesIO
@@ -582,9 +598,9 @@ def speech_api(channel=None):
582598

583599
results = list(client.streaming_recognize(sample))
584600
self.assertEqual(len(results), 1)
585-
self.assertEqual(results[0][0].transcript,
601+
self.assertEqual(results[0].alternatives[0].transcript,
586602
alternatives[0]['transcript'])
587-
self.assertEqual(results[0][0].confidence,
603+
self.assertEqual(results[0].alternatives[0].confidence,
588604
alternatives[0]['confidence'])
589605

590606
def test_stream_recognize_no_results(self):

system_tests/speech.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -127,15 +127,15 @@ def _make_streaming_request(self, file_obj, single_utterance=True,
127127
single_utterance=single_utterance,
128128
interim_results=interim_results)
129129

130-
def _check_results(self, results, num_results=1):
131-
self.assertEqual(len(results), num_results)
132-
top_result = results[0]
130+
def _check_results(self, alternatives, num_results=1):
131+
self.assertEqual(len(alternatives), num_results)
132+
top_result = alternatives[0]
133133
self.assertIsInstance(top_result, Alternative)
134134
self.assertEqual(top_result.transcript,
135135
'hello ' + self.ASSERT_TEXT)
136136
self.assertGreater(top_result.confidence, 0.90)
137137
if num_results == 2:
138-
second_alternative = results[1]
138+
second_alternative = alternatives[1]
139139
self.assertIsInstance(second_alternative, Alternative)
140140
self.assertEqual(second_alternative.transcript, self.ASSERT_TEXT)
141141
self.assertIsNone(second_alternative.confidence)
@@ -192,7 +192,7 @@ def test_stream_recognize(self):
192192

193193
with open(AUDIO_FILE, 'rb') as file_obj:
194194
for results in self._make_streaming_request(file_obj):
195-
self._check_results(results)
195+
self._check_results(results.alternatives)
196196

197197
def test_stream_recognize_interim_results(self):
198198
if not Config.USE_GAX:
@@ -207,12 +207,12 @@ def test_stream_recognize_interim_results(self):
207207
interim_results=True)
208208
responses = list(recognize)
209209
for response in responses:
210-
if response[0].transcript:
211-
self.assertIn(response[0].transcript,
210+
if response.alternatives[0].transcript:
211+
self.assertIn(response.alternatives[0].transcript,
212212
extras + self.ASSERT_TEXT)
213213

214214
self.assertGreater(len(responses), 5)
215-
self._check_results(responses[-1])
215+
self._check_results(responses[-1].alternatives)
216216

217217
def test_stream_recognize_single_utterance(self):
218218
if not Config.USE_GAX:
@@ -221,4 +221,4 @@ def test_stream_recognize_single_utterance(self):
221221
with open(AUDIO_FILE, 'rb') as file_obj:
222222
for results in self._make_streaming_request(
223223
file_obj, single_utterance=False):
224-
self._check_results(results)
224+
self._check_results(results.alternatives)

0 commit comments

Comments
 (0)