Skip to content

Commit e5184af

Browse files
committed
Add speech asynchronous recognize support.
1 parent b7f004e commit e5184af

File tree

13 files changed

+605
-72
lines changed

13 files changed

+605
-72
lines changed

docs/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@
173173

174174
speech-usage
175175
Client <speech-client>
176+
speech-encoding
177+
speech-operation
176178

177179
.. toctree::
178180
:maxdepth: 0

docs/speech-client.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Speech Client
2-
================
2+
=============
33

44
.. automodule:: google.cloud.speech.client
55
:members:

docs/speech-encoding.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Speech Encoding
2+
===============
3+
4+
.. automodule:: google.cloud.speech.encoding
5+
:members:
6+
:undoc-members:
7+
:show-inheritance:

docs/speech-operation.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Speech Operation
2+
================
3+
4+
.. automodule:: google.cloud.speech.operation
5+
:members:
6+
:undoc-members:
7+
:show-inheritance:

docs/speech-usage.rst

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ Using the API
22
=============
33

44
The `Google Speech`_ API enables developers to convert audio to text.
5-
The API recognizes over 80 languages and variants, to support your global user base.
5+
The API recognizes over 80 languages and variants, to support your global user
6+
base.
67

78
.. warning::
89

@@ -30,11 +31,41 @@ create an instance of :class:`~google.cloud.speech.client.Client`.
3031
>>> client = speech.Client()
3132
3233
34+
Asynchronous Recognition
35+
------------------------
36+
37+
The :meth:`~google.cloud.speech.Client.async_recognize` sends audio data to the
38+
Speech API and initiates a Long Running Operation. Using this operation, you
39+
can periodically poll for recognition results. Use asynchronous requests for
40+
audio data of any duration up to 80 minutes.
41+
42+
See: `Speech Asynchronous Recognize`_
43+
44+
45+
.. code-block:: python
46+
47+
>>> import time
48+
>>> operation = client.async_recognize(
49+
... None, 'gs://my-bucket/recording.flac',
50+
... 'FLAC', 16000, max_alternatives=2)
51+
>>> retry_count = 100
52+
>>> while retry_count > 0 and not operation.complete:
53+
... retry_count -= 1
54+
... time.sleep(10)
55+
... operation.poll() # API call
56+
>>> operation.complete
57+
True
58+
>>> operation.results[0]['alternatives'][0]['transcript']
59+
"how old is the Brooklyn Bridge"
60+
>>> operation.results[0]['alternatives'][0]['confidence']
61+
0.98267895
62+
63+
3364
Synchronous Recognition
3465
-----------------------
3566

36-
The :meth:`~google.cloud.speech.Client.sync_recognize` method converts speech data to text
37-
and returns alternative text transcriptons.
67+
The :meth:`~google.cloud.speech.Client.sync_recognize` method converts speech
68+
data to text and returns alternative text transcriptions.
3869

3970
.. code-block:: python
4071
@@ -53,3 +84,4 @@ and returns alternative text transcriptons.
5384
confidence: 0
5485
5586
.. _sync_recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/syncrecognize
87+
.. _Speech Asynchronous Recognize: https://cloud.google.com/speech/reference/rest/v1beta1/speech/asyncrecognize

google/cloud/speech/encoding.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright 2016 Google Inc. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Encodings used by the Google Cloud Speech API."""
16+
17+
18+
class Encoding(object):
    """Names of the audio encodings understood by the Speech API.

    Every attribute is a plain string whose value equals the attribute name,
    so a member can be passed anywhere an encoding name is expected.

    See:
    https://cloud.google.com/speech/reference/rest/v1beta1/\
    RecognitionConfig#AudioEncoding
    """

    #: LINEAR16 encoding type.
    LINEAR16 = 'LINEAR16'

    #: FLAC encoding type.
    FLAC = 'FLAC'

    #: MULAW encoding type.
    MULAW = 'MULAW'

    #: AMR encoding type.
    AMR = 'AMR'

    #: AMR_WB encoding type.
    AMR_WB = 'AMR_WB'

google/cloud/speech/operation.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
# Copyright 2016 Google Inc. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Long running operation representation for Google Speech API"""
16+
17+
from google.cloud._helpers import _rfc3339_to_datetime
18+
19+
20+
class Operation(object):
    """Representation of a Google API Long-Running Operation.

    :type client: :class:`~google.cloud.speech.client.Client`
    :param client: Client whose connection is used by :meth:`poll` to
                   request the latest state of the operation.

    :type name: str
    :param name: ID assigned to an operation.

    :type complete: bool
    :param complete: True if operation is complete, else False.

    :type last_updated: datetime
    :param last_updated: The last time the operation was updated.

    :type progress_percent: int
    :param progress_percent: Percentage of operation that has been completed.

    :type results: list
    :param results: Recognition results returned by the API, e.g. a list of
                    dictionaries each holding ``alternatives`` with a
                    ``transcript`` and a ``confidence``.

    :type start_time: datetime
    :param start_time: Datetime when operation was started.
    """
    def __init__(self, client, name, complete=False, last_updated=None,
                 progress_percent=0, results=None, start_time=None):
        self.client = client
        self.name = name
        self._complete = complete
        self._last_updated = last_updated
        self._progress_percent = progress_percent
        self._results = results
        self._start_time = start_time

    @staticmethod
    def _parse_timestamp(value):
        """Convert an optional RFC 3339 string from the API to a datetime.

        :type value: str or ``NoneType``
        :param value: Timestamp string taken from the operation metadata.

        :rtype: datetime or ``NoneType``
        :returns: The parsed timestamp, or ``None`` when ``value`` is empty
                  or missing.
        """
        if not value:
            return None
        return _rfc3339_to_datetime(value)

    @classmethod
    def from_api_repr(cls, client, response):
        """Factory: construct an instance from Google Speech API.

        Only ``name`` is required in ``response``. ``metadata``, ``response``
        and ``done`` are optional, and so is every key inside ``metadata``.

        :type client: :class:`~google.cloud.speech.client.Client`
        :param client: Client that the new operation will use for polling.

        :type response: dict
        :param response: Dictionary response from Google Speech Operations API.

        :rtype: :class:`Operation`
        :returns: Instance of `~google.cloud.speech.operation.Operation`.
        """
        last_updated = None
        progress_percent = 0
        start_time = None

        metadata = response.get('metadata')
        if metadata:
            last_updated = cls._parse_timestamp(
                metadata.get('lastUpdateTime'))
            start_time = cls._parse_timestamp(metadata.get('startTime'))
            # Fall back to 0 (the constructor default) rather than None so
            # ``progress_percent`` is always a number.
            progress_percent = metadata.get('progressPercent', 0)

        # ``response`` is only populated once the operation has finished.
        results = (response.get('response') or {}).get('results')
        complete = response.get('done', False)

        return cls(client, response['name'], complete, last_updated,
                   progress_percent, results, start_time)

    @property
    def complete(self):
        """Completion state of the `Operation`.

        :rtype: bool
        :returns: True if already completed, else false.
        """
        return self._complete

    @property
    def last_updated(self):
        """Operation last updated time.

        :rtype: datetime
        :returns: Last updated time of the operation, or ``None`` if the API
                  has not reported one.
        """
        return self._last_updated

    @property
    def progress_percent(self):
        """Progress percentage of operation.

        :rtype: int
        :returns: Percentage of operation completed. [0-100]
        """
        return self._progress_percent

    @property
    def results(self):
        """Recognition results with transcript information.

        :rtype: list or ``NoneType``
        :returns: Results reported by the API (transcript and confidence
                  score), or ``None`` if none have been received.
        """
        return self._results

    @property
    def start_time(self):
        """Operation start time.

        :rtype: datetime
        :returns: Start time of the operation, or ``None`` if the API has not
                  reported one.
        """
        return self._start_time

    def poll(self):
        """Check if the operation has finished.

        Issues a ``GET`` request for ``operations/<name>`` through the
        client's connection and refreshes this instance from the reply.

        :rtype: bool
        :returns: A boolean indicating if the current operation has completed.
        :raises: :class:`ValueError <exceptions.ValueError>` if the operation
                 has already completed.
        """
        if self.complete:
            raise ValueError('The operation has completed.')

        path = 'operations/%s' % (self.name,)
        api_response = self.client.connection.api_request(method='GET',
                                                          path=path)
        self._update(api_response)
        return self.complete

    def _update(self, response):
        """Update Operation instance with latest data from Speech API.

        Timestamps that are missing from ``response`` leave the previously
        stored values untouched instead of raising :class:`KeyError`.

        .. _speech_operations: https://cloud.google.com/speech/reference/\
                               rest/v1beta1/operations

        :type response: dict
        :param response: Response from Speech API Operations endpoint.
                         See: `speech_operations`_.
        """
        metadata = response.get('metadata')
        if metadata:
            last_updated = self._parse_timestamp(
                metadata.get('lastUpdateTime'))
            if last_updated is not None:
                self._last_updated = last_updated

            start_time = self._parse_timestamp(metadata.get('startTime'))
            if start_time is not None:
                self._start_time = start_time

            self._progress_percent = metadata.get('progressPercent', 0)

        self._results = (response.get('response') or {}).get('results')
        self._complete = response.get('done', False)

0 commit comments

Comments
 (0)