Skip to content

Commit 8f33865

Browse files
committed
Make sample.content handle bytes and files.
1 parent badbd9d commit 8f33865

6 files changed

Lines changed: 68 additions & 67 deletions

File tree

speech/google/cloud/speech/_gax.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,7 @@ def async_recognize(self, sample, language_code=None,
9999
profanity_filter=profanity_filter,
100100
speech_context=SpeechContext(phrases=speech_context))
101101

102-
audio = RecognitionAudio(content=sample.content,
103-
uri=sample.source_uri)
102+
audio = RecognitionAudio(content=sample.content, uri=sample.source_uri)
104103
api = self._gapic_api
105104
response = api.async_recognize(config=config, audio=audio)
106105

@@ -182,9 +181,7 @@ def streaming_recognize(self, sample, language_code=None,
182181
.cloud_speech_pb2.StreamingRecognizeResponse`
183182
:returns: ``StreamingRecognizeResponse`` instances.
184183
"""
185-
if getattr(sample.content, 'closed', None) is None:
186-
raise ValueError('Please use file-like object for data stream.')
187-
if sample.content.closed:
184+
if sample.stream.closed:
188185
raise ValueError('Stream is closed.')
189186

190187
requests = _stream_requests(sample, language_code=language_code,
@@ -252,9 +249,7 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
252249
language_code=language_code, max_alternatives=max_alternatives,
253250
profanity_filter=profanity_filter,
254251
speech_context=SpeechContext(phrases=speech_context))
255-
256-
audio = RecognitionAudio(content=sample.content,
257-
uri=sample.source_uri)
252+
audio = RecognitionAudio(content=sample.content, uri=sample.source_uri)
258253
api = self._gapic_api
259254
api_response = api.sync_recognize(config=config, audio=audio)
260255
if len(api_response.results) == 1:
@@ -337,7 +332,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
337332
yield config_request
338333

339334
while True:
340-
data = sample.content.read(sample.chunk_size)
335+
data = sample.stream.read(sample.chunk_size)
341336
if not data:
342337
break
343338
yield StreamingRecognizeRequest(audio_content=data)

speech/google/cloud/speech/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ def sample(self, content=None, source_uri=None, encoding=None,
6767
sample_rate=None):
6868
"""Factory: construct Sample to use when making recognize requests.
6969
70-
:type content: bytes
71-
:param content: (Optional) Byte stream of audio.
70+
:type content: bytes or file
71+
:param content: (Optional) Byte stream of audio or file-like object.
7272
7373
:type source_uri: str
7474
:param source_uri: (Optional) URI that points to a file that contains

speech/google/cloud/speech/sample.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,19 @@
1414

1515
"""Sample class to handle content for Google Cloud Speech API."""
1616

17+
from io import BytesIO
18+
19+
from google.cloud._helpers import _to_bytes
20+
1721
from google.cloud.speech.encoding import Encoding
1822
from google.cloud.speech.result import StreamingSpeechResult
1923

2024

2125
class Sample(object):
2226
"""Representation of an audio sample to be used with Google Speech API.
2327
24-
:type content: bytes
25-
:param content: (Optional) Byte stream of audio.
28+
:type content: bytes or file
29+
:param content: (Optional) Byte stream of audio or file-like object.
2630
2731
:type source_uri: str
2832
:param source_uri: (Optional) URI that points to a file that contains
@@ -95,10 +99,15 @@ def source_uri(self):
9599
def content(self):
96100
"""Bytes of audio content.
97101
98-
:rtype: bytes
99-
:returns: Byte stream of audio content.
102+
:rtype: bytes or file
103+
:returns: Byte stream of audio content or file-like object.
100104
"""
101-
return self._content
105+
if self._content is None:
106+
return self._content
107+
content = self._content
108+
if getattr(self._content, 'read', None) is not None:
109+
content = self._content.read()
110+
return content
102111

103112
@property
104113
def sample_rate(self):
@@ -109,6 +118,18 @@ def sample_rate(self):
109118
"""
110119
return self._sample_rate
111120

121+
@property
122+
def stream(self):
123+
"""Stream the content when it is a file-like object.
124+
125+
:rtype: file
126+
:returns: File-like object to stream.
127+
"""
128+
stream = self._content
129+
if stream is not None and getattr(stream, 'read', None) is None:
130+
stream = BytesIO(_to_bytes(stream))
131+
return stream
132+
112133
@property
113134
def encoding(self):
114135
"""Audio encoding type

speech/unit_tests/test__gax.py

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -14,43 +14,6 @@
1414

1515
import unittest
1616

17-
import mock
18-
19-
20-
def _make_credentials():
21-
import google.auth.credentials
22-
return mock.Mock(spec=google.auth.credentials.Credentials)
23-
24-
25-
class TestGAPICSpeechAPI(unittest.TestCase):
26-
SAMPLE_RATE = 16000
27-
28-
@staticmethod
29-
def _get_target_class():
30-
from google.cloud.speech._gax import GAPICSpeechAPI
31-
32-
return GAPICSpeechAPI
33-
34-
def _make_one(self, *args, **kw):
35-
return self._get_target_class()(*args, **kw)
36-
37-
def test_use_bytes_instead_of_file_like_object(self):
38-
from google.cloud import speech
39-
from google.cloud.speech.sample import Sample
40-
41-
credentials = _make_credentials()
42-
client = speech.Client(credentials=credentials, use_gax=True)
43-
client.connection = _Connection()
44-
client.connection.credentials = credentials
45-
46-
sample = Sample(content=b'', encoding=speech.Encoding.FLAC,
47-
sample_rate=self.SAMPLE_RATE)
48-
49-
api = self._make_one(client)
50-
with self.assertRaises(ValueError):
51-
api.streaming_recognize(sample)
52-
self.assertEqual(client.connection._requested, [])
53-
5417

5518
class TestSpeechGAXMakeRequests(unittest.TestCase):
5619
SAMPLE_RATE = 16000
@@ -172,10 +135,3 @@ def test_stream_requests(self):
172135
self.assertEqual(streaming_request.audio_content, self.AUDIO_CONTENT)
173136
self.assertIsInstance(config_request.streaming_config,
174137
StreamingRecognitionConfig)
175-
176-
177-
class _Connection(object):
178-
179-
def __init__(self, *responses):
180-
self._responses = responses
181-
self._requested = []

speech/unit_tests/test_client.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# limitations under the License.
1414

1515
import unittest
16-
1716
import mock
1817

1918

@@ -72,7 +71,7 @@ class TestClient(unittest.TestCase):
7271
SAMPLE_RATE = 16000
7372
HINTS = ['hi']
7473
AUDIO_SOURCE_URI = 'gs://sample-bucket/sample-recording.flac'
75-
AUDIO_CONTENT = '/9j/4QNURXhpZgAASUkq'
74+
AUDIO_CONTENT = b'testing 1 2 3'
7675

7776
@staticmethod
7877
def _get_target_class():
@@ -125,14 +124,12 @@ def test_sync_recognize_content_with_optional_params_no_gax(self):
125124
from base64 import b64encode
126125

127126
from google.cloud._helpers import _bytes_to_unicode
128-
from google.cloud._helpers import _to_bytes
129127

130128
from google.cloud import speech
131129
from google.cloud.speech.alternative import Alternative
132130
from unit_tests._fixtures import SYNC_RECOGNIZE_RESPONSE
133131

134-
_AUDIO_CONTENT = _to_bytes(self.AUDIO_CONTENT)
135-
_B64_AUDIO_CONTENT = _bytes_to_unicode(b64encode(_AUDIO_CONTENT))
132+
_B64_AUDIO_CONTENT = _bytes_to_unicode(b64encode(self.AUDIO_CONTENT))
136133
RETURNED = SYNC_RECOGNIZE_RESPONSE
137134
REQUEST = {
138135
'config': {
@@ -325,8 +322,7 @@ def speech_api(channel=None):
325322
self.assertIsInstance(low_level, _MockGAPICSpeechAPI)
326323
self.assertIs(low_level._channel, channel_obj)
327324
self.assertEqual(
328-
channel_args,
329-
[(creds, _gax.DEFAULT_USER_AGENT, host)])
325+
channel_args, [(creds, _gax.DEFAULT_USER_AGENT, host)])
330326

331327
results = sample.sync_recognize()
332328

speech/unit_tests/test_sample.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,39 @@ def test_content_and_source_uri(self):
4242
self._make_one(content='awefawagaeragere',
4343
source_uri=self.AUDIO_SOURCE_URI)
4444

45+
def test_content_to_stream(self):
46+
from google.cloud.speech.encoding import Encoding
47+
48+
content = 'testing 1 2 3 4'
49+
sample = self._make_one(content=content,
50+
encoding=Encoding.FLAC,
51+
sample_rate=self.SAMPLE_RATE)
52+
53+
self.assertIsNotNone(getattr(sample.stream, 'read', None))
54+
55+
def test_content_is_file_like(self):
56+
from io import BytesIO
57+
from google.cloud.speech.encoding import Encoding
58+
59+
test_bytes = b'testing 1 2 3 4'
60+
content = BytesIO(test_bytes)
61+
sample = self._make_one(content=content,
62+
encoding=Encoding.FLAC,
63+
sample_rate=self.SAMPLE_RATE)
64+
self.assertEqual(sample.content, test_bytes)
65+
66+
def test_bytes_converts_to_file_like_object(self):
67+
from google.cloud import speech
68+
from google.cloud.speech.sample import Sample
69+
70+
test_bytes = b'testing 1 2 3'
71+
72+
sample = Sample(content=test_bytes, encoding=speech.Encoding.FLAC,
73+
sample_rate=self.SAMPLE_RATE)
74+
self.assertEqual(sample.content, test_bytes)
75+
self.assertEqual(sample.encoding, speech.Encoding.FLAC)
76+
self.assertEqual(sample.sample_rate, self.SAMPLE_RATE)
77+
4578
def test_sample_rates(self):
4679
from google.cloud.speech.encoding import Encoding
4780

0 commit comments

Comments
 (0)