Make sample.content handle bytes and files.

daspecster · daspecster · commit 8f3386534c9b · 2016-12-16T17:52:01.000-05:00
diff --git a/speech/google/cloud/speech/_gax.py b/speech/google/cloud/speech/_gax.py
@@ -99,8 +99,7 @@ def async_recognize(self, sample, language_code=None,
             profanity_filter=profanity_filter,
             speech_context=SpeechContext(phrases=speech_context))
 
-        audio = RecognitionAudio(content=sample.content,
-                                 uri=sample.source_uri)
+        audio = RecognitionAudio(content=sample.content, uri=sample.source_uri)
         api = self._gapic_api
         response = api.async_recognize(config=config, audio=audio)
 
@@ -182,9 +181,7 @@ def streaming_recognize(self, sample, language_code=None,
                        .cloud_speech_pb2.StreamingRecognizeResponse`
         :returns: ``StreamingRecognizeResponse`` instances.
         """
-        if getattr(sample.content, 'closed', None) is None:
-            raise ValueError('Please use file-like object for data stream.')
-        if sample.content.closed:
+        if sample.stream.closed:
             raise ValueError('Stream is closed.')
 
         requests = _stream_requests(sample, language_code=language_code,
@@ -252,9 +249,7 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
             language_code=language_code, max_alternatives=max_alternatives,
             profanity_filter=profanity_filter,
             speech_context=SpeechContext(phrases=speech_context))
-
-        audio = RecognitionAudio(content=sample.content,
-                                 uri=sample.source_uri)
+        audio = RecognitionAudio(content=sample.content, uri=sample.source_uri)
         api = self._gapic_api
         api_response = api.sync_recognize(config=config, audio=audio)
         if len(api_response.results) == 1:
@@ -337,7 +332,7 @@ def _stream_requests(sample, language_code=None, max_alternatives=None,
     yield config_request
 
     while True:
-        data = sample.content.read(sample.chunk_size)
+        data = sample.stream.read(sample.chunk_size)
         if not data:
             break
         yield StreamingRecognizeRequest(audio_content=data)
diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py
@@ -67,8 +67,8 @@ def sample(self, content=None, source_uri=None, encoding=None,
                sample_rate=None):
         """Factory: construct Sample to use when making recognize requests.
 
-        :type content: bytes
-        :param content: (Optional) Byte stream of audio.
+        :type content: bytes or file
+        :param content: (Optional) Byte stream of audio or file-like object.
 
         :type source_uri: str
         :param source_uri: (Optional) URI that points to a file that contains
diff --git a/speech/google/cloud/speech/sample.py b/speech/google/cloud/speech/sample.py
@@ -14,15 +14,19 @@
 
 """Sample class to handle content for Google Cloud Speech API."""
 
+from io import BytesIO
+
+from google.cloud._helpers import _to_bytes
+
 from google.cloud.speech.encoding import Encoding
 from google.cloud.speech.result import StreamingSpeechResult
 
 
 class Sample(object):
     """Representation of an audio sample to be used with Google Speech API.
 
-    :type content: bytes
-    :param content: (Optional) Byte stream of audio.
+    :type content: bytes or file
+    :param content: (Optional) Byte stream of audio or file-like object.
 
     :type source_uri: str
     :param source_uri: (Optional) URI that points to a file that contains
@@ -95,10 +99,15 @@ def source_uri(self):
     def content(self):
         """Bytes of audio content.
 
-        :rtype: bytes
-        :returns: Byte stream of audio content.
+        :rtype: bytes
+        :returns: Audio content; file-like content is read and returned.
         """
-        return self._content
+        if self._content is None:
+            return self._content
+        content = self._content
+        if getattr(self._content, 'read', None) is not None:
+            content = self._content.read()
+        return content
 
     @property
     def sample_rate(self):
@@ -109,6 +118,18 @@ def sample_rate(self):
         """
         return self._sample_rate
 
+    @property
+    def stream(self):
+        """Content as a file-like object (raw data is wrapped in ``BytesIO``).
+
+        :rtype: file
+        :returns: File-like object to stream.
+        """
+        stream = self._content
+        if stream is not None and getattr(stream, 'read', None) is None:
+            stream = BytesIO(_to_bytes(stream))
+        return stream
+
     @property
     def encoding(self):
         """Audio encoding type
diff --git a/speech/unit_tests/test__gax.py b/speech/unit_tests/test__gax.py
@@ -14,43 +14,6 @@
 
 import unittest
 
-import mock
-
-
-def _make_credentials():
-    import google.auth.credentials
-    return mock.Mock(spec=google.auth.credentials.Credentials)
-
-
-class TestGAPICSpeechAPI(unittest.TestCase):
-    SAMPLE_RATE = 16000
-
-    @staticmethod
-    def _get_target_class():
-        from google.cloud.speech._gax import GAPICSpeechAPI
-
-        return GAPICSpeechAPI
-
-    def _make_one(self, *args, **kw):
-        return self._get_target_class()(*args, **kw)
-
-    def test_use_bytes_instead_of_file_like_object(self):
-        from google.cloud import speech
-        from google.cloud.speech.sample import Sample
-
-        credentials = _make_credentials()
-        client = speech.Client(credentials=credentials, use_gax=True)
-        client.connection = _Connection()
-        client.connection.credentials = credentials
-
-        sample = Sample(content=b'', encoding=speech.Encoding.FLAC,
-                        sample_rate=self.SAMPLE_RATE)
-
-        api = self._make_one(client)
-        with self.assertRaises(ValueError):
-            api.streaming_recognize(sample)
-        self.assertEqual(client.connection._requested, [])
-
 
 class TestSpeechGAXMakeRequests(unittest.TestCase):
     SAMPLE_RATE = 16000
@@ -172,10 +135,3 @@ def test_stream_requests(self):
         self.assertEqual(streaming_request.audio_content, self.AUDIO_CONTENT)
         self.assertIsInstance(config_request.streaming_config,
                               StreamingRecognitionConfig)
-
-
-class _Connection(object):
-
-    def __init__(self, *responses):
-        self._responses = responses
-        self._requested = []
diff --git a/speech/unit_tests/test_client.py b/speech/unit_tests/test_client.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import unittest
-
 import mock
 
 
@@ -72,7 +71,7 @@ class TestClient(unittest.TestCase):
     SAMPLE_RATE = 16000
     HINTS = ['hi']
     AUDIO_SOURCE_URI = 'gs://sample-bucket/sample-recording.flac'
-    AUDIO_CONTENT = '/9j/4QNURXhpZgAASUkq'
+    AUDIO_CONTENT = b'testing 1 2 3'
 
     @staticmethod
     def _get_target_class():
@@ -125,14 +124,12 @@ def test_sync_recognize_content_with_optional_params_no_gax(self):
         from base64 import b64encode
 
         from google.cloud._helpers import _bytes_to_unicode
-        from google.cloud._helpers import _to_bytes
 
         from google.cloud import speech
         from google.cloud.speech.alternative import Alternative
         from unit_tests._fixtures import SYNC_RECOGNIZE_RESPONSE
 
-        _AUDIO_CONTENT = _to_bytes(self.AUDIO_CONTENT)
-        _B64_AUDIO_CONTENT = _bytes_to_unicode(b64encode(_AUDIO_CONTENT))
+        _B64_AUDIO_CONTENT = _bytes_to_unicode(b64encode(self.AUDIO_CONTENT))
         RETURNED = SYNC_RECOGNIZE_RESPONSE
         REQUEST = {
             'config': {
@@ -325,8 +322,7 @@ def speech_api(channel=None):
         self.assertIsInstance(low_level, _MockGAPICSpeechAPI)
         self.assertIs(low_level._channel, channel_obj)
         self.assertEqual(
-            channel_args,
-            [(creds, _gax.DEFAULT_USER_AGENT, host)])
+            channel_args, [(creds, _gax.DEFAULT_USER_AGENT, host)])
 
         results = sample.sync_recognize()
 
diff --git a/speech/unit_tests/test_sample.py b/speech/unit_tests/test_sample.py
@@ -42,6 +42,39 @@ def test_content_and_source_uri(self):
             self._make_one(content='awefawagaeragere',
                            source_uri=self.AUDIO_SOURCE_URI)
 
+    def test_content_to_stream(self):
+        from google.cloud.speech.encoding import Encoding
+
+        content = 'testing 1 2 3 4'
+        sample = self._make_one(content=content,
+                                encoding=Encoding.FLAC,
+                                sample_rate=self.SAMPLE_RATE)
+
+        self.assertIsNotNone(getattr(sample.stream, 'read', None))
+
+    def test_content_is_file_like(self):
+        from io import BytesIO
+        from google.cloud.speech.encoding import Encoding
+
+        test_bytes = b'testing 1 2 3 4'
+        content = BytesIO(test_bytes)
+        sample = self._make_one(content=content,
+                                encoding=Encoding.FLAC,
+                                sample_rate=self.SAMPLE_RATE)
+        self.assertEqual(sample.content, test_bytes)
+
+    def test_bytes_converts_to_file_like_object(self):
+        from google.cloud import speech
+        from google.cloud.speech.sample import Sample
+
+        test_bytes = b'testing 1 2 3'
+
+        sample = Sample(content=test_bytes, encoding=speech.Encoding.FLAC,
+                        sample_rate=self.SAMPLE_RATE)
+        self.assertEqual(sample.content, test_bytes)
+        self.assertEqual(sample.encoding, speech.Encoding.FLAC)
+        self.assertEqual(sample.sample_rate, self.SAMPLE_RATE)
+
     def test_sample_rates(self):
         from google.cloud.speech.encoding import Encoding