Skip to content

Commit 53650ba

Browse files
authored
fixit: clean up transcribe/microphone (GoogleCloudPlatform#10104)
## Description

Fixes #<ISSUE-NUMBER>

Note: Before submitting a pull request, please open an issue for discussion if you are not associated with Google.

## Checklist

- [ ] I have followed [Sample Guidelines from AUTHORING_GUIDE.MD](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md)
- [ ] README is updated to include [all relevant information](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md#readme-file)
- [ ] **Tests** pass: `nox -s py-3.9` (see [Test Environment Setup](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md#test-environment-setup))
- [ ] **Lint** pass: `nox -s lint` (see [Test Environment Setup](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md#test-environment-setup))
- [ ] These samples need a new **API enabled** in testing projects to pass (let us know which ones)
- [ ] These samples need a new/updated **env vars** in testing projects set to pass (let us know which ones)
- [ ] This sample adds a new sample directory, and I updated the [CODEOWNERS file](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/.github/CODEOWNERS) with the codeowners for this sample
- [ ] This sample adds a new **Product API**, and I updated the [Blunderbuss issue/PR auto-assigner](https://togithub.com/GoogleCloudPlatform/python-docs-samples/blob/main/.github/blunderbuss.yml) with the codeowners for this sample
- [ ] Please **merge** this PR for me once it is approved
1 parent 8b4947d commit 53650ba

3 files changed

Lines changed: 162 additions & 32 deletions

File tree

speech/microphone/transcribe_streaming_infinite.py

Lines changed: 81 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,33 @@
4444
YELLOW = "\033[0;33m"
4545

4646

47-
def get_current_time():
48-
"""Return Current Time in MS."""
47+
def get_current_time() -> int:
48+
"""Return Current Time in MS.
49+
50+
Returns:
51+
int: Current Time in MS.
52+
"""
4953

5054
return int(round(time.time() * 1000))
5155

5256

5357
class ResumableMicrophoneStream:
5458
"""Opens a recording stream as a generator yielding the audio chunks."""
5559

56-
def __init__(self, rate, chunk_size):
60+
def __init__(
61+
self: object,
62+
rate: int,
63+
chunk_size: int,
64+
) -> None:
65+
"""Creates a resumable microphone stream.
66+
67+
Args:
68+
self: The class instance.
69+
rate: The audio stream's sampling rate.
70+
chunk_size: The audio stream's chunk size.
71+
72+
returns: None
73+
"""
5774
self._rate = rate
5875
self.chunk_size = chunk_size
5976
self._num_channels = 1
@@ -82,13 +99,33 @@ def __init__(self, rate, chunk_size):
8299
stream_callback=self._fill_buffer,
83100
)
84101

85-
def __enter__(self):
102+
def __enter__(self: object) -> object:
103+
"""Opens the stream.
86104
105+
Args:
106+
self: The class instance.
107+
108+
returns: The class instance (self).
109+
"""
87110
self.closed = False
88111
return self
89112

90-
def __exit__(self, type, value, traceback):
91-
113+
def __exit__(
114+
self: object,
115+
type: object,
116+
value: object,
117+
traceback: object,
118+
) -> object:
119+
"""Closes the stream and releases resources.
120+
121+
Args:
122+
self: The class instance.
123+
type: The exception type.
124+
value: The exception value.
125+
traceback: The exception traceback.
126+
127+
returns: None
128+
"""
92129
self._audio_stream.stop_stream()
93130
self._audio_stream.close()
94131
self.closed = True
@@ -97,15 +134,34 @@ def __exit__(self, type, value, traceback):
97134
self._buff.put(None)
98135
self._audio_interface.terminate()
99136

100-
def _fill_buffer(self, in_data, *args, **kwargs):
101-
"""Continuously collect data from the audio stream, into the buffer."""
102-
137+
def _fill_buffer(
138+
self: object,
139+
in_data: object,
140+
*args: object,
141+
**kwargs: object,
142+
) -> object:
143+
"""Continuously collect data from the audio stream, into the buffer.
144+
145+
Args:
146+
self: The class instance.
147+
in_data: The audio data as a bytes object.
148+
args: Additional arguments.
149+
kwargs: Additional arguments.
150+
151+
returns: A tuple of (None, pyaudio.paContinue).
152+
"""
103153
self._buff.put(in_data)
104154
return None, pyaudio.paContinue
105155

106-
def generator(self):
107-
"""Stream Audio from microphone to API and to local buffer"""
156+
def generator(self: object) -> object:
157+
"""Stream Audio from microphone to API and to local buffer
108158
159+
Args:
160+
self: The class instance.
161+
162+
returns:
163+
The data from the audio stream.
164+
"""
109165
while not self.closed:
110166
data = []
111167

@@ -160,7 +216,10 @@ def generator(self):
160216
yield b"".join(data)
161217

162218

163-
def listen_print_loop(responses, stream):
219+
def listen_print_loop(
220+
responses: object,
221+
stream: object
222+
) -> object:
164223
"""Iterates through server responses and prints them.
165224
166225
The responses passed is a generator that will block until a response
@@ -174,8 +233,14 @@ def listen_print_loop(responses, stream):
174233
response is an interim one, print a line feed at the end of it, to allow
175234
the next result to overwrite it, until the response is a final one. For the
176235
final one, print a newline to preserve the finalized transcription.
177-
"""
178236
237+
Args:
238+
responses: The responses returned from the API.
239+
stream: The audio stream to be processed.
240+
241+
Returns:
242+
The transcript of the result
243+
"""
179244
for response in responses:
180245

181246
if get_current_time() - stream.start_time > STREAMING_LIMIT:
@@ -227,18 +292,18 @@ def listen_print_loop(responses, stream):
227292
sys.stdout.write("Exiting...\n")
228293
stream.closed = True
229294
break
230-
231295
else:
232296
sys.stdout.write(RED)
233297
sys.stdout.write("\033[K")
234298
sys.stdout.write(str(corrected_time) + ": " + transcript + "\r")
235299

236300
stream.last_transcript_was_final = False
237301

302+
return transcript
238303

239-
def main():
240-
"""start bidirectional streaming from microphone input to speech API"""
241304

305+
def main() -> None:
306+
"""start bidirectional streaming from microphone input to speech API"""
242307
client = speech.SpeechClient()
243308
config = speech.RecognitionConfig(
244309
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,

speech/microphone/transcribe_streaming_mic.py

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,21 @@
4141
class MicrophoneStream:
4242
"""Opens a recording stream as a generator yielding the audio chunks."""
4343

44-
def __init__(self, rate, chunk):
44+
def __init__(
45+
self: object,
46+
rate: int = RATE,
47+
chunk: int = CHUNK
48+
) -> None:
49+
"""The audio -- and generator -- is guaranteed to be on the main thread.
50+
"""
4551
self._rate = rate
4652
self._chunk = chunk
4753

4854
# Create a thread-safe buffer of audio data
4955
self._buff = queue.Queue()
5056
self.closed = True
5157

52-
def __enter__(self):
58+
def __enter__(self: object) -> object:
5359
self._audio_interface = pyaudio.PyAudio()
5460
self._audio_stream = self._audio_interface.open(
5561
format=pyaudio.paInt16,
@@ -69,7 +75,13 @@ def __enter__(self):
6975

7076
return self
7177

72-
def __exit__(self, type, value, traceback):
78+
def __exit__(
79+
self: object,
80+
type: object,
81+
value: object,
82+
traceback: object,
83+
) -> None:
84+
"""Closes the stream, regardless of whether the connection was lost or not."""
7385
self._audio_stream.stop_stream()
7486
self._audio_stream.close()
7587
self.closed = True
@@ -78,12 +90,36 @@ def __exit__(self, type, value, traceback):
7890
self._buff.put(None)
7991
self._audio_interface.terminate()
8092

81-
def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
82-
"""Continuously collect data from the audio stream, into the buffer."""
93+
def _fill_buffer(
94+
self: object,
95+
in_data: object,
96+
frame_count: int,
97+
time_info: object,
98+
status_flags: object,
99+
) -> object:
100+
"""Continuously collect data from the audio stream, into the buffer.
101+
102+
Args:
103+
in_data: The audio data as a bytes object
104+
frame_count: The number of frames captured
105+
time_info: The time information
106+
status_flags: The status flags
107+
108+
Returns:
109+
A tuple of (None, pyaudio.paContinue).
110+
"""
83111
self._buff.put(in_data)
84112
return None, pyaudio.paContinue
85113

86-
def generator(self):
114+
def generator(self: object) -> object:
115+
"""Generates audio chunks from the stream of audio data.
116+
117+
Args:
118+
self: The MicrophoneStream object
119+
120+
Returns:
121+
A generator that outputs audio chunks.
122+
"""
87123
while not self.closed:
88124
# Use a blocking get() to ensure there's at least one chunk of
89125
# data, and stop iteration if the chunk is None, indicating the
@@ -106,7 +142,7 @@ def generator(self):
106142
yield b"".join(data)
107143

108144

109-
def listen_print_loop(responses):
145+
def listen_print_loop(responses: object) -> str:
110146
"""Iterates through server responses and prints them.
111147
112148
The responses passed is a generator that will block until a response
@@ -120,6 +156,12 @@ def listen_print_loop(responses):
120156
response is an interim one, print a line feed at the end of it, to allow
121157
the next result to overwrite it, until the response is a final one. For the
122158
final one, print a newline to preserve the finalized transcription.
159+
160+
Args:
161+
responses: Generator of server responses.
162+
163+
Returns:
164+
The transcribed text.
123165
"""
124166
num_chars_printed = 0
125167
for response in responses:
@@ -160,8 +202,11 @@ def listen_print_loop(responses):
160202

161203
num_chars_printed = 0
162204

205+
return transcript
206+
163207

164-
def main():
208+
def main() -> None:
209+
"""Transcribe speech from microphone input."""
165210
# See http://g.co/cloud/speech/docs/languages
166211
# for a list of supported languages.
167212
language_code = "en-US" # a BCP-47 language tag

speech/microphone/transcribe_streaming_mic_test.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,31 @@
1919

2020
from unittest import mock
2121

22+
import pytest
23+
2224
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
2325

2426

2527
class MockPyAudio:
26-
def __init__(self, audio_filename):
28+
def __init__(
29+
self: object,
30+
audio_filename: str
31+
) -> None:
2732
self.audio_filename = audio_filename
2833

29-
def __call__(self, *args):
34+
def __call__(
35+
self: object,
36+
*args: object
37+
) -> object:
3038
return self
3139

32-
def open(self, stream_callback, rate, *args, **kwargs):
40+
def open(
41+
self: object,
42+
stream_callback: object,
43+
rate: int,
44+
*args: object,
45+
**kwargs: object
46+
) -> object:
3347
self.rate = rate
3448
self.closed = threading.Event()
3549
self.stream_thread = threading.Thread(
@@ -39,16 +53,22 @@ def open(self, stream_callback, rate, *args, **kwargs):
3953
self.stream_thread.start()
4054
return self
4155

42-
def close(self):
56+
def close(self: object) -> None:
4357
self.closed.set()
4458

45-
def stop_stream(self):
59+
def stop_stream(self: object) -> None:
4660
pass
4761

48-
def terminate(self):
62+
def terminate(self: object) -> None:
4963
pass
5064

51-
def stream_audio(self, audio_filename, callback, closed, num_frames=512):
65+
def stream_audio(
66+
self: object,
67+
audio_filename: str,
68+
callback: object,
69+
closed: object,
70+
num_frames: int = 512,
71+
) -> None:
5272
with open(audio_filename, "rb") as audio_file:
5373
while not closed.is_set():
5474
# Approximate realtime by sleeping for the appropriate time for
@@ -64,7 +84,7 @@ def stream_audio(self, audio_filename, callback, closed, num_frames=512):
6484
"sys.modules",
6585
pyaudio=mock.MagicMock(PyAudio=MockPyAudio(os.path.join(RESOURCES, "quit.raw"))),
6686
)
67-
def test_main(capsys):
87+
def test_main(capsys: pytest.CaptureFixture) -> None:
6888
import transcribe_streaming_mic
6989

7090
transcribe_streaming_mic.main()

0 commit comments

Comments
 (0)