Skip to content

Commit 26a3c4b

Browse files
authored
Update streaming_transcription.py to allow longer audio input per stream (GoogleCloudPlatform#13891)
* Update streaming_transcription.py to allow longer audio input per stream * Configure output_multiple_utterances=true for long audio streams * Update comments per Gemini suggestion. * Update lint. * Disable output_multiple_utterances by default. * Update lint and revert license header change.
1 parent 867a6fd commit 26a3c4b

File tree

4 files changed

+17
-6
lines changed

4 files changed

+17
-6
lines changed

dialogflow/detect_intent_texts_with_location.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def detect_intent_texts_with_location(
5858
print("=" * 20)
5959
print(f"Query text: {response.query_result.query_text}")
6060
print(
61-
f"Detected intent: {response.query_result.intent.display_name} (confidence: {response.query_result.intent_detection_confidence,})\n"
61+
f"Detected intent: {response.query_result.intent.display_name} (confidence: {response.query_result.intent_detection_confidence})\n"
6262
)
6363
print(f"Fulfillment text: {response.query_result.fulfillment_text}\n")
6464

dialogflow/participant_management.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ def analyze_content_audio_stream(
196196
timeout: int,
197197
language_code: str,
198198
single_utterance=False,
199+
output_multiple_utterances=False,
199200
):
200201
import google.auth
201202
from google.cloud import dialogflow_v2beta1 as dialogflow
@@ -231,7 +232,9 @@ def gen_requests(participant_name, audio_config, stream):
231232
"""Generates requests for streaming."""
232233
audio_generator = stream.generator()
233234
yield dialogflow.types.participant.StreamingAnalyzeContentRequest(
234-
participant=participant_name, audio_config=audio_config
235+
participant=participant_name,
236+
audio_config=audio_config,
237+
output_multiple_utterances=output_multiple_utterances
235238
)
236239
for content in audio_generator:
237240
yield dialogflow.types.participant.StreamingAnalyzeContentRequest(

dialogflow/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
google-cloud-dialogflow==2.36.0
1+
google-cloud-dialogflow==2.46.0
22
Flask==3.0.3
33
pyaudio==0.2.14
44
termcolor==3.0.0

dialogflow/streaming_transcription.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import re
3535
import sys
3636

37-
from google.api_core.exceptions import DeadlineExceeded
37+
from google.api_core.exceptions import DeadlineExceeded, OutOfRange
3838

3939
import pyaudio
4040

@@ -51,6 +51,7 @@
5151
CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms
5252
RESTART_TIMEOUT = 160 # seconds
5353
MAX_LOOKBACK = 3 # seconds
54+
HALF_CLOSE_DURATION_MS = 90 * 1000 # milliseconds
5455

5556
YELLOW = "\033[0;33m"
5657

@@ -198,6 +199,9 @@ def main():
198199
timeout=RESTART_TIMEOUT,
199200
language_code="en-US",
200201
single_utterance=False,
202+
# Uncomment to process multiple utterances detected in the audio stream
203+
# individually instead of stitching together to form a single utterance.
204+
# output_multiple_utterances=True,
201205
)
202206

203207
# Now, print the final transcription responses to user.
@@ -213,8 +217,10 @@ def main():
213217
offset.seconds * 1000 + offset.microseconds / 1000
214218
)
215219
transcript = response.recognition_result.transcript
216-
# Half-close the stream with gRPC (in Python just stop yielding requests)
217-
stream.is_final = True
220+
# Half-close upon final results for better streaming experiences
221+
# (in Python just stop yielding requests)
222+
if stream.is_final_offset > HALF_CLOSE_DURATION_MS:
223+
stream.is_final = True
218224
# Exit recognition if any of the transcribed phrase could be
219225
# one of our keywords.
220226
if re.search(r"\b(exit|quit)\b", transcript, re.I):
@@ -223,6 +229,8 @@ def main():
223229
terminate = True
224230
stream.closed = True
225231
break
232+
except OutOfRange:
233+
print("Maximum audio duration exceeded in the stream, restarting.")
226234
except DeadlineExceeded:
227235
print("Deadline Exceeded, restarting.")
228236

0 commit comments

Comments
 (0)