Skip to content
This repository was archived by the owner on Mar 23, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions localstack-core/localstack/services/transcribe/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
from localstack.utils.run import run
from localstack.utils.threads import start_thread

# Per the AWS FAQ, Amazon Transcribe batch jobs are limited to four hours (or 2 GB) of audio per API call.
# The streaming service can likewise keep a connection open for up to four hours.
# See https://aws.amazon.com/transcribe/faqs/
MAX_AUDIO_DURATION_SECONDS = 60 * 60 * 4
Comment thread
brunodmartins marked this conversation as resolved.

LOG = logging.getLogger(__name__)

VOSK_MODELS_URL = f"{HUGGING_FACE_ENDPOINT}/vosk-models/resolve/main/"
Expand Down Expand Up @@ -304,6 +309,11 @@ def _run_transcription_job(self, args: Tuple[TranscribeStore, str]):
format = ffprobe_output["format"]["format_name"]
LOG.debug("Media format detected as: %s", format)
job["MediaFormat"] = SUPPORTED_FORMAT_NAMES[format]
duration = ffprobe_output["format"]["duration"]

if float(duration) >= MAX_AUDIO_DURATION_SECONDS:
Comment thread
k-a-il marked this conversation as resolved.
failure_reason = "Invalid file size: file size too large. Maximum audio duration is 4.000000 hours.Check the length of the file and try your request again."
raise RuntimeError()

# Determine the sample rate of input audio if possible
for stream in ffprobe_output["streams"]:
Expand Down
Binary file added tests/aws/files/audio_4h.mp3
Binary file not shown.
29 changes: 29 additions & 0 deletions tests/aws/services/transcribe/test_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,3 +442,32 @@ def test_transcribe_error_speaker_labels(self, transcribe_create_job, aws_client
with pytest.raises(ParamValidationError) as e:
transcribe_create_job(audio_file=file_path, params=settings)
snapshot.match("err_speaker_labels_diarization", e.value)

@markers.aws.validated
@markers.snapshot.skip_snapshot_verify(
    paths=[
        "$..TranscriptionJob..Settings",
        "$..TranscriptionJob..Transcript",
        "$..TranscriptionJob..MediaFormat",
    ]
)
def test_transcribe_error_invalid_length(self, transcribe_create_job, aws_client, snapshot):
    """Verify that a job created from the ``audio_4h.mp3`` fixture ends up ``FAILED``.

    The test creates a transcription job for the fixture, polls the job until its
    status is ``FAILED``, and then snapshots the full job description under the
    ``TranscribeErrorInvalidLength`` key.
    """
    file_path = os.path.join(BASEDIR, "../../files/audio_4h.mp3")
    job_name = transcribe_create_job(audio_file=file_path)

    def _job_has_failed():
        # Only the FAILED state satisfies this check; any other status keeps polling.
        response = aws_client.transcribe.get_transcription_job(TranscriptionJobName=job_name)
        return response["TranscriptionJob"]["TranscriptionJobStatus"] == "FAILED"

    # Timeout follows the empirical timings noted when the test was written:
    # under 5 seconds for a vosk transcription, around 100 seconds for an AWS one.
    reached_failed_state = poll_condition(_job_has_failed, timeout=100)
    assert reached_failed_state, f"could not finish transcription job: {job_name} in time"

    job = aws_client.transcribe.get_transcription_job(TranscriptionJobName=job_name)
    snapshot.match("TranscribeErrorInvalidLength", job)
27 changes: 27 additions & 0 deletions tests/aws/services/transcribe/test_transcribe.snapshot.json
Original file line number Diff line number Diff line change
Expand Up @@ -893,5 +893,32 @@
"recorded-content": {
"err_speaker_labels_diarization": "Parameter validation failed:\nInvalid value for parameter Settings.MaxSpeakerLabels, value: 1, valid min value: 2"
}
},
"tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_invalid_length": {
"recorded-date": "12-04-2025, 16:02:39",
"recorded-content": {
"TranscribeErrorInvalidLength": {
"TranscriptionJob": {
"CreationTime": "datetime",
"FailureReason": "Invalid file size: file size too large. Maximum audio duration is 4.000000 hours.Check the length of the file and try your request again.",
"LanguageCode": "en-GB",
"Media": {
"MediaFileUri": "s3:/<test-bucket>/test-clip.wav"
},
"Settings": {
"ChannelIdentification": false,
"ShowAlternatives": false
},
"StartTime": "datetime",
"Transcript": {},
"TranscriptionJobName": "<transcription-job:1>",
"TranscriptionJobStatus": "FAILED"
},
"ResponseMetadata": {
"HTTPHeaders": {},
"HTTPStatusCode": 200
}
}
}
}
}
3 changes: 3 additions & 0 deletions tests/aws/services/transcribe/test_transcribe.validation.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
"tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_list_transcription_jobs": {
"last_validated_date": "2023-10-06T15:11:25+00:00"
},
"tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_invalid_length": {
"last_validated_date": "2025-04-12T16:02:38+00:00"
},
"tests/aws/services/transcribe/test_transcribe.py::TestTranscribe::test_transcribe_error_speaker_labels": {
"last_validated_date": "2025-03-19T15:42:06+00:00"
},
Expand Down