1+ # importing libraries
2+ import speech_recognition as sr
3+ import os
4+ from pydub import AudioSegment
5+ from pydub .silence import split_on_silence
6+
# one shared recognizer instance, reused by every transcription function below
r = sr.Recognizer()
9+
10+ # a function to recognize speech in the audio file
11+ # so that we don't repeat ourselves in other functions
def transcribe_audio(path):
    """Return the text recognized in the audio file at ``path``.

    The whole file is read through the module-level recognizer ``r`` and
    sent to ``r.recognize_google``. Nothing is caught here, so any
    exception raised while opening, reading or recognizing the audio
    propagates to the caller.
    """
    # open the file as an audio source and load all of it into memory
    with sr.AudioFile(path) as audio_source:
        recording = r.record(audio_source)
        # hand the recording to the recognizer and return its transcript
        return r.recognize_google(recording)
19+
20+ # a function that splits the audio file into chunks on silence
21+ # and applies speech recognition
def get_large_audio_transcription_on_silence(
    path,
    min_silence_len=500,
    silence_thresh_offset=14,
    keep_silence=500,
    folder_name="audio-chunks",
):
    """Transcribe a large audio file by splitting it on silence.

    The file is opened with pydub, split wherever a long enough quiet
    stretch is found, and every chunk is exported as a WAV file into
    ``folder_name`` and passed to ``transcribe_audio``. Chunks the
    recognizer cannot understand are reported on stdout and skipped.

    Args:
        path: Audio file to transcribe.
        min_silence_len: Minimum length of a silence, in milliseconds,
            that is used as a split point. Experiment with this value
            for your target audio file.
        silence_thresh_offset: Number of dB below the file's ``dBFS``
            loudness under which audio is treated as silence.
        keep_silence: Milliseconds of silence kept around each chunk so
            the words are not cut off abruptly.
        folder_name: Directory that receives the exported chunk files;
            created if it does not exist.

    Returns:
        The recognized chunks concatenated into one string, each one
        capitalized and followed by ". ". Empty if nothing was recognized.

    Raises:
        Only ``sr.UnknownValueError`` is handled here; any other exception
        raised while loading, exporting or recognizing propagates.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # split where silence lasts `min_silence_len` milliseconds or more
    chunks = split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        # the threshold is relative to the loudness of this particular file
        silence_thresh=sound.dBFS - silence_thresh_offset,
        keep_silence=keep_silence,
    )
    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs(folder_name, exist_ok=True)
    sentences = []
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        # export() returns the open output file; close it right away so
        # handles do not pile up when the audio has many chunks
        audio_chunk.export(chunk_filename, format="wav").close()
        # recognize the chunk with the shared helper
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            # unintelligible chunk: report it and carry on with the rest
            print("Error:", str(e))
            continue
        text = f"{text.capitalize()}. "
        print(chunk_filename, ":", text)
        sentences.append(text)
    # return the text for all chunks detected
    return "".join(sentences)
63+
64+
65+ # a function that splits the audio file into fixed interval chunks
66+ # and applies speech recognition
def get_large_audio_transcription_fixed_interval(
    path,
    minutes=5,
    folder_name="audio-fixed-chunks",
):
    """Transcribe a large audio file by splitting it into fixed intervals.

    The file is opened with pydub and sliced into consecutive pieces of
    ``minutes`` minutes (the last piece may be shorter). Every piece is
    exported as a WAV file into ``folder_name`` and passed to
    ``transcribe_audio``. Pieces the recognizer cannot understand are
    reported on stdout and skipped.

    Args:
        path: Audio file to transcribe.
        minutes: Length of each chunk in minutes; fractions are allowed
            as long as the chunk is at least one millisecond long.
        folder_name: Directory that receives the exported chunk files;
            created if it does not exist.

    Returns:
        The recognized chunks concatenated into one string, each one
        capitalized and followed by ". ". Empty if nothing was recognized.

    Raises:
        ValueError: If ``minutes`` does not give a positive chunk length.
        Only ``sr.UnknownValueError`` is handled here; any other exception
        raised while loading, exporting or recognizing propagates.
    """
    chunk_length_ms = int(1000 * 60 * minutes)  # convert to milliseconds
    # validate before decoding the file: a zero step would make range() fail
    # with an unrelated message and a negative one would yield no chunks
    if chunk_length_ms <= 0:
        raise ValueError(
            f"minutes must give a chunk of at least 1 ms, got {minutes!r}"
        )
    # open the audio file using pydub
    sound = AudioSegment.from_file(path)
    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs(folder_name, exist_ok=True)
    sentences = []
    # walk over the start offset of every fixed-length chunk
    offsets = range(0, len(sound), chunk_length_ms)
    for i, start in enumerate(offsets, start=1):
        audio_chunk = sound[start:start + chunk_length_ms]
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        # export() returns the open output file; close it right away so
        # handles do not pile up when the audio has many chunks
        audio_chunk.export(chunk_filename, format="wav").close()
        # recognize the chunk with the shared helper
        try:
            text = transcribe_audio(chunk_filename)
        except sr.UnknownValueError as e:
            # unintelligible chunk: report it and carry on with the rest
            print("Error:", str(e))
            continue
        text = f"{text.capitalize()}. "
        print(chunk_filename, ":", text)
        sentences.append(text)
    # return the text for all chunks detected
    return "".join(sentences)
102+
103+
if __name__ == "__main__":
    import sys

    # transcribe the file named on the command line, falling back to the
    # bundled sample so running the script without arguments still works
    audio_path = sys.argv[1] if len(sys.argv) > 1 else "7601-291468-0006.wav"
    print(get_large_audio_transcription_on_silence(audio_path))
0 commit comments