ffmpeg-python/examples/transcribe_many.py at extra-examples · wangfly/ffmpeg-python

executable file

43 lines (33 loc) · 1.57 KB

#!/usr/bin/env python
import argparse
import logging
import os
from functools import partial
from multiprocessing import Pool

from transcribe import transcribe_to_file
# Log bare messages (no level/name prefix) at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__file__)

# Number of pool worker processes used when --workers is not given.
DEFAULT_WORKER_COUNT = 20

# Command-line interface: one or more input files plus optional switches.
parser = argparse.ArgumentParser(description='Transcribe multiple audio files in parallel using Google Speech API')
parser.add_argument('in_filenames', nargs='+', help='Input filename(s)')
parser.add_argument('--keep-suffix', action='store_true',
    help='Don\'t strip filename suffix when generating metadata .json output filename')
# type=int so a value given on the command line has the same type as the
# integer default instead of arriving as a string.
parser.add_argument('--workers', default=DEFAULT_WORKER_COUNT, type=int,
    help='Number of workers (default {})'.format(DEFAULT_WORKER_COUNT))
def transcribe_one(in_filename, keep_suffix=False):
    """Transcribe one input file and write the result as JSON next to it.

    The output filename is ``<base>.json``. When ``keep_suffix`` is true the
    base is the full input filename (``a.wav`` -> ``a.wav.json``); otherwise
    the input's extension is stripped first (``a.wav`` -> ``a.json``).

    Start and finish are logged at INFO level.
    """
    if keep_suffix:
        base_filename = in_filename
    else:
        # Drop only the final extension; any directory part is preserved.
        base_filename = os.path.splitext(in_filename)[0]
    out_filename = '{}.json'.format(base_filename)
    logger.info('Starting: {} -> {}'.format(in_filename, out_filename))
    with open(out_filename, 'w') as out_file:
        transcribe_to_file(in_filename, out_file, as_json=True)
    logger.info('Finished: {} -> {}'.format(in_filename, out_filename))
def transcribe_many(in_filenames, keep_suffix=False, worker_count=DEFAULT_WORKER_COUNT):
    """Transcribe several input files in parallel using a process pool.

    Each filename in ``in_filenames`` is handed to ``transcribe_one`` with the
    given ``keep_suffix`` setting; ``worker_count`` is the number of pool
    processes. Blocks until every file has been processed. An exception
    raised by any worker is re-raised here.
    """
    pool = Pool(processes=worker_count)
    try:
        func = partial(transcribe_one, keep_suffix=keep_suffix)
        # NOTE(review): the very large timeout is presumably there so the wait
        # stays interruptible with Ctrl-C on older Pythons -- confirm before
        # replacing it with a plain ``pool.map``.
        pool.map_async(func, in_filenames).get(99999999)
    finally:
        # Either all results are in or the run failed/was interrupted; in both
        # cases the workers are no longer needed, so stop and reap them rather
        # than leaving child processes behind.
        pool.terminate()
        pool.join()
if __name__ == '__main__':
    # Script entry point: parse the command line and transcribe every file.
    args = parser.parse_args()
    # Forward --workers so the option actually takes effect. int() leaves an
    # integer unchanged and converts a value supplied as text.
    transcribe_many(args.in_filenames, args.keep_suffix,
        worker_count=int(args.workers))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

transcribe_many.py

Latest commit

History

transcribe_many.py

File metadata and controls