-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathcli.py
More file actions
executable file
·140 lines (126 loc) · 6.1 KB
/
cli.py
File metadata and controls
executable file
·140 lines (126 loc) · 6.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python3
import os
import gc
import sys
import atexit
import logging
import argparse
from udapi.core.run import Run
# Command-line interface of udapy: the parser is assembled once at import time.
def _build_argparser():
    """Create the argparse parser describing all udapy command-line options.

    The options are registered in a fixed order (simple on/off switches first,
    then -X, --gc and finally the positional scenario), which is also the
    order in which they are listed in the --help output.
    """
    description_text = (
        "udapy - Python interface to Udapi - API for Universal Dependencies\n\n"
        "Examples of usage:\n"
        " udapy -s read.Sentences udpipe.En < in.txt > out.conllu\n"
        " udapy -T < sample.conllu | less -R\n"
        " udapy -HAM ud.MarkBugs < sample.conllu > bugs.html\n"
    )
    parser = argparse.ArgumentParser(
        description=description_text,
        epilog="See http://udapi.github.io",
        usage="udapy [optional_arguments] scenario",
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # (short flag, long flag, help text) of every plain boolean switch.
    boolean_switches = (
        ("-q", "--quiet",
         "Warning, info and debug messages are suppressed. Only fatal errors are reported."),
        ("-v", "--verbose",
         "Warning, info and debug messages are printed to the STDERR."),
        ("-s", "--save",
         "Add write.Conllu to the end of the scenario"),
        ("-T", "--save_text_mode_trees",
         "Add write.TextModeTrees color=1 to the end of the scenario"),
        ("-H", "--save_html",
         "Add write.TextModeTreesHtml color=1 to the end of the scenario"),
        ("-A", "--save_all_attributes",
         "Add attributes=form,lemma,upos,xpos,feats,deprel,misc (to be used after -T and -H)"),
        ("-C", "--save_comments",
         "Add print_comments=1 (to be used after -T and -H)"),
        ("-M", "--marked_only",
         "Add marked_only=1 to the end of the scenario (to be used after -T and -H)"),
        ("-N", "--no_color",
         "Add color=0 to the end of the scenario, this overrides color=1 of -T and -H"),
    )
    for short_flag, long_flag, help_text in boolean_switches:
        parser.add_argument(short_flag, long_flag, action="store_true", help=help_text)

    # -X may be repeated; every occurrence is appended to a list.
    parser.add_argument(
        "-X", "--extra", action="append",
        help="Add a specified parameter (or a block name) to the end of the scenario\n"
             "For example 'udapy -TNX attributes=form,misc -X layout=align < my.conllu'")
    parser.add_argument(
        "--gc", action="store_true",
        help="By default, udapy disables Python garbage collection and at-exit cleanup\n"
             "to speed up everything (especially reading CoNLL-U files). In edge cases,\n"
             "when processing many files and running out of memory, you can disable this\n"
             "optimization (i.e. enable garbage collection) with 'udapy --gc'.")
    # Everything left on the command line is collected as the scenario.
    parser.add_argument(
        'scenario', nargs=argparse.REMAINDER,
        help="A sequence of blocks and their parameters.")
    return parser


argparser = _build_argparser()
# Process and provide the scenario.
def main(argv=None):
    """Parse the command line, assemble the scenario and execute it.

    Args:
        argv: list of command-line arguments handed to ``argparser.parse_args``;
            with the default ``None`` argparse reads ``sys.argv[1:]``.

    Returns:
        0 once ``runner.execute()`` has finished (a ``BrokenPipeError`` raised
        by it is silently ignored). Any other exception propagates to the caller.
    """
    args = argparser.parse_args(argv)

    # Set the level of logs according to parameters.
    if args.verbose:
        level = logging.DEBUG
    elif args.quiet:
        level = logging.CRITICAL
    else:
        level = logging.INFO

    logging.basicConfig(format='%(asctime)-15s [%(levelname)7s] %(funcName)s - %(message)s',
                        level=level)

    # Flag tracking whether an unhandled exception occurred.
    # It is a local variable of main() shared (via closure) with the excepthook
    # below and with the at-exit lambda further down.
    _unhandled_exception_occurred = False

    def _custom_excepthook(exc_type, exc_value, exc_traceback):
        # `nonlocal` (not `global`) is essential here: the at-exit lambda reads
        # main()'s local variable, so a `global` declaration would set a different,
        # module-level variable and the exit code would always stay 0.
        nonlocal _unhandled_exception_occurred
        _unhandled_exception_occurred = True

        # Call the default excepthook to allow normal error reporting
        sys.__excepthook__(exc_type, exc_value, exc_traceback)

    # Override the default excepthook
    sys.excepthook = _custom_excepthook

    # Disabling garbage collections makes the whole processing much faster.
    # Similarly, we can save several seconds by partially disabling the at-exit Python cleanup
    # (atexit hooks are called in reversed order of their registration,
    # so flushing stdio buffers etc. will be still done before the os._exit(0) call).
    # See https://instagram-engineering.com/dismissing-python-garbage-collection-at-instagram-4dca40b29172
    # Is it safe to disable GC?
    # OS will free the memory allocated by this process after it ends anyway.
    # The udapy wrapper is aimed for one-time tasks, not a long-running server,
    # so in a typical case a document is loaded and almost no memory is freed before the end.
    # Udapi documents have many cyclic references, so running GC is quite slow.
    if not args.gc:
        gc.disable()
        # When an exception/error has happened, udapy should exit with a non-zero exit code,
        # so that users can use `udapy ... || echo "Error detected"` (or Makefile reports errors).
        # However, we cannot use `atexit.register(lambda: os._exit(1 if sys.exc_info()[0] else 0))`
        # because the Python has already exited the exception-handling block
        # (the exception/error has been already reported and sys.exc_info()[0] is None).
        # We thus keep record whether _unhandled_exception_occurred.
        atexit.register(lambda: os._exit(1 if _unhandled_exception_occurred else 0))
        # Registered last, so it runs first: stderr is flushed before os._exit().
        atexit.register(sys.stderr.flush)

    # Extend the scenario according to the convenience switches.
    # The order matters: writer blocks first, then their parameters.
    if args.save:
        args.scenario = args.scenario + ['write.Conllu']
    if args.save_text_mode_trees:
        args.scenario = args.scenario + ['write.TextModeTrees', 'color=1']
    if args.save_html:
        args.scenario = args.scenario + ['write.TextModeTreesHtml', 'color=1']
    if args.save_all_attributes:
        args.scenario = args.scenario + ['attributes=form,lemma,upos,xpos,feats,deprel,misc']
    if args.save_comments:
        args.scenario = args.scenario + ['print_comments=1']
    if args.marked_only:
        args.scenario = args.scenario + ['marked_only=1']
    if args.no_color:
        args.scenario = args.scenario + ['color=0']
    if args.extra:
        args.scenario += args.extra

    runner = Run(args)
    # udapy is often piped to head etc., e.g.
    # `seq 1000 | udapy -s read.Sentences | head`
    # Let's prevent Python from reporting (with distracting stacktrace)
    # "BrokenPipeError: [Errno 32] Broken pipe"
    try:
        runner.execute()
    except BrokenPipeError:
        pass
    return 0
# When executed as a script, run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)