Skip to content

Commit 06dcc81

Browse files
committed
updated validate_json.py
1 parent 49fcf0d commit 06dcc81

1 file changed

Lines changed: 19 additions & 14 deletions

File tree

validate_json.py

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -47,17 +47,18 @@
4747
class JsonValidatorTool(CLI):
4848

4949
def add_options(self):
50-
self.parser.add_option('-m', '--multi-line', action='store_true',
51-
help='Test explicitly for multi-line JSON, must use if reading large multi-line json ' \
52-
+ 'from standard input to prevent out of memory error')
50+
self.parser.add_option('-m', '--multi-record', action='store_true',
51+
help='Test explicitly for multi-record JSON data, where each line is a separate json ' \
52+
+ 'document separated by newlines. Must use if reading multi-record json format ' \
53+
+ 'on standard input')
5354

54-
def check_multiline_json(self):
55+
def check_mutlirecord_json(self):
5556
for line in self.f:
5657
if not isJson(line):
5758
if isJson(line.replace("'", '"')):
58-
die(self.invalid_json_msg_single_quotes)
59+
die('%s (multi-record format)' % self.invalid_json_msg_single_quotes)
5960
else:
60-
die(self.invalid_json_msg)
61+
return False
6162
print('%s (multi-record format)' % self.valid_json_msg)
6263
return True
6364

@@ -69,12 +70,15 @@ def check_json(self, content):
6970
else:
7071
if self.f is not sys.stdin:
7172
self.f.seek(0)
72-
if self.check_multiline_json():
73+
if self.check_mutlirecord_json():
7374
return True
7475
# pointless since it would simply return 'ValueError: No JSON object could be decoded'
7576
# TODO: replace with a getter
7677
# if self.options.verbose > 2:
77-
# json.loads(content)
78+
# try:
79+
# json.loads(content)
80+
# except Exception, e:
81+
# print(e)
7882
die(self.invalid_json_msg)
7983

8084
def run(self):
@@ -93,14 +97,15 @@ def run(self):
9397
mem_err = "file '%s', assuming Big Data multi-record json and re-trying validation line-by-line" % filename
9498
if filename == '<STDIN>':
9599
self.f = sys.stdin
96-
if self.options.multi_line:
97-
self.check_multiline_json()
100+
if self.options.multi_record:
101+
if not self.check_mutlirecord_json():
102+
die(self.invalid_json_msg)
98103
else:
99104
self.check_json(sys.stdin.read())
100105
else:
101106
with open(filename) as self.f:
102-
if self.options.multi_line:
103-
self.check_multiline_json()
107+
if self.options.multi_record:
108+
self.check_mutlirecord_json()
104109
else:
105110
# most JSON files are fine to slurp like this
106111
# Big Data JSON files are json multi-record, will throw exception after running out of RAM and then be handled line by line
@@ -111,11 +116,11 @@ def run(self):
111116
except MemoryError:
112117
print("memory error validating contents from %s" % mem_err)
113118
f.seek(0)
114-
self.check_multiline_json()
119+
self.check_mutlirecord_json()
115120
except MemoryError:
116121
print("memory error reading %s" % mem_err)
117122
f.seek(0)
118-
self.check_multiline_json()
123+
self.check_mutlirecord_json()
119124

120125

121126
if __name__ == '__main__':

0 commit comments

Comments
 (0)