4747class JsonValidatorTool (CLI ):
4848
def add_options(self):
    """Register this tool's command-line switches on ``self.parser``.

    Adds a single boolean flag, ``-m`` / ``--multi-record``, which asks for
    the input to be validated as multi-record JSON: one JSON document per
    line rather than one document per file.
    """
    # Adjacent string literals are joined by the parser, which yields exactly
    # the same help text as explicit '+' concatenation would.
    multi_record_help = (
        'Test explicitly for multi-record JSON data, where each line is a separate json '
        'document separated by newlines. Must use if reading multi-record json format '
        'on standard input'
    )
    self.parser.add_option('-m', '--multi-record', action='store_true', help=multi_record_help)
5354
def check_mutlirecord_json(self):
    """Validate ``self.f`` as multi-record JSON, one document per line.

    Every line read from ``self.f`` is passed to ``isJson``.  A line that is
    rejected is retried with single quotes swapped for double quotes: if the
    retry succeeds, ``die`` is called with the single-quotes message;
    otherwise the method gives up and returns ``False`` without printing.

    Returns:
        True after printing the "valid" message when no line was rejected
        outright (this includes an input with no lines at all); False as
        soon as a line fails both checks.  Callers are responsible for
        reporting the False case.
    """
    for record in self.f:
        if isJson(record):
            continue
        if not isJson(record.replace("'", '"')):
            # Not JSON even after quote normalisation: leave reporting to the caller.
            return False
        # Only the quoting is wrong.  die() is presumed to terminate the
        # process -- TODO confirm; if it returns, scanning simply continues.
        die('%s (multi-record format)' % self.invalid_json_msg_single_quotes)
    print('%s (multi-record format)' % self.valid_json_msg)
    return True
6364
@@ -69,12 +70,15 @@ def check_json(self, content):
6970 else :
7071 if self .f is not sys .stdin :
7172 self .f .seek (0 )
72- if self .check_multiline_json ():
73+ if self .check_mutlirecord_json ():
7374 return True
7475 # pointless since it would simply return 'ValueError: No JSON object could be decoded'
7576 # TODO: replace with a getter
7677 # if self.options.verbose > 2:
77- # json.loads(content)
78+ # try:
79+ # json.loads(content)
80+ # except Exception, e:
81+ # print(e)
7882 die (self .invalid_json_msg )
7983
8084 def run (self ):
@@ -93,14 +97,15 @@ def run(self):
9397 mem_err = "file '%s', assuming Big Data multi-record json and re-trying validation line-by-line" % filename
9498 if filename == '<STDIN>' :
9599 self .f = sys .stdin
96- if self .options .multi_line :
97- self .check_multiline_json ()
100+ if self .options .multi_record :
101+ if not self .check_mutlirecord_json ():
102+ die (self .invalid_json_msg )
98103 else :
99104 self .check_json (sys .stdin .read ())
100105 else :
101106 with open (filename ) as self .f :
102- if self .options .multi_line :
103- self .check_multiline_json ()
107+ if self .options .multi_record :
108+ self .check_mutlirecord_json ()
104109 else :
105110 # most JSON files are fine to slurp like this
106111 # Big Data JSON files are json multi-record, will throw exception after running out of RAM and then be handled line by line
@@ -111,11 +116,11 @@ def run(self):
111116 except MemoryError :
112117 print ("memory error validating contents from %s" % mem_err )
113118 f .seek (0 )
114- self .check_multiline_json ()
119+ self .check_mutlirecord_json ()
115120 except MemoryError :
116121 print ("memory error reading %s" % mem_err )
117122 f .seek (0 )
118- self .check_multiline_json ()
123+ self .check_mutlirecord_json ()
119124
120125
121126if __name__ == '__main__' :
0 commit comments