@@ -17,16 +17,12 @@ set -euo pipefail
1717[ -n " ${DEBUG:- } " ] && set -x
1818srcdir=" $( cd " $( dirname " ${BASH_SOURCE[0]} " ) " && pwd ) "
1919
20- echo "
21- # ======================== #
22- # Testing validate_json.py
23- # ======================== #
24- "
25-
2620cd " $srcdir /.." ;
2721
2822. ./tests/utils.sh
2923
24+ section " Testing validate_json.py"
25+
3026until [ $# -lt 1 ]; do
3127 case $1 in
3228 -* ) shift
3632data_dir=" tests/data"
3733broken_dir=" $data_dir /broken_json_data"
3834
35+ exclude=' /tests/spark-\d+\.\d+.\d+-bin-hadoop\d+.\d+$|broken|error'
36+
3937rm -fr " $broken_dir " || :
4038mkdir " $broken_dir "
41- ./validate_json.py -vvv $(
42- find " ${1:- .} " -iname ' *.json' |
43- grep -v ' /spark-.*-bin-hadoop.*/' |
44- grep -v -e ' broken' -e ' error' -e ' '
45- )
39+
40+ ./validate_json.py -vvv --exclude " $exclude " .
4641echo
42+
4743echo " checking directory recursion (mixed with explicit file given)"
4844./validate_json.py -vvv " $data_dir /test.json" .
4945echo
@@ -53,6 +49,18 @@ cp -iv "$(find "${1:-.}" -iname '*.json' | grep -v -e '/spark-.*-bin-hadoop.*/'
5349./validate_json.py -vvv -t 1 " $broken_dir /no_extension_testfile"
5450echo
5551
52+ echo " checking json with embedded double quotes"
53+ ./validate_json.py -s " $data_dir /single_quotes_embedded_double_quotes.notjson"
54+ echo
55+
56+ echo " checking json with embedded double quotes"
57+ ./validate_json.py -s " $data_dir /single_quotes_embedded_double_quotes.notjson"
58+ echo
59+
60+ echo " checking json with embedded non-escaped double quotes"
61+ ./validate_json.py -s " $data_dir /single_quotes_embedded_double_quotes_unescaped.notjson"
62+ echo
63+
5664echo " testing stdin"
5765./validate_json.py - < " $data_dir /test.json"
5866./validate_json.py < " $data_dir /test.json"
@@ -72,8 +80,9 @@ echo "Now trying broken / non-json files to test failure detection:"
7280check_broken (){
7381 filename=" $1 "
7482 expected_exitcode=" ${2:- 2} "
83+ options=" ${3:- } "
7584 set +e
76- ./validate_json.py " $filename " ${@: 3}
85+ ./validate_json.py $options " $filename " ${@: 3}
7786 exitcode=$?
7887 set -e
7988 if [ $exitcode = $expected_exitcode ]; then
@@ -94,43 +103,105 @@ set +e
94103exitcode=$?
95104set -e
96105if [ $exitcode = 2 ]; then
97- echo " successfully detected breakage for --multi-line stdin vs normal json"
106+ echo " successfully detected breakage for --multi-record stdin vs normal json"
98107 echo
99108else
100- echo " FAILED to detect breakage when feeding normal multi-line json doc to stdin with --multi-line (expecting one json doc per line), returned unexpected exit code $exitcode "
109+ echo " FAILED to detect breakage when feeding normal multi-record json doc to stdin with --multi-record (expecting one json doc per line), returned unexpected exit code $exitcode "
101110 exit 1
102111fi
103112
104113echo blah > " $broken_dir /blah.json"
105114check_broken " $broken_dir /blah.json"
106115
107- echo " { 'name': 'hari' }" > " $broken_dir /single_quote.json"
108- check_broken " $broken_dir /single_quote.json"
116+ check_broken " $data_dir /single_quotes.notjson"
117+ check_broken " $data_dir /single_quotes_multirecord.notjson"
118+ check_broken " $data_dir /single_quotes_multirecord_embedded_double_quotes.notjson"
119+ check_broken " $data_dir /single_quotes_multirecord_embedded_double_quotes_unescaped.notjson"
109120
110121echo " checking invalid single quote detection"
111122set +o pipefail
112- ./validate_json.py " $broken_dir /single_quote.json " 2>&1 | grep --color ' JSON INVALID.*found single quotes not double quotes' || { echo " Failed to find single quote message in output" ; exit 1; }
123+ ./validate_json.py " $data_dir /single_quotes.notjson " 2>&1 | grep --color ' JSON INVALID.*found single quotes not double quotes' || { echo " Failed to find single quote message in output" ; exit 1; }
113124set -o pipefail
114125echo
115126
116127echo " checking --permit-single-quotes mode works"
117- ./validate_json.py -s " $broken_dir /single_quote.json"
128+ ./validate_json.py -s " $data_dir /single_quotes.notjson"
129+ echo
130+
131+ echo " checking --permit-single-quotes mode works with embedded double quotes"
132+ ./validate_json.py -s " $data_dir /single_quotes_embedded_double_quotes.notjson"
133+ echo
134+
135+ echo " checking --permit-single-quotes mode works with unescaped embedded double quotes"
136+ ./validate_json.py -s " $data_dir /single_quotes_embedded_double_quotes.notjson"
137+ echo
138+
139+ echo " checking --permit-single-quotes mode works with multirecord"
140+ ./validate_json.py -s " $data_dir /single_quotes_multirecord.notjson" -m
141+ echo
142+
143+ echo " checking --permit-single-quotes mode works with multirecord"
144+ ./validate_json.py -s " $data_dir /single_quotes_multirecord_embedded_double_quotes.notjson" -m
145+ echo
146+
147+ echo " checking --permit-single-quotes mode works with multirecord"
148+ ./validate_json.py -s " $data_dir /single_quotes_multirecord_embedded_double_quotes_unescaped.notjson" -m
149+ echo
150+
151+ echo " checking --permit-single-quotes mode works and auto retries to succeed with multirecord"
152+ ./validate_json.py -s " $data_dir /single_quotes_multirecord.notjson"
153+ echo
154+
155+ echo " checking --permit-single-quotes mode works and auto retries to succeed with multirecord with embedded double quotes"
156+ ./validate_json.py -s " $data_dir /single_quotes_multirecord_embedded_double_quotes.notjson"
118157echo
119158
120- # TODO: add failure print silent mode exit code and stdout/stderr
159+ echo " checking --permit-single-quotes mode works and auto retries to succeed with multirecord with unescaped embedded double quotes"
160+ ./validate_json.py -s " $data_dir /single_quotes_multirecord_embedded_double_quotes_unescaped.notjson"
161+ echo
162+
163+ # ============================================================================ #
164+ # Print Mode Passthrough Tests
165+ # ============================================================================ #
166+
121167echo " testing print mode"
122168[ " $( ./validate_json.py -p " $data_dir /test.json" | cksum) " = " $( cksum < " $data_dir /test.json" ) " ] || { echo " print test failed!" ; exit 1; }
123169echo " successfully passed out test json to stdout"
124170echo
171+
172+ echo " testing print mode failed"
173+ set +e
174+ output=" $( ./validate_json.py -p " $data_dir /single_quotes.notjson" ) "
175+ result=$?
176+ set -e
177+ [ $result -eq 2 ] || { echo " print test failed with wrong exit code $result instead of 2!" ; exit 1; }
178+ [ -z " $output " ] || { echo " print test failed by passing output to stdout for records that should be broken!" ; exit 1; }
179+ echo " successfully passed test of print mode failure"
180+ echo
181+
125182echo " testing print mode with multi-record"
126183[ " $( ./validate_json.py -mp " $data_dir /multirecord.json" | cksum) " = " $( cksum < " $data_dir /multirecord.json" ) " ] || { echo " print multi-record test failed!" ; exit 1; }
127184echo " successfully passed out multi-record json to stdout"
128185echo
186+
129187echo " testing print mode with --permit-single-quotes"
130- [ " $( ./validate_json.py -sp " $broken_dir /single_quote.json" | cksum) " = " $( cksum < " $broken_dir /single_quote.json" ) " ] || { echo " print single quote json test failed!" ; exit 1; }
188+ [ " $( ./validate_json.py -sp " $data_dir /single_quotes.notjson" | cksum) " = " $( cksum < " $data_dir /single_quotes.notjson" ) " ] || { echo " print single quote json test failed!" ; exit 1; }
189+ echo
190+
191+ echo " testing print mode with --permit-single-quotes multirecord"
192+ [ " $( ./validate_json.py -sp " $data_dir /single_quotes_multirecord.notjson" | cksum) " = " $( cksum < " $data_dir /single_quotes_multirecord.notjson" ) " ] || { echo " print single quote multirecord json test failed!" ; exit 1; }
193+ echo
194+
195+ echo " testing print mode with --permit-single-quotes multirecord with embedded double quotes"
196+ [ " $( ./validate_json.py -sp " $data_dir /single_quotes_multirecord.notjson" | cksum) " = " $( cksum < " $data_dir /single_quotes_multirecord.notjson" ) " ] || { echo " print single quote multirecord json with embedded double quotes test failed!" ; exit 1; }
197+ echo
198+
199+ echo " testing print mode with --permit-single-quotes multirecord with unescaped embedded double quotes"
200+ [ " $( ./validate_json.py -sp " $data_dir /single_quotes_multirecord_embedded_double_quotes.notjson" | cksum) " = " $( cksum < " $data_dir /single_quotes_multirecord_embedded_double_quotes.notjson" ) " ] || { echo " print single quote multirecord json with unescaped embedded double quotes test failed!" ; exit 1; }
131201echo
132202
133203echo
204+ # ============================================================================ #
134205
135206echo ' { "name": "hari" ' > " $broken_dir /missing_end_quote.json"
136207check_broken " $broken_dir /missing_end_quote.json"
0 commit comments