Skip to content

Commit 1991ff1

Browse files
committed
switched to using --exclude and added a bunch of multirecord tests for single quotes and embedded double quotes
1 parent 6168c1c commit 1991ff1

1 file changed

Lines changed: 91 additions & 20 deletions

File tree

tests/test_validate_json.sh

Lines changed: 91 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,12 @@ set -euo pipefail
1717
[ -n "${DEBUG:-}" ] && set -x
1818
srcdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
1919

20-
echo "
21-
# ======================== #
22-
# Testing validate_json.py
23-
# ======================== #
24-
"
25-
2620
cd "$srcdir/..";
2721

2822
. ./tests/utils.sh
2923

24+
section "Testing validate_json.py"
25+
3026
until [ $# -lt 1 ]; do
3127
case $1 in
3228
-*) shift
@@ -36,14 +32,14 @@ done
3632
data_dir="tests/data"
3733
broken_dir="$data_dir/broken_json_data"
3834

35+
# Regex passed to validate_json.py via --exclude: skips the tests/spark-X.Y.Z-bin-hadoopA.B path
# and anything containing "broken" or "error". Every literal dot in the version numbers is
# escaped so it cannot match an arbitrary character.
exclude='/tests/spark-\d+\.\d+\.\d+-bin-hadoop\d+\.\d+$|broken|error'
36+
3937
rm -fr "$broken_dir" || :
4038
mkdir "$broken_dir"
41-
./validate_json.py -vvv $(
42-
find "${1:-.}" -iname '*.json' |
43-
grep -v '/spark-.*-bin-hadoop.*/' |
44-
grep -v -e 'broken' -e 'error' -e ' '
45-
)
39+
40+
./validate_json.py -vvv --exclude "$exclude" .
4641
echo
42+
4743
echo "checking directory recursion (mixed with explicit file given)"
4844
./validate_json.py -vvv "$data_dir/test.json" .
4945
echo
@@ -53,6 +49,18 @@ cp -iv "$(find "${1:-.}" -iname '*.json' | grep -v -e '/spark-.*-bin-hadoop.*/'
5349
./validate_json.py -vvv -t 1 "$broken_dir/no_extension_testfile"
5450
echo
5551

52+
echo "checking json with embedded double quotes"
53+
./validate_json.py -s "$data_dir/single_quotes_embedded_double_quotes.notjson"
54+
echo
55+
56+
echo "checking json with embedded double quotes"
57+
./validate_json.py -s "$data_dir/single_quotes_embedded_double_quotes.notjson"
58+
echo
59+
60+
echo "checking json with embedded non-escaped double quotes"
61+
./validate_json.py -s "$data_dir/single_quotes_embedded_double_quotes_unescaped.notjson"
62+
echo
63+
5664
echo "testing stdin"
5765
./validate_json.py - < "$data_dir/test.json"
5866
./validate_json.py < "$data_dir/test.json"
@@ -72,8 +80,9 @@ echo "Now trying broken / non-json files to test failure detection:"
7280
check_broken(){
7381
filename="$1"
7482
expected_exitcode="${2:-2}"
83+
options="${3:-}"
7584
set +e
76-
./validate_json.py "$filename" ${@:3}
85+
./validate_json.py $options "$filename" ${@:3}
7786
exitcode=$?
7887
set -e
7988
if [ $exitcode = $expected_exitcode ]; then
@@ -94,43 +103,105 @@ set +e
94103
exitcode=$?
95104
set -e
96105
if [ $exitcode = 2 ]; then
97-
echo "successfully detected breakage for --multi-line stdin vs normal json"
106+
echo "successfully detected breakage for --multi-record stdin vs normal json"
98107
echo
99108
else
100-
echo "FAILED to detect breakage when feeding normal multi-line json doc to stdin with --multi-line (expecting one json doc per line), returned unexpected exit code $exitcode"
109+
echo "FAILED to detect breakage when feeding normal multi-record json doc to stdin with --multi-record (expecting one json doc per line), returned unexpected exit code $exitcode"
101110
exit 1
102111
fi
103112

104113
echo blah > "$broken_dir/blah.json"
105114
check_broken "$broken_dir/blah.json"
106115

107-
echo "{ 'name': 'hari' }" > "$broken_dir/single_quote.json"
108-
check_broken "$broken_dir/single_quote.json"
116+
check_broken "$data_dir/single_quotes.notjson"
117+
check_broken "$data_dir/single_quotes_multirecord.notjson"
118+
check_broken "$data_dir/single_quotes_multirecord_embedded_double_quotes.notjson"
119+
check_broken "$data_dir/single_quotes_multirecord_embedded_double_quotes_unescaped.notjson"
109120

110121
echo "checking invalid single quote detection"
111122
set +o pipefail
112-
./validate_json.py "$broken_dir/single_quote.json" 2>&1 | grep --color 'JSON INVALID.*found single quotes not double quotes' || { echo "Failed to find single quote message in output"; exit 1; }
123+
./validate_json.py "$data_dir/single_quotes.notjson" 2>&1 | grep --color 'JSON INVALID.*found single quotes not double quotes' || { echo "Failed to find single quote message in output"; exit 1; }
113124
set -o pipefail
114125
echo
115126

116127
echo "checking --permit-single-quotes mode works"
117-
./validate_json.py -s "$broken_dir/single_quote.json"
128+
./validate_json.py -s "$data_dir/single_quotes.notjson"
129+
echo
130+
131+
echo "checking --permit-single-quotes mode works with embedded double quotes"
132+
./validate_json.py -s "$data_dir/single_quotes_embedded_double_quotes.notjson"
133+
echo
134+
135+
echo "checking --permit-single-quotes mode works with unescaped embedded double quotes"
136+
# Use the *unescaped* fixture so this check matches its announced purpose rather than
# re-running the escaped embedded-double-quotes case.
./validate_json.py -s "$data_dir/single_quotes_embedded_double_quotes_unescaped.notjson"
137+
echo
138+
139+
echo "checking --permit-single-quotes mode works with multirecord"
140+
./validate_json.py -s "$data_dir/single_quotes_multirecord.notjson" -m
141+
echo
142+
143+
# Message names the fixture actually validated by the following command (embedded double quotes).
echo "checking --permit-single-quotes mode works with multirecord with embedded double quotes"
144+
./validate_json.py -s "$data_dir/single_quotes_multirecord_embedded_double_quotes.notjson" -m
145+
echo
146+
147+
# Message names the fixture actually validated by the following command (unescaped embedded double quotes).
echo "checking --permit-single-quotes mode works with multirecord with unescaped embedded double quotes"
148+
./validate_json.py -s "$data_dir/single_quotes_multirecord_embedded_double_quotes_unescaped.notjson" -m
149+
echo
150+
151+
echo "checking --permit-single-quotes mode works and auto retries to succeed with multirecord"
152+
./validate_json.py -s "$data_dir/single_quotes_multirecord.notjson"
153+
echo
154+
155+
echo "checking --permit-single-quotes mode works and auto retries to succeed with multirecord with embedded double quotes"
156+
./validate_json.py -s "$data_dir/single_quotes_multirecord_embedded_double_quotes.notjson"
118157
echo
119158

120-
# TODO: add failure print silent mode exit code and stdout/stderr
159+
echo "checking --permit-single-quotes mode works and auto retries to succeed with multirecord with unescaped embedded double quotes"
160+
./validate_json.py -s "$data_dir/single_quotes_multirecord_embedded_double_quotes_unescaped.notjson"
161+
echo
162+
163+
# ============================================================================ #
164+
# Print Mode Passthrough Tests
165+
# ============================================================================ #
166+
121167
echo "testing print mode"
122168
[ "$(./validate_json.py -p "$data_dir/test.json" | cksum)" = "$(cksum < "$data_dir/test.json")" ] || { echo "print test failed!"; exit 1; }
123169
echo "successfully passed out test json to stdout"
124170
echo
171+
172+
echo "testing print mode failed"
173+
set +e
174+
output="$(./validate_json.py -p "$data_dir/single_quotes.notjson")"
175+
result=$?
176+
set -e
177+
[ $result -eq 2 ] || { echo "print test failed with wrong exit code $result instead of 2!"; exit 1; }
178+
[ -z "$output" ] || { echo "print test failed by passing output to stdout for records that should be broken!"; exit 1; }
179+
echo "successfully passed test of print mode failure"
180+
echo
181+
125182
echo "testing print mode with multi-record"
126183
[ "$(./validate_json.py -mp "$data_dir/multirecord.json" | cksum)" = "$(cksum < "$data_dir/multirecord.json")" ] || { echo "print multi-record test failed!"; exit 1; }
127184
echo "successfully passed out multi-record json to stdout"
128185
echo
186+
129187
echo "testing print mode with --permit-single-quotes"
130-
[ "$(./validate_json.py -sp "$broken_dir/single_quote.json" | cksum)" = "$(cksum < "$broken_dir/single_quote.json")" ] || { echo "print single quote json test failed!"; exit 1; }
188+
[ "$(./validate_json.py -sp "$data_dir/single_quotes.notjson" | cksum)" = "$(cksum < "$data_dir/single_quotes.notjson")" ] || { echo "print single quote json test failed!"; exit 1; }
189+
echo
190+
191+
echo "testing print mode with --permit-single-quotes multirecord"
192+
[ "$(./validate_json.py -sp "$data_dir/single_quotes_multirecord.notjson" | cksum)" = "$(cksum < "$data_dir/single_quotes_multirecord.notjson")" ] || { echo "print single quote multirecord json test failed!"; exit 1; }
193+
echo
194+
195+
echo "testing print mode with --permit-single-quotes multirecord with embedded double quotes"
196+
# Checksum the embedded-double-quotes multirecord fixture on both sides of the comparison,
# so this test does not merely repeat the plain multirecord print test.
[ "$(./validate_json.py -sp "$data_dir/single_quotes_multirecord_embedded_double_quotes.notjson" | cksum)" = "$(cksum < "$data_dir/single_quotes_multirecord_embedded_double_quotes.notjson")" ] || { echo "print single quote multirecord json with embedded double quotes test failed!"; exit 1; }
197+
echo
198+
199+
echo "testing print mode with --permit-single-quotes multirecord with unescaped embedded double quotes"
200+
# Checksum the *unescaped* embedded-double-quotes multirecord fixture on both sides,
# matching the failure message; the escaped fixture is a separate case.
[ "$(./validate_json.py -sp "$data_dir/single_quotes_multirecord_embedded_double_quotes_unescaped.notjson" | cksum)" = "$(cksum < "$data_dir/single_quotes_multirecord_embedded_double_quotes_unescaped.notjson")" ] || { echo "print single quote multirecord json with unescaped embedded double quotes test failed!"; exit 1; }
131201
echo
132202

133203
echo
204+
# ============================================================================ #
134205

135206
echo '{ "name": "hari" ' > "$broken_dir/missing_end_quote.json"
136207
check_broken "$broken_dir/missing_end_quote.json"

0 commit comments

Comments
 (0)