|
10 | 10 | (def !regexp (complement regexp)) |
11 | 11 | (def regexp-escape (all-fn :clojureRegexpEscape)) |
12 | 12 | (def !regexp-escape (complement regexp-escape)) |
| 13 | +(def regexp-char-class (all-fn :clojureRegexpCharClass)) |
| 14 | +(def !regexp-char-class (complement regexp-char-class)) |
| 15 | +(def regexp-predefined-char-class (all-fn :clojureRegexpPredefinedCharClass)) |
| 16 | +(def !regexp-predefined-char-class (complement regexp-predefined-char-class)) |
| 17 | +(def regexp-posix-char-class (all-fn :clojureRegexpPosixCharClass)) |
| 18 | +(def !regexp-posix-char-class (complement regexp-posix-char-class)) |
| 19 | +(def regexp-boundary (all-fn :clojureRegexpBoundary)) |
| 20 | +(def !regexp-boundary (complement regexp-boundary)) |
| 21 | +(def regexp-quantifier (all-fn :clojureRegexpQuantifier)) |
| 22 | +(def !regexp-quantifier (complement regexp-quantifier)) |
| 23 | +(def regexp-back-ref (all-fn :clojureRegexpBackRef)) |
| 24 | +(def !regexp-back-ref (complement regexp-back-ref)) |
| 25 | +(def regexp-quote (all-fn :clojureRegexpQuote)) |
| 26 | +(def !regexp-quote (complement regexp-quote)) |
| 27 | +(def regexp-or (all-fn :clojureRegexpOr)) |
| 28 | +(def !regexp-or (complement regexp-or)) |
| 29 | +(def regexp-group (all-fn :clojureRegexpGroup)) |
| 30 | +(def !regexp-group (complement regexp-group)) |
| 31 | +(defn regexp-mod [xs] (= (second xs) :clojureRegexpMod)) |
| 32 | +(def !regexp-mod (complement regexp-mod)) |
13 | 33 |
|
14 | 34 | (defsyntaxtest number-literals-test |
15 | 35 | (with-format "%s" |
|
119 | 139 | "\\e" regexp-escape |
120 | 140 | "\\E" !regexp-escape |
121 | 141 | ;; \cx The control character corresponding to x |
122 | | - ;; |
123 | | - ;; Character classes |
| 142 | + "\\cA" regexp-escape |
| 143 | + "\\c1" !regexp-escape |
| 144 | + "\\c" !regexp-escape |
| 145 | + ;; Additional escape sequences not mentioned in the official documentation. |
| 146 | + "\\." regexp-escape |
| 147 | + "\\*" regexp-escape |
| 148 | + "\\?" regexp-escape |
| 149 | + "\\{" regexp-escape |
| 150 | + "\\}" regexp-escape |
| 151 | + "\\[" regexp-escape |
| 152 | + "\\]" regexp-escape |
| 153 | + "\\(" regexp-escape |
| 154 | + "\\)" regexp-escape |
| 155 | + |
| 156 | + ;;;; Character classes |
| 157 | + |
124 | 158 | ;; [abc] a, b, or c (simple class) |
| 159 | + "[abc]" regexp-char-class |
125 | 160 | ;; [^abc] Any character except a, b, or c (negation) |
| 161 | + "[^abc]" regexp-char-class |
126 | 162 | ;; [a-zA-Z] a through z or A through Z, inclusive (range) |
127 | 163 | ;; [a-d[m-p]] a through d, or m through p: [a-dm-p] (union) |
128 | 164 | ;; [a-z&&[def]] d, e, or f (intersection) |
129 | 165 | ;; [a-z&&[^bc]] a through z, except for b and c: [ad-z] (subtraction) |
130 | 166 | ;; [a-z&&[^m-p]] a through z, and not m through p: [a-lq-z](subtraction) |
131 | | - ;; |
132 | | - ;; Predefined character classes |
| 167 | + |
| 168 | + ;;;; Predefined character classes |
| 169 | + |
133 | 170 | ;; . Any character (may or may not match line terminators) |
| 171 | + "." regexp-predefined-char-class |
134 | 172 | ;; \d A digit: [0-9] |
| 173 | + "\\d" regexp-predefined-char-class |
135 | 174 | ;; \D A non-digit: [^0-9] |
| 175 | + "\\D" regexp-predefined-char-class |
136 | 176 | ;; \s A whitespace character: [ \t\n\x0B\f\r] |
| 177 | + "\\s" regexp-predefined-char-class |
137 | 178 | ;; \S A non-whitespace character: [^\s] |
| 179 | + "\\S" regexp-predefined-char-class |
138 | 180 | ;; \w A word character: [a-zA-Z_0-9] |
| 181 | + "\\w" regexp-predefined-char-class |
139 | 182 | ;; \W A non-word character: [^\w] |
140 | | - ;; |
141 | | - ;; POSIX character classes (US-ASCII only) |
| 183 | + "\\W" regexp-predefined-char-class |
| 184 | + |
| 185 | + ;;;; POSIX character classes (US-ASCII only) |
| 186 | + |
142 | 187 | ;; \p{Lower} A lower-case alphabetic character: [a-z] |
| 188 | + "\\p{Lower}" regexp-posix-char-class |
143 | 189 | ;; \p{Upper} An upper-case alphabetic character:[A-Z] |
| 190 | + "\\p{Upper}" regexp-posix-char-class |
144 | 191 | ;; \p{ASCII} All ASCII:[\x00-\x7F] |
| 192 | + "\\p{ASCII}" regexp-posix-char-class |
145 | 193 | ;; \p{Alpha} An alphabetic character:[\p{Lower}\p{Upper}] |
| 194 | + "\\p{Alpha}" regexp-posix-char-class |
146 | 195 | ;; \p{Digit} A decimal digit: [0-9] |
| 196 | + "\\p{Digit}" regexp-posix-char-class |
147 | 197 | ;; \p{Alnum} An alphanumeric character:[\p{Alpha}\p{Digit}] |
| 198 | + "\\p{Alnum}" regexp-posix-char-class |
148 | 199 | ;; \p{Punct} Punctuation: One of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ |
| 200 | + "\\p{Punct}" regexp-posix-char-class |
149 | 201 | ;; \p{Graph} A visible character: [\p{Alnum}\p{Punct}] |
| 202 | + "\\p{Graph}" regexp-posix-char-class |
150 | 203 | ;; \p{Print} A printable character: [\p{Graph}\x20] |
| 204 | + "\\p{Print}" regexp-posix-char-class |
151 | 205 | ;; \p{Blank} A space or a tab: [ \t] |
| 206 | + "\\p{Blank}" regexp-posix-char-class |
152 | 207 | ;; \p{Cntrl} A control character: [\x00-\x1F\x7F] |
| 208 | + "\\p{Cntrl}" regexp-posix-char-class |
153 | 209 | ;; \p{XDigit} A hexadecimal digit: [0-9a-fA-F] |
| 210 | + "\\p{XDigit}" regexp-posix-char-class |
154 | 211 | ;; \p{Space} A whitespace character: [ \t\n\x0B\f\r] |
155 | | - ;; |
156 | | - ;; java.lang.Character classes (simple java character type) |
| 212 | + "\\p{Space}" regexp-posix-char-class |
| 213 | + |
| 214 | + ;;;; java.lang.Character classes (simple java character type) |
| 215 | + |
157 | 216 | ;; \p{javaLowerCase} Equivalent to java.lang.Character.isLowerCase() |
| 217 | + "\\p{javaLowerCase}" regexp-posix-char-class |
158 | 218 | ;; \p{javaUpperCase} Equivalent to java.lang.Character.isUpperCase() |
| 219 | + "\\p{javaUpperCase}" regexp-posix-char-class |
159 | 220 | ;; \p{javaWhitespace} Equivalent to java.lang.Character.isWhitespace() |
| 221 | + "\\p{javaWhitespace}" regexp-posix-char-class |
160 | 222 | ;; \p{javaMirrored} Equivalent to java.lang.Character.isMirrored() |
161 | | - ;; |
162 | | - ;; Classes for Unicode scripts, blocks, categories and binary properties |
| 223 | + "\\p{javaMirrored}" regexp-posix-char-class |
| 224 | + |
| 225 | + ;;;; Classes for Unicode scripts, blocks, categories and binary properties |
| 226 | + |
163 | 227 | ;; \p{IsLatin} A Latin script character (script) |
| 228 | + "\\p{IsLatin}" regexp-posix-char-class |
164 | 229 | ;; \p{InGreek} A character in the Greek block (block) |
| 230 | + "\\p{InGreek}" regexp-posix-char-class |
165 | 231 | ;; \p{Lu} An uppercase letter (category) |
| 232 | + "\\p{Lu}" regexp-posix-char-class |
166 | 233 | ;; \p{IsAlphabetic} An alphabetic character (binary property) |
| 234 | + "\\p{IsAlphabetic}" regexp-posix-char-class |
167 | 235 | ;; \p{Sc} A currency symbol |
| 236 | + "\\p{Sc}" regexp-posix-char-class |
168 | 237 | ;; \P{InGreek} Any character except one in the Greek block (negation) |
| 238 | + "\\P{InGreek}" regexp-posix-char-class |
169 | 239 | ;; [\p{L}&&[^\p{Lu}]] Any letter except an uppercase letter (subtraction) |
170 | | - ;; |
171 | | - ;; Boundary matchers |
| 240 | + |
| 241 | + ;;;; Invalid classes |
| 242 | + |
| 243 | + "\\P{Xzibit}" !regexp-posix-char-class |
| 244 | + "\\p{YoDawg}" !regexp-posix-char-class |
| 245 | + |
| 246 | + ;;;; Boundary matchers |
| 247 | + |
172 | 248 | ;; ^ The beginning of a line |
| 249 | + "^" regexp-boundary |
173 | 250 | ;; $ The end of a line |
| 251 | + "$" regexp-boundary |
174 | 252 | ;; \b A word boundary |
| 253 | + "\\b" regexp-boundary |
175 | 254 | ;; \B A non-word boundary |
| 255 | + "\\B" regexp-boundary |
176 | 256 | ;; \A The beginning of the input |
| 257 | + "\\A" regexp-boundary |
177 | 258 | ;; \G The end of the previous match |
| 259 | + "\\G" regexp-boundary |
178 | 260 | ;; \Z The end of the input but for the final terminator, if any |
| 261 | + "\\Z" regexp-boundary |
179 | 262 | ;; \z The end of the input |
180 | | - ;; |
181 | | - ;; Greedy quantifiers |
| 263 | + "\\z" regexp-boundary |
| 264 | + |
| 265 | + ;;;; Greedy quantifiers |
| 266 | + |
182 | 267 | ;; X? X, once or not at all |
| 268 | + "?" regexp-quantifier |
183 | 269 | ;; X* X, zero or more times |
| 270 | + "*" regexp-quantifier |
184 | 271 | ;; X+ X, one or more times |
| 272 | + "+" regexp-quantifier |
185 | 273 | ;; X{n} X, exactly n times |
| 274 | + "{0}" regexp-quantifier |
186 | 275 | ;; X{n,} X, at least n times |
| 276 | + "{0,}" regexp-quantifier |
187 | 277 | ;; X{n,m} X, at least n but not more than m times |
188 | | - ;; |
189 | | - ;; Reluctant quantifiers |
| 278 | + "{0,1}" regexp-quantifier |
| 279 | + |
| 280 | + ;;;; Reluctant quantifiers |
| 281 | + |
190 | 282 | ;; X?? X, once or not at all |
| 283 | + "??" regexp-quantifier |
191 | 284 | ;; X*? X, zero or more times |
| 285 | + "*?" regexp-quantifier |
192 | 286 | ;; X+? X, one or more times |
| 287 | + "+?" regexp-quantifier |
193 | 288 | ;; X{n}? X, exactly n times |
| 289 | + "{0}?" regexp-quantifier |
194 | 290 | ;; X{n,}? X, at least n times |
| 291 | + "{0,}?" regexp-quantifier |
195 | 292 | ;; X{n,m}? X, at least n but not more than m times |
196 | | - ;; |
197 | | - ;; Possessive quantifiers |
| 293 | + "{0,1}?" regexp-quantifier |
| 294 | + |
| 295 | + ;;;; Possessive quantifiers |
| 296 | + |
198 | 297 | ;; X?+ X, once or not at all |
| 298 | + "?+" regexp-quantifier |
199 | 299 | ;; X*+ X, zero or more times |
| 300 | + "*+" regexp-quantifier |
200 | 301 | ;; X++ X, one or more times |
| 302 | + "++" regexp-quantifier |
201 | 303 | ;; X{n}+ X, exactly n times |
| 304 | + "{0}+" regexp-quantifier |
202 | 305 | ;; X{n,}+ X, at least n times |
| 306 | + "{0,}+" regexp-quantifier |
203 | 307 | ;; X{n,m}+ X, at least n but not more than m times |
204 | | - ;; |
205 | | - ;; Logical operators |
| 308 | + "{0,1}+" regexp-quantifier |
| 309 | + |
| 310 | + "{-1}" !regexp-quantifier |
| 311 | + "{-1,}" !regexp-quantifier |
| 312 | + "{-1,-2}" !regexp-quantifier |
| 313 | + "{-1}?" !regexp-quantifier |
| 314 | + "{-1,}?" !regexp-quantifier |
| 315 | + "{-1,-2}?" !regexp-quantifier |
| 316 | + "{-1}+" !regexp-quantifier |
| 317 | + "{-1,}+" !regexp-quantifier |
| 318 | + "{-1,-2}+" !regexp-quantifier |
| 319 | + |
| 320 | + ;;;; Logical operators |
206 | 321 | ;; XY X followed by Y |
| 322 | + ;; XXX: Tested above (regexp) |
| 323 | + |
207 | 324 | ;; X|Y Either X or Y |
| 325 | + "|" regexp-or |
| 326 | + |
208 | 327 | ;; (X) X, as a capturing group |
209 | | - ;; |
210 | | - ;; Back references |
| 328 | + "(X)" regexp-group |
| 329 | + |
| 330 | + ;;;; Back references |
| 331 | + |
211 | 332 | ;; \n Whatever the nth capturing group matched |
| 333 | + "\\1" regexp-back-ref |
212 | 334 | ;; \k<name> Whatever the named-capturing group "name" matched |
213 | | - ;; |
214 | | - ;; Quotation |
| 335 | + "\\k<name>" regexp-back-ref |
| 336 | + |
| 337 | + ;;;; Quotation |
| 338 | + |
215 | 339 | ;; \ Nothing, but quotes the following character |
| 340 | + ;; XXX: Tested above |
| 341 | + |
216 | 342 | ;; \Q Nothing, but quotes all characters until \E |
217 | 343 | ;; \E Nothing, but ends quoting started by \Q |
218 | | - ;; |
219 | | - ;; Special constructs (named-capturing and non-capturing) |
| 344 | + "\\Qabc\\E" regexp-quote |
| 345 | + "\\qabc\\E" !regexp-quote |
| 346 | + |
| 347 | + ;;;; Special constructs (named-capturing and non-capturing) |
220 | 348 | ;; (?<name>X) X, as a named-capturing group |
| 349 | + "(?<name>X)" regexp-mod |
221 | 350 | ;; (?:X) X, as a non-capturing group |
| 351 | + "(?:X)" regexp-mod |
222 | 352 | ;; (?idmsuxU-idmsuxU) Nothing, but turns match flags i d m s u x U on - off |
| 353 | + "(?idmsuxU-idmsuxU)" regexp-mod |
| 354 | + "(?idmsuxU)" regexp-mod |
| 355 | + "(?-idmsuxU)" regexp-mod |
223 | 356 | ;; (?idmsux-idmsux:X) X, as a non-capturing group with the given flags i d m s u x on - off |
| 357 | + "(?idmsuxU-idmsuxU:X)" regexp-mod |
| 358 | + "(?idmsuxU:)" regexp-mod |
| 359 | + "(?-idmsuxU:)" regexp-mod |
224 | 360 | ;; (?=X) X, via zero-width positive lookahead |
| 361 | + "(?=X)" regexp-mod |
225 | 362 | ;; (?!X) X, via zero-width negative lookahead |
| 363 | + "(?!X)" regexp-mod |
226 | 364 | ;; (?<=X) X, via zero-width positive lookbehind |
| 365 | + "(?<=X)" regexp-mod |
227 | 366 | ;; (?<!X) X, via zero-width negative lookbehind |
| 367 | + "(?<!X)" regexp-mod |
228 | 368 | ;; (?>X) X, as an independent, non-capturing group |
| 369 | + "(?>X)" regexp-mod |
| 370 | + |
| 371 | + "(?X)" !regexp-mod |
229 | 372 | )) |
230 | 373 |
|
231 | 374 | ;; (test #'java-regexp-literals-test) |
0 commit comments