Skip to content

Commit 4ce0036

Browse files
committed
Merge remote-tracking branch 'noprompt/noprompt-regexp' into noprompt-regexp
* noprompt/noprompt-regexp:
  Add some more Unicode character classes
  Swap the order of set and sort
  Add all valid java* character classes
  Allow "#\Q…\E" regions without matching \E

Conflicts:
  clj/test/syntax_test.clj
  syntax/clojure.vim
2 parents 4f865f3 + 50e6b59 commit 4ce0036

2 files changed

Lines changed: 28 additions & 8 deletions

File tree

clj/src/vim_clojure_static/generate.clj

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
(ns vim-clojure-static.generate
66
(:require [clojure.string :as string]
7-
[clojure.set :as set]))
7+
[clojure.set :as set]
8+
[clojure.reflect :as r]))
89

910
(def generation-message
1011
(str "\" Generated from https://github.com/guns/vim-clojure-static/blob/vim-release-004/clj/src/vim_clojure_static/generate.clj"
@@ -68,5 +69,26 @@
6869
sort
6970
(string/join \,)))))
7071

72+
(def java-char-class-names
73+
"Returns a list of valid java character class names (excluding the \"java\"
74+
prefix) for use in a regular expression literal."
75+
;; java.lang.Character/is* methods.
76+
(let [is-ms (->> java.lang.Character
77+
r/reflect
78+
:members
79+
(map (comp name :name))
80+
(filter #(.startsWith % "is"))
81+
set
82+
sort)]
83+
(reduce
84+
(fn [pats is-m]
85+
(let [c-name (second (string/split is-m #"is" 2))]
86+
(try
87+
(re-pattern (format "\\p{java%s}" c-name))
88+
(conj pats c-name)
89+
(catch java.util.regex.PatternSyntaxException e pats))))
90+
[]
91+
is-ms)))
92+
7193
(comment
7294
(spit "/tmp/clojure-defs.vim" (str syntax-keywords "\n\n" completion-words)))

syntax/clojure.vim

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,15 @@ syntax match clojureDispatch "\v#[\^'=<_]?"
7575
" Clojure permits no more than 20 params.
7676
syntax match clojureAnonArg "%\(20\|1\d\|[1-9]\|&\)\?"
7777

78-
" Note: Although not mentioned in the official documentation prefixing the
79-
" characters ".", "+", "*", "?", "{", "}", "[", "]", "(", and ")" with a "\"
80-
" forms a legal escape sequence.
81-
syntax match clojureRegexpEscape "\v\\%(\\|[tnrfae]|c[A-Z]|0[0-3]?[0-7]{1,2}|x\x{2}|u\x{4}|[.+*?{}[\]()])" contained
78+
syntax match clojureRegexpEscape "\v\\%(\\|[tnrfae]|c[A-Z]|0[0-3]?[0-7]{1,2}|x\x{2}|u\x{4}|.)" contained
8279
syntax region clojureRegexpQuoted start=/\v\<@!\\Q/ms=e+1 skip=/\v\\\\|\\"/ end=/\\E/me=s-1 end=/"/me=s-1 contained
8380
syntax region clojureRegexpQuote start=/\v\<@!\\Q/ skip=/\v\\\\|\\"/ end=/\\E/ end=/"/me=s-1 contains=clojureRegexpQuoted keepend contained
81+
syntax cluster clojureRegexpEscapes contains=clojureRegexpEscape,clojureRegexpQuote
8482
" Character classes
8583
syntax match clojureRegexpPredefinedCharClass "\v%(\\[dDsSwW]|\.)" contained
86-
" XXX: Should we distinguish between posix, java, and unicode character
87-
" classes as in the documentation?
88-
syntax match clojureRegexpPosixCharClass "\v\\[pP]\{%(Lower|Upper|ASCII|Alpha|Digit|Alnum|Punct|Graph|Print|Blank|Cntrl|XDigit|Space|IsLatin|InGreek|Lu|IsAlphabetic|Sc|java%(LowerCase|UpperCase|Whitespace|Mirrored))\}" contained
84+
syntax match clojureRegexpPosixCharClass "\v\\[pP]\{%(Lower|Upper|ASCII|Alpha|Digit|Alnum|Punct|Graph|Print|Blank|Cntrl|XDigit|Space|IsLatin|InGreek|Lu|IsAlphabetic|Sc)\}" contained
85+
syntax match clojureRegexpPosixCharClass "\v\\[pP]\{%(Is)?%(Cn|Cc|Cf|Co|Cs|Lu|Ll|Lt|Lm|Lo|Mn|Me|Mc|Nd|Nl|No|Pd|Ps|Pe|Pc|Pi|Pf|Po|Sm|Sc|Sk|So|Zs|Zl|Zp)\}" contained
86+
syntax match clojureRegexpPosixCharClass "\v\\[pP]\{java%(Defined|Digit|ISOControl|IdentifierIgnorable|JavaIdentifierPart|JavaIdentifierStart|Letter|LetterOrDigit|LowerCase|Mirrored|SpaceChar|TitleCase|UnicodeIdentifierPart|UnicodeIdentifierStart|UpperCase|Whitespace)\}" contained
8987
syntax region clojureRegexpCharClass start="\\\@<!\[" end="\\\@<!\]" contained contains=clojureRegexpSpecialChar,clojureRegexpPredefinedCharClass,clojureRegexpPosixCharClass
9088
syntax cluster clojureRegexpCharClasses contains=clojureRegexpPredefinedCharClass,clojureRegexpPosixCharClass,clojureRegexpCharClass
9189
" Boundary

0 commit comments

Comments
 (0)