|
69 | 69 | sort |
70 | 70 | (string/join \,))))) |
71 | 71 |
|
72 | | -(def java-char-class-names |
73 | | - "Returns a list of valid java character class names (excluding the \"java\" |
74 | | - prefix) for use in a regular expression literal." |
75 | | - ;; java.lang.Character/is* methods. |
76 | | - (let [is-ms (->> java.lang.Character |
77 | | - r/reflect |
78 | | - :members |
79 | | - (map (comp name :name)) |
80 | | - (filter #(.startsWith % "is")) |
81 | | - set |
82 | | - sort)] |
83 | | - (reduce |
84 | | - (fn [pats is-m] |
85 | | - (let [c-name (second (string/split is-m #"is" 2))] |
86 | | - (try |
87 | | - (re-pattern (format "\\p{java%s}" c-name)) |
88 | | - (conj pats c-name) |
89 | | - (catch java.util.regex.PatternSyntaxException e pats)))) |
90 | | - [] |
91 | | - is-ms))) |
| 72 | +;; Helper functions (should probably be moved to a util ns). |
| 73 | + |
(defn syntax-match
  "Returns a Vimscript literal `syntax match` statement.

  group      - name of the syntax group; anything accepted by `name`
               (keyword, symbol, or string).
  pattern    - body of the Vim pattern. It is wrapped in double quotes as-is;
               no escaping is applied.
  contained? - when truthy, the `contained` argument is appended."
  [group pattern contained?]
  (let [parts ["syntax match" (name group) (format "\"%s\"" pattern)]
        parts (if contained?
                (conj parts "contained")
                parts)]
    (string/join \space parts)))
| 82 | + |
(defn re-pattern?
  "Returns true if s is a valid regular expression pattern, false otherwise.

  Validity is decided by compiling s with `re-pattern`. Only
  java.util.regex.PatternSyntaxException is treated as \"invalid\"; any other
  exception thrown while compiling propagates to the caller."
  [s]
  (try
    (re-pattern s)
    true
    (catch java.util.regex.PatternSyntaxException _ false)))
| 89 | + |
(defn pipe-join
  "Concatenates the string forms of the items in ss, placing a `|` character
  between consecutive items. An empty or nil ss yields the empty string."
  [ss]
  (apply str (interpose \| ss)))
| 92 | + |
| 93 | +;;;; clojureRegex*CharClass generation ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 94 | + |
(defn bracket-char-class?
  "Returns true if s is a valid posix, java, or unicode character class.

  s is the bare class name; it is valid when the pattern \\p{s} compiles
  according to `re-pattern?`."
  [s]
  (re-pattern? (format "\\p{%s}" s)))
| 98 | + |
;; Keeps the backslash-heavy wrapper text in one place so callers stay readable.
(defn unicode-char-class-pattern
  "Returns the text \\v\\\\[pP]\\{S\\} with S replaced by s, where s is the
  pattern fragment to place between the braces."
  [s]
  (let [template "\\v\\\\[pP]\\{%s\\}"]
    (format template s)))
| 102 | + |
(def unicode-char-classes
  "Vimscript literal syntax match for unicode regex character classes.

  The value is a delay because building it downloads a web page; deref it
  (@unicode-char-classes) to force the computation and get the string."
  (delay ;; Since we need to hit the network.
    (let [page (slurp "http://www.regular-expressions.info/unicode.html")
          ;; Every distinct NAME that appears as \p{NAME} on the page, sorted.
          ;; re-seq yields [whole-match NAME] vectors, so `second` picks the
          ;; captured name; this avoids a transient accumulator, whose conj!
          ;; return value must never be ignored.
          cs (->> (re-seq #"\\p\{([a-zA-Z_]+)\}" page)
                  (map second)
                  distinct
                  sort)
          ;; Keep only the names the running JVM's regex engine accepts.
          cs (filter bracket-char-class? cs)
          ;; This complicates things mildly but apparently not every unicode
          ;; class can be prefixed with "Is".
          {cs1 true cs2 false} (group-by #(bracket-char-class? (str "Is" %)) cs)]
      (syntax-match
        :clojureRegexpUnicodeCharClass
        (unicode-char-class-pattern
          (format "%%(%%(Is)?%%(%s)|%%(%s))" (pipe-join cs1) (pipe-join cs2)))
        true))))
| 122 | + |
(def java-char-classes
  "Vimscript literal syntax match for (Is)java* regex character classes.

  Candidate names come from the is* methods of java.lang.Character (isDigit
  gives Digit). A name is kept when \\p{java<Name>} compiles. Kept names are
  then split by whether \\p{Isjava<Name>} also compiles, which is the exact
  form the optional (Is) prefix of the generated pattern matches."
  (let [is-methods (->> java.lang.Character
                        r/reflect
                        :members
                        (map (comp name :name))
                        (filter #(.startsWith % "is"))
                        distinct
                        sort)
        cs (->> is-methods
                (map #(subs % 2)) ; drop the leading "is"
                (filter #(bracket-char-class? (str "java" %))))
        ;; Probe the name the emitted pattern will really match
        ;; ("Isjava<Name>"); probing "Is<Name>" tests a different class name.
        {cs1 true cs2 false} (group-by #(bracket-char-class? (str "Isjava" %)) cs)
        ;; Emit a branch only for a non-empty group: an empty %() would let
        ;; the bare name "java" match.
        branches (remove nil?
                         [(when (seq cs1)
                            (format "%%(Is)?java%%(%s)" (pipe-join cs1)))
                          (when (seq cs2)
                            (format "java%%(%s)" (pipe-join cs2)))])]
    (syntax-match
      :clojureRegexpJavaCharClass
      (unicode-char-class-pattern (format "%%(%s)" (pipe-join branches)))
      true)))
92 | 139 |
|
(comment
  ;; REPL scratch, never evaluated on load: write the generated Vimscript
  ;; fragments to /tmp for inspection.
  (spit "/tmp/clojure-defs.vim"
        (str syntax-keywords "\n\n" completion-words))
  (spit "/tmp/clojure-char-classes.vim"
        (str java-char-classes "\n" @unicode-char-classes)))
0 commit comments