|
3 | 3 |
|
4 | 4 | (ns vim-clojure-static.generate |
5 | 5 | (:require [clojure.string :as string] |
6 | | - [clojure.set :as set])) |
| 6 | + [clojure.set :as set] |
| 7 | + [frak])) |
7 | 8 |
|
8 | 9 | ;; |
9 | 10 | ;; Helpers |
10 | 11 | ;; |
11 | 12 |
|
| 13 | +(defn vim-frak-pattern |
| 14 | + "Create a non-capturing regular expression pattern compatible with Vim." |
| 15 | + [strs] |
| 16 | + (-> (str (frak/pattern strs)) |
| 17 | + (string/replace #"\(\?:" "\\%\\("))) |
| 18 | + |
12 | 19 | (defn property-pattern |
13 | 20 | "Vimscript very magic pattern for a character property class." |
14 | 21 | ([s] (property-pattern s true)) |
15 | | - ([s braces?] (if braces? |
16 | | - (format "\\v\\\\[pP]\\{%s\\}" s) |
17 | | - (format "\\v\\\\[pP]%s" s)))) |
| 22 | + ([s braces?] |
| 23 | + (if braces? |
| 24 | + (format "\\v\\\\[pP]\\{%s\\}" s) |
| 25 | + (format "\\v\\\\[pP]%s" s)))) |
18 | 26 |
|
19 | 27 | (defn syntax-match-properties |
20 | 28 | "Vimscript literal `syntax match` for a character property class." |
21 | 29 | ([group fmt props] (syntax-match-properties group fmt props true)) |
22 | 30 | ([group fmt props braces?] |
23 | 31 | (format "syntax match %s \"%s\" contained display\n" |
24 | 32 | (name group) |
25 | | - (property-pattern (format fmt (string/join \| (sort props))) braces?)))) |
| 33 | + (property-pattern (format fmt (vim-frak-pattern props)) braces?)))) |
26 | 34 |
|
27 | 35 | (defn get-private-field |
28 | 36 | "Violate encapsulation and get the value of a private field." |
|
141 | 149 | ;; `IsPosix` works, but is undefined. |
142 | 150 | (syntax-match-properties |
143 | 151 | :clojureRegexpPosixCharClass |
144 | | - "%%(%s)" |
| 152 | + "%s" |
145 | 153 | (:posix character-properties))) |
146 | 154 |
|
147 | 155 | (def vim-java-char-classes |
148 | 156 | "Vimscript literal `syntax match` for \\p{javaMethod} property classes." |
149 | 157 | ;; `IsjavaMethod` works, but is undefined. |
150 | 158 | (syntax-match-properties |
151 | 159 | :clojureRegexpJavaCharClass |
152 | | - "java%%(%s)" |
| 160 | + "java%s" |
153 | 161 | (map #(string/replace % #"\Ajava" "") (:java character-properties)))) |
154 | 162 |
|
155 | 163 | (def vim-unicode-binary-char-classes |
|
158 | 166 | ;; insensitively like the other Unicode properties. |
159 | 167 | (syntax-match-properties |
160 | 168 | :clojureRegexpUnicodeCharClass |
161 | | - "\\cIs%%(%s)" |
| 169 | + "\\cIs%s" |
162 | 170 | (map string/lower-case (:binary character-properties)))) |
163 | 171 |
|
164 | 172 | (def vim-unicode-category-char-classes |
165 | 173 | "Vimscript literal `syntax match` for Unicode General Category classes." |
166 | | - (let [cats (map seq (:category character-properties)) |
167 | | - cats (map (fn [[c subcats]] |
168 | | - (format "%s[%s]" c (apply str (sort (mapcat rest subcats))))) |
169 | | - (group-by first cats))] |
| 174 | + (let [cats (sort (:category character-properties)) |
| 175 | + chrs (->> (map seq cats) |
| 176 | + (group-by first) |
| 177 | + (keys) |
| 178 | + (map str) |
| 179 | + (sort))] |
170 | 180 | ;; gc= and general_category= can be case insensitive, but this behavior |
171 | 181 | ;; is undefined. |
172 | 182 | (str |
173 | 183 | (syntax-match-properties |
174 | 184 | :clojureRegexpUnicodeCharClass |
175 | | - "%%(%s)" |
176 | | - (sort (filter #(= (count %) 1) (:category character-properties))) |
| 185 | + "%s" |
| 186 | + chrs |
177 | 187 | false) |
178 | 188 | (syntax-match-properties |
179 | 189 | :clojureRegexpUnicodeCharClass |
180 | | - "%%(Is|gc\\=|general_category\\=)?%%(%s)" |
| 190 | + "%s" |
| 191 | + cats) |
| 192 | + (syntax-match-properties |
| 193 | + :clojureRegexpUnicodeCharClass |
| 194 | + "%%(Is|gc\\=|general_category\\=)?%s" |
181 | 195 | cats)))) |
182 | 196 |
|
183 | 197 | (def vim-unicode-script-char-classes |
|
189 | 203 | ;; InScriptName works, but is undefined. |
190 | 204 | (syntax-match-properties |
191 | 205 | :clojureRegexpUnicodeCharClass |
192 | | - "\\c%%(Is|sc\\=|script\\=)%%(%s)" |
| 206 | + "\\c%%(Is|sc\\=|script\\=)%s" |
193 | 207 | (map string/lower-case (:script character-properties)))) |
194 | 208 |
|
195 | 209 | (def vim-unicode-block-char-classes |
|
198 | 212 | ;; of Is. |
199 | 213 | (syntax-match-properties |
200 | 214 | :clojureRegexpUnicodeCharClass |
201 | | - "\\c%%(In|blk\\=|block\\=)%%(%s)" |
| 215 | + "\\c%%(In|blk\\=|block\\=)%s" |
202 | 216 | (map string/lower-case (:block character-properties)))) |
203 | 217 |
|
| 218 | +(def comprehensive-clojure-character-property-regexps |
| 219 | + "A string representing a Clojure literal vector of regular expressions |
| 220 | + containing all possible property character classes. For testing Vimscript |
| 221 | + syntax matching optimizations." |
| 222 | + (let [fmt (fn [prefix prop-key] |
| 223 | + (let [props (map (partial format "\\p{%s%s}" prefix) |
| 224 | + (sort (get character-properties prop-key)))] |
| 225 | + (format "#\"%s\"" (string/join props))))] |
| 226 | + (string/join \newline [(fmt "" :posix) |
| 227 | + (fmt "" :java) |
| 228 | + (fmt "Is" :binary) |
| 229 | + (fmt "general_category=" :category) |
| 230 | + (fmt "script=" :script) |
| 231 | + (fmt "block=" :block)]))) |
| 232 | + |
204 | 233 | (comment |
| 234 | + ;; Generate the Vimscript literal definitions for pasting into the runtime files. |
205 | 235 | (spit "tmp/clojure-defs.vim" |
206 | 236 | (str generation-comment |
207 | 237 | clojure-version-comment |
|
218 | 248 | vim-unicode-binary-char-classes |
219 | 249 | vim-unicode-category-char-classes |
220 | 250 | vim-unicode-script-char-classes |
221 | | - vim-unicode-block-char-classes))) |
| 251 | + vim-unicode-block-char-classes)) |
| 252 | + ;; Generate an example file with all possible character property literals. |
| 253 | + (spit "tmp/all-char-props.clj" |
| 254 | + comprehensive-clojure-character-property-regexps)) |
| 255 | + |
0 commit comments