Skip to content

Commit 33a0fc9

Browse files
committed
Merge branch 'development'
* development:
  Replace commented comment macro
  Fix accidentally damaged char class, tidy up
  Upgrade to frak 0.1.3
  Update frak to v0.1.2 for character sets
  Remove leading backslash check from clojureRegexpBoundary
  Fix custom-nfa-log.patch
  Generate new regexp definitions with Frak
  Move vim-nfa-dump to test and add convenience fn
  Add Vim NFA regexp engine dump log helpers
  Support more abbreviated Unicode category classes
  Fix failing test for ^ and $ boundaries
  Add complete char prop regexp generator
  Add old TODOs from git stash
  Small correction to clojureRegexpUnicodeCharClass
  Generate *CharClass patterns with frak
  Add frak
2 parents f7ac5f3 + dadefa7 commit 33a0fc9

8 files changed

Lines changed: 227 additions & 33 deletions

File tree

clj/project.clj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@
44
:license {:name "Vim License"
55
:url "http://vimdoc.sourceforge.net/htmldoc/uganda.html#license"
66
:comments ":help license"}
7-
:dependencies [[org.clojure/clojure "1.5.1"]])
7+
:dependencies [[org.clojure/clojure "1.5.1"]
8+
[frak "0.1.3"]])

clj/src/vim_clojure_static/generate.clj

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,34 @@
33

44
(ns vim-clojure-static.generate
55
(:require [clojure.string :as string]
6-
[clojure.set :as set]))
6+
[clojure.set :as set]
7+
[frak]))
78

89
;;
910
;; Helpers
1011
;;
1112

13+
(defn vim-frak-pattern
14+
"Create a non-capturing regular expression pattern compatible with Vim."
15+
[strs]
16+
(-> (str (frak/pattern strs))
17+
(string/replace #"\(\?:" "\\%\\(")))
18+
1219
(defn property-pattern
1320
"Vimscript very magic pattern for a character property class."
1421
([s] (property-pattern s true))
15-
([s braces?] (if braces?
16-
(format "\\v\\\\[pP]\\{%s\\}" s)
17-
(format "\\v\\\\[pP]%s" s))))
22+
([s braces?]
23+
(if braces?
24+
(format "\\v\\\\[pP]\\{%s\\}" s)
25+
(format "\\v\\\\[pP]%s" s))))
1826

1927
(defn syntax-match-properties
2028
"Vimscript literal `syntax match` for a character property class."
2129
([group fmt props] (syntax-match-properties group fmt props true))
2230
([group fmt props braces?]
2331
(format "syntax match %s \"%s\" contained display\n"
2432
(name group)
25-
(property-pattern (format fmt (string/join \| (sort props))) braces?))))
33+
(property-pattern (format fmt (vim-frak-pattern props)) braces?))))
2634

2735
(defn get-private-field
2836
"Violate encapsulation and get the value of a private field."
@@ -141,15 +149,15 @@
141149
;; `IsPosix` works, but is undefined.
142150
(syntax-match-properties
143151
:clojureRegexpPosixCharClass
144-
"%%(%s)"
152+
"%s"
145153
(:posix character-properties)))
146154

147155
(def vim-java-char-classes
148156
"Vimscript literal `syntax match` for \\p{javaMethod} property classes."
149157
;; `IsjavaMethod` works, but is undefined.
150158
(syntax-match-properties
151159
:clojureRegexpJavaCharClass
152-
"java%%(%s)"
160+
"java%s"
153161
(map #(string/replace % #"\Ajava" "") (:java character-properties))))
154162

155163
(def vim-unicode-binary-char-classes
@@ -158,26 +166,32 @@
158166
;; insensitively like the other Unicode properties.
159167
(syntax-match-properties
160168
:clojureRegexpUnicodeCharClass
161-
"\\cIs%%(%s)"
169+
"\\cIs%s"
162170
(map string/lower-case (:binary character-properties))))
163171

164172
(def vim-unicode-category-char-classes
165173
"Vimscript literal `syntax match` for Unicode General Category classes."
166-
(let [cats (map seq (:category character-properties))
167-
cats (map (fn [[c subcats]]
168-
(format "%s[%s]" c (apply str (sort (mapcat rest subcats)))))
169-
(group-by first cats))]
174+
(let [cats (sort (:category character-properties))
175+
chrs (->> (map seq cats)
176+
(group-by first)
177+
(keys)
178+
(map str)
179+
(sort))]
170180
;; gc= and general_category= can be case insensitive, but this behavior
171181
;; is undefined.
172182
(str
173183
(syntax-match-properties
174184
:clojureRegexpUnicodeCharClass
175-
"%%(%s)"
176-
(sort (filter #(= (count %) 1) (:category character-properties)))
185+
"%s"
186+
chrs
177187
false)
178188
(syntax-match-properties
179189
:clojureRegexpUnicodeCharClass
180-
"%%(Is|gc\\=|general_category\\=)?%%(%s)"
190+
"%s"
191+
cats)
192+
(syntax-match-properties
193+
:clojureRegexpUnicodeCharClass
194+
"%%(Is|gc\\=|general_category\\=)?%s"
181195
cats))))
182196

183197
(def vim-unicode-script-char-classes
@@ -189,7 +203,7 @@
189203
;; InScriptName works, but is undefined.
190204
(syntax-match-properties
191205
:clojureRegexpUnicodeCharClass
192-
"\\c%%(Is|sc\\=|script\\=)%%(%s)"
206+
"\\c%%(Is|sc\\=|script\\=)%s"
193207
(map string/lower-case (:script character-properties))))
194208

195209
(def vim-unicode-block-char-classes
@@ -198,10 +212,26 @@
198212
;; of Is.
199213
(syntax-match-properties
200214
:clojureRegexpUnicodeCharClass
201-
"\\c%%(In|blk\\=|block\\=)%%(%s)"
215+
"\\c%%(In|blk\\=|block\\=)%s"
202216
(map string/lower-case (:block character-properties))))
203217

218+
(def comprehensive-clojure-character-property-regexps
219+
"A string representing a Clojure literal vector of regular expressions
220+
containing all possible property character classes. For testing Vimscript
221+
syntax matching optimizations."
222+
(let [fmt (fn [prefix prop-key]
223+
(let [props (map (partial format "\\p{%s%s}" prefix)
224+
(sort (get character-properties prop-key)))]
225+
(format "#\"%s\"" (string/join props))))]
226+
(string/join \newline [(fmt "" :posix)
227+
(fmt "" :java)
228+
(fmt "Is" :binary)
229+
(fmt "general_category=" :category)
230+
(fmt "script=" :script)
231+
(fmt "block=" :block)])))
232+
204233
(comment
234+
;; Generate the vim literal definitions for pasting into the runtime files.
205235
(spit "tmp/clojure-defs.vim"
206236
(str generation-comment
207237
clojure-version-comment
@@ -218,4 +248,8 @@
218248
vim-unicode-binary-char-classes
219249
vim-unicode-category-char-classes
220250
vim-unicode-script-char-classes
221-
vim-unicode-block-char-classes)))
251+
vim-unicode-block-char-classes))
252+
;; Generate an example file with all possible character property literals.
253+
(spit "tmp/all-char-props.clj"
254+
comprehensive-clojure-character-property-regexps))
255+

clj/src/vim_clojure_static/test.clj

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
[file & lines]
1818
(io/make-parents file)
1919
(spit file (string/join \newline lines))
20-
(shell/sh "vim" "-u" "NONE" "-N" "-S" "vim/syn-id-names.vim" file)
20+
(shell/sh "vim" "-u" "NONE" "-N" "-S" "vim/test-runtime.vim" file)
2121
;; The last line of the file will contain valid EDN
2222
(into {} (map (fn [l ids] [l (mapv keyword ids)])
2323
lines
@@ -69,6 +69,42 @@
6969
ss λs)))
7070
contexts)))))
7171

72+
(defn vim-nfa-dump
73+
"Run a patched version of Vim compiled with -DDEBUG on a new file containing
74+
buffer, then move the NFA log to log-path. The patch is located at
75+
vim/custom-nfa-log.patch"
76+
[vim-path buffer log-path]
77+
(let [file "tmp/nfa-test-file.clj"]
78+
(spit file buffer)
79+
(time (shell/sh vim-path "-u" "NONE" "-N" "-S" "vim/test-runtime.vim" file))
80+
(shell/sh "mv" "nfa_regexp.log" log-path)))
81+
82+
(defn compare-nfa-dumps
83+
"Dump NFA logs with given buffer and syntax-files; log-files are written to
84+
tmp/ and are distinguished by the hash of the buffer and syntax script.
85+
86+
The vim-path passed to vim-nfa-dump should either be in the VIMDEBUG
87+
environment variable, or be the top vim in your PATH.
88+
89+
Returns the line count of each corresponding log file."
90+
[buf [& syntax-files] & opts]
91+
(let [{:keys [vim-path]
92+
:or {vim-path (or (System/getenv "VIMDEBUG") "vim")}} opts
93+
syn-path "../syntax/clojure.vim"
94+
orig-syn (slurp syn-path)
95+
buf-hash (hash buf)]
96+
(try
97+
(mapv (fn [path]
98+
(let [syn-buf (slurp path)
99+
syn-hash (hash syn-buf)
100+
log-path (format "tmp/debug:%d:%d.log" buf-hash syn-hash)]
101+
(spit syn-path syn-buf)
102+
(vim-nfa-dump vim-path buf log-path)
103+
(count (re-seq #"\n" (slurp log-path)))))
104+
syntax-files)
105+
(finally
106+
(spit syn-path orig-syn)))))
107+
72108
(comment
73109

74110
(macroexpand-1
@@ -80,4 +116,12 @@
80116
["^" #(= % [:clojureRegexpBoundary])]]))
81117
(test #'number-literals-test)
82118

119+
(defn dump! [buf]
120+
(compare-nfa-dumps (format "#\"\\p{%s}\"\n" buf)
121+
["../syntax/clojure.vim" "tmp/altsyntax.vim"]))
122+
123+
(dump! "Ll")
124+
(dump! "javaLowercase")
125+
(dump! "block=UNIFIED CANADIAN ABORIGINAL SYLLABICS")
126+
83127
)

clj/test/vim_clojure_static/syntax_test.clj

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,28 @@
9292

9393
(comment (test #'number-literals-test))
9494

95+
;; TODO: Finish me! (this was in an old git stash)
96+
;; (defsyntaxtest keywords-test
97+
;; (with-format "%s"
98+
;; ":1" kw
99+
;; ":A" kw
100+
;; ":a" kw
101+
;; ":αβγ" kw
102+
;; "::a" kw
103+
;; ":a/b" kw
104+
;; ":a:b" kw
105+
;; ":a:b/:c:b" kw
106+
;; ":a/b/c/d" kw
107+
;; "::a/b" !kw
108+
;; "::" !kw
109+
;; ":a:" !kw
110+
;; ":a/" !kw
111+
;; ":/" !kw
112+
;; ":" !kw
113+
;; ))
114+
;;
115+
;; (comment (test #'keywords-test))
116+
95117
(defsyntaxtest java-regexp-literals-test
96118
["#\"%s\""
97119
[;; http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html
@@ -221,8 +243,6 @@
221243
"\\p{IsLatin}" regexp-unicode-char-class
222244
;; \p{InGreek} A character in the Greek block (block)
223245
"\\p{InGreek}" regexp-unicode-char-class
224-
;; \p{Lu} An uppercase letter (category)
225-
"\\p{Lu}" regexp-unicode-char-class
226246
;; \p{IsAlphabetic} An alphabetic character (binary property)
227247
"\\p{IsAlphabetic}" regexp-unicode-char-class
228248
;; \p{Sc} A currency symbol
@@ -231,6 +251,13 @@
231251
"\\P{InGreek}" regexp-unicode-char-class
232252
;; [\p{L}&&[^\p{Lu}]] Any letter except an uppercase letter (subtraction)
233253

254+
;; Abbreviated categories
255+
"\\pL" regexp-unicode-char-class
256+
"\\p{L}" regexp-unicode-char-class
257+
"\\p{Lu}" regexp-unicode-char-class
258+
"\\p{gc=L}" regexp-unicode-char-class
259+
"\\p{IsLu}" regexp-unicode-char-class
260+
234261
;;;; Invalid classes
235262

236263
"\\P{Xzibit}" !regexp-posix-char-class

clj/vim/custom-nfa-log.patch

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
commit 09b1321fd7fa91e5a8e36ebc2d858079969adca4 (HEAD, github/custom-nfa-log, custom-nfa-log)
2+
Author: guns <self@sungpae.com>
3+
Date: Thu, 1 Aug 2013 10:56:19 -0500
4+
5+
Add custom NFA logging to nfa_regexp.log
6+
7+
The goal is to get a ballpark estimate of the number of steps the regexp
8+
engine undertakes in a session. One line is logged at each of these points:
9+
10+
* Start of nfa_regmatch()
11+
* For each character considered
12+
* For each state of a character considered
13+
14+
Vim should be compiled with -DDEBUG; if the NFA engine state graphs are
15+
desired, then also use -DINCLUDE_NFA_DUMP.
16+
17+
This branch can also be found at:
18+
19+
https://github.com/guns/vim/tree/custom-nfa-log
20+
---
21+
src/regexp_nfa.c | 20 +++++++++++++++++---
22+
1 file changed, 17 insertions(+), 3 deletions(-)
23+
24+
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
25+
index 738ac3b..bea9d50 100644
26+
--- a/src/regexp_nfa.c
27+
+++ b/src/regexp_nfa.c
28+
@@ -24,9 +24,18 @@
29+
#ifdef DEBUG
30+
# define NFA_REGEXP_ERROR_LOG "nfa_regexp_error.log"
31+
# define ENABLE_LOG
32+
-# define NFA_REGEXP_DUMP_LOG "nfa_regexp_dump.log"
33+
-# define NFA_REGEXP_RUN_LOG "nfa_regexp_run.log"
34+
-# define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log"
35+
+# ifdef INCLUDE_NFA_DUMP
36+
+# define NFA_REGEXP_DUMP_LOG "nfa_regexp.log"
37+
+# else
38+
+# define NFA_REGEXP_DUMP_LOG "/dev/null"
39+
+# endif
40+
+# define NFA_REGEXP_RUN_LOG "/dev/null"
41+
+# define NFA_REGEXP_DEBUG_LOG "/dev/null"
42+
+# define LOG(fmt, ...) do { \
43+
+ FILE *log = fopen("nfa_regexp.log", "a"); \
44+
+ fprintf(log, fmt, __VA_ARGS__); \
45+
+ fclose(log); \
46+
+} while (0)
47+
#endif
48+
49+
enum
50+
@@ -5043,6 +5052,8 @@ nfa_regmatch(prog, start, submatch, m)
51+
goto theend;
52+
53+
#ifdef ENABLE_LOG
54+
+ LOG("START nfa_regmatch: alloc=%d pattern=\"%s\"\n", size*2, prog->pattern);
55+
+
56+
log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
57+
if (log_fd != NULL)
58+
{
59+
@@ -5129,6 +5140,8 @@ nfa_regmatch(prog, start, submatch, m)
60+
nextlist->id = nfa_listid + 1;
61+
62+
#ifdef ENABLE_LOG
63+
+ LOG("# states=%d reginput=\"%s\"\n", thislist->n, reginput);
64+
+
65+
fprintf(log_fd, "------------------------------------------\n");
66+
fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput);
67+
fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", curc, (int)curc);
68+
@@ -5161,6 +5174,7 @@ nfa_regmatch(prog, start, submatch, m)
69+
fprintf(debug, "%s, ", code);
70+
#endif
71+
#ifdef ENABLE_LOG
72+
+ LOG("## computing nextlist: code=\"%s\"\n", code);
73+
{
74+
int col;
75+
Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,23 @@
33
execute 'set rtp=' . expand('%:p:h:h:h') . ',$VIMRUNTIME'
44
filetype plugin on
55
syntax on
6+
set synmaxcol=0
67
setfiletype clojure
78

8-
function! s:append_syn_id_names()
9+
if !exists('g:testing')
10+
let g:testing = 1
11+
endif
12+
13+
function! s:syn_id_names()
914
let names = []
1015
for lnum in range(1, line('$'))
1116
let f = 'synIDattr(synID(' . lnum . ', v:val, 0), "name")'
1217
call add(names, map(range(1, virtcol([lnum, '$']) - 1), f))
1318
endfor
14-
" Changing the quotes will make this valid EDN
15-
call append(line('$'), tr(string(names), "'", '"'))
19+
return names
1620
endfunction
1721

18-
call s:append_syn_id_names() | write | quitall!
22+
if g:testing
23+
" Changing the quotes will make this valid EDN
24+
call append(line('$'), tr(string(s:syn_id_names()), "'", '"')) | write | quitall!
25+
endif

indent/clojure.vim

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
" License: Same as Vim
99
" Last Change: 30 January 2013
1010

11+
" TODO: Indenting after multibyte characters is broken:
12+
" (let [Δ (if foo
13+
" bar ; Indent error
14+
" baz)])
15+
1116
if exists("b:did_indent")
1217
finish
1318
endif

0 commit comments

Comments
 (0)