Skip to content

Commit ae5e83e

Browse files
committed
Add tokenizer support for regex capture groups, so that each group of a rule's match can be paired with an entry in an array of token types.
1 parent d1e0546 commit ae5e83e

1 file changed

Lines changed: 44 additions & 18 deletions

File tree

lib/ace/tokenizer.js

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,24 @@ var Tokenizer = function(rules) {
4141
this.rules = rules;
4242

4343
this.regExps = {};
44+
this.matchMappings = {};
4445
for ( var key in this.rules) {
4546
var rule = this.rules[key];
4647
var state = rule;
4748
var ruleRegExps = [];
48-
49-
for ( var i = 0; i < state.length; i++)
49+
var matchTotal = 0;
50+
var mapping = this.matchMappings[key] = {};
51+
52+
for ( var i = 0; i < state.length; i++) {
53+
var matchcount = new RegExp("(?:(" + state[i].regex + ")|(.))").exec("a");
54+
mapping[matchTotal] = {
55+
rule: i,
56+
len: matchcount.length - 2
57+
};
58+
matchTotal += matchcount.length - 2;
59+
5060
ruleRegExps.push(state[i].regex);
61+
}
5162

5263
this.regExps[key] = new RegExp("(?:(" + ruleRegExps.join(")|(") + ")|(.))", "g");
5364

@@ -59,25 +70,30 @@ var Tokenizer = function(rules) {
5970
this.getLineTokens = function(line, startState) {
6071
var currentState = startState;
6172
var state = this.rules[currentState];
73+
var mapping = this.matchMappings[currentState];
6274
var re = this.regExps[currentState];
6375
re.lastIndex = 0;
64-
76+
6577
var match, tokens = [];
66-
78+
6779
var lastIndex = 0;
68-
80+
6981
var token = {
7082
type: null,
7183
value: ""
7284
};
73-
85+
7486
while (match = re.exec(line)) {
7587
var type = "text";
76-
var value = match[0];
88+
var value = [match[0]];
7789

7890
for ( var i = 0; i < state.length; i++) {
7991
if (match[i + 1] !== undefined) {
80-
var rule = state[i];
92+
var rule = state[mapping[i].rule];
93+
94+
if (mapping[i].len > 1) {
95+
value = match.slice(i+2, i+1+mapping[i].len);
96+
}
8197

8298
if (typeof rule.token == "function")
8399
type = rule.token(match[0]);
@@ -87,6 +103,7 @@ var Tokenizer = function(rules) {
87103
if (rule.next && rule.next !== currentState) {
88104
currentState = rule.next;
89105
state = this.rules[currentState];
106+
mapping = this.matchMappings[currentState];
90107
lastIndex = re.lastIndex;
91108

92109
re = this.regExps[currentState];
@@ -96,17 +113,26 @@ var Tokenizer = function(rules) {
96113
}
97114
};
98115

99-
100-
if (token.type !== type) {
101-
if (token.type)
102-
tokens.push(token);
116+
if (typeof type == "string") {
117+
if (typeof value != "string") {
118+
value = [value.join("")];
119+
}
120+
type = [type];
121+
}
122+
123+
for ( var i = 0; i < value.length; i++) {
124+
if (token.type !== type[i]) {
125+
if (token.type) {
126+
tokens.push(token);
127+
}
103128

104-
token = {
105-
type: type,
106-
value: value
107-
};
108-
} else {
109-
token.value += value;
129+
token = {
130+
type: type[i],
131+
value: value[i]
132+
}
133+
} else {
134+
token.value += value;
135+
}
110136
}
111137

112138
if (lastIndex == line.length)

0 commit comments

Comments
 (0)