@@ -41,13 +41,24 @@ var Tokenizer = function(rules) {
4141 this . rules = rules ;
4242
4343 this . regExps = { } ;
44+ this . matchMappings = { } ;
4445 for ( var key in this . rules ) {
4546 var rule = this . rules [ key ] ;
4647 var state = rule ;
4748 var ruleRegExps = [ ] ;
48-
49- for ( var i = 0 ; i < state . length ; i ++ )
49+ var matchTotal = 0 ;
50+ var mapping = this . matchMappings [ key ] = { } ;
51+
52+ for ( var i = 0 ; i < state . length ; i ++ ) {
53+ var matchcount = new RegExp ( "(?:(" + state [ i ] . regex + ")|(.))" ) . exec ( "a" ) ;
54+ mapping [ matchTotal ] = {
55+ rule : i ,
56+ len : matchcount . length - 2
57+ } ;
58+ matchTotal += matchcount . length - 2 ;
59+
5060 ruleRegExps . push ( state [ i ] . regex ) ;
61+ }
5162
5263 this . regExps [ key ] = new RegExp ( "(?:(" + ruleRegExps . join ( ")|(" ) + ")|(.))" , "g" ) ;
5364
@@ -59,25 +70,30 @@ var Tokenizer = function(rules) {
5970 this . getLineTokens = function ( line , startState ) {
6071 var currentState = startState ;
6172 var state = this . rules [ currentState ] ;
73+ var mapping = this . matchMappings [ currentState ] ;
6274 var re = this . regExps [ currentState ] ;
6375 re . lastIndex = 0 ;
64-
76+
6577 var match , tokens = [ ] ;
66-
78+
6779 var lastIndex = 0 ;
68-
80+
6981 var token = {
7082 type : null ,
7183 value : ""
7284 } ;
73-
85+
7486 while ( match = re . exec ( line ) ) {
7587 var type = "text" ;
76- var value = match [ 0 ] ;
88+ var value = [ match [ 0 ] ] ;
7789
7890 for ( var i = 0 ; i < state . length ; i ++ ) {
7991 if ( match [ i + 1 ] !== undefined ) {
80- var rule = state [ i ] ;
92+ var rule = state [ mapping [ i ] . rule ] ;
93+
94+ if ( mapping [ i ] . len > 1 ) {
95+ value = match . slice ( i + 2 , i + 1 + mapping [ i ] . len ) ;
96+ }
8197
8298 if ( typeof rule . token == "function" )
8399 type = rule . token ( match [ 0 ] ) ;
@@ -87,6 +103,7 @@ var Tokenizer = function(rules) {
87103 if ( rule . next && rule . next !== currentState ) {
88104 currentState = rule . next ;
89105 state = this . rules [ currentState ] ;
106+ mapping = this . matchMappings [ currentState ] ;
90107 lastIndex = re . lastIndex ;
91108
92109 re = this . regExps [ currentState ] ;
@@ -96,17 +113,26 @@ var Tokenizer = function(rules) {
96113 }
97114 } ;
98115
99-
100- if ( token . type !== type ) {
101- if ( token . type )
102- tokens . push ( token ) ;
116+ if ( typeof type == "string" ) {
117+ if ( typeof value != "string" ) {
118+ value = [ value . join ( "" ) ] ;
119+ }
120+ type = [ type ] ;
121+ }
122+
123+ for ( var i = 0 ; i < value . length ; i ++ ) {
124+ if ( token . type !== type [ i ] ) {
125+ if ( token . type ) {
126+ tokens . push ( token ) ;
127+ }
103128
104- token = {
105- type : type ,
106- value : value
107- } ;
108- } else {
109- token . value += value ;
129+ token = {
130+ type : type [ i ] ,
131+ value : value [ i ]
132+ }
133+ } else {
134+ token . value += value ;
135+ }
110136 }
111137
112138 if ( lastIndex == line . length )
0 commit comments