@@ -226,8 +226,8 @@ def char(self):
226226 self .col += 1
227227 return char
228228
229- def readChunk (self , chunkSize = 1024 ):
230- data = self .dataStream .read (1024 )
229+ def readChunk (self , chunkSize = 10240 ):
230+ data = self .dataStream .read (chunkSize )
231231 if not data :
232232 return
233233 #Replace null characters
@@ -250,18 +250,40 @@ def charsUntil(self, characters, opposite = False):
250250 including any character in characters or EOF. characters can be
251251 any container that supports the in method being called on it.
252252 """
253- charStack = [self .char ()]
254253
255- while charStack [- 1 ] and (charStack [- 1 ] in characters ) == opposite :
256- charStack .append (self .char ())
254+ #This method is currently 40-50% of our total runtime and badly needs
255+ #optimizing
256+ #Possible improvements:
257+ # - use regexp to find characters that match the required character set
258+ # - compute line positions in a single pass at the end
259+ # - improve EOF handling for fewer if statements
257260
258- # Put the character stopped on back to the front of the queue
259- # from where it came.
260- c = charStack . pop ()
261- if c != EOF :
262- self . unget ( c )
261+ if not self . queue :
262+ self . readChunk ()
263+ #Break if we have reached EOF
264+ if not self . queue or self . queue [ 0 ] == None :
265+ return u""
263266
264- return u"" .join (charStack )
267+ i = 0
268+ while (self .queue [i ] in characters ) == opposite :
269+ #Working out positions like this really sucks
270+ if self .queue [i ] == '\n ' :
271+ self .lineLengths .append (self .col )
272+ self .line += 1
273+ self .col = 0
274+ else :
275+ self .col += 1
276+ i += 1
277+ if i == len (self .queue ):
278+ self .readChunk ()
279+ #If the queue doesn't grow we have reached EOF
280+ if i == len (self .queue ) or self .queue [i ] is EOF :
281+ break
282+
283+ rv = u"" .join (self .queue [:i ])
284+ self .queue = self .queue [i :]
285+
286+ return rv
265287
266288 def unget (self , chars ):
267289 if chars :
0 commit comments