|
5 | 5 | from constants import EOF, spaceCharacters, asciiLetters, asciiUppercase |
6 | 6 | from constants import encodings |
7 | 7 | from utils import MethodDispatcher |
| 8 | +from collections import deque |
8 | 9 |
|
| 10 | +try: |
| 11 | + from collections import deque |
| 12 | +except ImportError: |
| 13 | + from utils import deque |
| 14 | + |
9 | 15 | class HTMLInputStream(object): |
10 | 16 | """Provides a unicode stream of characters to the HTMLTokenizer. |
11 | 17 |
|
@@ -55,7 +61,7 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True): |
55 | 61 | self.dataStream = codecs.getreader(self.charEncoding)(self.rawStream, |
56 | 62 | 'replace') |
57 | 63 |
|
58 | | - self.queue = [] |
| 64 | + self.queue = deque([]) |
59 | 65 | self.errors = [] |
60 | 66 |
|
61 | 67 | self.line = self.col = 0 |
@@ -212,7 +218,7 @@ def char(self): |
212 | 218 | if not self.queue: |
213 | 219 | return EOF |
214 | 220 |
|
215 | | - char = self.queue.pop(0) |
| 221 | + char = self.queue.popleft() |
216 | 222 |
|
217 | 223 | # update position in stream |
218 | 224 | if char == '\n': |
@@ -277,8 +283,7 @@ def charsUntil(self, characters, opposite = False): |
277 | 283 | else: |
278 | 284 | self.col += 1 |
279 | 285 |
|
280 | | - rv = u"".join(self.queue[:i]) |
281 | | - self.queue = self.queue[i:] |
| 286 | + rv = u"".join([ self.queue.popleft() for c in range(i) ]) |
282 | 287 |
|
283 | 288 | #Calculate where we now are in the stream |
284 | 289 | #One possible optimisation would be to store all read characters and |
@@ -313,7 +318,9 @@ def charsUntil(self, characters, opposite = False): |
313 | 318 |
|
314 | 319 | def unget(self, chars): |
315 | 320 | if chars: |
316 | | - self.queue = list(chars) + self.queue |
| 321 | + l = list(chars) |
| 322 | + l.reverse() |
| 323 | + self.queue.extendleft(l) |
317 | 324 | #Alter the current line, col position |
318 | 325 | for c in chars[::-1]: |
319 | 326 | if c == '\n': |
|
0 commit comments