Use a deque for the character queue. Patch by shawn.hsiao

jgraham · jgraham · commit ed1c27f25478 · 2007-09-19T22:40:33.000Z
--HG--
extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401010
diff --git a/src/html5lib/inputstream.py b/src/html5lib/inputstream.py
@@ -5,7 +5,13 @@
 from constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
 from constants import encodings
 from utils import MethodDispatcher
+from collections import deque
 
+try:
+    from collections import deque
+except ImportError:
+    from utils import deque
+    
 class HTMLInputStream(object):
     """Provides a unicode stream of characters to the HTMLTokenizer.
 
@@ -55,7 +61,7 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
         self.dataStream = codecs.getreader(self.charEncoding)(self.rawStream,
                                                               'replace')
 
-        self.queue = []
+        self.queue = deque([])
         self.errors = []
 
         self.line = self.col = 0
@@ -212,7 +218,7 @@ def char(self):
         if not self.queue:
             return EOF
         
-        char = self.queue.pop(0)
+        char = self.queue.popleft()
         
         # update position in stream
         if char == '\n':
@@ -277,8 +283,7 @@ def charsUntil(self, characters, opposite = False):
             else:
                 self.col += 1
 
-        rv = u"".join(self.queue[:i])
-        self.queue = self.queue[i:]
+        rv = u"".join([ self.queue.popleft() for c in range(i) ])
         
         #Calculate where we now are in the stream
         #One possible optimisation would be to store all read characters and
@@ -313,7 +318,9 @@ def charsUntil(self, characters, opposite = False):
 
     def unget(self, chars):
         if chars:
-            self.queue = list(chars) + self.queue
+            l = list(chars)
+            l.reverse()
+            self.queue.extendleft(l)
             #Alter the current line, col position
             for c in chars[::-1]:
                 if c == '\n':