@@ -87,7 +87,7 @@ def detectEncoding(self, parseMeta=True, chardet=True):
8787 import chardet
8888 buffer = self .rawStream .read ()
8989 encoding = chardet .detect (buffer )['encoding' ]
90- self .rawStream = self . openStream (buffer )
90+ self .seek (buffer , 0 )
9191 except ImportError :
9292 pass
9393 # If all else fails use the default encoding
@@ -127,18 +127,50 @@ def detectBOM(self):
127127 seek = 2
128128
129129
130- #AT - move this to the caller?
131- # Set the read position past the BOM if one was found, otherwise
132- # set it to the start of the stream
133- self .rawStream .seek (encoding and seek or 0 )
130+ self .seek (string , encoding and seek or 0 )
134131
135132 return encoding
136133
def seek(self, buffer, n):
    """Arrange for ``buffer[n:]`` to be the next data read from the raw stream.

    ``buffer`` is data the caller has already read from ``self.rawStream``;
    ``n`` is the number of leading items of it that should stay consumed.

    Three strategies are tried in order:

    1. If the stream offers ``unget``, hand ``buffer[n:]`` back to it.
    2. Otherwise call ``rawStream.seek(n)``.
    3. If the stream cannot seek (``IOError``) or has no ``seek`` method
       at all (``AttributeError``), replace ``self.rawStream`` with a
       small wrapper that replays ``buffer[n:]`` before reading any more
       from the underlying stream.
    """
    if hasattr(self.rawStream, 'unget'):
        self.rawStream.unget(buffer[n:])
        return

    try:
        # NOTE(review): this is an absolute seek, so it presumably relies on
        # ``buffer`` having been read from offset 0 -- confirm with callers.
        self.rawStream.seek(n)
    except (IOError, AttributeError):
        # AttributeError covers file-like objects with no seek() at all;
        # IOError covers streams whose seek() exists but is unsupported.
        class BufferedStream(object):
            """Read-only wrapper that serves pushed-back data first."""

            def __init__(self, data, stream):
                self.data = data
                self.stream = stream

            def read(self, chars=-1):
                """Read up to ``chars`` items; None or negative reads all."""
                if chars is None or chars < 0:
                    # data[:0] is an empty value of the same type as the
                    # stream data, so this works for text and bytes alike.
                    pending, self.data = self.data, self.data[:0]
                    return pending + self.stream.read()
                if chars > len(self.data):
                    pending, self.data = self.data, self.data[:0]
                    return pending + self.stream.read(chars - len(pending))
                head, self.data = self.data[:chars], self.data[chars:]
                return head

            def unget(self, data):
                """Push ``data`` back so that it is returned first."""
                # Pushed-back data was read *before* whatever is still
                # buffered, so it must go in front, not behind.
                if self.data:
                    self.data = data + self.data
                else:
                    self.data = data

        self.rawStream = BufferedStream(buffer[n:], self.rawStream)
167+
def detectEncodingMeta(self):
    """Report the encoding declared by the meta element.

    Reads ``self.numBytesMeta`` items from the raw stream, gives them to
    ``EncodingParser``, and passes the same data to ``self.seek(..., 0)``
    before returning the parser's answer.
    """
    head = self.rawStream.read(self.numBytesMeta)
    meta_parser = EncodingParser(head)
    # Hand the sniffed data back to the stream before asking the parser.
    self.seek(head, 0)
    return meta_parser.getEncoding()
143175
144176 def position (self ):
@@ -195,18 +227,9 @@ def charsUntil(self, characters, opposite = False):
195227 # Put the character stopped on back to the front of the queue
196228 # from where it came.
197229 c = charStack .pop ()
198- if c != EOF :
199- self .queue .insert (0 , c )
230+ if c != EOF :
231+ self .queue .insert (0 , c )
200232
201- # XXX the following is need for correct line number reporting apparently
202- # but it causes to break other tests with the fixes in tokenizer. I have
203- # no idea why...
204- #
205- #if c != EOF and self.tell <= len(self.dataStream) and \
206- # self.dataStream[self.tell - 1] == c[0]:
207- # self.tell -= 1
208- #else:
209- # self.queue.insert(0, c)
210233 return u"" .join (charStack )
211234
212235class EncodingBytes (str ):
0 commit comments