@@ -207,24 +207,25 @@ def char(self):
207207 EOF when EOF is reached.
208208 """
209209 if self .queue :
210- return self .queue .pop (0 )
210+ char = self .queue .pop (0 )
211+ if char == "\n " :
212+ self .lineLengths .append (self .col )
213+ self .line += 1
214+ self .col = 0
215+ return char
211216 else :
212- c = self .dataStream .read (1 , 1 )
213- if not c :
214- self .col += 1
215- return EOF
216-
217- # Normalize newlines and null characters
218- if c == '\x00 ' :
219- self .errors .append ('null character found in input stream, '
220- 'replaced with U+FFFD' )
221- c = u'\uFFFD '
217+ c = self .readChar ()
218+ if c is EOF :
219+ return c
220+
222221 if c == '\r ' :
223222 #XXX This isn't right in the case with multiple CR in a row
224223 #also recursing here isn't ideal + not sure what happens to input position
225- c = self .char ()
226- if c and c != '\n ' :
224+ c = self .readChar ()
225+ if c is not EOF and c not in ( '\n ' , ' \r ' ) :
227226 self .queue .insert (0 , unicode (c ))
227+ elif c == '\r ' :
228+ self .queue .insert (0 , u'\n ' )
228229 c = '\n '
229230
230231 # update position in stream
@@ -236,6 +237,21 @@ def char(self):
236237 self .col += 1
237238 return unicode (c )
238239
240+ def readChar (self ):
241+ """Read one character from self.dataStream; NUL is replaced by U+FFFD and
242+ noted in self.errors. Returns EOF at end of input. CR is not normalized."""
243+ c = self .dataStream .read (1 , 1 )
244+ if not c :
245+ self .col += 1 # the column counter is advanced even when EOF is hit
246+ return EOF
247+
248+ # Replace null characters only; CR/newline handling is done in char()
249+ if c == '\x00 ' :
250+ self .errors .append ('null character found in input stream, '
251+ 'replaced with U+FFFD' )
252+ c = u'\uFFFD '
253+ return c
254+
239255 def charsUntil (self , characters , opposite = False ):
240256 """ Returns a string of characters from the stream up to but not
241257 including any character in characters or EOF. characters can be
0 commit comments