99__revision__ = "$Id$"
1010
1111from types import *
12- import sys , os , string , re
12+ import sys , os , string
1313
1414
1515class TextFile :
1616
1717 """Provides a file-like object that takes care of all the things you
1818 commonly want to do when processing a text file that has some
19- line-by-line syntax: strip comments (as long as "#" is your comment
20- character), skip blank lines, join adjacent lines by escaping the
21- newline (ie. backslash at end of line), strip leading and/or
22- trailing whitespace, and collapse internal whitespace. All of these
23- are optional and independently controllable.
19+ line-by-line syntax: strip comments (as long as "#" is your
20+ comment character), skip blank lines, join adjacent lines by
21+ escaping the newline (i.e. backslash at end of line), and strip
22+ leading and/or trailing whitespace. All of these are optional
23+ and independently controllable.
2424
2525 Provides a 'warn()' method so you can generate warning messages that
2626 report physical line number, even if the logical line in question
@@ -50,7 +50,7 @@ class TextFile:
5050 each line before returning it
5151 skip_blanks [default: true]
5252 skip lines that are empty *after* stripping comments and
53- whitespace. (If both lstrip_ws and rstrip_ws are true ,
53+ whitespace. (If both lstrip_ws and rstrip_ws are false ,
5454 then some lines may consist of solely whitespace: these will
5555 *not* be skipped, even if 'skip_blanks' is true.)
5656 join_lines [default: false]
@@ -59,12 +59,9 @@ class TextFile:
5959 to it to form one "logical line"; if N consecutive lines end
6060 with a backslash, then N+1 physical lines will be joined to
6161 form one logical line.
62- collapse_ws [default: false]
63- after stripping comments and whitespace and joining physical
64- lines into logical lines, all internal whitespace (strings of
65- whitespace surrounded by non-whitespace characters, and not at
66- the beginning or end of the logical line) will be collapsed
67- to a single space.
62+ collapse_join [default: false]
63+ strip leading whitespace from lines that are joined to their
64+ predecessor; only matters if (join_lines and not lstrip_ws)
6865
6966 Note that since 'rstrip_ws' can strip the trailing newline, the
7067 semantics of 'readline()' must differ from those of the builtin file
@@ -75,10 +72,10 @@ class TextFile:
7572
7673 default_options = { 'strip_comments' : 1 ,
7774 'skip_blanks' : 1 ,
78- 'join_lines' : 0 ,
7975 'lstrip_ws' : 0 ,
8076 'rstrip_ws' : 1 ,
81- 'collapse_ws' : 0 ,
77+ 'join_lines' : 0 ,
78+ 'collapse_join' : 0 ,
8279 }
8380
8481 def __init__ (self , filename = None , file = None , ** options ):
@@ -219,6 +216,8 @@ def readline (self):
219216 "end-of-file" )
220217 return buildup_line
221218
219+ if self .collapse_join :
220+ line = string .lstrip (line )
222221 line = buildup_line + line
223222
224223 # careful: pay attention to line number when incrementing it
@@ -261,10 +260,6 @@ def readline (self):
261260 buildup_line = line [0 :- 2 ] + '\n '
262261 continue
263262
264- # collapse internal whitespace (*after* joining lines!)
265- if self .collapse_ws :
266- line = re .sub (r'(\S)\s+(\S)' , r'\1 \2' , line )
267-
268263 # well, I guess there's some actual content there: return it
269264 return line
270265
@@ -295,24 +290,29 @@ def unreadline (self, line):
295290 test_data = """# test file
296291
297292line 3 \\
298- continues on next line
293+ continues on next line
299294"""
300295
301296
302297 # result 1: no fancy options
303298 result1 = map (lambda x : x + "\n " , string .split (test_data , "\n " )[0 :- 1 ])
304299
305300 # result 2: just strip comments
306- result2 = ["\n " , "\n " , "line 3 \\ \n " , "continues on next line\n " ]
301+ result2 = ["\n " , "\n " , "line 3 \\ \n " , " continues on next line\n " ]
307302
308303 # result 3: just strip blank lines
309- result3 = ["# test file\n " , "line 3 \\ \n " , "continues on next line\n " ]
304+ result3 = ["# test file\n " , "line 3 \\ \n " , " continues on next line\n " ]
310305
311306 # result 4: default, strip comments, blank lines, and trailing whitespace
312- result4 = ["line 3 \\ " , "continues on next line" ]
307+ result4 = ["line 3 \\ " , " continues on next line" ]
313308
314- # result 5: full processing, strip comments and blanks, plus join lines
315- result5 = ["line 3 continues on next line" ]
309+ # result 5: strip comments and blanks, plus join lines (but don't
310+ # "collapse" joined lines)
311+ result5 = ["line 3 continues on next line" ]
312+
313+ # result 6: strip comments and blanks, plus join lines (and
314+ # "collapse" joined lines)
315+ result6 = ["line 3 continues on next line" ]
316316
317317 def test_input (count , description , file , expected_result ):
318318 result = file .readlines ()
@@ -349,7 +349,11 @@ def test_input (count, description, file, expected_result):
349349
350350 in_file = TextFile (filename , strip_comments = 1 , skip_blanks = 1 ,
351351 join_lines = 1 , rstrip_ws = 1 )
352- test_input (5 , "full processing" , in_file , result5 )
352+ test_input (5 , "join lines without collapsing" , in_file , result5 )
353+
354+ in_file = TextFile (filename , strip_comments = 1 , skip_blanks = 1 ,
355+ join_lines = 1 , rstrip_ws = 1 , collapse_join = 1 )
356+ test_input (6 , "join lines with collapsing" , in_file , result6 )
353357
354358 os .remove (filename )
355359
0 commit comments