11import io
2+ import itertools
23import shlex
34import string
45import unittest
@@ -183,10 +184,12 @@ def testSyntaxSplitAmpersandAndPipe(self):
183184 src = ['echo hi %s echo bye' % delimiter ,
184185 'echo hi%secho bye' % delimiter ]
185186 ref = ['echo' , 'hi' , delimiter , 'echo' , 'bye' ]
186- for ss in src :
187+ for ss , ws in itertools . product ( src , ( False , True )) :
187188 s = shlex .shlex (ss , punctuation_chars = True )
189+ s .whitespace_split = ws
188190 result = list (s )
189- self .assertEqual (ref , result , "While splitting '%s'" % ss )
191+ self .assertEqual (ref , result ,
192+ "While splitting '%s' [ws=%s]" % (ss , ws ))
190193
191194 def testSyntaxSplitSemicolon (self ):
192195 """Test handling of syntax splitting of ;"""
@@ -197,10 +200,12 @@ def testSyntaxSplitSemicolon(self):
197200 'echo hi%s echo bye' % delimiter ,
198201 'echo hi%secho bye' % delimiter ]
199202 ref = ['echo' , 'hi' , delimiter , 'echo' , 'bye' ]
200- for ss in src :
203+ for ss , ws in itertools . product ( src , ( False , True )) :
201204 s = shlex .shlex (ss , punctuation_chars = True )
205+ s .whitespace_split = ws
202206 result = list (s )
203- self .assertEqual (ref , result , "While splitting '%s'" % ss )
207+ self .assertEqual (ref , result ,
208+ "While splitting '%s' [ws=%s]" % (ss , ws ))
204209
205210 def testSyntaxSplitRedirect (self ):
206211 """Test handling of syntax splitting of >"""
@@ -211,29 +216,37 @@ def testSyntaxSplitRedirect(self):
211216 'echo hi%s out' % delimiter ,
212217 'echo hi%sout' % delimiter ]
213218 ref = ['echo' , 'hi' , delimiter , 'out' ]
214- for ss in src :
219+ for ss , ws in itertools . product ( src , ( False , True )) :
215220 s = shlex .shlex (ss , punctuation_chars = True )
216221 result = list (s )
217- self .assertEqual (ref , result , "While splitting '%s'" % ss )
222+ self .assertEqual (ref , result ,
223+ "While splitting '%s' [ws=%s]" % (ss , ws ))
218224
219225 def testSyntaxSplitParen (self ):
220226 """Test handling of syntax splitting of ()"""
221227 # these should all parse to the same output
222228 src = ['( echo hi )' ,
223229 '(echo hi)' ]
224230 ref = ['(' , 'echo' , 'hi' , ')' ]
225- for ss in src :
231+ for ss , ws in itertools . product ( src , ( False , True )) :
226232 s = shlex .shlex (ss , punctuation_chars = True )
233+ s .whitespace_split = ws
227234 result = list (s )
228- self .assertEqual (ref , result , "While splitting '%s'" % ss )
235+ self .assertEqual (ref , result ,
236+ "While splitting '%s' [ws=%s]" % (ss , ws ))
229237
230238 def testSyntaxSplitCustom (self ):
231239 """Test handling of syntax splitting with custom chars"""
240+ ss = "~/a&&b-c --color=auto||d *.py?"
232241 ref = ['~/a' , '&' , '&' , 'b-c' , '--color=auto' , '||' , 'd' , '*.py?' ]
233- ss = "~/a && b-c --color=auto || d *.py?"
234242 s = shlex .shlex (ss , punctuation_chars = "|" )
235243 result = list (s )
236- self .assertEqual (ref , result , "While splitting '%s'" % ss )
244+ self .assertEqual (ref , result , "While splitting '%s' [ws=False]" % ss )
245+ ref = ['~/a&&b-c' , '--color=auto' , '||' , 'd' , '*.py?' ]
246+ s = shlex .shlex (ss , punctuation_chars = "|" )
247+ s .whitespace_split = True
248+ result = list (s )
249+ self .assertEqual (ref , result , "While splitting '%s' [ws=True]" % ss )
237250
238251 def testTokenTypes (self ):
239252 """Test that tokens are split with types as expected."""
@@ -293,6 +306,19 @@ def testEmptyStringHandling(self):
293306 s = shlex .shlex ("'')abc" , punctuation_chars = True )
294307 self .assertEqual (list (s ), expected )
295308
309+ def testUnicodeHandling (self ):
310+ """Test punctuation_chars and whitespace_split handle unicode."""
311+ ss = "\u2119 \u01b4 \u2602 \u210c \u00f8 \u1f24 "
312+ # Should be parsed as one complete token (whitespace_split=True).
313+ ref = ['\u2119 \u01b4 \u2602 \u210c \u00f8 \u1f24 ' ]
314+ s = shlex .shlex (ss , punctuation_chars = True )
315+ s .whitespace_split = True
316+ self .assertEqual (list (s ), ref )
317+ # Without whitespace_split, uses wordchars and splits on all.
318+ ref = ['\u2119 ' , '\u01b4 ' , '\u2602 ' , '\u210c ' , '\u00f8 ' , '\u1f24 ' ]
319+ s = shlex .shlex (ss , punctuation_chars = True )
320+ self .assertEqual (list (s ), ref )
321+
296322 def testQuote (self ):
297323 safeunquoted = string .ascii_letters + string .digits + '@%_-+=:,./'
298324 unicode_sample = '\xe9 \xe0 \xdf ' # e + acute accent, a + grave, sharp s
0 commit comments