55import re
66class AboutRegex (Koan ):
77 """
8- This koans are based on the Ben's book: Regular Expressions in 10 minutes.
8+ These koans are based on Ben's book: Regular Expressions in 10 minutes.
99 I found this book very useful, so I decided to write these koans in order to practice everything I had learned from it.
1010 http://www.forta.com/books/0672325667/
1111 """
@@ -33,7 +33,9 @@ def test_matching_literal_text_how_many(self):
3333 """
3434 string = "Hello, my name is Felix and this koans are based on the Ben's book: Regular Expressions in 10 minutes. Repeat My name is Felix"
3535 m = re .match ('Felix' , string ) #TIP: Maybe match is not the best option
36- self .assertEqual (len (m ),2 , "I want to know how many times appears my name" )
36+
37+ # I want to know how many times my name appears
38+ self .assertEqual (m , __ )
3739
3840 def test_matching_literal_text_not_case_sensitivity (self ):
3941 """
@@ -43,8 +45,10 @@ def test_matching_literal_text_not_case_sensitivity(self):
4345 In Ben's book you can see more languages.
4446
4547 """
46- string = "Hello, my name is Felix or felix and this koans are based on the Ben's book: Regular Expressions in 10 minutes."
47- self .assertEqual (len (re .findall ("felix" , string ,__ )),2 , "I want my name" )
48+ string = "Hello, my name is Felix or felix and this koans is based on the Ben's book: Regular Expressions in 10 minutes."
49+
50+ self .assertEqual (re .findall ("felix" , string , 20 ), __ )
51+ self .assertEqual (re .findall ("felix" , string , 10 ), __ )
4852
4953 def test_matching_any_character (self ):
5054 """
@@ -59,25 +63,10 @@ def test_matching_any_character(self):
5963 + "na2.xls\n " \
6064 + "sa1.xls"
6165
62- #TIP: remember the issue of this lesson
63- self .assertEquals (len (re .findall (__ , string )),3 , "I want to find all files for North America(na) or South America(sa)" )
64-
65- def test_matching_special_character (self ):
66- """
67- Lesson 1 Matching special character
68-
69- Uses \ if you want to match special character
70- """
71- string = "sales.xlx\n " \
72- + "sales1.xls\n " \
73- + "orders1.xls\n " \
74- + "apac1.xls\n " \
75- + "sales2.xls\n " \
76- + "na1.xls\n " \
77- + "na2.xls\n " \
78- + "sa1.xls"
79- #TIP you can use the pattern .a. which matches in above test but in this case matches more than you want
80- self .assertEquals (len (re .findall (__ , string )),3 , "I want to find all files for North America(na) or South America(sa)" )
66+ # TIP: remember the name of this lesson
67+
68+ change_this_search_string = 'a..xlx' # <-- I want to find all files for North America(na) or South America(sa)
69+ self .assertEquals (len (re .findall (change_this_search_string , string )),3 )
8170
8271 def test_matching_set_character (self ):
8372 """
@@ -95,47 +84,10 @@ def test_matching_set_character(self):
9584 + "na2.xls\n " \
9685 + "sa1.xls\n " \
9786 + "ca1.xls"
98- #TIP you can use the pattern .a. which matches in above test but in this case matches more than you want
99- self .assertEquals (len (re .findall (__ , string )),3 , "I want to find all files for North America(na) or South America(sa), but not (ca)" )
100-
101- def test_using_character_set_ranges (self ):
102- """
103- Lesson 2 Using character set ranges
104-
105- The previous pattern could be [ns]a.\.xls and if a in the list had a file name sam.xls would be matched because the . matches all
106- characters, not just digits. This can be solved with Character sets.
107- You can use this pattern [ns]a[0123456789]\.xls but to simplify you can use a special metacharacter: - (hyphen). i.e [0-9]
108-
109- - is only a metacharacter when used between [].
110-
111- """
112- string = "sales.xlx\n " \
113- + "sales1.xls\n " \
114- + "orders3.xls\n " \
115- + "apac1.xls\n " \
116- + "sales2.xls\n " \
117- + "na1.xls\n " \
118- + "na2.xls\n " \
119- + "sa1.xls\n " \
120- + "sam.xls\n " \
121- + "ca1.xls"
122- self .assertEquals (len (re .findall (__ , string )),3 , "I want to find all files for North America(na) or South America(sa), but not (ca)" )
123-
124- def test_using_multiple_ranges (self ):
125- """
126- Lesson 2 Using character set ranges
127-
128- The following are valid ranges:
129- A-Z matches all uppercase characters from A to Z
130- a-z matches all uppercase characters from a to z
131- A-F matches all uppercase characters from A to F
132- A-z matches all uppercase characters from A to z. This pattern also includes characters such as [ and ^
133- Any two ASCII characters may be specified as the range start and end.
134-
135- """
136- string = '<BODY BGCOLOR="#336633" TEXT="#FFFFFF" MARGINWIDTH="0" MARGINHEIGHT="0" TOPMARGIN="0" LEFTMARGIN="0">'
137- self .assertEquals (len (re .findall (__ , string )),2 , "I want to find all the colors in RGB" )
138-
87+ # I want to find all files for North America(na) or South America(sa), but not (ca)
88+ # TIP: you can use the pattern .a. which matched in the test above, but in this case it matches more than you want
89+ change_this_search_string = '[nsc]a[2-9].xls'
90+ self .assertEquals (len (re .findall (change_this_search_string , string )),3 )
13991
14092 def test_anything_but_matching (self ):
14193 """
@@ -156,83 +108,9 @@ def test_anything_but_matching(self):
156108 + "na2.xls\n " \
157109 + "sa1.xls\n " \
158110 + "ca1.xls"
159- m = re .search (__ , string )
160- self .assertTrue (m and m .group (0 ) and m .group (0 )== 'sam.xls' , "I want to find the name sam" )
161111
162- def using_metacharacters_escaping ( self ):
163- """
164- Lesson 3 Using metacharacters
112+ # I want to find the name sam
113+ change_this_search_string = '[^nc]am'
114+ self . assertEquals ( re . findall ( change_this_search_string , string ), [ 'sam.xls' ])
165115
166- Metacharacters are characters that have special meaning within regular expressions.
167-
168- Metacharacters can be escaped by preceding them with a backslash, therefore \. matches .
169- """
170- string = "var myArray = new Array();\n " \
171- + "if (myArray[0]) { \n " \
172- + "}"
173- m = re .search ("myArray[0]" , string ) #TIP: This pattern matches "myArray0" because [ and ] are metacharacters
174- self .assertTrue (m and m .group (0 ) and m .group (0 )== 'myArray[0]' , "I want to find myArray[0]" )
175116
176- def using_metacharacters_matching_white_spaces (self ):
177- """
178- Lesson 3 Matching whitespace character
179-
180- Sometimes you'll have to match nonprinting whitespace characters embedded in your text. For example tab characters
181- or line breaks .
182- In this cases you can use these special metacharacters:
183- [\b ] Backspace
184- \f Form feed
185- \n Line feed
186- \r Carriage return
187- \t Tab
188- \v Vertical tab
189-
190- """
191- f = open ('koans/regex_cvs' , 'r' )
192- string = f .read ()
193- #This text contains a series of records in comma-delimited format (cvs). Before processing the records, you need
194- # to remove any blank lines in the data.
195- m = re .search ("" , string )
196- self .assertTrue (m and m .group (0 ) and m .group (0 )== '\n \n ' , "I want to find the blank lines" )
197-
198- def using_metacharacters_matching_digits (self ):
199- """
200- Lesson 3 Using metacharacters
201-
202- As you have seen in Lesson 2, [0-9] is a shorcut for [0123456789] and is used to match any digit.
203- To match anything other than a digit, the set can be negated as [^0-9].
204- With the next metacharacters you can do the same:
205- \d match any digit (same as [0-9])
206- \D match any nondigit (same as [0-9])
207- """
208- string = "var myArray = new Array();\n " \
209- + "if (myArray[0]) { \n " \
210- + " alert('Learning regex'); \n " \
211- + "} \n " \
212- + "if (myArray[1]) { \n " \
213- + " alert('With this great book');\n " \
214- + "} \n "
215-
216- self .assertEquals ( len (re .findall (__ , string )), 2 , "I want to find all uses of myArray" )
217-
218-
219- def using_metacharacters_matching_alphanumeric_characters (self ):
220- """
221- Lesson 3 Using metacharacters
222-
223- Like with the digits you have special characters for alphanumeric characters:
224- \w Any alphanumeric character in uppercase or lowercase and underscore: [a-zA-Z0-9_]
225- \W Any nonalphanumeric or underscore character: [^a-zA-Z0-9_]
226-
227- Here you have a list of IDs made of 3 characters/digits/underscores, 1 hyphen and 3 characters/digits/underscores:
228- A1A-B_A or BA_-2e3 or 1_2-34R
229- """
230- string = "A_1-DRA\n " \
231- + "A01-2ER\n " \
232- + "A01-(4d\n " \
233- + "B11=223\n " \
234- + "A1A-B_A\n " \
235- + "1_2-34R\n " \
236- + "BA_-2e3"
237-
238- self .assertEquals ( len (re .findall (__ , string )), 5 , "I want to find the ids" )
0 commit comments