99 The robots.txt Exclusion Protocol is implemented as specified in
1010 http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
1111"""
12- import re ,string , urlparse ,urllib
12+ import re ,urlparse ,urllib
1313
1414__all__ = ["RobotFileParser" ]
1515
@@ -71,7 +71,7 @@ def parse(self, lines):
7171 entry = Entry ()
7272
7373 for line in lines :
74- line = string .strip (line )
74+ line = line .strip ()
7575 linenumber = linenumber + 1
7676 if not line :
7777 if state == 1 :
@@ -85,16 +85,16 @@ def parse(self, lines):
8585 entry = Entry ()
8686 state = 0
8787 # remove optional comment and strip line
88- i = string .find (line , '#' )
88+ i = line .find ('#' )
8989 if i >= 0 :
9090 line = line [:i ]
91- line = string .strip (line )
91+ line = line .strip ()
9292 if not line :
9393 continue
94- line = string .split (line , ':' , 1 )
94+ line = line .split (':' , 1 )
9595 if len (line ) == 2 :
96- line [0 ] = string . lower ( string . strip ( line [0 ]) )
97- line [1 ] = string . strip ( line [1 ])
96+ line [0 ] = line [0 ]. strip (). lower ( )
97+ line [1 ] = line [1 ]. strip ( )
9898 if line [0 ] == "user-agent" :
9999 if state == 2 :
100100 _debug ("line %d: warning: you should insert a blank"
@@ -136,7 +136,7 @@ def can_fetch(self, useragent, url):
136136 return 1
137137 # search for given user agent matches
138138 # the first match counts
139- useragent = string .lower (useragent )
139+ useragent = useragent .lower ()
140140 url = urllib .quote (urlparse .urlparse (url )[2 ])
141141 for entry in self .entries :
142142 if entry .applies_to (useragent ):
0 commit comments