@@ -15,7 +15,7 @@ def parse(self, response):
1515 for repository in response .css ('li.public' ):
1616 item = GithubItem ()
1717 item ['name' ] = repository .xpath (
18- './/a[@itemprop="name codeRepository"]/text()' ).re_first (" \n \s*(.*)" )
18+ './/a[@itemprop="name codeRepository"]/text()' ).re_first (r' \n\s*(.*)' )
1919 item ['update_time' ] = repository .xpath (
2020 './/relative-time/@datetime' ).extract_first ()
2121 repo_url = response .urljoin (
@@ -25,7 +25,6 @@ def parse(self, response):
2525 yield request
2626
2727 # 如果 Next 按钮没被禁用,那么表示有下一页
28- # Scrapy 不支持 CSS :last-child 选择器
2928 spans = response .css ('div.pagination span.disabled::text' )
3029 if len (spans ) == 0 or spans [- 1 ].extract () != 'Next' :
3130 next_url = response .css (
@@ -35,9 +34,9 @@ def parse(self, response):
3534 def parse_repo (self , response ):
3635 item = response .meta ['item' ]
3736 for number in response .css ('ul.numbers-summary li' ):
38- type_text = number .xpath ('.//a/text()' ).re_first ('\n \s*(.*)\n ' )
37+ type_text = number .xpath ('.//a/text()' ).re_first (r '\n\s*(.*)\n' )
3938 number_text = number .xpath (
40- './/span[@class="num text-emphasized"]/text()' ).re_first ('\n \s*(.*)\n ' )
39+ './/span[@class="num text-emphasized"]/text()' ).re_first (r '\n\s*(.*)\n' )
4140 if type_text and number_text :
4241 number_text = number_text .replace (',' , '' )
4342 if type_text in ('commit' , 'commits' ):
0 commit comments