Skip to content

Commit df9c418

Browse files
committed
Fix 0009
1 parent 7ec7737 commit df9c418

1 file changed

Lines changed: 14 additions & 7 deletions

File tree

Drake-Z/0009/0009.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,20 @@
55

66
__author__ = 'Drake-Z'
77

8-
import re
8+
import os, re
9+
from html.parser import HTMLParser
10+
from html.entities import name2codepoint
911

10-
def analysis(a):
11-
b = re.findall(r'href="(http://.*?.zhihu.com/.*?)"', a) #以知乎为例
12-
for i in b:
13-
print(i)
12+
class MyHTMLParser(HTMLParser):
    """HTML parser that prints link targets found in ``<a>`` tags.

    Feed markup through ``feed()``; every ``href`` value on an ``<a>`` start
    tag that begins with ``http`` is written to standard output, one per line.
    """

    def handle_starttag(self, tag, attrs):
        """Print each ``href`` of an ``<a>`` start tag that begins with ``http``.

        Args:
            tag: Tag name as supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag; ``value`` is
                ``None`` when the attribute was written without a value
                (for example ``<a href>``).
        """
        if tag != 'a':
            return
        for name, value in attrs:
            # A valueless ``href`` arrives as None; skip it rather than
            # letting the prefix test raise and abort the whole parse.
            if name == 'href' and value is not None and value.startswith('http'):
                print(value)
1420

1521
if __name__ == '__main__':
    # Read the sample page as UTF-8 and push its markup through the parser,
    # which prints each matching link as it is encountered.
    with open('test.html', encoding='utf-8') as page:
        link_parser = MyHTMLParser()
        markup = page.read()
        link_parser.feed(markup)

0 commit comments

Comments
 (0)