We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 7ec7737 commit df9c418 · Copy full SHA for df9c418
1 file changed
Drake-Z/0009/0009.py
@@ -5,13 +5,20 @@
5
6
__author__ = 'Drake-Z'
7
8
-import re
+import os, re
9
+from html.parser import HTMLParser
10
+from html.entities import name2codepoint
11
-def analysis(a):
- b = re.findall(r'href="(http://.*?.zhihu.com/.*?)"', a) #以知乎为例
12
- for i in b:
13
- print(i)
class MyHTMLParser(HTMLParser):
    """HTML parser that prints the ``href`` of every ``<a>`` tag whose
    target starts with ``http``.

    Feed markup with ``feed()``; each matching link is written to standard
    output on its own line, in document order.
    """

    def handle_starttag(self, tag, attrs):
        """Print absolute link targets found on an opening ``<a>`` tag.

        Args:
            tag: Name of the tag being opened.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.
                ``value`` is ``None`` when the attribute has no value
                (for example ``<a href>``).
        """
        if tag != 'a':
            return
        for name, value in attrs:
            if name != 'href':
                continue
            # A valueless ``href`` arrives as None; the previous regex call
            # raised TypeError on it, so skip such attributes explicitly.
            if value is not None and value.startswith('http'):
                print(value)
20
21
if __name__ == '__main__':
    # Read the whole sample page first, then hand it to the parser in a
    # single call; matching links are printed as they are encountered.
    with open('test.html', encoding='utf-8') as source:
        markup = source.read()
    link_printer = MyHTMLParser()
    link_printer.feed(markup)
0 commit comments