Skip to content

Commit d9e87cd

Browse files
committed
Add 0008 file
1 parent 4d5fc2b commit d9e87cd

1 file changed

Lines changed: 33 additions & 0 deletions

File tree

Drake-Z/0008/0008.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
'Problem 0008: given an HTML file, find and print its main body text.'
5+
6+
__author__ = 'Drake-Z'
7+
8+
from html.parser import HTMLParser
9+
from html.entities import name2codepoint
10+
11+
class MyHTMLParser(HTMLParser):
    """Print the text found inside the page's main-content ``<div>`` blocks.

    A content block is a ``<div>`` whose ``class`` attribute is exactly one
    of ``CONTENT_CLASSES``.  Every non-blank text chunk inside such a block
    (including text nested in child tags such as ``<b>`` or ``<a>``) is
    stripped and printed on its own line; ``<br>`` inside the block prints
    blank lines.  Text outside a content block is ignored.
    """

    # Exact ``class`` attribute values that identify a content <div>.
    CONTENT_CLASSES = (
        'zh-summary summary clearfix',
        'zm-editable-content clearfix',
    )

    # True while the parser is positioned inside a content <div>.
    in_zhengwen = False
    # Not used by the parser; kept so existing attribute access still works.
    in_huanhang = False
    # Number of currently open <div> elements counted from the content <div>
    # (the content <div> itself counts as 1).
    _div_depth = 0

    def reset(self):
        """Reset the parser and forget any content block in progress."""
        super().reset()
        self.in_zhengwen = False
        self._div_depth = 0

    def handle_starttag(self, tag, attrs):
        """Enter a content block, or track nesting while inside one.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs.
        """
        if self.in_zhengwen:
            if tag == 'div':
                # Count nested <div>s so the matching </div> of the content
                # block can be recognised in handle_endtag.
                self._div_depth += 1
            elif tag == 'br':
                print('\n')
            # Any other nested tag (<b>, <a>, <p>, ...) must not end the
            # block, otherwise the text that follows it would be lost.
            return

        if tag == 'div' and any(
            ('class', css) in attrs for css in self.CONTENT_CLASSES
        ):
            self.in_zhengwen = True
            self._div_depth = 1

    def handle_endtag(self, tag):
        """Leave the content block when its own ``</div>`` is reached.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
        """
        if not self.in_zhengwen or tag != 'div':
            return
        self._div_depth -= 1
        if self._div_depth <= 0:
            self.in_zhengwen = False
            self._div_depth = 0

    def handle_data(self, data):
        """Print *data* (stripped) when it lies inside a content block.

        Whitespace-only chunks, such as the line breaks between nested
        tags, are skipped so they do not produce empty output lines.

        Args:
            data: Text chunk supplied by ``HTMLParser``.
        """
        if not self.in_zhengwen:
            return
        text = data.strip()
        if text:
            print(text)
29+
30+
if __name__ == '__main__':
    # Script entry point: parse test.html from the current working directory
    # and print its main body text.
    parser = MyHTMLParser()
    # Use a context manager so the file handle is closed deterministically.
    with open('test.html', 'r', encoding='utf-8') as html_file:
        f = html_file.read()
    parser.feed(f)
    # close() forces processing of any text still buffered by the parser.
    parser.close()

0 commit comments

Comments
 (0)