File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ #!/usr/bin/env python3
2+ # -*- coding: utf-8 -*-
3+
4+ '第 0008 题:一个HTML文件,找出里面的正文。'
5+
6+ __author__ = 'Drake-Z'
7+
8+ from html .parser import HTMLParser
9+ from html .entities import name2codepoint
10+
class MyHTMLParser(HTMLParser):
    """Print the body text found inside the content ``<div>`` blocks of a page.

    A ``<div>`` whose ``class`` attribute is exactly one of the values in
    ``_CONTENT_CLASSES`` opens a content region. Every non-empty text node
    inside that region (including text nested in inline tags or inner
    ``<div>`` elements) is stripped and printed on its own line. The region
    ends when the ``</div>`` matching the opening container is seen.
    """

    # Exact ``class`` attribute values that mark a body-text container.
    _CONTENT_CLASSES = frozenset({
        'zh-summary summary clearfix',
        'zm-editable-content clearfix',
    })

    # True while the parser is inside a body-text container.
    in_zhengwen = False
    # Not read anywhere in this class; kept so existing attribute access works.
    in_huanhang = False
    # Number of currently open <div> elements, counted from the container.
    _div_depth = 0

    def handle_starttag(self, tag, attrs):
        """Enter a content region, track nested divs, and emit line breaks.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag == 'div':
            if self.in_zhengwen:
                # Nested div: remember it so its </div> does not end the region.
                self._div_depth += 1
            elif any(name == 'class' and value in self._CONTENT_CLASSES
                     for name, value in attrs):
                self.in_zhengwen = True
                self._div_depth = 1
        elif tag == 'br' and self.in_zhengwen:
            # Only breaks inside the body text are meaningful output.
            print('\n ')
        # Any other tag (<p>, <b>, <a>, ...) leaves the state untouched so
        # that text following inline markup is still treated as body text.

    def handle_endtag(self, tag):
        """Leave the content region when its own ``</div>`` is reached.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
        """
        if tag == 'div' and self.in_zhengwen:
            self._div_depth -= 1
            if self._div_depth == 0:
                self.in_zhengwen = False

    def handle_data(self, data):
        """Print a text node when it lies inside a content region.

        Args:
            data: Raw text between tags; surrounding whitespace is stripped
                and whitespace-only nodes are skipped.
        """
        if not self.in_zhengwen:
            return
        text = data.strip()
        if text:
            print(text)
29+
if __name__ == '__main__':
    # Read the whole page first; the context manager guarantees the file
    # handle is closed even if reading or decoding fails.
    with open('test.html', 'r', encoding='utf-8') as html_file:
        html_text = html_file.read()

    parser = MyHTMLParser()
    parser.feed(html_text)
    # close() makes the parser process any input it is still buffering.
    parser.close()
You can’t perform that action at this time.
0 commit comments