#!/usr/bin/env python
# -*- coding: utf-8 -*-
3+ import os
4+ from html .parser import HTMLParser
5+
def get_path(root=os.curdir):
    """Yield ``(directory, file_name)`` for every file found under *root*.

    The tree is walked with ``os.walk`` starting at *root* with a trailing
    path separator, so the top-level directory is yielded with that trailing
    separator while sub-directories are yielded without one.

    Args:
        root: Directory to start from. Defaults to the current directory.
            An empty string is treated as the current directory.

    Yields:
        Tuples ``(path, file_name)`` where *path* is the directory that
        contains *file_name*.
    """
    # An empty root plus a separator would be the filesystem root ("/"),
    # which is never what a caller passing "" means.
    if not root:
        root = os.curdir
    # Only add the separator when it is missing, to avoid "dir//" paths.
    separators = tuple(sep for sep in (os.sep, os.altsep) if sep)
    if not root.endswith(separators):
        root += os.sep
    for path, _dirs, files in os.walk(root):
        for file_name in files:
            yield path, file_name
11+
class MyHTMLParser(HTMLParser):
    """Collect ``(href, text)`` pairs for the anchors of an HTML document.

    The parser keeps its state in two module-level lists which the calling
    script must define before any data is fed:

    * ``TagStack`` - names of the currently open tags, innermost last.
    * ``link`` - collected ``(href, text)`` tuples; *text* is the string
      ``'None'`` until non-blank text is seen inside the anchor.
    """

    # True while the most recently opened <a> carried an href attribute, so
    # that text inside an href-less anchor is not credited to an older link.
    _anchor_has_href = False

    def handle_starttag(self, tag, attrs):
        """Push *tag* on the stack and record the href of every ``<a>``."""
        TagStack.append(tag)
        if tag != 'a':
            return
        self._anchor_has_href = False
        for name, value in attrs:
            if name == 'href':
                link.append((value, 'None'))
                self._anchor_has_href = True

    def handle_endtag(self, tag, tag_flag=True):
        """Pop the stack down to and including the nearest open *tag*.

        Tags opened after *tag* that were never closed (for example void
        elements such as ``<img>``) are discarded along the way. An end tag
        with no matching open tag is ignored instead of emptying the stack
        and failing on the empty list. Nothing happens when *tag_flag* is
        false.
        """
        if not tag_flag or tag not in TagStack:
            return
        # Membership was checked above, so this loop always terminates.
        while TagStack.pop() != tag:
            pass

    def handle_data(self, data):
        """Store non-blank text found inside ``<body>`` ... ``<a href>``.

        The text replaces the text of the most recently recorded link; when
        an anchor contains several text nodes the last one wins.
        """
        text = data.strip()
        if not text or not link or not self._anchor_has_href:
            return
        if 'body' in TagStack and 'a' in TagStack:
            link[-1] = (link[-1][0], text)
31+
if __name__ == '__main__':
    # Scan the current directory tree for HTML files and print every
    # (href, text) pair found in them.
    paths = get_path()
    html_format = ('.html', '.htm')
    # Module-level state shared with MyHTMLParser's handlers.
    TagStack = []
    parser = MyHTMLParser()
    link = []

    for path, file_name in paths:
        if not file_name.endswith(html_format):
            continue
        # The context manager closes the file even if reading fails.
        with open(os.path.join(path, file_name), encoding='utf-8') as html_file:
            parser.feed(html_file.read())
        # Flush buffered text, then start the next file from a clean state
        # so unclosed tags or partial markup do not leak between files.
        parser.close()
        parser.reset()
        del TagStack[:]

    print(link)
You can’t perform that action at this time.
0 commit comments