Skip to content

Commit 0460f31

Browse files
authored
Create 0009.py
1 parent e5fcb27 commit 0460f31

1 file changed

Lines changed: 43 additions & 0 deletions

File tree

pylyria/0009/0009.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# -*- coding: utf-8 -*-
2+
#!/usr/bin/env python
3+
import os
4+
from html.parser import HTMLParser
5+
6+
def get_path(root = os.curdir):
    """Lazily yield ``(directory, file_name)`` for every file below *root*.

    A path separator is appended to *root* before walking, so the directory
    component of the top-level entries ends with that separator.
    """
    start = root + os.sep
    for folder, _subdirs, names in os.walk(start):
        yield from ((folder, name) for name in names)
11+
12+
class MyHTMLParser(HTMLParser):
    """Collect ``(href, text)`` pairs for the ``<a>`` tags of the fed HTML.

    The parser keeps its state in two module-level lists, which must exist
    before ``feed`` is called:

    * ``TagStack`` -- names of the tags that are currently open.
    * ``link`` -- the collected ``(href, text)`` tuples; the text is the
      string ``'None'`` until anchor text inside ``<body>`` is seen.
    """

    # True only while the most recently opened <a> carried an href attribute.
    _anchor_has_href = False

    def handle_starttag(self, tag, attrs):
        """Record the opened tag and register a link for every href of an <a>."""
        TagStack.append(tag)
        if tag == 'a':
            self._anchor_has_href = False
            for name, value in attrs:
                if name == 'href':
                    link.append((value, 'None'))
                    self._anchor_has_href = True

    def handle_endtag(self, tag, tag_flag = True):
        """Pop the tag stack down to and including the matching open *tag*.

        Unclosed tags above the match (e.g. ``<br>``) are discarded with it.
        An end tag with no matching open tag is ignored instead of emptying
        the stack and failing on the empty list. Nothing happens when
        *tag_flag* is false.
        """
        if not tag_flag or tag not in TagStack:
            return
        while TagStack.pop() != tag:
            pass
        if 'a' not in TagStack:
            self._anchor_has_href = False

    def handle_data(self, data):
        """Store non-blank text found inside ``<body>``/``<a>`` on the last link."""
        text = data.strip()
        # Skip text of anchors without href: there may be no link to update,
        # and otherwise it would overwrite the text of an unrelated earlier link.
        if not text or not link or not self._anchor_has_href:
            return
        if 'body' in TagStack and 'a' in TagStack:
            link[-1] = (link[-1][0], text)
31+
32+
if __name__ == '__main__':
    # Scan the current directory tree and print the (href, text) pairs of
    # every link found in its HTML files.
    paths = get_path()
    html_format = ('.html','.htm')
    # TagStack and link are the module-level lists MyHTMLParser reads and writes.
    TagStack = []
    parser = MyHTMLParser()
    link = []

    for path, file_name in paths:
        # Match the extension case-insensitively so '.HTML' files are included.
        if not file_name.lower().endswith(html_format):
            continue
        # Start every file from a clean state so unclosed tags or buffered
        # text from the previous file cannot leak into this one.
        parser.reset()
        TagStack.clear()
        # The context manager closes the file even if reading or parsing fails.
        with open(os.path.join(path, file_name), encoding='utf-8') as html_file:
            parser.feed(html_file.read())
        # Flush any text still buffered at the end of the document.
        parser.close()

    print(link)

0 commit comments

Comments
 (0)