Skip to content

Commit 806b051

Browse files
author
yincongxian
committed
Add the solution to 0006
1 parent b915e6d commit 806b051

1 file changed

Lines changed: 45 additions & 0 deletions

File tree

renzongxian/0006/0006.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Source:https://github.com/Show-Me-the-Code/show-me-the-code
2+
# Author:renzongxian
3+
# Date:2014-12-08
4+
# Python 3.4
5+
6+
"""
7+
8+
第 0006 题:你有一个目录,放了你一个月的日记,都是 txt,为了避免分词的问题,假设内容都是英文,请统计出你认为每篇日记最重要的词。
9+
10+
"""
11+
12+
import os
13+
import sys
14+
import re
15+
16+
17+
def important_word(target_file):
    """Print the two most frequent words found in a text file.

    The file is read in full, split into words on runs of non-word
    characters and digits, and the words are counted case-sensitively
    (an underscore counts as a word character). One line is printed for
    the most frequent word and, if a second distinct word exists, one
    line for the runner-up. Ties keep the order of first appearance.

    Args:
        target_file: Path of the text file to analyse.

    Returns:
        None. The result is written to standard output.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Local import keeps this block self-contained; the file's top-level
    # imports do not include collections.
    from collections import Counter

    # The context manager closes the file even if read() raises.
    with open(target_file, 'r') as file_object:
        file_content = file_object.read()

    # '+' rather than '*': a separator that can match the empty string makes
    # re.split() cut between every single character on Python 3.7+.
    separator = re.compile(r'[\W\d]+')

    # Leading/trailing separators produce empty strings; they are not words.
    words = [word for word in separator.split(file_content) if word]

    ranking = Counter(words).most_common(2)
    if not ranking:
        print("No words found in '%s'" % target_file)
        return

    # zip() stops at the shorter sequence, so a file with a single distinct
    # word prints only the first line instead of raising IndexError.
    for label, (word, count) in zip(("most", "second most"), ranking):
        print("The %s word in '%s' is '%s', it appears %s times"
              % (label, target_file, word, count))
36+
37+
38+
if __name__ == "__main__":
    # Script entry point: every command-line argument is treated as a
    # directory, and each regular file inside it is analysed in turn.
    if len(sys.argv) <= 1:
        print("Need at least 1 parameter. Try to execute 'python 0006.py $dir_path'")
    else:
        for dir_path in sys.argv[1:]:
            # os.listdir() returns entries in arbitrary order; sorting makes
            # the report reproducible between runs.
            for file_name in sorted(os.listdir(dir_path)):
                file_path = os.path.join(dir_path, file_name)
                # Skip sub-directories and other non-regular entries, which
                # important_word() cannot open as text files.
                if os.path.isfile(file_path):
                    important_word(file_path)

0 commit comments

Comments
 (0)