Skip to content

Commit 7ec3e16

Browse files
authored
Create 0004.py
1 parent 5052e21 commit 7ec3e16

1 file changed

Lines changed: 29 additions & 0 deletions

File tree

pylyria/0004/0004.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#! /usr/bin/env python
2+
# Problem 0004: given any English plain-text file, count how many times each word occurs.
3+
# -*- coding: utf-8 -*-
4+
# vim:fenc=utf-8
5+
# Copyright By PyLyria
6+
# CreateTime: 2016-03-01 23:04:58
7+
8+
import re
9+
from string import punctuation
10+
from operator import itemgetter
11+
12+
def remove_punctuation(text):
    """Return *text* with ASCII punctuation removed, trimmed and lower-cased.

    Every character listed in ``string.punctuation`` is deleted, then
    leading/trailing whitespace is stripped and the result is lower-cased.

    Args:
        text: The string (typically a single token) to normalise.

    Returns:
        The normalised string; may be empty if *text* held only
        punctuation and/or whitespace.
    """
    # The punctuation characters must be escaped before being placed inside
    # a character class: un-escaped, the ``\]`` sequence in
    # ``string.punctuation`` is read as an escaped ``]`` and the backslash
    # itself is never matched (so backslashes would survive).
    pattern = r'[{}]+'.format(re.escape(punctuation))
    text = re.sub(pattern, '', text)
    return text.strip().lower()
15+
16+
def split(file_name):
    """Yield the tokens of each line of a text file.

    The file is read lazily, one line at a time. Each line is stripped of
    surrounding whitespace and split on runs of semicolons, commas and
    whitespace.

    Args:
        file_name: Path of the text file to read.

    Yields:
        One list of non-empty token strings per line of the file, in file
        order. A blank line yields an empty list.
    """
    with open(file_name, 'rt') as f:
        for line in f:
            tokens = re.split(r'[;,\s]+', line.strip())
            # re.split returns '' for a blank line and around leading or
            # trailing delimiters; drop those so callers never see an
            # empty "word".
            yield [token for token in tokens if token]
21+
22+
if __name__ == '__main__':
    # Script entry point: count how often each normalised word occurs in a
    # text file and print the (word, count) pairs sorted by word.
    import sys

    # An optional first command-line argument selects the input file; with
    # no argument the original default file name is used.
    file_name = sys.argv[1] if len(sys.argv) > 1 else 'chapter1.txt'

    word2count = {}
    for line in split(file_name):
        for raw_word in line:
            word = remove_punctuation(raw_word)
            # Tokens made only of punctuation (or empty tokens coming from
            # blank lines) normalise to '' and must not be counted as words.
            if not word:
                continue
            word2count[word] = word2count.get(word, 0) + 1

    # Sort alphabetically by word (item 0 of each (word, count) pair).
    sorted_word2count = sorted(word2count.items(), key=itemgetter(0))
    print(sorted_word2count)

0 commit comments

Comments
 (0)