#!/usr/bin/env python3
2+ # -*- coding: utf-8 -*-
3+
4+ '第 0004 题:任一个英文的纯文本文件,统计其中的单词出现的个数。'
5+
6+ __author__ = 'Drake-Z'
7+
8+ import os
9+ import re
10+ import glob
11+ from collections import OrderedDict
12+
def get_num(key_word, filename):
    """Count the stand-alone occurrences of ``key_word`` in a text file.

    An occurrence is stand-alone when it is bounded on each side by
    whitespace, ``,``, ``;``, ``.`` or by the start/end of the file.

    Args:
        key_word: The literal word to look for. It is matched verbatim;
            regex metacharacters in it have no special meaning.
        filename: Path of a UTF-8 encoded text file.

    Returns:
        The number of stand-alone occurrences; 0 for an empty ``key_word``.
    """
    if not key_word:
        # An empty pattern would match at every delimiter boundary.
        return 0

    with open(filename, 'r', encoding='utf-8') as f:
        text = f.read()

    # Zero-width lookarounds are used instead of consuming the delimiters so
    # that (a) words at the very start or end of the file are counted and
    # (b) neighbouring occurrences can share a single delimiter
    # ("the the the" is three matches, not one).
    pattern = re.compile(
        r'(?<![^\s,;.])' + re.escape(key_word) + r'(?![^\s,;.])'
    )
    return len(pattern.findall(text))
19+
20+
21+
def article_analysis(dirs):
    """Print the most frequent word of every ``*.txt`` file in ``dirs``.

    Each file is read once as UTF-8 and split into words (runs of word
    characters, ``-``, ``_``, ``.`` and ``'``, with leading/trailing dots
    removed). For each file a header line is printed, followed by the most
    frequent word and its count; ties go to the word seen first. A file that
    contains no words gets only the header line.

    Args:
        dirs: Directory to scan for ``*.txt`` files. An empty value means
            the current working directory.

    Returns:
        Always 0.
    """
    # Imported locally so the block does not depend on the file's import list.
    from collections import Counter

    word_pattern = re.compile(r'[\w\-\_\.\']+')
    search_root = glob.escape(dirs) if dirs else '.'

    # Sorted so the report order is deterministic across platforms.
    for path in sorted(glob.glob(os.path.join(search_root, '*.txt'))):
        with open(path, 'r', encoding='utf-8') as handle:
            text = handle.read()

        # Strip sentence-ending dots; drop tokens that were nothing but dots.
        tokens = (token.strip('.') for token in word_pattern.findall(text))
        # A fresh counter per file keeps one file's words out of the next.
        counts = Counter(token for token in tokens if token)

        # normpath turns './a.txt' back into 'a.txt' when dirs is '.'.
        print('在 %s 中出现次数最多的单词是:' % os.path.normpath(path))
        for word, count in counts.most_common(1):
            print(word + ' : %s 次' % count)
    return 0
37+
# Script entry point: analyse the text files in the current directory.
if __name__ == '__main__':
    target_dir = '.'
    article_analysis(target_dir)
You can’t perform that action at this time.
0 commit comments