forked from AllenDowney/ThinkPython2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathzipf.py
More file actions
65 lines (47 loc) · 1.42 KB
/
zipf.py
File metadata and controls
65 lines (47 loc) · 1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""This module contains code from
Think Python by Allen B. Downey
http://thinkpython.com
Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import sys
import string
import matplotlib.pyplot as pyplot
from analyze_book import *
def rank_freq(hist):
    """Returns a list of (rank, frequency) tuples for a histogram.

    hist: map from item to the number of times it appeared

    Rank 1 goes to the largest frequency; equal frequencies receive
    consecutive ranks.  An empty histogram yields an empty list.

    returns: list of (rank, frequency) tuples in increasing rank order
    """
    # sorted() accepts any iterable, so this works whether values()
    # returns a list (Python 2) or a view without a sort() method
    # (Python 3); it also never touches the caller's data
    freqs = sorted(hist.values(), reverse=True)

    # pair each frequency with its rank, counting from 1
    return list(enumerate(freqs, 1))
def print_ranks(hist):
    """Prints the rank vs. frequency data, one pair per line.

    hist: histogram passed straight to rank_freq

    Each output line is the rank, a space, then the frequency.
    """
    for r, f in rank_freq(hist):
        # a single pre-formatted string prints identically whether print
        # is a statement (Python 2) or a function (Python 3)
        print('%s %s' % (r, f))
def plot_ranks(hist, scale='log'):
    """Draws frequency (y-axis) against rank (x-axis) and shows the figure.

    hist: histogram passed straight to rank_freq
    scale: scale name applied to both axes (default 'log')
    """
    # split the (rank, frequency) pairs into two parallel sequences
    ranks, freqs = zip(*rank_freq(hist))

    pyplot.clf()

    # both axes share the same scale
    for set_scale in (pyplot.xscale, pyplot.yscale):
        set_scale(scale)

    pyplot.title('Zipf plot')
    pyplot.xlabel('rank')
    pyplot.ylabel('frequency')
    pyplot.plot(ranks, freqs, 'r-')
    pyplot.show()
def main(name, filename='emma.txt', flag='plot', *args):
    """Processes a file and either prints or plots its rank data.

    name: first command-line word (the script path); not used
    filename: file handed to process_file (default 'emma.txt')
    flag: 'print' to call print_ranks, 'plot' to call plot_ranks;
          any other value prints a usage message instead
    args: any further command-line words; ignored
    """
    hist = process_file(filename, skip_header=True)

    # either print the results or plot them
    if flag == 'print':
        print_ranks(hist)
    elif flag == 'plot':
        plot_ranks(hist)
    else:
        # parenthesized single string: same output on Python 2 and 3
        print('Usage: zipf.py filename [print|plot]')
if __name__ == '__main__':
    # sys.argv[0] (the script path) fills main's `name` parameter; any
    # further command-line words fill filename, flag and *args in order
    main(*sys.argv)