Skip to content

Commit 011a6cb

Browse files
author
Director of Analytics
committed
updated w/ new python histogram plotter
1 parent 06bef79 commit 011a6cb

2 files changed

Lines changed: 108 additions & 0 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.csv
2+
*.txt

bin/hist.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import math
2+
from collections import Counter
3+
import optparse
4+
5+
6+
def drange(start, stop, step=1.0):
    """Yield evenly spaced numbers from start up to, but excluding, stop.

    Works like ``range`` but accepts fractional steps.

    Args:
        start: first value produced (yielded unchanged).
        stop: exclusive upper bound.
        step: distance between consecutive values; must be positive
            whenever ``start < stop``.

    Yields:
        ``start``, ``start + step``, ``start + 2 * step``, ... while the
        value is strictly less than ``stop``.

    Raises:
        ValueError: on first iteration, if ``start < stop`` and ``step`` is
            not positive (the sequence would never reach ``stop``).
    """
    if start < stop and step <= 0:
        raise ValueError("step must be positive, got %r" % (step,))
    i = 0
    r = start
    while r < stop:
        yield r
        i += 1
        # Multiply instead of accumulating: repeated ``r += step`` lets
        # rounding error build up (ten additions of 0.1 give
        # 0.9999999999999999), which produces a spurious extra value.
        r = start + i * step
12+
13+
def calc_bins(n, min_val, max_val, h=None):
    """Yield the lower edge of each histogram bin, in ascending order.

    The range ``[min_val, max_val)`` is divided into ``h`` equal-width bins
    and the left edge of each one is produced via ``drange``.

    Args:
        n: number of data points; only used to pick a default bin count.
        min_val: smallest data value (first edge).
        max_val: largest data value (exclusive upper bound for edges).
        h: number of bins. When falsy (``None`` or 0) it defaults to
            ``log2(n + 1)``. Numeric strings are accepted.

    Yields:
        Bin edges starting at ``min_val``. If the range has zero (or
        negative) width, a single edge ``min_val`` is produced so callers
        still get one bin.

    Raises:
        ValueError: if the resulting bin count is not positive (for
            example ``n == 0`` with no explicit ``h``).
    """
    if not h:
        h = math.log(n + 1, 2)
    # float() accepts values that arrive as strings from the command line
    # and prevents truncating integer division under Python 2.
    h = float(h)
    if h <= 0:
        raise ValueError("number of bins must be positive, got %r" % (h,))
    bin_width = (max_val - min_val) / h
    if bin_width <= 0:
        # All values identical: one bin holds everything.
        yield min_val
        return
    for b in drange(min_val, max_val, bin_width):
        yield b
20+
21+
def _read_numbers(path):
    """Yield every non-blank line of the file at *path* converted to float."""
    with open(path) as handle:
        for line in handle:
            line = line.strip()
            if line:
                yield float(line)


def plot_hist(f, height=20, bincount=None):
    """Print an ASCII histogram of a file holding one number per line.

    The file is read twice: once to find the count and range of the values,
    and once to assign each value to a bin, so the data never has to be
    held in memory.

    Args:
        f: path of the input file; blank lines are ignored.
        height: number of text rows used for the bars (int or numeric
            string).
        bincount: number of bins, or ``None``/0 to let ``calc_bins`` choose
            (number or numeric string).

    Raises:
        ValueError: if ``height`` is not positive, the file holds no
            numbers, or a line cannot be parsed as a float.
    """
    import bisect  # function-scope import keeps this block self-contained

    height = int(height)
    if height <= 0:
        raise ValueError("height must be a positive number of lines")
    if bincount:
        bincount = float(bincount)

    # First pass: count the values and find their range.  Compare against
    # None explicitly so that a legitimate 0.0 is not mistaken for "unset".
    count = 0
    min_val, max_val = None, None
    for number in _read_numbers(f):
        count += 1
        if min_val is None or number < min_val:
            min_val = number
        if max_val is None or number > max_val:
            max_val = number
    if count == 0:
        raise ValueError("no numbers found in %r" % (f,))

    bins = list(calc_bins(count, min_val, max_val, bincount))
    if not bins:
        # Zero-width range (all values equal): use a single bin.
        bins = [min_val]

    # Second pass: bin i covers [bins[i], bins[i + 1]); the last bin is
    # closed on the right so the maximum value is counted too.
    counts = [0] * len(bins)
    for number in _read_numbers(f):
        index = bisect.bisect_right(bins, number) - 1
        counts[max(index, 0)] += 1

    # The y axis spans the smallest to the largest non-empty bin; when they
    # coincide, fall back to a zero baseline so the bars remain visible.
    occupied = [c for c in counts if c]
    min_y, max_y = min(occupied), max(occupied)
    if min_y == max_y:
        min_y = 0
    # float() avoids Python 2 integer division collapsing the step to 0.
    step = float(max_y - min_y) / height
    ys = [min_y + row * step for row in range(height)]
    ys.reverse()

    ylabels = ["%g" % y for y in ys]
    nlen = max(len(label) for label in ylabels) + 1

    # Every line is assembled as one string and printed with a single
    # argument, which behaves identically on Python 2 and Python 3.
    for y, label in zip(ys, ylabels):
        cells = ["o" if y < c else " " for c in counts]
        print(" ".join([label.ljust(nlen) + "|"] + cells))

    margin = " " * (nlen + 1)
    print(margin + "-" * (2 * len(counts)))

    # Bin edges are written vertically under every second column; shorter
    # labels are padded with spaces so the columns stay aligned.
    xlabels = ["%g" % edge if x % 2 else "" for x, edge in enumerate(bins)]
    for i in range(max(len(label) for label in xlabels)):
        chars = [label[i] if i < len(label) else " " for label in xlabels]
        print(" ".join([margin] + chars))

    summary = "Summary\n--------\nMax: %s\nMin:%s" % (max_val, min_val)
    print(summary)
83+
84+
85+
if __name__ == "__main__":
    # Command-line entry point: hist.py [-f FILE | FILE] [-b BINS] [-s HEIGHT]
    parser = optparse.OptionParser()
    parser.add_option('-f', '--file', help='a file containing a column of numbers',
                      default=None, dest='f')
    # type='int' makes optparse validate and convert the values; without it
    # they reach plot_hist as strings.
    parser.add_option('-b', '--bins', help='number of bins in the histogram',
                      type='int', default=None, dest='b')
    parser.add_option('-s', help='height of the histogram (in lines)',
                      type='int', default=20, dest='h')

    (opts, args) = parser.parse_args()

    # The file may be given either with -f/--file or as the first
    # positional argument.
    if opts.f is None:
        if not args:
            parser.error("no input file given: use -f FILE or pass it as "
                         "the first argument")
        opts.f = args[0]

    plot_hist(opts.f, opts.h, opts.b)
101+
102+
103+
104+
105+
106+

0 commit comments

Comments
 (0)