Skip to content

Commit 6fe11f0

Browse files
committed
[OSAB] Add horizontal histograms
1 parent cded03a commit 6fe11f0

File tree

1 file changed

+202
-0
lines changed

1 file changed

+202
-0
lines changed
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
"""
2+
Plotting horizontal, terminal based histograms
3+
"""
4+
5+
from __future__ import print_function
6+
7+
import math
8+
from .utils.helpers import *
9+
from .utils.commandhelp import hist
10+
11+
12+
def plot_horiz_hist(f, width=20, bincount=None, binwidth=None, pch="o", title="", ylab=False, show_summary=False, regular=False):
    """
    Print a horizontal, terminal based histogram

    Every bin is drawn as one row of `pch` characters whose length is
    proportional to the number of observations in that bin. The counts
    are labelled along the x-axis and (optionally) the bins along the
    y-axis.

    Arguments:
        f -- the data to plot; a str is treated as the path of a file
             that is read line by line, anything else is handed to
             `read_numbers` as is
        width -- the length of the count axis in horizontal units (each
                 unit is printed as a space followed by `pch`); None
                 derives it from the range of counts, capped at 40
        bincount -- number of bins in the histogram
        binwidth -- width of bins in the histogram
        pch -- shape of the bars in the plot (None falls back to "o")
        title -- title at the top of the plot
        ylab -- boolean value for whether or not to display y-axis (bin) labels
        show_summary -- boolean value for whether or not to display a summary
        regular -- boolean value for whether or not to start the count axis at 0

    Raises:
        ValueError -- if `f` does not contain any numbers
    """
    if pch is None:
        pch = "o"

    if isinstance(f, str):
        with open(f) as fh:
            f = fh.readlines()

    # Parse the input exactly once. The statistics and the binning below
    # all need the full data set, and a single pass also keeps one-shot
    # iterables from being exhausted before the bins are filled.
    numbers = list(read_numbers(f))
    if not numbers:
        raise ValueError("cannot plot a histogram without any numbers")

    # `n` stays a float: the width of the summary box is derived from
    # `str(n)`, so switching to an int would change the printed layout.
    n = float(len(numbers))
    min_val = min(numbers)
    max_val = max(numbers)
    mean = sum(numbers) / n

    # The sample standard deviation is undefined for one observation;
    # report 0.0 instead of dividing by zero.
    if n > 1:
        sd = (sum((mean - number) ** 2 for number in numbers) / (n - 1)) ** 0.5
    else:
        sd = 0.0

    bins = list(calc_bins(n, min_val, max_val, bincount, binwidth))

    # One counter per bin, indexed like `bins`. A list (rather than a
    # dict) guarantees the rows are printed in bin order and avoids
    # shadowing the module level `hist` import.
    bin_counts = [0] * len(bins)
    last_bin = len(bins) - 1

    for number in numbers:
        for i, b in enumerate(bins):
            if number <= b:
                bin_counts[i] += 1
                break
        # The last bin edge can fall short of `max_val`; count the
        # maximum in the last bin in that case so it is not lost.
        if number == max_val and max_val > bins[last_bin]:
            bin_counts[last_bin] += 1

    min_count = min(bin_counts)
    max_count = max(bin_counts)

    # `min_display_count` and `max_display_count` are the min/max
    # counts that will be displayed on the x-axis of our
    # graph. If the user sets the `regular` argument to True,
    # we use a `min_display_count` of 0.
    if regular:
        min_display_count = 0
    else:
        min_display_count = min_count
    max_display_count = max_count + 1

    if width is None:
        width = min(int(max_display_count - min_display_count), 40)

    # Calculate how many counts each horizontal unit (square)
    # represents. This tells us how long each bar should be and is
    # also the step between two neighbouring x-labels.
    counts_per_horizontal_unit = float(max_display_count - min_display_count) / width

    # If we need to display y-labels, use `bins` to generate
    # them. `ylabels_width` represents the width of the
    # y-labels "column" so that we can add the appropriate
    # amount of padding in the rest of our graph.
    if ylab:
        ylabels = [str(b) for b in bins]
        ylabels_width = max(len(l) for l in ylabels) + 1
    else:
        ylabels = []
        ylabels_width = 0

    # Print the title, as per usual
    if title:
        print(box_text(title, width * 2 + ylabels_width))
    print()

    # Print the guts of the graph, one row per bin
    for bin_n, count in enumerate(bin_counts):
        line = ""
        if ylab:
            line += ylabels[bin_n].ljust(ylabels_width)
        line += "|"

        # This is why we calculated `counts_per_horizontal_unit`
        # earlier.
        n_squares = int((count - min_display_count) / counts_per_horizontal_unit) + 1
        line += (" " + pch) * n_squares
        print(line)

    print(" " * ylabels_width + "+" + "-" * width * 2)

    # Printing the x-labels is quite difficult. We only want to print
    # a label for a square if it is different to the labels before it.
    # This is because we don't want to print labels that look like
    # "1 1 1 1 2 2 2 2...".
    #
    # We therefore generate a list of "candidate" x-labels, which are
    # the labels we would show if we didn't care about repetition, and
    # blank out every candidate that has been shown already.
    candidate_xlabels = [str(int(l)) for l in drange(
        min_display_count,
        max_display_count,
        counts_per_horizontal_unit)]

    xlabels = []
    seen_xlabels = set()
    for cand in candidate_xlabels:
        if cand in seen_xlabels:
            xlabels.append("")
        else:
            seen_xlabels.add(cand)
            xlabels.append(cand)

    # Print the labels vertically by printing a row with all
    # the first characters in each label, then a row with all
    # the second characters, etc.
    max_xlabel_len = max(len(l) for l in xlabels)
    for row_n in range(max_xlabel_len):
        row = "".join(
            (label[row_n] if len(label) > row_n else " ") + " "
            for label in xlabels)
        print(" " * (ylabels_width + 2) + row)

    # Finally, print the summary statistics, as per usual
    if show_summary:
        center = max(map(len, map(str, [n, min_val, mean, max_val])))
        center += 15

        print()
        print("-" * (2 + center))
        print("|" + "Summary".center(center) + "|")
        print("-" * (2 + center))
        summary = "|" + ("observations: %d" % n).center(center) + "|\n"
        summary += "|" + ("min value: %f" % min_val).center(center) + "|\n"
        summary += "|" + ("mean : %f" % mean).center(center) + "|\n"
        summary += "|" + ("std dev : %f" % sd).center(center) + "|\n"
        summary += "|" + ("max value: %f" % max_val).center(center) + "|\n"
        summary += "-" * (2 + center)
        print(summary)
171+
172+
173+
def calc_bins(n, min_val, max_val, h=None, binwidth=None):
    """
    Calculate the bin edges for the histogram

    Arguments:
        n -- number of observations
        min_val -- smallest observation, used as the first edge
        max_val -- largest observation, used as the stop value
        h -- number of bins; when not given (or 0) it defaults to
             max(10, log2(n + 1))
        binwidth -- width of a bin; when None it is derived as
                    (max_val - min_val) / h

    Yields the values produced by `drange(min_val, max_val,
    step=binwidth, include_stop=True)`, with whole-valued edges
    converted to int.
    """
    if not h:
        h = max(10, math.log(n + 1, 2))
    if binwidth is None:
        binwidth = (max_val - min_val) / h
    # Guard against a zero step *after* deriving the default: when all
    # observations are equal the derived width is 0 as well, and how
    # `drange` copes with a zero step is not visible from here.
    if binwidth == 0:
        binwidth = 0.1
    for b in drange(min_val, max_val, step=binwidth, include_stop=True):
        if b.is_integer():
            yield int(b)
        else:
            yield b
188+
189+
190+
def read_numbers(numbers):
    """
    Read the input data and yield every value as a float

    Arguments:
        numbers -- either the path of a file holding one number per
                   line, or an iterable of numbers / numeric strings

    A str is always treated as a path. Strings are iterable themselves,
    so leaving the decision to `isiterable` alone could end up walking
    a file name character by character. Entries that are empty after
    stripping whitespace (e.g. a trailing blank line) are skipped.

    Raises:
        ValueError -- if a non-blank entry cannot be converted to float
    """
    if isinstance(numbers, str) or not isiterable(numbers):
        with open(numbers) as fh:
            for line in fh:
                text = line.strip()
                if text:
                    yield float(text)
    else:
        for number in numbers:
            text = str(number).strip()
            if text:
                yield float(text)
201+
202+

0 commit comments

Comments
 (0)