|
| 1 | +""" |
| 2 | +Plotting horizontal, terminal based histograms |
| 3 | +""" |
| 4 | + |
| 5 | +from __future__ import print_function |
| 6 | + |
| 7 | +import math |
| 8 | +from .utils.helpers import * |
| 9 | +from .utils.commandhelp import hist |
| 10 | + |
| 11 | + |
def plot_horiz_hist(f, width=20, bincount=None, binwidth=None, pch="o", title="", ylab=False, show_summary=False, regular=False):
    """
    Print a horizontal, terminal based histogram.

    One row is printed per bin; the length of the row (in `pch` marks)
    reflects how many observations fell into that bin. Nothing is
    printed when the input contains no numbers.

    Arguments:
        f -- path of a file with one number per line (when a str), or
             anything accepted by `read_numbers`
        width -- number of horizontal units used for the longest bar;
                 when None it is derived from the range of displayed
                 counts and capped at 40
        bincount -- number of bins in the histogram
        binwidth -- width of bins in the histogram
        pch -- character used to draw the bars (None means "o")
        title -- title printed in a box above the plot
        ylab -- boolean value for whether or not to label each row
                with its bin value
        show_summary -- boolean value for whether or not to display a summary
        regular -- boolean value for whether or not the count axis
                   starts at 0 instead of at the smallest bin count
    """
    if pch is None:
        pch = "o"

    if isinstance(f, str):
        with open(f) as fh:
            f = fh.readlines()

    # Read the data exactly once: the statistics and the binning both
    # need it, and a one-shot iterator cannot be replayed.
    numbers = list(read_numbers(f))
    if not numbers:
        return

    # `n` stays a float because str(n) takes part in sizing the
    # summary box further down.
    n = float(len(numbers))
    min_val = min(numbers)
    max_val = max(numbers)
    mean = sum(numbers) / n

    # The sample standard deviation divides by (n - 1), which is zero
    # for a single observation; report 0.0 in that case.
    if n > 1:
        sd = (sum((mean - number) ** 2 for number in numbers) / (n - 1)) ** 0.5
    else:
        sd = 0.0

    bins = list(calc_bins(n, min_val, max_val, bincount, binwidth))
    bin_counts = [0] * len(bins)

    for number in numbers:
        for i, upper_edge in enumerate(bins):
            if number <= upper_edge:
                bin_counts[i] += 1
                break
        else:
            # Larger than every bin edge: count it in the last bin so
            # that no observation is dropped.
            bin_counts[-1] += 1

    min_count = min(bin_counts)
    max_count = max(bin_counts)

    # `min_display_count` and `max_display_count` are the min/max
    # counts that will be displayed on the x-axis of our graph. If the
    # user sets the `regular` argument to True, we use a
    # `min_display_count` of 0.
    min_display_count = 0 if regular else min_count
    max_display_count = max_count + 1

    if width is None:
        width = min(int(max_display_count - min_display_count), 40)

    # How many counts each horizontal unit (square) represents. This
    # determines how long each bar is and how the x-labels are spaced.
    counts_per_horizontal_unit = float(max_display_count - min_display_count) / width

    # If we need to display y-labels, use `bins` to generate them.
    # `ylabels_width` is the width of the y-labels "column" so that the
    # rest of the graph can be padded to match.
    if ylab:
        ylabels = [str(b) for b in bins]
        ylabels_width = max(len(label) for label in ylabels) + 1
    else:
        ylabels_width = 0

    if title:
        print(box_text(title, width * 2 + ylabels_width))
    print()

    # One row per bin, in bin order.
    for bin_n, count in enumerate(bin_counts):
        line = ""
        if ylab:
            line += ylabels[bin_n].ljust(ylabels_width)
        line += "|"

        n_squares = int((count - min_display_count) / counts_per_horizontal_unit) + 1
        line += (" " + pch) * n_squares
        print(line)

    print(" " * ylabels_width + "+" + "-" * width * 2)

    # We only want to print an x-label for a square if it differs from
    # the labels already shown, so that the axis does not read
    # "1 1 1 1 2 2 2 2...". Generate the label every square would get,
    # then blank out the repeats.
    candidate_xlabels = [
        str(int(value))
        for value in drange(min_display_count, max_display_count, counts_per_horizontal_unit)
    ]

    xlabels = []
    seen = set()
    for cand in candidate_xlabels:
        if cand in seen:
            xlabels.append("")
        else:
            seen.add(cand)
            xlabels.append(cand)

    # Print the labels vertically: a row with the first character of
    # every label, then a row with the second characters, etc.
    max_xlabel_len = max(len(label) for label in xlabels)
    for row_n in range(max_xlabel_len):
        row = "".join(
            (label[row_n] if len(label) > row_n else " ") + " "
            for label in xlabels
        )
        print(" " * (ylabels_width + 2) + row)

    if show_summary:
        center = max(map(len, map(str, [n, min_val, mean, max_val])))
        center += 15
        rule = "-" * (2 + center)

        print()
        print(rule)
        print("|" + "Summary".center(center) + "|")
        print(rule)
        summary = "|" + ("observations: %d" % n).center(center) + "|\n"
        summary += "|" + ("min value: %f" % min_val).center(center) + "|\n"
        summary += "|" + ("mean : %f" % mean).center(center) + "|\n"
        summary += "|" + ("std dev : %f" % sd).center(center) + "|\n"
        summary += "|" + ("max value: %f" % max_val).center(center) + "|\n"
        summary += rule
        print(summary)
| 171 | + |
| 172 | + |
def calc_bins(n, min_val, max_val, h=None, binwidth=None):
    """
    Generate the bin values for the histogram

    Arguments:
        n -- number of observations; only used to choose a default `h`
        min_val -- smallest observed value, the first value of the range
        max_val -- largest observed value, the end of the range
        h -- number of bins; when falsy, max(10, log2(n + 1)) is used
        binwidth -- width of each bin; when None it is
                    (max_val - min_val) / h

    Yields each value produced by
    `drange(min_val, max_val, step=binwidth, include_stop=True)`,
    converted to int when it is a whole number.
    """
    if not h:
        h = max(10, math.log(n + 1, 2))
    if binwidth is None:
        # float() keeps this a true division for int inputs on Python 2.
        binwidth = float(max_val - min_val) / h
    # Checked after the width is computed so that min_val == max_val
    # (a computed width of 0) is caught as well as an explicit 0.
    if binwidth == 0:
        binwidth = 0.1
    for b in drange(min_val, max_val, step=binwidth, include_stop=True):
        # float() because int only has is_integer() from Python 3.12 on.
        if float(b).is_integer():
            yield int(b)
        else:
            yield b
| 188 | + |
| 189 | + |
def read_numbers(numbers):
    """
    Yield the input data as floats

    When `isiterable(numbers)` is true, every element is converted
    with str(), stripped of surrounding whitespace and turned into a
    float. Otherwise `numbers` is passed to open() and each line of
    that file is stripped and converted the same way.
    """
    if not isiterable(numbers):
        # Not iterable: treat the argument as something open() accepts.
        with open(numbers) as handle:
            for line in handle:
                yield float(line.strip())
        return

    for entry in numbers:
        yield float(str(entry).strip())
| 201 | + |
| 202 | + |
0 commit comments