Skip to content

Commit 77942b9

Browse files
committed
read x/y pairs from a file with or without headers
1 parent a669e02 commit 77942b9

16 files changed

Lines changed: 2337 additions & 281 deletions

bashplotlib/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""bashplotlib/__init__.py
4+
"""
5+
6+
import os, sys
7+
8+
# Absolute path of the directory containing this file, with symlinks resolved
# by os.path.realpath.
PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
9+

bashplotlib/cli/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""cli/__init__.py
4+
"""

bashplotlib/cli/demo.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""demo.py
4+
"""
5+
import os, sys
6+
from bashplotlib.cli.hist import __doc__ as HIST_DOCSTRING
7+
from bashplotlib.core.histogram import plot_hist
8+
from bashplotlib import PROJECT_ROOT
9+
10+
# Directory holding the example data: the ``data`` folder that sits next to
# (one level above) PROJECT_ROOT.
DATA_PATH = os.path.realpath(os.path.join(PROJECT_ROOT, '..', 'data'))

# The demo has nothing to plot without the example data, so importing this
# module exits with status 1 when the directory is absent.
if not os.path.exists(DATA_PATH):
    # Each message ends with a newline so the two hints do not run together
    # and the shell prompt starts on a fresh line.
    sys.stderr.write('You need to download the example data set to run the demo...\n')
    sys.stderr.write('try running `./examples/downloaddata.sh` to get the data\n')
    sys.exit(1)
16+
17+
def _hist_demo():
    """Print the path of the ``exp.txt`` example file and plot its histogram."""
    sample_path = os.path.join(DATA_PATH, 'exp.txt')
    print(sample_path)
    plot_hist(sample_path)
21+
22+
def run_demo(command):
    """Run the demo that belongs to ``command``.

    Arguments:
        command: Name of the command to demonstrate.  ``"hist"`` runs the
            histogram demo; any value other than ``"hist"`` or ``"scatter"``
            is ignored.

    Raises:
        NotImplementedError: If ``command`` is ``"scatter"``.
    """
    if command == "scatter":
        raise NotImplementedError('`run_demo` is only implemented for `hist` cmd so far.')
    if command == "hist":
        _hist_demo()

bashplotlib/cli/helpers.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""cli/helpers.py
4+
"""
5+
import sys, os
6+
from select import select
7+
8+
def read_stdin_or_timeout(timeout=0.5):
    """Return the lines available on stdin, or ``None`` if nothing shows up.

    Arguments:
        timeout: Seconds to wait for stdin to become readable before giving
            up.  Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        The list produced by ``sys.stdin.readlines()`` when ``select``
        reports stdin as readable within ``timeout``, otherwise ``None``.
    """
    ready, _, _ = select([sys.stdin], [], [], timeout)
    if not ready:
        return None
    return sys.stdin.readlines()
16+
Lines changed: 9 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,9 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
3-
"""commandhelp.py
4-
"""
5-
import sys
6-
import os
7-
from docopt import docopt
8-
from bashplotlib.utils.helpers import *
9-
from select import select
10-
11-
HIST_DOCSTRING = """hist - construct a histogram for a continuous variable from your terminal
3+
"""hist - construct a histogram for a continuous variable from your terminal
124
135
Usage:
14-
hist [[FILE | -f FILENAME] -t TITLE -b BINS -s SIZE -p MARKERSHAPE -x XLAB -c COLOUR] [-n] [-h]
6+
hist [[FILE|-f FILENAME] -t TITLE -b BINS -s SIZE -p MARKERSHAPE -x XLAB -c COLOUR] [-n] [-h]
157
168
Arguments:
179
FILE A file containing a column of numbers [default: stdin]
@@ -34,53 +26,27 @@
3426
$ cat test.csv | hist -t "you're the man now dog"
3527
3628
"""
29+
from docopt import docopt
30+
from bashplotlib.cli.helpers import read_stdin_or_timeout
3731

38-
# SCATTER_DOCSTRING = """scatter - construct a scatter plot from your terminal
39-
40-
# Usage:
41-
# scatter [[FILE | -f FILENAME] -t TITLE -b BINS -s SIZE -p MARKERSHAPE -x XLAB -c COLOUR] [-n] [-h]
42-
43-
44-
# """
45-
46-
scatter = {
47-
"usage": """scatterplot is a command for making xy plots. it accepts a series of x values and a series of y values in the
48-
following formats:
49-
1) a txt file or standard in value w/ 2 comma seperated columns of x,y values
50-
2) 2 txt files. 1 w/ designated x values and another with designated y values.
51-
52-
scatter -x <xcoords> -y <ycoords>
53-
cat <file_with_x_and_y_coords> | scatter
54-
55-
"""
56-
}
57-
58-
def _read_stdin_or_timeout():
59-
timeout = 0.5
60-
rlist, _, _ = select([sys.stdin], [], [], timeout)
61-
if rlist:
62-
return sys.stdin.readlines()
63-
else:
64-
return None
65-
66-
def parse_args(command_docstring):
32+
def parse_args():
6733
"""takes __doc__ for given cmd. Returns parsed args using docopt.
6834
"""
69-
args = docopt(command_docstring)
35+
args = docopt(__doc__)
7036
for k, v in args.iteritems():
7137
if v == 'None':
7238
args[k] = None
7339
if args['FILE'] and args['FILE'] != args['--file']:
7440
args['--file'] = args['FILE']
7541
if args['--file'] == 'stdin':
76-
args['--file'] = _read_stdin_or_timeout()
42+
args['--file'] = read_stdin_or_timeout()
7743
if args['--file'] is None:
78-
print command_docstring
44+
print __doc__
7945
sys.exit(1)
8046
plot_params = {
8147
'bincount': args['--bins'],
8248
'colour': args['--colour'],
83-
'f': args['--file'],
49+
'data': args['--file'],
8450
'height': float(args['--height'].strip()),
8551
'pch': args['--pch'],
8652
'showSummary': (not args['--nosummary']),

bashplotlib/cli/scatter.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Help text for the scatter command, stored under the "usage" key.
scatter = {
    "usage": """scatterplot is a command for making xy plots. it accepts a series of x values and a series of y values in the
following formats:
1) a txt file or standard in value w/ 2 comma seperated columns of x,y values
2) 2 txt files. 1 w/ designated x values and another with designated y values.

scatter -x <xcoords> -y <ycoords>
cat <file_with_x_and_y_coords> | scatter

"""
}
12+
13+
14+
# SCATTER_DOCSTRING = """scatter - construct a scatter plot from your terminal
15+
16+
# Usage:
17+
# scatter [X Y]
18+
19+
20+
# """

bashplotlib/cli/scatter2.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""scatter2
4+
5+
Usage:
6+
scatter2 [[FILE|-f FILENAME] -t TITLE -b BINS -s SIZE -p MARKERSHAPE -x XLAB -c COLOUR] [-n] [-h]
7+
8+
Arguments:
9+
FILE Csv with 2 columns for x and y [default: ]
10+
-f --file FILENAME Same as FILE but shorter and less explicit [default: ]
11+
-t --title TITLE Title for the chart [default: ]
12+
-X --X-vals X X values
13+
-y --y-vals y y values
14+
-s --size SIZE Height of the histogram in lines [default: 20.0]
15+
-p --pch MARKERSHAPE Shape of each bar [default: x]
16+
-c --colour COLOUR Colour of the plot (Pink, blue, green, red, white, aqua, grey, yellow) [default: white]
17+
18+
Options:
19+
-n --nosummary Hide summary
20+
-h --help Show this screen
21+
22+
Examples:
23+
$ hist test.csv -t "you're the man now dog"
24+
$ hist -f test.csv -t "you're the man now dog"
25+
$ hist --file test.csv -t "you're the man now dog"
26+
$ cat test.csv | hist -t "you're the man now dog"
27+
28+
"""
29+
from docopt import docopt
30+
from bashplotlib.cli.helpers import read_stdin_or_timeout
31+
from bashplotlib.utils.helpers import try_cast_str_to_number
32+
33+
def _read_csv(filename, X=0, y=1, sep=',', header=False):
34+
X_y_pairs = []
35+
with open(filename, 'r') as f:
36+
data = [line.strip() for line in f.readlines()]
37+
if not data:
38+
return None
39+
else:
40+
if isinstance(X, int) and isinstance(y, int):
41+
X_idx, y_idx = X, y
42+
elif isinstance(X, basestring) and isinstance(y, basestring):
43+
if X.strip().isdigit() and y.strip().isdigit():
44+
X_idx, y_idx = map(try_cast_str_to_number, [X_idx, y_idx])
45+
else:
46+
X_idx, y_idx = None, None
47+
for i, line in enumerate(data):
48+
row = [item.strip() for item in line.strip().split(sep)]
49+
if i == 0:
50+
if header:
51+
for j, col in enumerate(row):
52+
if col.lower() == X.lower():
53+
X_idx = j
54+
if col.lower() == y.lower():
55+
y_idx = j
56+
if X_idx and y_idx:
57+
continue
58+
if row and isinstance(row, list) and len(row):
59+
try:
60+
X_value, y_value = row[X_idx], row[y_idx]
61+
X_value, y_value = map(try_cast_str_to_number, [X_value, y_value])
62+
X_y_pairs.append([X_value, y_value])
63+
except Exception, err:
64+
continue
65+
return X_y_pairs
66+
67+
68+
# # plot_scatter(opts.f, opts.x, opts.y, opts.size, opts.pch, opts.colour, opts.t)
69+
# def parse_args():
70+
# """takes __doc__ for given cmd. Returns parsed args using docopt.
71+
# """
72+
# args = docopt()
73+
# for k, v in args.iteritems():
74+
# if v == 'None':
75+
# args[k] = None
76+
# if args['FILE'] and args['FILE'] != args['--file']:
77+
# args['--file'] = args['FILE']
78+
# if args['--file'] == 'stdin':
79+
# args['--file'] = read_stdin_or_timeout()
80+
# if args['--file'] is None:
81+
# print
82+
# sys.exit(1)
83+
# plot_params = {
84+
# 'bincount': args['--bins'],
85+
# 'colour': args['--colour'],
86+
# 'data': args['--file'],
87+
# 'height': float(args['--height'].strip()),
88+
# 'pch': args['--pch'],
89+
# 'showSummary': (not args['--nosummary']),
90+
# 'title': args['--title'],
91+
# 'xlab': args['--xlab']
92+
# }
93+
# return plot_params

bashplotlib/core/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
"""core/__init__.py
4+
"""

bashplotlib/core/histogram.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#!/usr/bin/python
2+
import math
3+
import sys, os
4+
from bashplotlib.utils import helpers
5+
import collections
6+
7+
def calc_bins(n, min_val, max_val, h=None):
    """Yield the bin boundaries for the histogram.

    Arguments:
        n: Number of observations; only used to pick a default bin count.
        min_val: Smallest observed value.
        max_val: Largest observed value.
        h: Number of bins, as a number or a numeric string.  When falsy,
            ``max(10, log2(n + 1))`` is used.

    Yields:
        The values produced by ``helpers.drange(min_val, max_val, bin_width)``
        where ``bin_width`` is the value range divided by the bin count.
    """
    # Accept the bin count as text too (e.g. straight from a CLI parser).
    bin_count = float(h) if h else 0.0
    if not bin_count:
        bin_count = max(10, math.log(n + 1, 2))
    # float() forces true division: on Python 2, int bounds divided by an int
    # bin count would floor-divide and could collapse the width to 0.
    # NOTE(review): the width is still 0 when min_val == max_val -- confirm
    # how helpers.drange treats a zero step.
    bin_width = float(max_val - min_val) / bin_count
    for b in helpers.drange(min_val, max_val, bin_width):
        yield b
14+
15+
def read_numbers(numbers):
    """Yield the numeric values found in ``numbers``, skipping everything else.

    Arguments:
        numbers: Either an iterable of values (numbers or numeric strings,
            such as lines read from stdin) or a string naming a file with
            one value per line.  Falsy input and non-iterables yield nothing.

    Yields:
        Every item that ``helpers.try_cast_str_to_number`` turns into a
        number, in input order.  Zero is a number and is kept; items the
        helper cannot convert are dropped.
    """
    # Function-scope imports keep the block working on Python 2 and on
    # Python 3.10+, where ``collections.Iterable`` no longer exists.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2
        from collections import Iterable
    from numbers import Number
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str

    if not numbers:
        return
    if isinstance(numbers, string_types):
        try:
            # read numbers from file, ignoring empty rows; ``with`` closes
            # the handle even if reading fails part-way.
            with open(numbers, 'r') as source:
                numbers = [line for line in source if line.strip()]
        except (IOError, OSError) as err:
            # An unreadable file means "no data".  Falling through would
            # iterate over the characters of the path itself.
            sys.stderr.write("Could not read numbers from %r: %s\n" % (numbers, err))
            return
    if not isinstance(numbers, Iterable):
        return
    for item in numbers:
        value = helpers.try_cast_str_to_number(item)
        # Keep real numbers only.  A plain truthiness test would also throw
        # away 0 and 0.0, which are valid observations.
        if isinstance(value, bool) or not isinstance(value, Number):
            continue
        yield value
31+
32+
33+
def plot_hist(data, height=20.0, bincount=None, pch="o", colour="white", title="", xlab=None, showSummary=False):
34+
"""make a histogram for continuous variable.
35+
36+
Arguments:
37+
data: List of numbers or file with numbers
38+
height: The height of the histogram in # of lines
39+
bincount: Number of bins in the histogram
40+
pch: Shape of the bars in the plot
41+
colour: Colour of the bars in the terminal
42+
title: Title at the top of the plot
43+
xlab: Boolen value for whether or not to display x-axis labels
44+
showSummary: Boolean value for whether or not to display a summary
45+
"""
46+
if pch is None:
47+
pch = "o"
48+
colour = helpers.get_colour(colour)
49+
min_val, max_val = None, None
50+
n, mean = 0., 0.
51+
for number in read_numbers(data):
52+
n += 1
53+
54+
if not min_val or number < min_val:
55+
min_val = number
56+
if not max_val or number > max_val:
57+
max_val = number
58+
mean += number
59+
mean /= n
60+
61+
bins = list(calc_bins(n, min_val, max_val, bincount))
62+
hist = {}
63+
for i in range(len(bins)):
64+
hist[i] = 0
65+
for number in read_numbers(data):
66+
for i, b in enumerate(bins):
67+
if number < b:
68+
hist[i] += 1
69+
break
70+
71+
min_y, max_y = min(hist.values()), max(hist.values())
72+
73+
ys = list(helpers.drange(min_y, max_y, (max_y-min_y)/height))
74+
ys.reverse()
75+
76+
nlen = max(len(str(min_y)), len(str(max_y))) + 1
77+
78+
if title:
79+
print helpers.box_text(title, len(hist)*2, nlen)
80+
print
81+
used_labs = set()
82+
for y in ys:
83+
ylab = str(int(y))
84+
if ylab in used_labs:
85+
ylab = ""
86+
else:
87+
used_labs.add(ylab)
88+
ylab = " "*(nlen - len(ylab)) + ylab + "|"
89+
90+
print ylab,
91+
92+
for i in range(len(hist)):
93+
if y < hist[i]:
94+
helpers.printcolor(pch, True, colour)
95+
else:
96+
helpers.printcolor(" ", True, colour)
97+
print
98+
xs = hist.keys() * 2
99+
100+
print " "*(nlen+1) + "-"*len(xs)
101+
102+
103+
if xlab:
104+
for i in range(0, nlen):
105+
helpers.printcolor(" "*(nlen+1), True, colour)
106+
for x in range(0, len(hist)):
107+
num = str(bins[x])
108+
if x%2==0:
109+
print " ",
110+
elif i < len(num):
111+
print num[i],
112+
print
113+
center = max(map(len, map(str, [n, min_val, mean, max_val])))
114+
center += 15
115+
116+
if showSummary:
117+
print
118+
print "-"*(2 + center)
119+
print "|" + "Summary".center(center) + "|"
120+
print "-"*(2 + center)
121+
summary = "|" + ("observations: %d" % n).center(center) + "|\n"
122+
summary += "|" + ("min value: %f" % min_val).center(center) + "|\n"
123+
summary += "|" + ("mean : %f" % mean).center(center) + "|\n"
124+
summary += "|" + ("max value: %f" % max_val).center(center) + "|\n"
125+
summary += "-"*(2 + center)
126+
print summary

0 commit comments

Comments
 (0)