Skip to content

Commit 48d7e1e

Browse files
committed
Merge new changes from upstream.
2 parents 874a6ec + 845bded commit 48d7e1e

File tree

11 files changed

+258
-69
lines changed

11 files changed

+258
-69
lines changed

doc/changes.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ v0.4.0
2020
v0.3.0
2121
------
2222

23-
.. image:: https://zenodo.org/badge/4175/njsmith/zs.png
24-
:target: http://dx.doi.org/10.5281/zenodo.11445
23+
.. image:: https://zenodo.org/badge/doi/10.5281/zenodo.11444.svg
24+
:target: http://dx.doi.org/10.5281/zenodo.11444
2525

2626
|
2727
* New stateful transforms for computing natural and cyclic cubic

patsy/build.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
CategoricalSniffer,
1818
categorical_to_int)
1919
from patsy.util import (atleast_2d_column_default,
20-
have_pandas, asarray_or_pandas)
20+
have_pandas, asarray_or_pandas,
21+
safe_issubdtype)
2122
from patsy.design_info import DesignMatrix, DesignInfo
2223
from patsy.redundancy import pick_contrasts_for_term
2324
from patsy.desc import ModelDesc
@@ -76,7 +77,7 @@ def eval(self, data, NA_action):
7677
% (self.factor.name(), self._expected_columns,
7778
result.shape[1]),
7879
self.factor)
79-
if not np.issubdtype(np.asarray(result).dtype, np.number):
80+
if not safe_issubdtype(np.asarray(result).dtype, np.number):
8081
raise PatsyError("when evaluating numeric factor %s, "
8182
"I got non-numeric data of type '%s'"
8283
% (self.factor.name(), result.dtype),

patsy/categorical.py

Lines changed: 70 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,15 @@
3838
from patsy import PatsyError
3939
from patsy.state import stateful_transform
4040
from patsy.util import (SortAnythingKey,
41-
have_pandas, have_pandas_categorical,
4241
safe_scalar_isnan,
43-
iterable)
42+
iterable,
43+
have_pandas, have_pandas_categorical,
44+
have_pandas_categorical_dtype,
45+
safe_is_pandas_categorical,
46+
pandas_Categorical_from_codes,
47+
pandas_Categorical_categories,
48+
pandas_Categorical_codes,
49+
safe_issubdtype)
4450

4551
if have_pandas:
4652
import pandas
@@ -113,18 +119,21 @@ def test_C():
113119
assert c4.levels == "LEVELS"
114120

115121
def guess_categorical(data):
116-
if have_pandas_categorical and isinstance(data, pandas.Categorical):
122+
if safe_is_pandas_categorical(data):
117123
return True
118124
if isinstance(data, _CategoricalBox):
119125
return True
120126
data = np.asarray(data)
121-
if np.issubdtype(data.dtype, np.number):
127+
if safe_issubdtype(data.dtype, np.number):
122128
return False
123129
return True
124130

125131
def test_guess_categorical():
126132
if have_pandas_categorical:
127-
assert guess_categorical(pandas.Categorical.from_array([1, 2, 3]))
133+
c = pandas.Categorical.from_array([1, 2, 3])
134+
assert guess_categorical(c)
135+
if have_pandas_categorical_dtype:
136+
assert guess_categorical(pandas.Series(c))
128137
assert guess_categorical(C([1, 2, 3]))
129138
assert guess_categorical([True, False])
130139
assert guess_categorical(["a", "b"])
@@ -168,21 +177,21 @@ def sniff(self, data):
168177
if hasattr(data, "contrast"):
169178
self._contrast = data.contrast
170179
# returns a bool: are we confident that we found all the levels?
171-
if have_pandas_categorical and isinstance(data, pandas.Categorical):
172-
# pandas.Categorical has its own NA detection, so don't try to
173-
# second-guess it.
174-
self._levels = tuple(data.levels)
175-
return True
176180
if isinstance(data, _CategoricalBox):
177181
if data.levels is not None:
178182
self._levels = tuple(data.levels)
179183
return True
180184
else:
181185
# unbox and fall through
182186
data = data.data
187+
if safe_is_pandas_categorical(data):
188+
# pandas.Categorical has its own NA detection, so don't try to
189+
# second-guess it.
190+
self._levels = tuple(pandas_Categorical_categories(data))
191+
return True
183192
# fastpath to avoid doing an item-by-item iteration over boolean
184193
# arrays, as requested by #44
185-
if hasattr(data, "dtype") and np.issubdtype(data.dtype, np.bool_):
194+
if hasattr(data, "dtype") and safe_issubdtype(data.dtype, np.bool_):
186195
self._level_set = set([True, False])
187196
return True
188197

@@ -218,18 +227,26 @@ def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
218227
assert sniffer.levels_contrast() == (exp_levels, exp_contrast)
219228

220229
if have_pandas_categorical:
221-
t([], [pandas.Categorical.from_array([1, 2, None])],
222-
True, (1, 2))
223-
# check order preservation
224-
t([], [pandas.Categorical([1, 0], ["a", "b"])],
225-
True, ("a", "b"))
226-
t([], [pandas.Categorical([1, 0], ["b", "a"])],
227-
True, ("b", "a"))
228-
# check that if someone sticks a .contrast field onto a Categorical
229-
# object, we pick it up:
230-
c = pandas.Categorical.from_array(["a", "b"])
231-
c.contrast = "CONTRAST"
232-
t([], [c], True, ("a", "b"), "CONTRAST")
230+
# We make sure to test with both boxed and unboxed pandas objects,
231+
# because we used to have a bug where boxed pandas objects would be
232+
# treated as categorical, but their levels would be lost...
233+
preps = [lambda x: x,
234+
C]
235+
if have_pandas_categorical_dtype:
236+
preps += [pandas.Series,
237+
lambda x: C(pandas.Series(x))]
238+
for prep in preps:
239+
t([], [prep(pandas.Categorical.from_array([1, 2, None]))],
240+
True, (1, 2))
241+
# check order preservation
242+
t([], [prep(pandas_Categorical_from_codes([1, 0], ["a", "b"]))],
243+
True, ("a", "b"))
244+
t([], [prep(pandas_Categorical_from_codes([1, 0], ["b", "a"]))],
245+
True, ("b", "a"))
246+
# check that if someone sticks a .contrast field onto our object
247+
obj = prep(pandas.Categorical.from_array(["a", "b"]))
248+
obj.contrast = "CONTRAST"
249+
t([], [obj], True, ("a", "b"), "CONTRAST")
233250

234251
t([], [C([1, 2]), C([3, 2])], False, (1, 2, 3))
235252
# check order preservation
@@ -286,14 +303,14 @@ def categorical_to_int(data, levels, NA_action, origin=None):
286303
assert isinstance(levels, tuple)
287304
# In this function, missing values are always mapped to -1
288305

289-
if have_pandas_categorical and isinstance(data, pandas.Categorical):
290-
data_levels_tuple = tuple(data.levels)
306+
if safe_is_pandas_categorical(data):
307+
data_levels_tuple = tuple(pandas_Categorical_categories(data))
291308
if not data_levels_tuple == levels:
292309
raise PatsyError("mismatching levels: expected %r, got %r"
293310
% (levels, data_levels_tuple), origin)
294311
# pandas.Categorical also uses -1 to indicate NA, and we don't try to
295312
# second-guess its NA detection, so we can just pass it back.
296-
return data.labels
313+
return pandas_Categorical_codes(data)
297314

298315
if isinstance(data, _CategoricalBox):
299316
if data.levels is not None and tuple(data.levels) != levels:
@@ -311,7 +328,7 @@ def categorical_to_int(data, levels, NA_action, origin=None):
311328

312329
# fastpath to avoid doing an item-by-item iteration over boolean arrays,
313330
# as requested by #44
314-
if hasattr(data, "dtype") and np.issubdtype(data.dtype, np.bool_):
331+
if hasattr(data, "dtype") and safe_issubdtype(data.dtype, np.bool_):
315332
if level_to_int[False] == 0 and level_to_int[True] == 1:
316333
return data.astype(np.int_)
317334
out = np.empty(len(data), dtype=int)
@@ -358,24 +375,32 @@ def test_categorical_to_int():
358375
categorical_to_int,
359376
pandas.DataFrame({10: s}), ("a", "b", "c"), NAAction())
360377
if have_pandas_categorical:
361-
cat = pandas.Categorical([1, 0, -1], ("a", "b"))
362-
conv = categorical_to_int(cat, ("a", "b"), NAAction())
363-
assert np.all(conv == [1, 0, -1])
364-
# Trust pandas NA marking
365-
cat2 = pandas.Categorical([1, 0, -1], ("a", "None"))
366-
conv2 = categorical_to_int(cat, ("a", "b"), NAAction(NA_types=["None"]))
367-
assert np.all(conv2 == [1, 0, -1])
368-
# But levels must match
369-
assert_raises(PatsyError,
370-
categorical_to_int,
371-
pandas.Categorical([1, 0], ("a", "b")),
372-
("a", "c"),
373-
NAAction())
374-
assert_raises(PatsyError,
375-
categorical_to_int,
376-
pandas.Categorical([1, 0], ("a", "b")),
377-
("b", "a"),
378-
NAAction())
378+
constructors = [pandas_Categorical_from_codes]
379+
if have_pandas_categorical_dtype:
380+
def Series_from_codes(codes, categories):
381+
c = pandas_Categorical_from_codes(codes, categories)
382+
return pandas.Series(c)
383+
constructors.append(Series_from_codes)
384+
for con in constructors:
385+
cat = con([1, 0, -1], ("a", "b"))
386+
conv = categorical_to_int(cat, ("a", "b"), NAAction())
387+
assert np.all(conv == [1, 0, -1])
388+
# Trust pandas NA marking
389+
cat2 = con([1, 0, -1], ("a", "None"))
390+
conv2 = categorical_to_int(cat, ("a", "b"),
391+
NAAction(NA_types=["None"]))
392+
assert np.all(conv2 == [1, 0, -1])
393+
# But levels must match
394+
assert_raises(PatsyError,
395+
categorical_to_int,
396+
con([1, 0], ("a", "b")),
397+
("a", "c"),
398+
NAAction())
399+
assert_raises(PatsyError,
400+
categorical_to_int,
401+
con([1, 0], ("a", "b")),
402+
("b", "a"),
403+
NAAction())
379404

380405
def t(data, levels, expected, NA_action=NAAction()):
381406
got = categorical_to_int(data, levels, NA_action)

patsy/contrasts.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import numpy as np
1616
from patsy import PatsyError
1717
from patsy.compat import triu_indices, tril_indices, diag_indices
18-
from patsy.util import repr_pretty_delegate, repr_pretty_impl
18+
from patsy.util import repr_pretty_delegate, repr_pretty_impl, safe_issubdtype
1919

2020
class ContrastMatrix(object):
2121
"""A simple container for a matrix used for coding categorical factors.
@@ -567,7 +567,7 @@ def code_contrast_matrix(intercept, levels, contrast, default=None):
567567
if isinstance(contrast, ContrastMatrix):
568568
return contrast
569569
as_array = np.asarray(contrast)
570-
if np.issubdtype(as_array.dtype, np.number):
570+
if safe_issubdtype(as_array.dtype, np.number):
571571
return ContrastMatrix(as_array,
572572
_name_levels("custom", range(as_array.shape[1])))
573573
if intercept:

patsy/design_info.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from patsy import PatsyError
2020
from patsy.util import atleast_2d_column_default
2121
from patsy.compat import OrderedDict
22-
from patsy.util import repr_pretty_delegate, repr_pretty_impl
22+
from patsy.util import repr_pretty_delegate, repr_pretty_impl, safe_issubdtype
2323
from patsy.constraint import linear_constraint
2424

2525
class DesignInfo(object):
@@ -278,7 +278,7 @@ def from_array(cls, array_like, default_column_prefix="column"):
278278
raise ValueError("design matrix can't have >2 dimensions")
279279
columns = getattr(arr, "columns", range(arr.shape[1]))
280280
if (hasattr(columns, "dtype")
281-
and not np.issubdtype(columns.dtype, np.integer)):
281+
and not safe_issubdtype(columns.dtype, np.integer)):
282282
column_names = [str(obj) for obj in columns]
283283
else:
284284
column_names = ["%s%s" % (default_column_prefix, i)
@@ -527,7 +527,7 @@ def __new__(cls, input_array, design_info=None,
527527
return input_array
528528
self = atleast_2d_column_default(input_array).view(cls)
529529
# Upcast integer to floating point
530-
if np.issubdtype(self.dtype, np.integer):
530+
if safe_issubdtype(self.dtype, np.integer):
531531
self = np.asarray(self, dtype=float).view(cls)
532532
if self.ndim > 2:
533533
raise ValueError("DesignMatrix must be 2d")
@@ -539,7 +539,7 @@ def __new__(cls, input_array, design_info=None,
539539
"(got %s, wanted %s)"
540540
% (len(design_info.column_names), self.shape[1]))
541541
self.design_info = design_info
542-
if not np.issubdtype(self.dtype, np.floating):
542+
if not safe_issubdtype(self.dtype, np.floating):
543543
raise ValueError("design matrix must be real-valued floating point")
544544
return self
545545

patsy/eval.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -546,12 +546,6 @@ def memorize_finish(self, state, which_pass):
546546
for obj_name in state["pass_bins"][which_pass]:
547547
state["transforms"][obj_name].memorize_finish()
548548

549-
# XX FIXME: consider doing something cleverer with exceptions raised here,
550-
# to make it clearer what's really going on. The new exception chaining
551-
# stuff doesn't appear to be present in any 2.x version of Python, so we
552-
# can't use that, but some other options:
553-
# http://blog.ianbicking.org/2007/09/12/re-raising-exceptions/
554-
# http://nedbatchelder.com/blog/200711/rethrowing_exceptions_in_python.html
555549
def eval(self, memorize_state, data):
556550
return self._eval(memorize_state["eval_code"], memorize_state["eval_env"],
557551
memorize_state, data)

patsy/mgcv_cubic_splines.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,7 @@ def test_crs_compat():
815815
start_idx = stop_idx + 1
816816
assert tests_ran == R_crs_num_tests
817817

818+
test_crs_compat.slow = True
818819

819820
def test_crs_with_specific_constraint():
820821
from patsy.highlevel import incr_dbuilder, build_design_matrices, dmatrix

patsy/state.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import numpy as np
2828
from patsy.util import (atleast_2d_column_default,
2929
asarray_or_pandas, pandas_friendly_reshape,
30-
wide_dtype_for)
30+
wide_dtype_for, safe_issubdtype)
3131
from patsy.compat import wraps
3232

3333
# These are made available in the patsy.* namespace
@@ -107,7 +107,7 @@ def transform(self, x):
107107
# heterogenous types. And in that case we're going to be munging the
108108
# types anyway, so copying isn't a big deal.
109109
x_arr = np.asarray(x)
110-
if np.issubdtype(x_arr.dtype, np.integer):
110+
if safe_issubdtype(x_arr.dtype, np.integer):
111111
dt = float
112112
else:
113113
dt = x_arr.dtype

patsy/test_highlevel.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
build_design_matrices,
2020
DesignMatrixBuilder)
2121
from patsy.highlevel import *
22-
from patsy.util import have_pandas
22+
from patsy.util import (have_pandas,
23+
have_pandas_categorical,
24+
have_pandas_categorical_dtype,
25+
pandas_Categorical_from_codes)
2326
from patsy.origin import Origin
2427

2528
if have_pandas:
@@ -712,3 +715,32 @@ def test_env_not_saved_in_builder():
712715
design_matrix2 = dmatrix(design_matrix.design_info.builder, {})
713716

714717
assert np.allclose(design_matrix, design_matrix2)
718+
719+
def test_C_and_pandas_categorical():
720+
if not have_pandas_categorical:
721+
return
722+
723+
objs = [pandas_Categorical_from_codes([1, 0, 1], ["b", "a"])]
724+
if have_pandas_categorical_dtype:
725+
objs.append(pandas.Series(objs[0]))
726+
for obj in objs:
727+
d = {"obj": obj}
728+
assert np.allclose(dmatrix("obj", d),
729+
[[1, 1],
730+
[1, 0],
731+
[1, 1]])
732+
733+
assert np.allclose(dmatrix("C(obj)", d),
734+
[[1, 1],
735+
[1, 0],
736+
[1, 1]])
737+
738+
assert np.allclose(dmatrix("C(obj, levels=['b', 'a'])", d),
739+
[[1, 1],
740+
[1, 0],
741+
[1, 1]])
742+
743+
assert np.allclose(dmatrix("C(obj, levels=['a', 'b'])", d),
744+
[[1, 0],
745+
[1, 1],
746+
[1, 0]])

0 commit comments

Comments
 (0)