99
1010import numpy as np
1111
12+ import matplotlib .cbook as cbook
1213import matplotlib .units as units
1314import matplotlib .ticker as ticker
1415
2223def to_array (data , maxlen = 100 ):
2324 if NP_NEW :
2425 return np .array (data , dtype = np .unicode )
26+ if cbook .is_scalar_or_string (data ):
27+ data = [data ]
2528 try :
2629 vals = np .array (data , dtype = ('|S' , maxlen ))
2730 except UnicodeEncodeError :
28- # pure hack
31+ # this yields gibberish
2932 vals = np .array ([convert_to_string (d ) for d in data ])
3033 return vals
3134
@@ -36,49 +39,53 @@ def convert(value, unit, axis):
3639 """Uses axis.unit_data map to encode
3740 data as floats
3841 """
39- vmap = dict (axis .unit_data )
42+ vmap = dict (zip ( axis .unit_data . seq , axis . unit_data . locs ) )
4043
4144 if isinstance (value , six .string_types ):
4245 return vmap [value ]
4346
4447 vals = to_array (value )
45- for lab , loc in axis . unit_data :
48+ for lab , loc in vmap . items () :
4649 vals [vals == lab ] = loc
4750
4851 return vals .astype ('float' )
4952
5053 @staticmethod
5154 def axisinfo (unit , axis ):
52- seq , locs = zip (* axis .unit_data )
53- majloc = StrCategoryLocator (locs )
54- majfmt = StrCategoryFormatter (seq )
55+ majloc = StrCategoryLocator (axis .unit_data .locs )
56+ majfmt = StrCategoryFormatter (axis .unit_data .seq )
5557 return units .AxisInfo (majloc = majloc , majfmt = majfmt )
5658
5759 @staticmethod
5860 def default_units (data , axis ):
5961 # the conversion call stack is:
6062 # default_units->axis_info->convert
61- axis .unit_data = map_categories (data , axis .unit_data )
63+ if axis .unit_data is None :
64+ axis .unit_data = UnitData (data )
65+ else :
66+ axis .unit_data .update (data )
6267 return None
6368
6469
6570class StrCategoryLocator (ticker .FixedLocator ):
6671 def __init__ (self , locs ):
67- super (StrCategoryLocator , self ).__init__ (locs , None )
72+ self .locs = locs
73+ self .nbins = None
6874
6975
7076class StrCategoryFormatter (ticker .FixedFormatter ):
7177 def __init__ (self , seq ):
72- super (StrCategoryFormatter , self ).__init__ (seq )
78+ self .seq = seq
79+ self .offset_string = ''
7380
7481
7582def convert_to_string (value ):
7683 """Helper function for numpy 1.6, can be replaced with
7784 np.array(...,dtype=unicode) for all later versions of numpy"""
7885
7986 if isinstance (value , six .string_types ):
80- return value
81- if np .isfinite (value ):
87+ pass
88+ elif np .isfinite (value ):
8289 value = np .asarray (value , dtype = str )[np .newaxis ][0 ]
8390 elif np .isnan (value ):
8491 value = 'nan'
@@ -91,59 +98,38 @@ def convert_to_string(value):
9198 return value
9299
93100
94- def map_categories (data , old_map = None ):
95- """Create mapping between unique categorical
96- values and numerical identifier.
97-
98- Paramters
99- ---------
100- data: iterable
101- sequence of values
102- old_map: list of tuple, optional
103- if not `None`, than old_mapping will be updated with new values and
104- previous mappings will remain unchanged)
105- sort: bool, optional
106- sort keys by ASCII value
107-
108- Returns
109- -------
110- list of tuple
111- [(label, ticklocation),...]
112-
113- """
114-
115- # code typical missing data in the negative range because
116- # everything else will always have positive encoding
117- # question able if it even makes sense
101+ class UnitData (object ):
102+ # debatable whether it makes sense to assign special codes to missing values
118103 spdict = {'nan' : - 1.0 , 'inf' : - 2.0 , '-inf' : - 3.0 }
119104
120- if isinstance (data , six .string_types ):
121- data = [data ]
122-
123- # will update this post cbook/dict support
124- strdata = to_array (data )
125- uniq = np .unique (strdata )
126-
127- if old_map :
128- olabs , okeys = zip (* old_map )
129- svalue = max (okeys ) + 1
130- else :
131- old_map , olabs , okeys = [], [], []
132- svalue = 0
133-
134- category_map = old_map [:]
135-
136- new_labs = [u for u in uniq if u not in olabs ]
137- missing = [nl for nl in new_labs if nl in spdict .keys ()]
138-
139- category_map .extend ([(m , spdict [m ]) for m in missing ])
140-
141- new_labs = [nl for nl in new_labs if nl not in missing ]
142-
143- new_locs = np .arange (svalue , svalue + len (new_labs ), dtype = 'float' )
144- category_map .extend (list (zip (new_labs , new_locs )))
145- return category_map
146-
105+ def __init__ (self , data ):
106+ """Create mapping between unique categorical values
107+ and numerical identifier
108+ Parameters
109+ ---------
110+ data: iterable
111+ sequence of values
112+ """
113+ self .seq , self .locs = [], []
114+ self ._set_seq_locs (data , 0 )
115+
116+ def update (self , new_data ):
117+ # so as not to conflict with spdict
118+ value = max (max (self .locs ) + 1 , 0 )
119+ self ._set_seq_locs (new_data , value )
120+
121+ def _set_seq_locs (self , data , value ):
122+ # magic to make it work under np1.6
123+ strdata = to_array (data )
124+ # np.unique makes dateframes work
125+ new_s = [d for d in np .unique (strdata ) if d not in self .seq ]
126+ for ns in new_s :
127+ self .seq .append (convert_to_string (ns ))
128+ if ns in UnitData .spdict .keys ():
129+ self .locs .append (UnitData .spdict [ns ])
130+ else :
131+ self .locs .append (value )
132+ value += 1
147133
148134# Connects the converter to matplotlib
149135units .registry [str ] = StrCategoryConverter ()
0 commit comments