simple wrapper on boundarynorm + listed colormap

story645 · story645 · commit 7998c6002157 · 2016-08-10T01:38:33.000-04:00
diff --git a/.travis.yml b/.travis.yml
@@ -110,7 +110,7 @@ install:
     # Install nose from a build which has partial
     # support for python36 and suport for coverage output suppressing
     pip install git+https://github.com/jenshnielsen/nose.git@matplotlibnose
-
+    pip install pytest
     # We manually install humor sans using the package from Ubuntu 14.10. Unfortunatly humor sans is not
     # availible in the Ubuntu version used by Travis but we can manually install the deb from a later
     # version since is it basically just a .ttf file
diff --git a/appveyor.yml b/appveyor.yml
@@ -82,7 +82,7 @@ install:
   # same things as the requirements in ci/conda_recipe/meta.yaml
   # if conda-forge gets a new pyqt, it might be nice to install it as well to have more backends
   # https://github.com/conda-forge/conda-forge.github.io/issues/157#issuecomment-223536381
-  - cmd: conda create -q -n test-environment python=%PYTHON_VERSION% pip setuptools numpy  python-dateutil freetype=2.6 msinttypes "tk=8.5" pyparsing pytz tornado "libpng>=1.6.21,<1.7" "zlib=1.2" "cycler>=0.10" nose mock
+  - cmd: conda create -q -n test-environment python=%PYTHON_VERSION% pip setuptools numpy  python-dateutil freetype=2.6 msinttypes "tk=8.5" pyparsing pytz tornado "libpng>=1.6.21,<1.7" "zlib=1.2" "cycler>=0.10" nose mock pytest
   - activate test-environment
   - cmd: echo %PYTHON_VERSION% %TARGET_ARCH%
   - cmd: IF %PYTHON_VERSION% == 2.7 conda install -q functools32
diff --git a/lib/matplotlib/axis.py b/lib/matplotlib/axis.py
@@ -642,7 +642,7 @@ def __init__(self, axes, pickradius=15):
         self.offsetText = self._get_offset_text()
         self.majorTicks = []
         self.minorTicks = []
-        self.unit_data = []
+        self.unit_data = None
         self.pickradius = pickradius
 
         # Initialize here for testing; later add API
@@ -695,14 +695,14 @@ def limit_range_for_scale(self, vmin, vmax):
 
     @property
     def unit_data(self):
-        """Holds data that a ConversionInterface subclass relys on
+        """Holds data that a ConversionInterface subclass uses
         to convert between labels and indexes
         """
         return self._unit_data
 
     @unit_data.setter
-    def unit_data(self, data):
-        self._unit_data = data
+    def unit_data(self, unit_data):
+        self._unit_data = unit_data
 
     def get_children(self):
         children = [self.label, self.offsetText]
diff --git a/lib/matplotlib/category.py b/lib/matplotlib/category.py
@@ -4,11 +4,12 @@
 """
 from __future__ import (absolute_import, division, print_function,
                         unicode_literals)
-
 import six
 
 import numpy as np
 
+import matplotlib.cbook as cbook
+import matplotlib.colors as mcolors
 import matplotlib.units as units
 import matplotlib.ticker as ticker
 
@@ -22,10 +23,12 @@
 def to_array(data, maxlen=100):
     if NP_NEW:
         return np.array(data, dtype=np.unicode)
+    if cbook.is_scalar_or_string(data):
+        data = [data]
     try:
         vals = np.array(data, dtype=('|S', maxlen))
     except UnicodeEncodeError:
-        # pure hack
+        # this yields gibberish
         vals = np.array([convert_to_string(d) for d in data])
     return vals
 
@@ -36,49 +39,83 @@ def convert(value, unit, axis):
         """Uses axis.unit_data map to encode
         data as floats
         """
-        vmap = dict(axis.unit_data)
+        vmap = dict(zip(axis.unit_data.seq, axis.unit_data.locs))
 
         if isinstance(value, six.string_types):
             return vmap[value]
 
         vals = to_array(value)
-        for lab, loc in axis.unit_data:
+        for lab, loc in vmap.items():
             vals[vals == lab] = loc
 
         return vals.astype('float')
 
     @staticmethod
     def axisinfo(unit, axis):
-        seq, locs = zip(*axis.unit_data)
-        majloc = StrCategoryLocator(locs)
-        majfmt = StrCategoryFormatter(seq)
+        majloc = StrCategoryLocator(axis.unit_data.locs)
+        majfmt = StrCategoryFormatter(axis.unit_data.seq)
         return units.AxisInfo(majloc=majloc, majfmt=majfmt)
 
     @staticmethod
     def default_units(data, axis):
         # the conversion call stack is:
         # default_units->axis_info->convert
-        axis.unit_data = map_categories(data, axis.unit_data)
+        if axis.unit_data is None:
+            axis.unit_data = UnitData(data)
+        else:
+            axis.unit_data.update(data)
         return None
 
 
 class StrCategoryLocator(ticker.FixedLocator):
     def __init__(self, locs):
-        super(StrCategoryLocator, self).__init__(locs, None)
+        self.locs = locs
+        self.nbins = None
 
 
 class StrCategoryFormatter(ticker.FixedFormatter):
     def __init__(self, seq):
-        super(StrCategoryFormatter, self).__init__(seq)
+        self.seq = seq
+        self.offset_string = ''
+
+
+def colors_from_categories(codings):
+    """
+    A helper routine to generate a cmap and a norm instance in which
+    each key in codings is associated with its color value in codings
+
+    Parameters
+    ----------
+    codings : sequence of (key, value) pairs or dict, where key is the
+             categorical variable, and value is its associated
+             color
+
+    Returns
+    -------
+    (cmap, norm) : tuple containing a :class:`Colormap` and a \
+                   :class:`Normalize` instance
+    """
+    if isinstance(codings, dict):
+        codings = codings.items()
+        if six.PY3:
+            codings = list(codings)
+
+    codings.sort()
+
+    cats, cols = zip(*codings)
+    cmap = mcolors.ListedColormap(cols)
+    cats = list(cats) + [np.inf]
+    norm = mcolors.BoundaryNorm(cats, cmap.N)
+    return cmap, norm
 
 
 def convert_to_string(value):
     """Helper function for numpy 1.6, can be replaced with
     np.array(...,dtype=unicode) for all later versions of numpy"""
 
     if isinstance(value, six.string_types):
-        return value
-    if np.isfinite(value):
+        pass
+    elif np.isfinite(value):
         value = np.asarray(value, dtype=str)[np.newaxis][0]
     elif np.isnan(value):
         value = 'nan'
@@ -91,59 +128,38 @@ def convert_to_string(value):
     return value
 
 
-def map_categories(data, old_map=None):
-    """Create mapping between unique categorical
-    values and numerical identifier.
-
-    Paramters
-    ---------
-    data: iterable
-        sequence of values
-    old_map: list of tuple, optional
-        if not `None`, than old_mapping will be updated with new values and
-        previous mappings will remain unchanged)
-    sort: bool, optional
-        sort keys by ASCII value
-
-    Returns
-    -------
-    list of tuple
-        [(label, ticklocation),...]
-
-    """
-
-    # code typical missing data in the negative range because
-    # everything else will always have positive encoding
-    # question able if it even makes sense
+class UnitData(object):
+    # debatable whether it makes sense to special-case missing values
     spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}
 
-    if isinstance(data, six.string_types):
-        data = [data]
-
-    # will update this post cbook/dict support
-    strdata = to_array(data)
-    uniq = np.unique(strdata)
-
-    if old_map:
-        olabs, okeys = zip(*old_map)
-        svalue = max(okeys) + 1
-    else:
-        old_map, olabs, okeys = [], [], []
-        svalue = 0
-
-    category_map = old_map[:]
-
-    new_labs = [u for u in uniq if u not in olabs]
-    missing = [nl for nl in new_labs if nl in spdict.keys()]
-
-    category_map.extend([(m, spdict[m]) for m in missing])
-
-    new_labs = [nl for nl in new_labs if nl not in missing]
-
-    new_locs = np.arange(svalue, svalue + len(new_labs), dtype='float')
-    category_map.extend(list(zip(new_labs, new_locs)))
-    return category_map
-
+    def __init__(self, data):
+        """Create mapping between unique categorical values
+        and numerical identifier
+        Parameters
+        ---------
+        data: iterable
+            sequence of values
+        """
+        self.seq, self.locs = [], []
+        self._set_seq_locs(data, 0)
+
+    def update(self, new_data):
+        # so as not to conflict with spdict
+        value = max(max(self.locs) + 1, 0)
+        self._set_seq_locs(new_data, value)
+
+    def _set_seq_locs(self, data, value):
+        # magic to make it work under np1.6
+        strdata = to_array(data)
+        # np.unique makes dataframes work
+        new_s = [d for d in np.unique(strdata) if d not in self.seq]
+        for ns in new_s:
+            self.seq.append(convert_to_string(ns))
+            if ns in UnitData.spdict.keys():
+                self.locs.append(UnitData.spdict[ns])
+            else:
+                self.locs.append(value)
+                value += 1
 
 # Connects the convertor to matplotlib
 units.registry[str] = StrCategoryConverter()
diff --git a/lib/matplotlib/tests/test_category.py b/lib/matplotlib/tests/test_category.py
diff --git a/tox.ini b/tox.ini