|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +"""Additional containers and container utilities.""" |
| 3 | + |
| 4 | +__all__ = ["box", "frozendict", "ShadowedSequence", |
| 5 | + "get_collection_abcs", "in_slice", "index_in_slice"] |
| 6 | + |
| 7 | +from functools import wraps |
| 8 | +import collections |
| 9 | +from collections.abc import MutableMapping, Hashable, Sequence |
| 10 | +from inspect import isclass |
| 11 | +from operator import lt, le, ge, gt |
| 12 | + |
| 13 | +class box: |
| 14 | + """Minimalistic, mutable single-item container à la Racket. |
| 15 | +
|
| 16 | + Motivation:: |
| 17 | +
|
| 18 | + x = 17 |
| 19 | + def f(x): |
| 20 | + x = 23 # no! |
| 21 | + f(x) |
| 22 | + print(x) # still 17 |
| 23 | +
|
| 24 | + Solution - box it, to keep the actual data in an attribute:: |
| 25 | +
|
| 26 | + b = box(17) |
| 27 | + def f(b): |
| 28 | + b.x = 23 # yes! |
| 29 | + f(b) |
| 30 | + print(b.x) # 23 |
| 31 | +
|
| 32 | + (It's called ``x`` and not ``value`` to minimize the number of additional |
| 33 | + keystrokes needed.) |
| 34 | +
|
| 35 | + **Disclaimer**: maybe silly. The standard pythonic solutions are to box |
| 36 | + with a ``list`` (then trying to remember it represents a box, not a list), |
| 37 | + or use the ``nonlocal`` or ``global`` statements if lexically appropriate |
| 38 | + for the particular situation. This class just makes the programmer's intent |
| 39 | + more explicit. |
| 40 | + """ |
| 41 | + def __init__(self, x=None): |
| 42 | + self.x = x |
| 43 | + def __repr__(self): |
| 44 | + return "<box at 0x{:x}, x={}>".format(id(self), self.x) |
| 45 | + |
| 46 | +_the_empty_frozendict = None |
| 47 | +class frozendict: |
| 48 | + """Immutable dictionary. |
| 49 | +
|
| 50 | + Basic usage:: |
| 51 | +
|
| 52 | + d = frozendict(m) |
| 53 | + d = frozendict({'a': 1, 'b': 2}) |
| 54 | + d = frozendict(a=1, b=2) |
| 55 | +
|
| 56 | + where ``m`` is any type valid for ``E`` in ``dict.update``. |
| 57 | +
|
| 58 | + Functional update:: |
| 59 | +
|
| 60 | + d = frozendict(m0, m1, ...) |
| 61 | + d = frozendict({'a': 1, 'b': 2}, {'a': 42}) |
| 62 | + d = frozendict(m, a=1, b=2) |
| 63 | + d = frozendict(m0, m1, ..., a=1, b=2) |
| 64 | +
|
| 65 | + Then ``d`` behaves just like a regular dictionary, except it is not writable. |
| 66 | + As usual, this does **not** protect from mutating the values themselves, |
| 67 | + if they happen to be mutable objects (such as containers). |
| 68 | +
|
| 69 | + Any ``m`` used in the initialization of a ``frozendict`` is shallow-copied |
| 70 | + to make sure the bindings in the ``frozendict`` do not change even if the |
| 71 | + original is later mutated. |
| 72 | +
|
| 73 | + Just like for ``tuple`` and ``frozenset``, the empty ``frozendict`` is a |
| 74 | + singleton; each no-argument call ``frozendict()`` will return the same object |
| 75 | + instance. (But don't pickle it; it is freshly created in each session). |
| 76 | +
|
| 77 | + In terms of ``collections.abc``, a ``frozendict`` is a ``Container``, |
| 78 | + ``Hashable``, ``Iterable``, ``Mapping`` and ``Sized``. |
| 79 | +
|
| 80 | + Just like for ``dict``, the abstract superclasses are virtual; they are |
| 81 | + detected by the built-ins ``issubclass`` and ``isinstance``, but they are |
| 82 | + not part of the MRO. |
| 83 | + """ |
| 84 | + def __new__(cls, *ms, **mappings): # make the empty frozendict() a singleton |
| 85 | + if not ms and not mappings: |
| 86 | + global _the_empty_frozendict |
| 87 | + if _the_empty_frozendict is None: |
| 88 | + _the_empty_frozendict = super().__new__(cls) |
| 89 | + return _the_empty_frozendict |
| 90 | + return super().__new__(cls) # object() takes no args, but we need to see them |
| 91 | + |
| 92 | + def __init__(self, *ms, **mappings): |
| 93 | + """Arguments: |
| 94 | +
|
| 95 | + ms: mappings; optional |
| 96 | + If one argument is provided: the input mapping to freeze. |
| 97 | +
|
| 98 | + If more are provided, the second and later ones will |
| 99 | + functionally update the data, in the order given. |
| 100 | +
|
| 101 | + Accepts any type understood by ``dict.update``. |
| 102 | +
|
| 103 | + mappings: kwargs in the form key=value; optional |
| 104 | + Essentially like the ``**F`` argument of ``dict.update``. |
| 105 | +
|
| 106 | + Functional updates applied at the end, after the last mapping |
| 107 | + in ``ms``. Can be useful for overriding individual items. |
| 108 | + """ |
| 109 | + super().__init__() |
| 110 | + self._data = {} |
| 111 | + for m in ms: |
| 112 | + try: |
| 113 | + self._data.update(m) |
| 114 | + except TypeError: |
| 115 | + pass |
| 116 | + self._data.update(mappings) |
| 117 | + |
| 118 | + @wraps(dict.__repr__) |
| 119 | + def __repr__(self): |
| 120 | + return "frozendict({})".format(self._data.__repr__()) |
| 121 | + |
| 122 | + def __hash__(self): |
| 123 | + return hash(frozenset(self.items())) |
| 124 | + |
| 125 | + # Provide any read-access parts of the dict API. |
| 126 | + # |
| 127 | + # This is somewhat hacky, but "composition over inheritance", and if we |
| 128 | + # subclassed dict, that would give us the wrong ABC (MutableMapping) |
| 129 | + # with no way to customize it away (short of monkey-patching MutableMapping). |
| 130 | + # |
| 131 | + # https://docs.python.org/3/library/collections.abc.html |
| 132 | + # https://docs.python.org/3/reference/datamodel.html#emulating-container-types |
| 133 | + @wraps(dict.__getitem__) |
| 134 | + def __getitem__(self, k): |
| 135 | + return self._data.__getitem__(k) |
| 136 | + @wraps(dict.__iter__) |
| 137 | + def __iter__(self): |
| 138 | + return self._data.__iter__() |
| 139 | + @wraps(dict.__len__) |
| 140 | + def __len__(self): |
| 141 | + return self._data.__len__() |
| 142 | + @wraps(dict.__contains__) |
| 143 | + def __contains__(self, k): |
| 144 | + return self._data.__contains__(k) |
| 145 | + @wraps(dict.keys) |
| 146 | + def keys(self): |
| 147 | + return self._data.keys() |
| 148 | + @wraps(dict.items) |
| 149 | + def items(self): |
| 150 | + return self._data.items() |
| 151 | + @wraps(dict.values) |
| 152 | + def values(self): |
| 153 | + return self._data.values() |
| 154 | + @wraps(dict.get) |
| 155 | + def get(self, k, *d): |
| 156 | + return self._data.get(k, *d) |
| 157 | + @wraps(dict.__eq__) |
| 158 | + def __eq__(self, other): |
| 159 | + other = other._data if isinstance(other, frozendict) else other |
| 160 | + return self._data.__eq__(other) |
| 161 | + @wraps(dict.__ne__) |
| 162 | + def __ne__(self, other): |
| 163 | + other = other._data if isinstance(other, frozendict) else other |
| 164 | + return self._data.__ne__(other) |
| 165 | + |
| 166 | +# Register virtual ABCs for frozendict (like dict has). |
| 167 | +# |
| 168 | +# https://stackoverflow.com/questions/42781267/is-there-a-pythonics-way-to-distinguish-sequences-objects-like-tuple-and-list |
| 169 | +# https://docs.python.org/3/library/abc.html#abc.ABCMeta.register |
| 170 | +# Further reading: https://stackoverflow.com/questions/40764347/python-subclasscheck-subclasshook |
| 171 | +def get_collection_abcs(cls): |
| 172 | + """Return a set of the collections.abc superclasses of cls (virtuals too).""" |
| 173 | + return {v for k, v in vars(collections.abc).items() if isclass(v) and issubclass(cls, v)} |
| 174 | +for abscls in get_collection_abcs(dict) - {MutableMapping} | {Hashable}: |
| 175 | + abscls.register(frozendict) |
| 176 | +del abscls # namespace cleanup |
| 177 | + |
| 178 | +class ShadowedSequence(Sequence): |
| 179 | + """Sequence with some elements shadowed by those from another sequence. |
| 180 | +
|
| 181 | + Or in other words, a functionally updated view of a sequence. Or somewhat |
| 182 | + like ``collections.ChainMap``, but for sequences. |
| 183 | +
|
| 184 | + Essentially, ``out[k] = v[index_in_slice(k, ix)] if in_slice(k, ix) else seq[k]``, |
| 185 | + but doesn't actually allocate ``out``. |
| 186 | +
|
| 187 | + ``ix`` may be integer (if ``v`` represents one item only) |
| 188 | + or slice (if ``v`` is intended as a sequence). |
| 189 | + """ |
| 190 | + def __init__(self, seq, ix, v): |
| 191 | + if not isinstance(ix, (slice, int)): |
| 192 | + raise TypeError("ix: expected slice or int, got {} with value {}".format(type(ix), ix)) |
| 193 | + self.seq = seq |
| 194 | + self.ix = ix |
| 195 | + self.v = v |
| 196 | + |
| 197 | + def __getitem__(self, k): |
| 198 | + ix = self.ix |
| 199 | + l = len(self) |
| 200 | + if in_slice(k, ix, l): |
| 201 | + if isinstance(ix, int): |
| 202 | + return self.v # just one item |
| 203 | + # we already know k is in ix, so skip validation for speed. |
| 204 | + i = _index_in_slice(k, ix, l, _validate=False) |
| 205 | + if i >= len(self.v): |
| 206 | + # TODO: Would be nice to raise IndexError, but the genexpr in |
| 207 | + # unpythonic.fup.fupdate catches that, hiding the error. |
| 208 | + raise ValueError("Replacement sequence too short; attempted to access index {} with len {} (items: {})".format(i, len(self.v), self.v)) |
| 209 | + return self.v[i] |
| 210 | + return self.seq[k] # not in slice |
| 211 | + |
| 212 | + def __len__(self): |
| 213 | + return len(self.seq) |
| 214 | + |
| 215 | +def in_slice(i, s, l=None): |
| 216 | + """Return whether the int i is in the slice s. |
| 217 | +
|
| 218 | + For convenience, ``s`` may be int instead of slice; then return |
| 219 | + whether ``i == s``. |
| 220 | +
|
| 221 | + The optional ``l`` is the length of the sequence being indexed, used for |
| 222 | + interpreting any negative indices, and default start and stop values |
| 223 | + (if ``s.start`` or ``s.stop`` is ``None``). |
| 224 | +
|
| 225 | + If ``l is None``, negative or missing ``s.start`` or ``s.stop`` may raise |
| 226 | + ValueError. (A negative ``s.step`` by itself does not need ``l``.) |
| 227 | + """ |
| 228 | + if not isinstance(s, (slice, int)): |
| 229 | + raise TypeError("s must be slice or int, got {} with value {}".format(type(s), s)) |
| 230 | + if not isinstance(i, int): |
| 231 | + raise TypeError("i must be int, got {} with value {}".format(type(i), i)) |
| 232 | + wrap = _make_negidx_converter(l) |
| 233 | + i = wrap(i) |
| 234 | + if isinstance(s, int): |
| 235 | + s = wrap(s) |
| 236 | + return i == s |
| 237 | + start, stop, step = _canonize_slice(s, l, wrap) |
| 238 | + cmp_start, cmp_end = (ge, lt) if step > 0 else (le, gt) |
| 239 | + at_or_after_start = cmp_start(i, start) |
| 240 | + before_stop = cmp_end(i, stop) |
| 241 | + on_grid = (i - start) % step == 0 |
| 242 | + return at_or_after_start and on_grid and before_stop |
| 243 | + |
| 244 | +def index_in_slice(i, s, l=None): |
| 245 | + """Return the index of the int i in the slice s, or None if i is not in s. |
| 246 | +
|
| 247 | + (I.e. how-manyth item of the slice the index i is.) |
| 248 | +
|
| 249 | + The optional sequence length ``l`` works the same as in ``in_slice``. |
| 250 | + """ |
| 251 | + return _index_in_slice(i, s, l) |
| 252 | + |
| 253 | +# efficiency: allow skipping the validation check for call sites |
| 254 | +# that have already checked with in_slice(). |
| 255 | +def _index_in_slice(i, s, l=None, _validate=True): |
| 256 | + if (not _validate) or in_slice(i, s, l): |
| 257 | + wrap = _make_negidx_converter(l) |
| 258 | + start, _, step = _canonize_slice(s, l, wrap) |
| 259 | + return (wrap(i) - start) // step |
| 260 | + |
| 261 | +def _make_negidx_converter(l): # l: length of sequence being indexed |
| 262 | + if l is not None: |
| 263 | + if not isinstance(l, int): |
| 264 | + raise TypeError("l must be int, got {} with value {}".format(type(l), l)) |
| 265 | + if l <= 0: |
| 266 | + raise ValueError("l must be an int >= 1, got {}".format(l)) |
| 267 | + def apply_conversion(k): |
| 268 | + return k % l |
| 269 | + else: |
| 270 | + def apply_conversion(k): |
| 271 | + raise ValueError("Need l to interpret negative indices") |
| 272 | + def convert(k): |
| 273 | + if k is not None: |
| 274 | + if not isinstance(k, int): |
| 275 | + raise TypeError("k must be int, got {} with value {}".format(type(k), k)) |
| 276 | + return apply_conversion(k) if k < 0 else k |
| 277 | + return convert |
| 278 | + |
| 279 | +def _canonize_slice(s, l=None, w=None): # convert negatives, inject defaults. |
| 280 | + if not isinstance(s, slice): |
| 281 | + raise TypeError("s must be slice, got {} with value {}".format(type(s), s)) |
| 282 | + |
| 283 | + step = s.step if s.step is not None else +1 # no "s.step or +1"; someone may try step=0 |
| 284 | + if step == 0: |
| 285 | + raise ValueError("slice step cannot be zero") # message copied from range(5)[0:4:0] |
| 286 | + |
| 287 | + wrap = w or _make_negidx_converter(l) |
| 288 | + |
| 289 | + start = wrap(s.start) |
| 290 | + if start is None: |
| 291 | + if step > 0: |
| 292 | + start = 0 |
| 293 | + else: |
| 294 | + if l is None: |
| 295 | + raise ValueError("Need l to determine default start for step < 0") |
| 296 | + start = wrap(-1) |
| 297 | + |
| 298 | + stop = wrap(s.stop) |
| 299 | + if stop is None: |
| 300 | + if step > 0: |
| 301 | + if l is None: |
| 302 | + raise ValueError("Need l to determine default stop for step > 0") |
| 303 | + stop = l |
| 304 | + else: |
| 305 | + stop = -1 # yes, really -1 to have index 0 inside the slice |
| 306 | + |
| 307 | + return start, stop, step |
0 commit comments