forked from astropy/astropy
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtable.py
More file actions
3915 lines (3208 loc) · 145 KB
/
table.py
File metadata and controls
3915 lines (3208 loc) · 145 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from .index import SlicedIndex, TableIndices, TableLoc, TableILoc, TableLocIndices
import sys
from collections import OrderedDict, defaultdict
from collections.abc import Mapping
import warnings
from copy import deepcopy
import types
import itertools
import weakref
import numpy as np
from numpy import ma
from astropy import log
from astropy.units import Quantity, QuantityInfo
from astropy.utils import isiterable, ShapedLikeNDArray
from astropy.utils.console import color_print
from astropy.utils.metadata import MetaData, MetaAttribute
from astropy.utils.data_info import BaseColumnInfo, MixinInfo, ParentDtypeInfo, DataInfo
from astropy.utils.decorators import format_doc
from astropy.io.registry import UnifiedReadWriteMethod
from . import groups
from .pprint import TableFormatter
from .column import (BaseColumn, Column, MaskedColumn, _auto_names, FalseArray,
col_copy, _convert_sequence_data_to_array)
from .row import Row
from .np_utils import fix_column_name
from .info import TableInfo
from .index import Index, _IndexModeContext, get_index
from .connect import TableRead, TableWrite
from . import conf
_implementation_notes = """
This string has informal notes concerning Table implementation for developers.
Things to remember:
- Table has customizable attributes ColumnClass, Column, MaskedColumn.
Table.Column is normally just column.Column (same w/ MaskedColumn)
but in theory they can be different. Table.ColumnClass is the default
class used to create new non-mixin columns, and this is a function of
the Table.masked attribute. Column creation / manipulation in a Table
needs to respect these.
- Column objects that get inserted into the Table.columns attribute must
have the info.parent_table attribute set correctly. Beware just dropping
an object into the columns dict since an existing column may
be part of another Table and have parent_table set to point at that
table. Dropping that column into `columns` of this Table will cause
a problem for the old one so the column object needs to be copied (but
not necessarily the data).
Currently replace_column is always making a copy of both object and
data if parent_table is set. This could be improved but requires a
generic way to copy a mixin object but not the data.
- Be aware of column objects that have indices set.
- `cls.ColumnClass` is a property that effectively uses the `masked` attribute
to choose either `cls.Column` or `cls.MaskedColumn`.
"""
__doctest_skip__ = ['Table.read', 'Table.write', 'Table._read',
'Table.convert_bytestring_to_unicode',
'Table.convert_unicode_to_bytestring',
]
__doctest_requires__ = {'*pandas': ['pandas>=1.1']}
_pprint_docs = """
{__doc__}
Parameters
----------
max_lines : int or `None`
Maximum number of lines in table output.
max_width : int or `None`
Maximum character width of output.
show_name : bool
Include a header row for column names. Default is True.
show_unit : bool
Include a header row for unit. Default is to show a row
for units only if one or more columns has a defined value
for the unit.
show_dtype : bool
Include a header row for column dtypes. Default is True.
align : str or list or tuple or `None`
Left/right alignment of columns. Default is right (None) for all
columns. Other allowed values are '>', '<', '^', and '0=' for
right, left, centered, and 0-padded, respectively. A list of
strings can be provided for alignment of tables with multiple
columns.
"""
_pformat_docs = """
{__doc__}
Parameters
----------
max_lines : int or `None`
Maximum number of rows to output
max_width : int or `None`
Maximum character width of output
show_name : bool
Include a header row for column names. Default is True.
show_unit : bool
Include a header row for unit. Default is to show a row
for units only if one or more columns has a defined value
for the unit.
show_dtype : bool
Include a header row for column dtypes. Default is True.
html : bool
Format the output as an HTML table. Default is False.
tableid : str or `None`
An ID tag for the table; only used if html is set. Default is
"table{id}", where id is the unique integer id of the table object,
id(self)
align : str or list or tuple or `None`
Left/right alignment of columns. Default is right (None) for all
columns. Other allowed values are '>', '<', '^', and '0=' for
right, left, centered, and 0-padded, respectively. A list of
strings can be provided for alignment of tables with multiple
columns.
tableclass : str or list of str or `None`
CSS classes for the table; only used if html is set. Default is
None.
Returns
-------
lines : list
Formatted table as a list of strings.
"""
class TableReplaceWarning(UserWarning):
"""
Warning class for cases when a table column is replaced via the
Table.__setitem__ syntax e.g. t['a'] = val.
This does not inherit from AstropyWarning because we want to use
stacklevel=3 to show the user where the issue occurred in their code.
"""
pass
def descr(col):
"""Array-interface compliant full description of a column.
This returns a 3-tuple (name, type, shape) that can always be
used in a structured array dtype definition.
"""
col_dtype = 'O' if (col.info.dtype is None) else col.info.dtype
col_shape = col.shape[1:] if hasattr(col, 'shape') else ()
return (col.info.name, col_dtype, col_shape)
def has_info_class(obj, cls):
"""Check if the object's info is an instance of cls."""
# We check info on the class of the instance, since on the instance
# itself accessing 'info' has side effects in that it sets
# obj.__dict__['info'] if it does not exist already.
return isinstance(getattr(obj.__class__, 'info', None), cls)
def _get_names_from_list_of_dict(rows):
"""Return list of column names if ``rows`` is a list of dict that
defines table data.
If rows is not a list of dict then return None.
"""
if rows is None:
return None
names = set()
for row in rows:
if not isinstance(row, dict):
return None
names.update(row)
return list(names)
# Note to future maintainers: when transitioning this to dict
# be sure to change the OrderedDict ref(s) in Row and in __len__().
class TableColumns(OrderedDict):
"""OrderedDict subclass for a set of columns.
This class enhances item access to provide convenient access to columns
by name or index, including slice access. It also handles renaming
of columns.
The initialization argument ``cols`` can be a list of ``Column`` objects
or any structure that is valid for initializing a Python dict. This
includes a dict, list of (key, val) tuples or [key, val] lists, etc.
Parameters
----------
cols : dict, list, tuple; optional
Column objects as data structure that can init dict (see above)
"""
def __init__(self, cols={}):
if isinstance(cols, (list, tuple)):
# `cols` should be a list of two-tuples, but it is allowed to have
# columns (BaseColumn or mixins) in the list.
newcols = []
for col in cols:
if has_info_class(col, BaseColumnInfo):
newcols.append((col.info.name, col))
else:
newcols.append(col)
cols = newcols
super().__init__(cols)
def __getitem__(self, item):
"""Get items from a TableColumns object.
::
tc = TableColumns(cols=[Column(name='a'), Column(name='b'), Column(name='c')])
tc['a'] # Column('a')
tc[1] # Column('b')
tc['a', 'b'] # <TableColumns names=('a', 'b')>
tc[1:3] # <TableColumns names=('b', 'c')>
"""
if isinstance(item, str):
return OrderedDict.__getitem__(self, item)
elif isinstance(item, (int, np.integer)):
return list(self.values())[item]
elif (isinstance(item, np.ndarray) and item.shape == () and item.dtype.kind == 'i'):
return list(self.values())[item.item()]
elif isinstance(item, tuple):
return self.__class__([self[x] for x in item])
elif isinstance(item, slice):
return self.__class__([self[x] for x in list(self)[item]])
else:
raise IndexError('Illegal key or index value for {} object'
.format(self.__class__.__name__))
def __setitem__(self, item, value, validated=False):
"""
Set item in this dict instance, but do not allow directly replacing an
existing column unless it is already validated (and thus is certain to
not corrupt the table).
NOTE: it is easily possible to corrupt a table by directly *adding* a new
key to the TableColumns attribute of a Table, e.g.
``t.columns['jane'] = 'doe'``.
"""
if item in self and not validated:
raise ValueError("Cannot replace column '{}'. Use Table.replace_column() instead."
.format(item))
super().__setitem__(item, value)
def __repr__(self):
names = (f"'{x}'" for x in self.keys())
return f"<{self.__class__.__name__} names=({','.join(names)})>"
def _rename_column(self, name, new_name):
if name == new_name:
return
if new_name in self:
raise KeyError(f"Column {new_name} already exists")
# Rename column names in pprint include/exclude attributes as needed
parent_table = self[name].info.parent_table
if parent_table is not None:
parent_table.pprint_exclude_names._rename(name, new_name)
parent_table.pprint_include_names._rename(name, new_name)
mapper = {name: new_name}
new_names = [mapper.get(name, name) for name in self]
cols = list(self.values())
self.clear()
self.update(list(zip(new_names, cols)))
def __delitem__(self, name):
# Remove column names from pprint include/exclude attributes as needed.
# __delitem__ also gets called for pop() and popitem().
parent_table = self[name].info.parent_table
if parent_table is not None:
# _remove() method does not require that `name` is in the attribute
parent_table.pprint_exclude_names._remove(name)
parent_table.pprint_include_names._remove(name)
return super().__delitem__(name)
def isinstance(self, cls):
"""
Return a list of columns which are instances of the specified classes.
Parameters
----------
cls : class or tuple of classes
Column class (including mixin) or tuple of Column classes.
Returns
-------
col_list : list of Columns
List of Column objects which are instances of given classes.
"""
cols = [col for col in self.values() if isinstance(col, cls)]
return cols
def not_isinstance(self, cls):
"""
Return a list of columns which are not instances of the specified classes.
Parameters
----------
cls : class or tuple of classes
Column class (including mixin) or tuple of Column classes.
Returns
-------
col_list : list of Columns
List of Column objects which are not instances of given classes.
"""
cols = [col for col in self.values() if not isinstance(col, cls)]
return cols
class TableAttribute(MetaAttribute):
"""
Descriptor to define a custom attribute for a Table subclass.
The value of the ``TableAttribute`` will be stored in a dict named
``__attributes__`` that is stored in the table ``meta``. The attribute
can be accessed and set in the usual way, and it can be provided when
creating the object.
Defining an attribute by this mechanism ensures that it will persist if
the table is sliced or serialized, for example as a pickle or ECSV file.
See the `~astropy.utils.metadata.MetaAttribute` documentation for additional
details.
Parameters
----------
default : object
Default value for attribute
Examples
--------
>>> from astropy.table import Table, TableAttribute
>>> class MyTable(Table):
... identifier = TableAttribute(default=1)
>>> t = MyTable(identifier=10)
>>> t.identifier
10
>>> t.meta
OrderedDict([('__attributes__', {'identifier': 10})])
"""
class PprintIncludeExclude(TableAttribute):
"""Maintain tuple that controls table column visibility for print output.
This is a descriptor that inherits from MetaAttribute so that the attribute
value is stored in the table meta['__attributes__'].
This gets used for the ``pprint_include_names`` and ``pprint_exclude_names`` Table
attributes.
"""
def __get__(self, instance, owner_cls):
"""Get the attribute.
This normally returns an instance of this class which is stored on the
owner object.
"""
# For getting from class not an instance
if instance is None:
return self
# If not already stored on `instance`, make a copy of the class
# descriptor object and put it onto the instance.
value = instance.__dict__.get(self.name)
if value is None:
value = deepcopy(self)
instance.__dict__[self.name] = value
# We set _instance_ref on every call, since if one makes copies of
# instances, this attribute will be copied as well, which will lose the
# reference.
value._instance_ref = weakref.ref(instance)
return value
def __set__(self, instance, names):
"""Set value of ``instance`` attribute to ``names``.
Parameters
----------
instance : object
Instance that owns the attribute
names : None, str, list, tuple
Column name(s) to store, or None to clear
"""
if isinstance(names, str):
names = [names]
if names is None:
# Remove attribute value from the meta['__attributes__'] dict.
# Subsequent access will just return None.
delattr(instance, self.name)
else:
# This stores names into instance.meta['__attributes__'] as tuple
return super().__set__(instance, tuple(names))
def __call__(self):
"""Get the value of the attribute.
Returns
-------
names : None, tuple
Include/exclude names
"""
# Get the value from instance.meta['__attributes__']
instance = self._instance_ref()
return super().__get__(instance, instance.__class__)
def __repr__(self):
if hasattr(self, '_instance_ref'):
out = f'<{self.__class__.__name__} name={self.name} value={self()}>'
else:
out = super().__repr__()
return out
def _add_remove_setup(self, names):
"""Common setup for add and remove.
- Coerce attribute value to a list
- Coerce names into a list
- Get the parent table instance
"""
names = [names] if isinstance(names, str) else list(names)
# Get the value. This is the same as self() but we need `instance` here.
instance = self._instance_ref()
value = super().__get__(instance, instance.__class__)
value = [] if value is None else list(value)
return instance, names, value
def add(self, names):
"""Add ``names`` to the include/exclude attribute.
Parameters
----------
names : str, list, tuple
Column name(s) to add
"""
instance, names, value = self._add_remove_setup(names)
value.extend(name for name in names if name not in value)
super().__set__(instance, tuple(value))
def remove(self, names):
"""Remove ``names`` from the include/exclude attribute.
Parameters
----------
names : str, list, tuple
Column name(s) to remove
"""
self._remove(names, raise_exc=True)
def _remove(self, names, raise_exc=False):
"""Remove ``names`` with optional checking if they exist"""
instance, names, value = self._add_remove_setup(names)
# Return now if there are no attributes and thus no action to be taken.
if not raise_exc and '__attributes__' not in instance.meta:
return
# Remove one by one, optionally raising an exception if name is missing.
for name in names:
if name in value:
value.remove(name) # Using the list.remove method
elif raise_exc:
raise ValueError(f'{name} not in {self.name}')
# Change to either None or a tuple for storing back to attribute
value = None if value == [] else tuple(value)
self.__set__(instance, value)
def _rename(self, name, new_name):
"""Rename ``name`` to ``new_name`` if ``name`` is in the list"""
names = self() or ()
if name in names:
new_names = list(names)
new_names[new_names.index(name)] = new_name
self.set(new_names)
def set(self, names):
"""Set value of include/exclude attribute to ``names``.
Parameters
----------
names : None, str, list, tuple
Column name(s) to store, or None to clear
"""
class _Context:
def __init__(self, descriptor_self):
self.descriptor_self = descriptor_self
self.names_orig = descriptor_self()
def __enter__(self):
pass
def __exit__(self, type, value, tb):
descriptor_self = self.descriptor_self
instance = descriptor_self._instance_ref()
descriptor_self.__set__(instance, self.names_orig)
def __repr__(self):
return repr(self.descriptor_self)
ctx = _Context(descriptor_self=self)
instance = self._instance_ref()
self.__set__(instance, names)
return ctx
class Table:
"""A class to represent tables of heterogeneous data.
`~astropy.table.Table` provides a class for heterogeneous tabular data.
A key enhancement provided by the `~astropy.table.Table` class over
e.g. a `numpy` structured array is the ability to easily modify the
structure of the table by adding or removing columns, or adding new
rows of data. In addition table and column metadata are fully supported.
`~astropy.table.Table` differs from `~astropy.nddata.NDData` by the
assumption that the input data consists of columns of homogeneous data,
where each column has a unique identifier and may contain additional
metadata such as the data unit, format, and description.
See also: https://docs.astropy.org/en/stable/table/
Parameters
----------
data : numpy ndarray, dict, list, Table, or table-like object, optional
Data to initialize table.
masked : bool, optional
Specify whether the table is masked.
names : list, optional
Specify column names.
dtype : list, optional
Specify column data types.
meta : dict, optional
Metadata associated with the table.
copy : bool, optional
Copy the input data. If the input is a Table the ``meta`` is always
copied regardless of the ``copy`` parameter.
Default is True.
rows : numpy ndarray, list of lists, optional
Row-oriented data for table instead of ``data`` argument.
copy_indices : bool, optional
Copy any indices in the input data. Default is True.
units : list, dict, optional
List or dict of units to apply to columns.
descriptions : list, dict, optional
List or dict of descriptions to apply to columns.
**kwargs : dict, optional
Additional keyword args when converting table-like object.
"""
meta = MetaData(copy=False)
# Define class attributes for core container objects to allow for subclass
# customization.
Row = Row
Column = Column
MaskedColumn = MaskedColumn
TableColumns = TableColumns
TableFormatter = TableFormatter
# Unified I/O read and write methods from .connect
read = UnifiedReadWriteMethod(TableRead)
write = UnifiedReadWriteMethod(TableWrite)
pprint_exclude_names = PprintIncludeExclude()
pprint_include_names = PprintIncludeExclude()
def as_array(self, keep_byteorder=False, names=None):
"""
Return a new copy of the table in the form of a structured np.ndarray or
np.ma.MaskedArray object (as appropriate).
Parameters
----------
keep_byteorder : bool, optional
By default the returned array has all columns in native byte
order. However, if this option is `True` this preserves the
byte order of all columns (if any are non-native).
names : list, optional:
List of column names to include for returned structured array.
Default is to include all table columns.
Returns
-------
table_array : np.ndarray (unmasked) or np.ma.MaskedArray (masked)
Copy of table as a numpy structured array
"""
masked = self.masked or self.has_masked_columns or self.has_masked_values
empty_init = ma.empty if masked else np.empty
if len(self.columns) == 0:
return empty_init(0, dtype=None)
dtype = []
cols = self.columns.values()
if names is not None:
cols = [col for col in cols if col.info.name in names]
for col in cols:
col_descr = descr(col)
if not (col.info.dtype.isnative or keep_byteorder):
new_dt = np.dtype(col_descr[1]).newbyteorder('=')
col_descr = (col_descr[0], new_dt, col_descr[2])
dtype.append(col_descr)
data = empty_init(len(self), dtype=dtype)
for col in cols:
# When assigning from one array into a field of a structured array,
# Numpy will automatically swap those columns to their destination
# byte order where applicable
data[col.info.name] = col
# For masked out, masked mixin columns need to set output mask attribute.
if masked and has_info_class(col, MixinInfo) and hasattr(col, 'mask'):
data[col.info.name].mask = col.mask
return data
def __init__(self, data=None, masked=False, names=None, dtype=None,
meta=None, copy=True, rows=None, copy_indices=True,
units=None, descriptions=None,
**kwargs):
# Set up a placeholder empty table
self._set_masked(masked)
self.columns = self.TableColumns()
self.formatter = self.TableFormatter()
self._copy_indices = True # copy indices from this Table by default
self._init_indices = copy_indices # whether to copy indices in init
self.primary_key = None
# Must copy if dtype are changing
if not copy and dtype is not None:
raise ValueError('Cannot specify dtype when copy=False')
# Specifies list of names found for the case of initializing table with
# a list of dict. If data are not list of dict then this is None.
names_from_list_of_dict = None
# Row-oriented input, e.g. list of lists or list of tuples, list of
# dict, Row instance. Set data to something that the subsequent code
# will parse correctly.
if rows is not None:
if data is not None:
raise ValueError('Cannot supply both `data` and `rows` values')
if isinstance(rows, types.GeneratorType):
# Without this then the all(..) test below uses up the generator
rows = list(rows)
# Get column names if `rows` is a list of dict, otherwise this is None
names_from_list_of_dict = _get_names_from_list_of_dict(rows)
if names_from_list_of_dict:
data = rows
elif isinstance(rows, self.Row):
data = rows
else:
data = list(zip(*rows))
# Infer the type of the input data and set up the initialization
# function, number of columns, and potentially the default col names
default_names = None
# Handle custom (subclass) table attributes that are stored in meta.
# These are defined as class attributes using the TableAttribute
# descriptor. Any such attributes get removed from kwargs here and
# stored for use after the table is otherwise initialized. Any values
# provided via kwargs will have precedence over existing values from
# meta (e.g. from data as a Table or meta via kwargs).
meta_table_attrs = {}
if kwargs:
for attr in list(kwargs):
descr = getattr(self.__class__, attr, None)
if isinstance(descr, TableAttribute):
meta_table_attrs[attr] = kwargs.pop(attr)
if hasattr(data, '__astropy_table__'):
# Data object implements the __astropy_table__ interface method.
# Calling that method returns an appropriate instance of
# self.__class__ and respects the `copy` arg. The returned
# Table object should NOT then be copied.
data = data.__astropy_table__(self.__class__, copy, **kwargs)
copy = False
elif kwargs:
raise TypeError('__init__() got unexpected keyword argument {!r}'
.format(list(kwargs.keys())[0]))
if (isinstance(data, np.ndarray)
and data.shape == (0,)
and not data.dtype.names):
data = None
if isinstance(data, self.Row):
data = data._table[data._index:data._index + 1]
if isinstance(data, (list, tuple)):
# Get column names from `data` if it is a list of dict, otherwise this is None.
# This might be previously defined if `rows` was supplied as an init arg.
names_from_list_of_dict = (names_from_list_of_dict
or _get_names_from_list_of_dict(data))
if names_from_list_of_dict:
init_func = self._init_from_list_of_dicts
n_cols = len(names_from_list_of_dict)
else:
init_func = self._init_from_list
n_cols = len(data)
elif isinstance(data, np.ndarray):
if data.dtype.names:
init_func = self._init_from_ndarray # _struct
n_cols = len(data.dtype.names)
default_names = data.dtype.names
else:
init_func = self._init_from_ndarray # _homog
if data.shape == ():
raise ValueError('Can not initialize a Table with a scalar')
elif len(data.shape) == 1:
data = data[np.newaxis, :]
n_cols = data.shape[1]
elif isinstance(data, Mapping):
init_func = self._init_from_dict
default_names = list(data)
n_cols = len(default_names)
elif isinstance(data, Table):
# If user-input meta is None then use data.meta (if non-trivial)
if meta is None and data.meta:
# At this point do NOT deepcopy data.meta as this will happen after
# table init_func() is called. But for table input the table meta
# gets a key copy here if copy=False because later a direct object ref
# is used.
meta = data.meta if copy else data.meta.copy()
# Handle indices on input table. Copy primary key and don't copy indices
# if the input Table is in non-copy mode.
self.primary_key = data.primary_key
self._init_indices = self._init_indices and data._copy_indices
# Extract default names, n_cols, and then overwrite ``data`` to be the
# table columns so we can use _init_from_list.
default_names = data.colnames
n_cols = len(default_names)
data = list(data.columns.values())
init_func = self._init_from_list
elif data is None:
if names is None:
if dtype is None:
# Table was initialized as `t = Table()`. Set up for empty
# table with names=[], data=[], and n_cols=0.
# self._init_from_list() will simply return, giving the
# expected empty table.
names = []
else:
try:
# No data nor names but dtype is available. This must be
# valid to initialize a structured array.
dtype = np.dtype(dtype)
names = dtype.names
dtype = [dtype[name] for name in names]
except Exception:
raise ValueError('dtype was specified but could not be '
'parsed for column names')
# names is guaranteed to be set at this point
init_func = self._init_from_list
n_cols = len(names)
data = [[]] * n_cols
else:
raise ValueError(f'Data type {type(data)} not allowed to init Table')
# Set up defaults if names and/or dtype are not specified.
# A value of None means the actual value will be inferred
# within the appropriate initialization routine, either from
# existing specification or auto-generated.
if dtype is None:
dtype = [None] * n_cols
elif isinstance(dtype, np.dtype):
if default_names is None:
default_names = dtype.names
# Convert a numpy dtype input to a list of dtypes for later use.
dtype = [dtype[name] for name in dtype.names]
if names is None:
names = default_names or [None] * n_cols
# Numpy does not support bytes column names on Python 3, so fix them
# up now.
names = [fix_column_name(name) for name in names]
self._check_names_dtype(names, dtype, n_cols)
# Finally do the real initialization
init_func(data, names, dtype, n_cols, copy)
# Set table meta. If copy=True then deepcopy meta otherwise use the
# user-supplied meta directly.
if meta is not None:
self.meta = deepcopy(meta) if copy else meta
# Update meta with TableAttributes supplied as kwargs in Table init.
# This takes precedence over previously-defined meta.
if meta_table_attrs:
for attr, value in meta_table_attrs.items():
setattr(self, attr, value)
# Whatever happens above, the masked property should be set to a boolean
if self.masked not in (None, True, False):
raise TypeError("masked property must be None, True or False")
self._set_column_attribute('unit', units)
self._set_column_attribute('description', descriptions)
def _set_column_attribute(self, attr, values):
"""Set ``attr`` for columns to ``values``, which can be either a dict (keyed by column
name) or a dict of name: value pairs. This is used for handling the ``units`` and
``descriptions`` kwargs to ``__init__``.
"""
if not values:
return
if isinstance(values, Row):
# For a Row object transform to an equivalent dict.
values = {name: values[name] for name in values.colnames}
if not isinstance(values, dict):
# If not a dict map, assume iterable and map to dict if the right length
if len(values) != len(self.columns):
raise ValueError(f'sequence of {attr} values must match number of columns')
values = dict(zip(self.colnames, values))
for name, value in values.items():
if name not in self.columns:
raise ValueError(f'invalid column name {name} for setting {attr} attribute')
# Special case: ignore unit if it is an empty or blank string
if attr == 'unit' and isinstance(value, str):
if value.strip() == '':
value = None
if value not in (np.ma.masked, None):
setattr(self[name].info, attr, value)
def __getstate__(self):
columns = OrderedDict((key, col if isinstance(col, BaseColumn) else col_copy(col))
for key, col in self.columns.items())
return (columns, self.meta)
def __setstate__(self, state):
columns, meta = state
self.__init__(columns, meta=meta)
@property
def mask(self):
# Dynamic view of available masks
if self.masked or self.has_masked_columns or self.has_masked_values:
mask_table = Table([getattr(col, 'mask', FalseArray(col.shape))
for col in self.itercols()],
names=self.colnames, copy=False)
# Set hidden attribute to force inplace setitem so that code like
# t.mask['a'] = [1, 0, 1] will correctly set the underlying mask.
# See #5556 for discussion.
mask_table._setitem_inplace = True
else:
mask_table = None
return mask_table
@mask.setter
def mask(self, val):
self.mask[:] = val
@property
def _mask(self):
"""This is needed so that comparison of a masked Table and a
MaskedArray works. The requirement comes from numpy.ma.core
so don't remove this property."""
return self.as_array().mask
def filled(self, fill_value=None):
"""Return copy of self, with masked values filled.
If input ``fill_value`` supplied then that value is used for all
masked entries in the table. Otherwise the individual
``fill_value`` defined for each table column is used.
Parameters
----------
fill_value : str
If supplied, this ``fill_value`` is used for all masked entries
in the entire table.
Returns
-------
filled_table : Table
New table with masked values filled
"""
if self.masked or self.has_masked_columns or self.has_masked_values:
# Get new columns with masked values filled, then create Table with those
# new cols (copy=False) but deepcopy the meta.
data = [col.filled(fill_value) if hasattr(col, 'filled') else col
for col in self.itercols()]
return self.__class__(data, meta=deepcopy(self.meta), copy=False)
else:
# Return copy of the original object.
return self.copy()
@property
def indices(self):
'''
Return the indices associated with columns of the table
as a TableIndices object.
'''
lst = []
for column in self.columns.values():
for index in column.info.indices:
if sum([index is x for x in lst]) == 0: # ensure uniqueness
lst.append(index)
return TableIndices(lst)
@property
def loc(self):
'''
Return a TableLoc object that can be used for retrieving
rows by index in a given data range. Note that both loc
and iloc work only with single-column indices.
'''
return TableLoc(self)
@property
def loc_indices(self):
"""
Return a TableLocIndices object that can be used for retrieving
the row indices corresponding to given table index key value or values.
"""
return TableLocIndices(self)
@property
def iloc(self):
'''
Return a TableILoc object that can be used for retrieving
indexed rows in the order they appear in the index.
'''
return TableILoc(self)
def add_index(self, colnames, engine=None, unique=False):
'''
Insert a new index among one or more columns.
If there are no indices, make this index the
primary table index.
Parameters
----------
colnames : str or list
List of column names (or a single column name) to index
engine : type or None
Indexing engine class to use, from among SortedArray, BST,
and SCEngine. If the supplied argument is None
(by default), use SortedArray.