astropy/astropy/table/table.py at master · astromancer/astropy

History

3915 lines (3208 loc) · 145 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

# Licensed under a 3-clause BSD style license - see LICENSE.rst

from .index import SlicedIndex, TableIndices, TableLoc, TableILoc, TableLocIndices

import sys

from collections import OrderedDict, defaultdict

from collections.abc import Mapping

import warnings

from copy import deepcopy

import types

import itertools

import weakref

import numpy as np

from numpy import ma

from astropy import log

from astropy.units import Quantity, QuantityInfo

from astropy.utils import isiterable, ShapedLikeNDArray

from astropy.utils.console import color_print

from astropy.utils.metadata import MetaData, MetaAttribute

from astropy.utils.data_info import BaseColumnInfo, MixinInfo, ParentDtypeInfo, DataInfo

from astropy.utils.decorators import format_doc

from astropy.io.registry import UnifiedReadWriteMethod

from . import groups

from .pprint import TableFormatter

from .column import (BaseColumn, Column, MaskedColumn, _auto_names, FalseArray,

col_copy, _convert_sequence_data_to_array)

from .row import Row

from .np_utils import fix_column_name

from .info import TableInfo

from .index import Index, _IndexModeContext, get_index

from .connect import TableRead, TableWrite

from . import conf

# Informal developer notes stored as a module-level string rather than as
# comments. NOTE(review): no use of this name is visible in this part of the
# file; it appears to exist purely as documentation -- confirm before removing.
_implementation_notes = """

This string has informal notes concerning Table implementation for developers.

Things to remember:

- Table has customizable attributes ColumnClass, Column, MaskedColumn.

Table.Column is normally just column.Column (same w/ MaskedColumn)

but in theory they can be different. Table.ColumnClass is the default

class used to create new non-mixin columns, and this is a function of

the Table.masked attribute. Column creation / manipulation in a Table

needs to respect these.

- Column objects that get inserted into the Table.columns attribute must

have the info.parent_table attribute set correctly. Beware just dropping

an object into the columns dict since an existing column may

be part of another Table and have parent_table set to point at that

table. Dropping that column into `columns` of this Table will cause

a problem for the old one so the column object needs to be copied (but

not necessarily the data).

Currently replace_column is always making a copy of both object and

data if parent_table is set. This could be improved but requires a

generic way to copy a mixin object but not the data.

- Be aware of column objects that have indices set.

- `cls.ColumnClass` is a property that effectively uses the `masked` attribute

to choose either `cls.Column` or `cls.MaskedColumn`.

"""

# Doctest configuration for this module: names of objects whose docstring
# examples are to be skipped.
# NOTE(review): presumably consumed by the doctest plugin used by the test
# suite, and presumably skipped because the examples need I/O -- confirm.
__doctest_skip__ = ['Table.read', 'Table.write', 'Table._read',
                    'Table.convert_bytestring_to_unicode',
                    'Table.convert_unicode_to_bytestring',
                    ]

# Docstring examples of objects matching the '*pandas' pattern are declared
# to require pandas>=1.1.
__doctest_requires__ = {'*pandas': ['pandas>=1.1']}

# Docstring template describing the pretty-print parameters. The leading
# ``{__doc__}`` field is a str.format-style placeholder.
# NOTE(review): presumably filled in through the imported ``format_doc``
# decorator; the call sites are outside this view -- confirm.
_pprint_docs = """

{__doc__}

Parameters

----------

max_lines : int or `None`

Maximum number of lines in table output.

max_width : int or `None`

Maximum character width of output.

show_name : bool

Include a header row for column names. Default is True.

show_unit : bool

Include a header row for unit. Default is to show a row

for units only if one or more columns has a defined value

for the unit.

show_dtype : bool

Include a header row for column dtypes. Default is True.

align : str or list or tuple or `None`

Left/right alignment of columns. Default is right (None) for all

columns. Other allowed values are '>', '<', '^', and '0=' for

right, left, centered, and 0-padded, respectively. A list of

strings can be provided for alignment of tables with multiple

columns.

"""

# Docstring template describing the pformat parameters and return value. The
# leading ``{__doc__}`` field is a str.format-style placeholder.
# NOTE(review): presumably filled in through the imported ``format_doc``
# decorator; the call sites are outside this view -- confirm.
_pformat_docs = """

{__doc__}

Parameters

----------

max_lines : int or `None`

Maximum number of rows to output

max_width : int or `None`

Maximum character width of output

show_name : bool

Include a header row for column names. Default is True.

show_unit : bool

Include a header row for unit. Default is to show a row

for units only if one or more columns has a defined value

for the unit.

show_dtype : bool

Include a header row for column dtypes. Default is True.

html : bool

Format the output as an HTML table. Default is False.

tableid : str or `None`

An ID tag for the table; only used if html is set. Default is

"table{id}", where id is the unique integer id of the table object,

id(self)

align : str or list or tuple or `None`

Left/right alignment of columns. Default is right (None) for all

columns. Other allowed values are '>', '<', '^', and '0=' for

right, left, centered, and 0-padded, respectively. A list of

strings can be provided for alignment of tables with multiple

columns.

tableclass : str or list of str or `None`

CSS classes for the table; only used if html is set. Default is

None.

Returns

-------

lines : list

Formatted table as a list of strings.

"""

class TableReplaceWarning(UserWarning):
    """
    Warning category for a table column being replaced through the
    ``Table.__setitem__`` syntax, e.g. ``t['a'] = val``.

    It deliberately derives from ``UserWarning`` rather than
    ``AstropyWarning``: the intent is to use ``stacklevel=3`` so that the
    reported location is the line in the user's own code.
    """

def descr(col):
    """Return the array-interface style description of a column.

    Parameters
    ----------
    col : object
        Column-like object exposing ``info.name`` and ``info.dtype`` and,
        optionally, ``shape``.

    Returns
    -------
    tuple
        ``(name, dtype, shape)`` where ``dtype`` falls back to ``'O'`` when
        ``col.info.dtype`` is None and ``shape`` is ``col.shape`` without its
        first axis (``()`` when the object has no ``shape`` attribute).  The
        tuple is in the form used for one field of a structured dtype.
    """
    info = col.info

    if info.dtype is None:
        col_dtype = 'O'
    else:
        col_dtype = info.dtype

    if hasattr(col, 'shape'):
        # Drop the leading axis; the rest is the per-element shape.
        col_shape = col.shape[1:]
    else:
        col_shape = ()

    return (info.name, col_dtype, col_shape)

def has_info_class(obj, cls):
    """Tell whether the ``info`` attribute of ``obj``'s class is a ``cls``.

    The lookup goes through ``obj.__class__`` on purpose: reading ``info``
    from the instance has the side effect of populating
    ``obj.__dict__['info']`` when it is not there yet.

    Parameters
    ----------
    obj : object
        Object to inspect.
    cls : class or tuple of classes
        Expected info class(es).

    Returns
    -------
    bool
        True if the class-level ``info`` exists and is an instance of ``cls``.
    """
    class_level_info = getattr(obj.__class__, 'info', None)
    return isinstance(class_level_info, cls)

def _get_names_from_list_of_dict(rows):

"""Return list of column names if ``rows`` is a list of dict that

defines table data.

If rows is not a list of dict then return None.

"""

if rows is None:

return None

names = set()

for row in rows:

if not isinstance(row, dict):

return None

names.update(row)

return list(names)

# Note to future maintainers: when transitioning this to dict

# be sure to change the OrderedDict ref(s) in Row and in __len__().

class TableColumns(OrderedDict):
    """OrderedDict subclass for a set of columns.

    This class enhances item access to provide convenient access to columns
    by name or index, including slice access.  It also handles renaming
    of columns.

    The initialization argument ``cols`` can be a list of ``Column`` objects
    or any structure that is valid for initializing a Python dict.  This
    includes a dict, list of (key, val) tuples or [key, val] lists, etc.

    Parameters
    ----------
    cols : dict, list, tuple; optional
        Column objects as data structure that can init dict (see above).
        ``None`` (the default) creates an empty container.
    """

    def __init__(self, cols=None):
        # A ``None`` sentinel replaces the former shared mutable ``{}`` default.
        if cols is None:
            cols = {}

        if isinstance(cols, (list, tuple)):
            # `cols` should be a list of two-tuples, but it is allowed to have
            # columns (BaseColumn or mixins) in the list; those are keyed by
            # their ``info.name``.
            cols = [(col.info.name, col) if has_info_class(col, BaseColumnInfo) else col
                    for col in cols]

        super().__init__(cols)

    def __getitem__(self, item):
        """Get items from a TableColumns object.

        ::

          tc = TableColumns(cols=[Column(name='a'), Column(name='b'), Column(name='c')])
          tc['a']  # Column('a')
          tc[1]  # Column('b')
          tc['a', 'b']  # <TableColumns names=('a', 'b')>
          tc[1:3]  # <TableColumns names=('b', 'c')>

        Raises
        ------
        IndexError
            If ``item`` is not a str, integer, 0-d integer array, tuple or
            slice.
        """
        if isinstance(item, str):
            return OrderedDict.__getitem__(self, item)

        if isinstance(item, (int, np.integer)):
            return list(self.values())[item]

        # 0-d integer arrays, signed or unsigned, act like an integer index.
        if (isinstance(item, np.ndarray) and item.shape == ()
                and item.dtype.kind in ('i', 'u')):
            return list(self.values())[item.item()]

        if isinstance(item, tuple):
            return self.__class__([self[x] for x in item])

        if isinstance(item, slice):
            return self.__class__([self[x] for x in list(self)[item]])

        raise IndexError(f'Illegal key or index value for {self.__class__.__name__} object')

    def __setitem__(self, item, value, validated=False):
        """
        Set item in this dict instance, but do not allow directly replacing an
        existing column unless it is already validated (and thus is certain to
        not corrupt the table).

        NOTE: it is easily possible to corrupt a table by directly *adding* a new
        key to the TableColumns attribute of a Table, e.g.
        ``t.columns['jane'] = 'doe'``.

        Raises
        ------
        ValueError
            If ``item`` already exists and ``validated`` is false.
        """
        if item in self and not validated:
            raise ValueError(f"Cannot replace column '{item}'. "
                             "Use Table.replace_column() instead.")
        super().__setitem__(item, value)

    def __repr__(self):
        names = (f"'{x}'" for x in self.keys())
        return f"<{self.__class__.__name__} names=({','.join(names)})>"

    def _rename_column(self, name, new_name):
        """Rename column ``name`` to ``new_name`` keeping the column order.

        Raises
        ------
        KeyError
            If ``new_name`` is already present (or ``name`` is missing).
        """
        if name == new_name:
            return

        if new_name in self:
            raise KeyError(f"Column {new_name} already exists")

        # Rename column names in pprint include/exclude attributes as needed
        parent_table = self[name].info.parent_table
        if parent_table is not None:
            parent_table.pprint_exclude_names._rename(name, new_name)
            parent_table.pprint_include_names._rename(name, new_name)

        # Rebuild the mapping from scratch so the renamed key keeps its position.
        new_names = [new_name if key == name else key for key in self]
        cols = list(self.values())
        self.clear()
        self.update(list(zip(new_names, cols)))

    def __delitem__(self, name):
        # Remove column names from pprint include/exclude attributes as needed.
        # __delitem__ also gets called for pop() and popitem().
        parent_table = self[name].info.parent_table
        if parent_table is not None:
            # _remove() method does not require that `name` is in the attribute
            parent_table.pprint_exclude_names._remove(name)
            parent_table.pprint_include_names._remove(name)
        return super().__delitem__(name)

    def isinstance(self, cls):
        """
        Return a list of columns which are instances of the specified classes.

        Parameters
        ----------
        cls : class or tuple of classes
            Column class (including mixin) or tuple of Column classes.

        Returns
        -------
        col_list : list of Columns
            List of Column objects which are instances of given classes.
        """
        return [col for col in self.values() if isinstance(col, cls)]

    def not_isinstance(self, cls):
        """
        Return a list of columns which are not instances of the specified classes.

        Parameters
        ----------
        cls : class or tuple of classes
            Column class (including mixin) or tuple of Column classes.

        Returns
        -------
        col_list : list of Columns
            List of Column objects which are not instances of given classes.
        """
        return [col for col in self.values() if not isinstance(col, cls)]

class TableAttribute(MetaAttribute):
    """
    Descriptor to define a custom attribute for a Table subclass.

    The value of the ``TableAttribute`` will be stored in a dict named
    ``__attributes__`` that is stored in the table ``meta``.  The attribute
    can be accessed and set in the usual way, and it can be provided when
    creating the object.

    Defining an attribute by this mechanism ensures that it will persist if
    the table is sliced or serialized, for example as a pickle or ECSV file.

    See the `~astropy.utils.metadata.MetaAttribute` documentation for additional
    details.

    Parameters
    ----------
    default : object
        Default value for attribute

    Examples
    --------
      >>> from astropy.table import Table, TableAttribute
      >>> class MyTable(Table):
      ...     identifier = TableAttribute(default=1)
      >>> t = MyTable(identifier=10)
      >>> t.identifier
      10
      >>> t.meta
      OrderedDict([('__attributes__', {'identifier': 10})])
    """

class PprintIncludeExclude(TableAttribute):
    """Descriptor holding the tuple of column names shown/hidden when printing.

    Because it derives from ``TableAttribute`` the stored value lives in the
    table ``meta['__attributes__']``.  It backs the ``pprint_include_names``
    and ``pprint_exclude_names`` attributes of ``Table``.
    """

    def __get__(self, instance, owner_cls):
        """Return the per-instance copy of this descriptor.

        Accessed on the class (``instance`` is None) the descriptor itself is
        returned.  Otherwise a deep copy of the descriptor is created on first
        access and cached in ``instance.__dict__`` under the attribute name.
        """
        if instance is None:
            # Access from the class rather than from an instance.
            return self

        per_instance = instance.__dict__.get(self.name)
        if per_instance is None:
            # Nothing stored on `instance` yet: copy the class-level
            # descriptor object and attach the copy to the instance.
            per_instance = deepcopy(self)
            instance.__dict__[self.name] = per_instance

        # Refresh the back-reference on every access: when an instance is
        # copied this attribute is copied along with it and would otherwise
        # no longer refer to the right owner.
        per_instance._instance_ref = weakref.ref(instance)
        return per_instance

    def __set__(self, instance, names):
        """Set value of ``instance`` attribute to ``names``.

        Parameters
        ----------
        instance : object
            Instance that owns the attribute
        names : None, str, list, tuple
            Column name(s) to store, or None to clear
        """
        if names is None:
            # Drop the value from meta['__attributes__'];
            # later reads will simply give None.
            delattr(instance, self.name)
            return None

        if isinstance(names, str):
            names = [names]

        # The names end up in instance.meta['__attributes__'] as a tuple.
        return super().__set__(instance, tuple(names))

    def __call__(self):
        """Get the value of the attribute.

        Returns
        -------
        names : None, tuple
            Include/exclude names
        """
        # Read the value out of instance.meta['__attributes__'].
        owner = self._instance_ref()
        return super().__get__(owner, owner.__class__)

    def __repr__(self):
        if not hasattr(self, '_instance_ref'):
            return super().__repr__()
        return f'<{self.__class__.__name__} name={self.name} value={self()}>'

    def _add_remove_setup(self, names):
        """Shared preparation for add and remove.

        Returns ``(instance, names, value)`` where ``instance`` is the owning
        object, ``names`` is the input coerced to a list and ``value`` is the
        current attribute value coerced to a list (empty when unset).
        """
        if isinstance(names, str):
            names = [names]
        else:
            names = list(names)

        # Equivalent to self(), except that `instance` is needed here too.
        instance = self._instance_ref()
        current = super().__get__(instance, instance.__class__)
        if current is None:
            current = []
        else:
            current = list(current)

        return instance, names, current

    def add(self, names):
        """Add ``names`` to the include/exclude attribute.

        Parameters
        ----------
        names : str, list, tuple
            Column name(s) to add
        """
        instance, names, current = self._add_remove_setup(names)
        for name in names:
            # Skip names already stored (including ones added by this call).
            if name not in current:
                current.append(name)
        super().__set__(instance, tuple(current))

    def remove(self, names):
        """Remove ``names`` from the include/exclude attribute.

        Parameters
        ----------
        names : str, list, tuple
            Column name(s) to remove

        Raises
        ------
        ValueError
            If one of ``names`` is not currently stored.
        """
        self._remove(names, raise_exc=True)

    def _remove(self, names, raise_exc=False):
        """Remove ``names`` with optional checking if they exist"""
        instance, names, current = self._add_remove_setup(names)

        # Without any stored attributes there is nothing to do.
        if not raise_exc and '__attributes__' not in instance.meta:
            return

        # Take the names out one at a time; a missing name is an error only
        # when the caller asked for it.
        for name in names:
            if name in current:
                current.remove(name)  # Using the list.remove method
            elif raise_exc:
                raise ValueError(f'{name} not in {self.name}')

        # Store back either None (nothing left) or a tuple.
        self.__set__(instance, tuple(current) if current else None)

    def _rename(self, name, new_name):
        """Rename ``name`` to ``new_name`` if ``name`` is in the list"""
        stored = self() or ()
        if name not in stored:
            return
        renamed = list(stored)
        renamed[renamed.index(name)] = new_name
        self.set(renamed)

    def set(self, names):
        """Set value of include/exclude attribute to ``names``.

        The value is set immediately.  The returned object can also be used
        in a ``with`` statement, in which case the value that was in effect
        before this call is put back when the block exits.

        Parameters
        ----------
        names : None, str, list, tuple
            Column name(s) to store, or None to clear
        """
        class _Context:
            def __init__(self, descriptor_self):
                self.descriptor_self = descriptor_self
                # Remember the value in effect before it gets replaced.
                self.names_orig = descriptor_self()

            def __enter__(self):
                pass

            def __exit__(self, type, value, tb):
                descriptor = self.descriptor_self
                owner = descriptor._instance_ref()
                descriptor.__set__(owner, self.names_orig)

            def __repr__(self):
                return repr(self.descriptor_self)

        # The context must be built first so that it captures the old value.
        ctx = _Context(descriptor_self=self)

        self.__set__(self._instance_ref(), names)

        return ctx

class Table:

"""A class to represent tables of heterogeneous data.

`~astropy.table.Table` provides a class for heterogeneous tabular data.

A key enhancement provided by the `~astropy.table.Table` class over

e.g. a `numpy` structured array is the ability to easily modify the

structure of the table by adding or removing columns, or adding new

rows of data. In addition table and column metadata are fully supported.

`~astropy.table.Table` differs from `~astropy.nddata.NDData` by the

assumption that the input data consists of columns of homogeneous data,

where each column has a unique identifier and may contain additional

metadata such as the data unit, format, and description.

See also: https://docs.astropy.org/en/stable/table/

Parameters

----------

data : numpy ndarray, dict, list, Table, or table-like object, optional

Data to initialize table.

masked : bool, optional

Specify whether the table is masked.

names : list, optional

Specify column names.

dtype : list, optional

Specify column data types.

meta : dict, optional

Metadata associated with the table.

copy : bool, optional

Copy the input data. If the input is a Table the ``meta`` is always

copied regardless of the ``copy`` parameter.

Default is True.

rows : numpy ndarray, list of lists, optional

Row-oriented data for table instead of ``data`` argument.

copy_indices : bool, optional

Copy any indices in the input data. Default is True.

units : list, dict, optional

List or dict of units to apply to columns.

descriptions : list, dict, optional

List or dict of descriptions to apply to columns.

**kwargs : dict, optional

Additional keyword args when converting table-like object.

"""

meta = MetaData(copy=False)

# Define class attributes for core container objects to allow for subclass

# customization.

Row = Row

Column = Column

MaskedColumn = MaskedColumn

TableColumns = TableColumns

TableFormatter = TableFormatter

# Unified I/O read and write methods from .connect

read = UnifiedReadWriteMethod(TableRead)

write = UnifiedReadWriteMethod(TableWrite)

pprint_exclude_names = PprintIncludeExclude()

pprint_include_names = PprintIncludeExclude()

def as_array(self, keep_byteorder=False, names=None):
    """
    Build a structured ``np.ndarray`` (or ``np.ma.MaskedArray`` when the
    table has any masking) holding a fresh copy of the table data.

    Parameters
    ----------
    keep_byteorder : bool, optional
        By default every column of the result is in native byte order.
        Set to `True` to keep the byte order of the table columns
        (relevant only if some are non-native).
    names : list, optional:
        Names of the columns to put into the structured array.  All
        table columns are used by default.

    Returns
    -------
    table_array : np.ndarray (unmasked) or np.ma.MaskedArray (masked)
        Copy of table as a numpy structured array
    """
    use_masked = self.masked or self.has_masked_columns or self.has_masked_values
    make_empty = ma.empty if use_masked else np.empty

    if len(self.columns) == 0:
        return make_empty(0, dtype=None)

    selected = self.columns.values()
    if names is not None:
        selected = [col for col in selected if col.info.name in names]

    fields = []
    for col in selected:
        field = descr(col)
        if not col.info.dtype.isnative and not keep_byteorder:
            # Describe the field with the native-order variant of its dtype.
            native_dt = np.dtype(field[1]).newbyteorder('=')
            field = (field[0], native_dt, field[2])
        fields.append(field)

    out = make_empty(len(self), dtype=fields)
    for col in selected:
        colname = col.info.name

        # Assigning an array into a field of a structured array lets numpy
        # convert to the byte order of the destination where needed.
        out[colname] = col

        # For masked output a masked mixin column must have its mask copied
        # onto the output explicitly.
        if use_masked and has_info_class(col, MixinInfo) and hasattr(col, 'mask'):
            out[colname].mask = col.mask

    return out

def __init__(self, data=None, masked=False, names=None, dtype=None,
             meta=None, copy=True, rows=None, copy_indices=True,
             units=None, descriptions=None,
             **kwargs):
    """Initialize the table from column-oriented ``data`` or from ``rows``.

    The type of the input selects one of the ``_init_from_*`` methods, which
    does the actual column creation; afterwards ``meta``, any
    ``TableAttribute`` keyword values, ``units`` and ``descriptions`` are
    applied.  The parameters are described in the class docstring.

    Raises
    ------
    ValueError
        If ``dtype`` is given together with ``copy=False``, if both ``data``
        and ``rows`` are given, if ``data`` is a 0-d array, if ``dtype``
        alone cannot be parsed for column names, or if the type of ``data``
        is not supported.
    TypeError
        If an unexpected keyword argument is given, or if the resulting
        ``masked`` property is not None, True or False.
    """
    # Set up a placeholder empty table
    self._set_masked(masked)
    self.columns = self.TableColumns()
    self.formatter = self.TableFormatter()
    self._copy_indices = True  # copy indices from this Table by default
    self._init_indices = copy_indices  # whether to copy indices in init
    self.primary_key = None

    # Must copy if dtype are changing
    if not copy and dtype is not None:
        raise ValueError('Cannot specify dtype when copy=False')

    # Specifies list of names found for the case of initializing table with
    # a list of dict. If data are not list of dict then this is None.
    names_from_list_of_dict = None

    # Row-oriented input, e.g. list of lists or list of tuples, list of
    # dict, Row instance.  Set data to something that the subsequent code
    # will parse correctly.
    if rows is not None:
        if data is not None:
            raise ValueError('Cannot supply both `data` and `rows` values')
        if isinstance(rows, types.GeneratorType):
            # Without this then the all(..) test below uses up the generator
            # NOTE(review): no all(..) call is visible below any more; the
            # generator is still consumed by the list-of-dict scan, so the
            # materialization remains necessary.
            rows = list(rows)

        # Get column names if `rows` is a list of dict, otherwise this is None
        names_from_list_of_dict = _get_names_from_list_of_dict(rows)
        if names_from_list_of_dict:
            data = rows
        elif isinstance(rows, self.Row):
            data = rows
        else:
            # Transpose the rows into a list of column tuples.
            data = list(zip(*rows))

    # Infer the type of the input data and set up the initialization
    # function, number of columns, and potentially the default col names

    default_names = None

    # Handle custom (subclass) table attributes that are stored in meta.
    # These are defined as class attributes using the TableAttribute
    # descriptor.  Any such attributes get removed from kwargs here and
    # stored for use after the table is otherwise initialized. Any values
    # provided via kwargs will have precedence over existing values from
    # meta (e.g. from data as a Table or meta via kwargs).
    meta_table_attrs = {}
    if kwargs:
        # Iterate over a snapshot of the keys since kwargs is popped from.
        for attr in list(kwargs):
            # NOTE: this local name shadows the module-level ``descr``
            # function for the rest of this method.
            descr = getattr(self.__class__, attr, None)
            if isinstance(descr, TableAttribute):
                meta_table_attrs[attr] = kwargs.pop(attr)

    if hasattr(data, '__astropy_table__'):
        # Data object implements the __astropy_table__ interface method.
        # Calling that method returns an appropriate instance of
        # self.__class__ and respects the `copy` arg.  The returned
        # Table object should NOT then be copied.
        data = data.__astropy_table__(self.__class__, copy, **kwargs)
        copy = False
    elif kwargs:
        # Remaining kwargs are only valid for the table-like conversion above.
        raise TypeError('__init__() got unexpected keyword argument {!r}'
                        .format(list(kwargs.keys())[0]))

    # An empty, unstructured 1-d array is treated like no data at all.
    if (isinstance(data, np.ndarray)
            and data.shape == (0,)
            and not data.dtype.names):
        data = None

    if isinstance(data, self.Row):
        # Replace the Row by a one-row slice of its parent table.
        data = data._table[data._index:data._index + 1]

    if isinstance(data, (list, tuple)):
        # Get column names from `data` if it is a list of dict, otherwise this is None.
        # This might be previously defined if `rows` was supplied as an init arg.
        names_from_list_of_dict = (names_from_list_of_dict
                                   or _get_names_from_list_of_dict(data))
        if names_from_list_of_dict:
            init_func = self._init_from_list_of_dicts
            n_cols = len(names_from_list_of_dict)
        else:
            init_func = self._init_from_list
            n_cols = len(data)

    elif isinstance(data, np.ndarray):
        if data.dtype.names:
            init_func = self._init_from_ndarray  # _struct
            n_cols = len(data.dtype.names)
            default_names = data.dtype.names
        else:
            init_func = self._init_from_ndarray  # _homog
            if data.shape == ():
                raise ValueError('Can not initialize a Table with a scalar')
            elif len(data.shape) == 1:
                # Promote a 1-d array to 2-d with a single leading row.
                data = data[np.newaxis, :]
            n_cols = data.shape[1]

    elif isinstance(data, Mapping):
        init_func = self._init_from_dict
        default_names = list(data)
        n_cols = len(default_names)

    elif isinstance(data, Table):
        # If user-input meta is None then use data.meta (if non-trivial)
        if meta is None and data.meta:
            # At this point do NOT deepcopy data.meta as this will happen after
            # table init_func() is called.  But for table input the table meta
            # gets a key copy here if copy=False because later a direct object ref
            # is used.
            meta = data.meta if copy else data.meta.copy()

        # Handle indices on input table. Copy primary key and don't copy indices
        # if the input Table is in non-copy mode.
        self.primary_key = data.primary_key
        self._init_indices = self._init_indices and data._copy_indices

        # Extract default names, n_cols, and then overwrite ``data`` to be the
        # table columns so we can use _init_from_list.
        default_names = data.colnames
        n_cols = len(default_names)
        data = list(data.columns.values())

        init_func = self._init_from_list

    elif data is None:
        if names is None:
            if dtype is None:
                # Table was initialized as `t = Table()`. Set up for empty
                # table with names=[], data=[], and n_cols=0.
                # self._init_from_list() will simply return, giving the
                # expected empty table.
                names = []
            else:
                try:
                    # No data nor names but dtype is available.  This must be
                    # valid to initialize a structured array.
                    dtype = np.dtype(dtype)
                    names = dtype.names
                    dtype = [dtype[name] for name in names]
                except Exception:
                    raise ValueError('dtype was specified but could not be '
                                     'parsed for column names')
        # names is guaranteed to be set at this point
        init_func = self._init_from_list
        n_cols = len(names)
        # One empty list of data per column.
        data = [[]] * n_cols

    else:
        raise ValueError(f'Data type {type(data)} not allowed to init Table')

    # Set up defaults if names and/or dtype are not specified.
    # A value of None means the actual value will be inferred
    # within the appropriate initialization routine, either from
    # existing specification or auto-generated.

    if dtype is None:
        dtype = [None] * n_cols
    elif isinstance(dtype, np.dtype):
        if default_names is None:
            default_names = dtype.names
        # Convert a numpy dtype input to a list of dtypes for later use.
        dtype = [dtype[name] for name in dtype.names]

    if names is None:
        names = default_names or [None] * n_cols

    # Numpy does not support bytes column names on Python 3, so fix them
    # up now.
    names = [fix_column_name(name) for name in names]

    self._check_names_dtype(names, dtype, n_cols)

    # Finally do the real initialization
    init_func(data, names, dtype, n_cols, copy)

    # Set table meta.  If copy=True then deepcopy meta otherwise use the
    # user-supplied meta directly.
    if meta is not None:
        self.meta = deepcopy(meta) if copy else meta

    # Update meta with TableAttributes supplied as kwargs in Table init.
    # This takes precedence over previously-defined meta.
    if meta_table_attrs:
        for attr, value in meta_table_attrs.items():
            setattr(self, attr, value)

    # Whatever happens above, the masked property should be set to a boolean
    if self.masked not in (None, True, False):
        raise TypeError("masked property must be None, True or False")

    # Apply per-column units and descriptions last, once all columns exist.
    self._set_column_attribute('unit', units)
    self._set_column_attribute('description', descriptions)

def _set_column_attribute(self, attr, values):

"""Set ``attr`` for columns to ``values``, which can be either a dict (keyed by column

name) or a dict of name: value pairs. This is used for handling the ``units`` and

``descriptions`` kwargs to ``__init__``.

"""

if not values:

return

if isinstance(values, Row):

# For a Row object transform to an equivalent dict.

values = {name: values[name] for name in values.colnames}

if not isinstance(values, dict):

# If not a dict map, assume iterable and map to dict if the right length

if len(values) != len(self.columns):

raise ValueError(f'sequence of {attr} values must match number of columns')

values = dict(zip(self.colnames, values))

for name, value in values.items():

if name not in self.columns:

raise ValueError(f'invalid column name {name} for setting {attr} attribute')

# Special case: ignore unit if it is an empty or blank string

if attr == 'unit' and isinstance(value, str):

if value.strip() == '':

value = None

if value not in (np.ma.masked, None):

setattr(self[name].info, attr, value)

def __getstate__(self):

columns = OrderedDict((key, col if isinstance(col, BaseColumn) else col_copy(col))

for key, col in self.columns.items())

return (columns, self.meta)

def __setstate__(self, state):

columns, meta = state

self.__init__(columns, meta=meta)

@property
def mask(self):
    """Table of the column masks, or None when nothing is masked.

    The result is built anew on every access.  Columns that have no
    ``mask`` attribute contribute a ``FalseArray`` of the column shape.
    """
    # Dynamic view of available masks
    if not (self.masked or self.has_masked_columns or self.has_masked_values):
        return None

    column_masks = [getattr(col, 'mask', FalseArray(col.shape))
                    for col in self.itercols()]
    mask_table = Table(column_masks, names=self.colnames, copy=False)

    # Hidden attribute forcing in-place setitem, so that code like
    # t.mask['a'] = [1, 0, 1] correctly sets the underlying mask.
    # See #5556 for discussion.
    mask_table._setitem_inplace = True

    return mask_table

@mask.setter
def mask(self, val):
    """Assign ``val`` to every row of the mask table."""
    self.mask[:] = val

@property

def _mask(self):

"""This is needed so that comparison of a masked Table and a

MaskedArray works. The requirement comes from numpy.ma.core

so don't remove this property."""

return self.as_array().mask

def filled(self, fill_value=None):
    """Return a copy of the table in which masked values are filled.

    When ``fill_value`` is given it replaces every masked entry of the
    table.  Otherwise each column uses its own ``fill_value``.

    Parameters
    ----------
    fill_value : str
        If supplied, this ``fill_value`` is used for all masked entries
        in the entire table.

    Returns
    -------
    filled_table : Table
        New table with masked values filled
    """
    if not (self.masked or self.has_masked_columns or self.has_masked_values):
        # Nothing is masked: a plain copy of the table is the answer.
        return self.copy()

    # Fill every column that knows how to; other columns are passed along
    # as they are.
    filled_cols = []
    for col in self.itercols():
        if hasattr(col, 'filled'):
            filled_cols.append(col.filled(fill_value))
        else:
            filled_cols.append(col)

    # The new table takes the columns without copying (copy=False) but
    # gets its own deep copy of the meta.
    return self.__class__(filled_cols, meta=deepcopy(self.meta), copy=False)

@property
def indices(self):
    """
    Return the indices associated with columns of the table
    as a TableIndices object.

    An index object reachable from several columns is listed only once.
    """
    unique_indices = []
    for column in self.columns.values():
        for index in column.info.indices:
            # Uniqueness is by identity, not by equality.
            if not any(index is seen for seen in unique_indices):
                unique_indices.append(index)

    return TableIndices(unique_indices)

@property
def loc(self):
    """
    Return a TableLoc object that can be used for retrieving
    rows by index in a given data range. Note that both loc
    and iloc work only with single-column indices.
    """
    locator = TableLoc(self)
    return locator

@property
def loc_indices(self):
    """
    Return a TableLocIndices object that can be used for retrieving
    the row indices corresponding to given table index key value or values.
    """
    locator = TableLocIndices(self)
    return locator

@property
def iloc(self):
    """
    Return a TableILoc object that can be used for retrieving
    indexed rows in the order they appear in the index.
    """
    locator = TableILoc(self)
    return locator

def add_index(self, colnames, engine=None, unique=False):

'''

Insert a new index among one or more columns.

If there are no indices, make this index the

primary table index.

Parameters

----------

colnames : str or list

List of column names (or a single column name) to index

engine : type or None

Indexing engine class to use, from among SortedArray, BST,

and SCEngine. If the supplied argument is None

(by default), use SortedArray.

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

table.py

Latest commit

History

table.py

File metadata and controls