# Licensed under a 3-clause BSD style license - see LICENSE.rst
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from ..extern import six
import weakref
from copy import deepcopy
import numpy as np
from numpy import ma
from ..units import Unit, Quantity
from ..utils.compat import NUMPY_LT_1_8
from ..utils.console import color_print
from ..utils.metadata import MetaData
from . import groups
from . import pprint
from .np_utils import fix_column_name
from ..config import ConfigAlias
AUTO_COLNAME = ConfigAlias(
'0.4', 'AUTO_COLNAME', 'auto_colname',
'astropy.table.column', 'astropy.table')
# Create a generic TableFormatter object for use by bare columns with no
# parent table.
FORMATTER = pprint.TableFormatter()
INTEGER_TYPES = (int, long, np.integer) if six.PY2 else (int, np.integer)
def _auto_names(n_cols):
from . import conf
return [str(conf.auto_colname).format(i) for i in range(n_cols)]
# list of one and two-dimensional comparison functions, which sometimes return
# a Column class and sometimes a plain array. Used in __array_wrap__ to ensure
# they only return plain (masked) arrays (see #1446 and #1685)
_comparison_functions = set(
[np.greater, np.greater_equal, np.less, np.less_equal,
np.not_equal, np.equal,
np.isfinite, np.isinf, np.isnan, np.sign, np.signbit])
COLUMN_ATTRS = set(['name', 'unit', 'dtype', 'format', 'description', 'meta', 'parent_table'])
def col_setattr(col, attr, value):
"""
Set one of the column attributes.
Warning: this function is subject to change or removal.
"""
if attr not in COLUMN_ATTRS:
raise AttributeError("attribute must be one of {0}".format(COLUMN_ATTRS))
# The unit and dtype attributes are considered universal and do NOT get
# stored in _astropy_column_attrs. For BaseColumn instances use the usual setattr.
if isinstance(col, BaseColumn):
setattr(col, attr, value)
else:
# If no _astropy_column_attrs or it is None then convert to dict
if getattr(col, '_astropy_column_attrs', None) is None:
col._astropy_column_attrs = {}
if attr == 'parent_table':
value = None if value is None else weakref.ref(value)
col._astropy_column_attrs[attr] = value
def col_getattr(col, attr, default=None):
"""
Get one of the column attributes
Warning: this function is subject to change or removal.
"""
if attr not in COLUMN_ATTRS:
raise AttributeError("attribute must be one of {0}".format(COLUMN_ATTRS))
# The unit and dtype attributes are considered universal and do NOT get
# stored in _astropy_column_attrs. For BaseColumn instances use the usual setattr.
if (isinstance(col, BaseColumn) or
(isinstance(col, Quantity) and attr in ('dtype', 'unit'))):
value = getattr(col, attr, default)
else:
# If col does not have _astropy_column_attrs or it is None (meaning
# nothing has been set yet) then return default, otherwise look for
# the attribute in the astropy_column_attrs dict.
if getattr(col, '_astropy_column_attrs', None) is None:
value = default
else:
value = col._astropy_column_attrs.get(attr, default)
# Weak ref for parent table
if attr == 'parent_table' and callable(value):
value = value()
# Mixins have a default dtype of Object if nothing else was set
if attr == 'dtype' and value is None:
value = np.dtype('O')
return value
def _col_update_attrs_from(newcol, col, exclude_attrs=['name', 'parent_table']):
"""
Update _astropy_column_attrs from mixin `col` to `newcol`. Does nothing
for BaseColumn cols
Warning: this function is subject to change or removal.
"""
if isinstance(newcol, BaseColumn):
return
attrs = COLUMN_ATTRS - set(exclude_attrs)
for attr in attrs:
val = col_getattr(col, attr)
if val is not None:
col_setattr(newcol, attr, deepcopy(val))
def col_iter_str_vals(col):
"""
This is a mixin-safe version of Column.iter_str_vals.
Warning: this function is subject to change or removal.
"""
parent_table = col_getattr(col, 'parent_table')
formatter = FORMATTER if parent_table is None else parent_table.formatter
_pformat_col_iter = formatter._pformat_col_iter
for str_val in _pformat_col_iter(col, -1, False, False, {}):
yield str_val
def col_copy(col):
"""
This is a mixin-safe version of Column.copy() (with copy_data=True).
Warning: this function is subject to change or removal.
"""
if isinstance(col, BaseColumn):
return col.copy()
if hasattr(col, '_astropy_column_attrs'):
col_setattr(col, 'parent_table', None) # Don't copy weakref to parent table
newcol = col.copy() if hasattr(col, 'copy') else deepcopy(col)
# Copy old attributes. Even deepcopy above may not get this (e.g. pandas).
if (not hasattr(newcol, '_astropy_column_attrs') or
newcol._astropy_column_attrs is None):
_column_attrs = deepcopy(getattr(col, '_astropy_column_attrs', {}))
newcol._astropy_column_attrs = _column_attrs
return newcol
class FalseArray(np.ndarray):
def __new__(cls, shape):
obj = np.zeros(shape, dtype=np.bool).view(cls)
return obj
def __setitem__(self, item, val):
val = np.asarray(val)
if np.any(val):
raise ValueError('Cannot set any element of {0} class to True'
.format(self.__class__.__name__))
def __setslice__(self, start, stop, val):
val = np.asarray(val)
if np.any(val):
raise ValueError('Cannot set any element of {0} class to True'
.format(self.__class__.__name__))
class BaseColumn(np.ndarray):
meta = MetaData()
def __new__(cls, data=None, name=None,
dtype=None, shape=(), length=0,
description=None, unit=None, format=None, meta=None, copy=False):
if data is None:
dtype = (np.dtype(dtype).str, shape)
self_data = np.zeros(length, dtype=dtype)
elif isinstance(data, BaseColumn) and hasattr(data, '_name'):
# When unpickling a MaskedColumn, ``data`` will be a bare
# BaseColumn with none of the expected attributes. In this case
# do NOT execute this block which initializes from ``data``
# attributes.
self_data = np.array(data.data, dtype=dtype, copy=copy)
if description is None:
description = data.description
if unit is None:
unit = unit or data.unit
if format is None:
format = data.format
if meta is None:
meta = deepcopy(data.meta)
if name is None:
name = data.name
elif isinstance(data, Quantity):
if unit is None:
self_data = np.array(data, dtype=dtype, copy=copy)
unit = data.unit
else:
self_data = np.array(data.to(unit), dtype=dtype, copy=copy)
if description is None:
description = col_getattr(data, 'description')
if format is None:
format = col_getattr(data, 'format')
if meta is None:
meta = deepcopy(col_getattr(data, 'meta'))
else:
self_data = np.array(data, dtype=dtype, copy=copy)
self = self_data.view(cls)
self._name = fix_column_name(name)
self.unit = unit
self.format = format
self.description = description
self.meta = meta
self._parent_table = None
return self
@property
def data(self):
return self.view(np.ndarray)
@property
def parent_table(self):
if self._parent_table is None:
return None
else:
return self._parent_table()
@parent_table.setter
def parent_table(self, table):
if table is None:
self._parent_table = None
else:
self._parent_table = weakref.ref(table)
def copy(self, order='C', data=None, copy_data=True):
"""
Return a copy of the current instance.
If ``data`` is supplied then a view (reference) of ``data`` is used,
and ``copy_data`` is ignored.
Parameters
----------
order : {'C', 'F', 'A', 'K'}, optional
Controls the memory layout of the copy. 'C' means C-order,
'F' means F-order, 'A' means 'F' if ``a`` is Fortran contiguous,
'C' otherwise. 'K' means match the layout of ``a`` as closely
as possible. (Note that this function and :func:numpy.copy are very
similar, but have different default values for their order=
arguments.) Default is 'C'.
data : array, optional
If supplied then use a view of ``data`` instead of the instance
data. This allows copying the instance attributes and meta.
copy_data : bool, optional
Make a copy of the internal numpy array instead of using a
reference. Default is True.
Returns
-------
col : Column or MaskedColumn
Copy of the current column (same type as original)
"""
if data is None:
data = self.data
if copy_data:
data = data.copy(order)
out = data.view(self.__class__)
out.__array_finalize__(self)
# for MaskedColumn, MaskedArray.__array_finalize__ also copies mask
# from self, which is not the idea here, so undo
if isinstance(self, MaskedColumn):
out._mask = data._mask
self._copy_groups(out)
return out
def __setstate__(self, state):
"""
Restore the internal state of the Column/MaskedColumn for pickling
purposes. This requires that the last element of ``state`` is a
5-tuple that has Column-specific state values.
"""
# Get the Column attributes and meta
name, unit, format, description, meta = state[-1]
state = state[:-1]
# Using super(type(self), self).__setstate__() gives an infinite
# recursion. Manually call the right super class to actually set up
# the array object.
super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
super_class.__setstate__(self, state)
# Set the Column attributes and meta
self._name = name
self.unit = unit
self.format = format
self.description = description
self.meta = meta
def __reduce__(self):
"""
Return a 3-tuple for pickling a Column. Use the super-class
functionality but then add in a 5-tuple of Column-specific values
that get used in __setstate__.
"""
super_class = ma.MaskedArray if isinstance(self, ma.MaskedArray) else np.ndarray
reconstruct_func, reconstruct_func_args, state = super_class.__reduce__(self)
# Define Column-specific attrs and meta that gets added to state.
column_state = (self.name, self.unit, self.format, self.description,
self.meta)
state = state + (column_state,)
return reconstruct_func, reconstruct_func_args, state
def __getitem__(self, item):
if isinstance(item, INTEGER_TYPES):
return self.data[item] # Return as plain ndarray or ma.MaskedArray
else:
return super(BaseColumn, self).__getitem__(item)
# avoid == and != to be done based on type of subclass
# (helped solve #1446; see also __array_wrap__)
def __eq__(self, other):
return self.data.__eq__(other)
def __ne__(self, other):
return self.data.__ne__(other)
def __array_finalize__(self, obj):
# Obj will be none for direct call to Column() creator
if obj is None:
return
if six.callable(super(BaseColumn, self).__array_finalize__):
super(BaseColumn, self).__array_finalize__(obj)
# Self was created from template (e.g. obj[slice] or (obj * 2))
# or viewcast e.g. obj.view(Column). In either case we want to
# init Column attributes for self from obj if possible.
self.parent_table = None
self._copy_attrs(obj)
def __array_wrap__(self, out_arr, context=None):
"""
__array_wrap__ is called at the end of every ufunc.
Normally, we want a Column object back and do not have to do anything
special. But there are two exceptions:
1) If the output shape is different (e.g. for reduction ufuncs
like sum() or mean()), a Column still linking to a parent_table
makes little sense, so we return the output viewed as the
column content (ndarray or MaskedArray).
For this case, we use "[()]" to select everything, and to ensure we
convert a zero rank array to a scalar. (For some reason np.sum()
returns a zero rank scalar array while np.mean() returns a scalar;
So the [()] is needed for this case.
2) When the output is created by any function that returns a boolean
we also want to consistently return an array rather than a column
(see #1446 and #1685)
"""
out_arr = super(BaseColumn, self).__array_wrap__(out_arr, context)
if (self.shape != out_arr.shape or
(isinstance(out_arr, BaseColumn) and
(context is not None and context[0] in _comparison_functions))):
return out_arr.data[()]
else:
return out_arr
@property
def name(self):
"""
The name of this column.
"""
return self._name
@name.setter
def name(self, val):
val = fix_column_name(val)
if self.parent_table is not None:
table = self.parent_table
table.columns._rename_column(self.name, val)
self._name = val
@property
def descr(self):
"""Array-interface compliant full description of the column.
This returns a 3-tuple (name, type, shape) that can always be
used in a structured array dtype definition.
"""
return (self.name, self.dtype.str, self.shape[1:])
def iter_str_vals(self):
"""
Return an iterator that yields the string-formatted values of this
column.
Returns
-------
str_vals : iterator
Column values formatted as strings
"""
# Iterate over formatted values with no max number of lines, no column
# name, no unit, and ignoring the returned header info in outs.
_pformat_col_iter = self._formatter._pformat_col_iter
for str_val in _pformat_col_iter(self, -1, show_name=False, show_unit=False,
show_dtype=False, outs={}):
yield str_val
def attrs_equal(self, col):
"""Compare the column attributes of ``col`` to this object.
The comparison attributes are: ``name``, ``unit``, ``dtype``,
``format``, ``description``, and ``meta``.
Parameters
----------
col : Column
Comparison column
Returns
-------
equal : boolean
True if all attributes are equal
"""
if not isinstance(col, BaseColumn):
raise ValueError('Comparison `col` must be a Column or '
'MaskedColumn object')
attrs = ('name', 'unit', 'dtype', 'format', 'description', 'meta')
equal = all(getattr(self, x) == getattr(col, x) for x in attrs)
return equal
@property
def _formatter(self):
return FORMATTER if (self.parent_table is None) else self.parent_table.formatter
def pformat(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False,
html=False):
"""Return a list of formatted string representation of column values.
If no value of ``max_lines`` is supplied then the height of the
screen terminal is used to set ``max_lines``. If the terminal
height cannot be determined then the default will be
determined using the ``astropy.conf.max_lines`` configuration
item. If a negative value of ``max_lines`` is supplied then
there is no line limit applied.
Parameters
----------
max_lines : int
Maximum lines of output (header + data rows)
show_name : bool
Include column name (default=True)
show_unit : bool
Include a header row for unit (default=False)
show_dtype : bool
Include column dtype (default=False)
html : bool
Format the output as an HTML table (default=False)
Returns
-------
lines : list
List of lines with header and formatted column values
"""
_pformat_col = self._formatter._pformat_col
lines, outs = _pformat_col(self, max_lines, show_name=show_name,
show_unit=show_unit, show_dtype=show_dtype,
html=html)
return lines
def pprint(self, max_lines=None, show_name=True, show_unit=False, show_dtype=False):
"""Print a formatted string representation of column values.
If no value of ``max_lines`` is supplied then the height of the
screen terminal is used to set ``max_lines``. If the terminal
height cannot be determined then the default will be
determined using the ``astropy.conf.max_lines`` configuration
item. If a negative value of ``max_lines`` is supplied then
there is no line limit applied.
Parameters
----------
max_lines : int
Maximum number of values in output
show_name : bool
Include column name (default=True)
show_unit : bool
Include a header row for unit (default=False)
show_dtype : bool
Include column dtype (default=True)
"""
_pformat_col = self._formatter._pformat_col
lines, outs = _pformat_col(self, max_lines, show_name=show_name, show_unit=show_unit,
show_dtype=show_dtype)
n_header = outs['n_header']
for i, line in enumerate(lines):
if i < n_header:
color_print(line, 'red')
else:
print(line)
def more(self, max_lines=None, show_name=True, show_unit=False):
"""Interactively browse column with a paging interface.
Supported keys::
f, <space> : forward one page
b : back one page
r : refresh same page
n : next row
p : previous row
< : go to beginning
> : go to end
q : quit browsing
h : print this help
Parameters
----------
max_lines : int
Maximum number of lines in table output
show_name : bool
Include a header row for column names (default=True)
show_unit : bool
Include a header row for unit (default=False)
"""
_more_tabcol = self._formatter._more_tabcol
_more_tabcol(self, max_lines=max_lines, show_name=show_name,
show_unit=show_unit)
@property
def unit(self):
"""
The unit associated with this column. May be a string or a
`astropy.units.UnitBase` instance.
Setting the ``unit`` property does not change the values of the
data. To perform a unit conversion, use ``convert_unit_to``.
"""
return self._unit
@unit.setter
def unit(self, unit):
if unit is None:
self._unit = None
else:
self._unit = Unit(unit, parse_strict='silent')
@unit.deleter
def unit(self):
self._unit = None
def convert_unit_to(self, new_unit, equivalencies=[]):
"""
Converts the values of the column in-place from the current
unit to the given unit.
To change the unit associated with this column without
actually changing the data values, simply set the ``unit``
property.
Parameters
----------
new_unit : str or `astropy.units.UnitBase` instance
The unit to convert to.
equivalencies : list of equivalence pairs, optional
A list of equivalence pairs to try if the unit are not
directly convertible. See :ref:`unit_equivalencies`.
Raises
------
astropy.units.UnitsError
If units are inconsistent
"""
if self.unit is None:
raise ValueError("No unit set on column")
self.data[:] = self.unit.to(
new_unit, self.data, equivalencies=equivalencies)
self.unit = new_unit
@property
def groups(self):
if not hasattr(self, '_groups'):
self._groups = groups.ColumnGroups(self)
return self._groups
def group_by(self, keys):
"""
Group this column by the specified ``keys``
This effectively splits the column into groups which correspond to
unique values of the ``keys`` grouping object. The output is a new
`Column` or `MaskedColumn` which contains a copy of this column but
sorted by row according to ``keys``.
The ``keys`` input to ``group_by`` must be a numpy array with the
same length as this column.
Parameters
----------
keys : numpy array
Key grouping object
Returns
-------
out : Column
New column with groups attribute set accordingly
"""
return groups.column_group_by(self, keys)
def _copy_groups(self, out):
"""
Copy current groups into a copy of self ``out``
"""
if self.parent_table:
if hasattr(self.parent_table, '_groups'):
out._groups = groups.ColumnGroups(out, indices=self.parent_table._groups._indices)
elif hasattr(self, '_groups'):
out._groups = groups.ColumnGroups(out, indices=self._groups._indices)
# Strip off the BaseColumn-ness for repr and str so that
# MaskedColumn.data __repr__ does not include masked_BaseColumn(data =
# [1 2], ...).
def __repr__(self):
return np.asarray(self).__repr__()
@property
def quantity(self):
"""
A view of this table column as a `~astropy.units.Quantity` object with
units given by the Column's `unit` parameter.
"""
# the Quantity initializer is used here because it correctly fails
# if the column's values are non-numeric (like strings), while .view
# will happily return a quantity with gibberish for numerical values
return Quantity(self, copy=False, dtype=self.dtype, order='A')
def to(self, unit, equivalencies=[], **kwargs):
"""
Converts this table column to a `~astropy.units.Quantity` object with
the requested units.
Parameters
----------
unit : `~astropy.units.Unit` or str
The unit to convert to (i.e., a valid argument to the
:meth:`astropy.units.Quantity.to` method).
equivalencies : list of equivalence pairs, optional
Equivalencies to use for this conversion. See
:meth:`astropy.units.Quantity.to` for more details.
Returns
-------
quantity : `~astropy.units.Quantity`
A quantity object with the contents of this column in the units
``unit``.
"""
return self.quantity.to(unit, equivalencies)
def _copy_attrs(self, obj):
"""
Copy key column attributes from ``obj`` to self
"""
for attr in ('name', 'unit', 'format', 'description'):
val = getattr(obj, attr, None)
setattr(self, attr, val)
self.meta = deepcopy(getattr(obj, 'meta', {}))
[docs]class Column(BaseColumn):
"""Define a data column for use in a Table object.
Parameters
----------
data : list, ndarray or None
Column data values
name : str
Column name and key for reference within Table
dtype : numpy.dtype compatible value
Data type for column
shape : tuple or ()
Dimensions of a single row element in the column data
length : int or 0
Number of row elements in column data
description : str or None
Full description of column
unit : str or None
Physical unit
format : str or None or function or callable
Format string for outputting column values. This can be an
"old-style" (``format % value``) or "new-style" (`str.format`)
format specification string or a function or any callable object that
accepts a single value and returns a string.
meta : dict-like or None
Meta-data associated with the column
Examples
--------
A Column can be created in two different ways:
- Provide a ``data`` value but not ``shape`` or ``length`` (which are
inferred from the data).
Examples::
col = Column(data=[1, 2], name='name') # shape=(2,)
col = Column(data=[[1, 2], [3, 4]], name='name') # shape=(2, 2)
col = Column(data=[1, 2], name='name', dtype=float)
col = Column(data=np.array([1, 2]), name='name')
col = Column(data=['hello', 'world'], name='name')
The ``dtype`` argument can be any value which is an acceptable
fixed-size data-type initializer for the numpy.dtype() method. See
`<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_.
Examples include:
- Python non-string type (float, int, bool)
- Numpy non-string type (e.g. np.float32, np.int64, np.bool)
- Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
If no ``dtype`` value is provide then the type is inferred using
``np.array(data)``.
- Provide ``length`` and optionally ``shape``, but not ``data``
Examples::
col = Column(name='name', length=5)
col = Column(name='name', dtype=int, length=10, shape=(3,4))
The default ``dtype`` is ``np.float64``. The ``shape`` argument is the
array shape of a single cell in the column.
"""
def __new__(cls, data=None, name=None,
dtype=None, shape=(), length=0,
description=None, unit=None, format=None, meta=None, copy=False):
if isinstance(data, MaskedColumn) and np.any(data.mask):
raise TypeError("Cannot convert a MaskedColumn with masked value to a Column")
self = super(Column, cls).__new__(cls, data=data, name=name, dtype=dtype,
shape=shape, length=length, description=description,
unit=unit, format=format, meta=meta, copy=copy)
return self
def _base_repr_(self, html=False):
descr_vals = [self.__class__.__name__]
unit = None if self.unit is None else str(self.unit)
shape = None if self.ndim <= 1 else self.shape[1:]
for attr, val in (('name', self.name),
('dtype', self.dtype.name),
('shape', shape),
('unit', unit),
('format', self.format),
('description', self.description),
('length', len(self))):
if val is not None:
descr_vals.append('{0}={1}'.format(attr, repr(val)))
descr = '<' + ' '.join(descr_vals) + '>\n'
if html:
from ..utils.xml.writer import xml_escape
descr = xml_escape(descr)
data_lines, outs = self._formatter._pformat_col(
self, show_name=False, show_unit=False, show_length=False, html=html)
out = descr + '\n'.join(data_lines)
if six.PY2 and isinstance(out, six.text_type):
out = out.encode('utf-8')
return out
def _repr_html_(self):
return self._base_repr_(html=True)
def __repr__(self):
return self._base_repr_(html=False)
def __unicode__(self):
lines, outs = self._formatter._pformat_col(self)
return '\n'.join(lines)
if six.PY3:
__str__ = __unicode__
def __bytes__(self):
return six.text_type(self).encode('utf-8')
if six.PY2:
__str__ = __bytes__
# Set items using a view of the underlying data, as it gives an
# order-of-magnitude speed-up. [#2994]
def __setitem__(self, index, value):
self.data[index] = value
# # Set slices using a view of the underlying data, as it gives an
# # order-of-magnitude speed-up. Only gets called in Python 2. [#3020]
def __setslice__(self, start, stop, value):
self.data.__setslice__(start, stop, value)
[docs] def insert(self, obj, values):
"""
Insert values before the given indices in the column and return
a new `~astropy.table.Column` object.
Parameters
----------
obj : int, slice or sequence of ints
Object that defines the index or indices before which ``values`` is
inserted.
values : array_like
Value(s) to insert. If the type of ``values`` is different
from that of quantity, ``values`` is converted to the matching type.
``values`` should be shaped so that it can be broadcast appropriately
Returns
-------
out : `~astropy.table.Column`
A copy of column with ``values`` and ``mask`` inserted. Note that the
insertion does not occur in-place: a new column is returned.
"""
if self.dtype.kind == 'O':
# Even if values is array-like (e.g. [1,2,3]), insert as a single
# object. Numpy.insert instead inserts each element in an array-like
# input individually.
data = np.insert(self, obj, None, axis=0)
data[obj] = values
else:
# Explicitly convert to dtype of this column. Needed because numpy 1.7
# enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+.
values = np.asarray(values, dtype=self.dtype)
data = np.insert(self, obj, values, axis=0)
out = data.view(self.__class__)
out.__array_finalize__(self)
return out
# We do this to make the methods show up in the API docs
name = BaseColumn.name
unit = BaseColumn.unit
copy = BaseColumn.copy
more = BaseColumn.more
pprint = BaseColumn.pprint
pformat = BaseColumn.pformat
convert_unit_to = BaseColumn.convert_unit_to
quantity = BaseColumn.quantity
to = BaseColumn.to
[docs]class MaskedColumn(Column, ma.MaskedArray):
"""Define a masked data column for use in a Table object.
Parameters
----------
data : list, ndarray or None
Column data values
name : str
Column name and key for reference within Table
mask : list, ndarray or None
Boolean mask for which True indicates missing or invalid data
fill_value : float, int, str or None
Value used when filling masked column elements
dtype : numpy.dtype compatible value
Data type for column
shape : tuple or ()
Dimensions of a single row element in the column data
length : int or 0
Number of row elements in column data
description : str or None
Full description of column
unit : str or None
Physical unit
format : str or None or function or callable
Format string for outputting column values. This can be an
"old-style" (``format % value``) or "new-style" (`str.format`)
format specification string or a function or any callable object that
accepts a single value and returns a string.
meta : dict-like or None
Meta-data associated with the column
Examples
--------
A MaskedColumn is similar to a Column except that it includes ``mask`` and
``fill_value`` attributes. It can be created in two different ways:
- Provide a ``data`` value but not ``shape`` or ``length`` (which are
inferred from the data).
Examples::
col = MaskedColumn(data=[1, 2], name='name')
col = MaskedColumn(data=[1, 2], name='name', mask=[True, False])
col = MaskedColumn(data=[1, 2], name='name', dtype=float, fill_value=99)
The ``mask`` argument will be cast as a boolean array and specifies
which elements are considered to be missing or invalid.
The ``dtype`` argument can be any value which is an acceptable
fixed-size data-type initializer for the numpy.dtype() method. See
`<http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html>`_.
Examples include:
- Python non-string type (float, int, bool)
- Numpy non-string type (e.g. np.float32, np.int64, np.bool)
- Numpy.dtype array-protocol type strings (e.g. 'i4', 'f8', 'S15')
If no ``dtype`` value is provide then the type is inferred using
``np.array(data)``. When ``data`` is provided then the ``shape``
and ``length`` arguments are ignored.
- Provide ``length`` and optionally ``shape``, but not ``data``
Examples::
col = MaskedColumn(name='name', length=5)
col = MaskedColumn(name='name', dtype=int, length=10, shape=(3,4))
The default ``dtype`` is ``np.float64``. The ``shape`` argument is the
array shape of a single cell in the column.
"""
def __new__(cls, data=None, name=None, mask=None, fill_value=None,
dtype=None, shape=(), length=0,
description=None, unit=None, format=None, meta=None, copy=False):
if mask is None and hasattr(data, 'mask'):
mask = data.mask
else:
mask = deepcopy(mask)
# Create self using MaskedArray as a wrapper class, following the example of
# class MSubArray in
# https://github.com/numpy/numpy/blob/maintenance/1.8.x/numpy/ma/tests/test_subclassing.py
# This pattern makes it so that __array_finalize__ is called as expected (e.g. #1471 and
# https://github.com/astropy/astropy/commit/ff6039e8)
# First just pass through all args and kwargs to BaseColumn, then wrap that object
# with MaskedArray.
self_data = BaseColumn(data, dtype=dtype, shape=shape, length=length, name=name,
unit=unit, format=format, description=description, meta=meta, copy=copy)
self = ma.MaskedArray.__new__(cls, data=self_data, mask=mask)
# Note: do not set fill_value in the MaskedArray constructor because this does not
# go through the fill_value workarounds (see _fix_fill_value below).
if fill_value is None and hasattr(data, 'fill_value'):
fill_value = data.fill_value
self.fill_value = fill_value
self.parent_table = None
return self
def _fix_fill_value(self, val):
"""Fix a fill value (if needed) to work around a bug with setting the fill
value of a string array in MaskedArray with Python 3.x. See
https://github.com/numpy/numpy/pull/2733. This mimics the check in
numpy.ma.core._check_fill_value() (version < 1.8) which incorrectly sets
fill_value to a default if self.dtype.char is 'U' (which is the case for Python
3). Here we change the string to a byte string so that in Python 3 the
isinstance(val, basestring) part fails.
"""
if (NUMPY_LT_1_8 and isinstance(val, six.string_types) and
(self.dtype.char not in 'SV')):
val = val.encode()
return val
@property
def fill_value(self):
return self.get_fill_value() # defer to native ma.MaskedArray method
@fill_value.setter
def fill_value(self, val):
"""Set fill value both in the masked column view and in the parent table
if it exists. Setting one or the other alone doesn't work."""
val = self._fix_fill_value(val)
# Yet another ma bug workaround: If the value of fill_value for a string array is
# requested but not yet set then it gets created as 'N/A'. From this point onward
# any new fill_values are truncated to 3 characters. Note that this does not
# occur if the masked array is a structured array (as in the previous block that
# deals with the parent table).
#
# >>> x = ma.array(['xxxx'])
# >>> x.fill_value # fill_value now gets represented as an 'S3' array
# 'N/A'
# >>> x.fill_value='yyyy'
# >>> x.fill_value
# 'yyy'
#
# To handle this we are forced to reset a private variable first:
self._fill_value = None
self.set_fill_value(val) # defer to native ma.MaskedArray method
@property
def data(self):
out = self.view(ma.MaskedArray)
# The following is necessary because of a bug in Numpy, which was
# fixed in numpy/numpy#2703. The fix should be included in Numpy 1.8.0.
out.fill_value = self.fill_value
return out
[docs] def filled(self, fill_value=None):
"""Return a copy of self, with masked values filled with a given value.
Parameters
----------
fill_value : scalar; optional
The value to use for invalid entries (`None` by default). If
`None`, the ``fill_value`` attribute of the array is used
instead.
Returns
-------
filled_column : Column
A copy of ``self`` with masked entries replaced by `fill_value`
(be it the function argument or the attribute of ``self``).
"""
if fill_value is None:
fill_value = self.fill_value
fill_value = self._fix_fill_value(fill_value)
data = super(MaskedColumn, self).filled(fill_value)
# Use parent table definition of Column if available
column_cls = self.parent_table.Column if (self.parent_table is not None) else Column
out = column_cls(name=self.name, data=data, unit=self.unit,
format=self.format, description=self.description,
meta=deepcopy(self.meta))
return out
[docs] def insert(self, obj, values, mask=None):
"""
Insert values along the given axis before the given indices and return
a new `~astropy.table.MaskedColumn` object.
Parameters
----------
obj : int, slice or sequence of ints
Object that defines the index or indices before which ``values`` is
inserted.
values : array_like
Value(s) to insert. If the type of ``values`` is different
from that of quantity, ``values`` is converted to the matching type.
``values`` should be shaped so that it can be broadcast appropriately
mask : boolean array_like
Mask value(s) to insert. If not supplied then False is used.
Returns
-------
out : `~astropy.table.MaskedColumn`
A copy of column with ``values`` and ``mask`` inserted. Note that the
insertion does not occur in-place: a new masked column is returned.
"""
self_ma = self.data # self viewed as MaskedArray
if self.dtype.kind == 'O':
# Even if values is array-like (e.g. [1,2,3]), insert as a single
# object. Numpy.insert instead inserts each element in an array-like
# input individually.
new_data = np.insert(self_ma.data, obj, None, axis=0)
new_data[obj] = values
else:
# Explicitly convert to dtype of this column. Needed because numpy 1.7
# enforces safe casting by default, so . This isn't the case for 1.6 or 1.8+.
values = np.asarray(values, dtype=self.dtype)
new_data = np.insert(self_ma.data, obj, values, axis=0)
if mask is None:
if self.dtype.kind == 'O':
mask = False
else:
mask = np.zeros(values.shape, dtype=np.bool)
new_mask = np.insert(self_ma.mask, obj, mask, axis=0)
new_ma = np.ma.array(new_data, mask=new_mask, copy=False)
out = new_ma.view(self.__class__)
out.parent_table = None
out._copy_attrs(self)
return out
def __getitem__(self, item):
out = super(MaskedColumn, self).__getitem__(item)
# Fixes issue #3023: when calling getitem with a MaskedArray subclass
# the original object attributes are not copied.
if out.__class__ is self.__class__:
out.parent_table = None
out._copy_attrs(self)
return out
# Set items and slices using MaskedArray method, instead of falling through
# to the (faster) Column version which uses an ndarray view. This doesn't
# copy the mask properly. See test_setting_from_masked_column test.
def __setitem__(self, index, value):
ma.MaskedArray.__setitem__(self, index, value)
def __setslice__(self, start, stop, value):
ma.MaskedArray.__setslice__(self, start, stop, value)
# We do this to make the methods show up in the API docs
name = BaseColumn.name
copy = BaseColumn.copy
more = BaseColumn.more
pprint = BaseColumn.pprint
pformat = BaseColumn.pformat
convert_unit_to = BaseColumn.convert_unit_to