Source code for nbodykit.source.catalog.array

from nbodykit.base.catalog import CatalogSource
from nbodykit import CurrentMPIComm
import numpy

class ArrayCatalog(CatalogSource):
    """
    A CatalogSource initialized from a dictionary or structured ndarray.

    Parameters
    ----------
    data : :obj:`dict` or :class:`numpy.ndarray`
        a dictionary or structured ndarray; items are interpreted as the
        columns of the catalog; the length of any item is used as the size
        of the catalog.
    comm : MPI Communicator, optional
        the MPI communicator instance; default (``None``) sets to the
        current communicator
    use_cache : bool, optional
        whether to cache data read from disk; default is ``False``
    **kwargs :
        additional keywords to store as meta-data in :attr:`attrs`

    Raises
    ------
    ValueError
        if ``data`` is an array without named fields, if it provides no
        columns at all, if its columns differ in length, or if the column
        data types differ between the ranks of ``comm``
    """
    @CurrentMPIComm.enable
    def __init__(self, data, comm=None, use_cache=False, **kwargs):

        self.comm = comm
        self._source = data

        # column names: the fields of a structured array, otherwise the
        # keys of the mapping
        if hasattr(data, 'dtype'):
            names = data.dtype.names
            if names is None:
                # a plain (unstructured) ndarray has no named fields, so
                # there is nothing to interpret as columns
                raise ValueError("input data to ArrayCatalog must have a "
                                 "structured data type with fields")
            keys = sorted(names)
        else:
            keys = sorted(data.keys())

        # the catalog size is taken from the first column, so at least one
        # column is required
        if not keys:
            raise ValueError("input data to ArrayCatalog must contain "
                             "at least one column")

        # per-column dtype, including the shape of any trailing dimensions
        dtype = numpy.dtype([(key, (data[key].dtype, data[key].shape[1:]))
                             for key in keys])

        # verify data types are the same; the comparison is done on every
        # rank (allgather) so that all ranks raise together rather than
        # only the root, which would leave the others waiting
        dtypes = self.comm.allgather(dtype)
        if any(dt != dtypes[0] for dt in dtypes):
            raise ValueError("mismatch between dtypes across ranks in Array")

        # the local size is the length of the first column ...
        self._size = len(self._source[keys[0]])

        # ... and every other column must have that same length
        for key in keys:
            if len(self._source[key]) != self._size:
                raise ValueError("column `%s` and column `%s` has different size"
                                 % (keys[0], key))

        self._dtype = dtype

        # update the meta-data
        self.attrs.update(kwargs)

        CatalogSource.__init__(self, comm=comm, use_cache=use_cache)

    @property
    def hardcolumns(self):
        """
        The names of the columns held in the underlying data, followed by
        the hard columns reported by :class:`CatalogSource`.
        """
        defaults = CatalogSource.hardcolumns.fget(self)
        return list(self._dtype.names) + defaults

    def get_hardcolumn(self, col):
        """
        Return a column from the underlying data array/dict.

        Columns found in the underlying data are passed through
        :func:`make_column` (documented as returning dask arrays); any
        other name is delegated to :class:`CatalogSource`.

        Parameters
        ----------
        col : str
            the name of the column to return
        """
        if col in self._dtype.names:
            return self.make_column(self._source[col])
        else:
            return CatalogSource.get_hardcolumn(self, col)