Source code for nbodykit.algorithms.paircount_tpcf.tpcf

from ..pair_counters import SimulationBoxPairCount, SurveyDataPairCount
from .estimators import LandySzalayEstimator, NaturalEstimator, WedgeBinnedStatistic
from nbodykit import CurrentMPIComm
from nbodykit.binned_statistic import BinnedStatistic

import numpy
import warnings
import logging

[docs]class BasePairCount2PCF(object): """ Base class for two-point correlation function algorithms that use pair counting. The API largely follows that of :class:`~nbodykit.algorithms.SimulationBoxPairCount` and :class:`~nbodykit.algorithms.SurveyDataPairCount`. Parameters ---------- mode : '1d', '2d', 'projected', 'angular' the type of two point correlation function to compute data1 : CatalogSource the data catalog; must have a 'Position' column edges : array_like the bin edges along the first binning dimension Nmu : int, optional when ``mode`` is '2d', the number of mu bins, ranging from 0 to 1 pimax : float, optional when ``mode`` is 'projected', the maximum separation along the line-of-sight randoms1 : CatalogSource, optional the catalog specifying the un-clustered, random distribution for ``data1``; if not provided, analytic randoms will be used randoms2 : CatalogSource, optional the catalog specifying the un-clustered, random distribution for ``data2``; if not provided, analytic randoms will be used data2 : CatalogSource, optional the second data catalog to cross-correlate; must have a 'Position' column R1R2 : SimulationBoxPairCount, SurveyDataPairCount, optional if provided, random pairs R1R2 are not recalculated in the Landy-Szalay estimator **kws : additional keyword arguments passed to the appropriate pair counting class """ def __init__(self, mode, data1, edges, Nmu=None, pimax=None, randoms1=None, randoms2=None, data2=None, R1R2=None, **kws): self.comm = data1.comm edges = numpy.array(edges) # store the attributes self.attrs = {'mode':mode, 'edges':edges, 'Nmu':Nmu, 'pimax':pimax} self.attrs.update(kws) # store the catalogs self.data1 = data1 self.data2 = data2 self.randoms1 = randoms1 self.randoms2 = randoms2 self.R1R2 = R1R2
[docs] def run(self): """ Run the two-point correlation function algorithm. There are two cases here: 1. If no randoms were provided, and the data is in a simulation box with periodic boundary conditions, the natural estimator :math:`DD/RR - 1` is used. 2. If randoms were provided, the Landy-Szalay estimator is used: :math:`(D_1 D_2 - D_1 R_2 - D_2 R_1 + R_1 R_2) / R_1 R_2` Raises ------ ValueError : if periodic boundary conditions were not requested, and ``randoms1`` is ``None`` ValueError : if periodic boundary conditions were not requested, and ``data2`` is not None, but ``randoms2`` is ``None`` """ # get the config attrs = self.attrs.copy() config = attrs.pop('config') attrs.update(config) # whether we are doing sim volume or mock survey if 'periodic' in attrs: pair_counter = SimulationBoxPairCount else: pair_counter = SurveyDataPairCount # use analytic randoms for a periodic box if 'periodic' in attrs and attrs['periodic'] and self.randoms1 is None: if attrs['mode'] == 'angular': self.data1 = _restrict_to_spherical_volume(self.data1, self.data1[attrs['position']], self.data1.attrs['BoxSize']) if self.data2 is not None: self.data2 = _restrict_to_spherical_volume(self.data2, self.data2[attrs['position']], self.data2.attrs['BoxSize']) if self.comm.rank == 0: msg = "when using analytic randoms for the angular correlation function, " msg += "we restrict the input data to a spherical volume, throwing away objects. " msg += "To use all data, pass in a UniformCatalog as the 'randoms1' keyword" warnings.warn(msg) # count the data-data pairs using analytic randoms DD = pair_counter(first=self.data1, second=self.data2, **attrs) self.R1R2, self.corr = NaturalEstimator(DD) self.D1D2 = DD.pairs self.D1R2 = self.D2R1 = None # need catalog randoms else: if self.randoms1 is None: msg = "a catalog of randoms must be specified as the ``randoms1`` keyword " msg += "when the data is not in a simulation box with periodic boundary conditions" raise ValueError(msg) # use the first randoms for both if self.data2 is not None and self.randoms2 is None: self.randoms2 = self.randoms1 # use the Landy-Szalay estimator result = LandySzalayEstimator(pair_counter, self.data1, self.data2, self.randoms1, self.randoms2, R1R2=self.R1R2, logger=self.logger, **attrs) self.D1D2, self.D1R2, self.D2R1, self.R1R2, self.corr = result
def __getstate__(self): # the correlation edges = [self.corr.edges[d] for d in self.corr.dims] state = {'corr':self.corr.data, 'dims':self.corr.dims, 'edges':edges} # the pair counts for pc in ['D1D2', 'D1R2', 'D2R1', 'R1R2', 'wp']: if getattr(self, pc, None) is not None: state[pc] = getattr(self, pc).data else: state[pc] = None state['attrs'] = self.attrs return state def __setstate__(self, state): edges = state.pop('edges') dims = state.pop('dims') self.__dict__.update(state) self.corr = WedgeBinnedStatistic(dims, edges, self.corr) if self.wp is not None: # NOTE: only edges[0], second dimension was summed over self.wp = WedgeBinnedStatistic(dims[:1], edges[:1], self.wp) for pc in ['D1D2', 'D1R2', 'D2R1', 'R1R2']: val = getattr(self, pc) if val is not None: setattr(self, pc, WedgeBinnedStatistic(dims, edges, val))
[docs] def save(self, output): """ Save result as a JSON file with name ``output`` """ import json from nbodykit.utils import JSONEncoder # only the master rank writes if self.comm.rank == 0: self.logger.info('measurement done; saving result to %s' % output) with open(output, 'w') as ff: json.dump(self.__getstate__(), ff, cls=JSONEncoder)
[docs] @classmethod @CurrentMPIComm.enable def load(cls, output, comm=None): """ Load a result has been saved to disk with :func:`save`. """ import json from nbodykit.utils import JSONDecoder if comm.rank == 0: with open(output, 'r') as ff: state = json.load(ff, cls=JSONDecoder) else: state = None state = comm.bcast(state) self = object.__new__(cls) self.__setstate__(state) self.comm = comm return self
[docs]class SimulationBox2PCF(BasePairCount2PCF): r""" Compute the two-point correlation function for data in a simulation box as a function of :math:`r`, :math:`(r,\mu)`, :math:`(r_p, \pi)`, or :math:`\theta` using pair counting. This uses analytic randoms when using periodic conditions, unless a randoms catalog is specified. The "natural" estimator (DD/RR-1) is used in the former case, and the Landy-Szalay estimator (DD/RR - 2DR/RR + 1) in the latter case. .. note:: When using analytic randoms, the expected counts are assumed to be unweighted. Parameters ---------- mode : '1d', '2d', 'projected', 'angular' the type of two-point correlation function to compute; see the Notes below data1 : CatalogSource the data catalog; edges : array_like the separation bin edges along the first coordinate dimension; depending on ``mode``, the options are :math:`r`, :math:`r_p`, or :math:`\theta`. Expected units for distances are :math:`\mathrm{Mpc}/h` and degrees for angles. Length of nbins+1 Nmu : int, optional the number of :math:`\mu` bins, ranging from 0 to 1; requred if ``mode='2d'`` pimax : float, optional The maximum separation along the line-of-sight when ``mode='projected'``. Distances along the :math:`\pi` direction are binned with unit depth. For instance, if ``pimax=40``, then 40 bins will be created along the :math:`\pi` direction. data2 : CatalogSource, optional the second data catalog to cross-correlate; randoms1 : CatalogSource, optional the catalog specifying the un-clustered, random distribution for ``data1``; if not provided, analytic randoms will be used randoms2 : CatalogSource, optional the catalog specifying the un-clustered, random distribution for ``data2``; if not provided, analytic randoms will be used R1R2 : SimulationBoxPairCount, optional if provided, random pairs R1R2 are not recalculated in the Landy-Szalay estimator periodic : bool, optional whether to use periodic boundary conditions BoxSize : float, 3-vector, optional the size of the box; if 'BoxSize' is not provided in the source 'attrs', it must be provided here los : 'x', 'y', 'z'; int, optional the axis of the simulation box to treat as the line-of-sight direction; this can be provided as string identifying one of 'x', 'y', 'z' or the equivalent integer number of the axis weight : str, optional the name of the column in the source specifying the particle weights position : str, optional the name of the column in the source specifying the particle positions show_progress : bool, optional if ``True``, perform the pair counting calculation in 10 iterations, logging the progress after each iteration; this is useful for understanding the scaling of the code **config : key/value pairs additional keywords to pass to the :mod:`Corrfunc` function Notes ----- This class can compute correlation functions using several different coordinate choices, based on the value of the input argument ``mode``. The choices are: * ``mode='1d'`` : compute pairs as a function of the 3D separation :math:`r` * ``mode='2d'`` : compute pairs as a function of the 3D separation :math:`r` and the cosine of the angle to the line-of-sight, :math:`\mu` * ``mode='projected'`` : compute pairs as a function of distance perpendicular and parallel to the line-of-sight, :math:`r_p` and :math:`\pi` * ``mode='angular'`` : compute pairs as a function of angle on the sky, :math:`\theta` If ``mode='projected'``, the projected correlation function :math:`w_p(r_p)` is also computed, using the input :math:`\pi_\mathrm{max}` value. """ logger = logging.getLogger('SimulationBox2PCF') def __init__(self, mode, data1, edges, Nmu=None, pimax=None, data2=None, randoms1=None, randoms2=None, R1R2=None, periodic=True, BoxSize=None, los='z', weight='Weight', position='Position', show_progress=False, **config): # format the input arguments args = dict(locals()) args.pop('self') # init the base class BasePairCount2PCF.__init__(self, **args) # and run self.run()
[docs] def run(self): """ Run the two-point correlation function algorithm. This attaches the following attributes: - :attr:`SimulationBox2PCF.D1D2` - :attr:`SimulationBox2PCF.D1R2` - :attr:`SimulationBox2PCF.D2R1` - :attr:`SimulationBox2PCF.R1R2` - :attr:`SimulationBox2PCF.corr` - :attr:`SimulationBox2PCF.wp` (if ``mode='projected'``) Attributes ---------- D1D2 : :class:`~nbodykit.binned_statistic.BinnedStatistic` the data1 - data2 pair counts D1R2 : :class:`~nbodykit.binned_statistic.BinnedStatistic` the data1 - randoms2 pair counts D2R1 : :class:`~nbodykit.binned_statistic.BinnedStatistic` the data2 - randoms1 pair counts R1R2 : :class:`~nbodykit.binned_statistic.BinnedStatistic` the randoms1 - randoms2 pair counts corr : :class:`~nbodykit.binned_statistic.BinnedStatistic` the correlation function values, stored as the ``corr`` variable, computed from the pair counts wp : :class:`~nbodykit.binned_statistic.BinnedStatistic` the projected correlation function, :math:`w_p(r_p)`, computed if ``mode='projected'``; correlation is stored as the ``corr`` variable Notes ----- The :attr:`D1D2`, :attr:`D1R2`, :attr:`D2R1`, and :attr:`R1R2` attributes are identical to the :attr:`~nbodykit.algorithms.SimulationBoxPairCount.pairs` attribute of :class:`~nbodykit.algorithms.SimulationBoxPairCount`. """ # this does most of the work BasePairCount2PCF.run(self) # compute wp(rp) if we computed xi(rp, pi) if self.attrs['mode'] == 'projected': self.wp = _compute_wp(self.corr)
[docs]class SurveyData2PCF(BasePairCount2PCF): r""" Compute the two-point correlation function for observational survey data as a function of :math:`r`, :math:`(r,\mu)`, :math:`(r_p, \pi)`, or :math:`\theta` using pair counting. The Landy-Szalay estimator (DD/RR - 2 DD/RR + 1) is used to transform pair counts in to the correlation function. Parameters ---------- mode : '1d', '2d', 'projected', 'angular' the type of two-point correlation function to compute; see the Notes below data1 : CatalogSource the data catalog; must have ra, dec, redshift, columns randoms1 : CatalogSource the catalog specifying the un-clustered, random distribution for ``data1`` edges : array_like the separation bin edges along the first coordinate dimension; depending on ``mode``, the options are :math:`r`, :math:`r_p`, or :math:`\theta`. Expected units for distances are :math:`\mathrm{Mpc}/h` and degrees for angles. Length of nbins+1 cosmo : :class:`~nbodykit.cosmology.cosmology.Cosmology`, optional the cosmology instance used to convert redshift into comoving distance; this is required for all cases except ``mode='angular'`` Nmu : int, optional the number of :math:`\mu` bins, ranging from 0 to 1; requred if ``mode='2d'`` pimax : float, optional The maximum separation along the line-of-sight when ``mode='projected'``. Distances along the :math:`\pi` direction are binned with unit depth. For instance, if ``pimax=40``, then 40 bins will be created along the :math:`\pi` direction. data2 : CatalogSource, optional the second data catalog to cross-correlate; randoms2 : CatalogSource, optional the catalog specifying the un-clustered, random distribution for ``data2``; if not specified and ``data2`` is provied, then ``randoms1`` will be used for both. R1R2 : SurveyDataPairCount, optional if provided, random pairs R1R2 are not recalculated in the Landy-Szalay estimator ra : str, optional the name of the column in the source specifying the right ascension coordinates in units of degrees; default is 'RA' dec : str, optional the name of the column in the source specifying the declination coordinates; default is 'DEC' redshift : str, optional the name of the column in the source specifying the redshift coordinates; default is 'Redshift' weight : str, optional the name of the column in the source specifying the object weights show_progress : bool, optional if ``True``, perform the pair counting calculation in 10 iterations, logging the progress after each iteration; this is useful for understanding the scaling of the code **config : key/value pairs additional keywords to pass to the :mod:`Corrfunc` function Notes ----- This class can compute correlation functions using several different coordinate choices, based on the value of the input argument ``mode``. The choices are: * ``mode='1d'`` : compute pairs as a function of the 3D separation :math:`r` * ``mode='2d'`` : compute pairs as a function of the 3D separation :math:`r` and the cosine of the angle to the line-of-sight, :math:`\mu` * ``mode='projected'`` : compute pairs as a function of distance perpendicular and parallel to the line-of-sight, :math:`r_p` and :math:`\pi` * ``mode='angular'`` : compute pairs as a function of angle on the sky, :math:`\theta` If ``mode='projected'``, the projected correlation function :math:`w_p(r_p)` is also computed, using the input :math:`\pi_\mathrm{max}` value. """ logger = logging.getLogger('SurveyData2PCF') def __init__(self, mode, data1, randoms1, edges, cosmo=None, Nmu=None, pimax=None, data2=None, randoms2=None, R1R2=None, ra='RA', dec='DEC', redshift='Redshift', weight='Weight', show_progress=False, **config): # format the input arguments args = dict(locals()) args.pop('self') # init the base class BasePairCount2PCF.__init__(self, **args) # and run self.run()
[docs] def run(self): """ Run the two-point correlation function algorithm. This attaches the following attributes: - :attr:`SurveyData2PCF.D1D2` - :attr:`SurveyData2PCF.D1R2` - :attr:`SurveyData2PCF.D2R1` - :attr:`SurveyData2PCF.R1R2` - :attr:`SurveyData2PCF.corr` - :attr:`SurveyData2PCF.wp` (if ``mode='projected'``) Attributes ---------- D1D2 : :class:`~nbodykit.binned_statistic.BinnedStatistic` the data1 - data2 pair counts D1R2 : :class:`~nbodykit.binned_statistic.BinnedStatistic` the data1 - randoms2 pair counts D2R1 : :class:`~nbodykit.binned_statistic.BinnedStatistic` the data2 - randoms1 pair counts R1R2 : :class:`~nbodykit.binned_statistic.BinnedStatistic` the randoms1 - randoms2 pair counts corr : :class:`~nbodykit.binned_statistic.BinnedStatistic` the correlation function values, stored as the ``corr`` variable, computed from the pair counts wp : :class:`~nbodykit.binned_statistic.BinnedStatistic` the projected correlation function, :math:`w_p(r_p)`, computed if ``mode='projected'``; correlation is stored as the ``corr`` variable Notes ----- The :attr:`D1D2`, :attr:`D1R2`, :attr:`D2R1`, and :attr:`R1R2` attributes are identical to the :attr:`~nbodykit.algorithms.SurveyDataPairCount.pairs` attribute of :class:`~nbodykit.algorithms.SurveyDataPairCount`. """ # this does most of the work BasePairCount2PCF.run(self) # compute wp(rp) if we computed xi(rp, pi) if self.attrs['mode'] == 'projected': self.wp = _compute_wp(self.corr)
def _compute_wp(corr): r""" Compute the projected correlation function :math:`w_p(r_p)` from :math:`\xi(r_p, \pi)`. """ # compute wp(rp) dpi = numpy.diff(corr.edges['pi']) wp = (2*corr['corr']*dpi).sum(axis=-1) # return a BinnedStatistic toret = corr.copy() if len(toret.dims) > 1: toret = toret.average(toret.dims[-1]) toret['corr'] = wp return toret def _restrict_to_spherical_volume(source, position, BoxSize): """ Slice the input source such that it only includes a spherical volume instead the simulation box. This returns a CatalogSource only containing objects within a radius of 1/2 the minimum box side length of the box center. """ # compute the distance from box center origin = 0.5 * source.attrs['BoxSize'] pos = position - origin r = (pos**2).sum(axis=-1)**0.5 # restrict to sphere less than half of box size keep = r < 0.5 * source.attrs['BoxSize'].min() return source[keep]