Source code for nbodykit.algorithms.paircount_tpcf.estimators

from nbodykit.binned_statistic import BinnedStatistic
import numpy
import warnings

[docs]class WedgeBinnedStatistic(BinnedStatistic): """ A BinnedStatistic of wedges that can be converted to multiples. """
[docs] def to_poles(self, poles): r""" Invert the measured wedges :math:`\xi(r,mu)` into correlation multipoles, :math:`\xi_\ell(r)`. To select a mu_range, use .. code:: python poles = self.sel(mu=slice(*mu_range), method='nearest').to_poles(poles) Parameters ---------- poles: array_like the list of multipoles to compute Returns ------- xi_ell : BinnedStatistic a data set holding the :math:`\xi_\ell(r)` multipoles """ from scipy.special import legendre from scipy.integrate import quad # new data array x = str(self.dims[0]) dtype = numpy.dtype([(x, 'f8')] + [('corr_%d' %ell, 'f8') for ell in poles]) data = numpy.zeros((self.shape[0]), dtype=dtype) dims = [x] edges = [self.edges[x]] # FIXME: use something fancier than the central point. mu_bins = numpy.diff(self.edges['mu']) mu_mid = (self.edges['mu'][1:] + self.edges['mu'][:-1])/2. for ell in poles: legendrePolynomial = (2.*ell+1.)*legendre(ell)(mu_mid) data['corr_%d' %ell] = numpy.sum(self['corr']*legendrePolynomial*mu_bins,axis=-1)/numpy.sum(mu_bins) data[x] = numpy.mean(self[x],axis=-1) return BinnedStatistic(dims=dims, edges=edges, data=data, poles=poles)
[docs]class AnalyticUniformRandoms(object): """ Internal class to compute analytic pair counts for uniformly distributed randoms. This can compute the expected randoms counts for several coordinate choices, based on ``mode``. The relevant volume/area calculations for the various coordinate modes are: * mode='1d': volume of sphere * mode='2d': volume of spherical sector * mode='projected': volume of cylinder * mode='angular': area of spherical cap """ def __init__(self, mode, dims, edges, BoxSize): assert mode in ['1d', '2d', 'projected', 'angular'] self.mode = mode self.edges = edges self.dims = dims self.BoxSize = BoxSize
[docs] def get_filling_factor(self): """ This gives the ratio of the volume (or area) occupied by each bin to the global volume (or area). It is different based on the value of :attr:`mode`. """ # based on a volume of a sphere if self.mode == '1d': r_edges = self.edges['r'] V = 4. / 3. * numpy.pi * r_edges**3 dV = numpy.diff(V) return dV / self.BoxSize.prod() # based on the volume of a spherical sector elif self.mode == '2d': r_edges, mu_edges = self.edges['r'], self.edges['mu'] V = 2. / 3. * numpy.pi * numpy.outer(r_edges**3, mu_edges) V *= 2.0 # accounts for the fact that we bin in abs(mu) dV = numpy.diff(numpy.diff(V, axis=0), axis=1) return dV / self.BoxSize.prod() # based on volume of a cylinder elif self.mode == 'projected': rp_edges, pi_edges = self.edges['rp'], self.edges['pi'] V = numpy.pi * numpy.outer(rp_edges**2, (2. * pi_edges)) # height is double pimax! dV = numpy.diff(numpy.diff(V, axis=0), axis=1) return dV / self.BoxSize.prod() # based on the surface area of a spherical cap elif self.mode == 'angular': theta_bins = self.edges['theta'] chord = 2*numpy.sin(0.5*numpy.deg2rad(theta_bins)) # chord distance h = 1. - numpy.sqrt(1. - chord**2) A = numpy.pi*(chord**2 + h**2) dA = numpy.diff(A) return dA / (4*numpy.pi)
[docs] def __call__(self, NR1, NR2=None): """ Evaluate the expected randoms pair counts, and the total_weight, returns as an object that looks like the result of paircount. """ edges = [self.edges[d] for d in self.dims] # sequentialize it, poor API! if NR2 is None: R1R2 = NR1 ** 2 * self.get_filling_factor() total_wnpairs = NR1 * (NR1 - 1) * 0.5 else: R1R2 = NR1 * NR2 * self.get_filling_factor() total_wnpairs = NR1 * NR2 * 0.5 data = numpy.empty_like(R1R2, dtype=[('npairs', 'f8'), ('wnpairs', 'f8')]) data['npairs'] = R1R2 data['wnpairs'] = R1R2 pairs = WedgeBinnedStatistic(self.dims, edges, data) R1R2 = lambda : None R1R2.attrs = {} R1R2.pairs = pairs R1R2.attrs['total_wnpairs']= total_wnpairs R1R2.pairs.attrs['total_wnpairs']= total_wnpairs return R1R2
[docs]def LandySzalayEstimator(pair_counter, data1, data2, randoms1, randoms2, R1R2=None, logger=None, **kwargs): """ Compute the correlation function from data/randoms using the Landy - Szalay estimator to compute the correlation function. Parameters ---------- pair_counter : SimulationBoxPairCount, SurveyDataPairCount the pair counting algorithm class data1 : CatalogSource the first data source data2 : CatalogSource, None the second data catalog to cross-correlate; can be None for auto-correlations randoms1 : CatalogSource the randoms catalog corresponding to ``data1`` randoms2 : CatalogSource, None the second randoms catalog; can be None for auto-correlations R1R2 : SimulationBoxPairCount, SurveyDataPairCount, optional if provided, random pairs R1R2 are not recalculated **kwargs : the parameters passed to the ``pair_counter`` class to count pairs Returns ------- D1D2, D1R2, D2R1, R1R2, CF : BinnedStatistic the various terms of the LS estimator + the correlation function result References ---------- http://adsabs.harvard.edu/abs/1993ApJ...412...64L """ # make sure we have the randoms assert randoms1 is not None comm = data1.comm if randoms2 is None: randoms2 = randoms1 # and randoms - randoms calculation if logger is not None and comm.rank == 0: logger.info("computing randoms1 - randoms2 pair counts") if not R1R2: R1R2 = pair_counter(first=randoms1, second=randoms2, **kwargs) # data1 x data2 if logger is not None and comm.rank == 0: logger.info("computing data1 - data2 pair counts") D1D2 = pair_counter(first=data1, second=data2, **kwargs) # do data - randoms correlation if logger is not None and comm.rank == 0: logger.info("computing data1 - randoms2 pair counts") D1R2 = pair_counter(first=data1, second=randoms2, **kwargs) if data2 is not None: if logger is not None and comm.rank == 0: logger.info("computing data2 - randoms1 pair counts") D2R1 = pair_counter(first=data2, second=randoms1, **kwargs) else: D2R1 = D1R2 fDD = R1R2.attrs['total_wnpairs']/D1D2.attrs['total_wnpairs'] fDR = R1R2.attrs['total_wnpairs']/D1R2.attrs['total_wnpairs'] fRD = R1R2.attrs['total_wnpairs']/D2R1.attrs['total_wnpairs'] nonzero = R1R2.pairs['npairs'] > 0 # init CF = numpy.zeros(D1D2.pairs.shape) CF[:] = numpy.nan Error = numpy.zeros(D1D2.pairs.shape) Error[:] = numpy.nan # the Landy - Szalay estimator # (DD - DR - RD + RR) / RR DD = (D1D2.pairs['wnpairs'])[nonzero] DR = (D1R2.pairs['wnpairs'])[nonzero] RD = (D2R1.pairs['wnpairs'])[nonzero] RR = (R1R2.pairs['wnpairs'])[nonzero] xi = (fDD * DD - fDR * DR - fRD * RD)/RR + 1 CF[nonzero] = xi[:] # warn about NaNs in the estimator if comm.rank == 0 and numpy.isnan(CF).any(): msg = ("The RR calculation in the Landy-Szalay estimator contains" " separation bins with no bins. This will result in NaN values in the resulting" " correlation function. Try increasing the number of randoms and/or using" " broader bins.") warnings.warn(msg) CF = _create_tpcf_result(D1D2.pairs, CF) return D1D2.pairs, D1R2.pairs, D2R1.pairs, R1R2.pairs, CF
[docs]def NaturalEstimator(D1D2): """ Internal function to computing the correlation function using analytic randoms and the so-called "natural" correlation function estimator, :math:`DD/RR - 1`. """ attrs = D1D2.attrs # determine the sample sizes if attrs['is_cross']: ND1, ND2 = attrs['N1'], attrs['N2'] else: ND1, ND2 = attrs['N1'], None mode = attrs['mode'] BoxSize = attrs['BoxSize'] # analytic randoms - randoms calculation assuming uniform distribution R1R2 = AnalyticUniformRandoms(mode, D1D2.pairs.dims, D1D2.pairs.edges, BoxSize)(ND1, ND2) # and compute the correlation function as DD/RR - 1 fDD = R1R2.attrs['total_wnpairs'] / D1D2.attrs['total_wnpairs'] RR = R1R2.pairs['wnpairs'] DD = D1D2.pairs['wnpairs'] CF = (DD * fDD) / RR - 1. # create a BinnedStatistic holding the CF CF = _create_tpcf_result(D1D2.pairs, CF) return R1R2.pairs, CF
def _create_tpcf_result(D1D2, CF): """ Create a BinnedStatistic holding the correlation function and average bin separation. """ x = D1D2.dims[0] data = numpy.empty_like(CF, dtype=[('corr', 'f8'), (x, 'f8')]) data['corr'] = CF[:] data[x] = D1D2[x] edges = [D1D2.edges[d] for d in D1D2.dims] return WedgeBinnedStatistic(D1D2.dims, edges, data)