Source code for nbodykit.source.catalog.hod

from nbodykit.base.catalog import column
from nbodykit.source.catalog.array import ArrayCatalog
from nbodykit import CurrentMPIComm
from nbodykit.utils import GatherArray, ScatterArray

from six import add_metaclass
import abc
import logging
import numpy

[docs]def gal_type_integers(galtab): """ Convert ``centrals`` to 0 and ``satellites`` to 1 in the input galaxy Table This operatio is done in place """ gal_type = numpy.zeros(len(galtab), dtype='i4') sats = galtab['gal_type'] == 'satellites' gal_type[sats] = 1 galtab.replace_column('gal_type', gal_type)
[docs]def find_object_dtypes(data): """ Utility function to convert 'O' data types to strings """ for col in data.colnames: if data.dtype[col] == 'O': raise TypeError("column '%s' is of type 'O'; must convert to integer or string" %col) return data
[docs]@add_metaclass(abc.ABCMeta) class HODBase(ArrayCatalog): """ A base class to be used for HOD population of a halo catalog. The user must supply the :func:`__makemodel__` function, which returns the halotools composite HOD model. This abstraction allows the user to potentially implement several different types of HOD models quickly, while using the population framework of this base class. """ logger = logging.getLogger("HODBase") @CurrentMPIComm.enable def __init__(self, halos, seed=None, use_cache=False, comm=None, **params): from halotools.empirical_models import model_defaults # check type from halotools.sim_manager import UserSuppliedHaloCatalog if not isinstance(halos, UserSuppliedHaloCatalog): raise TypeError("input halos catalog for HOD should be halotools.sim_manager.UserSuppliedHaloCatalog") # try to extract meta-data from catalog mdef = getattr(halos, 'mdef', 'vir') cosmo = getattr(halos, 'cosmology', None) Lbox = getattr(halos, 'Lbox', None) redshift = getattr(halos, 'redshift', None) # fail if we are missing any of these for name, attr in zip(['cosmology', 'Lbox', 'redshift'], [cosmo, Lbox, redshift]): if attr is None: raise AttributeError("input UserSuppliedHaloCatalog must have '%s attribute" %name) # promote astropy cosmology to nbodykit's Cosmology from astropy.cosmology import FLRW if isinstance(cosmo, FLRW): from nbodykit.cosmology import Cosmology cosmo = Cosmology.from_astropy(cosmo) # store the halotools catalog self._halos = halos self.cosmo = cosmo self.comm = comm # set the seed randomly if it is None if seed is None: if self.comm.rank == 0: seed = numpy.random.randint(0, 4294967295) seed = self.comm.bcast(seed) # grab the BoxSize from the halotools catalog self.attrs['BoxSize'] = numpy.empty(3) self.attrs['BoxSize'][:] = Lbox # store mass and radius keys self.mass = model_defaults.get_halo_mass_key(mdef) self.radius = model_defaults.get_halo_boundary_key(mdef) # check for any missing columns needed = ['halo_%s' %s for s in ['x','y','z','vx','vy','vz', 'id', 'upid']] + [self.mass, self.radius] missing = set(needed) - set(halos.halo_table.colnames) if len(missing): raise ValueError("missing columns from halotools UserSuppliedHaloCatalog: %s" %str(missing)) # this ensures that all columns in the halo catalog will propagate to the galaxy catalog model_defaults.default_haloprop_list_inherited_by_mock = halos.halo_table.colnames # store the attributes self.attrs['mdef'] = mdef self.attrs['redshift'] = redshift self.attrs['seed'] = seed self.attrs.update(params) # add cosmo attributes self.attrs['cosmo'] = dict(cosmo) # make the model! self._model = self.__makemodel__() # set the HOD params for param in self._model.param_dict: if param not in self.attrs: raise ValueError("missing '%s' parameter when initializing HOD" %param) self._model.param_dict[param] = self.attrs[param] # make the actual source ArrayCatalog.__init__(self, self.__makesource__(), comm=comm, use_cache=use_cache) # crash with no particles! if self.csize == 0: raise ValueError("no particles in catalog after populating HOD")
[docs] @abc.abstractmethod def __makemodel__(self): """ Abstract class to be overwritten by user; this should return the HOD model instance that will be used to do the mock population. See :ref:`the documentation <custom-hod-mocks>` for more details. Returns ------- :class:`~halotools.empirical_models.HodModelFactory` the halotools object implementing the HOD model """ pass
[docs] def __makesource__(self): """ Make the source of galaxies by performing the halo HOD population .. note:: The mock population is only done by the root, and the resulting catalog is then distributed evenly amongst the available ranks """ from astropy.table import Table # gather all halos to root halo_table = find_object_dtypes(self._halos.halo_table) all_halos = GatherArray(halo_table.as_array(), self.comm, root=0) # root does the mock population if self.comm.rank == 0: # set the halo table on the root to the Table containing all halo self._halos.halo_table = Table(data=all_halos, copy=True) del all_halos # populate self._model.populate_mock(halocat=self._halos, halo_mass_column_key=self.mass, Num_ptcl_requirement=1, seed=self.attrs['seed']) # remap gal_type to integers (cen: 0, sats: 1) gal_type_integers(self._model.mock.galaxy_table) # crash if any object dtypes data = find_object_dtypes(self._model.mock.galaxy_table).as_array() del self._model.mock.galaxy_table else: data = None # log the stats if self.comm.rank == 0: self._log_populated_stats(data) return ScatterArray(data, self.comm)
[docs] def repopulate(self, seed=None, **params): """ Update the HOD parameters and then re-populate the mock catalog .. warning:: This operation is done in-place, so the size of the Source changes Parameters ---------- seed : int, optional the new seed to use when populating the mock params : key/value pairs of HOD parameters to update """ # set the seed randomly if it is None if seed is None: if self.comm.rank == 0: seed = numpy.random.randint(0, 4294967295) seed = self.comm.bcast(seed) self.attrs['seed'] = seed # update the HOD model parameters for name in params: if name not in self._model.param_dict: valid = list(self._model.param_dict.keys()) raise ValueError("'%s' is not a valid Hod parameter name; valid are: %s" %(name, str(valid))) self._model.param_dict[name] = params[name] self.attrs[name] = params[name] # the root will do the mock population if self.comm.rank == 0: # re-populate the mock (without halo catalog pre-processing) self._model.mock.populate(Num_ptcl_requirement=1, halo_mass_column_key=self.mass, seed=self.attrs['seed']) # remap gal_type to integers (cen: 0, sats: 1) gal_type_integers(self._model.mock.galaxy_table) # crash if any object dtypes data = find_object_dtypes(self._model.mock.galaxy_table).as_array() del self._model.mock.galaxy_table else: data = None # log the stats if self.comm.rank == 0: self._log_populated_stats(data) # re-initialize with new source ArrayCatalog.__init__(self, ScatterArray(data, self.comm), comm=self.comm, use_cache=self.use_cache)
def _log_populated_stats(self, data): """ Internal function to log statistics of the populated catalog """ if len(data) > 0: fsat = 1.*(data['gal_type'] == 1).sum()/len(data) self.logger.info("satellite fraction: %.2f" %fsat) self.attrs['fsat'] = fsat logmass = numpy.log10(data[self.mass]) self.logger.info("mean log10 halo mass: %.2f" %logmass.mean()) self.logger.info("std log10 halo mass: %.2f" %logmass.std())
[docs] @column def Position(self): """ Galaxy positions, in units of Mpc/h """ pos = numpy.vstack([self._source['x'], self._source['y'], self._source['z']]).T return self.make_column(pos)
[docs] @column def Velocity(self): """ Galaxy velocity, in units of km/s """ vel = numpy.vstack([self._source['vx'], self._source['vy'], self._source['vz']]).T return self.make_column(vel)
[docs] @column def VelocityOffset(self): """ The RSD velocity offset, in units of Mpc/h This multiplies Velocity by 1 / (a*100*E(z)) = 1 / (a H(z)/h) """ z = self.attrs['redshift'] rsd_factor = (1+z) / (100*self.cosmo.efunc(z)) return self['Velocity'] * rsd_factor
[docs]class HODCatalog(HODBase): """ A CatalogSource that uses the HOD prescription of Zheng et al 2007 to populate an input halo catalog with galaxies. The mock population is done using :mod:`halotools`. See the documentation for :class:`halotools.empirical_models.Zheng07Cens` and :class:`halotools.empirical_models.Zheng07Sats` for further details regarding the HOD. The columns generated in this catalog are: #. **Position**: the galaxy position #. **Velocity**: the galaxy velocity #. **VelocityOffset**: the RSD velocity offset, in units of distance #. **conc_NFWmodel**: the concentration of the halo #. **gal_type**: the galaxy type, 0 for centrals and 1 for satellites #. **halo_id**: the global ID of the halo this galaxy belongs to, between 0 and :attr:`csize` #. **halo_local_id**: the local ID of the halo this galaxy belongs to, between 0 and :attr:`size` #. **halo_mvir**: the halo mass #. **halo_nfw_conc**: alias of ``conc_NFWmodel`` #. **halo_num_centrals**: the number of centrals that this halo hosts, either 0 or 1 #. **halo_num_satellites**: the number of satellites that this halo hosts #. **halo_rvir**: the halo radius #. **halo_upid**: equal to -1; should be ignored by the user #. **halo_vx, halo_vy, halo_vz**: the three components of the halo velocity #. **halo_x, halo_y, halo_z**: the three components of the halo position #. **host_centric_distance**: the distance from this galaxy to the center of the halo #. **vx, vy, vz**: the three components of the galaxy velocity, equal to ``Velocity`` #. **x,y,z**: the three components of the galaxy position, equal to ``Position`` For futher details, please see the :ref:`documentation <hod-mock-data>`. .. note:: Default HOD values are from `Reid et al. 2014 <https://arxiv.org/abs/1404.3742>`_ Parameters ---------- halos : :class:`~halotools.sim_manager.UserSuppliedHaloCatalog` the halotools table holding the halo data; this object must have the following attributes: `cosmology`, `Lbox`, `redshift` logMmin : float, optional Minimum mass required for a halo to host a central galaxy sigma_logM : float, optional Rate of transition from <Ncen>=0 --> <Ncen>=1 alpha : float, optional Power law slope of the relation between halo mass and <Nsat> logM0 : float, optional Low-mass cutoff in <Nsat> logM1 : float, optional Characteristic halo mass where <Nsat> begins to assume a power law form seed : int, optional the random seed to generate deterministic mocks References ---------- `Zheng et al. (2007), arXiv:0703457 <https://arxiv.org/abs/astro-ph/0703457>`_ """ logger = logging.getLogger("HOD") @CurrentMPIComm.enable def __init__(self, halos, logMmin=13.031, sigma_logM=0.38, alpha=0.76, logM0=13.27, logM1=14.08, seed=None, use_cache=False, comm=None): params = {} params['logMmin'] = logMmin params['sigma_logM'] = sigma_logM params['alpha'] = alpha params['logM0'] = logM0 params['logM1'] = logM1 HODBase.__init__(self, halos, seed=seed, use_cache=use_cache, comm=comm, **params) def __repr__(self): names = ['logMmin', 'sigma_logM', 'alpha', 'logM0', 'logM1'] s = ', '.join(['%s=%.2f' %(k,self.attrs[k]) for k in names]) return "HODCatalog(%s)" %s
[docs] def __makemodel__(self): """ Return the Zheng 07 HOD model. This model evaluates Eqs. 2 and 5 of Zheng et al. 2007 """ from halotools.empirical_models import HodModelFactory from halotools.empirical_models import Zheng07Sats, Zheng07Cens from halotools.empirical_models import NFWPhaseSpace, TrivialPhaseSpace model = {} # use concentration from halo table if 'halo_nfw_conc' in self._halos.halo_table.colnames: conc_mass_model = 'direct_from_halo_catalog' # use empirical prescription for c(M) else: conc_mass_model = 'dutton_maccio14' # occupation functions cenocc = Zheng07Cens(prim_haloprop_key=self.mass) satocc = Zheng07Sats(prim_haloprop_key=self.mass, modulate_with_cenocc=True, cenocc_model=cenocc) satocc._suppress_repeated_param_warning = True # add to model model['centrals_occupation'] = cenocc model['satellites_occupation'] = satocc # profile functions kws = {'cosmology':self.cosmo.to_astropy(), 'redshift':self.attrs['redshift'], 'mdef':self.attrs['mdef']} model['centrals_profile'] = TrivialPhaseSpace(**kws) model['satellites_profile'] = NFWPhaseSpace(conc_mass_model=conc_mass_model, **kws) return HodModelFactory(**model)