Source code for nbodykit.io.bigfile

from __future__ import absolute_import
# The future import is important: without it, under Python 2.7 this
# module would try to import itself, due to the unfortunate name
# conflict with the bigfile package!

import numpy

from .base import FileType
from six import string_types
import json
from nbodykit.utils import JSONDecoder

# sentinel value indicating that the header block should be detected automatically
class Automatic: pass

class BigFile(FileType):
    """
    A file object to handle the reading of columns of data from
    a :mod:`bigfile` file.

    :mod:`bigfile` is a reproducible, massively parallel IO library for
    large, hierarchical datasets, and it is the default format of the
    `FastPM <https://github.com/rainwoodman/fastpm>`_ and the
    `MP-Gadget <https://github.com/bluetides-project/MP-Gadget>`_
    simulations.

    See also: https://github.com/rainwoodman/bigfile

    Parameters
    ----------
    path : str
        the name of the directory holding the bigfile data
    exclude : list of str, optional
        the data sets to exclude from loading within bigfile; default
        is the header
    header : str, optional
        the path to the header; default is to use a column 'Header'.
        It is relative to the file, not the dataset.
    dataset : str
        load a specific dataset from the bigfile; default is to start
        from the root
    """
    def __init__(self, path, exclude=None, header=Automatic, dataset='./'):

        if not dataset.endswith('/'): dataset = dataset + '/'

        import bigfile
        self.dataset = dataset
        self.path = path

        # store the attributes
        self.attrs = {}

        # open the file and read the metadata
        with bigfile.BigFile(filename=path) as ff:
            columns = ff[self.dataset].blocks
            if header is Automatic:
                for header in ['Header', 'header', './']:
                    if header in columns: break

            if exclude is None:
                # by default, exclude only the header
                exclude = [header]

            columns = list(set(columns) - set(exclude))

            ds = bigfile.BigData(ff[self.dataset], columns)

            # set the data type and size
            self.dtype = ds.dtype
            self.size = ds.size

            header = ff[header]
            attrs = header.attrs

            # copy over the attrs
            for k in attrs.keys():

                # load a JSON representation if the string starts with 'json://'
                if isinstance(attrs[k], string_types) and attrs[k].startswith('json://'):
                    self.attrs[k] = json.loads(attrs[k][7:], cls=JSONDecoder)
                # copy over an array
                else:
                    self.attrs[k] = numpy.array(attrs[k], copy=True)
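    # A hypothetical illustration (not part of the original module): a header
    # attribute stored as the string 'json://{"BoxSize": 1380.0}' would be
    # decoded above via json.loads('{"BoxSize": 1380.0}', cls=JSONDecoder),
    # since 'json://' is 7 characters long; a plain numeric attribute is
    # instead copied into a numpy array as-is.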
    def read(self, columns, start, stop, step=1):
        """
        Read the specified column(s) over the given range, as a
        structured array that can be indexed by column name

        'start' and 'stop' should be between 0 and :attr:`size`,
        which is the total size of the binary file (in particles)
        """
        import bigfile
        if isinstance(columns, string_types): columns = [columns]

        with bigfile.BigFile(filename=self.path)[self.dataset] as f:
            ds = bigfile.BigData(f, columns)
            return ds[start:stop][::step]
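
# A minimal usage sketch (not part of the module itself; the snapshot path
# and the '1/' dataset name are assumptions, following FastPM's on-disk
# layout of one sub-dataset per particle species):
#
#     from nbodykit.io.bigfile import BigFile
#
#     f = BigFile('/path/to/fastpm_snapshot', dataset='1/')
#     print(f.size, f.dtype)   # number of particles and the combined dtype
#     print(f.attrs)           # header attributes copied at open time
#
#     # read the first 1024 rows of the 'Position' column
#     pos = f.read(['Position'], 0, 1024)['Position']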