Skip to content

Class: HdfLoader

HDF Loader holds the filename and hdfmap for an HDF file. The hdfmap contains all the dataset paths and a namespace, allowing data to be read from the file using variable names while loading only the datasets required for each operation.

E.G.

hdf = HdfLoader('file.hdf')
[data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
data = hdf.eval('dataset_name_1 * 100 + 2')
string = hdf.format('my data is {dataset_name_1:.2f}')
print(hdf.summary())
Source code in src/hdfmap/reloader_class.py
class HdfLoader:
    """
    HDF Loader contains the filename and hdfmap for a HDF file, the hdfmap contains all the dataset paths and a
    namespace, allowing data to be called from the file using variable names, loading only the required datasets
    for each operation.

    ### E.G.
        hdf = HdfLoader('file.hdf')
        [data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
        data = hdf.eval('dataset_name_1 * 100 + 2')
        string = hdf.format('my data is {dataset_name_1:.2f}')
        print(hdf.summary())
    """

    def __init__(self, hdf_filename: str, hdf_map: HdfMap | NexusMap | None = None):
        """
        :param hdf_filename: path of an hdf file
        :param hdf_map: pre-built HdfMap/NexusMap, or None to generate one from the file
        """
        self.filename = hdf_filename
        # generate the map from the file unless the caller supplied one
        if hdf_map is None:
            self.map = create_hdf_map(hdf_filename)
        else:
            self.map = hdf_map
        # when True, eval/format look names up in _local_data before opening the file
        self._prefer_local_data = True
        self._local_data = {
            'filepath': os.path.abspath(hdf_filename),
            'filename': os.path.basename(hdf_filename),
        }

    def __repr__(self):
        # BUG FIX: previously hard-coded 'HdfReloader', which no longer matches this
        # class name. Using type(self).__name__ also stays correct for subclasses.
        return f"{type(self).__name__}('{self.filename}')"

    def __str__(self):
        # full metadata summary, read freshly from the file
        with self._load() as hdf:
            out = self.map.info_data(hdf)
        return out

    def __getitem__(self, item):
        return self.get_data(item)

    def __contains__(self, item):
        return item in self.map or item in self._local_data

    def __call__(self, expression):
        return self.eval(expression)

    def _load(self) -> h5py.File:
        """Open the hdf file for reading; callers close it via the `with` statement"""
        return load_hdf(self.filename)

    def add_local(self, **kwargs):
        """Add value to the local namespace, used in eval and format"""
        self._local_data.update(kwargs)

    def live_mode(self, live_mode: bool = True):
        """
        Activate the option to reload data from the file each time, rather than from local data

        self.eval('cmd') -> default will load 'cmd' from local storage if available, or from the file
        self.live_mode() -> self.eval('cmd') will return 'cmd' from the file using hdfmap
        self.live_mode(False) -> returns to default behavior
        """
        # BUG FIX: live mode means local data is NOT preferred. The previous code
        # assigned live_mode directly, inverting the behaviour documented above
        # (live_mode() left the default prefer-local behaviour unchanged).
        self._prefer_local_data = not live_mode

    def get_hdf_path(self, name_or_path: str) -> str | None:
        """Return hdf path of object in HdfMap, or None if not found"""
        return self.map.get_path(name_or_path)

    def find_hdf_paths(self, string: str, name_only: bool = True, whole_word: bool = False) -> list[str]:
        """
        Find any dataset paths that contain the given string argument
        :param string: str to find in list of datasets
        :param name_only: if True, search only the name of the dataset, not the full path
        :param whole_word: if True, only match complete names (case-insensitive)
        :return: list of hdf paths
        """
        return self.map.find_paths(string, name_only, whole_word)

    def find_names(self, string: str) -> list[str]:
        """
        Find any dataset names that contain the given string argument, searching names in self.combined
        :param string: str to find in list of datasets
        :return: list of names
        """
        return self.map.find_names(string)

    def get_data(self, *name_or_path, index: slice = (), default=None, direct_load=False):
        """
        Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
        See hdfmap.eval_functions.dataset2data for more information.
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
        :return: single converted value for one name, list of values for several names
        """
        with self._load() as hdf:
            out = [self.map.get_data(hdf, name, index, default, direct_load) for name in name_or_path]
        # a single requested name is unwrapped from the list for convenience
        if len(name_or_path) == 1:
            return out[0]
        return out

    def get_string(self, *name_or_path, index: slice = (), default='', units=False):
        """
        Return data from dataset in file, converted into summary string
        See hdfmap.eval_functions.dataset2data for more information.
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param units: if True and attribute 'units' available, append this to the result
        :return: single string for one name, list of strings for several names
        """
        with self._load() as hdf:
            out = [self.map.get_string(hdf, name, index, default, units) for name in name_or_path]
        # a single requested name is unwrapped from the list for convenience
        if len(name_or_path) == 1:
            return out[0]
        return out

    def get_image(self, index: slice | None = None) -> np.ndarray:
        """
        Get image data from file, using default image path
        :param index: (slice,) or None to take the middle image
        :return: numpy array of image
        """
        with self._load() as hdf:
            return self.map.get_image(hdf, index)

    def get_metadata(self, defaults=None):
        """
        Return metadata from the file (values associated with the hdfmap metadata namespace)
        :param defaults: passed to HdfMap.get_metadata as the default for missing values
        """
        with self._load() as hdf:
            return self.map.get_metadata(hdf, default=defaults)

    def get_scannables(self):
        """Return scannables from file (values associated with hdfmap.scannables)"""
        with self._load() as hdf:
            return self.map.get_scannables(hdf)

    def summary(self) -> str:
        """Return string summary of datasets"""
        with self._load() as hdf:
            return self.map.create_dataset_summary(hdf)

    def eval(self, expression: str, default=DEFAULT, prefer_local: bool | None = None, raise_errors: bool = True):
        """
        Evaluate an expression using the namespace of the hdf file

        The following patterns are allowed:
         - 'filename': str, name of hdf_file
         - 'filepath': str, full path of hdf_file
         - '_*name*': str hdf path of *name*
         - '__*name*': str internal name of *name* (e.g. for 'axes')
         - 's_*name*': string representation of dataset (includes units if available)
         - 'd_*name*': return dataset object. **warning**: may result in file not closing on completion
         - '*name*@attr': returns attribute of dataset *name*
         - '*name*?(default)': returns default if *name* doesn't exist
         - '(name1|name2|name3)': returns the first available of the names
         - '(name1|name2?(default))': returns the first available name or default

        :param expression: str expression to be evaluated
        :param default: returned if varname not in namespace
        :param prefer_local: if True, uses values in local_data first if available
        :param raise_errors: raise exceptions if True, otherwise return str error message as result and log the error
        :return: eval(expression)
        """
        prefer_local = self._prefer_local_data if prefer_local is None else prefer_local
        # short-circuit: serve straight from the local namespace without opening the file
        if prefer_local and expression in self._local_data:
            return self._local_data[expression]
        with self._load() as hdf:
            return self.map.eval(
                hdf_file=hdf,
                expression=expression,
                default=default,
                local_data=self._local_data,
                prefer_local=prefer_local,
                raise_errors=raise_errors
            )

    def format(self, expression: str, default=DEFAULT, prefer_local: bool | None = None, raise_errors: bool = True) -> str:
        """
        Evaluate a formatted string expression using the namespace of the hdf file
        Identifiers from the namespace can be called inside {} as a
        formatted f-string.

        E.G.
            expression = '{scan_command} E={mean(incident_energy):.2f}'
            output = scan.format(expression)

        :param expression: str expression using {name} format specifiers
        :param default: returned if varname not in namespace
        :param prefer_local: if True, uses values in local_data first if available
        :param raise_errors: raise exceptions if True, otherwise return str error message
        :return: eval_hdf(f"expression")
        """
        with self._load() as hdf:
            return self.map.format_hdf(
                hdf_file=hdf,
                expression=expression,
                default=default,
                local_data=self._local_data,
                prefer_local=self._prefer_local_data if prefer_local is None else prefer_local,
                raise_errors=raise_errors
            )

add_local(**kwargs)

Add value to the local namespace, used in eval and format

Source code in src/hdfmap/reloader_class.py
def add_local(self, **kwargs):
    """Store the given keyword values in the local namespace used by eval and format"""
    for key, value in kwargs.items():
        self._local_data[key] = value

eval(expression, default=DEFAULT, prefer_local=None, raise_errors=True)

Evaluate an expression using the namespace of the hdf file

The following patterns are allowed: 'filename' (str, name of hdf_file); 'filepath' (str, full path of hdf_file); '_name' (str hdf path of name); '__name' (str internal name of name, e.g. for 'axes'); 's_name' (string representation of dataset, includes units if available); 'd_name' (the dataset object — warning: may result in the file not closing on completion); 'name@attr' (attribute of dataset name); 'name?(default)' (default if name doesn't exist); '(name1|name2|name3)' (the first available of the names); '(name1|name2?(default))' (the first available name, or default).

Parameters:

Name Type Description Default
expression str

str expression to be evaluated

required
default

returned if varname not in namespace

DEFAULT
prefer_local bool | None

if True, uses values in local_data first if available

None
raise_errors bool

raise exceptions if True, otherwise return str error message as result and log the error

True

Returns:

Type Description

eval(expression)

Source code in src/hdfmap/reloader_class.py
def eval(self, expression: str, default=DEFAULT, prefer_local: bool | None = None, raise_errors: bool = True):
    """
    Evaluate an expression within the namespace of the hdf file

    Recognised patterns:
     - 'filename': str, name of hdf_file
     - 'filepath': str, full path of hdf_file
     - '_*name*': str hdf path of *name*
     - '__*name*': str internal name of *name* (e.g. for 'axes')
     - 's_*name*': string representation of dataset (includes units if available)
     - 'd_*name*': return dataset object. **warning**: may result in file not closing on completion
     - '*name*@attr': returns attribute of dataset *name*
     - '*name*?(default)': returns default if *name* doesn't exist
     - '(name1|name2|name3)': returns the first available of the names
     - '(name1|name2?(default))': returns the first available name or default

    :param expression: str expression to be evaluated
    :param default: returned if varname not in namespace
    :param prefer_local: if True, look values up in local_data before the file
    :param raise_errors: raise exceptions if True, otherwise return str error message as result and log the error
    :return: eval(expression)
    """
    if prefer_local is None:
        prefer_local = self._prefer_local_data
    # short-circuit: serve straight from the local namespace without touching the file
    if prefer_local and expression in self._local_data:
        return self._local_data[expression]
    with self._load() as open_file:
        return self.map.eval(
            hdf_file=open_file,
            expression=expression,
            default=default,
            local_data=self._local_data,
            prefer_local=prefer_local,
            raise_errors=raise_errors,
        )

find_hdf_paths(string, name_only=True, whole_word=False)

Find any dataset paths that contain the given string argument

Parameters:

Name Type Description Default
string str

str to find in list of datasets

required
name_only bool

if True, search only the name of the dataset, not the full path

True
whole_word bool

if True, only match complete names (case-insensitive)

False

Returns:

Type Description
list[str]

list of hdf paths

Source code in src/hdfmap/reloader_class.py
def find_hdf_paths(self, string: str, name_only: bool = True, whole_word: bool = False) -> list[str]:
    """
    Search the file map for dataset paths containing the given string
    :param string: text to look for in the list of datasets
    :param name_only: if True, search just the dataset name rather than the full path
    :param whole_word: if True, only match complete names (case-insensitive)
    :return: list of matching hdf paths
    """
    matches = self.map.find_paths(string, name_only, whole_word)
    return matches

find_names(string)

Find any dataset names that contain the given string argument, searching names in self.combined

Parameters:

Name Type Description Default
string str

str to find in list of datasets

required

Returns:

Type Description
list[str]

list of names

Source code in src/hdfmap/reloader_class.py
def find_names(self, string: str) -> list[str]:
    """
    Search the combined namespace for dataset names containing the given string
    :param string: text to look for within dataset names
    :return: list of matching names
    """
    matches = self.map.find_names(string)
    return matches

format(expression, default=DEFAULT, prefer_local=None, raise_errors=True)

Evaluate a formatted string expression using the namespace of the hdf file Identifiers from the namespace can be called inside {} as a formatted f-string.

E.G. expression = '{scan_command} E={mean(incident_energy):.2f}' output = scan.format(expression)

Parameters:

Name Type Description Default
expression str

str expression using {name} format specifiers

required
default

returned if varname not in namespace

DEFAULT
prefer_local bool | None

if True, uses values in local_data first if available

None
raise_errors bool

raise exceptions if True, otherwise return str error message

True

Returns:

Type Description
str

eval_hdf(f"expression")

Source code in src/hdfmap/reloader_class.py
def format(self, expression: str, default=DEFAULT, prefer_local: bool | None = None, raise_errors: bool = True) -> str:
    """
    Evaluate a formatted string expression within the namespace of the hdf file.
    Identifiers from the namespace may appear inside {} exactly as in an f-string.

    E.G.
        expression = '{scan_command} E={mean(incident_energy):.2f}'
        output = scan.format(expression)

    :param expression: str expression using {name} format specifiers
    :param default: returned if varname not in namespace
    :param prefer_local: if True, look values up in local_data before the file
    :param raise_errors: raise exceptions if True, otherwise return str error message
    :return: eval_hdf(f"expression")
    """
    if prefer_local is None:
        prefer_local = self._prefer_local_data
    with self._load() as open_file:
        return self.map.format_hdf(
            hdf_file=open_file,
            expression=expression,
            default=default,
            local_data=self._local_data,
            prefer_local=prefer_local,
            raise_errors=raise_errors,
        )

get_data(*name_or_path, index=(), default=None, direct_load=False)

Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects See hdfmap.eval_functions.dataset2data for more information.

Parameters:

Name Type Description Default
name_or_path

str name or path pointing to dataset in hdf file

()
index slice

index or slice of data in hdf file

()
default

value to return if name not found in hdf file

None
direct_load

return str, datetime or squeezed array if False, otherwise load data directly

False

Returns:

Type Description

dataset2data(dataset) -> datetime, str or squeezed array as required.

Source code in src/hdfmap/reloader_class.py
def get_data(self, *name_or_path, index: slice = (), default=None, direct_load=False):
    """
    Load one or more datasets from the file, each converted to datetime, str or a squeezed numpy array.
    See hdfmap.eval_functions.dataset2data for details of the conversion.
    :param name_or_path: one or more names or hdf paths pointing at datasets in the file
    :param index: index or slice applied to the data in the file
    :param default: value returned for any name not found in the file
    :param direct_load: convert to str/datetime/squeezed array if False, otherwise load the data directly
    :return: a single converted value when one name is given, otherwise a list of values
    """
    with self._load() as open_file:
        results = [
            self.map.get_data(open_file, item, index, default, direct_load)
            for item in name_or_path
        ]
    # unwrap the list when exactly one name was requested
    return results[0] if len(name_or_path) == 1 else results

get_hdf_path(name_or_path)

Return hdf path of object in HdfMap

Source code in src/hdfmap/reloader_class.py
def get_hdf_path(self, name_or_path: str) -> str | None:
    """Return hdf path of object in HdfMap"""
    return self.map.get_path(name_or_path)

get_image(index=None)

Get image data from file, using default image path

Parameters:

Name Type Description Default
index slice

(slice,) or None to take the middle image

None

Returns:

Type Description
ndarray

numpy array of image

Source code in src/hdfmap/reloader_class.py
def get_image(self, index: slice = None) -> np.ndarray:
    """
    Load image data from the file's default image path
    :param index: (slice,) selecting the frame, or None to take the middle image
    :return: numpy array of the image
    """
    with self._load() as open_file:
        image = self.map.get_image(open_file, index)
    return image

get_scannables()

Return scannables from file (values associated with hdfmap.scannables)

Source code in src/hdfmap/reloader_class.py
def get_scannables(self):
    """Return scannables from file (values associated with hdfmap.scannables)"""
    with self._load() as open_file:
        scannables = self.map.get_scannables(open_file)
    return scannables

get_string(*name_or_path, index=(), default='', units=False)

Return data from dataset in file, converted into summary string See hdfmap.eval_functions.dataset2data for more information.

Parameters:

Name Type Description Default
name_or_path

str name or path pointing to dataset in hdf file

()
index slice

index or slice of data in hdf file

()
default

value to return if name not found in hdf file

''
units

if True and attribute 'units' available, append this to the result

False

Returns:

Type Description

dataset2str(dataset) -> str

Source code in src/hdfmap/reloader_class.py
def get_string(self, *name_or_path, index: slice = (), default='', units=False):
    """
    Load one or more datasets from the file, each converted to a summary string.
    See hdfmap.eval_functions.dataset2data for details of the conversion.
    :param name_or_path: one or more names or hdf paths pointing at datasets in the file
    :param index: index or slice applied to the data in the file
    :param default: value returned for any name not found in the file
    :param units: if True and the 'units' attribute is available, append it to the result
    :return: a single string when one name is given, otherwise a list of strings
    """
    with self._load() as open_file:
        results = [
            self.map.get_string(open_file, item, index, default, units)
            for item in name_or_path
        ]
    # unwrap the list when exactly one name was requested
    return results[0] if len(name_or_path) == 1 else results

live_mode(live_mode=True)

Activate the option to reload data from the file each time, rather than from local data

self.eval('cmd') -> default will load 'cmd' from local storage if available, or from the file self.live_mode() -> self.eval('cmd') will return 'cmd' from the file using hdfmap self.live_mode(False) -> returns to default behavior

Source code in src/hdfmap/reloader_class.py
def live_mode(self, live_mode: bool = True):
    """
    Activate the option to reload data from the file each time, rather than from local data

    self.eval('cmd') -> default will load 'cmd' from local storage if available, or from the file
    self.live_mode() -> self.eval('cmd') will return 'cmd' from the file using hdfmap
    self.live_mode(False) -> returns to default behavior

    :param live_mode: if True, always read values from the file; if False, prefer cached local data
    """
    # BUG FIX: live mode means local data is NOT preferred. The previous code
    # stored live_mode directly, which inverted the behaviour documented above
    # (live_mode() left the default prefer-local behaviour unchanged).
    self._prefer_local_data = not live_mode

summary()

Return string summary of datasets

Source code in src/hdfmap/reloader_class.py
def summary(self) -> str:
    """Return string summary of datasets"""
    with self._load() as open_file:
        report = self.map.create_dataset_summary(open_file)
    return report