Code Description

HdfMap

hdfmap: map objects within an HDF5 file and create a dataset namespace.

Usage

HdfMap from NeXus file

from hdfmap import create_nexus_map, load_hdf
hmap = create_nexus_map('file.nxs')
with load_hdf('file.nxs') as nxs:
    address = hmap.get_address('energy')
    energy = nxs[address][()]
    string = hmap.format_hdf(nxs, "the energy is {energy:.2f} keV")
    d = hmap.get_dataholder(nxs)  # classic data table, d.scannable, d.metadata

Shortcuts - single file reloading class

from hdfmap import NexusLoader
scan = NexusLoader('file.nxs')
[data1, data2] = scan.get_data(*['dataset_name_1', 'dataset_name_2'])
data = scan.eval('dataset_name_1 * 100 + 2')
string = scan.format('my data is {dataset_name_1:.2f}')

Shortcuts - multifile load data

from hdfmap import hdf_data, hdf_eval, hdf_format, hdf_image
filenames = [f"file{n}.nxs" for n in range(100)]
all_data = hdf_data(filenames, 'dataset_name')
normalised_data = hdf_eval(filenames, 'total / Transmission / (rc / 300.)')
descriptions = hdf_format(filenames, 'Energy: {en:5.3f} keV')
image = hdf_image(filenames, index=31)

Copyright 2024-2025 Daniel G. Porter

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

By Dr Dan Porter Diamond Light Source Ltd 2024-2025

HdfLoader

HdfLoader contains the filename and HdfMap for an HDF file. The HdfMap contains all the dataset paths and a namespace, allowing data to be read from the file using variable names, loading only the datasets required for each operation.

E.G.
hdf = HdfLoader('file.hdf')
[data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
data = hdf.eval('dataset_name_1 * 100 + 2')
string = hdf.format('my data is {dataset_name_1:.2f}')
print(hdf.summary())
Source code in src/hdfmap/reloader_class.py
class HdfLoader:
    """
    HDF Loader contains the filename and hdfmap for a HDF file, the hdfmap contains all the dataset paths and a
    namespace, allowing data to be called from the file using variable names, loading only the required datasets
    for each operation.

    ### E.G.
        hdf = HdfLoader('file.hdf')
        [data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
        data = hdf.eval('dataset_name_1 * 100 + 2')
        string = hdf.format('my data is {dataset_name_1:.2f}')
        print(hdf.summary())
    """

    def __init__(self, hdf_filename: str, hdf_map: HdfMap | NexusMap | None = None):
        self.filename = hdf_filename
        if hdf_map is None:
            self.map = create_hdf_map(hdf_filename)
        else:
            self.map = hdf_map

    def __repr__(self):
        return f"HdfReloader('{self.filename}')"

    def __str__(self):
        with self._load() as hdf:
            out = self.map.info_data(hdf)
        return out

    def __getitem__(self, item):
        return self.get_data(item)

    def __call__(self, expression):
        return self.eval(expression)

    def _load(self) -> h5py.File:
        return load_hdf(self.filename)

    def get_hdf_path(self, name_or_path: str) -> str | None:
        """Return hdf path of object in HdfMap"""
        return self.map.get_path(name_or_path)

    def find_hdf_paths(self, string: str, name_only: bool = True, whole_word: bool = False) -> list[str]:
        """
        Find any dataset paths that contain the given string argument
        :param string: str to find in list of datasets
        :param name_only: if True, search only the name of the dataset, not the full path
        :param whole_word: if True, match only whole (case-insensitive) names
        :return: list of hdf paths
        """
        return self.map.find_paths(string, name_only, whole_word)

    def find_names(self, string: str) -> list[str]:
        """
        Find any dataset names that contain the given string argument, searching names in self.combined
        :param string: str to find in list of datasets
        :return: list of names
        """
        return self.map.find_names(string)

    def get_data(self, *name_or_path, index: slice = (), default=None, direct_load=False):
        """
        Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
        See hdfmap.eval_functions.dataset2data for more information.
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
        :return: dataset2data(dataset) -> datetime, str or squeezed array as required.
        """
        with self._load() as hdf:
            out = [self.map.get_data(hdf, name, index, default, direct_load) for name in name_or_path]
        if len(name_or_path) == 1:
            return out[0]
        return out

    def get_string(self, *name_or_path, index: slice = (), default='', units=False):
        """
        Return data from dataset in file, converted into summary string
        See hdfmap.eval_functions.dataset2data for more information.
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param units: if True and attribute 'units' available, append this to the result
        :return: dataset2str(dataset) -> str
        """
        with self._load() as hdf:
            out = [self.map.get_string(hdf, name, index, default, units) for name in name_or_path]
        if len(name_or_path) == 1:
            return out[0]
        return out

    def get_image(self, index: slice = None) -> np.ndarray:
        """
        Get image data from file, using default image path
        :param index: (slice,) or None to take the middle image
        :return: numpy array of image
        """
        with self._load() as hdf:
            return self.map.get_image(hdf, index)

    def get_metadata(self, defaults=None):
        with self._load() as hdf:
            return self.map.get_metadata(hdf, default=defaults)

    def get_scannables(self):
        """Return scannables from file (values associated with hdfmap.scannables)"""
        with self._load() as hdf:
            return self.map.get_scannables(hdf)

    def summary(self) -> str:
        """Return string summary of datasets"""
        with self._load() as hdf:
            return self.map.create_dataset_summary(hdf)

    def eval(self, expression: str, default=DEFAULT):
        """
        Evaluate an expression using the namespace of the hdf file
        :param expression: str expression to be evaluated
        :param default: returned if varname not in namespace
        :return: eval(expression)
        """
        with self._load() as hdf:
            return self.map.eval(hdf, expression, default)

    def format(self, expression: str, default=DEFAULT):
        """
        Evaluate a formatted string expression using the namespace of the hdf file
        :param expression: str expression using {name} format specifiers
        :param default: returned if varname not in namespace
        :return: eval_hdf(f"expression")
        """
        with self._load() as hdf:
            return self.map.format_hdf(hdf, expression, default)

eval(expression, default=DEFAULT)

Evaluate an expression using the namespace of the hdf file

Parameters:

- expression (str): expression to be evaluated [required]
- default: value returned if a variable name is not in the namespace [default: DEFAULT]

Returns:

- eval(expression)

Source code in src/hdfmap/reloader_class.py
def eval(self, expression: str, default=DEFAULT):
    """
    Evaluate an expression using the namespace of the hdf file
    :param expression: str expression to be evaluated
    :param default: returned if varname not in namespace
    :return: eval(expression)
    """
    with self._load() as hdf:
        return self.map.eval(hdf, expression, default)
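
A minimal usage sketch (the filename and the dataset names 'total' and 'Transmission' are assumptions):

hdf = HdfLoader('file.hdf')
normalised = hdf.eval('total / Transmission')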

find_hdf_paths(string, name_only=True, whole_word=False)

Find any dataset paths that contain the given string argument

Parameters:

- string (str): string to find in the list of datasets [required]
- name_only (bool): if True, search only the dataset name, not the full path [default: True]
- whole_word (bool): if True, match only whole (case-insensitive) names [default: False]

Returns:

- list[str]: list of hdf paths

Source code in src/hdfmap/reloader_class.py
def find_hdf_paths(self, string: str, name_only: bool = True, whole_word: bool = False) -> list[str]:
    """
    Find any dataset paths that contain the given string argument
    :param string: str to find in list of datasets
    :param name_only: if True, search only the name of the dataset, not the full path
    :param whole_word: if True, match only whole (case-insensitive) names
    :return: list of hdf paths
    """
    return self.map.find_paths(string, name_only, whole_word)
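
A short sketch; the filename and search term are arbitrary, and every matching dataset path is returned:

hdf = HdfLoader('file.hdf')
energy_paths = hdf.find_hdf_paths('energy')  # all dataset paths containing 'energy'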

find_names(string)

Find any dataset names that contain the given string argument, searching names in self.combined

Parameters:

- string (str): string to find in the list of datasets [required]

Returns:

- list[str]: list of names

Source code in src/hdfmap/reloader_class.py
def find_names(self, string: str) -> list[str]:
    """
    Find any dataset names that contain the given string argument, searching names in self.combined
    :param string: str to find in list of datasets
    :return: list of names
    """
    return self.map.find_names(string)

format(expression, default=DEFAULT)

Evaluate a formatted string expression using the namespace of the hdf file

Parameters:

- expression (str): expression using {name} format specifiers [required]
- default: value returned if a variable name is not in the namespace [default: DEFAULT]

Returns:

- eval_hdf(f"expression")

Source code in src/hdfmap/reloader_class.py
def format(self, expression: str, default=DEFAULT):
    """
    Evaluate a formatted string expression using the namespace of the hdf file
    :param expression: str expression using {name} format specifiers
    :param default: returned if varname not in namespace
    :return: eval_hdf(f"expression")
    """
    with self._load() as hdf:
        return self.map.format_hdf(hdf, expression, default)
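
For example, assuming the namespace contains a dataset named 'en':

hdf = HdfLoader('file.hdf')
print(hdf.format('energy = {en:.3f} keV'))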

get_data(*name_or_path, index=(), default=None, direct_load=False)

Return data from a dataset in the file, converted into datetime, str or squeezed numpy.array objects. See hdfmap.eval_functions.dataset2data for more information.

Parameters:

- name_or_path: str name or path pointing to a dataset in the hdf file [default: ()]
- index (slice): index or slice of data in the hdf file [default: ()]
- default: value returned if the name is not found in the hdf file [default: None]
- direct_load: if False, return str, datetime or squeezed array; otherwise load the data directly [default: False]

Returns:

- dataset2data(dataset): datetime, str or squeezed array as required

Source code in src/hdfmap/reloader_class.py
def get_data(self, *name_or_path, index: slice = (), default=None, direct_load=False):
    """
    Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
    See hdfmap.eval_functions.dataset2data for more information.
    :param name_or_path: str name or path pointing to dataset in hdf file
    :param index: index or slice of data in hdf file
    :param default: value to return if name not found in hdf file
    :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
    :return: dataset2data(dataset) -> datetime, str or squeezed array as required.
    """
    with self._load() as hdf:
        out = [self.map.get_data(hdf, name, index, default, direct_load) for name in name_or_path]
    if len(name_or_path) == 1:
        return out[0]
    return out
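
Note the return shape: a single name gives a bare value, several names give a list. A sketch with assumed dataset names:

energy = hdf.get_data('en')                      # single value
en, trans = hdf.get_data('en', 'Transmission')   # list of two values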

get_hdf_path(name_or_path)

Return hdf path of object in HdfMap

Source code in src/hdfmap/reloader_class.py
def get_hdf_path(self, name_or_path: str) -> str | None:
    """Return hdf path of object in HdfMap"""
    return self.map.get_path(name_or_path)

get_image(index=None)

Get image data from file, using default image path

Parameters:

- index (slice): (slice,) or None to take the middle image [default: None]

Returns:

- ndarray: numpy array of the image

Source code in src/hdfmap/reloader_class.py
def get_image(self, index: slice = None) -> np.ndarray:
    """
    Get image data from file, using default image path
    :param index: (slice,) or None to take the middle image
    :return: numpy array of image
    """
    with self._load() as hdf:
        return self.map.get_image(hdf, index)
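
A sketch, assuming the map identified a default image dataset:

frame = hdf.get_image()            # index=None -> middle image of the scan
first = hdf.get_image(index=(0,))  # tuple of slices/indices, per the (slice,) convention above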

get_scannables()

Return scannables from file (values associated with hdfmap.scannables)

Source code in src/hdfmap/reloader_class.py
def get_scannables(self):
    """Return scannables from file (values associated with hdfmap.scannables)"""
    with self._load() as hdf:
        return self.map.get_scannables(hdf)

get_string(*name_or_path, index=(), default='', units=False)

Return data from a dataset in the file, converted into a summary string. See hdfmap.eval_functions.dataset2data for more information.

Parameters:

- name_or_path: str name or path pointing to a dataset in the hdf file [default: ()]
- index (slice): index or slice of data in the hdf file [default: ()]
- default: value returned if the name is not found in the hdf file [default: '']
- units: if True and the 'units' attribute is available, append it to the result [default: False]

Returns:

- dataset2str(dataset): str

Source code in src/hdfmap/reloader_class.py
def get_string(self, *name_or_path, index: slice = (), default='', units=False):
    """
    Return data from dataset in file, converted into summary string
    See hdfmap.eval_functions.dataset2data for more information.
    :param name_or_path: str name or path pointing to dataset in hdf file
    :param index: index or slice of data in hdf file
    :param default: value to return if name not found in hdf file
    :param units: if True and attribute 'units' available, append this to the result
    :return: dataset2str(dataset) -> str
    """
    with self._load() as hdf:
        out = [self.map.get_string(hdf, name, index, default, units) for name in name_or_path]
    if len(name_or_path) == 1:
        return out[0]
    return out
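
For example, assuming a dataset named 'en' with a 'units' attribute:

print(hdf.get_string('en', units=True))  # e.g. '3.5 keV'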

summary()

Return string summary of datasets

Source code in src/hdfmap/reloader_class.py
def summary(self) -> str:
    """Return string summary of datasets"""
    with self._load() as hdf:
        return self.map.create_dataset_summary(hdf)

NexusLoader

Bases: HdfLoader

NexusLoader contains the filename and hdfmap for a NeXus file; the hdfmap contains all the dataset paths and a namespace, allowing data to be called from the file using variable names, loading only the required datasets for each operation.

E.G.
hdf = NexusLoader('file.hdf')
[data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
data = hdf.eval('dataset_name_1 * 100 + 2')
string = hdf.format('my data is {dataset_name_1:.2f}')

Source code in src/hdfmap/reloader_class.py
class NexusLoader(HdfLoader):
    """
    Nexus Loader
    contains the filename and hdfmap for a NeXus file, the hdfmap contains all the dataset paths and a
    namespace, allowing data to be called from the file using variable names, loading only the required datasets
    for each operation.
    E.G.
        hdf = NexusLoader('file.hdf')
        [data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
        data = hdf.eval('dataset_name_1 * 100 + 2')
        string = hdf.format('my data is {dataset_name_1:.2f}')
    """

    def __init__(self, nxs_filename: str, hdf_map: NexusMap | None = None):
        if not hdf_map:
            hdf_map = create_nexus_map(nxs_filename)
        super().__init__(nxs_filename, hdf_map)

    def get_plot_data(self) -> dict:
        """Return dict of useful plot data"""
        with self._load() as hdf:
            return self.map.get_plot_data(hdf)

get_plot_data()

Return dict of useful plot data

Source code in src/hdfmap/reloader_class.py
def get_plot_data(self) -> dict:
    """Return dict of useful plot data"""
    with self._load() as hdf:
        return self.map.get_plot_data(hdf)

compare_maps(map1, map2)

Compare two HdfMap objects

Source code in src/hdfmap/file_functions.py
def compare_maps(map1: HdfMap | NexusMap, map2: HdfMap | NexusMap) -> str:
    """
    Compare two HdfMap objects
    """
    missing_in_2 = []
    missing_in_1 = []
    different = []
    same = []
    for name1, path1 in map1.combined.items():
        if name1 in map2.combined:
            path2 = map2.combined[name1]
            if path2 != path1:
                different.append(f"{name1}: {path1} != {path2}")
            dataset1 = map1.datasets[path1]
            dataset2 = map2.datasets[path2]
            if dataset1.shape != dataset2.shape:
                different.append(f"{name1}: {dataset1.shape}, {dataset2.shape}")
            else:
                same.append(f"{name1}: {dataset1.shape} : {path1}, {path2}")
        else:
            missing_in_2.append(f"{name1}: {path1}")

    for name2, path2 in map2.combined.items():
        if name2 not in map1.combined:
            missing_in_1.append(f"{name2}: {path2}")

    output = f"Comparing:\n  {map1.filename}, with\n  {map2.filename}\n\n"
    output += "Different items:\n  " + '\n  '.join(different)
    output += f"\n\nMissing in {map1.filename}:\n  " + '\n  '.join(missing_in_1)
    output += f"\n\nMissing in {map2.filename}:\n  " + '\n  '.join(missing_in_2)
    output += '\n'
    return output
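
A usage sketch with two hypothetical scan files:

map1 = create_nexus_map('scan1.nxs')
map2 = create_nexus_map('scan2.nxs')
print(compare_maps(map1, map2))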

create_hdf_map(hdf_filename)

Create a HdfMap from a hdf file

Parameters:

- hdf_filename (str): filename of the hdf file [required]

Returns:

- HdfMap

Source code in src/hdfmap/file_functions.py
def create_hdf_map(hdf_filename: str) -> HdfMap:
    """
    Create a HdfMap from a hdf file
    :param hdf_filename: str filename of hdf file
    :return: HdfMap
    """
    with load_hdf(hdf_filename) as hdf:
        hdf_map = HdfMap(hdf)
    return hdf_map

create_nexus_map(hdf_filename, groups=None, default_entry_only=False)

Create a HdfMap from a NeXus file, loading default parameters and allowing a reduced, single entry map

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- groups (None | list[str]): list of groups to collect datasets from [default: None]
- default_entry_only (bool): if True, only the first or default entry is loaded [default: False]

Returns:

- NexusMap

Source code in src/hdfmap/file_functions.py
def create_nexus_map(hdf_filename: str, groups: None | list[str] = None,
                     default_entry_only: bool = False) -> NexusMap:
    """
    Create a HdfMap from a NeXus file, loading default parameters and allowing a reduced, single entry map
    :param hdf_filename: str filename of hdf file
    :param groups: list of groups to collect datasets from
    :param default_entry_only: if True, only the first or default entry will be loaded
    :return: NexusMap
    """
    hdf_map = NexusMap()
    with load_hdf(hdf_filename) as hdf:
        hdf_map.populate(hdf, groups=groups, default_entry_only=default_entry_only)
    return hdf_map
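
For example, a reduced map built from only the default entry (the group name 'measurement' is an assumption):

hmap = create_nexus_map('file.nxs', groups=['measurement'], default_entry_only=True)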

hdf_compare(hdf_filename1, hdf_filename2, all_links=False)

Compare hdf tree structure between two files

Parameters:

- hdf_filename1 (str): filename of an hdf file [required]
- hdf_filename2 (str): filename of an hdf file [required]
- all_links: bool, if True, also show soft links [default: False]

Returns:

- str

Source code in src/hdfmap/hdf_loader.py
def hdf_compare(hdf_filename1: str, hdf_filename2: str, all_links=False) -> str:
    """
    Compare hdf tree structure between two files
    :param hdf_filename1: filename of hdf file
    :param hdf_filename2: filename of hdf file
    :param all_links: bool, if True, also show soft links
    :return: str
    """
    datasets1 = hdf_dataset_list(hdf_filename1, all_links)
    datasets2 = hdf_dataset_list(hdf_filename2, all_links)

    # both = [ds for ds in datasets1 if ds in datasets2]
    only_in_1 = '\n  '.join([ds for ds in datasets1 if ds not in datasets2])
    only_in_2 = '\n  '.join([ds for ds in datasets2 if ds not in datasets1])

    output = f"Compare\n    {hdf_filename1}, with\n    {hdf_filename2}\n\n"
    output += f"Datasets only in {os.path.basename(hdf_filename1)}:\n\n"
    output += f"  {only_in_1}\n"
    output += f"Datasets only in {os.path.basename(hdf_filename2)}:\n\n"
    output += f"  {only_in_2}\n"
    return output

hdf_data(filenames, name_or_path, hdf_map=None, index=(), default=None, fixed_output=False)

General purpose function to retrieve data from HDF files

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- name_or_path (str | list[str]): name(s) or path(s) of HDF datasets [required]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- index: dataset index or slice [default: ()]
- default: value returned if the dataset doesn't exist in a file [default: None]
- fixed_output: if True, always return a list of lists [default: False]

Returns:

- list[files: list[names]]

Source code in src/hdfmap/file_functions.py
def hdf_data(filenames: str | list[str], name_or_path: str | list[str], hdf_map: HdfMap = None,
             index=(), default=None, fixed_output=False):
    """
    General purpose function to retrieve data from HDF files
    :param filenames: str or list of str - file paths
    :param name_or_path: str or list of str - names or paths of HDF datasets
    :param hdf_map: HdfMap object, or None to generate from first file
    :param index: dataset index or slice
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list of list
    :return if single file, single dataset: single value
    :return if multi file or multi dataset: list, len(filenames) or len(name_or_path)
    :return if multi file and multi dataset: list[files: list[names]]
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    name_or_path = as_str_list(name_or_path)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append([hdf_map.get_data(hdf, name, index=index, default=default) for name in name_or_path])
    if fixed_output:
        return out
    if len(filenames) == 1 and len(name_or_path) == 1:
        return out[0][0]
    if len(filenames) == 1 and len(name_or_path) > 1:
        return out[0]
    if len(name_or_path) == 1:
        return [val[0] for val in out]
    return out
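
A sketch of the output shapes (file and dataset names are assumptions):

val = hdf_data('file1.nxs', 'en')                  # single file, single name: single value
vals = hdf_data(['file1.nxs', 'file2.nxs'], 'en')  # multi file: list, len(filenames)
table = hdf_data(['file1.nxs', 'file2.nxs'], ['en', 'rc'], fixed_output=True)  # always list of lists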

hdf_dataset_list(hdf_filename, all_links=True, group='/')

Generate list of all datasets in the hdf file structure

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- all_links: bool, if True, also include soft links [default: True]
- group (str): only list datasets within this group [default: '/']

Returns:

- list[str]: list of str addresses

Source code in src/hdfmap/hdf_loader.py
def hdf_dataset_list(hdf_filename: str, all_links=True, group: str = '/') -> list[str]:
    """
    Generate list of all datasets in the hdf file structure
    :param hdf_filename: filename of hdf file
    :param all_links: bool, if True, also include soft links
    :param group: only display tree structure of this group (default root)
    :return: list of str addresses
    """

    output = []

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)

        def visit_paths(name, obj: h5py.Group | h5py.Dataset):
            if isinstance(obj, h5py.Dataset):
                output.append(name)

        def visit_links(name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            h5py_obj = hdf_group.get(name)
            if isinstance(h5py_obj, h5py.Dataset) and (
                    isinstance(obj, h5py.ExternalLink) if not all_links else True):
                output.append(name)
        if not all_links:  # visititems_links visits all items, don't double up
            hdf_group.visititems(visit_paths)
        hdf_group.visititems_links(visit_links)
    return output
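
E.G. (filename assumed):

for path in hdf_dataset_list('file.nxs', all_links=False):
    print(path)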

hdf_eval(filenames, expression, hdf_map=None, default=None, fixed_output=False)

Evaluate expression using dataset names

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- expression (str): expression to evaluate in each file, e.g. "roi2_sum / Transmission" [required]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- default: value returned if a dataset doesn't exist in a file [default: None]
- fixed_output: if True, always return a list of len(filenames) [default: False]

Returns:

- list, len(filenames)

Source code in src/hdfmap/file_functions.py
def hdf_eval(filenames: str | list[str], expression: str, hdf_map: HdfMap = None, default=None, fixed_output=False):
    """
    Evaluate expression using dataset names
    :param filenames: str or list of str - file paths
    :param expression: str expression to evaluate in each file, e.g. "roi2_sum / Transmission"
    :param hdf_map: HdfMap object, or None to generate from first file
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.eval(hdf, expression, default=default))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out
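
A sketch, assuming each file contains 'total' and 'Transmission' datasets:

files = list_files('/data/folder')  # hypothetical folder
norm = hdf_eval(files, 'total / Transmission')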

hdf_find(hdf_filename, *names_or_classes, attributes=('NX_class', 'local_name'))

Find groups and datasets within an hdf file matching a set of names or class names.

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- names_or_classes (str): object names or NX_class names to search for [default: ()]
- attributes (tuple[str]): attribute fields to check against names [default: ('NX_class', 'local_name')]

Returns:

- tuple[list[str], list[str]]: groups[], datasets[]

Source code in src/hdfmap/hdf_loader.py
def hdf_find(hdf_filename: str, *names_or_classes: str,
             attributes: tuple[str] = ('NX_class', 'local_name')) -> tuple[list[str], list[str]]:
    """
    find groups and datasets within hdf file matching a set of names or class names
    :param hdf_filename: filename of hdf file
    :params names_or_classes: object names or NXclass names to search for
    :params attributes: list of attr fields to check against names
    :return: groups[], datasets[]
    """

    with load_hdf(hdf_filename) as hdf_file:
        group_paths = []
        dataset_paths = []

        def visit_links(name):
            # For each path in the file, create tree of parent-groups
            sub_groups = name.split('/')
            sub_group_paths = ['/'.join(sub_groups[:n]) for n in range(1, len(sub_groups) + 1)]
            sub_group_names = [
                bytes2str(grp.attrs.get(attr, '')) for attr in attributes for path in sub_group_paths
                if (grp := hdf_file.get(path))
            ] + sub_groups
            if all(arg in sub_group_names for arg in names_or_classes):
                h5py_obj = hdf_file.get(name)
                if isinstance(h5py_obj, h5py.Group):
                    group_paths.append(name)
                elif isinstance(h5py_obj, h5py.Dataset):
                    dataset_paths.append(name)
        hdf_file.visit_links(visit_links)
    return group_paths, dataset_paths
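
For example, finding every group and dataset beneath NXdetector groups in a hypothetical file:

groups, datasets = hdf_find('file.nxs', 'NXdetector')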

hdf_find_first(hdf_filename, *names_or_classes, attributes=('NX_class', 'local_name'))

Return the first path of an object matching a set of names or class names.

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- names_or_classes (str): object names or NX_class names to search for [default: ()]
- attributes (tuple[str]): attribute fields to check against names [default: ('NX_class', 'local_name')]

Returns:

- str | None: hdf path, or None if no match

Source code in src/hdfmap/hdf_loader.py
def hdf_find_first(hdf_filename: str, *names_or_classes: str,
                   attributes: tuple[str] = ('NX_class', 'local_name')) -> str | None:
    """
    return the first path of object matching a set of names or class names
    :param hdf_filename: filename of hdf file
    :params names_or_classes: object names or NXclass names to search for
    :params attributes: list of attr fields to check against names
    :return: hdf_path or None if no match
    """

    with load_hdf(hdf_filename) as hdf_file:

        def visit_links(name):
            # For each path in the file, create tree of parent-groups
            parent_groups = name.split('/')
            parent_group_paths = ['/'.join(parent_groups[:n]) for n in range(1, len(parent_groups) + 1)]
            parent_group_names = [
                bytes2str(grp.attrs.get(attr, '')) for attr in attributes for path in parent_group_paths
                if (grp := hdf_file.get(path))
            ] + parent_groups
            if all(arg in parent_group_names for arg in names_or_classes):
                return name
            return None

        return hdf_file.visit_links(visit_links)

hdf_format(filenames, expression, hdf_map=None, default=None, fixed_output=False)

Evaluate string format expression using dataset names

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- expression (str): format expression to evaluate in each file, e.g. "the energy is {en:.2f} keV" [required]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- default: value returned if a dataset doesn't exist in a file [default: None]
- fixed_output: if True, always return a list of len(filenames) [default: False]

Returns:

- list, len(filenames)

Source code in src/hdfmap/file_functions.py
def hdf_format(filenames: str | list[str], expression: str, hdf_map: HdfMap = None, default=None, fixed_output=False):
    """
    Evaluate string format expression using dataset names
    :param filenames: str or list of str - file paths
    :param expression: str expression to evaluate in each file, e.g. "the energy is {en:.2f} keV"
    :param hdf_map: HdfMap object, or None to generate from first file
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.format_hdf(hdf, expression, default=default))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out

hdf_image(filenames, index=None, hdf_map=None, fixed_output=False)

Get image data from each file, using the default image path.

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- index (slice): index or slice of the dataset volume, or None to use the middle index [default: None]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- fixed_output: if True, always return a list of len(filenames) [default: False]

Returns:

- list, len(filenames)

Source code in src/hdfmap/file_functions.py
def hdf_image(filenames: str | list[str], index: slice = None, hdf_map: HdfMap = None, fixed_output=False):
    """
    Get image data from each file, using the default image path
    :param filenames: str or list of str - file paths
    :param index: index or slice of dataset volume, or None to use middle index
    :param hdf_map: HdfMap object, or None to generate from first file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output - numpy array
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.get_image(hdf, index=index))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out
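
A sketch: take the middle detector frame from several hypothetical scan files:

frames = hdf_image([f"scan{n}.nxs" for n in (1, 2, 3)])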

hdf_linked_files(hdf_filename, group='/')

Return a list of files linked to the current file, looking for all external links.

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- group (str): only look at links within this group [default: '/']

Returns:

- list[str]: list of str filenames (usually relative file paths)

Source code in src/hdfmap/hdf_loader.py
def hdf_linked_files(hdf_filename: str, group: str = '/') -> list[str]:
    """
    Return a list of files linked to the current file, looking for all external links.

    :param hdf_filename: filename of hdf file
    :param group: only look at links within this group (default root)
    :return: list of str filenames (usually relative file paths)
    """

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)
        external_files = []

        def visit_links(_name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            if isinstance(obj, h5py.ExternalLink) and obj.filename not in external_files:
                external_files.append(obj.filename)
        hdf_group.visititems_links(visit_links)
    return external_files
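
E.G. (filename assumed):

for linked_file in hdf_linked_files('file.nxs'):
    print(linked_file)  # usually relative paths, e.g. external detector files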

hdf_tree_dict(hdf_filename)

Generate a summary dict of the hdf tree structure. The structure is:

    {'group': {'@attrs': str, 'sub-group': {}, 'dataset': str}, ...}

Group attributes are stored with names prefixed with '@'.

Parameters:

- hdf_filename (str): filename of the hdf file [required]

Returns:

- dict: {'entry': {'dataset': value}...}

Source code in src/hdfmap/hdf_loader.py
def hdf_tree_dict(hdf_filename: str) -> dict:
    """
    Generate summary dict of the hdf tree structure
    The structure is:
        {'group': {'@attrs': str, 'sub-group': {}, 'dataset': str}, ...}

    Group attributes are stored with names pre-fixed with '@'

    :param hdf_filename: filename of hdf file
    :return: {'entry': {'dataset': value}...}
    """

    def store(hdf_dict: dict, hdf_group: h5py.Group) -> dict:
        for key in hdf_group:
            obj = hdf_group.get(key)
            link = hdf_group.get(key, getlink=True)
            if obj is None:
                hdf_dict[key] = '! Missing'
                continue  # dataset may be missing due to a broken link
            # Group
            if isinstance(obj, h5py.Group):
                hdf_dict[key] = {f"@{attr}": str(val) for attr, val in obj.attrs.items()}
                store(hdf_dict[key], obj)
            # Dataset
            elif isinstance(obj, h5py.Dataset):
                if obj.size <= 1:
                    detail = str(obj[()])
                else:
                    detail = f"{obj.dtype}, {obj.shape}"
                if isinstance(link, (h5py.SoftLink, h5py.ExternalLink)):
                    detail = f"LINK: " + detail
                hdf_dict[key] = detail
        return hdf_dict
    return store({}, load_hdf(hdf_filename))
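
Since every leaf in the returned dict is a string, the tree can be pretty-printed with the standard library, e.g.:

import json
print(json.dumps(hdf_tree_dict('file.nxs'), indent=2))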

hdf_tree_string(hdf_filename, all_links=True, group='/', attributes=True)

Generate string of the hdf file structure, similar to h5ls. Uses h5py.visititems

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- all_links (bool): if True, also show links [default: True]
- group (str): only display the tree structure of this group [default: '/']
- attributes (bool): if True, display the attributes of groups and datasets [default: True]

Returns:

- str

Source code in src/hdfmap/hdf_loader.py
def hdf_tree_string(hdf_filename: str, all_links: bool = True, group: str = '/', attributes: bool = True) -> str:
    """
    Generate string of the hdf file structure, similar to h5ls. Uses h5py.visititems
    :param hdf_filename: filename of hdf file
    :param all_links: bool, if True, also show links
    :param group: only display tree structure of this group (default root)
    :param attributes: if True, display the attributes of groups and datasets
    :return: str
    """
    output = [f"########## {hdf_filename} ##########"]

    def grp(path):
        return f"-------------- {path} " + "-" * (63 - (17 + len(path)))

    def ds(path, detail):
        return f"{path:60}  :  {detail}"

    def attr(path, name, value):
        return f"{' ' * len(path) + '@' + name} = {value}"

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)
        output.append(grp(hdf_group.name))
        if attributes:
            output.extend([attr(hdf_group.name, name, value) for name, value in hdf_group.attrs.items()])

        def visit_paths(name, obj: h5py.Group | h5py.Dataset):
            if isinstance(obj, h5py.Dataset):
                if obj.size <= 1:
                    detail = f"{obj[()]}"
                else:
                    detail = f"{obj.dtype}, {obj.shape}"
                output.append(ds(name, detail))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in obj.attrs.items()])
            elif isinstance(obj, h5py.Group):
                output.append(grp(name))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in obj.attrs.items()])

        def visit_links(name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            h5py_obj = hdf_group.get(name)

            if isinstance(h5py_obj, h5py.Dataset):
                if isinstance(obj, h5py.ExternalLink):
                    detail = f"LINK: {h5py_obj.dtype}, {h5py_obj.shape}"
                elif h5py_obj.size <= 1:
                    detail = f"{h5py_obj[()]}"
                else:
                    detail = f"{h5py_obj.dtype}, {h5py_obj.shape}"
                output.append(ds(name, detail))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in h5py_obj.attrs.items()])
            elif isinstance(h5py_obj, h5py.Group):
                output.append(grp(name))
                if attributes:
                    output.append(f"{name}")
                    output.extend([attr(name, _attr, value) for _attr, value in h5py_obj.attrs.items()])

        if all_links:
            hdf_group.visititems_links(visit_links)
        else:
            hdf_group.visititems(visit_paths)
        output.append('\n --- End --- ')
    return '\n'.join(output)
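
E.G. (filename assumed):

print(hdf_tree_string('file.nxs', attributes=False))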

list_files(folder_directory, extension=DEFAULT_EXTENSION)

Return list of files in directory with extension, returning list of full file paths

Source code in src/hdfmap/file_functions.py
def list_files(folder_directory: str, extension=DEFAULT_EXTENSION) -> list[str]:
    """Return list of files in directory with extension, returning list of full file paths"""
    try:
        return sorted(
            (file.path for file in os.scandir(folder_directory) if file.is_file() and file.name.endswith(extension)),
            key=lambda x: os.path.getmtime(x)
        )
    except FileNotFoundError:
        return []
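
For example (the folder path is an assumption; results are sorted by modification time):

files = list_files('/data/experiment1')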

load_hdf(hdf_filename, **kwargs)

Load hdf file, return h5py.File object

Source code in src/hdfmap/hdf_loader.py
def load_hdf(hdf_filename: str, **kwargs) -> h5py.File:
    """Load hdf file, return h5py.File object"""
    options = HDF_FILE_OPTIONS.copy()  # copy so per-call kwargs don't modify the module defaults
    options.update(kwargs)
    return h5py.File(hdf_filename, 'r', **options)
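
Typically used as a context manager so the file is closed after reading; the dataset path here is hypothetical:

with load_hdf('file.nxs') as hdf:
    data = hdf['/entry1/scan/en'][()]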

nexus_data_block(filenames, hdf_map=None, fixed_output=False)

Create classic dict-like dataloader objects from nexus files.

E.G.
d = nexus_data_block('filename')
d.scannable -> array
d.metadata.filename -> value
d.keys() -> list of items

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- fixed_output: if True, always return a list of len(filenames) [default: False]

Returns:

- list, len(filenames)

Source code in src/hdfmap/file_functions.py
def nexus_data_block(filenames: str | list[str], hdf_map: HdfMap = None, fixed_output=False):
    """
    Create classic dict like dataloader objects from nexus files
    E.G.
        d = nexus_data_block('filename')
        d.scannable -> array
        d.metadata.filename -> value
        d.keys() -> list of items

    :param filenames: str or list of str - file paths
    :param hdf_map: HdfMap object, or None to generate from first file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output - dict like DataObject
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_nexus_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.get_dataholder(hdf))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out

set_all_logging_level(level)

Set the logging level of all loggers. Logging levels (see the builtin logging module):

    'notset'   |  0
    'debug'    |  10
    'info'     |  20
    'warning'  |  30
    'error'    |  40
    'critical' |  50

Parameters:

- level (str | int): str level name or int level [required]

Returns:

- None

Source code in src/hdfmap/logging.py
def set_all_logging_level(level: str | int):
    """
    Set logging level of all loggers
    Logging Levels (see builtin module logging)
        'notset'   |  0
        'debug'    |  10
        'info'     |  20
        'warning'  |  30
        'error'    |  40
        'critical' |  50
    :param level: str level name or int level
    :return: None
    """
    try:
        level = level.upper()
        # level = logging.getLevelNamesMapping()[level]  # Python >3.11
        level = logging._nameToLevel[level]
    except AttributeError:
        level = int(level)

    logging_logger = logging.getLogger(__name__)
    for logger in [logging.getLogger(name) for name in logging.root.manager.loggerDict]:
        logger.setLevel(level)
    logging_logger.info(f"Logging level set to {level}")
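
E.G.

set_all_logging_level('debug')  # equivalent to set_all_logging_level(10)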