Code Description

HdfMap

hdfmap: map objects within an HDF5 file and create a dataset namespace.

Usage

HdfMap from NeXus file

from hdfmap import create_nexus_map, load_hdf
hmap = create_nexus_map('file.nxs')
with load_hdf('file.nxs') as nxs:
    address = hmap.get_address('energy')
    energy = nxs[address][()]
    string = hmap.format_hdf(nxs, "the energy is {energy:.2f} keV")
    d = hmap.get_dataholder(nxs)  # classic data table, d.scannable, d.metadata

Shortcuts - single file reloading class

from hdfmap import NexusLoader
scan = NexusLoader('file.nxs')
[data1, data2] = scan.get_data(*['dataset_name_1', 'dataset_name_2'])
data = scan.eval('dataset_name_1 * 100 + 2')
string = scan.format('my data is {dataset_name_1:.2f}')

Shortcuts - multifile load data

from hdfmap import hdf_data, hdf_eval, hdf_format, hdf_image
filenames = [f"file{n}.nxs" for n in range(100)]
all_data = hdf_data(filenames, 'dataset_name')
normalised_data = hdf_eval(filenames, 'total / Transmission / (rc / 300.)')
descriptions = hdf_format(filenames, 'Energy: {en:5.3f} keV')
image = hdf_image(filenames, index=31)

Copyright 2024-2025 Daniel G. Porter

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

By Dr Dan Porter Diamond Light Source Ltd 2024-2025

HdfLoader

HdfLoader contains the filename and HdfMap for an HDF file. The HdfMap contains all the dataset paths and a namespace, allowing data to be read from the file using variable names, loading only the datasets required for each operation.

E.G.
hdf = HdfLoader('file.hdf')
[data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
data = hdf.eval('dataset_name_1 * 100 + 2')
string = hdf.format('my data is {dataset_name_1:.2f}')
print(hdf.summary())
Source code in src/hdfmap/reloader_class.py
class HdfLoader:
    """
    HDF Loader contains the filename and hdfmap for a HDF file, the hdfmap contains all the dataset paths and a
    namespace, allowing data to be called from the file using variable names, loading only the required datasets
    for each operation.

    ### E.G.
        hdf = HdfLoader('file.hdf')
        [data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
        data = hdf.eval('dataset_name_1 * 100 + 2')
        string = hdf.format('my data is {dataset_name_1:.2f}')
        print(hdf.summary())
    """

    def __init__(self, hdf_filename: str, hdf_map: HdfMap | NexusMap | None = None):
        self.filename = hdf_filename
        if hdf_map is None:
            self.map = create_hdf_map(hdf_filename)
        else:
            self.map = hdf_map

    def __repr__(self):
        return f"HdfReloader('{self.filename}')"

    def __str__(self):
        with self._load() as hdf:
            out = self.map.info_data(hdf)
        return out

    def __getitem__(self, item):
        return self.get_data(item)

    def __call__(self, expression):
        return self.eval(expression)

    def _load(self) -> h5py.File:
        return load_hdf(self.filename)

    def get_hdf_path(self, name_or_path: str) -> str | None:
        """Return hdf path of object in HdfMap"""
        return self.map.get_path(name_or_path)

    def find_hdf_paths(self, string: str, name_only: bool = True, whole_word: bool = False) -> list[str]:
        """
        Find any dataset paths that contain the given string argument
        :param string: str to find in list of datasets
        :param name_only: if True, search only the name of the dataset, not the full path
        :param whole_word: if True, match only whole (case-insensitive) names
        :return: list of hdf paths
        """
        return self.map.find_paths(string, name_only, whole_word)

    def find_names(self, string: str) -> list[str]:
        """
        Find any dataset names that contain the given string argument, searching names in self.combined
        :param string: str to find in list of datasets
        :return: list of names
        """
        return self.map.find_names(string)

    def get_data(self, *name_or_path, index: slice = (), default=None, direct_load=False):
        """
        Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
        See hdfmap.eval_functions.dataset2data for more information.
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
        :return: dataset2data(dataset) -> datetime, str or squeezed array as required.
        """
        with self._load() as hdf:
            out = [self.map.get_data(hdf, name, index, default, direct_load) for name in name_or_path]
        if len(name_or_path) == 1:
            return out[0]
        return out

    def get_string(self, *name_or_path, index: slice = (), default='', units=False):
        """
        Return data from dataset in file, converted into summary string
        See hdfmap.eval_functions.dataset2data for more information.
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param units: if True and attribute 'units' available, append this to the result
        :return: dataset2str(dataset) -> str
        """
        with self._load() as hdf:
            out = [self.map.get_string(hdf, name, index, default, units) for name in name_or_path]
        if len(name_or_path) == 1:
            return out[0]
        return out

    def get_image(self, index: slice = None) -> np.ndarray:
        """
        Get image data from file, using default image path
        :param index: (slice,) or None to take the middle image
        :return: numpy array of image
        """
        with self._load() as hdf:
            return self.map.get_image(hdf, index)

    def get_metadata(self, defaults=None):
        with self._load() as hdf:
            return self.map.get_metadata(hdf, default=defaults)

    def get_scannables(self):
        """Return scannables from file (values associated with hdfmap.scannables)"""
        with self._load() as hdf:
            return self.map.get_scannables(hdf)

    def summary(self) -> str:
        """Return string summary of datasets"""
        with self._load() as hdf:
            return self.map.create_dataset_summary(hdf)

    def eval(self, expression: str, default=DEFAULT):
        """
        Evaluate an expression using the namespace of the hdf file
        :param expression: str expression to be evaluated
        :param default: returned if varname not in namespace
        :return: eval(expression)
        """
        with self._load() as hdf:
            return self.map.eval(hdf, expression, default)

    def format(self, expression: str, default=DEFAULT):
        """
        Evaluate a formatted string expression using the namespace of the hdf file
        :param expression: str expression using {name} format specifiers
        :param default: returned if varname not in namespace
        :return: eval_hdf(f"expression")
        """
        with self._load() as hdf:
            return self.map.format_hdf(hdf, expression, default)

eval(expression, default=DEFAULT)

Evaluate an expression using the namespace of the hdf file

Parameters:

- expression (str): expression to be evaluated [required]
- default: value returned if a variable name is not in the namespace [default: DEFAULT]

Returns:

- eval(expression)

Source code in src/hdfmap/reloader_class.py
def eval(self, expression: str, default=DEFAULT):
    """
    Evaluate an expression using the namespace of the hdf file
    :param expression: str expression to be evaluated
    :param default: returned if varname not in namespace
    :return: eval(expression)
    """
    with self._load() as hdf:
        return self.map.eval(hdf, expression, default)
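
A minimal usage sketch (the filename and the dataset names 'total' and 'Transmission' are assumptions):

hdf = HdfLoader('file.hdf')
normalised = hdf.eval('total / Transmission')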

find_hdf_paths(string, name_only=True, whole_word=False)

Find any dataset paths that contain the given string argument

Parameters:

- string (str): string to find in the list of datasets [required]
- name_only (bool): if True, search only the dataset name, not the full path [default: True]
- whole_word (bool): if True, match only whole (case-insensitive) names [default: False]

Returns:

- list[str]: list of hdf paths

Source code in src/hdfmap/reloader_class.py
def find_hdf_paths(self, string: str, name_only: bool = True, whole_word: bool = False) -> list[str]:
    """
    Find any dataset paths that contain the given string argument
    :param string: str to find in list of datasets
    :param name_only: if True, search only the name of the dataset, not the full path
    :param whole_word: if True, match only whole (case-insensitive) names
    :return: list of hdf paths
    """
    return self.map.find_paths(string, name_only, whole_word)
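
A short sketch; the filename and search term are arbitrary, and every matching dataset path is returned:

hdf = HdfLoader('file.hdf')
energy_paths = hdf.find_hdf_paths('energy')  # all dataset paths containing 'energy'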

find_names(string)

Find any dataset names that contain the given string argument, searching names in self.combined

Parameters:

- string (str): string to find in the list of datasets [required]

Returns:

- list[str]: list of names

Source code in src/hdfmap/reloader_class.py
def find_names(self, string: str) -> list[str]:
    """
    Find any dataset names that contain the given string argument, searching names in self.combined
    :param string: str to find in list of datasets
    :return: list of names
    """
    return self.map.find_names(string)

format(expression, default=DEFAULT)

Evaluate a formatted string expression using the namespace of the hdf file

Parameters:

- expression (str): expression using {name} format specifiers [required]
- default: value returned if a variable name is not in the namespace [default: DEFAULT]

Returns:

- eval_hdf(f"expression")

Source code in src/hdfmap/reloader_class.py
def format(self, expression: str, default=DEFAULT):
    """
    Evaluate a formatted string expression using the namespace of the hdf file
    :param expression: str expression using {name} format specifiers
    :param default: returned if varname not in namespace
    :return: eval_hdf(f"expression")
    """
    with self._load() as hdf:
        return self.map.format_hdf(hdf, expression, default)
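
For example, assuming the namespace contains a dataset named 'en':

hdf = HdfLoader('file.hdf')
print(hdf.format('energy = {en:.3f} keV'))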

get_data(*name_or_path, index=(), default=None, direct_load=False)

Return data from a dataset in the file, converted into datetime, str or squeezed numpy.array objects. See hdfmap.eval_functions.dataset2data for more information.

Parameters:

- name_or_path: str name or path pointing to a dataset in the hdf file [default: ()]
- index (slice): index or slice of data in the hdf file [default: ()]
- default: value returned if the name is not found in the hdf file [default: None]
- direct_load: if False, return str, datetime or squeezed array; otherwise load the data directly [default: False]

Returns:

- dataset2data(dataset): datetime, str or squeezed array as required

Source code in src/hdfmap/reloader_class.py
def get_data(self, *name_or_path, index: slice = (), default=None, direct_load=False):
    """
    Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
    See hdfmap.eval_functions.dataset2data for more information.
    :param name_or_path: str name or path pointing to dataset in hdf file
    :param index: index or slice of data in hdf file
    :param default: value to return if name not found in hdf file
    :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
    :return: dataset2data(dataset) -> datetime, str or squeezed array as required.
    """
    with self._load() as hdf:
        out = [self.map.get_data(hdf, name, index, default, direct_load) for name in name_or_path]
    if len(name_or_path) == 1:
        return out[0]
    return out
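
Note the return shape: a single name gives a bare value, several names give a list. A sketch with assumed dataset names:

energy = hdf.get_data('en')                      # single value
en, trans = hdf.get_data('en', 'Transmission')   # list of two values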

get_hdf_path(name_or_path)

Return hdf path of object in HdfMap

Source code in src/hdfmap/reloader_class.py
def get_hdf_path(self, name_or_path: str) -> str | None:
    """Return hdf path of object in HdfMap"""
    return self.map.get_path(name_or_path)

get_image(index=None)

Get image data from file, using default image path

Parameters:

- index (slice): (slice,) or None to take the middle image [default: None]

Returns:

- ndarray: numpy array of the image

Source code in src/hdfmap/reloader_class.py
def get_image(self, index: slice = None) -> np.ndarray:
    """
    Get image data from file, using default image path
    :param index: (slice,) or None to take the middle image
    :return: numpy array of image
    """
    with self._load() as hdf:
        return self.map.get_image(hdf, index)
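
A sketch, assuming the map identified a default image dataset:

frame = hdf.get_image()            # index=None -> middle image of the scan
first = hdf.get_image(index=(0,))  # tuple of slices/indices, per the (slice,) convention above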

get_scannables()

Return scannables from file (values associated with hdfmap.scannables)

Source code in src/hdfmap/reloader_class.py
def get_scannables(self):
    """Return scannables from file (values associated with hdfmap.scannables)"""
    with self._load() as hdf:
        return self.map.get_scannables(hdf)

get_string(*name_or_path, index=(), default='', units=False)

Return data from a dataset in the file, converted into a summary string. See hdfmap.eval_functions.dataset2data for more information.

Parameters:

- name_or_path: str name or path pointing to a dataset in the hdf file [default: ()]
- index (slice): index or slice of data in the hdf file [default: ()]
- default: value returned if the name is not found in the hdf file [default: '']
- units: if True and the 'units' attribute is available, append it to the result [default: False]

Returns:

- dataset2str(dataset): str

Source code in src/hdfmap/reloader_class.py
def get_string(self, *name_or_path, index: slice = (), default='', units=False):
    """
    Return data from dataset in file, converted into summary string
    See hdfmap.eval_functions.dataset2data for more information.
    :param name_or_path: str name or path pointing to dataset in hdf file
    :param index: index or slice of data in hdf file
    :param default: value to return if name not found in hdf file
    :param units: if True and attribute 'units' available, append this to the result
    :return: dataset2str(dataset) -> str
    """
    with self._load() as hdf:
        out = [self.map.get_string(hdf, name, index, default, units) for name in name_or_path]
    if len(name_or_path) == 1:
        return out[0]
    return out
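
For example, assuming a dataset named 'en' with a 'units' attribute:

print(hdf.get_string('en', units=True))  # e.g. '3.5 keV'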

summary()

Return string summary of datasets

Source code in src/hdfmap/reloader_class.py
def summary(self) -> str:
    """Return string summary of datasets"""
    with self._load() as hdf:
        return self.map.create_dataset_summary(hdf)

NexusLoader

Bases: HdfLoader

NexusLoader contains the filename and hdfmap for a NeXus file; the hdfmap contains all the dataset paths and a namespace, allowing data to be called from the file using variable names, loading only the required datasets for each operation.

E.G.
hdf = NexusLoader('file.hdf')
[data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
data = hdf.eval('dataset_name_1 * 100 + 2')
string = hdf.format('my data is {dataset_name_1:.2f}')

Source code in src/hdfmap/reloader_class.py
class NexusLoader(HdfLoader):
    """
    Nexus Loader
    contains the filename and hdfmap for a NeXus file, the hdfmap contains all the dataset paths and a
    namespace, allowing data to be called from the file using variable names, loading only the required datasets
    for each operation.
    E.G.
        hdf = NexusLoader('file.hdf')
        [data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
        data = hdf.eval('dataset_name_1 * 100 + 2')
        string = hdf.format('my data is {dataset_name_1:.2f}')
    """

    def __init__(self, nxs_filename: str, hdf_map: NexusMap | None = None):
        if not hdf_map:
            hdf_map = create_nexus_map(nxs_filename)
        super().__init__(nxs_filename, hdf_map)

    def get_plot_data(self) -> dict:
        """Return dict of useful plot data"""
        with self._load() as hdf:
            return self.map.get_plot_data(hdf)

get_plot_data()

Return dict of useful plot data

Source code in src/hdfmap/reloader_class.py
def get_plot_data(self) -> dict:
    """Return dict of useful plot data"""
    with self._load() as hdf:
        return self.map.get_plot_data(hdf)

compare_maps(map1, map2)

Compare two HdfMap objects

Source code in src/hdfmap/file_functions.py
def compare_maps(map1: HdfMap | NexusMap, map2: HdfMap | NexusMap) -> str:
    """
    Compare two HdfMap objects
    """
    missing_in_2 = []
    missing_in_1 = []
    different = []
    same = []
    for name1, path1 in map1.combined.items():
        if name1 in map2.combined:
            path2 = map2.combined[name1]
            if path2 != path1:
                different.append(f"{name1}: {path1} != {path2}")
            dataset1 = map1.datasets[path1]
            dataset2 = map2.datasets[path2]
            if dataset1.shape != dataset2.shape:
                different.append(f"{name1}: {dataset1.shape}, {dataset2.shape}")
            else:
                same.append(f"{name1}: {dataset1.shape} : {path1}, {path2}")
        else:
            missing_in_2.append(f"{name1}: {path1}")

    for name2, path2 in map2.combined.items():
        if name2 not in map1.combined:
            missing_in_1.append(f"{name2}: {path2}")

    output = f"Comparing:\n  {map1.filename}, with\n  {map2.filename}\n\n"
    output += "Different items:\n  " + '\n  '.join(different)
    output += f"\n\nMissing in {map1.filename}:\n  " + '\n  '.join(missing_in_1)
    output += f"\n\nMissing in {map2.filename}:\n  " + '\n  '.join(missing_in_2)
    output += '\n'
    return output
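
A usage sketch with two hypothetical scan files:

map1 = create_nexus_map('scan1.nxs')
map2 = create_nexus_map('scan2.nxs')
print(compare_maps(map1, map2))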

create_hdf_map(hdf_filename)

Create a HdfMap from a hdf file

Parameters:

- hdf_filename (str): filename of the hdf file [required]

Returns:

- HdfMap

Source code in src/hdfmap/file_functions.py
def create_hdf_map(hdf_filename: str) -> HdfMap:
    """
    Create a HdfMap from a hdf file
    :param hdf_filename: str filename of hdf file
    :return: HdfMap
    """
    with load_hdf(hdf_filename) as hdf:
        hdf_map = HdfMap(hdf)
    return hdf_map

create_nexus_map(hdf_filename, groups=None, default_entry_only=False)

Create a HdfMap from a NeXus file, loading default parameters and allowing a reduced, single entry map

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- groups (None | list[str]): list of groups to collect datasets from [default: None]
- default_entry_only (bool): if True, only the first or default entry is loaded [default: False]

Returns:

- NexusMap

Source code in src/hdfmap/file_functions.py
def create_nexus_map(hdf_filename: str, groups: None | list[str] = None,
                     default_entry_only: bool = False) -> NexusMap:
    """
    Create a HdfMap from a NeXus file, loading default parameters and allowing a reduced, single entry map
    :param hdf_filename: str filename of hdf file
    :param groups: list of groups to collect datasets from
    :param default_entry_only: if True, only the first or default entry will be loaded
    :return: NexusMap
    """
    hdf_map = NexusMap()
    with load_hdf(hdf_filename) as hdf:
        hdf_map.populate(hdf, groups=groups, default_entry_only=default_entry_only)
    return hdf_map
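
For example, a reduced map built from only the default entry (the group name 'measurement' is an assumption):

hmap = create_nexus_map('file.nxs', groups=['measurement'], default_entry_only=True)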

hdf_compare(hdf_filename1, hdf_filename2, all_links=False)

Compare hdf tree structure between two files

Parameters:

- hdf_filename1 (str): filename of an hdf file [required]
- hdf_filename2 (str): filename of an hdf file [required]
- all_links: bool, if True, also show soft links [default: False]

Returns:

- str

Source code in src/hdfmap/hdf_loader.py
def hdf_compare(hdf_filename1: str, hdf_filename2: str, all_links=False) -> str:
    """
    Compare hdf tree structure between two files
    :param hdf_filename1: filename of hdf file
    :param hdf_filename2: filename of hdf file
    :param all_links: bool, if True, also show soft links
    :return: str
    """
    datasets1 = hdf_dataset_list(hdf_filename1, all_links)
    datasets2 = hdf_dataset_list(hdf_filename2, all_links)

    # both = [ds for ds in datasets1 if ds in datasets2]
    only_in_1 = '\n  '.join([ds for ds in datasets1 if ds not in datasets2])
    only_in_2 = '\n  '.join([ds for ds in datasets2 if ds not in datasets1])

    output = f"Compare\n    {hdf_filename1}, with\n    {hdf_filename2}\n\n"
    output += f"Datasets only in {os.path.basename(hdf_filename1)}:\n\n"
    output += f"  {only_in_1}\n"
    output += f"Datasets only in {os.path.basename(hdf_filename2)}:\n\n"
    output += f"  {only_in_2}\n"
    return output

hdf_data(filenames, name_or_path, hdf_map=None, index=(), default=None, fixed_output=False)

General purpose function to retrieve data from HDF files

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- name_or_path (str | list[str]): name(s) or path(s) of HDF datasets [required]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- index: dataset index or slice [default: ()]
- default: value returned if the dataset doesn't exist in a file [default: None]
- fixed_output: if True, always return a list of lists [default: False]

Returns:

- list[files: list[names]]

Source code in src/hdfmap/file_functions.py
def hdf_data(filenames: str | list[str], name_or_path: str | list[str], hdf_map: HdfMap = None,
             index=(), default=None, fixed_output=False):
    """
    General purpose function to retrieve data from HDF files
    :param filenames: str or list of str - file paths
    :param name_or_path: str or list of str - names or paths of HDF datasets
    :param hdf_map: HdfMap object, or None to generate from first file
    :param index: dataset index or slice
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list of list
    :return if single file, single dataset: single value
    :return if multi file or multi dataset: list, len(filenames) or len(name_or_path)
    :return if multi file and multi dataset: list[files: list[names]]
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    name_or_path = as_str_list(name_or_path)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append([hdf_map.get_data(hdf, name, index=index, default=default) for name in name_or_path])
    if fixed_output:
        return out
    if len(filenames) == 1 and len(name_or_path) == 1:
        return out[0][0]
    if len(filenames) == 1 and len(name_or_path) > 1:
        return out[0]
    if len(name_or_path) == 1:
        return [val[0] for val in out]
    return out
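
A sketch of the output shapes (file and dataset names are assumptions):

val = hdf_data('file1.nxs', 'en')                  # single file, single name: single value
vals = hdf_data(['file1.nxs', 'file2.nxs'], 'en')  # multi file: list, len(filenames)
table = hdf_data(['file1.nxs', 'file2.nxs'], ['en', 'rc'], fixed_output=True)  # always list of lists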

hdf_dataset_list(hdf_filename, all_links=True, group='/')

Generate list of all datasets in the hdf file structure

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- all_links: bool, if True, also include soft links [default: True]
- group (str): only list datasets within this group [default: '/']

Returns:

- list[str]: list of str addresses

Source code in src/hdfmap/hdf_loader.py
def hdf_dataset_list(hdf_filename: str, all_links=True, group: str = '/') -> list[str]:
    """
    Generate list of all datasets in the hdf file structure
    :param hdf_filename: filename of hdf file
    :param all_links: bool, if True, also include soft links
    :param group: only display tree structure of this group (default root)
    :return: list of str addresses
    """

    output = []

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)

        def visit_paths(name, obj: h5py.Group | h5py.Dataset):
            if isinstance(obj, h5py.Dataset):
                output.append(name)

        def visit_links(name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            h5py_obj = hdf_group.get(name)
            if isinstance(h5py_obj, h5py.Dataset) and (
                    isinstance(obj, h5py.ExternalLink) if not all_links else True):
                output.append(name)
        if not all_links:  # visititems_links visits all items, don't double up
            hdf_group.visititems(visit_paths)
        hdf_group.visititems_links(visit_links)
    return output
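
E.G. (filename assumed):

for path in hdf_dataset_list('file.nxs', all_links=False):
    print(path)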

hdf_eval(filenames, expression, hdf_map=None, default=None, fixed_output=False)

Evaluate expression using dataset names

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- expression (str): expression to evaluate in each file, e.g. "roi2_sum / Transmission" [required]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- default: value returned if a dataset doesn't exist in a file [default: None]
- fixed_output: if True, always return a list of len(filenames) [default: False]

Returns:

- list, len(filenames)

Source code in src/hdfmap/file_functions.py
def hdf_eval(filenames: str | list[str], expression: str, hdf_map: HdfMap = None, default=None, fixed_output=False):
    """
    Evaluate expression using dataset names
    :param filenames: str or list of str - file paths
    :param expression: str expression to evaluate in each file, e.g. "roi2_sum / Transmission"
    :param hdf_map: HdfMap object, or None to generate from first file
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.eval(hdf, expression, default=default))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out
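
A sketch, assuming each file contains 'total' and 'Transmission' datasets:

files = list_files('/data/folder')  # hypothetical folder
norm = hdf_eval(files, 'total / Transmission')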

hdf_find(hdf_filename, *names_or_classes, attributes=('NX_class', 'local_name'))

Find groups and datasets within an hdf file matching a set of names or class names.

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- names_or_classes (str): object names or NX_class names to search for [default: ()]
- attributes (tuple[str]): attribute fields to check against names [default: ('NX_class', 'local_name')]

Returns:

- tuple[list[str], list[str]]: groups[], datasets[]

Source code in src/hdfmap/hdf_loader.py
def hdf_find(hdf_filename: str, *names_or_classes: str,
             attributes: tuple[str] = ('NX_class', 'local_name')) -> tuple[list[str], list[str]]:
    """
    find groups and datasets within hdf file matching a set of names or class names
    :param hdf_filename: filename of hdf file
    :params names_or_classes: object names or NXclass names to search for
    :params attributes: list of attr fields to check against names
    :return: groups[], datasets[]
    """

    with load_hdf(hdf_filename) as hdf_file:
        group_paths = []
        dataset_paths = []

        def visit_links(name):
            # For each path in the file, create tree of parent-groups
            sub_groups = name.split('/')
            sub_group_paths = ['/'.join(sub_groups[:n]) for n in range(1, len(sub_groups) + 1)]
            sub_group_names = [
                bytes2str(grp.attrs.get(attr, '')) for attr in attributes for path in sub_group_paths
                if (grp := hdf_file.get(path))
            ] + sub_groups
            if all(arg in sub_group_names for arg in names_or_classes):
                h5py_obj = hdf_file.get(name)
                if isinstance(h5py_obj, h5py.Group):
                    group_paths.append(name)
                elif isinstance(h5py_obj, h5py.Dataset):
                    dataset_paths.append(name)
        hdf_file.visit_links(visit_links)
    return group_paths, dataset_paths
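
For example, finding every group and dataset beneath NXdetector groups in a hypothetical file:

groups, datasets = hdf_find('file.nxs', 'NXdetector')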

hdf_find_first(hdf_filename, *names_or_classes, attributes=('NX_class', 'local_name'))

Return the first path of an object matching a set of names or class names.

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- names_or_classes (str): object names or NX_class names to search for [default: ()]
- attributes (tuple[str]): attribute fields to check against names [default: ('NX_class', 'local_name')]

Returns:

- str | None: hdf path, or None if no match

Source code in src/hdfmap/hdf_loader.py
def hdf_find_first(hdf_filename: str, *names_or_classes: str,
                   attributes: tuple[str] = ('NX_class', 'local_name')) -> str | None:
    """
    return the first path of object matching a set of names or class names
    :param hdf_filename: filename of hdf file
    :params names_or_classes: object names or NXclass names to search for
    :params attributes: list of attr fields to check against names
    :return: hdf_path or None if no match
    """

    with load_hdf(hdf_filename) as hdf_file:

        def visit_links(name):
            # For each path in the file, create tree of parent-groups
            parent_groups = name.split('/')
            parent_group_paths = ['/'.join(parent_groups[:n]) for n in range(1, len(parent_groups) + 1)]
            parent_group_names = [
                bytes2str(grp.attrs.get(attr, '')) for attr in attributes for path in parent_group_paths
                if (grp := hdf_file.get(path))
            ] + parent_groups
            if all(arg in parent_group_names for arg in names_or_classes):
                return name
            return None

        return hdf_file.visit_links(visit_links)

hdf_format(filenames, expression, hdf_map=None, default=None, fixed_output=False)

Evaluate string format expression using dataset names

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- expression (str): format expression to evaluate in each file, e.g. "the energy is {en:.2f} keV" [required]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- default: value returned if a dataset doesn't exist in a file [default: None]
- fixed_output: if True, always return a list of len(filenames) [default: False]

Returns:

- list, len(filenames)

Source code in src/hdfmap/file_functions.py
def hdf_format(filenames: str | list[str], expression: str, hdf_map: HdfMap = None, default=None, fixed_output=False):
    """
    Evaluate string format expression using dataset names
    :param filenames: str or list of str - file paths
    :param expression: str expression to evaluate in each file, e.g. "the energy is {en:.2f} keV"
    :param hdf_map: HdfMap object, or None to generate from first file
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.format_hdf(hdf, expression, default=default))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out

hdf_image(filenames, index=None, hdf_map=None, fixed_output=False)

Get image data from each file, using the default image path.

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- index (slice): index or slice of the dataset volume, or None to use the middle index [default: None]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- fixed_output: if True, always return a list of len(filenames) [default: False]

Returns:

- list, len(filenames)

Source code in src/hdfmap/file_functions.py
def hdf_image(filenames: str | list[str], index: slice = None, hdf_map: HdfMap = None, fixed_output=False):
    """
    Get image data from each file, using the default image path
    :param filenames: str or list of str - file paths
    :param index: index or slice of dataset volume, or None to use middle index
    :param hdf_map: HdfMap object, or None to generate from first file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output - numpy array
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.get_image(hdf, index=index))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out
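
A sketch: take the middle detector frame from several hypothetical scan files:

frames = hdf_image([f"scan{n}.nxs" for n in (1, 2, 3)])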

hdf_linked_files(hdf_filename, group='/')

Return a list of files linked to the current file, looking for all external links.

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- group (str): only look at links within this group [default: '/']

Returns:

- list[str]: list of str filenames (usually relative file paths)

Source code in src/hdfmap/hdf_loader.py
def hdf_linked_files(hdf_filename: str, group: str = '/') -> list[str]:
    """
    Return a list of files linked to the current file, looking for all external links.

    :param hdf_filename: filename of hdf file
    :param group: only look at links within this group (default root)
    :return: list of str filenames (usually relative file paths)
    """

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)
        external_files = []

        def visit_links(_name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            if isinstance(obj, h5py.ExternalLink) and obj.filename not in external_files:
                external_files.append(obj.filename)
        hdf_group.visititems_links(visit_links)
    return external_files
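
E.G. (filename assumed):

for linked_file in hdf_linked_files('file.nxs'):
    print(linked_file)  # usually relative paths, e.g. external detector files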

hdf_tree_dict(hdf_filename)

Generate a summary dict of the hdf tree structure. The structure is:

    {'group': {'@attrs': str, 'sub-group': {}, 'dataset': str}, ...}

Group attributes are stored with names prefixed with '@'.

Parameters:

- hdf_filename (str): filename of the hdf file [required]

Returns:

- dict: {'entry': {'dataset': value}...}

Source code in src/hdfmap/hdf_loader.py
def hdf_tree_dict(hdf_filename: str) -> dict:
    """
    Generate summary dict of the hdf tree structure
    The structure is:
        {'group': {'@attrs': str, 'sub-group': {}, 'dataset': str}, ...}

    Group attributes are stored with names pre-fixed with '@'

    :param hdf_filename: filename of hdf file
    :return: {'entry': {'dataset': value}...}
    """

    def store(hdf_dict: dict, hdf_group: h5py.Group) -> dict:
        for key in hdf_group:
            obj = hdf_group.get(key)
            link = hdf_group.get(key, getlink=True)
            if obj is None:
                hdf_dict[key] = '! Missing'
                continue  # dataset may be missing due to a broken link
            # Group
            if isinstance(obj, h5py.Group):
                hdf_dict[key] = {f"@{attr}": str(val) for attr, val in obj.attrs.items()}
                store(hdf_dict[key], obj)
            # Dataset
            elif isinstance(obj, h5py.Dataset):
                if obj.size <= 1:
                    detail = str(obj[()])
                else:
                    detail = f"{obj.dtype}, {obj.shape}"
                if isinstance(link, (h5py.SoftLink, h5py.ExternalLink)):
                    detail = f"LINK: " + detail
                hdf_dict[key] = detail
        return hdf_dict
    return store({}, load_hdf(hdf_filename))
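
Since every leaf in the returned dict is a string, the tree can be pretty-printed with the standard library, e.g.:

import json
print(json.dumps(hdf_tree_dict('file.nxs'), indent=2))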

hdf_tree_string(hdf_filename, all_links=True, group='/', attributes=True)

Generate string of the hdf file structure, similar to h5ls. Uses h5py.visititems

Parameters:

- hdf_filename (str): filename of the hdf file [required]
- all_links (bool): if True, also show links [default: True]
- group (str): only display the tree structure of this group [default: '/']
- attributes (bool): if True, display the attributes of groups and datasets [default: True]

Returns:

- str

Source code in src/hdfmap/hdf_loader.py
def hdf_tree_string(hdf_filename: str, all_links: bool = True, group: str = '/', attributes: bool = True) -> str:
    """
    Generate string of the hdf file structure, similar to h5ls. Uses h5py.visititems
    :param hdf_filename: filename of hdf file
    :param all_links: bool, if True, also show links
    :param group: only display tree structure of this group (default root)
    :param attributes: if True, display the attributes of groups and datasets
    :return: str
    """
    output = [f"########## {hdf_filename} ##########"]

    def grp(path):
        return f"-------------- {path} " + "-" * (63 - (17 + len(path)))

    def ds(path, detail):
        return f"{path:60}  :  {detail}"

    def attr(path, name, value):
        return f"{' ' * len(path) + '@' + name} = {value}"

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)
        output.append(grp(hdf_group.name))
        if attributes:
            output.extend([attr(hdf_group.name, name, value) for name, value in hdf_group.attrs.items()])

        def visit_paths(name, obj: h5py.Group | h5py.Dataset):
            if isinstance(obj, h5py.Dataset):
                if obj.size <= 1:
                    detail = f"{obj[()]}"
                else:
                    detail = f"{obj.dtype}, {obj.shape}"
                output.append(ds(name, detail))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in obj.attrs.items()])
            elif isinstance(obj, h5py.Group):
                output.append(grp(name))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in obj.attrs.items()])

        def visit_links(name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            h5py_obj = hdf_group.get(name)

            if isinstance(h5py_obj, h5py.Dataset):
                if isinstance(obj, h5py.ExternalLink):
                    detail = f"LINK: {h5py_obj.dtype}, {h5py_obj.shape}"
                elif h5py_obj.size <= 1:
                    detail = f"{h5py_obj[()]}"
                else:
                    detail = f"{h5py_obj.dtype}, {h5py_obj.shape}"
                output.append(ds(name, detail))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in h5py_obj.attrs.items()])
            elif isinstance(h5py_obj, h5py.Group):
                output.append(grp(name))
                if attributes:
                    output.append(f"{name}")
                    output.extend([attr(name, _attr, value) for _attr, value in h5py_obj.attrs.items()])

        if all_links:
            hdf_group.visititems_links(visit_links)
        else:
            hdf_group.visititems(visit_paths)
        output.append('\n --- End --- ')
    return '\n'.join(output)
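
E.G. (filename assumed):

print(hdf_tree_string('file.nxs', attributes=False))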

list_files(folder_directory, extension=DEFAULT_EXTENSION)

Return list of files in directory with extension, returning list of full file paths

Source code in src/hdfmap/file_functions.py
def list_files(folder_directory: str, extension=DEFAULT_EXTENSION) -> list[str]:
    """Return list of files in directory with extension, returning list of full file paths"""
    try:
        return sorted(
            (file.path for file in os.scandir(folder_directory) if file.is_file() and file.name.endswith(extension)),
            key=lambda x: os.path.getmtime(x)
        )
    except FileNotFoundError:
        return []
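
For example (the folder path is an assumption; results are sorted by modification time):

files = list_files('/data/experiment1')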

load_hdf(hdf_filename, **kwargs)

Load hdf file, return h5py.File object

Source code in src/hdfmap/hdf_loader.py
def load_hdf(hdf_filename: str, **kwargs) -> h5py.File:
    """Load hdf file, return h5py.File object"""
    options = HDF_FILE_OPTIONS.copy()  # copy so per-call kwargs don't modify the module defaults
    options.update(kwargs)
    return h5py.File(hdf_filename, 'r', **options)
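
Typically used as a context manager so the file is closed after reading; the dataset path here is hypothetical:

with load_hdf('file.nxs') as hdf:
    data = hdf['/entry1/scan/en'][()]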

nexus_data_block(filenames, hdf_map=None, fixed_output=False)

Create classic dict-like dataloader objects from nexus files.

E.G.
d = nexus_data_block('filename')
d.scannable -> array
d.metadata.filename -> value
d.keys() -> list of items

Parameters:

- filenames (str | list[str]): file path or list of file paths [required]
- hdf_map (HdfMap): HdfMap object, or None to generate one from the first file [default: None]
- fixed_output: if True, always return a list of len(filenames) [default: False]

Returns:

- list, len(filenames)

Source code in src/hdfmap/file_functions.py
def nexus_data_block(filenames: str | list[str], hdf_map: HdfMap = None, fixed_output=False):
    """
    Create classic dict like dataloader objects from nexus files
    E.G.
        d = nexus_data_block('filename')
        d.scannable -> array
        d.metadata.filename -> value
        d.keys() -> list of items

    :param filenames: str or list of str - file paths
    :param hdf_map: HdfMap object, or None to generate from first file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output - dict like DataObject
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_nexus_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.get_dataholder(hdf))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out

set_all_logging_level(level)

Set the logging level of all loggers. Logging levels (see the builtin logging module):

    'notset'   |  0
    'debug'    |  10
    'info'     |  20
    'warning'  |  30
    'error'    |  40
    'critical' |  50

Parameters:

- level (str | int): str level name or int level [required]

Returns:

- None

Source code in src/hdfmap/logging.py
def set_all_logging_level(level: str | int):
    """
    Set logging level of all loggers
    Logging Levels (see builtin module logging)
        'notset'   |  0
        'debug'    |  10
        'info'     |  20
        'warning'  |  30
        'error'    |  40
        'critical' |  50
    :param level: str level name or int level
    :return: None
    """
    try:
        level = level.upper()
        # level = logging.getLevelNamesMapping()[level]  # Python >3.11
        level = logging._nameToLevel[level]
    except AttributeError:
        level = int(level)

    logging_logger = logging.getLogger(__name__)
    for logger in [logging.getLogger(name) for name in logging.root.manager.loggerDict]:
        logger.setLevel(level)
    logging_logger.info(f"Logging level set to {level}")
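
E.G.

set_all_logging_level('debug')  # equivalent to set_all_logging_level(10)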