
Code Description

HdfMap

hdfmap: Map objects within an HDF5 file and create a dataset namespace.

Usage

HdfMap from NeXus file

from hdfmap import create_nexus_map, load_hdf
hmap = create_nexus_map('file.nxs')
with load_hdf('file.nxs') as nxs:
    address = hmap.get_address('energy')
    energy = nxs[address][()]
    string = hmap.format_hdf(nxs, "the energy is {energy:.2f} keV")
    d = hmap.get_dataholder(nxs)  # classic data table, d.scannable, d.metadata

Shortcuts - single file reloading class

from hdfmap import NexusLoader
scan = NexusLoader('file.nxs')
[data1, data2] = scan.get_data(['dataset_name_1', 'dataset_name_2'])
data = scan.eval('dataset_name_1 * 100 + 2')
string = scan.format('my data is {dataset_name_1:.2f}')

Shortcuts - multifile load data

from hdfmap import hdf_data, hdf_eval, hdf_format, hdf_image
filenames = [f"file{n}.nxs" for n in range(100)]
all_data = hdf_data(filenames, 'dataset_name')
normalised_data = hdf_eval(filenames, 'total / Transmission / (rc / 300.)')
descriptions = hdf_format(filenames, 'Energy: {en:5.3f} keV')
image = hdf_image(filenames, index=31)

Copyright 2024-2025 Daniel G. Porter

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

By Dr Dan Porter, Diamond Light Source Ltd, 2024-2025

HdfLoader

HdfLoader contains the filename and hdfmap for an HDF file. The hdfmap contains all the dataset paths and a namespace, allowing data to be called from the file using variable names, loading only the required datasets for each operation.

E.G.
hdf = HdfLoader('file.hdf')
[data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
data = hdf.eval('dataset_name_1 * 100 + 2')
string = hdf.format('my data is {dataset_name_1:.2f}')
print(hdf.summary())
Source code in src/hdfmap/reloader_class.py
class HdfLoader:
    """
    HDF Loader contains the filename and hdfmap for a HDF file, the hdfmap contains all the dataset paths and a
    namespace, allowing data to be called from the file using variable names, loading only the required datasets
    for each operation.

    ### E.G.
        hdf = HdfLoader('file.hdf')
        [data1, data2] = hdf.get_data(*['dataset_name_1', 'dataset_name_2'])
        data = hdf.eval('dataset_name_1 * 100 + 2')
        string = hdf.format('my data is {dataset_name_1:.2f}')
        print(hdf.summary())
    """

    def __init__(self, hdf_filename: str, hdf_map: HdfMap | NexusMap | None = None):
        self.filename = hdf_filename
        if hdf_map is None:
            self.map = create_hdf_map(hdf_filename)
        else:
            self.map = hdf_map

    def __repr__(self):
        return f"HdfReloader('{self.filename}')"

    def __str__(self):
        with self._load() as hdf:
            out = self.map.info_data(hdf)
        return out

    def __getitem__(self, item):
        return self.get_data(item)

    def __call__(self, expression):
        return self.eval(expression)

    def _load(self) -> h5py.File:
        return load_hdf(self.filename)

    def get_hdf_path(self, name_or_path: str) -> str or None:
        """Return hdf path of object in HdfMap"""
        return self.map.get_path(name_or_path)

    def find_hdf_paths(self, string: str, name_only: bool = True, whole_word: bool = False) -> list[str]:
        """
        Find any dataset paths that contain the given string argument
        :param string: str to find in list of datasets
        :param name_only: if True, search only the name of the dataset, not the full path
        :param whole_word: if True, match only whole-word names (case-insensitive)
        :return: list of hdf paths
        """
        return self.map.find_paths(string, name_only, whole_word)

    def find_names(self, string: str) -> list[str]:
        """
        Find any dataset names that contain the given string argument, searching names in self.combined
        :param string: str to find in list of datasets
        :return: list of names
        """
        return self.map.find_names(string)

    def get_data(self, *name_or_path, index: slice = (), default=None, direct_load=False):
        """
        Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
        See hdfmap.eval_functions.dataset2data for more information.
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
        :return: dataset2data(dataset) -> datetime, str or squeezed array as required.
        """
        with self._load() as hdf:
            out = [self.map.get_data(hdf, name, index, default, direct_load) for name in name_or_path]
        if len(name_or_path) == 1:
            return out[0]
        return out

    def get_string(self, *name_or_path, index: slice = (), default='', units=False):
        """
        Return data from dataset in file, converted into summary string
        See hdfmap.eval_functions.dataset2data for more information.
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param units: if True and attribute 'units' available, append this to the result
        :return: dataset2str(dataset) -> str
        """
        with self._load() as hdf:
            out = [self.map.get_string(hdf, name, index, default, units) for name in name_or_path]
        if len(name_or_path) == 1:
            return out[0]
        return out

    def get_image(self, index: slice = None) -> np.ndarray:
        """
        Get image data from file, using default image path
        :param index: (slice,) or None to take the middle image
        :return: numpy array of image
        """
        with self._load() as hdf:
            return self.map.get_image(hdf, index)

    def get_metadata(self, defaults=None):
        with self._load() as hdf:
            return self.map.get_metadata(hdf, default=defaults)

    def get_scannables(self):
        """Return scannables from file (values associated with hdfmap.scannables)"""
        with self._load() as hdf:
            return self.map.get_scannables(hdf)

    def summary(self) -> str:
        """Return string summary of datasets"""
        with self._load() as hdf:
            return self.map.create_dataset_summary(hdf)

    def eval(self, expression: str, default=DEFAULT):
        """
        Evaluate an expression using the namespace of the hdf file
        :param expression: str expression to be evaluated
        :param default: returned if varname not in namespace
        :return: eval(expression)
        """
        with self._load() as hdf:
            return self.map.eval(hdf, expression, default)

    def format(self, expression: str, default=DEFAULT):
        """
        Evaluate a formatted string expression using the namespace of the hdf file
        :param expression: str expression using {name} format specifiers
        :param default: returned if varname not in namespace
        :return: eval_hdf(f"expression")
        """
        with self._load() as hdf:
            return self.map.format_hdf(hdf, expression, default)

eval(expression, default=DEFAULT)

Evaluate an expression using the namespace of the hdf file

Parameters:

  • expression (str): str expression to be evaluated [required]
  • default: returned if varname not in namespace [default: DEFAULT]

Returns:

  • eval(expression)

Source code in src/hdfmap/reloader_class.py
def eval(self, expression: str, default=DEFAULT):
    """
    Evaluate an expression using the namespace of the hdf file
    :param expression: str expression to be evaluated
    :param default: returned if varname not in namespace
    :return: eval(expression)
    """
    with self._load() as hdf:
        return self.map.eval(hdf, expression, default)
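
For example, a minimal sketch (the dataset names 'total' and 'count_time' are hypothetical):

hdf = HdfLoader('file.hdf')
rate = hdf.eval('total / count_time')  # names resolve to datasets in the file
value = hdf.eval('missing_name', default=0)  # returns the default rather than raising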

find_hdf_paths(string, name_only=True, whole_word=False)

Find any dataset paths that contain the given string argument

Parameters:

  • string (str): str to find in list of datasets [required]
  • name_only (bool): if True, search only the name of the dataset, not the full path [default: True]
  • whole_word (bool): if True, match only whole-word names (case-insensitive) [default: False]

Returns:

  • list[str]: list of hdf paths

Source code in src/hdfmap/reloader_class.py
def find_hdf_paths(self, string: str, name_only: bool = True, whole_word: bool = False) -> list[str]:
    """
    Find any dataset paths that contain the given string argument
    :param string: str to find in list of datasets
    :param name_only: if True, search only the name of the dataset, not the full path
    :param whole_word: if True, match only whole-word names (case-insensitive)
    :return: list of hdf paths
    """
    return self.map.find_paths(string, name_only, whole_word)
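
A quick illustration (the paths shown are hypothetical):

hdf = HdfLoader('file.hdf')
paths = hdf.find_hdf_paths('en')  # all dataset paths whose name contains 'en'
exact = hdf.find_hdf_paths('energy', whole_word=True)  # case-insensitive whole-word match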

find_names(string)

Find any dataset names that contain the given string argument, searching names in self.combined

Parameters:

  • string (str): str to find in list of datasets [required]

Returns:

  • list[str]: list of names

Source code in src/hdfmap/reloader_class.py
def find_names(self, string: str) -> list[str]:
    """
    Find any dataset names that contain the given string argument, searching names in self.combined
    :param string: str to find in list of datasets
    :return: list of names
    """
    return self.map.find_names(string)
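
For example, following the example in the underlying HdfMap.find_names docstring:

hdf = HdfLoader('file.hdf')
names = hdf.find_names('m1')  # e.g. ['m1x', 'm1y', ...]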

format(expression, default=DEFAULT)

Evaluate a formatted string expression using the namespace of the hdf file

Parameters:

  • expression (str): str expression using {name} format specifiers [required]
  • default: returned if varname not in namespace [default: DEFAULT]

Returns:

  • eval_hdf(f"expression")

Source code in src/hdfmap/reloader_class.py
def format(self, expression: str, default=DEFAULT):
    """
    Evaluate a formatted string expression using the namespace of the hdf file
    :param expression: str expression using {name} format specifiers
    :param default: returned if varname not in namespace
    :return: eval_hdf(f"expression")
    """
    with self._load() as hdf:
        return self.map.format_hdf(hdf, expression, default)
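
A short sketch (the names 'scan_command' and 'en' are hypothetical):

hdf = HdfLoader('file.hdf')
title = hdf.format('scan: {scan_command}, energy = {en:.3f} keV')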

get_data(*name_or_path, index=(), default=None, direct_load=False)

Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects. See hdfmap.eval_functions.dataset2data for more information.

Parameters:

  • name_or_path: str name or path pointing to dataset in hdf file [default: ()]
  • index (slice): index or slice of data in hdf file [default: ()]
  • default: value to return if name not found in hdf file [default: None]
  • direct_load: return str, datetime or squeezed array if False, otherwise load data directly [default: False]

Returns:

  • dataset2data(dataset) -> datetime, str or squeezed array as required.

Source code in src/hdfmap/reloader_class.py
def get_data(self, *name_or_path, index: slice = (), default=None, direct_load=False):
    """
    Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
    See hdfmap.eval_functions.dataset2data for more information.
    :param name_or_path: str name or path pointing to dataset in hdf file
    :param index: index or slice of data in hdf file
    :param default: value to return if name not found in hdf file
    :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
    :return: dataset2data(dataset) -> datetime, str or squeezed array as required.
    """
    with self._load() as hdf:
        out = [self.map.get_data(hdf, name, index, default, direct_load) for name in name_or_path]
    if len(name_or_path) == 1:
        return out[0]
    return out
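
For example (dataset names are hypothetical):

hdf = HdfLoader('file.hdf')
energy = hdf.get_data('energy')  # single name returns a single value
x, y = hdf.get_data('axes', 'signal')  # multiple names return a list
first = hdf.get_data('signal', index=0)  # index applied when reading the dataset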

get_hdf_path(name_or_path)

Return hdf path of object in HdfMap

Source code in src/hdfmap/reloader_class.py
def get_hdf_path(self, name_or_path: str) -> str or None:
    """Return hdf path of object in HdfMap"""
    return self.map.get_path(name_or_path)
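
For example (the path shown is illustrative):

hdf = HdfLoader('file.hdf')
path = hdf.get_hdf_path('energy')  # e.g. '/entry/instrument/mono/energy', or None if not found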

get_image(index=None)

Get image data from file, using default image path

Parameters:

  • index (slice): (slice,) or None to take the middle image [default: None]

Returns:

  • ndarray: numpy array of image

Source code in src/hdfmap/reloader_class.py
def get_image(self, index: slice = None) -> np.ndarray:
    """
    Get image data from file, using default image path
    :param index: (slice,) or None to take the middle image
    :return: numpy array of image
    """
    with self._load() as hdf:
        return self.map.get_image(hdf, index)
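
For example, a minimal sketch (the explicit index shown assumes the (slice,) convention from the docstring):

hdf = HdfLoader('file.hdf')
middle_image = hdf.get_image()  # index=None takes the middle image of the scan
first_image = hdf.get_image(index=(0,))  # hypothetical explicit index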

get_scannables()

Return scannables from file (values associated with hdfmap.scannables)

Source code in src/hdfmap/reloader_class.py
def get_scannables(self):
    """Return scannables from file (values associated with hdfmap.scannables)"""
    with self._load() as hdf:
        return self.map.get_scannables(hdf)
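
For example, a minimal sketch:

hdf = HdfLoader('file.hdf')
scannables = hdf.get_scannables()  # dict of scannable datasets, per HdfMap.get_scannables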

get_string(*name_or_path, index=(), default='', units=False)

Return data from dataset in file, converted into a summary string. See hdfmap.eval_functions.dataset2data for more information.

Parameters:

  • name_or_path: str name or path pointing to dataset in hdf file [default: ()]
  • index (slice): index or slice of data in hdf file [default: ()]
  • default: value to return if name not found in hdf file [default: '']
  • units: if True and attribute 'units' available, append this to the result [default: False]

Returns:

  • dataset2str(dataset) -> str

Source code in src/hdfmap/reloader_class.py
def get_string(self, *name_or_path, index: slice = (), default='', units=False):
    """
    Return data from dataset in file, converted into summary string
    See hdfmap.eval_functions.dataset2data for more information.
    :param name_or_path: str name or path pointing to dataset in hdf file
    :param index: index or slice of data in hdf file
    :param default: value to return if name not found in hdf file
    :param units: if True and attribute 'units' available, append this to the result
    :return: dataset2str(dataset) -> str
    """
    with self._load() as hdf:
        out = [self.map.get_string(hdf, name, index, default, units) for name in name_or_path]
    if len(name_or_path) == 1:
        return out[0]
    return out
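
For example (the output shown is illustrative):

hdf = HdfLoader('file.hdf')
print(hdf.get_string('energy', units=True))  # appends the 'units' attribute if available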

summary()

Return string summary of datasets

Source code in src/hdfmap/reloader_class.py
def summary(self) -> str:
    """Return string summary of datasets"""
    with self._load() as hdf:
        return self.map.create_dataset_summary(hdf)

HdfMap

HdfMap object, container for paths of different objects in an HDF file

with h5py.File('file.hdf') as hdf:
    map = HdfMap(hdf)

map.get_path('data') -> '/entry/measurement/data'
map['data'] -> '/entry/measurement/data'

with h5py.File('another_file.hdf') as hdf:
    data = map.get_data(hdf, 'data')
    array = map.get_scannables_array(hdf)
    metadata = map.get_metadata(hdf)
    out = map.eval(hdf, 'data / 10')
    outstr = map.format(hdf, 'the data looks like: {data}')

Objects within the HDF file are separated into Groups and Datasets. Each object has a defined 'path' and 'name' parameter, as well as other attributes:

  • path -> '/entry/measurement/data' -> the location of an object within the file
  • name -> 'data' -> a path expressed as a simple variable name

Paths are unique locations within the file, but can be used to identify similar objects in other files. Names may not be unique within a file and are generated from the final element of the hdf path.

  • When multiple paths produce the same name, the name is overwritten each time, so the last path in the file has priority.
  • Names are also stored using the 'local_name' attribute, if it exists

Names of different types of datasets are stored separately for arrays (size > 0) and values (size 0). Names for scannables relate to all arrays of a particular size. A combined list of names is provided, with priority scannables > arrays > values.

Attributes
  • map.groups stores attributes of each group by path
  • map.classes stores list of group paths by nx_class
  • map.datasets stores attributes of each dataset by path
  • map.arrays stores array dataset paths by name
  • map.values stores value dataset paths by name
  • map.metadata stores value dataset path by altname only
  • map.scannables stores array dataset paths with given size, by name, all arrays have the same shape
  • map.combined stores array and value paths (arrays overwrite values)
  • map.image_data stores dataset paths of image data (arrays with 2+ dimensions or arrays of image files)
E.G.
  • map.groups = {'/hdf/group': ('class', 'name', {attrs}, [datasets])}
  • map.classes = {'class_name': ['/hdf/group1', '/hdf/group2']}
  • map.datasets = {'/hdf/group/dataset': ('name', size, shape, {attrs})}
  • map.arrays = {'name': '/hdf/group/dataset'}
  • map.values = {'name': '/hdf/group/dataset'}
  • map.scannables = {'name': '/hdf/group/dataset'}
  • map.image_data = {'name': '/hdf/group/dataset'}
Methods
  • map.populate(h5py.File) -> populates the dictionaries using the given file
  • map.generate_scannables(array_size) -> populates scannables namespace with arrays of same size
  • map.most_common_size -> returns the most common dataset size > 1
  • map.get_attr('name_or_path', 'attr') -> return value of dataset attribute
  • map.get_path('name_or_group_or_class') -> returns path of object with name
  • map.get_image_path() -> returns default path of detector dataset (or largest dataset)
  • map.get_group_path('name_or_path_or_class') -> return path of group with class
  • map.get_group_datasets('name_or_path_or_class') -> return list of dataset paths in class
  • map.find_groups(*names_or_classes) -> return list of group paths matching given group names or classes
  • map.find_paths('string') -> return list of dataset paths containing string
  • map.find_names('string') -> return list of dataset names containing string
  • map.find_attr('attr_name') -> return list of paths of groups or datasets containing attribute 'attr_name'
  • map.add_local(local_variable=value) -> add to the local namespace accessed by eval
  • map.add_named_expression(alternate_name='expression') -> add local variables for expressions replaced during eval
File Methods
  • map.get_metadata(h5py.File) -> returns dict of value datasets
  • map.get_scannables(h5py.File) -> returns dict of scannable datasets
  • map.get_scannables_array(h5py.File) -> returns numpy array of scannable datasets
  • map.get_dataholder(h5py.File) -> returns dict like object with metadata and scannables
  • map.get_image(h5py.File, index) -> returns image data (2D float array or str image filename)
  • map.get_data(h5py.File, 'name') -> returns data from dataset
  • map.get_string(h5py.File, 'name') -> returns string summary of dataset
  • map.eval(h5py.File, 'expression') -> returns output of expression
  • map.format(h5py.File, 'string {name}') -> returns output of str expression
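
As a brief sketch of the named-expression mechanism (the file, ROI name and pixel values are hypothetical; see add_roi in the source below):

with h5py.File('file.hdf') as hdf:
    hmap = HdfMap(hdf)
hmap.add_roi('roi1', cen_i=100, cen_j=100, wid_i=30, wid_j=30)
with h5py.File('file.hdf') as hdf:
    totals = hmap.eval(hdf, 'roi1_total')  # sum over the ROI for each image in the scan
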
Source code in src/hdfmap/hdfmap_class.py
class HdfMap:
    """
    HdfMap object, container for paths of different objects in an HDF file

        with h5py.File('file.hdf') as hdf:
            map = HdfMap(hdf)

        map.get_path('data') -> '/entry/measurement/data'
        map['data'] -> '/entry/measurement/data'

        with h5py.File('another_file.hdf') as hdf:
            data = map.get_data(hdf, 'data')
            array = map.get_scannables_array(hdf)
            metadata = map.get_metadata(hdf)
            out = map.eval(hdf, 'data / 10')
            outstr = map.format(hdf, 'the data looks like: {data}')

    Objects within the HDF file are separated into Groups and Datasets. Each object has a
    defined 'path' and 'name' parameter, as well as other attributes:

    - path -> '/entry/measurement/data' -> the location of an object within the file
    - name -> 'data' -> a path expressed as a simple variable name

    Paths are unique locations within the file but can be used to identify similar objects in other files
    Names may not be unique within a file and are generated from the final element of the hdf path.

    - When multiple paths produce the same name, the name is overwritten each time, so the last path in the
    file has priority.
    - Names are also stored using the 'local_name' attribute, if it exists

    Names of different types of datasets are stored for arrays (size > 0) and values (size 0)
    Names for scannables relate to all arrays of a particular size
    A combined list of names is provided where scannables > arrays > values

    ### Attributes
    - map.groups      stores attributes of each group by path
    - map.classes     stores list of group paths by nx_class
    - map.datasets    stores attributes of each dataset by path
    - map.arrays      stores array dataset paths by name
    - map.values      stores value dataset paths by name
    - map.metadata   stores value dataset path by altname only
    - map.scannables  stores array dataset paths with given size, by name, all arrays have the same shape
    - map.combined    stores array and value paths (arrays overwrite values)
    - map.image_data  stores dataset paths of image data (arrays with 2+ dimensions or arrays of image files)
    #### E.G.
    - map.groups = {'/hdf/group': ('class', 'name', {attrs}, [datasets])}
    - map.classes = {'class_name': ['/hdf/group1', '/hdf/group2']}
    - map.datasets = {'/hdf/group/dataset': ('name', size, shape, {attrs})}
    - map.arrays = {'name': '/hdf/group/dataset'}
    - map.values = {'name': '/hdf/group/dataset'}
    - map.scannables = {'name': '/hdf/group/dataset'}
    - map.image_data = {'name': '/hdf/group/dataset'}

    ### Methods
    - map.populate(h5py.File) -> populates the dictionaries using the  given file
    - map.generate_scannables(array_size) -> populates scannables namespace with arrays of same size
    - map.most_common_size -> returns the most common dataset size > 1
    - map.get_attr('name_or_path', 'attr') -> return value of dataset attribute
    - map.get_path('name_or_group_or_class') -> returns path of object with name
    - map.get_image_path() -> returns default path of detector dataset (or largest dataset)
    - map.get_group_path('name_or_path_or_class') -> return path of group with class
    - map.get_group_datasets('name_or_path_or_class') -> return list of dataset paths in class
    - map.find_groups(*names_or_classes) -> return list of group paths matching given group names or classes
    - map.find_paths('string') -> return list of dataset paths containing string
    - map.find_names('string') -> return list of dataset names containing string
    - map.find_attr('attr_name') -> return list of paths of groups or datasets containing attribute 'attr_name'
    - map.add_local(local_variable=value) -> add to the local namespace accessed by eval
    - map.add_named_expression(alternate_name='expression') -> add local variables for expressions replaced during eval
    ### File Methods
    - map.get_metadata(h5py.File) -> returns dict of value datasets
    - map.get_scannables(h5py.File) -> returns dict of scannable datasets
    - map.get_scannables_array(h5py.File) -> returns numpy array of scannable datasets
    - map.get_dataholder(h5py.File) -> returns dict like object with metadata and scannables
    - map.get_image(h5py.File, index) -> returns image data (2D float array or str image filename)
    - map.get_data(h5py.File, 'name') -> returns data from dataset
    - map.get_string(h5py.File, 'name') -> returns string summary of dataset
    - map.eval(h5py.File, 'expression') -> returns output of expression
    - map.format(h5py.File, 'string {name}') -> returns output of str expression
    """

    def __init__(self, file: h5py.File | None = None):
        self.filename = ''
        self.all_paths = []
        self.groups = {}  # stores attributes of each group by path
        self.datasets = {}  # stores attributes of each dataset by path
        self.classes = defaultdict(list)  # stores lists of group paths by nx_class
        self.arrays = {}  # stores array dataset paths by name, altname + group_name
        self.values = {}  # stores value dataset paths by name, altname + group_name
        self.metadata = {}  # stores value dataset path by altname only
        self.scannables = {}  # stores array dataset paths with given size, by name
        self.combined = {}  # stores array and value paths (arrays overwrite values)
        self.image_data = {}  # stores dataset paths of image data
        self._local_data = {}  # stores variables and data to be used in eval
        self._alternate_names = {}  # stores variable names for expressions to be evaluated
        self._default_image_path = None
        self._use_local_data = False  # if True, preferentially loads data from _local_data

        if isinstance(file, h5py.File):
            self.populate(file)

    def __getitem__(self, item):
        return self.combined[item]

    def __iter__(self):
        return iter(self.combined)

    def __contains__(self, item):
        return item in self.combined or item in self.datasets

    def __call__(self, expression, **kwargs):
        if 'hdf_file' not in kwargs:
            kwargs['hdf_file'] = self.load_hdf()
        return self.eval(expression=expression, **kwargs)

    def __repr__(self):
        return f"HdfMap based on '{self.filename}'"

    def __str__(self):
        out = f"{repr(self)}\n"
        out += self.info_summary()
        out += "\n*use print(self.info_names(combined=True, scannables=True, image_data=True)) to see detail\n"
        return out

    def info_groups(self) -> str:
        """Return str info on groups"""
        out = f"{repr(self)}\n"
        out += "Groups:\n"
        for path, group in self.groups.items():
            out += f"{path} [{group.nx_class}: '{group.name}']\n"
            out += '\n'.join(f"  @{attr}: {self.get_attr(path, attr)}" for attr in group.attrs)
            out += '\n'
            for dataset_name in group.datasets:
                dataset_path = build_hdf_path(path, dataset_name)
                if dataset_path in self.datasets:
                    dataset = self.datasets[dataset_path]
                    out += f"  {dataset_name}: {dataset.shape}\n"
        return out

    def info_classes(self) -> str:
        """Return str info on group class names"""
        out = f"{repr(self)}\n"
        out += 'Classes:\n'
        out += disp_dict(self.classes, 20)
        return out

    def info_datasets(self) -> str:
        """Return str info on datasets"""
        out = f"{repr(self)}\n"
        out += "Datasets:\n"
        out += disp_dict(self.datasets, 20)
        return out

    def info_names(self, arrays=False, values=False, combined=False,
                   metadata=False, scannables=False, image_data=False) -> str:
        """Return str info for different namespaces"""
        if not any((arrays, values, combined, metadata, scannables, image_data)):
            combined = True
        options = [
            ('Arrays', arrays, self.arrays),
            ('Values', values, self.values),
            ('Combined', combined, self.combined),
            ('Metadata', metadata, self.metadata),
            ('Scannables', scannables, self.scannables),
            ('Image Data', image_data, self.image_data),
        ]
        out = ''
        for name, show, namespace in options:
            if show:
                out += f"\n{name} Namespace:\n"
                out += '\n'.join([
                    f"{name:>30}: {str(self.datasets[path].shape):10} : {path:60}"
                    for name, path in namespace.items()
                ])
                out += '\n'
        return out

    def info_summary(self):
        out = [
            "--Paths--",
            f"All paths: {len(self.all_paths)}",
            f"Groups: {len(self.groups)}",
            f"Datasets: {len(self.datasets)}",
            "--Names--",
            f"Classes: {len(self.classes)}",
            f"Arrays: {len(self.arrays)}",
            f"Values: {len(self.values)}",
            f"Combined: {len(self.combined)}",
            f"Metadata: {len(self.metadata)}",
            f"Scannables: {len(self.scannables)}, shape={self.scannables_shape()}, size={self.scannables_length()}",
            f"Image Data: {len(self.image_data)}, shape={self.get_image_shape()}",
        ]
        return '\n'.join(out)

    def _store_class(self, name, path):
        if path not in self.classes[name]:
            self.classes[name].append(path)

    def _store_group(self, hdf_group: h5py.Group, path: str, name: str):

        nx_class = hdf_group.attrs.get('NX_class', default='Group')
        if hasattr(nx_class, 'decode'):
            nx_class = nx_class.decode()
        self.groups[path] = Group(
            nx_class,
            name,
            dict(hdf_group.attrs),
            [key for key, item in hdf_group.items() if isinstance(item, h5py.Dataset)]
        )
        self._store_class(name, path)
        self._store_class(nx_class, path)
        logger.debug(f"{path}  HDFGroup: {nx_class}")
        return nx_class

    def _store_dataset(self, hdf_dataset: h5py.Dataset, hdf_path: str, name: str):
        # New: add group_name to namespace as standard, helps with names like s5/x + s4/x
        # this significantly increases the number of names in namespaces
        group = self.groups[SEP.join(hdf_path.split(SEP)[:-1])]  # group is already stored
        group_name = f"{group.name}_{name}"
        class_name = f"{group.nx_class}_{name}"
        # group_name = generate_identifier(f"{hdf_path.split(SEP)[-2]}_{name}")
        # alt_name = generate_identifier(hdf_dataset.attrs[LOCAL_NAME]) if LOCAL_NAME in hdf_dataset.attrs else None
        alt_name = generate_alt_name(hdf_dataset)
        names = {n: hdf_path for n in {name, group_name, class_name, alt_name} if n}
        self.datasets[hdf_path] = Dataset(
            name=name,
            names=list(names),
            size=hdf_dataset.size,
            shape=hdf_dataset.shape,
            attrs=dict(hdf_dataset.attrs),
        )
        if is_image(hdf_dataset.shape):
            self.image_data[name] = hdf_path
            self.image_data[group_name] = hdf_path
            self.arrays.update(names)
            logger.debug(f"{hdf_path}  HDFDataset: image_data & array {name, hdf_dataset.size, hdf_dataset.shape}")
        elif hdf_dataset.ndim > 0:
            self.arrays.update(names)
            logger.debug(f"{hdf_path}  HDFDataset: array {name, hdf_dataset.size, hdf_dataset.shape}")
        else:
            self.values.update(names)
            if alt_name:
                self.metadata[alt_name] = hdf_path
            logger.debug(f"{hdf_path}  HDFDataset: value")

    def _populate(self, hdf_group: h5py.Group, root: str = '',
                  recursive: bool = True, groups: list[str] = None):
        """
        populate HdfMap dictionary's using recursive method
        :param hdf_group: HDF group object, from HDF File
        :param root: str path of hdf Group, used to build dataset paths
        :param recursive: if True, will recursively search through subgroups
        :param groups: if not None, will only search subgroups named in list, e.g. ['entry','NX_DATA']
        :return: None
        """
        logger.debug(f"{repr(self)}._populate root='{root}'")
        for key in hdf_group:
            obj = hdf_group.get(key)
            link = hdf_group.get(key, getlink=True)
            logger.debug(f"{key}: {repr(obj)} : {repr(link)}")
            if obj is None:
                continue  # dataset may be missing due to a broken link
            hdf_path = root + SEP + key  # build hdf path - a cross-file unique identifier
            # New: store all paths in file, useful for checking if anything was missed, but might be slow
            self.all_paths.append(hdf_path)
            name = generate_identifier(hdf_path)
            logger.debug(f"{hdf_path}:  {name}, link={repr(link)}")

            # Group
            if isinstance(obj, h5py.Group):
                nx_class = self._store_group(obj, hdf_path, name)
                if recursive and (key in groups or nx_class in groups if groups else True):
                    self._populate(obj, hdf_path, recursive)

            # Dataset
            elif isinstance(obj, h5py.Dataset): #18 remove link omission
                self._store_dataset(obj, hdf_path, name)

    def add_local(self, **kwargs):
        """Add value to the local namespace, used in eval"""
        self._local_data.update(kwargs)

    def use_local_data(self, use_data: bool = True):
        """
        Activate the option to reload data from the namespace locally, rather than from the file.

        self.eval(hdf, 'cmd') -> default will load 'cmd' from the file based on the hdf path associated with cmd
        self.use_local_data() -> self.eval(hdf, 'cmd') will return 'cmd' from local data if available, or from the file.
        self.use_local_data(False) -> returns to default behaviour
        """
        self._use_local_data = use_data

    def add_named_expression(self, **kwargs):
        """Add named expression to the local namespace, used in eval"""
        self._alternate_names.update(kwargs)

    def add_roi(self, name: str, cen_i: int | str, cen_j: int | str,
                wid_i: int = 30, wid_j: int = 30, image_name: str = 'IMAGE'):
        """
        Add an image ROI (region of interest) to the named expressions
        The ROI operates on the default IMAGE dataset, loading only the required region from the file.
        The following expressions will be added, for use in self.eval etc.
            *name* -> returns the whole ROI array as a HDF5 dataset
            *name*_total -> returns the sum of each image in the ROI array
            *name*_max -> returns the max of each image in the ROI array
            *name*_min -> returns the min of each image in the ROI array
            *name*_mean -> returns the mean of each image in the ROI array
            *name*_bkg -> returns the background ROI array (area around ROI)
            *name*_rmbkg -> returns the total with background subtracted
            *name*_box -> returns the pixel positions of the ROI corners
            *name*_bkg_box -> returns the pixel positions of the background ROI

        :param name: string name of the ROI
        :param cen_i: central pixel index along first dimension, can be callable string
        :param cen_j: central pixel index along second dimension, can be callable string
        :param wid_i: full width along first dimension, in pixels
        :param wid_j: full width along second dimension, in pixels
        :param image_name: string name of the image
        """
        wid_i = abs(wid_i) // 2
        wid_j = abs(wid_j) // 2
        islice = f"{cen_i}-{wid_i:.0f} : {cen_i}+{wid_i:.0f}"
        jslice = f"{cen_j}-{wid_j:.0f} : {cen_j}+{wid_j:.0f}"
        dataset = f"d_{image_name}"
        roi_array = dataset + f"[..., {islice}, {jslice}]"
        roi_total = f"{roi_array}.sum(axis=(-1, -2))"
        roi_max = f"{roi_array}.max(axis=(-1, -2))"
        roi_min = f"{roi_array}.min(axis=(-1, -2))"
        roi_mean = f"{roi_array}.mean(axis=(-1, -2))"
        roi_box = (
            'array([' +
            f"[{cen_i}-{wid_i:.0f}, {cen_j}-{wid_j:.0f}]," +
            f"[{cen_i}-{wid_i:.0f}, {cen_j}+{wid_j:.0f}]," +
            f"[{cen_i}+{wid_i:.0f}, {cen_j}+{wid_j:.0f}]," +
            f"[{cen_i}+{wid_i:.0f}, {cen_j}-{wid_j:.0f}]," +
            f"[{cen_i}-{wid_i:.0f}, {cen_j}-{wid_j:.0f}]," +
            '])'
        )

        islice = f"{cen_i}-{wid_i * 2:.0f} : {cen_i}+{wid_i * 2:.0f}"
        jslice = f"{cen_j}-{wid_j * 2:.0f} : {cen_j}+{wid_j * 2:.0f}"
        bkg_array = dataset + f"[..., {islice}, {jslice}]"
        bkg_total = f"{bkg_array}.sum(axis=(-1, -2))"
        roi_bkg_total = f"({bkg_total} - {roi_total})"
        roi_bkg_mean = f"{roi_bkg_total}/(12*{wid_i * wid_j})"
        # Transpose array to broadcast bkg_total
        roi_rmbkg = f"({roi_array}.T - {roi_bkg_mean}).sum(axis=(0, 1))"
        roi_bkg_box = (
            'array([' +
            f"[{cen_i}-{wid_i * 2:.0f}, {cen_j}-{wid_j * 2:.0f}]," +
            f"[{cen_i}-{wid_i * 2:.0f}, {cen_j}+{wid_j * 2:.0f}]," +
            f"[{cen_i}+{wid_i * 2:.0f}, {cen_j}+{wid_j * 2:.0f}]," +
            f"[{cen_i}+{wid_i * 2:.0f}, {cen_j}-{wid_j * 2:.0f}]," +
            f"[{cen_i}-{wid_i * 2:.0f}, {cen_j}-{wid_j * 2:.0f}]," +
            '])'
        )

        alternate_names = {
            f"{name}_total": roi_total,
            f"{name}_max": roi_max,
            f"{name}_min": roi_min,
            f"{name}_mean": roi_mean,
            f"{name}_bkg": roi_bkg_total,
            f"{name}_rmbkg": roi_rmbkg,
            f"{name}_box": roi_box,
            f"{name}_bkg_box": roi_bkg_box,
            name: roi_array,
        }
        self.add_named_expression(**alternate_names)

    def populate(self, hdf_file: h5py.File):
        """Populate all datasets from file"""
        self.filename = hdf_file.filename
        self._local_data.update(extra_hdf_data(hdf_file))
        self._populate(hdf_file)
        size = self.most_common_size()
        self.generate_scannables(size)
        self.generate_combined()

    def generate_combined(self):
        """Finalise the mapped namespace by combining dataset names"""
        # if self.scannables:
        #     # check image datasets are larger than scannables_shape
        #     ndim = len(self.scannables_shape())
        #     self.image_data = {
        #         name: path for name, path in self.image_data.items()
        #         if is_image(self.datasets[path].shape, ndim + 1)
        #     }
        if self.image_data:
            # add default 'image_data'
            self.image_data[IMAGE_DATA] = next(iter(self.image_data.values()))
        self.combined = {**self.values, **self.arrays, **self.image_data, **self.scannables}

    def all_attrs(self) -> dict:
        """Return dict of all attributes in self.datasets and self.groups"""
        ds_attrs = {k: v for path, ds in self.datasets.items() for k, v in ds.attrs.items()}
        grp_attrs = {k: v for path, grp in self.groups.items() for k, v in grp.attrs.items()}
        return {**grp_attrs, **ds_attrs}

    def most_common_size(self) -> int:
        """Return most common array size > 1"""
        array_sizes = [size for name, path in self.arrays.items() if (size := self.datasets[path].size) > 1]
        return max(set(array_sizes), key=array_sizes.count)

    def most_common_shape(self) -> tuple:
        """Return most common non-singular array shape"""
        array_shapes = [shape for name, path in self.arrays.items() if len(shape := self.datasets[path].shape) > 0]
        return max(set(array_shapes), key=array_shapes.count)

    def scannables_length(self) -> int:
        """Return the length of the first axis of scannables array"""
        if not self.scannables:
            return 0
        path = next(iter(self.scannables.values()))
        return self.datasets[path].size

    def scannables_shape(self) -> tuple:
        """Return the shape of the first axis of scannables array"""
        if not self.scannables:
            return (0, )
        path = next(iter(self.scannables.values()))
        return self.datasets[path].shape

    def generate_scannables(self, array_size):
        """Populate self.scannables field with datasets size that match array_size"""
        # self.scannables = {k: v for k, v in self.arrays.items() if self.datasets[v].size == array_size}
        self.scannables = {ds.name: path for path, ds in self.datasets.items() if ds.size == array_size}
        # create combined dict, scannables and arrays overwrite values with same name
        # self.generate_combined()

    def generate_scannables_from_group(self, hdf_group: h5py.Group, group_path: str = None,
                                       dataset_names: list[str] = None):
        """
        Generate scannables list from a specific group, using the first item to define array size
        :param hdf_group: h5py.Group
        :param group_path: str path of group hdf_group if hdf_group.name is incorrect
        :param dataset_names: list of names of group sub-entries to use (use all if None)
        """
        # watch out - hdf_group.name may not point to a location in the file!
        hdf_path = hdf_group.name if group_path is None else group_path
        # list of datasets within group
        if dataset_names:
            dataset_names = [
                name for name in dataset_names if isinstance(hdf_group.get(name), h5py.Dataset)
            ]
        else:
            dataset_names = [name for name, item in hdf_group.items() if isinstance(item, h5py.Dataset)]

        # catch empty groups
        if len(dataset_names) == 0:
            logger.warning(f"HDF Group {hdf_path} has no datasets for scannables")
            self.scannables = {}
        else:
            # use min size dataset as scannable_shape (avoiding image datasets)
            array_size = min(hdf_group[name].size for name in dataset_names)
            self._populate(hdf_group, root=hdf_path, recursive=False)
            self.scannables = {
                name: build_hdf_path(hdf_path, name)
                for name in dataset_names if hdf_group[name].size == array_size  # doesn't check if link
            }
            if len(self.scannables) < 2:
                logger.warning(f"HDF Group {hdf_path} has no consistent datasets for scannables")
                self.scannables = {}
        logger.debug(f"Scannables from group: {list(self.scannables.keys())}")
        # self.generate_combined()

    def generate_scannables_from_names(self, names: list[str]):
        """Generate scannables list from a set of dataset names, using the first item to define array size"""
        # convert names or paths to names (to match alt_name)
        array_names = [n for name in names if (n := generate_identifier(name)) in self.arrays]
        logger.debug(f"Scannables from names: {array_names}")
        array_size = self.datasets[self.arrays[array_names[0]]].size
        self.scannables = {
            name: self.arrays[name] for name in array_names if self.datasets[self.arrays[name]].size == array_size
        }
        # self.generate_combined()

    def first_last_scannables(self, first_names: list[str] = (),
                              last_names: list[str] = (),
                              alt_names: dict[str, list[str]] | None = None) -> tuple[dict[str, str], dict[str, str]]:
        """
        Returns default names from scannables
            output first_names returns dict of N names, where N is the number of dimensions in scannable shape
                if fewer axes_names are provided than required, use the first items of scannables instead
            output signal_names returns the last dict item in the list of scannables + signal_names

        :param first_names: list of names of plottable axes in scannables
        :param last_names: list of names of plottable values in scannables
        :param alt_names: dict of alternative names for each plottable value
        :return {first_names: path}, {last_names: path}
        """
        if alt_names is None:
            alt_names = {}
        list_names = list(first_names) + list(self.scannables.keys()) + list(last_names)
        # check names are in scannables
        warnings = []
        all_names = []
        for name in list_names:
            if name in self.scannables:
                all_names.append(name)
            elif name in alt_names:
                alt_name = next((alt for alt in alt_names[name] if alt in self.scannables), None)
                if alt_name:
                    all_names.append(alt_name)
                else:
                    warnings.append(name)
            else:
                warnings.append(name)

        for name in warnings:
            logger.warning(f"name: '{name}' not in scannables")
        # return correct number of values from start and end
        ndims = len(self.scannables_shape())
        first = {name: self.scannables[name] for name in all_names[:ndims]}
        last = {name: self.scannables[name] for name in all_names[-(len(last_names) or 1):]}
        return first, last

    def get_path(self, name_or_path):
        """Return hdf path of object in HdfMap"""
        if name_or_path in self.datasets or name_or_path in self.groups:
            return name_or_path
        if name_or_path in self.combined:
            return self.combined[name_or_path]
        if name_or_path in self.image_data:
            return self.image_data[name_or_path]
        if name_or_path in self.classes:
            return self.classes[name_or_path][0]  # return first path in list
        return None

    def get_group_path(self, name_or_path):
        """Return group path of object in HdfMap"""
        hdf_path = self.get_path(name_or_path)
        while hdf_path and hdf_path not in self.groups:
            hdf_path = SEP.join(hdf_path.split(SEP)[:-1])
        if not hdf_path:
            return SEP
        return hdf_path

    def get_group_classes(self, name_or_path) -> list[str]:
        """Return list of class names associated with a group or parent group of dataset"""
        group_path = self.get_group_path(name_or_path)
        sub_groups = group_path.split(SEP)
        sub_group_paths = [SEP.join(sub_groups[:n]) for n in range(1, len(sub_groups)+1)]
        sub_group_classes = [self.groups[g].nx_class for g in sub_group_paths if g in self.groups]
        return sub_group_classes

    def get_group_dataset_path(self, group_name, dataset_name) -> str | None:
        """Return path of dataset defined by group and dataset name/attribute"""
        if group_name in self.groups:
            group_paths = [group_name]
        else:
            group_paths = self.classes[group_name]
        for group_path in group_paths:
            group = self.groups[group_path]
            for name in group.datasets:
                dataset_path = build_hdf_path(group_path, name)
                dataset = self.datasets[dataset_path]
                if dataset_name in dataset.names:
                    return dataset_path
        return None

    def find_groups(self, *names_or_classes: str) -> list[str]:
        """
        Find groups that are associated with several names or class names

            [paths, ] = m.find_groups('NXslit', 'NXtransformations', 's1')

        Intended for use finding groups with a certain hierarchy
        :params names_or_classes:  group names or group class names
        :returns: list of hdf group paths, where all groups are associated with all given names or classes.
        """
        # generate a list of all names and class names associated with each group
        # TODO: add all_names to self.generate_combined
        all_names = {p: self.get_group_classes(p) + p.split('/') for p in self.groups}
        return [path for path, names in all_names.items() if all(arg in names for arg in names_or_classes)]

    def find_datasets(self, *names_or_classes: str) -> list[str]:
        """
        Find datasets that are associated with several names or class names

            [paths, ] = m.find_datasets('NXslit', 'x_gap')

        Intended for use finding datasets associated with groups with a certain hierarchy

        Note that arguments are checked against the dataset namespace first, so if the argument appears
        in both lists, it will be assumed to be a dataset.

        :params names_or_classes:  dataset names, group names or group class names
        :returns: list of hdf dataset paths
        """
        args = list(names_or_classes)
        # split args by dataset names
        dataset_names = [args.pop(n) for n, a in enumerate(args) if a in self.combined]
        # find groups from remaining arguments
        group_paths = self.find_groups(*args)
        if not dataset_names:
            # if no datasets are given, return all dataset in group
            return [build_hdf_path(path, name) for path in group_paths for name in self.groups[path].datasets]
        # find all dataset paths associated with name
        dataset_paths = {
            path for name in dataset_names for path in [
                p for p, ds in self.datasets.items() if name in ds.names
            ] + [self.combined[name]] if self.get_group_path(path) in group_paths
        }
        return list(dataset_paths)

    def find_paths(self, string: str, name_only=True, whole_word=False) -> list[str]:
        """
        Find any dataset paths that contain the given string argument

            [paths, ] = m.find_paths('en')  # finds all datasets with name including 'en'

        :param string: str to find in list of datasets
        :param name_only: if True, search only the name of the dataset, not the full path
        :param whole_word: if True, search only for whole-word names (case in-sensitive)
        :return: list of hdf paths
        """
        if whole_word:
            return [path for name, path in self.combined.items() if string.lower() == name.lower()]
        # find string in combined
        combined_paths = {path for name, path in self.combined.items() if string in name}
        if name_only:
            return [
                path for path, dataset in self.datasets.items()
                if string in dataset.name and path not in combined_paths
            ] + list(combined_paths)
        return [
            path for path in self.datasets if string in path and path not in combined_paths
        ] + list(combined_paths)

    def find_names(self, string: str, match_case=False) -> list[str]:
        """
        Find any dataset names that contain the given string argument, searching names in self.combined

            ['m1x', 'm1y', ...] = m.find_names('m1')

        :param string: str to find in list of datasets
        :param match_case: if True, match must be case-sensitive
        :return: list of names
        """
        if match_case:
            return [name for name in self.combined if string in name]
        return [name for name in self.combined if string.lower() in name.lower()]

    def find_attr(self, attr_name: str) -> list[str]:
        """
        Find any dataset or group path with an attribute that contains attr_name.
        :param attr_name: str name of hdfobj.attr
        :return: list of hdf paths
        """
        return [
            path for path, ds in self.datasets.items() if attr_name in ds.attrs
        ] + [
            path for path, grp in self.groups.items() if attr_name in grp.attrs
        ]

    def get_attrs(self, name_or_path: str) -> dict | None:
        """Return attributes of dataset or group"""
        if name_or_path in self.datasets:
            return self.datasets[name_or_path].attrs
        if name_or_path in self.groups:
            return self.groups[name_or_path].attrs
        if name_or_path in self.combined:
            return self.datasets[self.combined[name_or_path]].attrs
        if name_or_path in self.classes:
            return self.groups[self.classes[name_or_path][0]].attrs
        return None

    def get_attr(self, name_or_path: str, attr_label: str, default: str | typing.Any = '') -> str | None:
        """Return named attribute from dataset or group, or default"""
        attrs = self.get_attrs(name_or_path)
        if attrs and attr_label in attrs:
            return attr.decode() if hasattr(attr := attrs[attr_label], 'decode') else attr
        return default

    def set_image_path(self, name_or_path: str):
        """Set the default image path, used by get_image"""
        if name_or_path is None:
            self._default_image_path = None
        else:
            path = self.get_path(name_or_path)
            if path:
                self._default_image_path = path
        logger.info(f"Default image path: {self._default_image_path}")

    def get_image_path(self) -> str:
        """Return HDF path of first dataset in self.image_data"""
        if self._default_image_path:
            return self._default_image_path
        return next(iter(self.image_data.values()), '')

    def get_image_shape(self) -> tuple:
        """Return the scan shape of the detector dataset"""
        path = self.get_image_path()
        if path in self.datasets:
            return self.datasets[path].shape[-2:]
        return 0, 0

    def get_image_index(self, index: int) -> tuple:
        """Return image slice index for index along total scan size"""
        return np.unravel_index(index, self.scannables_shape())

    def get_group_datasets(self, name_or_path: str) -> list[str] | None:
        """Find the path associate with the given name and return all datasets in that group"""
        group_path = self.get_group_path(name_or_path)
        if group_path:
            return self.groups[group_path].datasets
        return None

    def generate_ids(self, *names: str, modify_missing: bool = True) -> list[str]:
        """
        Will return the path identifier of the given name if the name is in the namespace,
        otherwise a valid identifier will be generated.

            xlabel, ylabel = generate_ids('axes', 'signal')
            generate_ids('my/data/label', modify_missing=True) #-> ['label', ]
            generate_ids('(x-y)/y', modify_missing=False) #-> ['(x-y)/y', ]

        :param names: names to generate axis labels for
        :param modify_missing: if True, modifies names even if they are not in namespace
        :return: list of axis labels as valid identifiers
        """
        return [
            generate_identifier(self.combined.get(name, name)) if modify_missing else (
                generate_identifier(self.combined[name]) if name in self.combined else name
            )
            for name in names
        ]

    "--------------------------------------------------------"
    "---------------------- FILE READERS --------------------"
    "--------------------------------------------------------"

    def load_hdf(self, filename: str | None = None, name_or_path: str = None, **kwargs) -> h5py.File | h5py.Dataset:
        """
        Load hdf file or hdf dataset in open state
        :param filename: str filename of hdf file, or None to use self.filename
        :param name_or_path: if given, returns the dataset
        :param kwargs: additional key-word arguments to pass to h5py.File(...)
        :return: h5py.File object or h5py.dataset object if dataset name given
        """
        if filename is None:
            filename = self.filename
        if name_or_path is None:
            return load_hdf(filename, **kwargs)
        return load_hdf(filename, **kwargs).get(self.get_path(name_or_path))

    def get_data(self, hdf_file: h5py.File, name_or_path: str, index=(), default=None, direct_load=False):
        """
        Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
        See hdfmap.eval_functions.dataset2data for more information.
        :param hdf_file: hdf file object
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
        :return: dataset2data(dataset) -> datetime, str or squeezed array as required.
        """
        path = self.get_path(name_or_path)
        if path and path in hdf_file:
            return dataset2data(hdf_file[path], index, direct_load)
        return default

    def get_string(self, hdf_file: h5py.File, name_or_path: str, index=(), default='', units=False) -> str:
        """
        Return data from dataset in file, converted into string summary of data
        See hdfmap.eval_functions.dataset2str for more information.
        :param hdf_file: hdf file object
        :param name_or_path: str name or path pointing to dataset in hdf file
        :param index: index or slice of data in hdf file
        :param default: value to return if name not found in hdf file
        :param units: if True and attribute 'units' available, append this to the result
        :return: dataset2str(dataset) -> str
        """
        path = self.get_path(name_or_path)
        if path and path in hdf_file:
            return dataset2str(hdf_file[path], index, units=units)
        return default

    def get_metadata(self, hdf_file: h5py.File, default=None, direct_load=False,
                     name_list: list = None, string_output=False) -> dict:
        """
        Return metadata dict from file, loading data for each item in the metadata list
        The metadata list is taken from name_list, otherwise self.metadata or self.values
        :param hdf_file: hdf file object
        :param default: Value to return for names not associated with a dataset
        :param direct_load: if True, loads data from the hdf file directly, without conversion
        :param name_list: if available, uses this list of dataset names to generate the metadata list
        :param string_output: if True, returns string summary of each value
        :return: {name: value}
        """
        extra = extra_hdf_data(hdf_file)
        if name_list:
            metadata_paths = {name: self.combined.get(name, '') for name in name_list}
        elif self.metadata:
            metadata_paths = self.metadata
        else:
            logger.warning("'local_names' metadata is not available, using all size=1 datasets.")
            # metadata_paths = self.values
            metadata_paths = {ds.name: path for path, ds in self.datasets.items() if ds.size <= 1}
        if string_output:
            extra = {key: f"'{val}'" for key, val in extra.items()}
            metadata = {
                name: dataset2str(hdf_file[path]) if path in hdf_file else str(default)
                for name, path in metadata_paths.items()
            }
        else:
            metadata = {
                name: dataset2data(hdf_file[path], direct_load=direct_load) if path in hdf_file else default
                for name, path in metadata_paths.items()
            }
        return {**extra, **metadata}

    def create_metadata_list(self, hdf_file: h5py.File, default=None, name_list: list = None,
                             line_separator: str = '\n', value_separator: str = '=') -> str:
        """
        Return a metadata string, using self.get_metadata
        :param hdf_file: hdf file object
        :param default: Value to return for names not associated with a dataset
        :param name_list: if available, uses this list of dataset names to generate the metadata list
        :param line_separator: str separating each metadata parameter
        :param value_separator: str separating name from value
        :return: multi-line string
        """
        return line_separator.join(
            f"{name}{value_separator}{value}"
            for name, value in self.get_metadata(hdf_file, default=default,
                                                 name_list=name_list, string_output=True).items()
        )

    def get_scannables(self, hdf_file: h5py.File, flatten: bool = False, numeric_only: bool = False) -> dict:
        """Return scannables from file (values associated with hdfmap.scannables)"""
        return {
            name: dataset[()].flatten() if flatten else hdf_file[path][()]
            for name, path in self.scannables.items()
            if (dataset := hdf_file.get(path)) and
               (np.issubdtype(dataset.dtype, np.number) if numeric_only else True)
        }

    def get_image(self, hdf_file: h5py.File, index: int | tuple | slice | None = None) -> np.ndarray | None:
        """
        Get image data from file, using default image path
            - If the image path points to a numeric 2+D dataset, returns dataset[index, :, :] -> ndarray
            - If the image path points to a string dataset, returns dataset[index] -> '/path/to/image.tiff'

        Image filenames may be relative to the location of the current file (this is not checked)

        :param hdf_file: hdf file object
        :param index: (slice,) or None to take the middle image
        :return: 2D numpy array of image, or string file path of image
        """
        if index is None:
            index = self.get_image_index(self.scannables_length() // 2)
        if isinstance(index, int):
            index = self.get_image_index(index)
        image_path = self.get_image_path()
        logger.info(f"image path: {image_path}")
        if image_path and image_path in hdf_file:
            # return hdf_file[image_path][index].squeeze()  # remove trailing dimensions
            return self.get_data(hdf_file, image_path, index)  # return array or image paths
        return None

    def _get_numeric_scannables(self, hdf_file: h5py.File) -> list[tuple[str, str, np.ndarray]]:
        """Return numeric scannables available in file"""
        return [
            (name, path, dataset[()].flatten()) for name, path in self.scannables.items()
            if (dataset := hdf_file.get(path)) and np.issubdtype(dataset.dtype, np.number)
        ]

    def get_scannables_array(self, hdf_file: h5py.File, return_structured_array=False) -> np.ndarray:
        """
        Return 2D array of all numeric scannables in file

        :param hdf_file: h5py.File object
        :param return_structured_array: bool, if True, return a Numpy structured array with column headers
        :returns: numpy array with a row for each scannable, shape: (no_scannables, flattened_length)
        """
        _scannables = self._get_numeric_scannables(hdf_file)
        array = np.array([array for name, path, array in _scannables])
        if return_structured_array:
            dtypes = np.dtype([
                (name, hdf_file[path].dtype) for name, path, array in _scannables
            ])
            return np.array([tuple(row) for row in np.transpose(array)], dtype=dtypes)
        return array

    def create_scannables_table(self, hdf_file: h5py.File, delimiter=', ',
                                string_spec='', format_spec='f', default_decimals=8) -> str:
        """
        Return str representation of scannables as a table
        The table starts with a header row given by names of the scannables.
        Each row contains the numeric values for each scannable, formatted by the given string spec:
                {value: "string_spec.decimals format_spec"}
            e.g. {value: "5.8f"}
        decimals is taken from each scannable's "decimals" attribute if it exists, otherwise the default is used
        :param hdf_file: h5py.File object
        :param delimiter: str separator between each column
        :param string_spec: str first element of float format specifier - length of string
        :param format_spec: str type element of format specifier - 'f'=float, 'e'=exponential, 'g'=general
        :param default_decimals: int default number of decimals given
        :return: str
        """
        _scannables = self._get_numeric_scannables(hdf_file)
        fmt = string_spec + '.%d' + format_spec
        formats = [
            '{:' + fmt % self.get_attr(path, 'decimals', default=default_decimals) + '}'
            for name, path, array in _scannables
        ]

        length = self.scannables_length()
        out = delimiter.join([name for name, _, _ in _scannables]) + '\n'
        out += '\n'.join([
            delimiter.join([
                fmt.format(array[n])
                for (_, path, array), fmt in zip(_scannables, formats)
            ])
            for n in range(length)
        ])
        return out

    def get_dataholder(self, hdf_file: h5py.File, flatten_scannables: bool = False) -> DataHolder:
        """
        Return DataHolder object - a simple replication of scisoftpy.dictutils.DataHolder
        Also known as DLS dat format.
            dataholder.scannable -> array
            dataholder.metadata.value -> metadata
            dataholder['scannable'] -> array
            dataholder.metadata['value'] -> metadata
        :param hdf_file: h5py.File object
        :param flatten_scannables: bool, if True the scannables will be flattened arrays
        :return: data_object (similar to dict)
        """
        metadata = self.get_metadata(hdf_file)
        scannables = self.get_scannables(hdf_file, flatten=flatten_scannables)
        scannables['metadata'] = DataHolder(**metadata)
        return DataHolder(**scannables)

    def eval(self, hdf_file: h5py.File, expression: str, default=DEFAULT, raise_errors: bool = True):
        """
        Evaluate an expression using the namespace of the hdf file
        :param hdf_file: h5py.File object
        :param expression: str expression to be evaluated
        :param default: returned if varname not in namespace
        :param raise_errors: raise exceptions if True, otherwise return str error message as result and log the error
        :return: eval(expression)
        """
        return eval_hdf(
            hdf_file=hdf_file,
            expression=expression,
            hdf_namespace=self.combined,
            data_namespace=self._local_data,
            replace_names=self._alternate_names,
            default=default,
            use_stored_data=self._use_local_data,
            raise_errors=raise_errors
        )

    def format_hdf(self, hdf_file: h5py.File, expression: str, default=DEFAULT, raise_errors: bool = True) -> str:
        """
        Evaluate a formatted string expression using the namespace of the hdf file
        :param hdf_file: h5py.File object
        :param expression: str expression using {name} format specifiers
        :param default: returned if varname not in namespace
        :param raise_errors: raise exceptions if True, otherwise return str error message as result and log the error
        :return: eval_hdf(f"expression")
        """
        return format_hdf(
            hdf_file=hdf_file,
            expression=expression,
            hdf_namespace=self.combined,
            data_namespace=self._local_data,
            replace_names=self._alternate_names,
            default=default,
            use_stored_data=self._use_local_data,
            raise_errors=raise_errors
        )

    def create_interpreter(self, default=DEFAULT):
        """
        Create an interpreter object for the current file
        The interpreter is a sub-class of asteval.Interpreter that parses expressions for hdfmap eval patterns
        and loads data when required.

        The hdf file self.filename is used to extract data and is only opened during evaluation.

            ii = HdfMap.create_interpreter()
            out = ii.eval('expression')
        """
        interpreter = HdfMapInterpreter(
            hdfmap=self,
            replace_names=self._alternate_names,
            default=default,
            user_symbols=self._local_data,
            use_numpy=True
        )
        interpreter.use_stored_data = self._use_local_data
        return interpreter

    def create_dataset_summary(self, hdf_file: h5py.File) -> str:
        """Create summary of all datasets in file"""
        return '\n'.join(f"{path:60}: {self.get_string(hdf_file, path)}" for path in self.datasets)

    def info_data(self, hdf_file: h5py.File) -> str:
        """Return string showing metadata values associated with names"""
        out = repr(self) + '\n'
        out += "Combined Namespace:\n"
        out += '\n'.join([
            f"{name:>30}: " +
            f"{dataset2str(hdf_file[path]):20}" +
            f": {path:60}"
            for name, path in self.combined.items()
        ])
        out += f"\n{self.info_names(scannables=True)}"
        return out

add_local(**kwargs)

Add value to the local namespace, used in eval

Source code in src/hdfmap/hdfmap_class.py
def add_local(self, **kwargs):
    """Add value to the local namespace, used in eval"""
    self._local_data.update(kwargs)
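
For example, a value added to the local namespace becomes available as a variable in later eval calls. A minimal sketch; the file name and dataset names are hypothetical:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')             # hypothetical file
    hmap.add_local(offset=3.5)                      # 'offset' is now a variable in eval
    with load_hdf('scan.nxs') as hdf:
        shifted = hmap.eval(hdf, 'total + offset')  # 'total' assumed to be a dataset name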

add_named_expression(**kwargs)

Add named expression to the local namespace, used in eval

Source code in src/hdfmap/hdfmap_class.py
def add_named_expression(self, **kwargs):
    """Add named expression to the local namespace, used in eval"""
    self._alternate_names.update(kwargs)
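
A named expression acts as an alias that is substituted before evaluation. A minimal sketch; 'total' and 'count_time' are hypothetical dataset names:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')                   # hypothetical file
    hmap.add_named_expression(norm='total / count_time')
    with load_hdf('scan.nxs') as hdf:
        data = hmap.eval(hdf, 'norm')                     # evaluates 'total / count_time'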

add_roi(name, cen_i, cen_j, wid_i=30, wid_j=30, image_name='IMAGE')

Add an image ROI (region of interest) to the named expressions. The ROI operates on the default IMAGE dataset, loading only the required region from the file. The following expressions will be added, for use in self.eval etc.:

    name -> returns the whole ROI array as a HDF5 dataset
    name_total -> returns the sum of each image in the ROI array
    name_max -> returns the max of each image in the ROI array
    name_min -> returns the min of each image in the ROI array
    name_mean -> returns the mean of each image in the ROI array
    name_bkg -> returns the background ROI array (area around ROI)
    name_rmbkg -> returns the total with background subtracted
    name_box -> returns the pixel positions of the ROI corners
    name_bkg_box -> returns the pixel positions of the background ROI

Parameters:

    name (str): string name of the ROI. Required.
    cen_i (int | str): central pixel index along first dimension, can be callable string. Required.
    cen_j (int | str): central pixel index along second dimension, can be callable string. Required.
    wid_i (int): full width along first dimension, in pixels. Default: 30.
    wid_j (int): full width along second dimension, in pixels. Default: 30.
    image_name (str): string name of the image. Default: 'IMAGE'.
Source code in src/hdfmap/hdfmap_class.py
def add_roi(self, name: str, cen_i: int | str, cen_j: int | str,
            wid_i: int = 30, wid_j: int = 30, image_name: str = 'IMAGE'):
    """
    Add an image ROI (region of interest) to the named expressions
    The ROI operates on the default IMAGE dataset, loading only the required region from the file.
    The following expressions will be added, for use in self.eval etc.
        *name* -> returns the whole ROI array as a HDF5 dataset
        *name*_total -> returns the sum of each image in the ROI array
        *name*_max -> returns the max of each image in the ROI array
        *name*_min -> returns the min of each image in the ROI array
        *name*_mean -> returns the mean of each image in the ROI array
        *name*_bkg -> returns the background ROI array (area around ROI)
        *name*_rmbkg -> returns the total with background subtracted
        *name*_box -> returns the pixel positions of the ROI corners
        *name*_bkg_box -> returns the pixel positions of the background ROI

    :param name: string name of the ROI
    :param cen_i: central pixel index along first dimension, can be callable string
    :param cen_j: central pixel index along second dimension, can be callable string
    :param wid_i: full width along first dimension, in pixels
    :param wid_j: full width along second dimension, in pixels
    :param image_name: string name of the image
    """
    wid_i = abs(wid_i) // 2
    wid_j = abs(wid_j) // 2
    islice = f"{cen_i}-{wid_i:.0f} : {cen_i}+{wid_i:.0f}"
    jslice = f"{cen_j}-{wid_j:.0f} : {cen_j}+{wid_j:.0f}"
    dataset = f"d_{image_name}"
    roi_array = dataset + f"[..., {islice}, {jslice}]"
    roi_total = f"{roi_array}.sum(axis=(-1, -2))"
    roi_max = f"{roi_array}.max(axis=(-1, -2))"
    roi_min = f"{roi_array}.min(axis=(-1, -2))"
    roi_mean = f"{roi_array}.mean(axis=(-1, -2))"
    roi_box = (
        'array([' +
        f"[{cen_i}-{wid_i:.0f}, {cen_j}-{wid_j:.0f}]," +
        f"[{cen_i}-{wid_i:.0f}, {cen_j}+{wid_j:.0f}]," +
        f"[{cen_i}+{wid_i:.0f}, {cen_j}+{wid_j:.0f}]," +
        f"[{cen_i}+{wid_i:.0f}, {cen_j}-{wid_j:.0f}]," +
        f"[{cen_i}-{wid_i:.0f}, {cen_j}-{wid_j:.0f}]," +
        '])'
    )

    islice = f"{cen_i}-{wid_i * 2:.0f} : {cen_i}+{wid_i * 2:.0f}"
    jslice = f"{cen_j}-{wid_j * 2:.0f} : {cen_j}+{wid_j * 2:.0f}"
    bkg_array = dataset + f"[..., {islice}, {jslice}]"
    bkg_total = f"{bkg_array}.sum(axis=(-1, -2))"
    roi_bkg_total = f"({bkg_total} - {roi_total})"
    roi_bkg_mean = f"{roi_bkg_total}/(12*{wid_i * wid_j})"
    # Transpose array to broadcast bkg_total
    roi_rmbkg = f"({roi_array}.T - {roi_bkg_mean}).sum(axis=(0, 1))"
    roi_bkg_box = (
        'array([' +
        f"[{cen_i}-{wid_i * 2:.0f}, {cen_j}-{wid_j * 2:.0f}]," +
        f"[{cen_i}-{wid_i * 2:.0f}, {cen_j}+{wid_j * 2:.0f}]," +
        f"[{cen_i}+{wid_i * 2:.0f}, {cen_j}+{wid_j * 2:.0f}]," +
        f"[{cen_i}+{wid_i * 2:.0f}, {cen_j}-{wid_j * 2:.0f}]," +
        f"[{cen_i}-{wid_i * 2:.0f}, {cen_j}-{wid_j * 2:.0f}]," +
        '])'
    )

    alternate_names = {
        f"{name}_total": roi_total,
        f"{name}_max": roi_max,
        f"{name}_min": roi_min,
        f"{name}_mean": roi_mean,
        f"{name}_bkg": roi_bkg_total,
        f"{name}_rmbkg": roi_rmbkg,
        f"{name}_box": roi_box,
        f"{name}_bkg_box": roi_bkg_box,
        name: roi_array,
    }
    self.add_named_expression(**alternate_names)
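
A sketch of defining a ROI and evaluating the derived expressions; pixel coordinates are hypothetical and a default IMAGE dataset is assumed to exist in the file:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')                           # hypothetical file
    hmap.add_roi('roi1', cen_i=128, cen_j=96, wid_i=20, wid_j=20)
    with load_hdf('scan.nxs') as hdf:
        totals = hmap.eval(hdf, 'roi1_total')   # sum inside the ROI for each image
        corners = hmap.eval(hdf, 'roi1_box')    # pixel positions of the ROI corners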

all_attrs()

Return dict of all attributes in self.datasets and self.groups

Source code in src/hdfmap/hdfmap_class.py
def all_attrs(self) -> dict:
    """Return dict of all attributes in self.datasets and self.groups"""
    ds_attrs = {k: v for path, ds in self.datasets.items() for k, v in ds.attrs.items()}
    grp_attrs = {k: v for path, grp in self.groups.items() for k, v in grp.attrs.items()}
    return {**grp_attrs, **ds_attrs}

create_dataset_summary(hdf_file)

Create summary of all datasets in file

Source code in src/hdfmap/hdfmap_class.py
def create_dataset_summary(self, hdf_file: h5py.File) -> str:
    """Create summary of all datasets in file"""
    return '\n'.join(f"{path:60}: {self.get_string(hdf_file, path)}" for path in self.datasets)

create_interpreter(default=DEFAULT)

Create an interpreter object for the current file. The interpreter is a sub-class of asteval.Interpreter that parses expressions for hdfmap eval patterns and loads data when required.

The hdf file self.filename is used to extract data and is only opened during evaluation.

ii = HdfMap.create_interpreter()
out = ii.eval('expression')
Source code in src/hdfmap/hdfmap_class.py
def create_interpreter(self, default=DEFAULT):
    """
    Create an interpreter object for the current file
    The interpreter is a sub-class of asteval.Interpreter that parses expressions for hdfmap eval patterns
    and loads data when required.

    The hdf file self.filename is used to extract data and is only opened during evaluation.

        ii = HdfMap.create_interpreter()
        out = ii.eval('expression')
    """
    interpreter = HdfMapInterpreter(
        hdfmap=self,
        replace_names=self._alternate_names,
        default=default,
        user_symbols=self._local_data,
        use_numpy=True
    )
    interpreter.use_stored_data = self._use_local_data
    return interpreter

create_metadata_list(hdf_file, default=None, name_list=None, line_separator='\n', value_separator='=')

Return a metadata string, using self.get_metadata

Parameters:

    hdf_file (File): hdf file object. Required.
    default: Value to return for names not associated with a dataset. Default: None.
    name_list (list): if available, uses this list of dataset names to generate the metadata list. Default: None.
    line_separator (str): str separating each metadata parameter. Default: '\n'.
    value_separator (str): str separating name from value. Default: '='.

Returns:

    str: multi-line string

Source code in src/hdfmap/hdfmap_class.py
def create_metadata_list(self, hdf_file: h5py.File, default=None, name_list: list = None,
                         line_separator: str = '\n', value_separator: str = '=') -> str:
    """
    Return a metadata string, using self.get_metadata
    :param hdf_file: hdf file object
    :param default: Value to return for names not associated with a dataset
    :param name_list: if available, uses this list of dataset names to generate the metadata list
    :param line_separator: str separating each metadata parameter
    :param value_separator: str separating name from value
    :return: multi-line string
    """
    return line_separator.join(
        f"{name}{value_separator}{value}"
        for name, value in self.get_metadata(hdf_file, default=default,
                                             name_list=name_list, string_output=True).items()
    )
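
A minimal sketch; the file name is hypothetical:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')
    with load_hdf('scan.nxs') as hdf:
        print(hmap.create_metadata_list(hdf))                           # one 'name=value' per line
        one_line = hmap.create_metadata_list(hdf, line_separator='; ')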

create_scannables_table(hdf_file, delimiter=', ', string_spec='', format_spec='f', default_decimals=8)

Return a str representation of the scannables as a table. The table starts with a header row given by the names of the scannables. Each row contains the numeric values for each scannable, formatted by the given string spec:

    {value: "string_spec.decimals format_spec"}, e.g. {value: "5.8f"}

decimals is taken from each scannable's "decimals" attribute if it exists, otherwise the default is used.

Parameters:

    hdf_file (File): h5py.File object. Required.
    delimiter: str separator between each column. Default: ', '.
    string_spec: str first element of float format specifier - length of string. Default: ''.
    format_spec: str type element of format specifier - 'f'=float, 'e'=exponential, 'g'=general. Default: 'f'.
    default_decimals: int default number of decimals given. Default: 8.

Returns:

    str

Source code in src/hdfmap/hdfmap_class.py
def create_scannables_table(self, hdf_file: h5py.File, delimiter=', ',
                            string_spec='', format_spec='f', default_decimals=8) -> str:
    """
    Return str representation of scannables as a table
    The table starts with a header row given by names of the scannables.
    Each row contains the numeric values for each scannable, formatted by the given string spec:
            {value: "string_spec.decimals format_spec"}
        e.g. {value: "5.8f"}
    decimals is taken from each scannable's "decimals" attribute if it exists, otherwise the default is used
    :param hdf_file: h5py.File object
    :param delimiter: str separator between each column
    :param string_spec: str first element of float format specifier - length of string
    :param format_spec: str type element of format specifier - 'f'=float, 'e'=exponential, 'g'=general
    :param default_decimals: int default number of decimals given
    :return: str
    """
    _scannables = self._get_numeric_scannables(hdf_file)
    fmt = string_spec + '.%d' + format_spec
    formats = [
        '{:' + fmt % self.get_attr(path, 'decimals', default=default_decimals) + '}'
        for name, path, array in _scannables
    ]

    length = self.scannables_length()
    out = delimiter.join([name for name, _, _ in _scannables]) + '\n'
    out += '\n'.join([
        delimiter.join([
            fmt.format(array[n])
            for (_, path, array), fmt in zip(_scannables, formats)
        ])
        for n in range(length)
    ])
    return out
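
A minimal sketch; the file name is hypothetical:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')
    with load_hdf('scan.nxs') as hdf:
        print(hmap.create_scannables_table(hdf, delimiter='\t'))  # tab-separated table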

eval(hdf_file, expression, default=DEFAULT, raise_errors=True)

Evaluate an expression using the namespace of the hdf file

Parameters:

    hdf_file (File): h5py.File object. Required.
    expression (str): str expression to be evaluated. Required.
    default: returned if varname not in namespace. Default: DEFAULT.
    raise_errors (bool): raise exceptions if True, otherwise return str error message as result and log the error. Default: True.

Returns:

    eval(expression)

Source code in src/hdfmap/hdfmap_class.py
def eval(self, hdf_file: h5py.File, expression: str, default=DEFAULT, raise_errors: bool = True):
    """
    Evaluate an expression using the namespace of the hdf file
    :param hdf_file: h5py.File object
    :param expression: str expression to be evaluated
    :param default: returned if varname not in namespace
    :param raise_errors: raise exceptions if True, otherwise return str error message as result and log the error
    :return: eval(expression)
    """
    return eval_hdf(
        hdf_file=hdf_file,
        expression=expression,
        hdf_namespace=self.combined,
        data_namespace=self._local_data,
        replace_names=self._alternate_names,
        default=default,
        use_stored_data=self._use_local_data,
        raise_errors=raise_errors
    )
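
The expression is evaluated against the file's namespace, so dataset names can be used directly. A minimal sketch; 'signal' and 'count_time' are hypothetical dataset names:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')
    with load_hdf('scan.nxs') as hdf:
        norm = hmap.eval(hdf, 'signal / count_time')
        safe = hmap.eval(hdf, 'missing_name', default=0, raise_errors=False)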

find_attr(attr_name)

Find any dataset or group path with an attribute that contains attr_name.

Parameters:

    attr_name (str): str name of hdfobj.attr. Required.

Returns:

    list[str]: list of hdf paths

Source code in src/hdfmap/hdfmap_class.py
def find_attr(self, attr_name: str) -> list[str]:
    """
    Find any dataset or group path with an attribute that contains attr_name.
    :param attr_name: str name of hdfobj.attr
    :return: list of hdf paths
    """
    return [
        path for path, ds in self.datasets.items() if attr_name in ds.attrs
    ] + [
        path for path, grp in self.groups.items() if attr_name in grp.attrs
    ]
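
For example, assuming a hypothetical file whose datasets carry a 'units' attribute:

    from hdfmap import create_nexus_map
    hmap = create_nexus_map('scan.nxs')
    paths = hmap.find_attr('units')  # every dataset or group with a 'units' attribute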

find_datasets(*names_or_classes)

Find datasets that are associated with several names or class names

[paths, ] = m.find_datasets('NXslit', 'x_gap')

Intended for use finding datasets associated with groups with a certain hierarchy

Note that arguments are checked against the dataset namespace first, so if the argument appears in both lists, it will be assumed to be a dataset.

Parameters:

    names_or_classes (str): dataset names, group names or group class names. Default: ().

Returns:

    list[str]: list of hdf dataset paths

Source code in src/hdfmap/hdfmap_class.py
def find_datasets(self, *names_or_classes: str) -> list[str]:
    """
    Find datasets that are associated with several names or class names

        [paths, ] = m.find_datasets('NXslit', 'x_gap')

    Intended for use finding datasets associated with groups with a certain hierarchy

    Note that arguments are checked against the dataset namespace first, so if the argument appears
    in both lists, it will be assumed to be a dataset.

    :params names_or_classes:  dataset names, group names or group class names
    :returns: list of hdf dataset paths
    """
    args = list(names_or_classes)
    # split args into dataset names and the remaining group names/classes
    # (popping while enumerating would skip the element after each match)
    dataset_names = [a for a in args if a in self.combined]
    args = [a for a in args if a not in self.combined]
    # find groups from remaining arguments
    group_paths = self.find_groups(*args)
    if not dataset_names:
        # if no datasets are given, return all datasets in the matched groups
        return [build_hdf_path(path, name) for path in group_paths for name in self.groups[path].datasets]
    # find all dataset paths associated with name
    dataset_paths = {
        path for name in dataset_names for path in [
            p for p, ds in self.datasets.items() if name in ds.names
        ] + [self.combined[name]] if self.get_group_path(path) in group_paths
    }
    return list(dataset_paths)
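
For example, assuming a hypothetical file with NXdetector and NXslit groups:

    from hdfmap import create_nexus_map
    hmap = create_nexus_map('scan.nxs')
    detector_data = hmap.find_datasets('NXdetector')  # all datasets in NXdetector groups
    gaps = hmap.find_datasets('NXslit', 'x_gap')      # 'x_gap' datasets inside NXslit groups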

find_groups(*names_or_classes)

Find groups that are associated with several names or class names

[paths, ] = m.find_groups('NXslit', 'NXtransformations', 's1')

Intended for use finding groups with a certain hierarchy

Parameters:

    names_or_classes (str): group names or group class names. Default: ().

Returns:

    list[str]: list of hdf group paths, where all groups are associated with all given names or classes.

Source code in src/hdfmap/hdfmap_class.py
def find_groups(self, *names_or_classes: str) -> list[str]:
    """
    Find groups that are associated with several names or class names

        [paths, ] = m.find_groups('NXslit', 'NXtransformations', 's1')

    Intended for use finding groups with a certain hierarchy
    :params names_or_classes:  group names or group class names
    :returns: list of hdf group paths, where all groups are associated with all given names or classes.
    """
    # generate a list of all names and class names associated with each group
    # TODO: add all_names to self.generate_combined
    all_names = {p: self.get_group_classes(p) + p.split('/') for p in self.groups}
    return [path for path, names in all_names.items() if all(arg in names for arg in names_or_classes)]
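
For example, assuming hypothetical group names in the file:

    from hdfmap import create_nexus_map
    hmap = create_nexus_map('scan.nxs')
    # groups matching both the class 'NXtransformations' and the name 's1'
    paths = hmap.find_groups('NXtransformations', 's1')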

find_names(string, match_case=False)

Find any dataset names that contain the given string argument, searching names in self.combined

['m1x', 'm1y', ...] = m.find_names('m1')

Parameters:

    string (str): str to find in list of datasets. Required.
    match_case: if True, match must be case-sensitive. Default: False.

Returns:

    list[str]: list of names

Source code in src/hdfmap/hdfmap_class.py
def find_names(self, string: str, match_case=False) -> list[str]:
    """
    Find any dataset names that contain the given string argument, searching names in self.combined

        ['m1x', 'm1y', ...] = m.find_names('m1')

    :param string: str to find in list of datasets
    :param match_case: if True, match must be case-sensitive
    :return: list of names
    """
    if match_case:
        return [name for name in self.combined if string in name]
    return [name for name in self.combined if string.lower() in name.lower()]
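
For example, assuming hypothetical dataset names:

    from hdfmap import create_nexus_map
    hmap = create_nexus_map('scan.nxs')
    names = hmap.find_names('temp')                   # case-insensitive substring match
    exact = hmap.find_names('Temp', match_case=True)  # case-sensitive match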

find_paths(string, name_only=True, whole_word=False)

Find any dataset paths that contain the given string argument

[paths, ] = m.find_paths('en')  # finds all datasets with name including 'en'

Parameters:

    string (str): str to find in list of datasets. Required.
    name_only: if True, search only the name of the dataset, not the full path. Default: True.
    whole_word: if True, search only for whole-word names (case-insensitive). Default: False.

Returns:

    list[str]: list of hdf paths

Source code in src/hdfmap/hdfmap_class.py
def find_paths(self, string: str, name_only=True, whole_word=False) -> list[str]:
    """
    Find any dataset paths that contain the given string argument

        [paths, ] = m.find_paths('en')  # finds all datasets with name including 'en'

    :param string: str to find in list of datasets
    :param name_only: if True, search only the name of the dataset, not the full path
    :param whole_word: if True, search only for whole-word names (case-insensitive)
    :return: list of hdf paths
    """
    if whole_word:
        return [path for name, path in self.combined.items() if string.lower() == name.lower()]
    # find string in combined
    combined_paths = {path for name, path in self.combined.items() if string in name}
    if name_only:
        return [
            path for path, dataset in self.datasets.items()
            if string in dataset.name and path not in combined_paths
        ] + list(combined_paths)
    return [
        path for path in self.datasets if string in path and path not in combined_paths
    ] + list(combined_paths)
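
For example, assuming hypothetical dataset names:

    from hdfmap import create_nexus_map
    hmap = create_nexus_map('scan.nxs')
    paths = hmap.find_paths('gap')                     # names containing 'gap'
    exact = hmap.find_paths('x_gap', whole_word=True)  # names equal to 'x_gap' (case-insensitive)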

first_last_scannables(first_names=(), last_names=(), alt_names=None)

Returns default names from the scannables:

    the first output returns a dict of N names, where N is the number of dimensions in the scannable shape;
    if fewer first_names are provided than required, the first items of the scannables are used instead.
    the second output returns the last dict items in the list of scannables + last_names.

Parameters:

    first_names (list[str]): list of names of plottable axes in scannables. Default: ().
    last_names (list[str]): list of names of plottable values in scannables. Default: ().
    alt_names (dict[str, list[str]] | None): dict of alternative names for each plottable value. Default: None.

Returns:

    tuple[dict[str, str], dict[str, str]]: {first_names: path}, {last_names: path}

Source code in src/hdfmap/hdfmap_class.py
def first_last_scannables(self, first_names: list[str] = (),
                          last_names: list[str] = (),
                          alt_names: dict[str, list[str]] | None = None) -> tuple[dict[str, str], dict[str, str]]:
    """
    Returns default names from scannables
        output first_names returns dict of N names, where N is the number of dimensions in scannable shape
            if fewer first_names are provided than required, use the first items of scannables instead
        output last_names returns the last dict item in the list of scannables + last_names

    :param first_names: list of names of plottable axes in scannables
    :param last_names: list of names of plottable values in scannables
    :param alt_names: dict of alternative names for each plottable value
    :return: {first_names: path}, {last_names: path}
    """
    if alt_names is None:
        alt_names = {}
    list_names = list(first_names) + list(self.scannables.keys()) + list(last_names)
    # check names are in scannables
    warnings = []
    all_names = []
    for name in list_names:
        if name in self.scannables:
            all_names.append(name)
        elif name in alt_names:
            alt_name = next((alt for alt in alt_names[name] if alt in self.scannables), None)
            if alt_name:
                all_names.append(alt_name)
            else:
                warnings.append(name)
        else:
            warnings.append(name)

    for name in warnings:
        logger.warning(f"name: '{name}' not in scannables")
    # return correct number of values from start and end
    ndims = len(self.scannables_shape())
    first = {name: self.scannables[name] for name in all_names[:ndims]}
    last = {name: self.scannables[name] for name in all_names[-(len(last_names) or 1):]}
    return first, last

format_hdf(hdf_file, expression, default=DEFAULT, raise_errors=True)

Evaluate a formatted string expression using the namespace of the hdf file

Parameters:

    hdf_file (File): h5py.File object. Required.
    expression (str): str expression using {name} format specifiers. Required.
    default: returned if varname not in namespace. Default: DEFAULT.
    raise_errors (bool): raise exceptions if True, otherwise return str error message as result and log the error. Default: True.

Returns:

    str: eval_hdf(f"expression")

Source code in src/hdfmap/hdfmap_class.py
def format_hdf(self, hdf_file: h5py.File, expression: str, default=DEFAULT, raise_errors: bool = True) -> str:
    """
    Evaluate a formatted string expression using the namespace of the hdf file
    :param hdf_file: h5py.File object
    :param expression: str expression using {name} format specifiers
    :param default: returned if varname not in namespace
    :param raise_errors: raise exceptions if True, otherwise return str error message as result and log the error
    :return: eval_hdf(f"expression")
    """
    return format_hdf(
        hdf_file=hdf_file,
        expression=expression,
        hdf_namespace=self.combined,
        data_namespace=self._local_data,
        replace_names=self._alternate_names,
        default=default,
        use_stored_data=self._use_local_data,
        raise_errors=raise_errors
    )
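
A minimal sketch; the file name and the 'temperature' dataset name are hypothetical:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')
    with load_hdf('scan.nxs') as hdf:
        line = hmap.format_hdf(hdf, 'T = {temperature:.1f} K')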

generate_combined()

Finalise the mapped namespace by combining dataset names

Source code in src/hdfmap/hdfmap_class.py
def generate_combined(self):
    """Finalise the mapped namespace by combining dataset names"""
    # if self.scannables:
    #     # check image datasets are larger than scannables_shape
    #     ndim = len(self.scannables_shape())
    #     self.image_data = {
    #         name: path for name, path in self.image_data.items()
    #         if is_image(self.datasets[path].shape, ndim + 1)
    #     }
    if self.image_data:
        # add default 'image_data'
        self.image_data[IMAGE_DATA] = next(iter(self.image_data.values()))
    self.combined = {**self.values, **self.arrays, **self.image_data, **self.scannables}

generate_ids(*names, modify_missing=True)

Will return the path identifier of the given name if the name is in the namespace, otherwise a valid identifier will be generated.

xlabel, ylabel = generate_ids('axes', 'signal')
generate_ids('my/data/label', modify_missing=True) #-> ['label', ]
generate_ids('(x-y)/y', modify_missing=False) #-> ['(x-y)/y', ]

Parameters:

    names (str): names to generate axis labels for. Default: ().
    modify_missing (bool): if True, modifies names even if they are not in namespace. Default: True.

Returns:

    list[str]: list of axis labels as valid identifiers

Source code in src/hdfmap/hdfmap_class.py
def generate_ids(self, *names: str, modify_missing: bool = True) -> list[str]:
    """
    Will return the path identifier of the given name if the name is in the namespace,
    otherwise a valid identifier will be generated.

        xlabel, ylabel = generate_ids('axes', 'signal')
        generate_ids('my/data/label', modify_missing=True) #-> ['label', ]
        generate_ids('(x-y)/y', modify_missing=False) #-> ['(x-y)/y', ]

    :param names: names to generate axis labels for
    :param modify_missing: if True, modifies names even if they are not in namespace
    :return: list of axis labels as valid identifiers
    """
    return [
        generate_identifier(self.combined.get(name, name)) if modify_missing else (
            generate_identifier(self.combined[name]) if name in self.combined else name
        )
        for name in names
    ]
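
For example, assuming a hypothetical file with 'axes' and 'signal' in its namespace:

    from hdfmap import create_nexus_map
    hmap = create_nexus_map('scan.nxs')
    xlabel, ylabel = hmap.generate_ids('axes', 'signal')
    hmap.generate_ids('my/data/label')                  # -> ['label']
    hmap.generate_ids('(x-y)/y', modify_missing=False)  # -> ['(x-y)/y']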

generate_scannables(array_size)

Populate self.scannables field with datasets size that match array_size

Source code in src/hdfmap/hdfmap_class.py
def generate_scannables(self, array_size):
    """Populate self.scannables field with datasets size that match array_size"""
    # self.scannables = {k: v for k, v in self.arrays.items() if self.datasets[v].size == array_size}
    self.scannables = {ds.name: path for path, ds in self.datasets.items() if ds.size == array_size}

generate_scannables_from_group(hdf_group, group_path=None, dataset_names=None)

Generate scannables list from a specific group, using the first item to define array size

Parameters:

    hdf_group (Group): h5py.Group. Required.
    group_path (str): str path of group hdf_group if hdf_group.name is incorrect. Default: None.
    dataset_names (list[str]): list of names of group sub-entries to use (use all if None). Default: None.
Source code in src/hdfmap/hdfmap_class.py
def generate_scannables_from_group(self, hdf_group: h5py.Group, group_path: str = None,
                                   dataset_names: list[str] = None):
    """
    Generate scannables list from a specific group, using the first item to define array size
    :param hdf_group: h5py.Group
    :param group_path: str path of group hdf_group if hdf_group.name is incorrect
    :param dataset_names: list of names of group sub-entries to use (use all if None)
    """
    # watch out - hdf_group.name may not point to a location in the file!
    hdf_path = hdf_group.name if group_path is None else group_path
    # list of datasets within group
    if dataset_names:
        dataset_names = [
            name for name in dataset_names if isinstance(hdf_group.get(name), h5py.Dataset)
        ]
    else:
        dataset_names = [name for name, item in hdf_group.items() if isinstance(item, h5py.Dataset)]

    # catch empty groups
    if len(dataset_names) == 0:
        logger.warning(f"HDF Group {hdf_path} has no datasets for scannables")
        self.scannables = {}
    else:
        # use min size dataset as scannable_shape (avoiding image datasets)
        array_size = min(hdf_group[name].size for name in dataset_names)
        self._populate(hdf_group, root=hdf_path, recursive=False)
        self.scannables = {
            name: build_hdf_path(hdf_path, name)
            for name in dataset_names if hdf_group[name].size == array_size  # doesn't check if link
        }
        if len(self.scannables) < 2:
            logger.warning(f"HDF Group {hdf_path} has no consistent datasets for scannables")
            self.scannables = {}
    logger.debug(f"Scannables from group: {list(self.scannables.keys())}")

generate_scannables_from_names(names)

Generate scannables list from a set of dataset names, using the first item to define array size

Source code in src/hdfmap/hdfmap_class.py
def generate_scannables_from_names(self, names: list[str]):
    """Generate scannables list from a set of dataset names, using the first item to define array size"""
    # convert names or paths to names (to match alt_name)
    array_names = [n for name in names if (n := generate_identifier(name)) in self.arrays]
    logger.debug(f"Scannables from names: {array_names}")
    array_size = self.datasets[self.arrays[array_names[0]]].size
    self.scannables = {
        name: self.arrays[name] for name in array_names if self.datasets[self.arrays[name]].size == array_size
    }

get_attr(name_or_path, attr_label, default='')

Return named attribute from dataset or group, or default

Source code in src/hdfmap/hdfmap_class.py
def get_attr(self, name_or_path: str, attr_label: str, default: str | typing.Any = '') -> str | None:
    """Return named attribute from dataset or group, or default"""
    attrs = self.get_attrs(name_or_path)
    if attrs and attr_label in attrs:
        return attr.decode() if hasattr(attr := attrs[attr_label], 'decode') else attr
    return default

get_attrs(name_or_path)

Return attributes of dataset or group

Source code in src/hdfmap/hdfmap_class.py
def get_attrs(self, name_or_path: str) -> dict | None:
    """Return attributes of dataset or group"""
    if name_or_path in self.datasets:
        return self.datasets[name_or_path].attrs
    if name_or_path in self.groups:
        return self.groups[name_or_path].attrs
    if name_or_path in self.combined:
        return self.datasets[self.combined[name_or_path]].attrs
    if name_or_path in self.classes:
        return self.groups[self.classes[name_or_path][0]].attrs
    return None
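
For example, assuming a hypothetical 'temperature' dataset with a 'units' attribute:

    from hdfmap import create_nexus_map
    hmap = create_nexus_map('scan.nxs')
    attrs = hmap.get_attrs('temperature')                      # dict of attributes
    units = hmap.get_attr('temperature', 'units', default='')  # named attribute with fallback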

get_data(hdf_file, name_or_path, index=(), default=None, direct_load=False)

Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects See hdfmap.eval_functions.dataset2data for more information.

Parameters:

    hdf_file (File): hdf file object. Required.
    name_or_path (str): str name or path pointing to dataset in hdf file. Required.
    index: index or slice of data in hdf file. Default: ().
    default: value to return if name not found in hdf file. Default: None.
    direct_load: return str, datetime or squeezed array if False, otherwise load data directly. Default: False.

Returns:

    dataset2data(dataset) -> datetime, str or squeezed array as required.

Source code in src/hdfmap/hdfmap_class.py
def get_data(self, hdf_file: h5py.File, name_or_path: str, index=(), default=None, direct_load=False):
    """
    Return data from dataset in file, converted into either datetime, str or squeezed numpy.array objects
    See hdfmap.eval_functions.dataset2data for more information.
    :param hdf_file: hdf file object
    :param name_or_path: str name or path pointing to dataset in hdf file
    :param index: index or slice of data in hdf file
    :param default: value to return if name not found in hdf file
    :param direct_load: return str, datetime or squeezed array if False, otherwise load data directly
    :return: dataset2data(dataset) -> datetime, str or squeezed array as required.
    """
    path = self.get_path(name_or_path)
    if path and path in hdf_file:
        return dataset2data(hdf_file[path], index, direct_load)
    return default
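
A minimal sketch; the file and dataset names are hypothetical:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')
    with load_hdf('scan.nxs') as hdf:
        temp = hmap.get_data(hdf, 'temperature', default=0.0)
        first = hmap.get_data(hdf, 'signal', index=0)  # first point only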

get_dataholder(hdf_file, flatten_scannables=False)

Return DataHolder object - a simple replication of scisoftpy.dictutils.DataHolder, also known as DLS dat format.

    dataholder.scannable -> array
    dataholder.metadata.value -> metadata
    dataholder['scannable'] -> array
    dataholder.metadata['value'] -> metadata

Parameters:

    hdf_file (File): h5py.File object. Required.
    flatten_scannables (bool): if True, the scannables will be flattened arrays. Default: False.

Returns:

    DataHolder: data_object (similar to dict)

Source code in src/hdfmap/hdfmap_class.py
def get_dataholder(self, hdf_file: h5py.File, flatten_scannables: bool = False) -> DataHolder:
    """
    Return DataHolder object - a simple replication of scisoftpy.dictutils.DataHolder
    Also known as DLS dat format.
        dataholder.scannable -> array
        dataholder.metadata.value -> metadata
        dataholder['scannable'] -> array
        dataholder.metadata['value'] -> metadata
    :param hdf_file: h5py.File object
    :param flatten_scannables: bool, if True the scannables will be flattened arrays
    :return: data_object (similar to dict)
    """
    metadata = self.get_metadata(hdf_file)
    scannables = self.get_scannables(hdf_file, flatten=flatten_scannables)
    scannables['metadata'] = DataHolder(**metadata)
    return DataHolder(**scannables)
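
A minimal sketch; 'axis_name' and 'start_time' are hypothetical names:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')
    with load_hdf('scan.nxs') as hdf:
        d = hmap.get_dataholder(hdf)
    axis = d['axis_name']          # scannable array
    start = d.metadata.start_time  # metadata value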

get_group_classes(name_or_path)

Return list of class names associated with a group or parent group of dataset

Source code in src/hdfmap/hdfmap_class.py
def get_group_classes(self, name_or_path) -> list[str]:
    """Return list of class names associated with a group or parent group of dataset"""
    group_path = self.get_group_path(name_or_path)
    sub_groups = group_path.split(SEP)
    sub_group_paths = [SEP.join(sub_groups[:n]) for n in range(1, len(sub_groups)+1)]
    sub_group_classes = [self.groups[g].nx_class for g in sub_group_paths if g in self.groups]
    return sub_group_classes

get_group_dataset_path(group_name, dataset_name)

Return path of dataset defined by group and dataset name/attribute

Source code in src/hdfmap/hdfmap_class.py
def get_group_dataset_path(self, group_name, dataset_name) -> str | None:
    """Return path of dataset defined by group and dataset name/attribute"""
    if group_name in self.groups:
        group_paths = [group_name]
    else:
        group_paths = self.classes[group_name]
    for group_path in group_paths:
        group = self.groups[group_path]
        for name in group.datasets:
            dataset_path = build_hdf_path(group_path, name)
            dataset = self.datasets[dataset_path]
            if dataset_name in dataset.names:
                return dataset_path
    return None

get_group_datasets(name_or_path)

Find the path associate with the given name and return all datasets in that group

Source code in src/hdfmap/hdfmap_class.py
def get_group_datasets(self, name_or_path: str) -> list[str] | None:
    """Find the path associate with the given name and return all datasets in that group"""
    group_path = self.get_group_path(name_or_path)
    if group_path:
        return self.groups[group_path].datasets
    return None

get_group_path(name_or_path)

Return group path of object in HdfMap

Source code in src/hdfmap/hdfmap_class.py
def get_group_path(self, name_or_path):
    """Return group path of object in HdfMap"""
    hdf_path = self.get_path(name_or_path)
    while hdf_path and hdf_path not in self.groups:
        hdf_path = SEP.join(hdf_path.split(SEP)[:-1])
    if not hdf_path:
        return SEP
    return hdf_path

get_image(hdf_file, index=None)

Get image data from file, using the default image path:

    - If the image path points to a numeric 2+D dataset, returns dataset[index, :, :] -> ndarray
    - If the image path points to a string dataset, returns dataset[index] -> '/path/to/image.tiff'

Image filenames may be relative to the location of the current file (this is not checked)

Parameters:

    hdf_file (File): hdf file object. Required.
    index (int | tuple | slice | None): (slice,) or None to take the middle image. Default: None.

Returns:

    ndarray | None: 2D numpy array of image, or string file path of image

Source code in src/hdfmap/hdfmap_class.py
def get_image(self, hdf_file: h5py.File, index: int | tuple | slice | None = None) -> np.ndarray | None:
    """
    Get image data from file, using default image path
        - If the image path points to a numeric 2+D dataset, returns dataset[index, :, :] -> ndarray
        - If the image path points to a string dataset, returns dataset[index] -> '/path/to/image.tiff'

    Image filenames may be relative to the location of the current file (this is not checked)

    :param hdf_file: hdf file object
    :param index: (slice,) or None to take the middle image
    :return: 2D numpy array of image, or string file path of image
    """
    if index is None:
        index = self.get_image_index(self.scannables_length() // 2)
    if isinstance(index, int):
        index = self.get_image_index(index)
    image_path = self.get_image_path()
    logger.info(f"image path: {image_path}")
    if image_path and image_path in hdf_file:
        # return hdf_file[image_path][index].squeeze()  # remove trailing dimensions
        return self.get_data(hdf_file, image_path, index)  # return array or image paths
    return None
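
A minimal sketch; the file is assumed to contain a default image dataset:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')
    with load_hdf('scan.nxs') as hdf:
        middle = hmap.get_image(hdf)    # middle image of the scan
        first = hmap.get_image(hdf, 0)  # image at the first scan point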

get_image_index(index)

Return image slice index for index along total scan size

Source code in src/hdfmap/hdfmap_class.py
def get_image_index(self, index: int) -> tuple:
    """Return image slice index for index along total scan size"""
    return np.unravel_index(index, self.scannables_shape())

get_image_path()

Return HDF path of first dataset in self.image_data

Source code in src/hdfmap/hdfmap_class.py
def get_image_path(self) -> str:
    """Return HDF path of first dataset in self.image_data"""
    if self._default_image_path:
        return self._default_image_path
    return next(iter(self.image_data.values()), '')

get_image_shape()

Return the scan shape of the detector dataset

Source code in src/hdfmap/hdfmap_class.py
def get_image_shape(self) -> tuple:
    """Return the scan shape of the detector dataset"""
    path = self.get_image_path()
    if path in self.datasets:
        return self.datasets[path].shape[-2:]
    return 0, 0

get_metadata(hdf_file, default=None, direct_load=False, name_list=None, string_output=False)

Return metadata dict from file, loading data for each item in the metadata list The metadata list is taken from name_list, otherwise self.metadata or self.values

Parameters:

    hdf_file (File): hdf file object. Required.
    default: Value to return for names not associated with a dataset. Default: None.
    direct_load: if True, loads data from the hdf file directly, without conversion. Default: False.
    name_list (list): if available, uses this list of dataset names to generate the metadata list. Default: None.
    string_output: if True, returns string summary of each value. Default: False.

Returns:

    dict: {name: value}

Source code in src/hdfmap/hdfmap_class.py
def get_metadata(self, hdf_file: h5py.File, default=None, direct_load=False,
                 name_list: list = None, string_output=False) -> dict:
    """
    Return metadata dict from file, loading data for each item in the metadata list
    The metadata list is taken from name_list, otherwise self.metadata or self.values
    :param hdf_file: hdf file object
    :param default: Value to return for names not associated with a dataset
    :param direct_load: if True, loads data from the hdf file directly, without conversion
    :param name_list: if available, uses this list of dataset names to generate the metadata list
    :param string_output: if True, returns string summary of each value
    :return: {name: value}
    """
    extra = extra_hdf_data(hdf_file)
    if name_list:
        metadata_paths = {name: self.combined.get(name, '') for name in name_list}
    elif self.metadata:
        metadata_paths = self.metadata
    else:
        logger.warning("'local_names' metadata is not available, using all size=1 datasets.")
        # metadata_paths = self.values
        metadata_paths = {ds.name: path for path, ds in self.datasets.items() if ds.size <= 1}
    if string_output:
        extra = {key: f"'{val}'" for key, val in extra.items()}
        metadata = {
            name: dataset2str(hdf_file[path]) if path in hdf_file else str(default)
            for name, path in metadata_paths.items()
        }
    else:
        metadata = {
            name: dataset2data(hdf_file[path], direct_load=direct_load) if path in hdf_file else default
            for name, path in metadata_paths.items()
        }
    return {**extra, **metadata}
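
A minimal sketch; the file and the 'temperature' name are hypothetical:

    from hdfmap import create_nexus_map, load_hdf
    hmap = create_nexus_map('scan.nxs')
    with load_hdf('scan.nxs') as hdf:
        meta = hmap.get_metadata(hdf)                               # {name: value}
        subset = hmap.get_metadata(hdf, name_list=['temperature'])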

get_path(name_or_path)

Return hdf path of object in HdfMap

Source code in src/hdfmap/hdfmap_class.py
def get_path(self, name_or_path):
    """Return hdf path of object in HdfMap"""
    if name_or_path in self.datasets or name_or_path in self.groups:
        return name_or_path
    if name_or_path in self.combined:
        return self.combined[name_or_path]
    if name_or_path in self.image_data:
        return self.image_data[name_or_path]
    if name_or_path in self.classes:
        return self.classes[name_or_path][0]  # return first path in list
    return None
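
The checks above define the lookup order: an existing hdf path passes straight through, then names are resolved against the combined namespace, image data and class lists. For instance, with hmap as created above (the path and names are illustrative):

hmap.get_path('/entry/sample/name')  # existing path -> returned unchanged
hmap.get_path('energy')              # name in combined namespace -> its hdf path
hmap.get_path('NXdetector')          # class name -> first group path with that class
hmap.get_path('missing_name')        # no match -> None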

get_scannables(hdf_file, flatten=False, numeric_only=False)

Return scannables from file (values associated with hdfmap.scannables)

Source code in src/hdfmap/hdfmap_class.py
def get_scannables(self, hdf_file: h5py.File, flatten: bool = False, numeric_only: bool = False) -> dict:
    """Return scannables from file (values associated with hdfmap.scannables)"""
    return {
        name: dataset[()].flatten() if flatten else hdf_file[path][()]
        for name, path in self.scannables.items()
        if (dataset := hdf_file.get(path)) and
           (np.issubdtype(dataset.dtype, np.number) if numeric_only else True)
    }

get_scannables_array(hdf_file, return_structured_array=False)

Return 2D array of all numeric scannables in file

Parameters:

  hdf_file (File): h5py.File object [required]
  return_structured_array (bool): if True, return a Numpy structured array with column headers [default: False]

Returns:

  ndarray: numpy array with a row for each scannable, shape: (no_scannables, flattened_length)

Source code in src/hdfmap/hdfmap_class.py
def get_scannables_array(self, hdf_file: h5py.File, return_structured_array=False) -> np.ndarray:
    """
    Return 2D array of all numeric scannables in file

    :param hdf_file: h5py.File object
    :param return_structured_array: bool, if True, return a Numpy structured array with column headers
    :returns: numpy array with a row for each scannable, shape: (no_scannables, flattened_length)
    """
    _scannables = self._get_numeric_scannables(hdf_file)
    array = np.array([array for name, path, array in _scannables])
    if return_structured_array:
        dtypes = np.dtype([
            (name, hdf_file[path].dtype) for name, path, array in _scannables
        ])
        return np.array([tuple(row) for row in np.transpose(array)], dtype=dtypes)
    return array
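
For example, the structured-array form keeps the scannable names as column headers (a sketch; the filename and names are illustrative):

from hdfmap import create_nexus_map, load_hdf
hmap = create_nexus_map('file.nxs')  # placeholder filename
with load_hdf('file.nxs') as nxs:
    table = hmap.get_scannables_array(nxs, return_structured_array=True)
table.dtype.names   # e.g. ('eta', 'total', ...)
table['eta']        # column selected by scannable name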

get_string(hdf_file, name_or_path, index=(), default='', units=False)

Return data from dataset in file, converted into a string summary of the data. See hdfmap.eval_functions.dataset2str for more information.

Parameters:

  hdf_file (File): hdf file object [required]
  name_or_path (str): name or path pointing to dataset in hdf file [required]
  index: index or slice of data in hdf file [default: ()]
  default: value to return if name not found in hdf file [default: '']
  units: if True and the attribute 'units' is available, append it to the result [default: False]

Returns:

  str: dataset2str(dataset) -> str

Source code in src/hdfmap/hdfmap_class.py
def get_string(self, hdf_file: h5py.File, name_or_path: str, index=(), default='', units=False) -> str:
    """
    Return data from dataset in file, converted into string summary of data
    See hdfmap.eval_functions.dataset2str for more information.
    :param hdf_file: hdf file object
    :param name_or_path: str name or path pointing to dataset in hdf file
    :param index: index or slice of data in hdf file
    :param default: value to return if name not found in hdf file
    :param units: if True and attribute 'units' available, append this to the result
    :return: dataset2str(dataset) -> str
    """
    path = self.get_path(name_or_path)
    if path and path in hdf_file:
        return dataset2str(hdf_file[path], index, units=units)
    return default
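
E.G., a sketch with illustrative names, continuing from hmap above:

with load_hdf('file.nxs') as nxs:
    s = hmap.get_string(nxs, 'en', units=True)           # string summary with units appended, if available
    s = hmap.get_string(nxs, 'missing', default='n/a')   # name not found -> 'n/a'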

info_classes()

Return str info on group class names

Source code in src/hdfmap/hdfmap_class.py
def info_classes(self) -> str:
    """Return str info on group class names"""
    out = f"{repr(self)}\n"
    out += 'Classes:\n'
    out += disp_dict(self.classes, 20)
    return out

info_data(hdf_file)

Return string showing metadata values associated with names

Source code in src/hdfmap/hdfmap_class.py
def info_data(self, hdf_file: h5py.File) -> str:
    """Return string showing metadata values associated with names"""
    out = repr(self) + '\n'
    out += "Combined Namespace:\n"
    out += '\n'.join([
        f"{name:>30}: " +
        f"{dataset2str(hdf_file[path]):20}" +
        f": {path:60}"
        for name, path in self.combined.items()
    ])
    out += f"\n{self.info_names(scannables=True)}"
    return out

info_datasets()

Return str info on datasets

Source code in src/hdfmap/hdfmap_class.py
def info_datasets(self) -> str:
    """Return str info on datasets"""
    out = f"{repr(self)}\n"
    out += "Datasets:\n"
    out += disp_dict(self.datasets, 20)
    return out

info_groups()

Return str info on groups

Source code in src/hdfmap/hdfmap_class.py
def info_groups(self) -> str:
    """Return str info on groups"""
    out = f"{repr(self)}\n"
    out += "Groups:\n"
    for path, group in self.groups.items():
        out += f"{path} [{group.nx_class}: '{group.name}']\n"
        out += '\n'.join(f"  @{attr}: {self.get_attr(path, attr)}" for attr in group.attrs)
        out += '\n'
        for dataset_name in group.datasets:
            dataset_path = build_hdf_path(path, dataset_name)
            if dataset_path in self.datasets:
                dataset = self.datasets[dataset_path]
                out += f"  {dataset_name}: {dataset.shape}\n"
    return out

info_names(arrays=False, values=False, combined=False, metadata=False, scannables=False, image_data=False)

Return str info for different namespaces

Source code in src/hdfmap/hdfmap_class.py
def info_names(self, arrays=False, values=False, combined=False,
               metadata=False, scannables=False, image_data=False) -> str:
    """Return str info for different namespaces"""
    if not any((arrays, values, combined, metadata, scannables, image_data)):
        combined = True
    options = [
        ('Arrays', arrays, self.arrays),
        ('Values', values, self.values),
        ('Combined', combined, self.combined),
        ('Metadata', metadata, self.metadata),
        ('Scannables', scannables, self.scannables),
        ('Image Data', image_data, self.image_data),
    ]
    out = ''
    for name, show, namespace in options:
        if show:
            out += f"\n{name} Namespace:\n"
            out += '\n'.join([
                f"{name:>30}: {str(self.datasets[path].shape):10} : {path:60}"
                for name, path in namespace.items()
            ])
            out += '\n'
    return out

load_hdf(filename=None, name_or_path=None, **kwargs)

Load hdf file or hdf dataset in open state

Parameters:

  filename (str | None): filename of hdf file, or None to use self.filename [default: None]
  name_or_path (str): if given, returns the dataset at this name or path [default: None]
  kwargs: additional keyword arguments to pass to h5py.File(...) [default: {}]

Returns:

  File | Dataset: h5py.File object, or h5py.Dataset object if a dataset name is given

Source code in src/hdfmap/hdfmap_class.py
def load_hdf(self, filename: str | None = None, name_or_path: str = None, **kwargs) -> h5py.File | h5py.Dataset:
    """
    Load hdf file or hdf dataset in open state
    :param filename: str filename of hdf file, or None to use self.filename
    :param name_or_path: if given, returns the dataset
    :param kwargs: additional key-word arguments to pass to h5py.File(...)
    :return: h5py.File object or h5py.dataset object if dataset name given
    """
    if filename is None:
        filename = self.filename
    if name_or_path is None:
        return load_hdf(filename, **kwargs)
    return load_hdf(filename, **kwargs).get(self.get_path(name_or_path))
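
Both modes in a short sketch ('en' is an illustrative name); note that when a dataset is requested, the h5py.File it belongs to is opened here and stays open while the dataset is in use:

with hmap.load_hdf() as hdf:                # opens self.filename as a context manager
    value = hdf[hmap.get_path('en')][()]
dataset = hmap.load_hdf(name_or_path='en')  # h5py.Dataset from a file left open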

most_common_shape()

Return most common non-singular array shape

Source code in src/hdfmap/hdfmap_class.py
def most_common_shape(self) -> tuple:
    """Return most common non-singular array shape"""
    array_shapes = [shape for name, path in self.arrays.items() if len(shape := self.datasets[path].shape) > 0]
    return max(set(array_shapes), key=array_shapes.count)

most_common_size()

Return most common array size > 1

Source code in src/hdfmap/hdfmap_class.py
def most_common_size(self) -> int:
    """Return most common array size > 1"""
    array_sizes = [size for name, path in self.arrays.items() if (size := self.datasets[path].size) > 1]
    return max(set(array_sizes), key=array_sizes.count)
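
These two modal statistics drive scannable detection. A sketch of their relationship for a hypothetical file where most arrays hold 41 points:

hmap.most_common_shape()  # e.g. (41,) - most frequent non-singular shape
hmap.most_common_size()   # e.g. 41   - most frequent array size > 1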

populate(hdf_file)

Populate all datasets from file

Source code in src/hdfmap/hdfmap_class.py
def populate(self, hdf_file: h5py.File):
    """Populate all datasets from file"""
    self.filename = hdf_file.filename
    self._local_data.update(extra_hdf_data(hdf_file))
    self._populate(hdf_file)
    size = self.most_common_size()
    self.generate_scannables(size)
    self.generate_combined()

scannables_length()

Return the flattened length (total size) of the scannables arrays

Source code in src/hdfmap/hdfmap_class.py
def scannables_length(self) -> int:
    """Return the length of the first axis of scannables array"""
    if not self.scannables:
        return 0
    path = next(iter(self.scannables.values()))
    return self.datasets[path].size

scannables_shape()

Return the shape of the scannables arrays

Source code in src/hdfmap/hdfmap_class.py
def scannables_shape(self) -> tuple:
    """Return the shape of the first axis of scannables array"""
    if not self.scannables:
        return (0, )
    path = next(iter(self.scannables.values()))
    return self.datasets[path].shape

set_image_path(name_or_path)

Set the default image path, used by get_image

Source code in src/hdfmap/hdfmap_class.py
def set_image_path(self, name_or_path: str):
    """Set the default image path, used by get_image"""
    if name_or_path is None:
        self._default_image_path = None
    else:
        path = self.get_path(name_or_path)
        if path:
            self._default_image_path = path
    logger.info(f"Default image path: {self._default_image_path}")

use_local_data(use_data=True)

Activate the option to reload data from the namespace locally, rather than from the file.

self.eval(hdf, 'cmd') -> default behaviour loads 'cmd' from the file, based on the hdf path associated with 'cmd'
self.use_local_data() -> self.eval(hdf, 'cmd') will return 'cmd' from local data if available, or from the file
self.use_local_data(False) -> returns to the default behaviour

Source code in src/hdfmap/hdfmap_class.py
def use_local_data(self, use_data: bool = True):
    """
    Activate the option to reload data from the namespace locally, rather than from the file.

    self.eval(hdf, 'cmd') -> default will load 'cmd' from the file based on the hdf path associated with cmd
    self.use_local_data() -> self.eval(hdf, 'cmd') will return 'cmd' from local data if available, or from the file.
    self.use_local_data(False) -> returns to default behaviour
    """
    self._use_local_data = use_data
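
A short sketch ('cmd' is an illustrative name held in the local namespace), continuing from hmap above:

with load_hdf('file.nxs') as nxs:
    hmap.use_local_data()          # prefer values cached in the local namespace
    cmd = hmap.eval(nxs, 'cmd')    # from local data if available, else from the file
    hmap.use_local_data(False)     # restore the default behaviour (always read the file)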

NexusLoader

Bases: HdfLoader

Nexus Loader contains the filename and hdfmap for a NeXus file; the hdfmap contains all the dataset paths and a namespace, allowing data to be called from the file using variable names, loading only the required datasets for each operation.

E.G.
hdf = NexusLoader('file.hdf')
[data1, data2] = hdf.get_data(['dataset_name_1', 'dataset_name_2'])
data = hdf.eval('dataset_name_1 * 100 + 2')
string = hdf.format('my data is {dataset_name_1:.2f}')

Source code in src/hdfmap/reloader_class.py
class NexusLoader(HdfLoader):
    """
    Nexus Loader
    contains the filename and hdfmap for a NeXus file, the hdfmap contains all the dataset paths and a
    namespace, allowing data to be called from the file using variable names, loading only the required datasets
    for each operation.
    E.G.
        hdf = NexusLoader('file.hdf')
        [data1, data2] = hdf.get_data(['dataset_name_1', 'dataset_name_2'])
        data = hdf.eval('dataset_name_1 * 100 + 2')
        string = hdf.format('my data is {dataset_name_1:.2f}')
    """

    def __init__(self, nxs_filename: str, hdf_map: NexusMap | None = None):
        if not hdf_map:
            hdf_map = create_nexus_map(nxs_filename)
        super().__init__(nxs_filename, hdf_map)

    def get_plot_data(self) -> dict:
        """Return dict of useful plot data"""
        with self._load() as hdf:
            return self.map.get_plot_data(hdf)

get_plot_data()

Return dict of useful plot data

Source code in src/hdfmap/reloader_class.py
def get_plot_data(self) -> dict:
    """Return dict of useful plot data"""
    with self._load() as hdf:
        return self.map.get_plot_data(hdf)
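
The returned dict maps directly onto a simple plot; a sketch assuming matplotlib is installed ('file.nxs' is a placeholder filename):

import matplotlib.pyplot as plt
from hdfmap import NexusLoader

scan = NexusLoader('file.nxs')
d = scan.get_plot_data()
plt.plot(d['xdata'], d['ydata'])
plt.xlabel(d['xlabel'])
plt.ylabel(d['ylabel'])
plt.title(d['title'])
plt.show()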

NexusMap

Bases: HdfMap

HdfMap for Nexus (.nxs) files

Extends the HdfMap class with additional behaviours for NeXus files. http://www.nexusformat.org/

E.G.
nxmap = NexusMap()
with h5py.File('file.nxs', 'r') as nxs:
    nxmap.populate(nxs, default_entry_only=True)  # populates only from the default entry

Special behaviour

nxmap.image_data is preferentially populated by NXdetector groups
nxmap['axes'] -> return path of default axes dataset
nxmap['signal'] -> return path of default signal dataset
nxmap['image_data'] -> return path of first area detector data object
[axes_paths], [signal_paths] = nxmap.nexus_default_paths()
[axes_names], [signal_names] = nxmap.nexus_default_names()  # returns default names in nxmap.scannables

Source code in src/hdfmap/nexus.py
class NexusMap(HdfMap):
    """
    HdfMap for Nexus (.nxs) files

    Extends the HdfMap class with additional behaviours for NeXus files.
    http://www.nexusformat.org/

    E.G.
    nxmap = NexusMap()
    with h5py.File('file.nxs', 'r') as nxs:
        nxmap.populate(nxs, default_entry_only=True)  # populates only from the default entry

    # Special behaviour
    nxmap.image_data is preferentially populated by NXdetector groups
    nxmap['axes'] -> return path of default axes dataset
    nxmap['signal'] -> return path of default signal dataset
    nxmap['image_data'] -> return path of first area detector data object
    [axes_paths], [signal_paths] = nxmap.nexus_default_paths()
    [axes_names], [signal_names] = nxmap.nexus_default_names()  # returns default names in nxmap.scannables
    """

    def __repr__(self):
        return f"NexusMap based on '{self.filename}'"

    def all_nxclasses(self) -> list[str]:
        """Return list of unique NX_class attributes used in NXgroups"""
        return list({
            nxclass.decode() if isinstance(nxclass, bytes) else nxclass
            for path, grp in self.groups.items() if (nxclass := grp.attrs.get(NX_CLASS))
        })

    def info_nexus(self, scannables=True, image_data=True, metadata=False) -> str:
        """Return str info on nexus format"""
        out = f"{repr(self)}\n"
        out += f"{NX_CLASS}:\n"
        nx_classes = self.all_nxclasses()
        out += disp_dict({k: v for k, v in self.classes.items() if k in nx_classes}, 20)
        out += '\nDefaults:\n'
        out += f"  @{NX_DEFAULT}: {self.find_attr(NX_DEFAULT)}\n"
        out += f"  @{NX_AXES}: {self.get_path(NX_AXES)}\n"
        out += f"  @{NX_SIGNAL}: {self.get_path(NX_SIGNAL)}\n"
        out += f"{self.info_names(scannables=scannables, image_data=image_data, metadata=metadata)}"
        out += f""
        return out

    def _store_group(self, hdf_group: h5py.Group, path: str, name: str):
        super()._store_group(hdf_group, path, name)
        if NX_DEFINITION in hdf_group:
            definition = hdf_group[NX_DEFINITION].asstr()[()]  # e.g. NXmx or NXxas
            self._store_class(definition, path)

    def _store_default_nexus_paths(self, hdf_file):
        """Load Nexus default axes and signal"""
        try:
            # find the default NXentry group
            nx_entry_name = default_nxentry(hdf_file)
            nx_entry = hdf_file[nx_entry_name]
            nx_entry_path = build_hdf_path(nx_entry_name)
            self._store_group(nx_entry, nx_entry_path, NX_ENTRY)
            # find the default NXdata group
            nx_data_name = default_nxdata(nx_entry)
            nx_data = nx_entry[nx_data_name]
            nx_data_path = build_hdf_path(nx_entry_name, nx_data_name)
            self._store_group(nx_data, nx_data_path, NX_DATA)

            axes_paths, signal_paths = find_nexus_defaults(hdf_file, nx_data_path)
            if axes_paths and isinstance(hdf_file.get(axes_paths[0]), h5py.Dataset):
                self.arrays[NX_AXES] = axes_paths[0]
                n = 0
                for axes_path in axes_paths:
                    if isinstance(hdf_file.get(axes_path), h5py.Dataset):
                        self.arrays[f"{NX_AXES}{n}"] = axes_path
                        n += 1
                logger.info(f"DEFAULT axes: {axes_paths}")
            if signal_paths and isinstance(hdf_file.get(signal_paths[0]), h5py.Dataset):
                self.arrays[NX_SIGNAL] = signal_paths[0]
                n = 0
                for signal_path in signal_paths:
                    if isinstance(hdf_file.get(signal_path), h5py.Dataset):
                        self.arrays[f"{NX_SIGNAL}{n}"] = signal_path
                        n += 1
                logger.info(f"DEFAULT signals: {signal_paths}")
        except KeyError:
            pass

    def nexus_default_paths(self) -> tuple[list[str], list[str]]:
        """Return default axes and signal paths"""
        axes_paths = [self.arrays[axes] for n in range(10) if (axes := f"{NX_AXES}{n}") in self.arrays]
        signal_paths = [self.arrays[signal] for n in range(10) if (signal := f"{NX_SIGNAL}{n}") in self.arrays]
        return axes_paths, signal_paths

    def nexus_default_names(self) -> tuple[dict[str, str], dict[str, str]]:
        """Return name of default axes and signal paths, as defined in scannables"""
        axes_paths, signal_paths = self.nexus_default_paths()
        axes_names = [self.datasets[path].name for path in axes_paths]
        signal_names = [self.datasets[path].name for path in signal_paths]
        # axes_names = [name for path in axes_paths for name in self.datasets[path].names]
        # signal_names = [name for path in signal_paths for name in self.datasets[path].names]
        alt_names = {
            self.datasets[path].name: self.datasets[path].names
            for path in axes_paths + signal_paths
        }
        return self.first_last_scannables(axes_names, signal_names, alt_names)

    def generate_scannables_from_nxdata(self, hdf_file: h5py.File, use_auxiliary: bool = True):
        """Generate scannables from default NXdata, using axuiliary_names if available"""
        # find the default NXdata group and generate the scannables list
        # nx_entry = hdf_file.get(default_nxentry(hdf_file))
        # nx_data = nx_entry.get(default_nxdata(nx_entry))
        nx_entry = hdf_file.get(self.classes[NX_ENTRY][0])  # classes[NX_ENTRY] pre-populated by _default_nexus_paths
        nx_data = hdf_file.get(self.classes[NX_DATA][0])  # classes[NX_DATA] pre-populated by _default_nexus_paths
        logger.info(f"{nx_entry}, {nx_data}")
        if nx_data:
            logger.info(f"Generating Scannables from NXData: {nx_data.name}")
            if use_auxiliary and NX_AUXILIARY in nx_data.attrs:
                signals = list(nx_data.attrs[NX_AUXILIARY])
                if NX_SIGNAL in nx_data.attrs:
                    signals.insert(0, nx_data.attrs[NX_SIGNAL])
                if NX_AXES in nx_data.attrs:
                    signals.extend(list(nx_data.attrs[NX_AXES]))
                signals = [i.decode() if isinstance(i, bytes) else i for i in signals]  # convert bytes to str
                logger.info(f"NX Data - using auxiliary_names: {signals}")
                self.generate_scannables_from_group(nx_data, dataset_names=signals)
            else:
                self.generate_scannables_from_group(nx_data)

    def generate_scannables_from_scan_fields_or_nxdata(self, hdf_file: h5py.File):
        """Generate scannables from scan_field names or default NXdata"""

        # find 'scan_fields' to generate scannables list
        if NX_SCANFIELDS in self.arrays:
            scan_fields_path = self.arrays[NX_SCANFIELDS]
            # scan_fields = hdf_file[scan_fields_path][()]
            scan_fields = names_from_scan_fields(hdf_file, scan_fields_path)
            if scan_fields:
                logger.info(f"Generating Scannables from NX ScanFields: {scan_fields_path}: {scan_fields}")
                self.generate_scannables_from_names(scan_fields)
            else:
                self.generate_scannables_from_nxdata(hdf_file)
        else:
            self.generate_scannables_from_nxdata(hdf_file)

        if not self.scannables:
            logger.warning("No NXdata found, scannables not populated!")

    def generate_image_data_from_nxdetector(self):
        """
        find the NXdetector group and assign the image data
        Must be called after the scannables have been defined as the scan shape is required
        """
        self.image_data = {}
        image_ndim = len(self.scannables_shape()) + 2 if self.scannables else 3
        if NX_DETECTOR in self.classes:
            group_paths = set(self.classes[NX_DETECTOR])
        elif NX_DATA in self.classes:
            # if no detectors, check for NXdata->dataset with > 2 dimensions
            group_paths = set(self.classes[NX_DATA])
        else:
            group_paths = []

        for group_path in group_paths:
            detector_name = generate_identifier(group_path)
            # detector data is stored in NXdata in dataset 'data'
            data_path = build_hdf_path(group_path, NX_DETECTOR_DATA)
            image_data_path = build_hdf_path(group_path, NX_IMAGE_DATA)
            image_data_numbers = build_hdf_path(group_path, NX_IMAGE_NUMBER)
            logger.debug(f"Looking for image_data at: '{data_path}' or '{image_data_path}'")
            if data_path in self.datasets and is_image(self.datasets[data_path].shape, image_ndim):
                logger.info(f"Adding image_data ['{detector_name}'] = '{data_path}'")
                self.image_data[detector_name] = data_path
                self.arrays[detector_name] = data_path
                # also save image_data if available
                if image_data_path in self.datasets:
                    detector_name = f"{detector_name}_image_list"
                    logger.info(f"Adding image_data str ['{detector_name}'] = '{image_data_path}'")
                    self.image_data[detector_name] = image_data_path
                    self.arrays[detector_name] = image_data_path
                elif image_data_numbers in self.datasets:
                    detector_name = f"{detector_name}_image_list"
                    logger.info(f"Adding image_data 1D ['{detector_name}'] = '{image_data_numbers}'")
                    self.image_data[detector_name] = image_data_numbers
                    self.arrays[detector_name] = image_data_numbers
            elif image_data_path in self.datasets:
                logger.info(f"Adding image_data str ['{detector_name}'] = '{image_data_path}'")
                self.image_data[detector_name] = image_data_path
                self.arrays[detector_name] = image_data_path
            elif image_data_numbers in self.datasets:
                logger.info(f"Adding image_data 1D ['{detector_name}'] = '{image_data_numbers}'")
                self.image_data[detector_name] = image_data_numbers
                self.arrays[detector_name] = image_data_numbers
            else:
                # Use first dataset with > 2 dimensions
                image_dataset = next((
                    path for name in self.get_group_datasets(group_path)
                    if is_image(self.datasets[path := build_hdf_path(group_path, name)].shape, image_ndim)
                ), False)
                if image_dataset:
                    logger.info(f"Adding image_data ['{detector_name}'] = '{image_dataset}'")
                    self.image_data[detector_name] = image_dataset
                    self.arrays[detector_name] = image_dataset

        if not self.image_data:
            logger.info("No NXdetector image found, image_data not populated.")

    def populate(self, hdf_file: h5py.File, groups=None, default_entry_only=False):
        """
        Populate only datasets from default or first entry, with scannables from given groups.
        Automatically load defaults (axes, signal) and generate scannables from default group
        :param hdf_file: HDF File object
        :param groups: list of group names or NXClass names to search for datasets, within default entry
        :param default_entry_only: if True, only the first or default entry will be loaded
        """
        self.filename = hdf_file.filename

        # Add defaults to arrays
        self._store_default_nexus_paths(hdf_file)

        entry_paths = [
            build_hdf_path(name) for name in (
                self.classes[NX_ENTRY] +  # classes[NX_ENTRY] pre-populated by _default_nexus_paths
                [entry for entry in hdf_file if check_nexus_class(hdf_file.get(entry), NX_ENTRY)]  # all NXentry
            )
        ]
        # remove duplicates, sorting so the default is first
        entry_paths = sorted(set(entry_paths), key=entry_paths.index)

        if default_entry_only:
            entry_paths = entry_paths[:1]

        for entry_path in entry_paths:
            entry = os.path.basename(entry_path)
            nx_entry = hdf_file.get(entry)
            if nx_entry is None:
                continue  # group may be missing due to a broken link
            hdf_path = build_hdf_path(entry)
            logger.debug(f"NX Entry: {hdf_path}")
            self.all_paths.append(hdf_path)
            self._store_group(nx_entry, hdf_path, entry)
            self._populate(nx_entry, root=hdf_path, groups=groups)  # nx_entry.name can be wrong!

        if not self.datasets:
            logger.warning("No datasets found!")

        # find the scannable arrays and generate self.combined
        self.generate_scannables_from_scan_fields_or_nxdata(hdf_file)
        if not self.scannables:
            logger.warning('NXdata not found, getting scannables from most common array size')
            size = self.most_common_size()
            self.generate_scannables(size)
            if len(self.scannables) < len(self.scannables_shape()):
                logger.warning('Fewer scannables than most common shape dimensions, removing scannables')
                self.scannables = {}
        # find the NXdetector group and assign the image data
        self.generate_image_data_from_nxdetector()
        # finalise map with combined namespace
        self.generate_combined()

    def get_plot_data(self, hdf_file: h5py.File):
        """
        Return plotting data from scannables
        :returns: {
            'xlabel': str label of first axes
            'ylabel': str label of first signal
            'xdata': flattened array of first axes
            'ydata': flattened array of first signal
            'axes_names': list of axes names,
            'signal_names': list of signal + auxiliary signal names,
            'axes_data': list of ND arrays of data for axes,
            'signal_data': list of ND arrays of data for signal + auxiliary signals,
            'axes_labels': list of axes labels as 'name [units]',
            'signal_labels': list of signal labels,
            'data': dict of all scannables axes,
            'title': str title as 'filename\nNXtitle'
        if the dataset is a 2D grid scan, additional keys:
            'grid_xlabel': str label of grid x-axis
            'grid_ylabel': str label of grid y-axis
            'grid_label': str label of height or colour
            'grid_xdata': 2D array of x-coordinates
            'grid_ydata': 2D array of y-coordinates
            'grid_data': 2D array of height or colour
        }
        """
        axes, signals = self.nexus_default_names()
        axes_units = [self.get_attr(path, NX_UNITS, '') for name, path in axes.items()]
        signal_units = [self.get_attr(path, NX_UNITS, '') for name, path in signals.items()]
        axes_labels = [name + (f" [{unit}]" if unit else '') for name, unit in zip(axes, axes_units)]
        signal_labels = [name + (f" [{unit}]" if unit else '') for name, unit in zip(signals, signal_units)]
        title = f"{os.path.basename(self.filename)}\n{self.get_data(hdf_file, NX_TITLE)}"

        xdata = (
            self.get_data(hdf_file, next(iter(axes.values()))).flatten()
            if axes else range(self.scannables_length())
        )
        ydata = (
            self.get_data(hdf_file, next(iter(signals.values()))).flatten()
            if signals else [1.0] * self.scannables_length()
        )

        data = {
            'xlabel': next(iter(axes_labels), 'x'),
            'ylabel': next(iter(signal_labels), 'y'),
            'xdata': xdata,
            'ydata': ydata,
            'axes_names': list(axes.keys()),
            'signal_names': list(signals.keys()),
            'axes_data': [self.get_data(hdf_file, ax) for ax in axes.values()],
            'signal_data': [self.get_data(hdf_file, sig) for sig in signals.values()],
            'axes_labels': axes_labels,
            'signal_labels': signal_labels,
            'data': self.get_scannables(hdf_file, numeric_only=True),
            'title': title
        }
        if len(axes) == 2 and len(self.scannables_shape()) == 2:
            # 2D grid scan
            xpath, ypath = axes.values()
            data_path = next(iter(signals.values()))
            data['grid_xlabel'] = axes_labels[0]
            data['grid_ylabel'] = axes_labels[1]
            data['grid_label'] = signal_labels[0]
            data['grid_xdata'] = self.get_data(hdf_file, xpath)
            data['grid_ydata'] = self.get_data(hdf_file, ypath)
            data['grid_data'] = self.get_data(hdf_file, data_path)
        return data

all_nxclasses()

Return list of unique NX_class attributes used in NXgroups

Source code in src/hdfmap/nexus.py
def all_nxclasses(self) -> list[str]:
    """Return list of unique NX_class attributes used in NXgroups"""
    return list({
        nxclass.decode() if isinstance(nxclass, bytes) else nxclass
        for path, grp in self.groups.items() if (nxclass := grp.attrs.get(NX_CLASS))
    })

generate_image_data_from_nxdetector()

Find the NXdetector group and assign the image data. Must be called after the scannables have been defined, as the scan shape is required.

Source code in src/hdfmap/nexus.py
def generate_image_data_from_nxdetector(self):
    """
    find the NXdetector group and assign the image data
    Must be called after the scannables have been defined as the scan shape is required
    """
    self.image_data = {}
    image_ndim = len(self.scannables_shape()) + 2 if self.scannables else 3
    if NX_DETECTOR in self.classes:
        group_paths = set(self.classes[NX_DETECTOR])
    elif NX_DATA in self.classes:
        # if no detectors, check for NXdata->dataset with > 2 dimensions
        group_paths = set(self.classes[NX_DATA])
    else:
        group_paths = []

    for group_path in group_paths:
        detector_name = generate_identifier(group_path)
        # detector data is stored in NXdata in dataset 'data'
        data_path = build_hdf_path(group_path, NX_DETECTOR_DATA)
        image_data_path = build_hdf_path(group_path, NX_IMAGE_DATA)
        image_data_numbers = build_hdf_path(group_path, NX_IMAGE_NUMBER)
        logger.debug(f"Looking for image_data at: '{data_path}' or '{image_data_path}'")
        if data_path in self.datasets and is_image(self.datasets[data_path].shape, image_ndim):
            logger.info(f"Adding image_data ['{detector_name}'] = '{data_path}'")
            self.image_data[detector_name] = data_path
            self.arrays[detector_name] = data_path
            # also save image_data if available
            if image_data_path in self.datasets:
                detector_name = f"{detector_name}_image_list"
                logger.info(f"Adding image_data str ['{detector_name}'] = '{image_data_path}'")
                self.image_data[detector_name] = image_data_path
                self.arrays[detector_name] = image_data_path
            elif image_data_numbers in self.datasets:
                detector_name = f"{detector_name}_image_list"
                logger.info(f"Adding image_data 1D ['{detector_name}'] = '{image_data_numbers}'")
                self.image_data[detector_name] = image_data_numbers
                self.arrays[detector_name] = image_data_numbers
        elif image_data_path in self.datasets:
            logger.info(f"Adding image_data str ['{detector_name}'] = '{image_data_path}'")
            self.image_data[detector_name] = image_data_path
            self.arrays[detector_name] = image_data_path
        elif image_data_numbers in self.datasets:
            logger.info(f"Adding image_data 1D ['{detector_name}'] = '{image_data_numbers}'")
            self.image_data[detector_name] = image_data_numbers
            self.arrays[detector_name] = image_data_numbers
        else:
            # Use first dataset with > 2 dimensions
            image_dataset = next((
                path for name in self.get_group_datasets(group_path)
                if is_image(self.datasets[path := build_hdf_path(group_path, name)].shape, image_ndim)
            ), False)
            if image_dataset:
                logger.info(f"Adding image_data ['{detector_name}'] = '{image_dataset}'")
                self.image_data[detector_name] = image_dataset
                self.arrays[detector_name] = image_dataset

    if not self.image_data:
        logger.info("No NXdetector image found, image_data not populated.")

generate_scannables_from_nxdata(hdf_file, use_auxiliary=True)

Generate scannables from default NXdata, using auxiliary_names if available

Source code in src/hdfmap/nexus.py
def generate_scannables_from_nxdata(self, hdf_file: h5py.File, use_auxiliary: bool = True):
    """Generate scannables from default NXdata, using axuiliary_names if available"""
    # find the default NXdata group and generate the scannables list
    # nx_entry = hdf_file.get(default_nxentry(hdf_file))
    # nx_data = nx_entry.get(default_nxdata(nx_entry))
    nx_entry = hdf_file.get(self.classes[NX_ENTRY][0])  # classes[NX_ENTRY] pre-populated by _default_nexus_paths
    nx_data = hdf_file.get(self.classes[NX_DATA][0])  # classes[NX_DATA] pre-populated by _default_nexus_paths
    logger.info(f"{nx_entry}, {nx_data}")
    if nx_data:
        logger.info(f"Generating Scannables from NXData: {nx_data.name}")
        if use_auxiliary and NX_AUXILIARY in nx_data.attrs:
            signals = list(nx_data.attrs[NX_AUXILIARY])
            if NX_SIGNAL in nx_data.attrs:
                signals.insert(0, nx_data.attrs[NX_SIGNAL])
            if NX_AXES in nx_data.attrs:
                signals.extend(list(nx_data.attrs[NX_AXES]))
            signals = [i.decode() if isinstance(i, bytes) else i for i in signals]  # convert bytes to str
            logger.info(f"NX Data - using auxiliary_names: {signals}")
            self.generate_scannables_from_group(nx_data, dataset_names=signals)
        else:
            self.generate_scannables_from_group(nx_data)

generate_scannables_from_scan_fields_or_nxdata(hdf_file)

Generate scannables from scan_field names or default NXdata

Source code in src/hdfmap/nexus.py
def generate_scannables_from_scan_fields_or_nxdata(self, hdf_file: h5py.File):
    """Generate scannables from scan_field names or default NXdata"""

    # find 'scan_fields' to generate scannables list
    if NX_SCANFIELDS in self.arrays:
        scan_fields_path = self.arrays[NX_SCANFIELDS]
        # scan_fields = hdf_file[scan_fields_path][()]
        scan_fields = names_from_scan_fields(hdf_file, scan_fields_path)
        if scan_fields:
            logger.info(f"Generating Scannables from NX ScanFields: {scan_fields_path}: {scan_fields}")
            self.generate_scannables_from_names(scan_fields)
        else:
            self.generate_scannables_from_nxdata(hdf_file)
    else:
        self.generate_scannables_from_nxdata(hdf_file)

    if not self.scannables:
        logger.warning("No NXdata found, scannables not populated!")

get_plot_data(hdf_file)

Return plotting data from scannables.

Returns a dict:
    'xlabel': str label of first axes
    'ylabel': str label of first signal
    'xdata': flattened array of first axes
    'ydata': flattened array of first signal
    'axes_names': list of axes names
    'signal_names': list of signal + auxiliary signal names
    'axes_data': list of ND arrays of data for axes
    'signal_data': list of ND arrays of data for signal + auxiliary signals
    'axes_labels': list of axes labels as 'name [units]'
    'signal_labels': list of signal labels
    'data': dict of all scannables
    'title': str title as 'filename\nNXtitle'

If the dataset is a 2D grid scan, additional keys:
    'grid_xlabel': str label of grid x-axis
    'grid_ylabel': str label of grid y-axis
    'grid_label': str label of height or colour
    'grid_xdata': 2D array of x-coordinates
    'grid_ydata': 2D array of y-coordinates
    'grid_data': 2D array of height or colour

Source code in src/hdfmap/nexus.py
def get_plot_data(self, hdf_file: h5py.File):
    """
    Return plotting data from scannables
    :returns: {
        'xlabel': str label of first axes
        'ylabel': str label of first signal
        'xdata': flattened array of first axes
        'ydata': flattened array of first signal
        'axes_names': list of axes names,
        'signal_names': list of signal + auxiliary signal names,
        'axes_data': list of ND arrays of data for axes,
        'signal_data': list of ND arrays of data for signal + auxiliary signals,
        'axes_labels': list of axes labels as 'name [units]',
        'signal_labels': list of signal labels,
        'data': dict of all scannables axes,
        'title': str title as 'filename\nNXtitle'
    if the dataset is a 2D grid scan, additional keys:
        'grid_xlabel': str label of grid x-axis
        'grid_ylabel': str label of grid y-axis
        'grid_label': str label of height or colour
        'grid_xdata': 2D array of x-coordinates
        'grid_ydata': 2D array of y-coordinates
        'grid_data': 2D array of height or colour
    }
    """
    axes, signals = self.nexus_default_names()
    axes_units = [self.get_attr(path, NX_UNITS, '') for name, path in axes.items()]
    signal_units = [self.get_attr(path, NX_UNITS, '') for name, path in signals.items()]
    axes_labels = [name + (f" [{unit}]" if unit else '') for name, unit in zip(axes, axes_units)]
    signal_labels = [name + (f" [{unit}]" if unit else '') for name, unit in zip(signals, signal_units)]
    title = f"{os.path.basename(self.filename)}\n{self.get_data(hdf_file, NX_TITLE)}"

    xdata = (
        self.get_data(hdf_file, next(iter(axes.values()))).flatten()
        if axes else range(self.scannables_length())
    )
    ydata = (
        self.get_data(hdf_file, next(iter(signals.values()))).flatten()
        if signals else [1.0] * self.scannables_length()
    )

    data = {
        'xlabel': next(iter(axes_labels), 'x'),
        'ylabel': next(iter(signal_labels), 'y'),
        'xdata': xdata,
        'ydata': ydata,
        'axes_names': list(axes.keys()),
        'signal_names': list(signals.keys()),
        'axes_data': [self.get_data(hdf_file, ax) for ax in axes.values()],
        'signal_data': [self.get_data(hdf_file, sig) for sig in signals.values()],
        'axes_labels': axes_labels,
        'signal_labels': signal_labels,
        'data': self.get_scannables(hdf_file, numeric_only=True),
        'title': title
    }
    if len(axes) == 2 and len(self.scannables_shape()) == 2:
        # 2D grid scan
        xpath, ypath = axes.values()
        data_path = next(iter(signals.values()))
        data['grid_xlabel'] = axes_labels[0]
        data['grid_ylabel'] = axes_labels[1]
        data['grid_label'] = signal_labels[0]
        data['grid_xdata'] = self.get_data(hdf_file, xpath)
        data['grid_ydata'] = self.get_data(hdf_file, ypath)
        data['grid_data'] = self.get_data(hdf_file, data_path)
    return data

info_nexus(scannables=True, image_data=True, metadata=False)

Return str info on nexus format

Source code in src/hdfmap/nexus.py
def info_nexus(self, scannables=True, image_data=True, metadata=False) -> str:
    """Return str info on nexus format"""
    out = f"{repr(self)}\n"
    out += f"{NX_CLASS}:\n"
    nx_classes = self.all_nxclasses()
    out += disp_dict({k: v for k, v in self.classes.items() if k in nx_classes}, 20)
    out += '\nDefaults:\n'
    out += f"  @{NX_DEFAULT}: {self.find_attr(NX_DEFAULT)}\n"
    out += f"  @{NX_AXES}: {self.get_path(NX_AXES)}\n"
    out += f"  @{NX_SIGNAL}: {self.get_path(NX_SIGNAL)}\n"
    out += f"{self.info_names(scannables=scannables, image_data=image_data, metadata=metadata)}"
    out += f""
    return out

nexus_default_names()

Return name of default axes and signal paths, as defined in scannables

Source code in src/hdfmap/nexus.py
def nexus_default_names(self) -> tuple[dict[str, str], dict[str, str]]:
    """Return name of default axes and signal paths, as defined in scannables"""
    axes_paths, signal_paths = self.nexus_default_paths()
    axes_names = [self.datasets[path].name for path in axes_paths]
    signal_names = [self.datasets[path].name for path in signal_paths]
    # axes_names = [name for path in axes_paths for name in self.datasets[path].names]
    # signal_names = [name for path in signal_paths for name in self.datasets[path].names]
    alt_names = {
        self.datasets[path].name: self.datasets[path].names
        for path in axes_paths + signal_paths
    }
    return self.first_last_scannables(axes_names, signal_names, alt_names)

nexus_default_paths()

Return default axes and signal paths

Source code in src/hdfmap/nexus.py
def nexus_default_paths(self) -> tuple[list[str], list[str]]:
    """Return default axes and signal paths"""
    axes_paths = [self.arrays[axes] for n in range(10) if (axes := f"{NX_AXES}{n}") in self.arrays]
    signal_paths = [self.arrays[signal] for n in range(10) if (signal := f"{NX_SIGNAL}{n}") in self.arrays]
    return axes_paths, signal_paths

populate(hdf_file, groups=None, default_entry_only=False)

Populate only datasets from default or first entry, with scannables from given groups. Automatically load defaults (axes, signal) and generate scannables from default group

Parameters:

  hdf_file (File): HDF File object [required]
  groups: list of group names or NXClass names to search for datasets, within default entry [default: None]
  default_entry_only: if True, only the first or default entry will be loaded [default: False]

Source code in src/hdfmap/nexus.py
def populate(self, hdf_file: h5py.File, groups=None, default_entry_only=False):
    """
    Populate only datasets from default or first entry, with scannables from given groups.
    Automatically load defaults (axes, signal) and generate scannables from default group
    :param hdf_file: HDF File object
    :param groups: list of group names or NXClass names to search for datasets, within default entry
    :param default_entry_only: if True, only the first or default entry will be loaded
    """
    self.filename = hdf_file.filename

    # Add defaults to arrays
    self._store_default_nexus_paths(hdf_file)

    entry_paths = [
        build_hdf_path(name) for name in (
            self.classes[NX_ENTRY] +  # classes[NX_ENTRY] pre-populated by _default_nexus_paths
            [entry for entry in hdf_file if check_nexus_class(hdf_file.get(entry), NX_ENTRY)]  # all NXentry
        )
    ]
    # remove duplicates, sorting so the default is first
    entry_paths = sorted(set(entry_paths), key=entry_paths.index)

    if default_entry_only:
        entry_paths = entry_paths[:1]

    for entry_path in entry_paths:
        entry = os.path.basename(entry_path)
        nx_entry = hdf_file.get(entry)
        if nx_entry is None:
            continue  # group may be missing due to a broken link
        hdf_path = build_hdf_path(entry)
        logger.debug(f"NX Entry: {hdf_path}")
        self.all_paths.append(hdf_path)
        self._store_group(nx_entry, hdf_path, entry)
        self._populate(nx_entry, root=hdf_path, groups=groups)  # nx_entry.name can be wrong!

    if not self.datasets:
        logger.warning("No datasets found!")

    # find the scannable arrays and generate self.combined
    self.generate_scannables_from_scan_fields_or_nxdata(hdf_file)
    if not self.scannables:
        logger.warning('NXdata not found, getting scannables from most common array size')
        size = self.most_common_size()
        self.generate_scannables(size)
        if len(self.scannables) < len(self.scannables_shape()):
            logger.warning('Fewer scannables than most common shape dimensions, removing scannables')
            self.scannables = {}
    # find the NXdetector group and assign the image data
    self.generate_image_data_from_nxdetector()
    # finalise map with combined namespace
    self.generate_combined()

compare_maps(map1, map2)

Compare two HdfMap objects

Source code in src/hdfmap/file_functions.py
def compare_maps(map1: HdfMap | NexusMap, map2: HdfMap | NexusMap) -> str:
    """
    Compare two HdfMap objects
    """
    missing_in_2 = []
    missing_in_1 = []
    different = []
    same = []
    for name1, path1 in map1.combined.items():
        if name1 in map2.combined:
            path2 = map2.combined[name1]
            if path2 != path1:
                different.append(f"{name1}: {path1} != {path2}")
            dataset1 = map1.datasets[path1]
            dataset2 = map2.datasets[path2]
            if dataset1.shape != dataset2.shape:
                different.append(f"{name1}: {dataset1.shape}, {dataset2.shape}")
            else:
                same.append(f"{name1}: {dataset1.shape} : {path1}, {path2}")
        else:
            missing_in_2.append(f"{name1}: {path1}")

    for name2, path2 in map2.combined.items():
        if name2 not in map1.combined:
            missing_in_1.append(f"{name2}: {path2}")

    output = f"Comparing:\n  {map1.filename}, with\n  {map2.filename}\n\n"
    output += "Different items:\n  " + '\n  '.join(different)
    output += f"\n\nMissing in {map1.filename}:\n  " + '\n  '.join(missing_in_1)
    output += f"\n\nMissing in {map2.filename}:\n  " + '\n  '.join(missing_in_2)
    output += '\n'
    return output
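
E.G. (the filenames are placeholders; compare_maps is assumed importable from hdfmap.file_functions, where the source lives):

from hdfmap import create_nexus_map
from hdfmap.file_functions import compare_maps

map1 = create_nexus_map('file1.nxs')
map2 = create_nexus_map('file2.nxs')
print(compare_maps(map1, map2))  # lists different, missing and matching names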

create_hdf_map(hdf_filename)

Create a HdfMap from a hdf file

Parameters:

  hdf_filename (str): filename of hdf file [required]

Returns:

  HdfMap

Source code in src/hdfmap/file_functions.py
def create_hdf_map(hdf_filename: str) -> HdfMap:
    """
    Create a HdfMap from a hdf file
    :param hdf_filename: str filename of hdf file
    :return: HdfMap
    """
    with load_hdf(hdf_filename) as hdf:
        hdf_map = HdfMap(hdf)
    return hdf_map

create_nexus_map(hdf_filename, groups=None, default_entry_only=False)

Create a HdfMap from a NeXus file, loading default parameters and allowing a reduced, single entry map

Parameters:

  hdf_filename (str): filename of hdf file [required]
  groups (None | list[str]): list of groups to collect datasets from [default: None]
  default_entry_only (bool): if True, only the first or default entry will be loaded [default: False]

Returns:

  NexusMap

Source code in src/hdfmap/file_functions.py
def create_nexus_map(hdf_filename: str, groups: None | list[str] = None,
                     default_entry_only: bool = False) -> NexusMap:
    """
    Create a HdfMap from a NeXus file, loading default parameters and allowing a reduced, single entry map
    :param hdf_filename: str filename of hdf file
    :param groups: list of groups to collect datasets from
    :param default_entry_only: if True, only the first or default entry will be loaded
    :return: NexusMap
    """
    hdf_map = NexusMap()
    with load_hdf(hdf_filename) as hdf:
        hdf_map.populate(hdf, groups=groups, default_entry_only=default_entry_only)
    return hdf_map
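
E.G., restricting the map to the default entry, or to particular groups (group names follow the NeXus class convention; the filename is a placeholder):

from hdfmap import create_nexus_map
hmap = create_nexus_map('file.nxs', default_entry_only=True)
hmap = create_nexus_map('file.nxs', groups=['NXdata', 'NXdetector'])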

hdf_compare(hdf_filename1, hdf_filename2, all_links=False)

Compare hdf tree structure between two files

Parameters:

  hdf_filename1 (str): filename of hdf file [required]
  hdf_filename2 (str): filename of hdf file [required]
  all_links (bool): if True, also show soft links [default: False]

Returns:

  str

Source code in src/hdfmap/hdf_loader.py
def hdf_compare(hdf_filename1: str, hdf_filename2: str, all_links=False) -> str:
    """
    Compare hdf tree structure between two files
    :param hdf_filename1: filename of hdf file
    :param hdf_filename2: filename of hdf file
    :param all_links: bool, if True, also show soft links
    :return: str
    """
    datasets1 = hdf_dataset_list(hdf_filename1, all_links)
    datasets2 = hdf_dataset_list(hdf_filename2, all_links)

    # both = [ds for ds in datasets1 if ds in datasets2]
    only_in_1 = '\n  '.join([ds for ds in datasets1 if ds not in datasets2])
    only_in_2 = '\n  '.join([ds for ds in datasets2 if ds not in datasets1])

    output = f"Compare\n    {hdf_filename1}, with\n    {hdf_filename2}\n\n"
    output += f"Datasets only in {os.path.basename(hdf_filename1)}:\n\n"
    output += f"  {only_in_1}\n"
    output += f"Datasets only in {os.path.basename(hdf_filename2)}:\n\n"
    output += f"  {only_in_2}\n"
    return output

hdf_data(filenames, name_or_path, hdf_map=None, index=(), default=None, fixed_output=False)

General purpose function to retrieve data from HDF files

Parameters:

  filenames (str | list[str]): file path or list of file paths [required]
  name_or_path (str | list[str]): name or path, or list of names or paths, of HDF datasets [required]
  hdf_map (HdfMap): HdfMap object, or None to generate from first file [default: None]
  index: dataset index or slice [default: ()]
  default: value to give if dataset doesn't exist in file [default: None]
  fixed_output: if True, always returns list of lists [default: False]

Returns:

  single file, single dataset: single value
  multi file or multi dataset: list, len(filenames) or len(name_or_path)
  multi file and multi dataset: list[files: list[names]]

Source code in src/hdfmap/file_functions.py
def hdf_data(filenames: str | list[str], name_or_path: str | list[str], hdf_map: HdfMap = None,
             index=(), default=None, fixed_output=False):
    """
    General purpose function to retrieve data from HDF files
    :param filenames: str or list of str - file paths
    :param name_or_path: str or list of str - names or paths of HDF datasets
    :param hdf_map: HdfMap object, or None to generate from first file
    :param index: dataset index or slice
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list of list
    :return if single file, single dataset: single value
    :return if multi file or multi dataset: list, len(filenames) or len(name_or_path)
    :return if multi file and multi dataset: list[files: list[names]]
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    name_or_path = as_str_list(name_or_path)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append([hdf_map.get_data(hdf, name, index=index, default=default) for name in name_or_path])
    if fixed_output:
        return out
    if len(filenames) == 1 and len(name_or_path) == 1:
        return out[0][0]
    if len(filenames) == 1 and len(name_or_path) > 1:
        return out[0]
    if len(name_or_path) == 1:
        return [val[0] for val in out]
    return out
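
The return shape follows the rules above; a sketch with placeholder filenames and illustrative dataset names:

from hdfmap import hdf_data
hdf_data('file1.nxs', 'en')                        # single value
hdf_data(['file1.nxs', 'file2.nxs'], 'en')         # list, len(filenames)
hdf_data('file1.nxs', ['en', 'Transmission'])      # list, len(name_or_path)
hdf_data('file1.nxs', 'en', fixed_output=True)     # always [[value]]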

hdf_dataset_list(hdf_filename, all_links=True, group='/')

Generate list of all datasets in the hdf file structure

Parameters:

  hdf_filename (str): filename of hdf file [required]
  all_links (bool): if True, also include soft links [default: True]
  group (str): only list datasets within this group (default root) [default: '/']

Returns:

  list[str]: list of str addresses

Source code in src/hdfmap/hdf_loader.py
def hdf_dataset_list(hdf_filename: str, all_links=True, group: str = '/') -> list[str]:
    """
    Generate list of all datasets in the hdf file structure
    :param hdf_filename: filename of hdf file
    :param all_links: bool, if True, also include soft links
    :param group: only display tree structure of this group (default root)
    :return: list of str addresses
    """

    output = []

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)

        def visit_paths(name, obj: h5py.Group | h5py.Dataset):
            if isinstance(obj, h5py.Dataset):
                output.append(name)

        def visit_links(name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            h5py_obj = hdf_group.get(name)
            if isinstance(h5py_obj, h5py.Dataset) and (
                    isinstance(obj, h5py.ExternalLink) if not all_links else True):
                output.append(name)
        if not all_links:  # visititems_links visits all items, don't double up
            hdf_group.visititems(visit_paths)
        hdf_group.visititems_links(visit_links)
    return output
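
E.G. (a sketch assuming the function is imported from hdfmap.hdf_loader, where the source lives; '/entry' is an illustrative group):

from hdfmap.hdf_loader import hdf_dataset_list
paths = hdf_dataset_list('file.nxs', all_links=False, group='/entry')
print('\n'.join(paths))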

hdf_eval(filenames, expression, hdf_map=None, default=None, fixed_output=False)

Evaluate expression using dataset names

Parameters:

  filenames (str | list[str]): file path or list of file paths [required]
  expression (str): expression to evaluate in each file, e.g. "roi2_sum / Transmission" [required]
  hdf_map (HdfMap): HdfMap object, or None to generate from first file [default: None]
  default: value to give if dataset doesn't exist in file [default: None]
  fixed_output: if True, always returns list of len(filenames) [default: False]

Returns:

  single file: single output
  multi file: list, len(filenames)

Source code in src/hdfmap/file_functions.py
def hdf_eval(filenames: str | list[str], expression: str, hdf_map: HdfMap = None, default=None, fixed_output=False):
    """
    Evaluate expression using dataset names
    :param filenames: str or list of str - file paths
    :param expression: str expression to evaluate in each file, e.g. "roi2_sum / Transmission"
    :param hdf_map: HdfMap object, or None to generate from first file
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.eval(hdf, expression, default=default))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out
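
E.G., evaluating a normalisation across a series of files (the filenames and dataset names are illustrative):

from hdfmap import hdf_eval
files = [f"scan_{n}.nxs" for n in range(10)]  # placeholder filenames
norm = hdf_eval(files, 'total / Transmission', default=float('nan'))  # list, len(files)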

hdf_find(hdf_filename, *names_or_classes, attributes=('NX_class', 'local_name'))

Find groups and datasets within an hdf file matching a set of names or class names

Parameters:

  hdf_filename (str): filename of hdf file [required]
  names_or_classes (str): object names or NXclass names to search for [default: ()]
  attributes (tuple[str]): list of attr fields to check against names [default: ('NX_class', 'local_name')]

Returns:

  tuple[list[str], list[str]]: groups[], datasets[]

Source code in src/hdfmap/hdf_loader.py
def hdf_find(hdf_filename: str, *names_or_classes: str,
             attributes: tuple[str] = ('NX_class', 'local_name')) -> tuple[list[str], list[str]]:
    """
    find groups and datasets within hdf file matching a set of names or class names
    :param hdf_filename: filename of hdf file
    :param names_or_classes: object names or NXclass names to search for
    :param attributes: list of attr fields to check against names
    :return: groups[], datasets[]
    """

    with load_hdf(hdf_filename) as hdf_file:
        group_paths = []
        dataset_paths = []

        def visit_links(name):
            # For each path in the file, create tree of parent-groups
            sub_groups = name.split('/')
            sub_group_paths = ['/'.join(sub_groups[:n]) for n in range(1, len(sub_groups) + 1)]
            sub_group_names = [
                bytes2str(grp.attrs.get(attr, '')) for attr in attributes for path in sub_group_paths
                if (grp := hdf_file.get(path))
            ] + sub_groups
            if all(arg in sub_group_names for arg in names_or_classes):
                h5py_obj = hdf_file.get(name)
                if isinstance(h5py_obj, h5py.Group):
                    group_paths.append(name)
                elif isinstance(h5py_obj, h5py.Dataset):
                    dataset_paths.append(name)
        hdf_file.visit_links(visit_links)
    return group_paths, dataset_paths
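
As a usage sketch (the NX_class value and file name are illustrative; importing from the source module listed above):

from hdfmap.hdf_loader import hdf_find

# all groups and datasets that sit beneath an NXdetector group
groups, datasets = hdf_find('scan_0.nxs', 'NXdetector')
print(f"{len(groups)} matching groups, {len(datasets)} matching datasets")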

hdf_find_first(hdf_filename, *names_or_classes, attributes=('NX_class', 'local_name'))

Return the first path of an object matching a set of names or class names.

Parameters:

    hdf_filename (str, required): filename of hdf file
    names_or_classes (str, default ()): object names or NXclass names to search for
    attributes (tuple[str], default ('NX_class', 'local_name')): list of attr fields to check against names

Returns:

    str | None: hdf_path, or None if no match

Source code in src/hdfmap/hdf_loader.py
def hdf_find_first(hdf_filename: str, *names_or_classes: str,
                   attributes: tuple[str] = ('NX_class', 'local_name')) -> str | None:
    """
    return the first path of object matching a set of names or class names
    :param hdf_filename: filename of hdf file
    :param names_or_classes: object names or NXclass names to search for
    :param attributes: list of attr fields to check against names
    :return: hdf_path or None if no match
    """

    with load_hdf(hdf_filename) as hdf_file:

        def visit_links(name):
            # For each path in the file, create tree of parent-groups
            parent_groups = name.split('/')
            parent_group_paths = ['/'.join(parent_groups[:n]) for n in range(1, len(parent_groups) + 1)]
            parent_group_names = [
                bytes2str(grp.attrs.get(attr, '')) for attr in attributes for path in parent_group_paths
                if (grp := hdf_file.get(path))
            ] + parent_groups
            if all(arg in parent_group_names for arg in names_or_classes):
                return name
            return None

        return hdf_file.visit_links(visit_links)
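
Because h5py's visit functions stop as soon as the callback returns something other than None, this yields the first match in file order. A hypothetical call (file name and search terms are placeholders):

from hdfmap.hdf_loader import hdf_find_first

# first object whose parent tree contains both an NXdata class and the name 'data'
path = hdf_find_first('scan_0.nxs', 'NXdata', 'data')
print(path if path is not None else 'no match')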

hdf_format(filenames, expression, hdf_map=None, default=None, fixed_output=False)

Evaluate a string format expression using dataset names.

Parameters:

    filenames (str | list[str], required): str or list of str - file paths
    expression (str, required): format expression to evaluate in each file, e.g. "the energy is {en:.2f} keV"
    hdf_map (HdfMap, default None): HdfMap object, or None to generate from first file
    default (default None): value to give if dataset doesn't exist in file
    fixed_output (default False): if True, always returns list len(filenames)

Returns:

    a single string for a single file, or a list of length len(filenames) for multiple files

Source code in src/hdfmap/file_functions.py
def hdf_format(filenames: str | list[str], expression: str, hdf_map: HdfMap = None, default=None, fixed_output=False):
    """
    Evaluate string format expression using dataset names
    :param filenames: str or list of str - file paths
    :param expression: str expression to evaluate in each file, e.g. "the energy is {en:.2f} keV"
    :param hdf_map: HdfMap object, or None to generate from first file
    :param default: value to give if dataset doesn't exist in file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.format_hdf(hdf, expression, default=default))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out
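
A brief sketch, assuming each file maps some dataset to the name 'en' (file names are placeholders):

from hdfmap import hdf_format

filenames = [f"scan_{n}.nxs" for n in range(5)]
# each '{...}' field is filled from the file's dataset namespace
labels = hdf_format(filenames, 'the energy is {en:.2f} keV')
for label in labels:
    print(label)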

hdf_image(filenames, index=None, hdf_map=None, fixed_output=False)

Return an image from each file, loaded via HdfMap.get_image at the given index.

Parameters:

    filenames (str | list[str], required): str or list of str - file paths
    index (slice, default None): index or slice of the dataset volume, or None to use the middle index
    hdf_map (HdfMap, default None): HdfMap object, or None to generate from first file
    fixed_output (default False): if True, always returns list len(filenames)

Returns:

    a single numpy array for a single file, or a list of length len(filenames) for multiple files

Source code in src/hdfmap/file_functions.py
def hdf_image(filenames: str | list[str], index: slice = None, hdf_map: HdfMap = None, fixed_output=False):
    """
    Return an image from each file, using hdf_map.get_image at the given index
    :param filenames: str or list of str - file paths
    :param index: index or slice of dataset volume, or None to use middle index
    :param hdf_map: HdfMap object, or None to generate from first file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output - numpy array
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_hdf_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.get_image(hdf, index=index))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out
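
For instance (file names are placeholders):

from hdfmap import hdf_image

# middle frame of the image dataset from a single file -> numpy array
frame = hdf_image('scan_0.nxs')
# frame 31 from each of several files -> list of numpy arrays
frames = hdf_image([f"scan_{n}.nxs" for n in range(5)], index=31)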

hdf_linked_files(hdf_filename, group='/')

Return a list of files linked to the current file, looking for all external links.

Parameters:

    hdf_filename (str, required): filename of hdf file
    group (str, default '/'): only look at links within this group

Returns:

    list[str]: list of filenames (usually relative file paths)

Source code in src/hdfmap/hdf_loader.py
def hdf_linked_files(hdf_filename: str, group: str = '/') -> list[str]:
    """
    Return a list of files linked to the current file, looking for all external links.

    :param hdf_filename: filename of hdf file
    :param group: only look at links within this group (default root)
    :return: list of str filenames (usually relative file paths)
    """

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)
        external_files = []

        def visit_links(_name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            if isinstance(obj, h5py.ExternalLink) and obj.filename not in external_files:
                external_files.append(obj.filename)
        hdf_group.visititems_links(visit_links)
    return external_files
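
A quick sketch, with a hypothetical file name (importing from the source module listed above):

from hdfmap.hdf_loader import hdf_linked_files

# every distinct external-link target referenced by the file
for linked_file in hdf_linked_files('scan_0.nxs'):
    print(linked_file)  # usually a relative path, e.g. a separate detector file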

hdf_tree_dict(hdf_filename)

Generate a summary dict of the HDF tree structure. The structure is:

    {'group': {'@attrs': str, 'sub-group': {}, 'dataset': str}, ...}

Group attributes are stored with names prefixed with '@'.

Parameters:

    hdf_filename (str, required): filename of hdf file

Returns:

    dict: {'entry': {'dataset': value}, ...}

Source code in src/hdfmap/hdf_loader.py
def hdf_tree_dict(hdf_filename: str) -> dict:
    """
    Generate summary dict of the hdf tree structure
    The structure is:
        {'group': {'@attrs': str, 'sub-group': {}, 'dataset': str}, ...}

    Group attributes are stored with names pre-fixed with '@'

    :param hdf_filename: filename of hdf file
    :return: {'entry': {'dataset': value}...}
    """

    def store(hdf_dict: dict, hdf_group: h5py.Group) -> dict:
        for key in hdf_group:
            obj = hdf_group.get(key)
            link = hdf_group.get(key, getlink=True)
            if obj is None:
                hdf_dict[key] = '! Missing'
                continue  # dataset may be missing due to a broken link
            # Group
            if isinstance(obj, h5py.Group):
                hdf_dict[key] = {f"@{attr}": str(val) for attr, val in obj.attrs.items()}
                store(hdf_dict[key], obj)
            # Dataset
            elif isinstance(obj, h5py.Dataset):
                if obj.size <= 1:
                    detail = str(obj[()])
                else:
                    detail = f"{obj.dtype}, {obj.shape}"
                if isinstance(link, (h5py.SoftLink, h5py.ExternalLink)):
                    detail = f"LINK: " + detail
                hdf_dict[key] = detail
        return hdf_dict
    return store({}, load_hdf(hdf_filename))
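
Since the result is a plain dict of strings and sub-dicts, it serialises directly, for example (file name is a placeholder):

import json
from hdfmap.hdf_loader import hdf_tree_dict

tree = hdf_tree_dict('scan_0.nxs')
# group attributes appear under '@name' keys; datasets as value or "dtype, shape" strings
print(json.dumps(tree, indent=2))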

hdf_tree_string(hdf_filename, all_links=True, group='/', attributes=True)

Generate a string of the HDF file structure, similar to h5ls. Uses h5py visititems.

Parameters:

    hdf_filename (str, required): filename of hdf file
    all_links (bool, default True): if True, also show links
    group (str, default '/'): only display the tree structure of this group
    attributes (bool, default True): if True, display the attributes of groups and datasets

Returns:

    str: tree representation of the file structure

Source code in src/hdfmap/hdf_loader.py
def hdf_tree_string(hdf_filename: str, all_links: bool = True, group: str = '/', attributes: bool = True) -> str:
    """
    Generate string of the hdf file structure, similar to h5ls. Uses h5py.visititems
    :param hdf_filename: filename of hdf file
    :param all_links: bool, if True, also show links
    :param group: only display tree structure of this group (default root)
    :param attributes: if True, display the attributes of groups and datasets
    :return: str
    """
    output = [f"########## {hdf_filename} ##########"]

    def grp(path):
        return f"-------------- {path} " + "-" * (63 - (17 + len(path)))

    def ds(path, detail):
        return f"{path:60}  :  {detail}"

    def attr(path, name, value):
        return f"{' ' * len(path) + '@' + name} = {value}"

    with load_hdf(hdf_filename) as hdf_file:
        hdf_group = hdf_file.get(group)
        output.append(grp(hdf_group.name))
        if attributes:
            output.extend([attr(hdf_group.name, name, value) for name, value in hdf_group.attrs.items()])

        def visit_paths(name, obj: h5py.Group | h5py.Dataset):
            if isinstance(obj, h5py.Dataset):
                if obj.size <= 1:
                    detail = f"{obj[()]}"
                else:
                    detail = f"{obj.dtype}, {obj.shape}"
                output.append(ds(name, detail))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in obj.attrs.items()])
            elif isinstance(obj, h5py.Group):
                output.append(grp(name))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in obj.attrs.items()])

        def visit_links(name, obj: h5py.HardLink | h5py.SoftLink | h5py.ExternalLink):
            h5py_obj = hdf_group.get(name)

            if isinstance(h5py_obj, h5py.Dataset):
                if isinstance(obj, h5py.ExternalLink):
                    detail = f"LINK: {h5py_obj.dtype}, {h5py_obj.shape}"
                elif h5py_obj.size <= 1:
                    detail = f"{h5py_obj[()]}"
                else:
                    detail = f"{h5py_obj.dtype}, {h5py_obj.shape}"
                output.append(ds(name, detail))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in h5py_obj.attrs.items()])
            elif isinstance(h5py_obj, h5py.Group):
                output.append(grp(name))
                if attributes:
                    output.extend([attr(name, _attr, value) for _attr, value in h5py_obj.attrs.items()])

        if all_links:
            hdf_group.visititems_links(visit_links)
        else:
            hdf_group.visititems(visit_paths)
        output.append('\n --- End --- ')
    return '\n'.join(output)
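
For example (the file name and group path are illustrative):

from hdfmap.hdf_loader import hdf_tree_string

# h5ls-style listing of a single entry, without attribute lines
print(hdf_tree_string('scan_0.nxs', group='/entry1', attributes=False))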

list_files(folder_directory, extension=DEFAULT_EXTENSION)

Return a list of full file paths in the directory with the given extension, sorted by modification time.

Source code in src/hdfmap/file_functions.py
def list_files(folder_directory: str, extension=DEFAULT_EXTENSION) -> list[str]:
    """Return list of files in directory with extension, returning list of full file paths"""
    try:
        return sorted(
            (file.path for file in os.scandir(folder_directory) if file.is_file() and file.name.endswith(extension)),
            key=os.path.getmtime
        )
    except FileNotFoundError:
        return []
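
A minimal sketch (the directory is a placeholder, and the extension is passed explicitly rather than relying on DEFAULT_EXTENSION):

from hdfmap.file_functions import list_files

# all .nxs files in the folder, oldest first (sorted by modification time)
files = list_files('/data/visit_123', extension='.nxs')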

load_hdf(hdf_filename, **kwargs)

Load hdf file, return h5py.File object

Source code in src/hdfmap/hdf_loader.py
def load_hdf(hdf_filename: str, **kwargs) -> h5py.File:
    """Load hdf file, return h5py.File object"""
    options = HDF_FILE_OPTIONS.copy()  # copy so kwargs do not mutate the module-level defaults
    options.update(kwargs)
    return h5py.File(hdf_filename, 'r', **options)
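
Since this returns a read-only h5py.File, it works naturally as a context manager (file name is a placeholder):

from hdfmap import load_hdf

with load_hdf('scan_0.nxs') as hdf:
    print(list(hdf.keys()))  # top-level groups and datasets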

nexus_data_block(filenames, hdf_map=None, fixed_output=False)

Create classic dict-like dataloader objects from NeXus files.

E.G.
    d = nexus_data_block('filename')
    d.scannable -> array
    d.metadata.filename -> value
    d.keys() -> list of items

Parameters:

    filenames (str | list[str], required): str or list of str - file paths
    hdf_map (HdfMap, default None): HdfMap object, or None to generate from first file
    fixed_output (default False): if True, always returns list len(filenames)

Returns:

    a single dict-like DataObject for a single file, or a list of length len(filenames) for multiple files

Source code in src/hdfmap/file_functions.py
def nexus_data_block(filenames: str | list[str], hdf_map: HdfMap = None, fixed_output=False):
    """
    Create classic dict like dataloader objects from nexus files
    E.G.
        d = nexus_data_block('filename')
        d.scannable -> array
        d.metadata.filename -> value
        d.keys() -> list of items

    :param filenames: str or list of str - file paths
    :param hdf_map: HdfMap object, or None to generate from first file
    :param fixed_output: if True, always returns list len(filenames)
    :return if single file: single output - dict like DataObject
    :return if multi file: list, len(filenames)
    """
    # cast as 1D arrays
    filenames = as_str_list(filenames)
    # generate hdf_map
    if hdf_map is None:
        hdf_map = create_nexus_map(filenames[0])
    out = []
    for filename in filenames:
        logger.info(f"\nHDF file: {filename}")
        with load_hdf(filename) as hdf:
            out.append(hdf_map.get_dataholder(hdf))
    if not fixed_output and len(filenames) == 1:
        return out[0]
    return out
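
A short sketch (file name is a placeholder; the available scannable and metadata names depend on the file):

from hdfmap.file_functions import nexus_data_block

d = nexus_data_block('scan_0.nxs')
print(d.keys())  # list of available items
# d.<scannable> gives an array, d.metadata.<name> gives a value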

set_all_logging_level(level)

Set the logging level of all loggers.

Logging levels (see builtin module logging):

    'notset'   |  0
    'debug'    |  10
    'info'     |  20
    'warning'  |  30
    'error'    |  40
    'critical' |  50

Parameters:

    level (str | int, required): str level name or int level

Returns:

    None

Source code in src/hdfmap/logging.py
def set_all_logging_level(level: str | int):
    """
    Set logging level of all loggers
    Logging Levels (see builtin module logging)
        'notset'   |  0
        'debug'    |  10
        'info'     |  20
        'warning'  |  30
        'error'    |  40
        'critical' |  50
    :param level: str level name or int level
    :return: None
    """
    try:
        level = level.upper()
        # level = logging.getLevelNamesMapping()[level]  # Python >3.11
        level = logging._nameToLevel[level]
    except AttributeError:
        level = int(level)

    logging_logger = logging.getLogger(__name__)
    for logger in [logging.getLogger(name) for name in logging.root.manager.loggerDict]:
        logger.setLevel(level)
    logging_logger.info(f"Logging level set to {level}")
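
Both spellings work, since string names are upper-cased and integers pass straight through (importing from the source module listed above):

from hdfmap.logging import set_all_logging_level

set_all_logging_level('debug')  # by name, case-insensitive
set_all_logging_level(20)       # or by numeric level (INFO)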