OpenCOMPES / sed / build 6725041947

01 Nov 2023 08:57PM UTC coverage: 90.459% (-0.02%) from 90.481%

Pull Request #227: Time stamped data (rettigl)
add processor function to add time-stamped data either from directly provided data or from data extracted from an EPICS archiver instance, and add tests for it

105 of 105 new or added lines in 5 files covered. (100.0%)
4333 of 4790 relevant lines covered (90.46%)
0.9 hits per line

Source File: /sed/core/processor.py (90.23% covered)

"""This module contains the core class for the sed package

"""
import pathlib
from typing import Any
from typing import cast
from typing import Dict
from typing import List
from typing import Sequence
from typing import Tuple
from typing import Union

import dask.dataframe as ddf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import xarray as xr

from sed.binning import bin_dataframe
from sed.calibrator import DelayCalibrator
from sed.calibrator import EnergyCalibrator
from sed.calibrator import MomentumCorrector
from sed.core.config import parse_config
from sed.core.config import save_config
from sed.core.dfops import add_time_stamped_data
from sed.core.dfops import apply_jitter
from sed.core.metadata import MetaHandler
from sed.diagnostics import grid_histogram
from sed.io import to_h5
from sed.io import to_nexus
from sed.io import to_tiff
from sed.loader import CopyTool
from sed.loader import get_loader
from sed.loader.mpes.loader import get_archiver_data
from sed.loader.mpes.loader import MpesLoader

N_CPU = psutil.cpu_count()


class SedProcessor:
    """Processor class of sed. Contains wrapper functions defining a work flow for data
    correction, calibration and binning.

    Args:
        metadata (dict, optional): Dict of external Metadata. Defaults to None.
        config (Union[dict, str], optional): Config dictionary or config file name.
            Defaults to None.
        dataframe (Union[pd.DataFrame, ddf.DataFrame], optional): dataframe to load
            into the class. Defaults to None.
        files (List[str], optional): List of files to pass to the loader defined in
            the config. Defaults to None.
        folder (str, optional): Folder containing files to pass to the loader
            defined in the config. Defaults to None.
        runs (Sequence[str], optional): List of run identifiers to pass to the loader
            defined in the config. Defaults to None.
        collect_metadata (bool): Option to collect metadata from files.
            Defaults to False.
        **kwds: Keyword arguments passed to parse_config and to the reader.
    """

    def __init__(
        self,
        metadata: dict = None,
        config: Union[dict, str] = None,
        dataframe: Union[pd.DataFrame, ddf.DataFrame] = None,
        files: List[str] = None,
        folder: str = None,
        runs: Sequence[str] = None,
        collect_metadata: bool = False,
        **kwds,
    ):
        """Processor class of sed. Contains wrapper functions defining a work flow
        for data correction, calibration, and binning.

        Args:
            metadata (dict, optional): Dict of external Metadata. Defaults to None.
            config (Union[dict, str], optional): Config dictionary or config file name.
                Defaults to None.
            dataframe (Union[pd.DataFrame, ddf.DataFrame], optional): dataframe to load
                into the class. Defaults to None.
            files (List[str], optional): List of files to pass to the loader defined in
                the config. Defaults to None.
            folder (str, optional): Folder containing files to pass to the loader
                defined in the config. Defaults to None.
            runs (Sequence[str], optional): List of run identifiers to pass to the loader
                defined in the config. Defaults to None.
            collect_metadata (bool): Option to collect metadata from files.
                Defaults to False.
            **kwds: Keyword arguments passed to parse_config and to the reader.
        """
        config_kwds = {
            key: value for key, value in kwds.items() if key in parse_config.__code__.co_varnames
        }
        for key in config_kwds.keys():
            del kwds[key]
        self._config = parse_config(config, **config_kwds)
        num_cores = self._config.get("binning", {}).get("num_cores", N_CPU - 1)
        if num_cores >= N_CPU:
            num_cores = N_CPU - 1
        self._config["binning"]["num_cores"] = num_cores

        self._dataframe: Union[pd.DataFrame, ddf.DataFrame] = None
        self._files: List[str] = []

        self._binned: xr.DataArray = None
        self._pre_binned: xr.DataArray = None

        self._attributes = MetaHandler(meta=metadata)

        loader_name = self._config["core"]["loader"]
        self.loader = get_loader(
            loader_name=loader_name,
            config=self._config,
        )

        self.ec = EnergyCalibrator(
            loader=self.loader,
            config=self._config,
        )

        self.mc = MomentumCorrector(
            config=self._config,
        )

        self.dc = DelayCalibrator(
            config=self._config,
        )

        self.use_copy_tool = self._config.get("core", {}).get(
            "use_copy_tool",
            False,
        )
        if self.use_copy_tool:
            try:
                self.ct = CopyTool(
                    source=self._config["core"]["copy_tool_source"],
                    dest=self._config["core"]["copy_tool_dest"],
                    **self._config["core"].get("copy_tool_kwds", {}),
                )
            except KeyError:
                self.use_copy_tool = False

        # Load data if provided:
        if dataframe is not None or files is not None or folder is not None or runs is not None:
            self.load(
                dataframe=dataframe,
                metadata=metadata,
                files=files,
                folder=folder,
                runs=runs,
                collect_metadata=collect_metadata,
                **kwds,
            )

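    # Illustrative usage sketch (an addition for clarity, not part of the original
    # file): constructing a processor from a config file and a folder of raw data.
    # The config path and data folder are hypothetical placeholders.
    #
    #     processor = SedProcessor(
    #         config="sed_config.yaml",
    #         folder="/path/to/raw/data",
    #         collect_metadata=False,
    #     )
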
    def __repr__(self):
        if self._dataframe is None:
            df_str = "Data Frame: No Data loaded"
        else:
            df_str = self._dataframe.__repr__()
        attributes_str = f"Metadata: {self._attributes.metadata}"
        pretty_str = df_str + "\n" + attributes_str
        return pretty_str

    @property
    def dataframe(self) -> Union[pd.DataFrame, ddf.DataFrame]:
        """Accessor to the underlying dataframe.

        Returns:
            Union[pd.DataFrame, ddf.DataFrame]: Dataframe object.
        """
        return self._dataframe

    @dataframe.setter
    def dataframe(self, dataframe: Union[pd.DataFrame, ddf.DataFrame]):
        """Setter for the underlying dataframe.

        Args:
            dataframe (Union[pd.DataFrame, ddf.DataFrame]): The dataframe object to set.
        """
        if not isinstance(dataframe, (pd.DataFrame, ddf.DataFrame)) or not isinstance(
            dataframe,
            self._dataframe.__class__,
        ):
            raise ValueError(
                "'dataframe' has to be a Pandas or Dask dataframe and has to be of the same kind "
                "as the dataframe loaded into the SedProcessor!\n"
                f"Loaded type: {self._dataframe.__class__}, provided type: {dataframe.__class__}.",
            )
        self._dataframe = dataframe

    @property
    def attributes(self) -> dict:
        """Accessor to the metadata dict.

        Returns:
            dict: The metadata dict.
        """
        return self._attributes.metadata

    def add_attribute(self, attributes: dict, name: str, **kwds):
        """Function to add element to the attributes dict.

        Args:
            attributes (dict): The attributes dictionary object to add.
            name (str): Key under which to add the dictionary to the attributes.
        """
        self._attributes.add(
            entry=attributes,
            name=name,
            **kwds,
        )

    @property
    def config(self) -> Dict[Any, Any]:
        """Getter attribute for the config dictionary

        Returns:
            Dict: The config dictionary.
        """
        return self._config

    @property
    def files(self) -> List[str]:
        """Getter attribute for the list of files

        Returns:
            List[str]: The list of loaded files
        """
        return self._files

    def cpy(self, path: Union[str, List[str]]) -> Union[str, List[str]]:
        """Function to mirror a list of files or a folder from a network drive to a
        local storage. Returns either the original or the copied path to the given
        path. The option to use this functionality is set by
        config["core"]["use_copy_tool"].

        Args:
            path (Union[str, List[str]]): Source path or path list.

        Returns:
            Union[str, List[str]]: Source or destination path or path list.
        """
        if self.use_copy_tool:
            if isinstance(path, list):
                path_out = []
                for file in path:
                    path_out.append(self.ct.copy(file))
                return path_out

            return self.ct.copy(path)

        return path

    def load(
        self,
        dataframe: Union[pd.DataFrame, ddf.DataFrame] = None,
        metadata: dict = None,
        files: List[str] = None,
        folder: str = None,
        runs: Sequence[str] = None,
        collect_metadata: bool = False,
        **kwds,
    ):
        """Load tabular data of single events into the dataframe object in the class.

        Args:
            dataframe (Union[pd.DataFrame, ddf.DataFrame], optional): data in tabular
                format. Accepts anything which can be interpreted by pd.DataFrame as
                an input. Defaults to None.
            metadata (dict, optional): Dict of external Metadata. Defaults to None.
            files (List[str], optional): List of file paths to pass to the loader.
                Defaults to None.
            folder (str, optional): Folder path to pass to the loader.
                Defaults to None.
            runs (Sequence[str], optional): List of run identifiers to pass to the
                loader. Defaults to None.
            collect_metadata (bool, optional): Option to collect metadata from files.
                Defaults to False.
            **kwds: Keyword arguments passed to the loader.

        Raises:
            ValueError: Raised if no valid input is provided.
        """
        if metadata is None:
            metadata = {}
        if dataframe is not None:
            self._dataframe = dataframe
        elif runs is not None:
            # If runs are provided, we only use the copy tool if also a folder is provided.
            # In that case, we copy the whole provided base folder tree, and pass the copied
            # version to the loader as base folder to look for the runs.
            if folder is not None:
                dataframe, metadata = self.loader.read_dataframe(
                    folders=cast(str, self.cpy(folder)),
                    runs=runs,
                    metadata=metadata,
                    collect_metadata=collect_metadata,
                    **kwds,
                )
            else:
                dataframe, metadata = self.loader.read_dataframe(
                    runs=runs,
                    metadata=metadata,
                    collect_metadata=collect_metadata,
                    **kwds,
                )

        elif folder is not None:
            dataframe, metadata = self.loader.read_dataframe(
                folders=cast(str, self.cpy(folder)),
                metadata=metadata,
                collect_metadata=collect_metadata,
                **kwds,
            )

        elif files is not None:
            dataframe, metadata = self.loader.read_dataframe(
                files=cast(List[str], self.cpy(files)),
                metadata=metadata,
                collect_metadata=collect_metadata,
                **kwds,
            )

        else:
            raise ValueError(
                "Either 'dataframe', 'files', 'folder', or 'runs' needs to be provided!",
            )

        self._dataframe = dataframe
        self._files = self.loader.files

        for key in metadata:
            self._attributes.add(
                entry=metadata[key],
                name=key,
                duplicate_policy="merge",
            )

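    # Illustrative sketch (an addition, not part of the original file) of the
    # mutually exclusive loading modes; file names, folder paths, and run IDs
    # are hypothetical placeholders.
    #
    #     processor.load(files=["scan_0001.h5", "scan_0002.h5"])  # from a file list
    #     processor.load(folder="/path/to/data")                  # from a folder
    #     processor.load(runs=["0001"], folder="/path/to/base")   # by run identifier
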
    # Momentum calibration workflow
    # 1. Bin raw detector data for distortion correction
    def bin_and_load_momentum_calibration(
        self,
        df_partitions: int = 100,
        axes: List[str] = None,
        bins: List[int] = None,
        ranges: Sequence[Tuple[float, float]] = None,
        plane: int = 0,
        width: int = 5,
        apply: bool = False,
        **kwds,
    ):
        """1st step of momentum correction work flow. Function to do an initial binning
        of the dataframe loaded to the class, slice a plane from it using an
        interactive view, and load it into the momentum corrector class.

        Args:
            df_partitions (int, optional): Number of dataframe partitions to use for
                the initial binning. Defaults to 100.
            axes (List[str], optional): Axes to bin.
                Defaults to config["momentum"]["axes"].
            bins (List[int], optional): Bin numbers to use for binning.
                Defaults to config["momentum"]["bins"].
            ranges (List[Tuple], optional): Ranges to use for binning.
                Defaults to config["momentum"]["ranges"].
            plane (int, optional): Initial value for the plane slider. Defaults to 0.
            width (int, optional): Initial value for the width slider. Defaults to 5.
            apply (bool, optional): Option to directly apply the values and select the
                slice. Defaults to False.
            **kwds: Keyword argument passed to the pre_binning function.
        """
        self._pre_binned = self.pre_binning(
            df_partitions=df_partitions,
            axes=axes,
            bins=bins,
            ranges=ranges,
            **kwds,
        )

        self.mc.load_data(data=self._pre_binned)
        self.mc.select_slicer(plane=plane, width=width, apply=apply)

    # 2. Generate the spline warp correction from momentum features.
    # Either autoselect features, or input features from view above.
    def define_features(
        self,
        features: np.ndarray = None,
        rotation_symmetry: int = 6,
        auto_detect: bool = False,
        include_center: bool = True,
        apply: bool = False,
        **kwds,
    ):
        """2. Step of the distortion correction workflow: Define feature points in
        momentum space. They can be either manually selected using a GUI tool, be
        provided as a list of feature points, or auto-generated using a
        feature-detection algorithm.

        Args:
            features (np.ndarray, optional): np.ndarray of features. Defaults to None.
            rotation_symmetry (int, optional): Number of rotational symmetry axes.
                Defaults to 6.
            auto_detect (bool, optional): Whether to auto-detect the features.
                Defaults to False.
            include_center (bool, optional): Option to include a point at the center
                in the feature list. Defaults to True.
            apply (bool, optional): Option to directly apply the selected features.
                Defaults to False.
            **kwds: Keyword arguments for MomentumCorrector.feature_extract() and
                MomentumCorrector.feature_select()
        """
        if auto_detect:  # automatic feature selection
            sigma = kwds.pop("sigma", self._config["momentum"]["sigma"])
            fwhm = kwds.pop("fwhm", self._config["momentum"]["fwhm"])
            sigma_radius = kwds.pop(
                "sigma_radius",
                self._config["momentum"]["sigma_radius"],
            )
            self.mc.feature_extract(
                sigma=sigma,
                fwhm=fwhm,
                sigma_radius=sigma_radius,
                rotsym=rotation_symmetry,
                **kwds,
            )
            features = self.mc.peaks

        self.mc.feature_select(
            rotsym=rotation_symmetry,
            include_center=include_center,
            features=features,
            apply=apply,
            **kwds,
        )

    # 3. Generate the spline warp correction from momentum features.
    # If no features have been selected before, use class defaults.
    def generate_splinewarp(
        self,
        use_center: bool = None,
        **kwds,
    ):
        """3. Step of the distortion correction workflow: Generate the correction
        function restoring the symmetry in the image using a splinewarp algorithm.

        Args:
            use_center (bool, optional): Option to use the position of the
                center point in the correction. Default is read from config, or set to True.
            **kwds: Keyword arguments for MomentumCorrector.spline_warp_estimate().
        """
        self.mc.spline_warp_estimate(use_center=use_center, **kwds)

        if self.mc.slice is not None:
            print("Original slice with reference features")
            self.mc.view(annotated=True, backend="bokeh", crosshair=True)

            print("Corrected slice with target features")
            self.mc.view(
                image=self.mc.slice_corrected,
                annotated=True,
                points={"feats": self.mc.ptargs},
                backend="bokeh",
                crosshair=True,
            )

            print("Original slice with target features")
            self.mc.view(
                image=self.mc.slice,
                points={"feats": self.mc.ptargs},
                annotated=True,
                backend="bokeh",
            )

    # 3a. Save spline-warp parameters to config file.
    def save_splinewarp(
        self,
        filename: str = None,
        overwrite: bool = False,
    ):
        """Save the generated spline-warp parameters to the folder config file.

        Args:
            filename (str, optional): Filename of the config dictionary to save to.
                Defaults to "sed_config.yaml" in the current folder.
            overwrite (bool, optional): Option to overwrite the present dictionary.
                Defaults to False.
        """
        if filename is None:
            filename = "sed_config.yaml"
        points = []
        try:
            for point in self.mc.pouter_ord:
                points.append([float(i) for i in point])
            if self.mc.include_center:
                points.append([float(i) for i in self.mc.pcent])
        except AttributeError as exc:
            raise AttributeError(
                "Momentum correction parameters not found, need to generate parameters first!",
            ) from exc
        config = {
            "momentum": {
                "correction": {
                    "rotation_symmetry": self.mc.rotsym,
                    "feature_points": points,
                    "include_center": self.mc.include_center,
                    "use_center": self.mc.use_center,
                },
            },
        }
        save_config(config, filename, overwrite)

    # 4. Pose corrections. Provide interactive interface for correcting
    # scaling, shift and rotation
    def pose_adjustment(
        self,
        scale: float = 1,
        xtrans: float = 0,
        ytrans: float = 0,
        angle: float = 0,
        apply: bool = False,
        use_correction: bool = True,
        reset: bool = True,
    ):
        """4. step of the distortion correction workflow: Generate an interactive panel
        to adjust affine transformations that are applied to the image. Applies first
        a scaling, next an x/y translation, and last a rotation around the center of
        the image.

        Args:
            scale (float, optional): Initial value of the scaling slider.
                Defaults to 1.
            xtrans (float, optional): Initial value of the xtrans slider.
                Defaults to 0.
            ytrans (float, optional): Initial value of the ytrans slider.
                Defaults to 0.
            angle (float, optional): Initial value of the angle slider.
                Defaults to 0.
            apply (bool, optional): Option to directly apply the provided
                transformations. Defaults to False.
            use_correction (bool, optional): Whether to use the spline warp correction
                or not. Defaults to True.
            reset (bool, optional):
                Option to reset the correction before transformation. Defaults to True.
        """
        # Generate homography as default if no distortion correction has been applied
        if self.mc.slice_corrected is None:
            if self.mc.slice is None:
                raise ValueError(
                    "No slice for corrections and transformations loaded!",
                )
            self.mc.slice_corrected = self.mc.slice

        if not use_correction:
            self.mc.reset_deformation()

        if self.mc.cdeform_field is None or self.mc.rdeform_field is None:
            # Generate distortion correction from config values
            self.mc.add_features()
            self.mc.spline_warp_estimate()

        self.mc.pose_adjustment(
            scale=scale,
            xtrans=xtrans,
            ytrans=ytrans,
            angle=angle,
            apply=apply,
            reset=reset,
        )

    # 5. Apply the momentum correction to the dataframe
    def apply_momentum_correction(
        self,
        preview: bool = False,
    ):
        """Applies the distortion correction and pose adjustment (optional)
        to the dataframe.

        Args:
            preview (bool): Option to preview the first elements of the data frame.
        """
        if self._dataframe is not None:
            print("Adding corrected X/Y columns to dataframe:")
            self._dataframe, metadata = self.mc.apply_corrections(
                df=self._dataframe,
            )
            # Add Metadata
            self._attributes.add(
                metadata,
                "momentum_correction",
                duplicate_policy="merge",
            )
            if preview:
                print(self._dataframe.head(10))
            else:
                print(self._dataframe)

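    # Illustrative end-to-end sketch of the distortion correction workflow,
    # steps 1-5 above (an addition, not part of the original file):
    #
    #     processor.bin_and_load_momentum_calibration(apply=True)
    #     processor.define_features(rotation_symmetry=6, apply=True)
    #     processor.generate_splinewarp()
    #     processor.pose_adjustment(apply=True)
    #     processor.apply_momentum_correction()
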
    # Momentum calibration work flow
    # 1. Calculate momentum calibration
    def calibrate_momentum_axes(
        self,
        point_a: Union[np.ndarray, List[int]] = None,
        point_b: Union[np.ndarray, List[int]] = None,
        k_distance: float = None,
        k_coord_a: Union[np.ndarray, List[float]] = None,
        k_coord_b: Union[np.ndarray, List[float]] = np.array([0.0, 0.0]),
        equiscale: bool = True,
        apply=False,
    ):
        """1. step of the momentum calibration workflow. Calibrate momentum
        axes using either provided pixel coordinates of a high-symmetry point and its
        distance to the BZ center, or the k-coordinates of two points in the BZ
        (depending on the equiscale option). Opens an interactive panel for selecting
        the points.

        Args:
            point_a (Union[np.ndarray, List[int]]): Pixel coordinates of the first
                point used for momentum calibration.
            point_b (Union[np.ndarray, List[int]], optional): Pixel coordinates of the
                second point used for momentum calibration.
                Defaults to config["momentum"]["center_pixel"].
            k_distance (float, optional): Momentum distance between point a and b.
                Needs to be provided if no specific k-coordinates for the two points
                are given. Defaults to None.
            k_coord_a (Union[np.ndarray, List[float]], optional): Momentum coordinate
                of the first point used for calibration. Used if equiscale is False.
                Defaults to None.
            k_coord_b (Union[np.ndarray, List[float]], optional): Momentum coordinate
                of the second point used for calibration. Defaults to [0.0, 0.0].
            equiscale (bool, optional): Option to apply different scales to kx and ky.
                If True, the distance between points a and b, and the absolute
                position of point a are used for defining the scale. If False, the
                scale is calculated from the k-positions of both points a and b.
                Defaults to True.
            apply (bool, optional): Option to directly store the momentum calibration
                in the class. Defaults to False.
        """
        if point_b is None:
            point_b = self._config["momentum"]["center_pixel"]

        self.mc.select_k_range(
            point_a=point_a,
            point_b=point_b,
            k_distance=k_distance,
            k_coord_a=k_coord_a,
            k_coord_b=k_coord_b,
            equiscale=equiscale,
            apply=apply,
        )

    # 1a. Save momentum calibration parameters to config file.
    def save_momentum_calibration(
        self,
        filename: str = None,
        overwrite: bool = False,
    ):
        """Save the generated momentum calibration parameters to the folder config file.

        Args:
            filename (str, optional): Filename of the config dictionary to save to.
                Defaults to "sed_config.yaml" in the current folder.
            overwrite (bool, optional): Option to overwrite the present dictionary.
                Defaults to False.
        """
        if filename is None:
            filename = "sed_config.yaml"
        calibration = {}
        try:
            for key in [
                "kx_scale",
                "ky_scale",
                "x_center",
                "y_center",
                "rstart",
                "cstart",
                "rstep",
                "cstep",
            ]:
                calibration[key] = float(self.mc.calibration[key])
        except KeyError as exc:
            raise KeyError(
                "Momentum calibration parameters not found, need to generate parameters first!",
            ) from exc

        config = {"momentum": {"calibration": calibration}}
        save_config(config, filename, overwrite)

    # 2. Apply correction and calibration to the dataframe
    def apply_momentum_calibration(
        self,
        calibration: dict = None,
        preview: bool = False,
    ):
        """2. step of the momentum calibration work flow: Apply the momentum
        calibration stored in the class to the dataframe. If corrected X/Y axes exist,
        these are used.

        Args:
            calibration (dict, optional): Optional dictionary with calibration data to
                use. Defaults to None.
            preview (bool): Option to preview the first elements of the data frame.
        """
        if self._dataframe is not None:
            print("Adding kx/ky columns to dataframe:")
            self._dataframe, metadata = self.mc.append_k_axis(
                df=self._dataframe,
                calibration=calibration,
            )

            # Add Metadata
            self._attributes.add(
                metadata,
                "momentum_calibration",
                duplicate_policy="merge",
            )
            if preview:
                print(self._dataframe.head(10))
            else:
                print(self._dataframe)

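    # Illustrative sketch of the momentum calibration workflow (an addition, not
    # part of the original file); the pixel coordinates and k_distance below are
    # hypothetical values:
    #
    #     processor.calibrate_momentum_axes(
    #         point_a=[308, 345],
    #         k_distance=1.3,
    #         apply=True,
    #     )
    #     processor.apply_momentum_calibration()
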
    # Energy correction workflow
    # 1. Adjust the energy correction parameters
    def adjust_energy_correction(
        self,
        correction_type: str = None,
        amplitude: float = None,
        center: Tuple[float, float] = None,
        apply=False,
        **kwds,
    ):
        """1. step of the energy correction workflow: Opens an interactive plot to
        adjust the parameters for the TOF/energy correction. Also pre-bins the data if
        they are not present yet.

        Args:
            correction_type (str, optional): Type of correction to apply to the TOF
                axis. Valid values are:

                - 'spherical'
                - 'Lorentzian'
                - 'Gaussian'
                - 'Lorentzian_asymmetric'

                Defaults to config["energy"]["correction_type"].
            amplitude (float, optional): Amplitude of the correction.
                Defaults to config["energy"]["correction"]["amplitude"].
            center (Tuple[float, float], optional): Center X/Y coordinates for the
                correction. Defaults to config["energy"]["correction"]["center"].
            apply (bool, optional): Option to directly apply the provided or default
                correction parameters. Defaults to False.
            **kwds: Keyword args passed to ``EnergyCalibrator.adjust_energy_correction``.
        """
        if self._pre_binned is None:
            print(
                "Pre-binned data not present, binning using defaults from config...",
            )
            self._pre_binned = self.pre_binning()

        self.ec.adjust_energy_correction(
            self._pre_binned,
            correction_type=correction_type,
            amplitude=amplitude,
            center=center,
            apply=apply,
            **kwds,
        )

    # 1a. Save energy correction parameters to config file.
    def save_energy_correction(
        self,
        filename: str = None,
        overwrite: bool = False,
    ):
        """Save the generated energy correction parameters to the folder config file.

        Args:
            filename (str, optional): Filename of the config dictionary to save to.
                Defaults to "sed_config.yaml" in the current folder.
            overwrite (bool, optional): Option to overwrite the present dictionary.
                Defaults to False.
        """
        if filename is None:
            filename = "sed_config.yaml"
        correction = {}
        try:
            for key, val in self.ec.correction.items():
                if key == "correction_type":
                    correction[key] = val
                elif key == "center":
                    correction[key] = [float(i) for i in val]
                else:
                    correction[key] = float(val)
        except AttributeError as exc:
            raise AttributeError(
                "Energy correction parameters not found, need to generate parameters first!",
            ) from exc

        config = {"energy": {"correction": correction}}
        save_config(config, filename, overwrite)

    # 2. Apply energy correction to dataframe
    def apply_energy_correction(
        self,
        correction: dict = None,
        preview: bool = False,
        **kwds,
    ):
        """2. step of the energy correction workflow: Apply the energy correction
        parameters stored in the class to the dataframe.

        Args:
            correction (dict, optional): Dictionary containing the correction
                parameters. Defaults to config["energy"]["correction"].
            preview (bool): Option to preview the first elements of the data frame.
            **kwds:
                Keyword args passed to ``EnergyCalibrator.apply_energy_correction``.
        """
        if self._dataframe is not None:
            print("Applying energy correction to dataframe...")
            self._dataframe, metadata = self.ec.apply_energy_correction(
                df=self._dataframe,
                correction=correction,
                **kwds,
            )

            # Add Metadata
            self._attributes.add(
                metadata,
                "energy_correction",
            )
            if preview:
                print(self._dataframe.head(10))
            else:
                print(self._dataframe)

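    # Illustrative sketch of the energy correction workflow (an addition, not
    # part of the original file); amplitude and center are hypothetical values:
    #
    #     processor.adjust_energy_correction(
    #         correction_type="Lorentzian",
    #         amplitude=2.5,
    #         center=(730, 730),
    #         apply=True,
    #     )
    #     processor.apply_energy_correction()
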
    # Energy calibrator workflow
    # 1. Load and normalize data
    def load_bias_series(
        self,
        binned_data: Union[xr.DataArray, Tuple[np.ndarray, np.ndarray, np.ndarray]] = None,
        data_files: List[str] = None,
        axes: List[str] = None,
        bins: List = None,
        ranges: Sequence[Tuple[float, float]] = None,
        biases: np.ndarray = None,
        bias_key: str = None,
        normalize: bool = None,
        span: int = None,
        order: int = None,
    ):
        """1. step of the energy calibration workflow: Load and bin data from
        single-event files, or load binned bias/TOF traces.

        Args:
            binned_data (Union[xr.DataArray, Tuple[np.ndarray, np.ndarray, np.ndarray]], optional):
                Binned data. If provided as a DataArray, it needs to contain the dimensions
                config["dataframe"]["tof_column"] and config["dataframe"]["bias_column"]. If
                provided as a tuple, it needs to contain the elements tof, biases, traces.
            data_files (List[str], optional): list of file paths to bin
            axes (List[str], optional): bin axes.
                Defaults to config["dataframe"]["tof_column"].
            bins (List, optional): number of bins.
                Defaults to config["energy"]["bins"].
            ranges (Sequence[Tuple[float, float]], optional): bin ranges.
                Defaults to config["energy"]["ranges"].
            biases (np.ndarray, optional): Bias voltages used. If missing, bias
                voltages are extracted from the data files.
            bias_key (str, optional): hdf5 path where bias values are stored.
                Defaults to config["energy"]["bias_key"].
            normalize (bool, optional): Option to normalize traces.
                Defaults to config["energy"]["normalize"].
            span (int, optional): span smoothing parameter of the LOESS method
                (see ``scipy.signal.savgol_filter()``).
                Defaults to config["energy"]["normalize_span"].
            order (int, optional): order smoothing parameter of the LOESS method
                (see ``scipy.signal.savgol_filter()``).
                Defaults to config["energy"]["normalize_order"].
        """
        if binned_data is not None:
            if isinstance(binned_data, xr.DataArray):
                if (
                    self._config["dataframe"]["tof_column"] not in binned_data.dims
                    or self._config["dataframe"]["bias_column"] not in binned_data.dims
                ):
                    raise ValueError(
                        "If binned_data is provided as an xarray, it needs to contain dimensions "
                        f"'{self._config['dataframe']['tof_column']}' and "
                        f"'{self._config['dataframe']['bias_column']}'!",
                    )
                tof = binned_data.coords[self._config["dataframe"]["tof_column"]].values
                biases = binned_data.coords[self._config["dataframe"]["bias_column"]].values
                traces = binned_data.values[:, :]
            else:
                try:
                    (tof, biases, traces) = binned_data
                except ValueError as exc:
                    raise ValueError(
                        "If binned_data is provided as tuple, it needs to contain "
                        "(tof, biases, traces)!",
                    ) from exc
            self.ec.load_data(biases=biases, traces=traces, tof=tof)

        elif data_files is not None:
            self.ec.bin_data(
                data_files=cast(List[str], self.cpy(data_files)),
                axes=axes,
                bins=bins,
                ranges=ranges,
                biases=biases,
                bias_key=bias_key,
            )

        else:
            raise ValueError("Either binned_data or data_files needs to be provided!")

        if (normalize is not None and normalize is True) or (
            normalize is None and self._config["energy"]["normalize"]
        ):
            if span is None:
                span = self._config["energy"]["normalize_span"]
            if order is None:
                order = self._config["energy"]["normalize_order"]
            self.ec.normalize(smooth=True, span=span, order=order)
        self.ec.view(
            traces=self.ec.traces_normed,
            xaxis=self.ec.tof,
            backend="bokeh",
        )

    # 2. Extract ranges and get peak positions
    def find_bias_peaks(
        self,
        ranges: Union[List[Tuple], Tuple],
        ref_id: int = 0,
        infer_others: bool = True,
        mode: str = "replace",
        radius: int = None,
        peak_window: int = None,
        apply: bool = False,
    ):
        """2. step of the energy calibration workflow: Find a peak within a given range
        for the indicated reference trace, and try to find the same peak for all
        other traces. Uses fast_dtw to align curves, which might not work well if the
        shape of the curves changes qualitatively. Ideally, choose a reference trace in
        the middle of the set, and don't choose the range too narrow around the peak.
        Alternatively, a list of ranges for all traces can be provided.

        Args:
            ranges (Union[List[Tuple], Tuple]): Tuple of TOF values indicating a range.
                Alternatively, a list of ranges for all traces can be given.
            ref_id (int, optional): The id of the trace the range refers to.
                Defaults to 0.
            infer_others (bool, optional): Whether to determine the range for the other
                traces. Defaults to True.
            mode (str, optional): Whether to "add" or "replace" existing ranges.
                Defaults to "replace".
            radius (int, optional): Radius parameter for fast_dtw.
                Defaults to config["energy"]["fastdtw_radius"].
            peak_window (int, optional): peak_window parameter for the peak detection
                algorithm: the number of points that have to behave monotonically
                around a peak. Defaults to config["energy"]["peak_window"].
            apply (bool, optional): Option to directly apply the provided parameters.
                Defaults to False.
        """
        if radius is None:
            radius = self._config["energy"]["fastdtw_radius"]
        if peak_window is None:
            peak_window = self._config["energy"]["peak_window"]
        if not infer_others:
            self.ec.add_ranges(
                ranges=ranges,
                ref_id=ref_id,
                infer_others=infer_others,
                mode=mode,
                radius=radius,
            )
            print(self.ec.featranges)
            try:
                self.ec.feature_extract(peak_window=peak_window)
                self.ec.view(
                    traces=self.ec.traces_normed,
                    segs=self.ec.featranges,
                    xaxis=self.ec.tof,
                    peaks=self.ec.peaks,
                    backend="bokeh",
                )
            except IndexError:
                print("Could not determine all peaks!")
                raise
        else:
            # New adjustment tool
            assert isinstance(ranges, tuple)
            self.ec.adjust_ranges(
                ranges=ranges,
                ref_id=ref_id,
                traces=self.ec.traces_normed,
                infer_others=infer_others,
                radius=radius,
                peak_window=peak_window,
                apply=apply,
            )

    # 3. Fit the energy calibration relation
    def calibrate_energy_axis(
        self,
        ref_id: int,
        ref_energy: float,
        method: str = None,
        energy_scale: str = None,
        **kwds,
    ):
        """3. Step of the energy calibration workflow: Calculate the calibration
        function for the energy axis, and apply it to the dataframe. Two
        approximations are implemented, a (normally 3rd order) polynomial
        approximation, and a d^2/(t-t0)^2 relation.

        Args:
            ref_id (int): id of the trace at the bias where the reference energy is
                given.
            ref_energy (float): Absolute energy of the detected feature at the bias
                of ref_id.
            method (str, optional): Method for determining the energy calibration.

                - **'lmfit'**: Energy calibration using lmfit and 1/t^2 form.
                - **'lstsq'**, **'lsqr'**: Energy calibration using polynomial form.

                Defaults to config["energy"]["calibration_method"]
            energy_scale (str, optional): Direction of increasing energy scale.

                - **'kinetic'**: increasing energy with decreasing TOF.
                - **'binding'**: increasing energy with increasing TOF.

                Defaults to config["energy"]["energy_scale"]
            **kwds: Keyword args passed to ``EnergyCalibrator.calibrate``.
        """
        if method is None:
            method = self._config["energy"]["calibration_method"]

        if energy_scale is None:
            energy_scale = self._config["energy"]["energy_scale"]

        self.ec.calibrate(
            ref_id=ref_id,
            ref_energy=ref_energy,
            method=method,
            energy_scale=energy_scale,
            **kwds,
        )
        print("Quality of Calibration:")
        self.ec.view(
            traces=self.ec.traces_normed,
            xaxis=self.ec.calibration["axis"],
            align=True,
            energy_scale=energy_scale,
            backend="bokeh",
        )
        print("E/TOF relationship:")
        self.ec.view(
            traces=self.ec.calibration["axis"][None, :],
            xaxis=self.ec.tof,
            backend="matplotlib",
            show_legend=False,
        )
        if energy_scale == "kinetic":
            plt.scatter(
                self.ec.peaks[:, 0],
                -(self.ec.biases - self.ec.biases[ref_id]) + ref_energy,
                s=50,
                c="k",
            )
        elif energy_scale == "binding":
            plt.scatter(
                self.ec.peaks[:, 0],
                self.ec.biases - self.ec.biases[ref_id] + ref_energy,
                s=50,
                c="k",
            )
        else:
            raise ValueError(
                'energy_scale needs to be either "binding" or "kinetic"'
                f", got {energy_scale}.",
            )
        plt.xlabel("Time-of-flight", fontsize=15)
        plt.ylabel("Energy (eV)", fontsize=15)
        plt.show()

    # 3a. Save energy calibration parameters to config file.
    def save_energy_calibration(
        self,
        filename: str = None,
        overwrite: bool = False,
    ):
        """Save the generated energy calibration parameters to the folder config file.

        Args:
            filename (str, optional): Filename of the config dictionary to save to.
                Defaults to "sed_config.yaml" in the current folder.
            overwrite (bool, optional): Option to overwrite the present dictionary.
                Defaults to False.
        """
        if filename is None:
            filename = "sed_config.yaml"
        calibration = {}
        try:
            for key, value in self.ec.calibration.items():
                if key in ["axis", "refid", "Tmat", "bvec"]:
                    continue
                if key == "energy_scale":
                    calibration[key] = value
                elif key == "coeffs":
                    calibration[key] = [float(i) for i in value]
                else:
                    calibration[key] = float(value)
        except AttributeError as exc:
            raise AttributeError(
                "Energy calibration parameters not found, need to generate parameters first!",
            ) from exc

        config = {"energy": {"calibration": calibration}}
        save_config(config, filename, overwrite)

1126
    # 4. Apply energy calibration to the dataframe
1127
    def append_energy_axis(
1✔
1128
        self,
1129
        calibration: dict = None,
1130
        preview: bool = False,
1131
        **kwds,
1132
    ):
1133
        """4. step of the energy calibration workflow: Apply the calibration function
1134
        to to the dataframe. Two approximations are implemented, a (normally 3rd order)
1135
        polynomial approximation, and a d^2/(t-t0)^2 relation. a calibration dictionary
1136
        can be provided.
1137

1138
        Args:
1139
            calibration (dict, optional): Calibration dict containing calibration
1140
                parameters. Overrides calibration from class or config.
1141
                Defaults to None.
1142
            preview (bool): Option to preview the first elements of the data frame.
1143
            **kwds:
1144
                Keyword args passed to ``EnergyCalibrator.append_energy_axis``.
1145
        """
1146
        if self._dataframe is not None:
1✔
1147
            print("Adding energy column to dataframe:")
1✔
1148
            self._dataframe, metadata = self.ec.append_energy_axis(
1✔
1149
                df=self._dataframe,
1150
                calibration=calibration,
1151
                **kwds,
1152
            )
1153

1154
            # Add Metadata
1155
            self._attributes.add(
1✔
1156
                metadata,
1157
                "energy_calibration",
1158
                duplicate_policy="merge",
1159
            )
1160
            if preview:
1✔
1161
                print(self._dataframe.head(10))
1✔
1162
            else:
1163
                print(self._dataframe)
1✔
1164

1165
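    # Illustrative sketch of the energy calibration workflow, steps 1-4 above
    # (an addition, not part of the original file); the file name, TOF range,
    # and reference energy are hypothetical values:
    #
    #     processor.load_bias_series(data_files=["bias_scan.h5"])
    #     processor.find_bias_peaks(ranges=(64000, 66000), ref_id=5, apply=True)
    #     processor.calibrate_energy_axis(ref_id=5, ref_energy=-0.5)
    #     processor.append_energy_axis()
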
    # Delay calibration function
1166
    def calibrate_delay_axis(
1✔
1167
        self,
1168
        delay_range: Tuple[float, float] = None,
1169
        datafile: str = None,
1170
        preview: bool = False,
1171
        **kwds,
1172
    ):
1173
        """Append delay column to dataframe. Either provide delay ranges, or read
1174
        them from a file.
1175

1176
        Args:
1177
            delay_range (Tuple[float, float], optional): The scanned delay range in
1178
                picoseconds. Defaults to None.
1179
            datafile (str, optional): The file from which to read the delay ranges.
1180
                Defaults to None.
1181
            preview (bool): Option to preview the first elements of the data frame.
1182
            **kwds: Keyword args passed to ``DelayCalibrator.append_delay_axis``.
1183
        """
1184
        if self._dataframe is not None:
1✔
1185
            print("Adding delay column to dataframe:")
1✔
1186

1187
            if delay_range is not None:
1✔
1188
                self._dataframe, metadata = self.dc.append_delay_axis(
1✔
1189
                    self._dataframe,
1190
                    delay_range=delay_range,
1191
                    **kwds,
1192
                )
1193
            else:
1194
                if datafile is None:
1✔
1195
                    try:
1✔
1196
                        datafile = self._files[0]
1✔
1197
                    except IndexError:
×
1198
                        print(
×
1199
                            "No datafile available, specify either",
1200
                            " 'datafile' or 'delay_range'",
1201
                        )
1202
                        raise
×
1203

1204
                self._dataframe, metadata = self.dc.append_delay_axis(
1✔
1205
                    self._dataframe,
1206
                    datafile=datafile,
1207
                    **kwds,
1208
                )
1209

1210
            # Add Metadata
1211
            self._attributes.add(
1✔
1212
                metadata,
1213
                "delay_calibration",
1214
                duplicate_policy="merge",
1215
            )
1216
            if preview:
1✔
1217
                print(self._dataframe.head(10))
1✔
1218
            else:
1219
                print(self._dataframe)
1✔
1220

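    # Illustrative sketch (an addition, not part of the original file): either
    # pass the scanned delay range directly, or have it read from a data file;
    # the values and file name are hypothetical.
    #
    #     processor.calibrate_delay_axis(delay_range=(-500.0, 1500.0))
    #     # or: processor.calibrate_delay_axis(datafile="scan_0001.h5")
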
    def add_jitter(
        self,
        cols: List[str] = None,
        amps: Union[float, Sequence[float]] = None,
        **kwds,
    ):
        """Add jitter to the selected dataframe columns.

        Args:
            cols (List[str], optional): The columns onto which to apply jitter.
                Defaults to config["dataframe"]["jitter_cols"].
            amps (Union[float, Sequence[float]], optional): Amplitude scalings for the
                jittering noise. If one number is given, the same is used for all axes.
                For uniform noise (default) it will cover the interval [-amp, +amp].
                Defaults to config["dataframe"]["jitter_amps"].
            **kwds: additional keyword arguments passed to apply_jitter
        """
        if cols is None:
            cols = self._config["dataframe"]["jitter_cols"]
        for loc, col in enumerate(cols):
            if col.startswith("@"):
                cols[loc] = self._config["dataframe"].get(col.strip("@"))

        if amps is None:
            amps = self._config["dataframe"]["jitter_amps"]

        self._dataframe = self._dataframe.map_partitions(
            apply_jitter,
            cols=cols,
            cols_jittered=cols,
            amps=amps,
            **kwds,
        )
        metadata = []
        for col in cols:
            metadata.append(col)
        self._attributes.add(metadata, "jittering", duplicate_policy="append")

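    # Illustrative sketch (an addition, not part of the original file): jitter
    # the default columns from the config, or explicitly given columns with a
    # common amplitude; the column names are hypothetical.
    #
    #     processor.add_jitter()
    #     processor.add_jitter(cols=["X", "Y"], amps=0.5)
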
1259
    def add_time_stamped_data(
1✔
1260
        self,
1261
        dest_column: str,
1262
        time_stamps: np.ndarray = None,
1263
        data: np.ndarray = None,
1264
        archiver_channel: str = None,
1265
        **kwds,
1266
    ):
1267
        """Add data in form of timestamp/value pairs to the dataframe using interpolation to the
1268
        timestamps in the dataframe. The time-stamped data can either be provided, or fetched from
1269
        an EPICS archiver instance.
1270

1271
        Args:
1272
            dest_column (str): destination column name
1273
            time_stamps (np.ndarray, optional): Time stamps of the values to add. If omitted,
1274
                time stamps are retrieved from the epics archiver
1275
            data (np.ndarray, optional): Values corresponding at the time stamps in time_stamps.
1276
                If omitted, data are retrieved from the epics archiver.
1277
            archiver_channel (str, optional): EPICS archiver channel from which to retrieve data.
1278
                Either this or data and time_stamps have to be present.
1279
            **kwds: additional keyword arguments passed to add_time_stamped_data
1280
        """
1281
        time_stamp_column = kwds.pop(
1✔
1282
            "time_stamp_column",
1283
            self._config["dataframe"].get("time_stamp_alias", ""),
1284
        )
1285

1286
        if time_stamps is None and data is None:
1✔
1287
            if archiver_channel is None:
×
1288
                raise ValueError(
×
1289
                    "Either archiver_channel or both time_stamps and data have to be present!",
1290
                )
1291
            if self.loader.__name__ != "mpes":
×
1292
                raise NotImplementedError(
×
1293
                    "This function is currently only implemented for the mpes loader!",
1294
                )
1295
            ts_from, ts_to = cast(MpesLoader, self.loader).get_start_and_end_time()
×
1296
            # get channel data with +-5 seconds safety margin
1297
            time_stamps, data = get_archiver_data(
×
1298
                archiver_url=self._config["metadata"].get("archiver_url", ""),
1299
                archiver_channel=archiver_channel,
1300
                ts_from=ts_from - 5,
1301
                ts_to=ts_to + 5,
1302
            )
1303

1304
        self._dataframe = add_time_stamped_data(
1✔
1305
            self._dataframe,
1306
            time_stamps=time_stamps,
1307
            data=data,
1308
            dest_column=dest_column,
1309
            time_stamp_column=time_stamp_column,
1310
            **kwds,
1311
        )
1312
        metadata: List[Any] = []
1✔
1313
        metadata.append(dest_column)
1✔
1314
        metadata.append(time_stamps)
1✔
1315
        metadata.append(data)
1✔
1316
        self._attributes.add(metadata, "time_stamped_data", duplicate_policy="append")
1✔
1317

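    # Usage sketch (editor's example; `sp` and the column/channel names are
    # assumptions): data can be passed explicitly as timestamp/value pairs, or
    # pulled from a configured EPICS archiver when using the mpes loader.
    #
    #     import numpy as np
    #     ts = np.array([1.0e9, 1.0e9 + 60.0])  # epoch seconds
    #     vals = np.array([100.0, 101.5])
    #     sp.add_time_stamped_data("temperature", time_stamps=ts, data=vals)
    #     # or, with config["metadata"]["archiver_url"] set:
    #     sp.add_time_stamped_data("temperature", archiver_channel="SAMPLE:TEMP")
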
1318
    def pre_binning(
1✔
1319
        self,
1320
        df_partitions: int = 100,
1321
        axes: List[str] = None,
1322
        bins: List[int] = None,
1323
        ranges: Sequence[Tuple[float, float]] = None,
1324
        **kwds,
1325
    ) -> xr.DataArray:
1326
        """Function to do an initial binning of the dataframe loaded to the class.
1327

1328
        Args:
1329
            df_partitions (int, optional): Number of dataframe partitions to use for
1330
                the initial binning. Defaults to 100.
1331
            axes (List[str], optional): Axes to bin.
1332
                Defaults to config["momentum"]["axes"].
1333
            bins (List[int], optional): Bin numbers to use for binning.
1334
                Defaults to config["momentum"]["bins"].
1335
            ranges (Sequence[Tuple[float, float]], optional): Ranges to use for binning.
1336
                Defaults to config["momentum"]["ranges"].
1337
            **kwds: Keyword arguments passed to ``compute``.
1338

1339
        Returns:
1340
            xr.DataArray: pre-binned data-array.
1341
        """
1342
        if axes is None:
1✔
1343
            axes = self._config["momentum"]["axes"]
1✔
1344
        for loc, axis in enumerate(axes):
1✔
1345
            if axis.startswith("@"):
1✔
1346
                axes[loc] = self._config["dataframe"].get(axis.strip("@"))
1✔
1347

1348
        if bins is None:
1✔
1349
            bins = self._config["momentum"]["bins"]
1✔
1350
        if ranges is None:
1✔
1351
            ranges_ = list(self._config["momentum"]["ranges"])
1✔
1352
            ranges_[2] = np.asarray(ranges_[2]) / 2 ** (
1✔
1353
                self._config["dataframe"]["tof_binning"] - 1
1354
            )
1355
            ranges = [cast(Tuple[float, float], tuple(v)) for v in ranges_]
1✔
1356

1357
        assert self._dataframe is not None, "dataframe needs to be loaded first!"
1✔
1358

1359
        return self.compute(
1✔
1360
            bins=bins,
1361
            axes=axes,
1362
            ranges=ranges,
1363
            df_partitions=df_partitions,
1364
            **kwds,
1365
        )
1366

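    # Usage sketch: a quick overview binning over the configured momentum
    # axes/bins/ranges, restricted to the first 20 dataframe partitions
    # (`sp` is an assumed, already-loaded processor instance):
    #
    #     overview = sp.pre_binning(df_partitions=20)
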
1367
    def compute(
1✔
1368
        self,
1369
        bins: Union[
1370
            int,
1371
            dict,
1372
            tuple,
1373
            List[int],
1374
            List[np.ndarray],
1375
            List[tuple],
1376
        ] = 100,
1377
        axes: Union[str, Sequence[str]] = None,
1378
        ranges: Sequence[Tuple[float, float]] = None,
1379
        **kwds,
1380
    ) -> xr.DataArray:
1381
        """Compute the histogram along the given dimensions.
1382

1383
        Args:
1384
            bins (int, dict, tuple, List[int], List[np.ndarray], List[tuple], optional):
1385
                Definition of the bins. Can be any of the following cases:
1386

1387
                - an integer describing the number of bins on all dimensions
1388
                - a tuple of 3 numbers describing start, end and step of the binning
1389
                  range
1390
                - an np.ndarray defining the binning edges
1391
                - a list (NOT a tuple) of any of the above (int, tuple or np.ndarray)
1392
                - a dictionary made of the axes as keys and any of the above as values.
1393

1394
                This takes priority over the axes and ranges arguments. Defaults to 100.
1395
            axes (Union[str, Sequence[str]], optional): The names of the axes (columns)
1396
                on which to calculate the histogram. The order will be the order of the
1397
                dimensions in the resulting array. Defaults to None.
1398
            ranges (Sequence[Tuple[float, float]], optional): list of tuples containing
1399
                the start and end point of the binning range. Defaults to None.
1400
            **kwds: Keyword arguments:
1401

1402
                - **hist_mode**: Histogram calculation method. "numpy" or "numba". See
1403
                  ``bin_dataframe`` for details. Defaults to
1404
                  config["binning"]["hist_mode"].
1405
                - **mode**: Defines how the results from each partition are combined.
1406
                  "fast", "lean" or "legacy". See ``bin_dataframe`` for details.
1407
                  Defaults to config["binning"]["mode"].
1408
                - **pbar**: Option to show the tqdm progress bar. Defaults to
1409
                  config["binning"]["pbar"].
1410
                - **n_cores**: Number of CPU cores to use for parallelization.
1411
                  Defaults to config["binning"]["num_cores"] or N_CPU-1.
1412
                - **threads_per_worker**: Limit the number of threads that
1413
                  multiprocessing can spawn per binning thread. Defaults to
1414
                  config["binning"]["threads_per_worker"].
1415
                - **threadpool_api**: The API to use for multiprocessing. "blas",
1416
                  "openmp" or None. See ``threadpool_limit`` for details. Defaults to
1417
                  config["binning"]["threadpool_API"].
1418
                - **df_partitions**: A list of dataframe partitions. Defaults to all
1419
                  partitions.
1420

1421
                Additional kwds are passed to ``bin_dataframe``.
1422

1423
        Raises:
1424
            AssertionError: Raised when no dataframe has been loaded.
1425

1426
        Returns:
1427
            xr.DataArray: The result of the n-dimensional binning represented in an
1428
            xarray object, combining the data with the axes.
1429
        """
1430
        assert self._dataframe is not None, "dataframe needs to be loaded first!"
1✔
1431

1432
        hist_mode = kwds.pop("hist_mode", self._config["binning"]["hist_mode"])
1✔
1433
        mode = kwds.pop("mode", self._config["binning"]["mode"])
1✔
1434
        pbar = kwds.pop("pbar", self._config["binning"]["pbar"])
1✔
1435
        num_cores = kwds.pop("num_cores", self._config["binning"]["num_cores"])
1✔
1436
        threads_per_worker = kwds.pop(
1✔
1437
            "threads_per_worker",
1438
            self._config["binning"]["threads_per_worker"],
1439
        )
1440
        threadpool_api = kwds.pop(
1✔
1441
            "threadpool_API",
1442
            self._config["binning"]["threadpool_API"],
1443
        )
1444
        df_partitions = kwds.pop("df_partitions", None)
1✔
1445
        if df_partitions is not None:
1✔
1446
            dataframe = self._dataframe.partitions[
1✔
1447
                0 : min(df_partitions, self._dataframe.npartitions)
1448
            ]
1449
        else:
1450
            dataframe = self._dataframe
1✔
1451

1452
        self._binned = bin_dataframe(
1✔
1453
            df=dataframe,
1454
            bins=bins,
1455
            axes=axes,
1456
            ranges=ranges,
1457
            hist_mode=hist_mode,
1458
            mode=mode,
1459
            pbar=pbar,
1460
            n_cores=num_cores,
1461
            threads_per_worker=threads_per_worker,
1462
            threadpool_api=threadpool_api,
1463
            **kwds,
1464
        )
1465

1466
        for dim in self._binned.dims:
1✔
1467
            try:
1✔
1468
                self._binned[dim].attrs["unit"] = self._config["dataframe"]["units"][dim]
1✔
1469
            except KeyError:
1✔
1470
                pass
1✔
1471

1472
        self._binned.attrs["units"] = "counts"
1✔
1473
        self._binned.attrs["long_name"] = "photoelectron counts"
1✔
1474
        self._binned.attrs["metadata"] = self._attributes.metadata
1✔
1475

1476
        return self._binned
1✔
1477

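    # Usage sketch (hypothetical axis names, bin counts, and ranges):
    #
    #     res = sp.compute(
    #         bins=[80, 80, 200],
    #         axes=["kx", "ky", "energy"],
    #         ranges=[(-2.0, 2.0), (-2.0, 2.0), (-5.0, 1.0)],
    #     )
    #     res.sum(dim="energy").plot()  # inspect the momentum map via xarray
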
1478
    def view_event_histogram(
1✔
1479
        self,
1480
        dfpid: int,
1481
        ncol: int = 2,
1482
        bins: Sequence[int] = None,
1483
        axes: Sequence[str] = None,
1484
        ranges: Sequence[Tuple[float, float]] = None,
1485
        backend: str = "bokeh",
1486
        legend: bool = True,
1487
        histkwds: dict = None,
1488
        legkwds: dict = None,
1489
        **kwds,
1490
    ):
1491
        """Plot individual histograms of specified dimensions (axes) from a substituent
1492
        dataframe partition.
1493

1494
        Args:
1495
            dfpid (int): Number of the data frame partition to look at.
1496
            ncol (int, optional): Number of columns in the plot grid. Defaults to 2.
1497
            bins (Sequence[int], optional): Number of bins to use for the specified
1498
                axes. Defaults to config["histogram"]["bins"].
1499
            axes (Sequence[str], optional): Names of the axes to display.
1500
                Defaults to config["histogram"]["axes"].
1501
            ranges (Sequence[Tuple[float, float]], optional): Value ranges of all
1502
                specified axes. Defaults to config["histogram"]["ranges"].
1503
            backend (str, optional): Backend of the plotting library
1504
                ('matplotlib' or 'bokeh'). Defaults to "bokeh".
1505
            legend (bool, optional): Option to include a legend in the histogram plots.
1506
                Defaults to True.
1507
            histkwds (dict, optional): Keyword arguments for histograms
1508
                (see ``matplotlib.pyplot.hist()``). Defaults to {}.
1509
            legkwds (dict, optional): Keyword arguments for legend
1510
                (see ``matplotlib.pyplot.legend()``). Defaults to {}.
1511
            **kwds: Extra keyword arguments passed to
1512
                ``sed.diagnostics.grid_histogram()``.
1513

1514
        Raises:
1515
            TypeError: Raises when the input values are not of the correct type.
1516
        """
1517
        if bins is None:
1✔
1518
            bins = self._config["histogram"]["bins"]
1✔
1519
        if axes is None:
1✔
1520
            axes = self._config["histogram"]["axes"]
1✔
1521
        axes = list(axes)
1✔
1522
        for loc, axis in enumerate(axes):
1✔
1523
            if axis.startswith("@"):
1✔
1524
                axes[loc] = self._config["dataframe"].get(axis.strip("@"))
1✔
1525
        if ranges is None:
1✔
1526
            ranges = list(self._config["histogram"]["ranges"])
1✔
1527
            for loc, axis in enumerate(axes):
1✔
1528
                if axis == self._config["dataframe"]["tof_column"]:
1✔
1529
                    ranges[loc] = np.asarray(ranges[loc]) / 2 ** (
1✔
1530
                        self._config["dataframe"]["tof_binning"] - 1
1531
                    )
1532
                elif axis == self._config["dataframe"]["adc_column"]:
1✔
1533
                    ranges[loc] = np.asarray(ranges[loc]) / 2 ** (
×
1534
                        self._config["dataframe"]["adc_binning"] - 1
1535
                    )
1536

1537
        input_types = map(type, [axes, bins, ranges])
1✔
1538
        allowed_types = [list, tuple]
1✔
1539

1540
        df = self._dataframe
1✔
1541

1542
        if not set(input_types).issubset(allowed_types):
1✔
1543
            raise TypeError(
×
1544
                "Inputs of axes, bins, ranges need to be list or tuple!",
1545
            )
1546

1547
        # Read out the values for the specified groups
1548
        group_dict_dd = {}
1✔
1549
        dfpart = df.get_partition(dfpid)
1✔
1550
        cols = dfpart.columns
1✔
1551
        for ax in axes:
1✔
1552
            group_dict_dd[ax] = dfpart.values[:, cols.get_loc(ax)]
1✔
1553
        group_dict = ddf.compute(group_dict_dd)[0]
1✔
1554

1555
        # Plot multiple histograms in a grid
1556
        grid_histogram(
1✔
1557
            group_dict,
1558
            ncol=ncol,
1559
            rvs=axes,
1560
            rvbins=bins,
1561
            rvranges=ranges,
1562
            backend=backend,
1563
            legend=legend,
1564
            histkwds=histkwds,
1565
            legkwds=legkwds,
1566
            **kwds,
1567
        )
1568

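    # Usage sketch: inspect the raw event distributions of the first dataframe
    # partition, using the defaults from config["histogram"] (`sp` assumed):
    #
    #     sp.view_event_histogram(dfpid=0)
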
1569
    def save(
1✔
1570
        self,
1571
        faddr: str,
1572
        **kwds,
1573
    ):
1574
        """Saves the binned data to the provided path and filename.
1575

1576
        Args:
1577
            faddr (str): Path and name of the file to write. Its extension determines
1578
                the file type to write. Valid file types are:
1579

1580
                - "*.tiff", "*.tif": Saves a TIFF stack.
1581
                - "*.h5", "*.hdf5": Saves an HDF5 file.
1582
                - "*.nxs", "*.nexus": Saves a NeXus file.
1583

1584
            **kwds: Keyword arguments, which are passed to the writer functions:
1585
                For TIFF writing:
1586

1587
                - **alias_dict**: Dictionary of dimension aliases to use.
1588

1589
                For HDF5 writing:
1590

1591
                - **mode**: hdf5 read/write mode. Defaults to "w".
1592

1593
                For NeXus:
1594

1595
                - **reader**: Name of the nexustools reader to use.
1596
                  Defaults to config["nexus"]["reader"]
1597
                - **definition**: NeXus application definition to use for saving.
1598
                  Must be supported by the used ``reader``. Defaults to
1599
                  config["nexus"]["definition"]
1600
                - **input_files**: A list of input files to pass to the reader.
1601
                  Defaults to config["nexus"]["input_files"]
1602
                - **eln_data**: An electronic-lab-notebook file in '.yaml' format
1603
                  to add to the list of files to pass to the reader.
1604
        """
1605
        if self._binned is None:
1✔
1606
            raise NameError("Need to bin data first!")
1✔
1607

1608
        extension = pathlib.Path(faddr).suffix
1✔
1609

1610
        if extension in (".tif", ".tiff"):
1✔
1611
            to_tiff(
1✔
1612
                data=self._binned,
1613
                faddr=faddr,
1614
                **kwds,
1615
            )
1616
        elif extension in (".h5", ".hdf5"):
1✔
1617
            to_h5(
1✔
1618
                data=self._binned,
1619
                faddr=faddr,
1620
                **kwds,
1621
            )
1622
        elif extension in (".nxs", ".nexus"):
1✔
1623
            try:
1✔
1624
                reader = kwds.pop("reader", self._config["nexus"]["reader"])
1✔
1625
                definition = kwds.pop(
1✔
1626
                    "definition",
1627
                    self._config["nexus"]["definition"],
1628
                )
1629
                input_files = kwds.pop(
1✔
1630
                    "input_files",
1631
                    self._config["nexus"]["input_files"],
1632
                )
1633
            except KeyError as exc:
×
1634
                raise ValueError(
×
1635
                    "The nexus reader, definition and input files need to be provide!",
1636
                ) from exc
1637

1638
            if isinstance(input_files, str):
1✔
1639
                input_files = [input_files]
1✔
1640

1641
            if "eln_data" in kwds:
1✔
1642
                input_files.append(kwds.pop("eln_data"))
×
1643

1644
            to_nexus(
1✔
1645
                data=self._binned,
1646
                faddr=faddr,
1647
                reader=reader,
1648
                definition=definition,
1649
                input_files=input_files,
1650
                **kwds,
1651
            )
1652

1653
        else:
1654
            raise NotImplementedError(
1✔
1655
                f"Unrecognized file format: {extension}.",
1656
            )
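
    # Usage sketch (hypothetical file names): the file extension selects the
    # writer, so after binning one might write any of:
    #
    #     sp.save("binned.h5")    # HDF5
    #     sp.save("binned.tiff")  # TIFF stack
    #     sp.save("binned.nxs")   # NeXus, using the config["nexus"] settings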