• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OpenCOMPES / sed / 6942383825

21 Nov 2023 10:22AM UTC coverage: 89.993% (-0.6%) from 90.586%
6942383825

push

github

zain-sohail
workflow runs when it is updated

5009 of 5566 relevant lines covered (89.99%)

0.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.0
/sed/loader/utils.py
1
"""Utilities for loaders
2
"""
3
from glob import glob
1✔
4
from typing import cast
1✔
5
from typing import List
1✔
6
from typing import Sequence
1✔
7

8
import dask.dataframe
1✔
9
import numpy as np
1✔
10
from h5py import File
1✔
11
from h5py import Group
1✔
12
from natsort import natsorted
1✔
13

14

15
def gather_files(
    folder: str,
    extension: str,
    f_start: int = None,
    f_end: int = None,
    f_step: int = 1,
    file_sorting: bool = True,
) -> List[str]:
    """Collects and sorts files with specified extension from a given folder.

    The files are found with ``glob`` using the pattern ``<folder>/*.<extension>``,
    optionally sorted in natural order, and then reduced with the slice
    ``[f_start:f_end:f_step]``.

    Args:
        folder (str): The folder to search
        extension (str): File extension used for glob.glob(), given without
            the leading dot.
        f_start (int, optional): Start file id used to construct a file selector.
            Defaults to None (start from the first file).
        f_end (int, optional): End file id (exclusive) used to construct a file
            selector. Defaults to None (up to and including the last file).
        f_step (int, optional): Step of file id incrementation, used to construct
            a file selector. Defaults to 1.
        file_sorting (bool, optional): Option to sort the files by their names.
            Defaults to True.

    Returns:
        List[str]: List of collected file names. The list is empty if nothing
        matches the pattern.
    """
    # No FileNotFoundError handling is needed here: glob returns an empty list
    # when the pattern matches nothing instead of raising.
    files = glob(folder + "/*." + extension)

    if file_sorting:
        files = cast(List[str], natsorted(files))

    # Always apply the selector so that a lone f_start, f_end or f_step is
    # honoured; with the default values this slice selects every file.
    return files[f_start:f_end:f_step]
1✔
54

55

56
def parse_h5_keys(h5_file: File, prefix: str = "") -> List[str]:
    """Helper method which parses the channels present in the h5 file.

    The keys of ``h5_file`` are visited in order. Entries that are groups are
    descended into recursively; every other entry is reported as a channel.
    Channel names are built as ``prefix + "/" + key``.

    Args:
        h5_file (h5py.File): The H5 file object (or, in the recursive calls,
            a group inside it).
        prefix (str, optional): The prefix for the channel names.
            Defaults to an empty string.

    Returns:
        List[str]: A list of channel names in the H5 file.

    Raises:
        KeyError: If an entry listed by ``keys()`` cannot be accessed. The
            message contains the full path of the offending key.
    """
    # Initialize an empty list to store the channels
    file_channel_list = []

    # Iterate over the keys in the H5 file
    for key in h5_file.keys():
        channel = prefix + "/" + key
        try:
            # Look the entry up once and reuse it for both the type check
            # and the recursion. Only this lookup is guarded, so an error
            # from a nested level is reported once with its full path
            # instead of being re-wrapped at every level above it.
            entry = h5_file[key]
        except KeyError as exception:
            raise KeyError(
                f"Error parsing key: {channel}",
            ) from exception

        if isinstance(entry, Group):
            # A group: collect the channels below it
            file_channel_list.extend(parse_h5_keys(entry, prefix=channel))
        else:
            # Not a group (i.e., a dataset): the entry itself is a channel
            file_channel_list.append(channel)

    # Return the list of channels
    return file_channel_list
1✔
95

96

97
def split_channel_bitwise(
    df: dask.dataframe.DataFrame,
    input_column: str,
    output_columns: Sequence[str],
    bit_mask: int,
    overwrite: bool = False,
    types: Sequence[type] = None,
) -> dask.dataframe.DataFrame:
    """Splits a channel into two channels bitwise.

    This function splits a channel into two channels by separating the lowest
    ``bit_mask`` bits from the remaining bits. The lowest bits (the value modulo
    ``2**bit_mask``) are stored in the first output column, the remaining bits
    (the value floor-divided by ``2**bit_mask``) are stored in the second output
    column. The columns are added to ``df`` itself, which is also returned.

    Args:
        df (dask.dataframe.DataFrame): Dataframe to use.
        input_column (str): Name of the column to split.
        output_columns (Sequence[str]): Names of the two columns to create.
        bit_mask (int): Number of low bits to split off into the first output
            column. Must be a non-negative integer.
        overwrite (bool, optional): Whether to overwrite existing columns.
            Defaults to False.
        types (Sequence[type], optional): Types of the two new columns.
            Defaults to None, in which case the first column uses the smallest
            of np.int8, np.int16, np.int32 and np.int64 that can hold
            ``bit_mask`` bits, and the second column uses np.int32.

    Raises:
        ValueError: If ``output_columns`` or ``types`` do not contain exactly two
            entries, if ``types`` contains something that is not a type, or if
            ``bit_mask`` is not a non-negative integer.
        KeyError: If ``input_column`` is missing from the dataframe, or if an
            output column already exists and ``overwrite`` is False.

    Returns:
        dask.dataframe.DataFrame: Dataframe with the new columns.
    """
    if len(output_columns) != 2:
        raise ValueError("Exactly two output columns must be given.")
    if input_column not in df.columns:
        raise KeyError(f"Column {input_column} not in dataframe.")
    if output_columns[0] in df.columns and not overwrite:
        raise KeyError(f"Column {output_columns[0]} already in dataframe.")
    if output_columns[1] in df.columns and not overwrite:
        raise KeyError(f"Column {output_columns[1]} already in dataframe.")
    # Check the type before comparing, so that a non-numeric bit_mask is
    # reported as a ValueError instead of failing in the comparison.
    if not isinstance(bit_mask, int) or bit_mask < 0:
        raise ValueError("bit_mask must be a non-negative integer.")
    if types is None:
        # A signed integer of w bits holds at most w - 1 value bits, so pick
        # the narrowest type whose positive range covers 2**bit_mask - 1.
        if bit_mask < 8:
            low_bits_type = np.int8
        elif bit_mask < 16:
            low_bits_type = np.int16
        elif bit_mask < 32:
            low_bits_type = np.int32
        else:
            low_bits_type = np.int64
        types = [low_bits_type, np.int32]
    elif len(types) != 2:
        raise ValueError("Exactly two types must be given.")
    elif not all(isinstance(t, type) for t in types):
        raise ValueError("types must be a sequence of types.")

    divisor = 2**bit_mask
    df[output_columns[0]] = (df[input_column] % divisor).astype(types[0])
    df[output_columns[1]] = (df[input_column] // divisor).astype(types[1])
    return df
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc