• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

angelolab / tmi / 4106902596

pending completion
4106902596

Pull #20

github

GitHub
Merge 5de60795e into 9fb6cee18
Pull Request #20: GitHub Actions

294 of 306 branches covered (96.08%)

Branch coverage included in aggregate %.

1 of 1 new or added line in 1 file covered. (100.0%)

431 of 443 relevant lines covered (97.29%)

0.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.17
/src/tmi/io_utils.py
1
import itertools
1✔
2
import os
1✔
3
import pathlib
1✔
4
import warnings
1✔
5
from typing import List
1✔
6

7
from tmi import misc_utils
1✔
8
from tmi.settings import EXTENSION_TYPES
1✔
9

10

11
def validate_paths(paths):
    """Verifies that every given path exists on the file system

    Args:
        paths (str or list):
            paths to verify.

    Raises:
        FileNotFoundError:
            Raised if a path does not exist. When one of the path's parent folders is
            missing, the error names the top-most missing folder; otherwise it names
            the missing file/folder itself.
    """

    # if given a single path, convert to list
    # NOTE(review): presumably make_iterable wraps a lone string in a list - confirm
    paths = misc_utils.make_iterable(paths, ignore_str=True)

    # fallback: wrap anything that still is not a list
    if not isinstance(paths, list):
        paths = [paths]

    for path in paths:
        if os.path.exists(path):
            continue

        # `parents` runs from the immediate parent up to the root; reversing it reports
        # the highest-level missing folder first.
        for parent in reversed(pathlib.Path(path).parents):
            if not os.path.exists(parent):
                raise FileNotFoundError(
                    f"A bad path, {path}, was provided.\n"
                    f"The folder, {parent.name}, could not be found..."
                )

        # every parent exists, so only the final path component is missing
        raise FileNotFoundError(
            f"The file/path, {pathlib.Path(path).name}, could not be found..."
        )
41

42

43
def list_files(dir_name, substrs=None, exact_match=False, ignore_hidden=True):
    """List all files in a directory containing at least one given substring

    Args:
        dir_name (str):
            Parent directory for files of interest
        substrs (str or list):
            Substring matching criteria, defaults to None (all files).
            A single string, or a list / tuple / set of strings.
        exact_match (bool):
            If True, will match exact file names ('C' will match only 'C.tiff')
            If False, will match substr pattern in file ('C' will match 'C.tiff' and 'CD30.tiff')
        ignore_hidden (bool):
            If True, will ignore hidden files. If False, will allow hidden files to be
            matched against the search substring.

    Returns:
        list:
            List of files containing at least one of the substrings
    """
    # keep only non-directory entries
    files = [
        entry
        for entry in os.listdir(dir_name)
        if not os.path.isdir(os.path.join(dir_name, entry))
    ]

    # Filter out hidden files
    if ignore_hidden:
        files = [file for file in files if not file.startswith(".")]

    # default to return all files
    if substrs is None:
        return files

    # Normalise the criteria to a list. Tuples and sets are treated as collections of
    # substrings instead of being wrapped as one (never matching) element.
    if isinstance(substrs, (list, tuple, set, frozenset)):
        substrs = list(substrs)
    else:
        substrs = [substrs]

    if exact_match:
        # compare against the file name with its last extension removed
        return [file for file in files if os.path.splitext(file)[0] in substrs]

    return [file for file in files if any(substr in file for substr in substrs)]
87

88

89
def remove_file_extensions(files):
    """Removes file extensions from a list of files

    Args:
        files (list):
            List of files to remove file extensions from.
            Any element that doesn't have an extension is left unchanged

    Warns:
        UserWarning:
            Some of the processed file names still contain a period

    Returns:
        list:
            List of files without file extensions, or None if `files` is None
    """

    # make sure we don't try to split on a non-existent list
    if files is None:
        return None

    # flatten every extension listed in EXTENSION_TYPES into a single list
    extension_types: List[str] = list(itertools.chain(*EXTENSION_TYPES.values()))

    names_corrected = []
    for file_name in files:
        stem, suffix = os.path.splitext(file_name)
        # We want everything after the "." for the extension
        ext = suffix[1:]
        if ext in extension_types or not ext:
            # If it is one of the extension types, only keep the filename.
            # Or there is no extension and the names are similar to ["fov1", "fov2", "fov3", ...]
            names_corrected.append(stem)
        else:
            # If `ext` is not one of the specified file types, keep the whole value after
            # the "." (previously only its first character was kept).
            names_corrected.append(stem + suffix)

    # identify names with '.' in them: these may not be processed correctly.
    bad_names = [name for name in names_corrected if "." in name]
    if bad_names:
        warnings.warn(
            'These files still have "." in them after file extension removal: '
            f'{",".join(bad_names)}, '
            "please double check that these are the correct names"
        )

    return names_corrected
135

136

137
def extract_delimited_names(names, delimiter="_", delimiter_optional=True):
    """Return the part of each name that precedes the first delimiter

    Examples (if delimiter='_'):

    - 'fov1' becomes 'fov1'
    - 'fov2_part1' becomes 'fov2'
    - 'fov3_part1_part2' becomes 'fov3'

    Args:
        names (list):
            List of names to split by delimiter.
            Make sure to call remove_file_extensions first if you need to drop file extensions.
        delimiter (str):
            Character separator used to determine filename prefix. Defaults to '_'.
        delimiter_optional (bool):
            If False, function will return None if any of the files don't contain the delimiter.
            Defaults to True. Ignored if delimiter is None.

    Warns:
        UserWarning:
            Issued if delimiter_optional=False and at least one name lacks the delimiter

    Returns:
        list:
            List of extracted names. Indices match those of `names`.
            None if `names` is None or a mandatory delimiter is missing.
    """

    if names is None:
        return None

    # the delimiter check only applies when a delimiter is given and it is mandatory
    delimiter_required = not delimiter_optional and delimiter is not None
    if delimiter_required:
        missing = [name for name in names if delimiter not in name]
        if missing:
            # list the offending names on stdout, then warn and bail out
            print(
                "The following files do not have the mandatory delimiter, "
                f"'{delimiter}': "
                f"{','.join(missing)}"
            )
            warnings.warn("files without mandatory delimiter")
            return None

    # keep only the text before the first delimiter
    prefixes = []
    for name in names:
        prefixes.append(name.split(delimiter)[0])

    return prefixes
185

186

187
def list_folders(dir_name, substrs=None, exact_match=False, ignore_hidden=True):
    """List all folders in a directory containing at least one given substring

    Args:
        dir_name (str):
            Parent directory for folders of interest
        substrs (str or list):
            Substring matching criteria, defaults to None (all folders).
            A single string, or a list / tuple / set of strings.
        exact_match (bool):
            If True, will match exact folder names (so 'C' will match only 'C/').
            If False, will match substr pattern in folder (so 'C' will match 'C/' & 'C_DIREC/').
        ignore_hidden (bool):
            If True, will ignore hidden directories. If False, will allow hidden directories to
            be matched against the search substring.

    Returns:
        list:
            List of folders containing at least one of the substrings
    """
    # keep only directory entries
    folders = [
        entry
        for entry in os.listdir(dir_name)
        if os.path.isdir(os.path.join(dir_name, entry))
    ]

    # Filter out hidden directories
    if ignore_hidden:
        folders = [folder for folder in folders if not folder.startswith(".")]

    # default to return all folders
    if substrs is None:
        return folders

    # Normalise the criteria to a list. Tuples and sets are treated as collections of
    # substrings instead of being wrapped as one (never matching) element.
    if isinstance(substrs, (list, tuple, set, frozenset)):
        substrs = list(substrs)
    else:
        substrs = [substrs]

    # Exact match case
    if exact_match:
        # NOTE(review): the folder name is compared after os.path.splitext, so a folder
        # such as 'C.d' also matches 'C'. Kept as-is for backward compatibility.
        return [folder for folder in folders if os.path.splitext(folder)[0] in substrs]

    return [folder for folder in folders if any(substr in folder for substr in substrs)]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc