• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

angelolab / mibi-bin-tools / 4431962890

pending completion
4431962890

push

github

GitHub
Classifier Update (#51)

110 of 111 branches covered (99.1%)

Branch coverage included in aggregate %.

149 of 149 relevant lines covered (100.0%)

1.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.53
/src/mibi_bin_tools/bin_files.py
1
from typing import Any, Dict, List, Tuple, Union
1✔
2
import os
1✔
3
import json
1✔
4

5
import numpy as np
1✔
6
import pandas as pd
1✔
7
import skimage.io as io
1✔
8
import xarray as xr
1✔
9

10
from mibi_bin_tools import type_utils, _extract_bin
1✔
11
from alpineer import io_utils, image_utils
1✔
12

13

14
def _mass2tof(masses_arr: np.ndarray, mass_offset: float, mass_gain: float,
              time_res: float) -> np.ndarray:
    """Map m/z values onto their equivalent time of flight values

    The conversion applied is `(mass_gain * sqrt(mass) + mass_offset) / time_res`.

    Args:
        masses_arr (array_like):
            m/z values to convert
        mass_offset (float):
            Additive offset of the parabolic mass calibration
        mass_gain (float):
            Multiplicative gain of the parabolic mass calibration
        time_res (float):
            Time resolution the calibrated value is divided by

    Returns:
        array_like:
            Time of flight values; indices are paired to `masses_arr`
    """
    calibrated = mass_gain * np.sqrt(masses_arr) + mass_offset
    return calibrated / time_res
33

34

35
def _set_tof_ranges(fov: Dict[str, Any], higher: np.ndarray, lower: np.ndarray,
                    time_res: float) -> None:
    """Translates mass integration bounds into time of flight bounds stored on the fov metadata

    Reads `fov['mass_offset']` and `fov['mass_gain']` and writes `fov['upper_tof_range']` and
    `fov['lower_tof_range']`.

    Args:
        fov (Dict[str, Any]):
            Metadata for the fov.
        higher (array_like):
            m/z values used as the upper integration bounds
        lower (array_like):
            m/z values used as the lower integration bounds
        time_res (float):
            Time resolution for scaling parabolic transformation

    Returns:
        None:
            `fov` argument is modified in place
    """
    # the upper bound is rounded up and the lower bound rounded down, so the integer
    # window never shrinks; both are then cast to uint16
    upper_tof = np.ceil(_mass2tof(higher, fov['mass_offset'], fov['mass_gain'], time_res))
    fov['upper_tof_range'] = upper_tof.astype(np.uint16)

    lower_tof = np.floor(_mass2tof(lower, fov['mass_offset'], fov['mass_gain'], time_res))
    fov['lower_tof_range'] = lower_tof.astype(np.uint16)
62

63

64
def _write_out(img_data: np.ndarray, out_dir: str, fov_name: str, targets: List[str],
               intensities: Union[bool, List[str]] = False) -> None:
    """Parses extracted data and writes out tifs

    Pulse count images go to `<out_dir>/<fov_name>/<target>.tiff`.  When `img_data` carries a
    second layer along its first axis, intensity images go to
    `<out_dir>/<fov_name>/intensities/<target>_intensity.tiff`.

    Args:
        img_data (np.ndarray):
            Array indexed as `[type, :, :, channel]`, where type 0 holds the pulse counts and
            type 1 (if present) the intensities
        out_dir (str | PathLike):
            Directory to save tifs
        fov_name (str):
            Name of the field of view
        targets (array_like):
            List of target names (i.e channels)
        intensities (bool | List):
            Whether or not to write out intensity images.  If a List, specific
            peaks can be written out, ignoring the rest, which will only have pulse count images.
            `True` writes intensity images for every target, `False` for none.
    """
    # Resolve the selection once up front.  Calling list() on a bool raises TypeError, so
    # the boolean forms are mapped explicitly to "all targets" / "no targets".
    if isinstance(intensities, (bool, np.bool_)):
        intensity_targets = set(targets) if intensities else set()
    else:
        intensity_targets = set(intensities)

    # (directory, filename suffix, dtype on disk) for each layer along img_data's first axis
    layers = (
        (os.path.join(out_dir, fov_name), '', np.uint32),
        (os.path.join(out_dir, fov_name, 'intensities'), '_intensity', np.uint32),
    )

    # only handle as many layers as img_data actually contains
    for i, (layer_dir, suffix, save_dtype) in enumerate(layers[:img_data.shape[0]]):
        os.makedirs(layer_dir, exist_ok=True)
        for j, target in enumerate(targets):
            # pulse count images (i == 0) are always saved;
            # intensity images only for the requested targets
            if i == 0 or target in intensity_targets:
                fname = os.path.join(layer_dir, f"{target}{suffix}.tiff")
                image_utils.save_image(fname=fname, data=img_data[i, :, :, j].astype(save_dtype))
106

107

108
def _find_bin_files(data_dir: str,
                    include_fovs: Union[List[str], None] = None) -> Dict[str, Dict[str, str]]:
    """Locates paired bin/json files within the provided directory.

    A fov is only kept when both `<fov>.bin` and `<fov>.json` are listed for `data_dir`.

    Args:
        data_dir (str | PathLike):
            Directory containing bin/json files
        include_fovs (List | None):
            List of fovs to include. Includes all if None.  Requested fovs that have no
            bin/json pair are silently left out.

    Returns:
        Dict[str, Dict[str, str]]:
            Maps each fov name to `{'bin': <fov>.bin, 'json': <fov>.json}`

    Raises:
        FileNotFoundError:
            If no bin/json pair remains after filtering
    """
    bin_files = io_utils.list_files(data_dir, substrs=['.bin'])
    # a set keeps the per-fov pairing check below O(1)
    json_files = set(io_utils.list_files(data_dir, substrs=['.json']))

    fov_names = io_utils.extract_delimited_names(bin_files, delimiter='.')

    fov_files = {
        fov_name: {
            'bin': fov_name + '.bin',
            'json': fov_name + '.json',
        }
        for fov_name in fov_names
        if fov_name + '.json' in json_files
    }

    if include_fovs is not None:
        # rebuilt in the order given by include_fovs
        fov_files = {
            fov_name: fov_files[fov_name]
            for fov_name in include_fovs
            if fov_name in fov_files
        }

    if not fov_files:
        raise FileNotFoundError(f'No viable bin files were found in {data_dir}...')

    return fov_files
147

148

149
def _fill_fov_metadata(data_dir: str, fov: Dict[str, Any],
                       panel: Union[Tuple[float, float], pd.DataFrame],
                       intensities: Union[bool, List[str]], time_res: float,
                       channels: Union[List[str], None] = None) -> None:
    """ Parses user input and mibiscope json to build extraction parameters

    Fills fov metadata with mass calibration parameters, builds panel, and sets intensity
    extraction flags.

    Args:
        data_dir (str):
            Directory containing bin files as well as accompanying json metadata files
        fov (Dict[str, Any]):
            Metadata for the fov.  Its 'json' entry names the metadata file to read.
        panel (tuple | pd.DataFrame):
            If a tuple, global integration range over all antibodies within json metadata.
            If a pd.DataFrame, specific peaks with custom integration ranges.  Column names must be
            'Mass' and 'Target' with integration ranges specified via 'Start' and 'Stop' columns.
        intensities (bool | List[str]):
            Whether or not to extract intensity and intensity * width images.  If a List, specific
            peaks can be extracted, ignoring the rest, which will only have pulse count images
            extracted.
        time_res (float):
            Time resolution for scaling parabolic transformation
        channels (List[str] | None):
            Filters panel for given channels.  All channels in panel extracted if None
    Returns:
        None:
            `fov` argument is modified in place
    """

    with open(os.path.join(data_dir, fov['json']), 'rb') as f:
        data = json.load(f)

    mass_calibration = data['fov']['fullTiming']['massCalibration']
    fov['mass_gain'] = mass_calibration['massGain']
    fov['mass_offset'] = mass_calibration['massOffset']

    # isinstance (rather than an exact type match) also routes tuple subclasses,
    # e.g. namedtuples, to the global-range parser
    if isinstance(panel, tuple):
        _parse_global_panel(data, fov, panel, time_res, channels)
    else:
        _parse_df_panel(fov, panel, time_res, channels)

    _parse_intensities(fov, intensities)
192

193

194
def _parse_global_panel(json_metadata: dict, fov: Dict[str, Any], panel: Tuple[float, float],
                        time_res: float, channels: List[str]) -> None:
    """Extracts panel contained in mibiscope json metadata

    Sets `fov['masses']` and `fov['targets']` from the json's conjugate list and stores the
    matching time of flight integration ranges.

    Args:
        json_metadata (dict):
            metadata read via mibiscope json
        fov (Dict[str, Any]):
            Metadata for the fov.
        panel (tuple):
            Global integration range applied to every mass, given as
            `(lower offset, upper offset)` relative to each mass.
        time_res (float):
            Time resolution for scaling parabolic transformation
        channels (List[str] | None):
            Filters panel for given channels.  All channels in panel extracted if None
    Returns:
        None:
            `fov` argument is modified in place

    Raises:
        KeyError:
            If the json metadata has no 'panel' field
        ValueError:
            If no conjugate is left after filtering by `channels`
    """
    if json_metadata['fov'].get('panel', None) is None:
        raise KeyError(
            f"'panel' field not found in {fov['json']}. "
            + "If this is a moly point, you must manually supply a panel..."
        )
    rows = json_metadata['fov']['panel']['conjugates']
    selected = [
        (el['mass'], el['target'])
        for el in rows
        if channels is None or el['target'] in channels
    ]

    # unpacking zip(*[]) would fail with an unhelpful "not enough values to unpack"
    if not selected:
        if channels is None:
            raise ValueError(f"No conjugates found in the panel of {fov['json']}.")
        raise ValueError(
            f"None of the requested channels {list(channels)} were found in the panel of "
            f"{fov['json']}."
        )

    fov['masses'], fov['targets'] = zip(*selected)

    masses_arr = np.array(fov['masses'])
    _set_tof_ranges(fov, masses_arr + panel[1], masses_arr + panel[0], time_res)
229

230

231
def _parse_df_panel(fov: Dict[str, Any], panel: pd.DataFrame, time_res: float,
                    channels: List[str]) -> None:
    """Copies a DataFrame panel into the fov extraction-metadata and converts its mass ranges

    Sets `fov['masses']` and `fov['targets']` from the selected panel rows and stores the
    time of flight ranges derived from their 'Start' and 'Stop' columns.

    Args:
        fov (Dict[str, Any]):
            Metadata for the fov.
        panel (pd.DataFrame):
            Specific peaks with custom integration ranges.  Column names must be 'Mass' and
            'Target' with integration ranges specified via 'Start' and 'Stop' columns.
        time_res (float):
            Time resolution for scaling parabolic transformation
        channels (List[str] | None):
            Filters panel for given channels.  All channels in panel extracted if None
    Returns:
        None:
            `fov` argument is modified in place
    """
    # with no channel filter, every target is matched against the panel's own targets
    if channels is None:
        wanted_targets = panel['Target']
    else:
        wanted_targets = channels

    keep = panel['Target'].isin(wanted_targets)
    rows = panel.loc[keep]

    fov['masses'] = rows['Mass']
    fov['targets'] = rows['Target']

    upper_masses = rows['Stop'].values
    lower_masses = rows['Start'].values
    _set_tof_ranges(fov, upper_masses, lower_masses, time_res)
254

255

256
def _parse_intensities(fov: Dict[str, Any], intensities: Union[bool, List[str]]) -> None:
    """Sets intensity extraction flags within the extraction-metadata

    Writes `fov['calc_intensity']`, one bool per entry of `fov['targets']` in the same order.

    Args:
        fov (Dict[str, Any]):
            Metadata for the fov
        intensities (bool | List):
            Whether or not to extract intensity and intensity * width images.  If a List, specific
            peaks can be extracted, ignoring the rest, which will only have pulse count images
            extracted.  Any value other than a list or `True` disables intensity extraction.
    Returns:
        None:
            `fov` argument is modified in place
    """
    targets = fov['targets']

    if isinstance(intensities, list):
        # build the lookup once instead of once per target
        requested = set(intensities)
        fov['calc_intensity'] = [target in requested for target in targets]
    else:
        extract_all = intensities is True
        fov['calc_intensity'] = [extract_all for _ in targets]
282

283

284
def condense_img_data(img_data, targets, intensities, replace):
    """Changes image data from separate pulse and intensity data into one column if replace=True.

    Args:
        img_data (np.array):
            Contains the image data with all pulse and intensity information, indexed as
            `[type, :, :, channel]` with type 0 the pulse data and type 1 the intensity data.
        targets (list):
            List of targets.
        intensities (bool | List):
            Whether or not to extract intensity images.  If a List, specific
            peaks can be extracted, ignoring the rest, which will only have pulse count images
            extracted.  `True` selects every target.
        replace (bool):
            Whether to replace pulse images with intensity images.

    Return:
        altered img_data according to args.  When replacing, the pulse layer of the `img_data`
        passed in is overwritten in place for the selected targets.

    """
    extracting = type_utils.any_true(intensities)

    # extracting intensity and replacing
    if extracting and replace:
        # A bool cannot be used with `in`.  Reaching this branch with a bool means
        # intensities was truthy, i.e. every target is selected.
        # NOTE(review): assumes type_utils.any_true(True) is truthy - confirm
        replace_all = isinstance(intensities, (bool, np.bool_))
        for j, target in enumerate(targets):
            # replace only specified targets
            if replace_all or target in intensities:
                img_data[0, :, :, j] = img_data[1, :, :, j]
        return img_data[[0], :, :, :]

    # not extracting intensity
    if not extracting:
        return img_data[[0], :, :, :]

    # extracting intensity but not replacing
    return img_data[[0, 1], :, :, :]
319

320

321
def extract_bin_files(data_dir: str, out_dir: Union[str, None],
                      include_fovs: Union[List[str], None] = None,
                      panel: Union[Tuple[float, float], pd.DataFrame] = (-0.3, 0.0),
                      intensities: Union[bool, List[str]] = False, replace=True,
                      time_res: float = 500e-6):
    """Converts MibiScope bin files to pulse count, intensity, and intensity * width tiff images

    Args:
        data_dir (str | PathLike):
            Directory containing bin files as well as accompanying json metadata files
        out_dir (str | PathLike | None):
            Directory to save the tiffs in.  If None, image data is returned as an ndarray.
        include_fovs (List | None):
            List of fovs to include.  Includes all if None.
        panel (tuple | pd.DataFrame):
            If a tuple, global integration range over all antibodies within json metadata.
            If a pd.DataFrame, specific peaks with custom integration ranges.  Column names must be
            'Mass' and 'Target' with integration ranges specified via 'Start' and 'Stop' columns.
        intensities (bool | List):
            Whether or not to extract intensity images.  If a List, specific
            peaks can be extracted, ignoring the rest, which will only have pulse count images
            extracted.
        replace (bool):
            Whether to replace pulse images with intensity images.
        time_res (float):
            Time resolution for scaling parabolic transformation
    Returns:
        None | xr.DataArray:
            image data with dims ('fov', 'type', 'x', 'y', 'channel') if no out_dir is
            provided, otherwise no return
    """

    fov_files = _find_bin_files(data_dir, include_fovs)

    for fov in fov_files.values():
        _fill_fov_metadata(data_dir, fov, panel, intensities, time_res)

    image_data = []

    # The dict key is the fov name and 'bin' is that name plus '.bin', so the key is used
    # both for the output folder and for the 'fov' coordinate.
    for fov_name, fov in fov_files.items():
        bin_path = os.path.join(data_dir, fov['bin'])
        img_data = _extract_bin.c_extract_bin(
            bytes(bin_path, 'utf-8'), fov['lower_tof_range'],
            fov['upper_tof_range'], np.array(fov['calc_intensity'], dtype=np.uint8)
        )

        targets = list(fov['targets'])

        # Expand a truthy non-list `intensities` into this fov's own targets.  A per-fov
        # local keeps one fov's target list from leaking into the following fovs.
        if type_utils.any_true(intensities) and not isinstance(intensities, list):
            fov_intensities = targets
        else:
            fov_intensities = intensities

        img_data = condense_img_data(img_data, targets, fov_intensities, replace)

        if out_dir is not None:
            _write_out(
                img_data,
                out_dir,
                fov_name,
                fov['targets'],
                fov_intensities
            )
        else:
            # must mirror the layers condense_img_data kept along the first axis
            if replace or not type_utils.any_true(fov_intensities):
                type_list = ['pulse']
            else:
                type_list = ['pulse', 'intensities']
            image_data.append(
                xr.DataArray(
                    data=img_data[np.newaxis, :],
                    coords=[
                        [fov_name],
                        type_list,
                        np.arange(img_data.shape[1]),
                        np.arange(img_data.shape[2]),
                        targets,
                    ],
                    dims=['fov', 'type', 'x', 'y', 'channel'],
                )
            )

    if out_dir is None:
        return xr.concat(image_data, dim='fov')

    return None
406

407

408
def get_histograms_per_tof(data_dir: str, fov: str, channel: str, mass_range=(-0.3, 0.0),
                           time_res: float = 500e-6):
    """Builds histograms of pulse widths, pulse intensities, and pulse counts for one channel
    of one fov, restricted to the given mass range

    Args:
        data_dir (str | PathLike):
            Directory containing bin files as well as accompanying json metadata files
        fov (str):
            Fov to generate histogram for
        channel (str):
            Channel to check widths for
        mass_range (tuple):
            Integration range
        time_res (float):
            Time resolution for scaling parabolic transformation

    Returns:
        tuple:
            `(widths, intensities, pulses)` as produced by the bin file extraction
    """
    fov_meta = _find_bin_files(data_dir, [fov])[fov]

    # only the requested channel is kept, so index 0 of each tof range belongs to it
    _fill_fov_metadata(data_dir, fov_meta, mass_range, False, time_res, [channel])

    bin_path = bytes(os.path.join(data_dir, fov_meta['bin']), 'utf-8')
    lower_tof = fov_meta['lower_tof_range'][0]
    upper_tof = fov_meta['upper_tof_range'][0]

    widths, intensities, pulses = _extract_bin.c_extract_histograms(
        bin_path, lower_tof, upper_tof
    )
    return widths, intensities, pulses
435

436

437
def get_median_pulse_height(data_dir: str, fov: str, channel: str,
                            panel: Union[Tuple[float, float], pd.DataFrame] = (-0.3, 0.0),
                            time_res: float = 500e-6):
    """Retrieves median pulse intensity for a given channel

    Args:
        data_dir (str | PathLike):
            Directory containing bin files as well as accompanying json metadata files
        fov (str):
            Fov to compute the median pulse height for
        channel (str):
            Channel to compute the median pulse height for
        panel (tuple | pd.DataFrame):
            If a tuple, global integration range over all antibodies within json metadata.
            If a pd.DataFrame, specific peaks with custom integration ranges.
        time_res (float):
            Time resolution for scaling parabolic transformation

    Returns:
        int:
            Index of the intensity histogram bin whose cumulative fraction is closest to 0.5
    """

    fov = _find_bin_files(data_dir, [fov])[fov]
    _fill_fov_metadata(data_dir, fov, panel, False, time_res, [channel])

    local_bin_file = os.path.join(data_dir, fov['bin'])

    _, intensities, _ = \
        _extract_bin.c_extract_histograms(bytes(local_bin_file, 'utf-8'),
                                          fov['lower_tof_range'][0],
                                          fov['upper_tof_range'][0])

    total = intensities.sum()
    if total == 0:
        # No pulses in the window: dividing by zero would fill the cumulative fraction
        # with NaNs, for which argmin returns 0.  Return that value directly.
        # NOTE(review): assumes the histogram is a non-empty array - confirm
        return 0

    int_bin = np.cumsum(intensities) / total
    median_height = (np.abs(int_bin - 0.5)).argmin()

    return median_height
470

471

472
def get_total_counts(data_dir: str, include_fovs: Union[List[str], None] = None):
    """Collects the total counts of every selected field of view

    Args:
        data_dir (str | PathLike):
            Directory containing bin files as well as accompanying json metadata files
        include_fovs (List | None):
            List of fovs to include.  Includes all if None.

    Returns:
        dict:
            dictionary of total counts, with fov names as keys
    """

    fov_files = _find_bin_files(data_dir, include_fovs)

    # resolve every bin path first, then query the counts fov by fov
    bin_paths = {
        name: os.path.join(data_dir, fov['bin'])
        for name, fov in fov_files.items()
    }

    totals = {}
    for name, bin_path in bin_paths.items():
        totals[name] = _extract_bin.c_total_counts(bytes(bin_path, 'utf-8'))

    return totals
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc