11961352837

Committed 21 Nov 2024 08:44PM UTC coverage: 91.952% (-0.2%) from 92.104%

Build # 11961352837

Build Type

Pull #467

github

Committed by

web-flow

Commit Message

Merge 25e0126a9 into 5f16d6fa0

Pull Request Pull Request #467: Enforce `exact_match=True` when listing JSON file for `get_estimated_time` for MPH

Run Details

1421 of 1549 branches covered (91.74%)

Branch coverage included in aggregate %.

8 of 8 new or added lines in 4 files covered. (100.0%)

6 existing lines in 1 file now uncovered.

2715 of 2949 relevant lines covered (92.07%)

0.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.24

/src/toffy/watcher_callbacks.py

import inspect
import os
import warnings
from dataclasses import dataclass, field
from typing import Iterable

# prevent memory leaking from creating plots that are never shown
import matplotlib
import pandas as pd
import xarray as xr
from alpineer import io_utils, misc_utils
from mibi_bin_tools.bin_files import _write_out, extract_bin_files
from mibi_bin_tools.type_utils import any_true

from toffy.bin_extraction import incomplete_fov_check
from toffy.image_stitching import stitch_images
from toffy.json_utils import missing_fov_check
from toffy.mph_comp import combine_mph_metrics, compute_mph_metrics, visualize_mph
from toffy.normalize import write_mph_per_mass
from toffy.panel_utils import modify_panel_ranges
from toffy.qc_comp import combine_qc_metrics, compute_qc_metrics_direct
from toffy.qc_metrics_plots import visualize_qc_metrics
from toffy.settings import QC_COLUMNS

# select the non-interactive "Agg" backend before any plotting happens, so that plots created by
# the callbacks are never shown (see the memory leak note next to the matplotlib import)
matplotlib.use("Agg")

# For each run level callback, the FOV level callbacks that `build_callbacks` schedules
# automatically whenever that run level callback is requested
RUN_PREREQUISITES = {
    "plot_qc_metrics": {"generate_qc"},
    "plot_mph_metrics": {"generate_mph"},
    "image_stitching": {"extract_tiffs"},
}


# If FovCallbacks ever should pass data to RunCallbacks, make this a dataclass following the
# field structure outlined for __fov_data and __panel in FovCallbacks
@dataclass
class RunCallbacks:
    """Class for run level callbacks in watcher.

    Every callback accepts `**kwargs` so that one shared keyword dictionary can be handed to
    all callbacks; each callback only picks out the entries it knows about.

    Attributes:
        run_folder (str):
            Path to the run folder the callbacks operate on
    """

    run_folder: str

    def plot_qc_metrics(self, qc_out_dir: str, warn_overwrite=False, **kwargs):
        """Plots qc metrics generated by the `generate_qc` callback.

        Args:
            qc_out_dir (str):
                Directory containing qc metric csv
            warn_overwrite (bool): whether to warn if existing `_combined.csv` file found,
                needed to curb watcher output if `plot_qc_metrics` set as intermediate callback
            **kwargs (Dict[str, Any]):
                Additional arguments for `toffy.qc_metrics_plots.visualize_qc_metrics`.
                Accepted kwargs are

             - axes_size
             - wrap
             - dpi
             - save_dir
        Returns:
            dict:
                Maps each metric name to their respective plot
        """
        # filter kwargs: the shared kwargs also hold arguments meant for other callbacks
        valid_kwargs = ["axes_size", "wrap", "dpi", "save_dir"]
        viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}

        # the per-FOV csvs have to be combined before they can be plotted
        combine_qc_metrics(qc_out_dir, warn_overwrite=warn_overwrite)

        return {
            metric_name: visualize_qc_metrics(
                metric_name, qc_out_dir, **viz_kwargs, return_plot=True
            )
            for metric_name in QC_COLUMNS
        }

    def plot_mph_metrics(self, mph_out_dir, plot_dir, warn_overwrite=False, **kwargs):
        """Plots mph metrics generated by the `generate_mph` callback.

        Args:
            mph_out_dir (str): directory containing mph metric csv
            plot_dir (str): directory to store the plot to, created if it does not exist
            warn_overwrite (bool): whether to warn if existing `_combined.csv` file found,
                needed to curb watcher output if `plot_mph_metrics` set as intermediate callback
            **kwargs (Dict[str, Any]):
                Additional arguments for `toffy.mph_comp.visualize_mph`.
                Accepted kwargs are

             - regression
        Returns:
            matplotlib.figure.Figure:
                The figure containing the MPH plot
        """
        os.makedirs(plot_dir, exist_ok=True)

        # filter kwargs: the shared kwargs also hold arguments meant for other callbacks
        valid_kwargs = [
            "regression",
        ]
        viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}

        # `warn_overwrite` is forwarded so the overwrite warning can be silenced when this
        # callback is run repeatedly as an intermediate callback
        mph_df = combine_mph_metrics(mph_out_dir, return_data=True, warn_overwrite=warn_overwrite)
        mph_fig = visualize_mph(mph_df, plot_dir, **viz_kwargs, return_plot=True)

        return mph_fig

    def image_stitching(self, tiff_out_dir, **kwargs):
        """Stitches individual FOV channel images together into one tiff.

        Args:
            tiff_out_dir (str): directory containing extracted images
            **kwargs (Dict[str, Any]):
                Additional arguments for `toffy.image_stitching.stitch_images`.
                Accepted kwargs are

             - channels
        """
        # filter kwargs: the shared kwargs also hold arguments meant for other callbacks
        valid_kwargs = ["channels"]
        stitch_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}

        stitch_images(tiff_out_dir, self.run_folder, **stitch_kwargs)

    def check_incomplete_fovs(self, tiff_out_dir, **kwargs):
        """Checks for partial images (even when fully extracted).

        Args:
            tiff_out_dir (str): directory containing extracted images
            **kwargs (Dict[str, Any]):
                Additional arguments for `toffy.bin_extraction.incomplete_fov_check`.
                Accepted kwargs are

             - num_rows
             - num_channels
             - signal_percent
        Raises:
            Warning if any FOVs have partially generated images
        """
        # forward the documented options; when none are passed the defaults of
        # `incomplete_fov_check` apply, exactly as before
        valid_kwargs = ["num_rows", "num_channels", "signal_percent"]
        check_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}

        incomplete_fov_check(self.run_folder, tiff_out_dir, **check_kwargs)

    def check_missing_fovs(self, **kwargs):
        """Checks for associated bin/json files per FOV.

        The run name handed to the check is the base name of `run_folder`.

        Args:
            **kwargs (Dict[str, Any]):
                Accepted so the shared callback kwargs can be passed in; none of them are used
        Raises:
            Warning if any fov data is missing
        """
        missing_fov_check(self.run_folder, os.path.basename(self.run_folder))


@dataclass
class FovCallbacks:
    """Class for FOV level callbacks in watcher.

    Data extracted from the bin files is cached on the instance, so several callbacks run on
    the same object can share one extraction. Every callback accepts `**kwargs` so that one
    shared keyword dictionary can be handed to all callbacks.

    Attributes:
        run_folder (str):
            Path to the run folder holding the bin files
        point_name (str):
            Name of the FOV the callbacks operate on
        overwrite (bool):
            If set, outputs that already exist are generated again instead of being skipped
    """

    run_folder: str
    point_name: str
    overwrite: bool
    # caches filled by `_generate_fov_data`; not part of `__init__` and `None` until extracted
    __panel: pd.DataFrame = field(default=None, init=False)
    __panel_prof: pd.DataFrame = field(default=None, init=False)
    __fov_data: xr.DataArray = field(default=None, init=False)
    __fov_data_prof: xr.DataArray = field(default=None, init=False)

    def _generate_fov_data(
        self,
        panel: pd.DataFrame,
        extract_prof: bool,
        intensities=["Au", "chan_39"],
        replace=True,
        time_res=0.0005,
        **kwargs,
    ):
        """Extracts data from bin files using the given panel.

        The data and the panel are then cached members of the FovCallbacks object

        The deficient data is always extracted; the proficient data and panel are only computed
        and cached if `extract_prof` is set

        Args:
            panel (pd.DataFrame):
                Panel used for extraction
            extract_prof (bool):
                If set, extract proficient data
            intensities (bool | List[str]):
                Intensities argument for `mibi_bin_tools.bin_files.extract_bin_files`.
                The default list is only read, never modified
            replace (bool):
                Whether to replace pulse images with intensity
            time_res (float):
                Time resolution argument for `mibi_bin_tool.bin_files.extract_bin_files`
            **kwargs (dict):
                Unused kwargs for other functions
        """
        self.__fov_data = extract_bin_files(
            data_dir=self.run_folder,
            out_dir=None,
            include_fovs=[self.point_name],
            panel=panel,
            intensities=intensities,
            replace=replace,
            time_res=time_res,
        )
        self.__panel = panel

        if extract_prof:
            # adds an offset of 0.3 to 'Start' and 'Stop' columns, modifying extraction range
            # from (-0.3, 0) to (0, 0.3) for proficient extraction
            panel_prof = modify_panel_ranges(panel, start_offset=0.3, stop_offset=0.3)
            self.__fov_data_prof = extract_bin_files(
                data_dir=self.run_folder,
                out_dir=None,
                include_fovs=[self.point_name],
                panel=panel_prof,
                intensities=intensities,
                replace=replace,
                time_res=time_res,
            )
            self.__panel_prof = panel_prof

    def extract_tiffs(
        self, tiff_out_dir: str, panel: pd.DataFrame, extract_prof: bool = True, **kwargs
    ):
        """Extract tiffs into provided directory, using given panel.

        Done for the extracted deficient data and, if `extract_prof` is set, for the proficient
        data as well (written to `tiff_out_dir` + "_proficient"). Unless `overwrite` is set,
        channels whose tiff already exists are not extracted again, and a warning is issued
        when nothing is left to extract.

        Args:
            tiff_out_dir (str):
                Path where tiffs are written
            panel (pd.DataFrame):
                Target mass integration ranges
            extract_prof (bool):
                If set, extract mass proficient data
            **kwargs (dict):
                Additional arguments for `mibi_bin_tools.bin_files.extract_bin_files`.
                Accepted kwargs are

             - intensities
             - replace
             - time_res
        """
        os.makedirs(tiff_out_dir, exist_ok=True)

        extracted_img_dir = os.path.join(tiff_out_dir, self.point_name)
        unextracted_chan_tiffs = []

        # in the case all images have been extracted, simply return
        if os.path.exists(extracted_img_dir) and not self.overwrite:
            all_chan_tiffs = [f"{ct}.tiff" for ct in panel["Target"]]
            extracted_chan_tiffs = io_utils.list_files(extracted_img_dir, substrs=".tiff")
            unextracted_chan_tiffs = set(all_chan_tiffs).difference(extracted_chan_tiffs)

            if not unextracted_chan_tiffs:
                warnings.warn(f"Images already extracted for FOV {self.point_name}")
                return

        # ensure we don't re-extract channels that have already been extracted
        if unextracted_chan_tiffs and not self.overwrite:
            unextracted_chans = io_utils.remove_file_extensions(unextracted_chan_tiffs)
            panel = panel[panel["Target"].isin(unextracted_chans)]

        # reuse data cached by an earlier callback on this object when both caches are filled
        if self.__fov_data is None or self.__fov_data_prof is None:
            self._generate_fov_data(panel, extract_prof, **kwargs)

        # a truthy non-list value (e.g. `True`) selects intensity images for every channel
        intensities = kwargs.get("intensities", ["Au", "chan_39"])
        if any_true(intensities) and not isinstance(intensities, list):
            intensities = list(self.__fov_data.channel.values)

        _write_out(
            img_data=self.__fov_data[0, :, :, :, :].values,
            out_dir=tiff_out_dir,
            fov_name=self.point_name,
            targets=list(self.__fov_data.channel.values),
            intensities=intensities,
        )

        if extract_prof:
            _write_out(
                img_data=self.__fov_data_prof[0, :, :, :, :].values,
                out_dir=tiff_out_dir + "_proficient",
                fov_name=self.point_name,
                targets=list(self.__fov_data.channel.values),
                intensities=intensities,
            )

    def generate_qc(
        self, qc_out_dir: str, panel: pd.DataFrame = None, extract_prof: bool = True, **kwargs
    ):
        """Generates qc metrics from given panel, and saves output to provided directory.

        Args:
            qc_out_dir (str):
                Path where qc_metrics are written
            panel (pd.DataFrame):
                Target mass integration ranges, only needed if no fov data is cached yet
            extract_prof (bool):
                If set, extract mass proficient data
            **kwargs (dict):
                Additional arguments for `toffy.qc_comp.compute_qc_metrics`. Accepted kwargs are:

             - gaussian_blur
             - blur_factor

        Raises:
            ValueError:
                Raised if no fov data is cached and `panel` is not provided
        """
        os.makedirs(qc_out_dir, exist_ok=True)

        if self.__fov_data is None:
            if panel is None:
                raise ValueError("Must provide panel if fov data is not already generated...")
            self._generate_fov_data(panel, extract_prof, **kwargs)

        qc_metric_paths = [
            os.path.join(qc_out_dir, f"{self.point_name}_nonzero_mean_stats.csv"),
            os.path.join(qc_out_dir, f"{self.point_name}_total_intensity_stats.csv"),
            os.path.join(qc_out_dir, f"{self.point_name}_percentile_99_9_stats.csv"),
        ]
        if all(os.path.exists(qc_file) for qc_file in qc_metric_paths) and not self.overwrite:
            warnings.warn(f"All QC metrics already extracted for FOV {self.point_name}")
            return

        metric_data = compute_qc_metrics_direct(
            image_data=self.__fov_data,
            fov_name=self.point_name,
            gaussian_blur=kwargs.get("gaussian_blur", False),
            blur_factor=kwargs.get("blur_factor", 1),
        )

        # the keys of `metric_data` are used as the csv file names
        for metric_name, data in metric_data.items():
            data.to_csv(os.path.join(qc_out_dir, metric_name), index=False)

    def generate_mph(self, mph_out_dir, **kwargs):
        """Generates mph metrics from given panel, and saves output to provided directory.

        Args:
            mph_out_dir (str): where to output mph csvs to
            **kwargs (dict):
                Additional arguments for `toffy.mph_comp.compute_mph_metrics`. Accepted kwargs are:

             - mass
             - mass_start
             - mass_stop
        """
        os.makedirs(mph_out_dir, exist_ok=True)

        mph_pulse_file = os.path.join(mph_out_dir, f"{self.point_name}-mph_pulse.csv")
        if os.path.exists(mph_pulse_file) and not self.overwrite:
            warnings.warn(f"MPH pulse metrics already extracted for FOV {self.point_name}")
            return

        compute_mph_metrics(
            bin_file_dir=self.run_folder,
            csv_dir=mph_out_dir,
            fov=self.point_name,
            mass=kwargs.get("mass", 98),
            mass_start=kwargs.get("mass_start", 97.5),
            mass_stop=kwargs.get("mass_stop", 98.5),
        )

    def generate_pulse_heights(self, pulse_out_dir: str, panel: pd.DataFrame = None, **kwargs):
        """Generates pulse height csvs from bin files, and saves output to provided directory.

        Args:
            pulse_out_dir (str): where to output pulse height csvs
            panel (pd.DataFrame): Target mass integration ranges, its "Mass" column is used
            **kwargs (dict):
                Additional arguments for `toffy.normalize.write_mph_per_mass`. Accepted kwargs are:

             - start_offset (defaults to 0.3)
             - stop_offset (defaults to 0)

                For backward compatibility `mass_start` / `mass_stop` are still read when
                `start_offset` / `stop_offset` are not given.

        Raises:
            ValueError:
                Raised if the pulse heights have to be generated and `panel` is not provided
        """
        os.makedirs(pulse_out_dir, exist_ok=True)

        pulse_height_file = os.path.join(pulse_out_dir, f"{self.point_name}_pulse_heights.csv")
        if os.path.exists(pulse_height_file) and not self.overwrite:
            warnings.warn(f"Pulse heights per mass already extracted for FOV {self.point_name}")
            return

        if panel is None:
            raise ValueError("Must provide panel to generate pulse heights...")

        # `mass_start` / `mass_stop` are also the kwargs of `generate_mph` (absolute masses),
        # so the documented offset names take precedence over the legacy names
        start_offset = kwargs.get("start_offset", kwargs.get("mass_start", 0.3))
        stop_offset = kwargs.get("stop_offset", kwargs.get("mass_stop", 0))

        write_mph_per_mass(
            base_dir=self.run_folder,
            output_dir=pulse_out_dir,
            fov=self.point_name,
            masses=panel["Mass"].values,
            start_offset=start_offset,
            stop_offset=stop_offset,
        )


def build_fov_callback(*args, **kwargs):
    """Assembles callbacks to be run for each transferred FoV.

    Args:
        *args (List[str]):
            Names of member functions of `FovCallbacks` to chain together
        **kwargs (Dict[str, Any]):
            Arguments to pass to `FovCallbacks` member functions specified in *args

    Raises:
        ValueError:
            Raised on non-existant member function or missing required kwarg

    Returns:
        Callable[[str, str], None]
            Chained fov callback which will execute all specified callbacks
    """
    # retrieve all 'non-special' methods of FovCallbacks
    methods = [attr for attr in dir(FovCallbacks) if attr[0] != "_"]

    # validate user callback settings
    misc_utils.verify_in_list(arg_strings=args, valid_callbacks=methods)
    for arg in args:
        # check that the required arguments of `arg` are present in the passed `**kwargs`;
        # parameters with a default value, `*args` and `**kwargs` are optional
        params = inspect.signature(getattr(FovCallbacks, arg)).parameters.values()
        argnames = [
            param.name
            for param in params
            if param.name != "self"
            and param.default is inspect.Parameter.empty
            and param.kind
            not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
        ]
        misc_utils.verify_in_list(required_arguments=argnames, passed_arguments=list(kwargs.keys()))

    # construct actual callback
    def fov_callback(run_folder: str, point_name: str, overwrite: bool = False):
        # construct FovCallback object for given FoV, shared by all chained callbacks so
        # that they can reuse each other's cached data
        callback_obj = FovCallbacks(run_folder, point_name, overwrite)

        # for each member, retrieve the member function and run it
        for arg in args:
            if cb := getattr(callback_obj, arg, None):
                cb(**kwargs)
            else:
                # unreachable...
                raise ValueError(f"Could not locate attribute {arg} in FovCallback object")

    return fov_callback


def build_callbacks(
    run_callbacks: Iterable[str],
    intermediate_callbacks: Iterable[str] = None,
    fov_callbacks: Iterable[str] = ("extract_tiffs",),
    **kwargs,
):
    """Deduces and assembles all run & FoV callbacks for the watcher function.

    Args:
        run_callbacks (Iterable[str]):
            List of run callback names.  These will deduce the prerequisite fov callbacks
        intermediate_callbacks (Iterable[str]):
            List of intermediate callback names, these will be subsets of `run_callbacks`
            but overriden to act as `fov_callbacks`
        fov_callbacks (Iterable[str]):
            List of fov callbacks to be run, regardless of prerequisite status
        **kwargs (Dict[str, Any]):
            Arguments to pass to `RunCallbacks` and `FovCallbacks` member functions

    Raises:
        ValueError:
            Raised on non-existant member function or missing required kwarg

    Returns:
        Callable[[str, str], None], Callable[[str], None], Callable[[str], dict] | None:
            Assembled fov callback, run callback and intermediate callback, in that order.
            The intermediate callback returns a dict mapping each intermediate callback name
            to its return value, and is `None` if no intermediate callbacks were requested
    """
    methods = [attr for attr in dir(RunCallbacks) if attr[0] != "_"]

    # materialize the inputs: they are iterated several times (validation, and every call of
    # the returned callbacks) and may be any iterable, not just lists
    run_callbacks = list(run_callbacks)
    intermediate_callbacks = list(intermediate_callbacks) if intermediate_callbacks else []

    # drop duplicates but keep the order the fov callbacks were requested in
    fov_callbacks = list(dict.fromkeys(fov_callbacks))

    misc_utils.verify_in_list(requested_callbacks=run_callbacks, valid_callbacks=methods)
    if intermediate_callbacks:
        misc_utils.verify_in_list(
            intermediate_callbacks=intermediate_callbacks, valid_callbacks=methods
        )

    callbacks_with_prereq = run_callbacks + intermediate_callbacks

    for run_cb in callbacks_with_prereq:
        # check that the required arguments of `run_cb` are present in the passed `**kwargs`;
        # parameters with a default value, `*args` and `**kwargs` are optional
        params = inspect.signature(getattr(RunCallbacks, run_cb)).parameters.values()
        argnames = [
            param.name
            for param in params
            if param.name != "self"
            and param.default is inspect.Parameter.empty
            and param.kind
            not in (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
        ]

        misc_utils.verify_in_list(required_arguments=argnames, passed_arguments=list(kwargs.keys()))

        # schedule the fov callbacks this run callback depends on, after the requested ones
        for prereq in sorted(RUN_PREREQUISITES.get(run_cb, set())):
            if prereq not in fov_callbacks:
                fov_callbacks.append(prereq)

    fov_callback = build_fov_callback(*fov_callbacks, **kwargs)

    def run_callback(run_folder: str):
        callback_obj = RunCallbacks(run_folder)

        for run_cb in run_callbacks:
            if cb := getattr(callback_obj, run_cb, None):
                cb(**kwargs)
            else:
                # unreachable...
                raise ValueError(f"Could not locate attribute {run_cb} in RunCallbacks object")

    intermediate_callback = None
    if intermediate_callbacks:

        def intermediate_callback(run_folder: str):
            callback_obj = RunCallbacks(run_folder)
            inter_return_vals = {}

            for run_cb in intermediate_callbacks:
                if cb := getattr(callback_obj, run_cb, None):
                    # keyed by the name of the underlying method
                    inter_return_vals[cb.__func__.__name__] = cb(**kwargs)
                else:
                    # unreachable...
                    raise ValueError(f"Could not locate attribute {run_cb} in RunCallbacks object")

            return inter_return_vals

    return fov_callback, run_callback, intermediate_callback

1	import inspect	1✔
2	import os	1✔
3	import warnings	1✔
4	from dataclasses import dataclass, field	1✔
5	from typing import Iterable	1✔
6
7	# prevent memory leaking from creating plots that are never shown
8	import matplotlib	1✔
9	import pandas as pd	1✔
10	import xarray as xr	1✔
11	from alpineer import io_utils, misc_utils	1✔
12	from mibi_bin_tools.bin_files import _write_out, extract_bin_files	1✔
13	from mibi_bin_tools.type_utils import any_true	1✔
14
15	from toffy.bin_extraction import incomplete_fov_check	1✔
16	from toffy.image_stitching import stitch_images	1✔
17	from toffy.json_utils import missing_fov_check	1✔
18	from toffy.mph_comp import combine_mph_metrics, compute_mph_metrics, visualize_mph	1✔
19	from toffy.normalize import write_mph_per_mass	1✔
20	from toffy.panel_utils import modify_panel_ranges	1✔
21	from toffy.qc_comp import combine_qc_metrics, compute_qc_metrics_direct	1✔
22	from toffy.qc_metrics_plots import visualize_qc_metrics	1✔
23	from toffy.settings import QC_COLUMNS	1✔
24
25	matplotlib.use("Agg")	1✔
26
27	RUN_PREREQUISITES = {	1✔
28	"plot_qc_metrics": set(["generate_qc"]),
29	"plot_mph_metrics": set(["generate_mph"]),
30	"image_stitching": set(["extract_tiffs"]),
31	}
32
33
34	# If FovCallbacks ever should pass data to RunCallbacks, make this a dataclass following the
35	# field structure outlined for __fov_data and __panel in FovCallbacks
36	@dataclass	1✔
37	class RunCallbacks:	1✔
38	"""Class for run level callbacks in watcher."""	1✔
39
40	run_folder: str	1✔
41
42	def plot_qc_metrics(self, qc_out_dir: str, warn_overwrite=False, **kwargs):	1✔
43	"""Plots qc metrics generated by the `generate_qc` callback.
44
45	Args:
46	qc_out_dir (str):
47	Directory containing qc metric csv
48	warn_overwrite (bool): whether to warn if existing `_combined.csv` file found,
49	needed to curb watcher output if `plot_qc_metrics` set as intermediate callback
50	**kwargs (Dict[str, Any]):
51	Additional arguments for `toffy.qc_comp.visualize_qc_metrics`.
52	Accepted kwargs are
53
54	- axes_size
55	- wrap
56	- dpi
57	- save_dir
58	Returns:
59	dict:
60	Maps each metric name to their respective plot
61	"""
62	# filter kwargs
63	valid_kwargs = ["axes_size", "wrap", "dpi", "save_dir"]	1✔
64	viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}	1✔
65	qc_plots = {}	1✔
66
67	combine_qc_metrics(qc_out_dir, warn_overwrite=warn_overwrite)	1✔
68	for metric_name in QC_COLUMNS:	1✔
69	qc_plots[metric_name] = visualize_qc_metrics(	1✔
70	metric_name, qc_out_dir, **viz_kwargs, return_plot=True
71	)
72
73	return qc_plots	1✔
74
75	def plot_mph_metrics(self, mph_out_dir, plot_dir, warn_overwrite=False, **kwargs):	1✔
76	"""Plots mph metrics generated by the `generate_mph` callback.
77
78	Args:
79	mph_out_dir (str): directory containing mph metric csv
80	plot_dir (str): director to store the plot to
81	warn_overwrite (bool): whether to warn if existing `_combined.csv` file found,
82	needed to curb watcher output if `plot_mph_metrics` set as intermediate callback
83	**kwargs (Dict[str, Any]):
84	Additional arguments for `toffy.mph_comp.visualize_mph`.
85	Accepted kwargs are
86
87	- regression
88	Returns:
89	matplotlib.figure.Figure:
90	The figure containing the MPH plot
91	"""
92	if not os.path.exists(plot_dir):	1✔
93	os.makedirs(plot_dir)	1✔
94
95	# filter kwargs
96	valid_kwargs = [	1✔
97	"regression",
98	]
99	viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}	1✔
100
101	# set verbose to false to prevent overwrite error from popping up each FOV
102	mph_df = combine_mph_metrics(mph_out_dir, return_data=True, warn_overwrite=warn_overwrite)	1✔
103	mph_fig = visualize_mph(mph_df, plot_dir, **viz_kwargs, return_plot=True)	1✔
104
105	return mph_fig	1✔
106
107	def image_stitching(self, tiff_out_dir, **kwargs):	1✔
108	"""Stitches individual FOV channel images together into one tiff.
109
110	Args:
111	tiff_out_dir (str): directory containing extracted images
112	**kwargs (Dict[str, Any]):
113	Additional arguments for `toffy.image_stitching.stitch_images`.
114	Accepted kwargs are
115
116	- channels
117	"""
118	# filter kwargs
119	valid_kwargs = ["channels"]	1✔
120	viz_kwargs = {k: v for k, v in kwargs.items() if k in valid_kwargs}	1✔
121
122	stitch_images(tiff_out_dir, self.run_folder, **viz_kwargs)	1✔
123
124	def check_incomplete_fovs(self, tiff_out_dir, **kwargs):	1✔
125	"""Checks for partial images (even when fully extracted).
126
127	Args:
128	tiff_out_dir (str): directory containing extracted images
129	**kwargs (Dict[str, Any]):
130	Additional arguments for `toffy.bin_extractions.incomplete_fov_check`.
131	Accepted kwargs are
132
133	- num_rows
134	- num_channels
135	- signal_percent
136	Raises:
137	Warning if any FOVs have partially generated images
138	"""
139	incomplete_fov_check(self.run_folder, tiff_out_dir)	1✔
140
141	def check_missing_fovs(self, **kwargs):	1✔
142	"""Checks for associated bin/json files per FOV.
143
144	Raises:
145	Warning if any fov data is missing
146	**kwargs (Dict[str, Any]):
147	Additional arguments for `toffy.json_utils.missing_fov_check`.
148	"""
149	missing_fov_check(self.run_folder, os.path.basename(self.run_folder))	1✔
150
151
152	@dataclass	1✔
153	class FovCallbacks:	1✔
154	"""Class for FOV level callbacks in watcher."""	1✔
155
156	run_folder: str	1✔
157	point_name: str	1✔
158	overwrite: bool	1✔
159	__panel: pd.DataFrame = field(default=None, init=False)	1✔
160	__panel_prof: pd.DataFrame = field(default=None, init=False)	1✔
161	__fov_data: xr.DataArray = field(default=None, init=False)	1✔
162	__fov_data_prof: xr.DataArray = field(default=None, init=False)	1✔
163
164	def _generate_fov_data(	1✔
165	self,
166	panel: pd.DataFrame,
167	extract_prof: bool,
168	intensities=["Au", "chan_39"],
169	replace=True,
170	time_res=0.0005,
171	**kwargs,
172	):
173	"""Extracts data from bin files using the given panel.
174
175	The data and the panel are then cached members of the FovCallbacks object
176
177	Both the deficient and proficient extracted data and panel are computed and cached
178
179	Args:
180	panel (pd.DataFrame):
181	Panel used for extraction
182	extract_prof (bool):
183	If set, extract proficient data
184	intensities (bool \| List[str]):
185	Intensities argument for `mibi_bin_tools.bin_files.extract_bin_files`
186	replace (bool):
187	Whether to replace pulse images with intensity
188	time_res (float):
189	Time resolution argument for `mibi_bin_tool.bin_files.extract_bin_files`
190	**kwargs (dict):
191	Unused kwargs for other functions
192	"""
193	self.__fov_data = extract_bin_files(	1✔
194	data_dir=self.run_folder,
195	out_dir=None,
196	include_fovs=[self.point_name],
197	panel=panel,
198	intensities=intensities,
199	replace=replace,
200	time_res=time_res,
201	)
202	self.__panel = panel	1✔
203
204	if extract_prof:	1✔
205	# adds an offset of 0.3 to 'Start' and 'Stop' columns, modifying extraction range
206	# from (-0.3, 0) to (0, 0.3) for proficient extraction
207	panel_prof = modify_panel_ranges(panel, start_offset=0.3, stop_offset=0.3)	1✔
208	self.__fov_data_prof = extract_bin_files(	1✔
209	data_dir=self.run_folder,
210	out_dir=None,
211	include_fovs=[self.point_name],
212	panel=panel_prof,
213	intensities=intensities,
214	replace=replace,
215	time_res=time_res,
216	)
217	self.__panel_prof = panel_prof	1✔
218
219	def extract_tiffs(	1✔
220	self, tiff_out_dir: str, panel: pd.DataFrame, extract_prof: bool = True, **kwargs
221	):
222	"""Extract tiffs into provided directory, using given panel.
223
224	Done for both the extracted deficient and proficient data
225
226	Args:
227	tiff_out_dir (str):
228	Path where tiffs are written
229	panel (pd.DataFrame):
230	Target mass integration ranges
231	extract_prof (bool):
232	If set, extract mass proficient data
233	**kwargs (dict):
234	Additional arguments for `mibi_bin_tools.bin_files.extract_bin_files`.
235	Accepted kwargs are
236
237	- intensities
238	- replace
239	- time_res
240	"""
241	if not os.path.exists(tiff_out_dir):	1✔
242	os.makedirs(tiff_out_dir)	1✔
243
244	extracted_img_dir = os.path.join(tiff_out_dir, self.point_name)	1✔
245	unextracted_chan_tiffs = []	1✔
246
247	# in the case all images have been extracted, simply return
248	if os.path.exists(extracted_img_dir) and not self.overwrite:	1✔
249	all_chan_tiffs = [f"{ct}.tiff" for ct in panel["Target"]]	1✔
250	extracted_chan_tiffs = io_utils.list_files(extracted_img_dir, substrs=".tiff")	1✔
251	unextracted_chan_tiffs = set(all_chan_tiffs).difference(extracted_chan_tiffs)	1✔
252
253	if len(unextracted_chan_tiffs) == 0:	1✔
254	warnings.warn(f"Images already extracted for FOV {self.point_name}")	×
255	return	×
256
257	# ensure we don't re-extract channels that have already been extracted
258	if unextracted_chan_tiffs and not self.overwrite:	1✔
259	unextracted_chans = io_utils.remove_file_extensions(unextracted_chan_tiffs)	1✔
260	panel = panel[panel["Target"].isin(unextracted_chans)]	1✔
261
262	if self.__fov_data is None or self.__fov_data_prof is None:	1✔
263	self._generate_fov_data(panel, extract_prof, **kwargs)	1✔
264
265	intensities = kwargs.get("intensities", ["Au", "chan_39"])	1!
266	if any_true(intensities) and type(intensities) is not list:	1✔
267	intensities = list(self.__fov_data.channel.values)	1✔
268
269	_write_out(	1✔
270	img_data=self.__fov_data[0, :, :, :, :].values,
271	out_dir=tiff_out_dir,
272	fov_name=self.point_name,
273	targets=list(self.__fov_data.channel.values),
274	intensities=intensities,
275	)
276
277	if extract_prof:	1✔
278	_write_out(	1✔
279	img_data=self.__fov_data_prof[0, :, :, :, :].values,
280	out_dir=tiff_out_dir + "_proficient",
281	fov_name=self.point_name,
282	targets=list(self.__fov_data.channel.values),
283	intensities=intensities,
284	)
285
286	def generate_qc(	1✔
287	self, qc_out_dir: str, panel: pd.DataFrame = None, extract_prof: bool = True, **kwargs
288	):
289	"""Generates qc metrics from given panel, and saves output to provided directory.
290
291	Args:
292	qc_out_dir (str):
293	Path where qc_metrics are written
294	panel (pd.DataFrame):
295	Target mass integration ranges
296	extract_prof (bool):
297	If set, extract mass proficient data
298	**kwargs (dict):
299	Additional arguments for `toffy.qc_comp.compute_qc_metrics`. Accepted kwargs are:
300
301	- gaussian_blur
302	- blur_factor
303	"""
304	if not os.path.exists(qc_out_dir):	1✔
305	os.makedirs(qc_out_dir)	1✔
306
307	if self.__fov_data is None:	1✔
308	if panel is None:	1✔
309	raise ValueError("Must provide panel if fov data is not already generated...")	×
310	self._generate_fov_data(panel, extract_prof, **kwargs)	1✔
311
312	qc_metric_paths = [	1✔
313	os.path.join(qc_out_dir, f"{self.point_name}_nonzero_mean_stats.csv"),
314	os.path.join(qc_out_dir, f"{self.point_name}_total_intensity_stats.csv"),
315	os.path.join(qc_out_dir, f"{self.point_name}_percentile_99_9_stats.csv"),
316	]
317	if all([os.path.exists(qc_file) for qc_file in qc_metric_paths]) and not self.overwrite:	1✔
318	warnings.warn(f"All QC metrics already extracted for FOV {self.point_name}")	1✔
319	return	1✔
320
321	metric_data = compute_qc_metrics_direct(	1✔
322	image_data=self.__fov_data,
323	fov_name=self.point_name,
324	gaussian_blur=kwargs.get("gaussian_blur", False),
325	blur_factor=kwargs.get("blur_factor", 1),
326	)
327
328	for metric_name, data in metric_data.items():	1✔
329	data.to_csv(os.path.join(qc_out_dir, metric_name), index=False)	1✔
330
def generate_mph(self, mph_out_dir, **kwargs):
    """Computes the mph metrics of this FOV and writes them to the provided directory.

    The computation is skipped, with a warning, when `<point_name>-mph_pulse.csv` is already
    present in `mph_out_dir` and `self.overwrite` is not set.

    Args:
        mph_out_dir (str): directory receiving the mph csvs, created when it does not exist
        **kwargs (dict):
            Additional arguments for `toffy.mph_comp.compute_mph_metrics`. Accepted kwargs are:

            - mass (defaults to 98)
            - mass_start (defaults to 97.5)
            - mass_stop (defaults to 98.5)
    """
    if not os.path.exists(mph_out_dir):
        os.makedirs(mph_out_dir)

    fov_name = self.point_name
    pulse_csv = os.path.join(mph_out_dir, f"{fov_name}-mph_pulse.csv")
    already_extracted = os.path.exists(pulse_csv) and not self.overwrite

    if not already_extracted:
        # only the integration window is configurable, everything else comes from the FOV
        mass_window = {
            "mass": kwargs.get("mass", 98),
            "mass_start": kwargs.get("mass_start", 97.5),
            "mass_stop": kwargs.get("mass_stop", 98.5),
        }
        compute_mph_metrics(
            bin_file_dir=self.run_folder,
            csv_dir=mph_out_dir,
            fov=fov_name,
            **mass_window,
        )
        return

    warnings.warn(f"MPH pulse metrics already extracted for FOV {fov_name}")
359
def generate_pulse_heights(self, pulse_out_dir: str, panel: pd.DataFrame = None, **kwargs):
    """Generates pulse height csvs from bin files, and saves output to provided directory.

    Nothing is recomputed, and a warning is issued, when `<point_name>_pulse_heights.csv`
    already exists in `pulse_out_dir` and `self.overwrite` is not set.

    Args:
        pulse_out_dir (str): where to output pulse height csvs, created when missing
        panel (pd.DataFrame): Target mass integration ranges, its `Mass` column is used
        **kwargs (dict):
            Additional arguments for `toffy.normalize.write_mph_per_mass`. Accepted kwargs are:

            - start_offset (defaults to 0.3)
            - stop_offset (defaults to 0)

            The legacy keys `mass_start` / `mass_stop` are still read, but only when the
            matching offset key is absent.

    Raises:
        ValueError:
            Raised when the pulse heights have to be computed but no panel was provided
    """
    os.makedirs(pulse_out_dir, exist_ok=True)

    pulse_height_file = os.path.join(pulse_out_dir, f"{self.point_name}_pulse_heights.csv")
    if os.path.exists(pulse_height_file) and not self.overwrite:
        warnings.warn(f"Pulse heights per mass already extracted for FOV {self.point_name}")
        return

    # fail with an actionable message instead of "'NoneType' object is not subscriptable"
    if panel is None:
        raise ValueError("Must provide panel to generate pulse heights...")

    # The documented keys take precedence. `mass_start` / `mass_stop` are kept as a fallback
    # for existing callers only: they are also the keys read by `generate_mph`, where they
    # denote an absolute mass range rather than an offset.
    start_offset = kwargs.get("start_offset", kwargs.get("mass_start", 0.3))
    stop_offset = kwargs.get("stop_offset", kwargs.get("mass_stop", 0))

    write_mph_per_mass(
        base_dir=self.run_folder,
        output_dir=pulse_out_dir,
        fov=self.point_name,
        masses=panel["Mass"].values,
        start_offset=start_offset,
        stop_offset=stop_offset,
    )
388
389
def build_fov_callback(*args, **kwargs):
    """Assembles callbacks to be run for each transferred FoV.

    Args:
        *args (List[str]):
            Names of member functions of `FovCallbacks` to chain together
        **kwargs (Dict[str, Any]):
            Arguments to pass to `FovCallbacks` member functions specified in *args

    Raises:
        ValueError:
            Raised on non-existent member function or missing required kwarg

    Returns:
        Callable[[str, str, bool], None]:
            Chained fov callback which will execute all specified callbacks
    """
    # retrieve all 'non-special' methods of FovCallbacks; non-callable attributes are left out
    # because they cannot be introspected or invoked below
    methods = [
        attr
        for attr in dir(FovCallbacks)
        if attr[0] != "_" and callable(getattr(FovCallbacks, attr))
    ]

    # validate user callback settings
    misc_utils.verify_in_list(arg_strings=args, valid_callbacks=methods)
    passed_arguments = list(kwargs)
    for arg in args:
        # check that every named argument of `arg` is present in the passed `**kwargs`
        argnames = [
            argname
            for argname in inspect.getfullargspec(getattr(FovCallbacks, arg)).args
            if argname != "self"
        ]
        misc_utils.verify_in_list(required_arguments=argnames, passed_arguments=passed_arguments)

    # construct actual callback
    def fov_callback(run_folder: str, point_name: str, overwrite: bool = False):
        # construct FovCallback object for given FoV
        callback_obj = FovCallbacks(run_folder, point_name, overwrite)

        # for each member, retrieve the member function and run it; every member receives
        # the full set of kwargs
        for arg in args:
            if cb := getattr(callback_obj, arg, None):
                cb(**kwargs)
            else:
                # unreachable...
                raise ValueError(f"Could not locate attribute {arg} in FovCallback object")

    return fov_callback


def build_callbacks(
    run_callbacks: Iterable[str],
    intermediate_callbacks: Iterable[str] = None,
    fov_callbacks: Iterable[str] = ("extract_tiffs",),
    **kwargs,
):
    """Deduces and assembles all run & FoV callbacks for the watcher function.

    Args:
        run_callbacks (Iterable[str]):
            List of run callback names. These will deduce the prerequisite fov callbacks
        intermediate_callbacks (Iterable[str]):
            List of intermediate callback names, these will be subsets of `run_callbacks`
            but overridden to act as `fov_callbacks`
        fov_callbacks (Iterable[str]):
            List of fov callbacks to be run, regardless of prerequisite status
        **kwargs (Dict[str, Any]):
            Arguments to pass to `RunCallbacks` and `FovCallbacks` member functions

    Raises:
        ValueError:
            Raised on non-existent member function or missing required kwarg

    Returns:
        Tuple[Callable, Callable, Optional[Callable]]:
            Assembled fov callback, run callback and intermediate callback, in that order.
            The intermediate callback is `None` when no `intermediate_callbacks` are given;
            otherwise it returns a dict mapping each callback name to its return value
    """
    methods = [
        attr
        for attr in dir(RunCallbacks)
        if attr[0] != "_" and callable(getattr(RunCallbacks, attr))
    ]

    # Materialize the inputs once: they are validated here and iterated again on every
    # invocation of the returned callbacks, and plain lists can be concatenated regardless of
    # the iterable types the caller passed in.
    run_callbacks = list(run_callbacks)
    intermediate_callbacks = (
        list(intermediate_callbacks) if intermediate_callbacks is not None else []
    )

    # dict keys act as an insertion-ordered set, so FoV callbacks run in a reproducible order
    fov_callback_names = dict.fromkeys(fov_callbacks)

    misc_utils.verify_in_list(requested_callbacks=run_callbacks, valid_callbacks=methods)
    if intermediate_callbacks:
        misc_utils.verify_in_list(
            intermediate_callbacks=intermediate_callbacks, valid_callbacks=methods
        )

    passed_arguments = list(kwargs)
    for run_cb in run_callbacks + intermediate_callbacks:
        argnames = [
            argname
            for argname in inspect.getfullargspec(getattr(RunCallbacks, run_cb)).args
            if argname != "self"
        ]
        misc_utils.verify_in_list(required_arguments=argnames, passed_arguments=passed_arguments)

        # each run level callback may require FoV level callbacks to have run beforehand
        fov_callback_names.update(dict.fromkeys(RUN_PREREQUISITES.get(run_cb, set())))

    # callback names are positional, their arguments are forwarded as keywords
    fov_callback = build_fov_callback(*fov_callback_names, **kwargs)

    def run_callback(run_folder: str):
        callback_obj = RunCallbacks(run_folder)

        for run_cb in run_callbacks:
            if cb := getattr(callback_obj, run_cb, None):
                cb(**kwargs)
            else:
                # unreachable...
                raise ValueError(f"Could not locate attribute {run_cb} in RunCallbacks object")

    intermediate_callback = None
    if intermediate_callbacks:

        def intermediate_callback(run_folder: str):
            callback_obj = RunCallbacks(run_folder)
            inter_return_vals = {}

            for run_cb in intermediate_callbacks:
                if cb := getattr(callback_obj, run_cb, None):
                    # results are keyed by the name of the underlying member function
                    inter_return_vals[cb.__func__.__name__] = cb(**kwargs)
                else:
                    # unreachable...
                    raise ValueError(f"Could not locate attribute {run_cb} in RunCallbacks object")

            return inter_return_vals

    return fov_callback, run_callback, intermediate_callback

angelolab / toffy / 11961352837

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous