6040338364

Committed 31 Aug 2023 05:08PM UTC coverage: 97.997% (+0.2%) from 97.838%

Build # 6040338364

Build Type

push

github-actions

Committed by

kalekundert

Commit Message

chore: lint with ruff

Run Details

783 of 799 relevant lines covered (98.0%)

3.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.44

/wellmap/plot.py

#!/usr/bin/env python3

"""\
Visualize the plate layout described by a wellmap TOML file.

Usage:
    wellmap <toml> [<param>...] [-o <path>] [-p] [-c <color>] [-f]

Arguments:
    <toml>
        TOML file describing the plate layout to display.  For a complete 
        description of the file format, refer to:
        
        https://wellmap.readthedocs.io/en/latest/file_format.html

    <param>
        The name(s) of one or more experimental parameters from the above TOML 
        file to project onto the plate.  For example, if the TOML file contains 
        something equivalent to `well.A1.conc = 1`, then "conc" would be a 
        valid parameter name.

        If no names are given, the default is to display any parameters that 
        have at least two different values.  For complex layouts, this may 
        result in a figure too big to fit on the screen.  The best solution for 
        this (at the moment) is just to specify some parameters to focus on.

Options:
    -o --output PATH
        Output an image of the layout to the given path.  The file type is 
        inferred from the file extension.  If the path contains a dollar sign 
        (e.g. '$.svg'), the dollar sign will be replaced with the base name of 
        the <toml> path.

    -p --print
        Print a paper copy of the layout, e.g. to reference when setting up an 
        experiment.  The default printer for the system will be used.  To see 
        the current default printer, run: `lpstat -d`.  To change the default 
        printer, run: `lpoptions -d <printer name>`.  When printing, the 
        default color scheme is changed to 'dimgray'.  This can still be 
        overridden using the '--color' flag.

    -c --color NAME
        Use the given color scheme to illustrate which wells have which 
        properties.  The given NAME must be one of the color scheme names 
        understood by either `matplotlib` or `colorcet`.  See the links below 
        for the full list of supported colors, but some common choices are 
        given below.  The default is 'rainbow':

        rainbow:  blue, green, yellow, orange, red
        viridis:  purple, green, yellow
        plasma:   purple, red, yellow
        coolwarm: blue, red
        tab10:    blue, orange, green, red, purple, ...
        dimgray:  gray, black

        Matplotlib colors:
        https://matplotlib.org/examples/color/colormaps_reference.html

        Colorcet colors:
        http://colorcet.pyviz.org/

    -f --foreground
        Don't attempt to return the terminal to the user while the GUI runs.  
        This is meant to be used on systems where the program crashes if run in 
        the background.
"""

import wellmap
import colorcet
import numpy as np
import matplotlib.pyplot as plt
import sys, os

from wellmap import LayoutError
from inform import plural
from matplotlib.colors import Normalize
from pathlib import Path
from dataclasses import dataclass
from .util import *

def main():
    """
    Run the :prog:`wellmap` command-line program.

    The command line is parsed with docopt, using the usage text in the module
    docstring.  The requested layout is then plotted and, depending on the
    options given, written to a file (``--output``), sent to the default
    printer via ``lpr`` (``--print``), or displayed in a GUI window (when
    neither of those options is given).

    `UsageError` and `LayoutError` exceptions are reported by printing their
    messages, instead of being allowed to propagate.
    """
    import docopt
    from subprocess import Popen, PIPE

    try:
        args = docopt.docopt(__doc__)
        toml_path = Path(args['<toml>'])
        show_gui = not args['--output'] and not args['--print']

        # Return the terminal to the user by continuing in a child process.
        # Not every platform provides `os.fork()`; where it's missing, just
        # keep running in the foreground instead of crashing.
        can_fork = hasattr(os, 'fork')
        if show_gui and not args['--foreground'] and can_fork:
            if os.fork() != 0:
                sys.exit()

        style = Style()
        default_color = 'dimgray' if args['--print'] else 'rainbow'
        style.color_scheme = args['--color'] or default_color

        fig = show(toml_path, args['<param>'], style=style)

        if args['--output']:
            out_path = args['--output'].replace('$', toml_path.stem)
            fig.savefig(out_path)
            print("Layout written to:", out_path)

        if args['--print']:
            lpr = [
                'lpr',
                '-o', 'ppi=600',
                '-o', 'position=top-left',
                '-o', 'page-top=36',  # 72 pt == 1 in
                '-o', 'page-left=72',
            ]
            # The context manager closes the pipe (so that `lpr` sees the end
            # of the image) and waits for the process to finish.
            with Popen(lpr, stdin=PIPE) as p:
                fig.savefig(p.stdin, format='png', dpi=600)
            print("Layout sent to printer.")

        if show_gui:
            title = str(toml_path)
            if args['<param>']:
                title += f' [{", ".join(args["<param>"])}]'

            # `FigureCanvasBase.set_window_title()` is not available in recent
            # versions of matplotlib; the window title belongs to the canvas
            # manager.  There's no manager (and no window to label) when the
            # figure isn't attached to a GUI.
            manager = getattr(fig.canvas, 'manager', None)
            if manager is not None:
                manager.set_window_title(title)
            plt.show()

    except UsageError as err:
        print(err)
    except LayoutError as err:
        err.toml_path = toml_path
        print(err)

def show(toml_path, params=None, *, style=None):
    """
    Plot the microplate layout described by the given TOML file.

    Looking at a layout before analyzing any data is a good way to confirm
    that every well is annotated as intended.  The :prog:`wellmap`
    command-line program exists for that purpose, but this function provides
    the same visualization directly from python, which can be more convenient
    (e.g. in a jupyter notebook).

    :param str,pathlib.Path toml_path:
        The path to a file describing the layout of one or more plates.  See
        the :doc:`/file_format` page for details about this file.

    :param str,list params:
        The names of one or more experimental parameters from the above TOML
        file to visualize.  For example, if the TOML file contains something
        equivalent to ``well.A1.conc = 1``, then "conc" would be a valid
        parameter name.  If not specified, the default is to display any
        parameters that have at least two different values.

    :param Style style:
        Settings that control miscellaneous aspects of the plot, e.g. colors,
        dimensions, etc.

    :rtype: matplotlib.figure.Figure
    """
    # Loading the layout and plotting it are separate steps so that data
    # frames from other sources can be plotted with `show_df()`.
    layout = wellmap.load(toml_path)
    fig = show_df(layout, params, style=style)
    return fig

def show_df(df, cols=None, *, style=None):
    """
    Plot the microplate layout described by the given data frame.

    In contrast to `show()` and the :prog:`wellmap` command-line program, this
    function does not require that the layout come from a TOML file.  Any data
    frame where each row identifies a well can be plotted, which makes it
    possible to:

    - Project experimental data onto a layout.
    - Visualize layouts that weren't generated by wellmap in the first place.

    For example, experimental data could be loaded into a data frame and
    passed straight to this function, without ever specifying a layout, as a
    quick way to get a feel for the data.

    :param pandas.DataFrame df:
        The data frame describing the layout to plot.  The data frame must be
        tidy_: each row must describe a single well, and each column must
        describe a single aspect of each well.  The location of each well must
        be specified using one or more of the same columns that wellmap uses
        for that purpose, namely:

        - *plate*
        - *well*
        - *well0*
        - *row*
        - *col*
        - *row_i*
        - *col_j*

        See `load()` for the exact meanings of these columns.  It's not
        necessary to specify all of these columns, there just needs to be
        enough information to locate each well.  If the *plate* column is
        missing, it is assumed that all of the wells are on the same plate.  It
        is also assumed that any redundant columns (e.g. *row* and *row_i*)
        will be consistent with each other.

        Any scalar-valued columns other than these can be plotted.

    :param str,list cols:
        Which columns to plot onto the layout.  The columns used to locate the
        wells (listed above) cannot be plotted.  The default is to include any
        columns that have at least two different values.

    :param Style style:
        Settings that control miscellaneous aspects of the plot, e.g. colors,
        dimensions, etc.

    :rtype: matplotlib.figure.Figure
    """

    # The structure of this function is a workaround for (what I consider to
    # be) a small and obscure matplotlib bug: when a figure is displayed in
    # the GUI, `set_size_inches()` sets the height of the whole window, so the
    # figure itself ends up too short by the height of the GUI control panel.
    # The height of that panel differs between backends, and I don't know of
    # any way to query it, so `set_size_inches()` can't be relied upon.
    #
    # The only dependable way to control the size of the figure is to specify
    # it when the figure is created, which means the size has to be known in
    # advance.  It would be more natural to set the size last, once everything
    # in the figure is known.  Instead, some things have to be worked out
    # twice: once to find out how big they will be, and again to actually add
    # them to the figure.
    #
    # This mostly affects the colorbar labels.  They're the hardest part of
    # the layout to predict, because they come from the TOML file and may be
    # either very narrow or very wide.  So the labels are first drawn on a
    # dummy figure to measure their widths, and then enough room is set aside
    # for them in the real figure.
    #
    # The plate dimensions also have to be worked out twice, but that's a
    # simpler calculation.

    style = style if style else Style()

    df = require_well_locations(df)
    plates = sorted(df['plate'].unique())
    params = pick_params(df, cols)

    fig, axes, dims = setup_axes(df, plates, params, style)

    try:
        # Each row of axes shows one parameter: one plate per column, followed
        # by the color bar in the last column.
        for param, axes_row in zip(params, axes):
            *plate_axes, bar_ax = axes_row
            cmap = get_colormap(style[param].color_scheme)
            colors = setup_color_bar(bar_ax, df, param, cmap)

            for plate, plate_ax in zip(plates, plate_axes):
                plot_plate(plate_ax, df, plate, param, style, dims, colors)

        # Name the parameters along the left edge and the plates along the top
        # edge.
        for param, left_ax in zip(params, axes[:, 0]):
            left_ax.set_ylabel(param)
        for plate, top_ax in zip(plates, axes[0, :]):
            top_ax.set_xlabel(plate)
            top_ax.xaxis.set_label_position('top')

        # Only the top row keeps its column labels, and only the leftmost
        # column keeps its row labels.  The color bar axes are left alone.
        for inner_ax in axes[1:, :-1].flat:
            inner_ax.set_xticklabels([])
        for inner_ax in axes[:, 1:-1].flat:
            inner_ax.set_yticklabels([])

    except BaseException:
        # Don't leave a half-finished figure registered with pyplot.
        plt.close(fig)
        raise

    return fig

def plot_plate(ax, df, plate, param, style, dims, colors):
    """
    Draw one parameter of one plate as a heatmap on the given axes.

    :param ax: The axes to draw on.
    :param df: The data frame describing every well; must have the *plate*,
        *row_i* and *col_j* columns in addition to the given parameter.
    :param plate: The plate to draw; only rows of the data frame with this
        value in the *plate* column are used.
    :param param: The name of the column to draw.
    :param style: Unused; accepted for the sake of the callers.
    :param dims: An object providing the shape of the plate, the row/column
        offsets (``i0``/``j0``), and the tick positions and labels.
    :param colors: An object providing ``transform()`` to convert parameter
        values into numbers, and the ``norm`` and ``cmap`` used to convert
        those numbers into colors.
    """
    # Fill in a matrix with the number representing each value of the given
    # experimental parameter.  Wells that aren't in the layout stay NaN.
    matrix = np.full(dims.shape, np.nan)
    q = df.query('plate == @plate')

    # Iterate over just the columns that are needed.  `iterrows()` would
    # convert each row to a single dtype, which turns the integer indices into
    # floats (that can't be used as indices) if every column is numeric and
    # any of them holds floats.
    wells = zip(q['row_i'], q['col_j'], q[param])
    for row_i, col_j, value in wells:
        matrix[row_i - dims.i0, col_j - dims.j0] = colors.transform(value)

    # Plot a heatmap.
    ax.imshow(
            matrix,
            norm=colors.norm,
            cmap=colors.cmap,
            origin='upper',
            interpolation='nearest',
    )

    # Major ticks carry the row/column labels; minor ticks fall between the
    # wells and only exist to draw the grid lines.
    ax.set_xticks(dims.xticks)
    ax.set_yticks(dims.yticks)
    ax.set_xticks(dims.xticksminor, minor=True)
    ax.set_yticks(dims.yticksminor, minor=True)
    ax.set_xticklabels(dims.xticklabels)
    ax.set_yticklabels(dims.yticklabels)
    ax.grid(which='minor')
    ax.tick_params(which='both', axis='both', length=0)
    ax.xaxis.tick_top()

def pick_params(df, user_params):
    """
    Decide which columns of the given data frame to plot.

    :param df: The data frame describing the layout.
    :param user_params: The name(s) of the columns requested by the user;
        either a single string, a list of strings, or something false (e.g.
        `None`) to request the default.

    :returns: The requested columns, if any were requested.  Otherwise, every
        column that has at least two different values and isn't one of the
        columns wellmap uses to locate wells.

    :raises UsageError: if any requested column doesn't exist, or if no
        columns were requested and none has at least two different values.
    :raises LayoutError: if no columns were requested and the data frame has
        no columns other than those used to locate wells.
    """
    if isinstance(user_params, str):
        user_params = [user_params]

    wellmap_cols = ['plate', 'well', 'well0', 'row', 'col', 'row_i', 'col_j', 'path']
    user_cols = [x for x in df.columns if x not in wellmap_cols]

    if user_params:
        # Complain if the user specified any columns that don't exist.

        # Using lists (slower) instead of sets (faster) to maintain the order
        # of the columns in case we want to print an error message.
        unknown_params = [
                x for x in user_params
                if x not in user_cols
        ]
        if unknown_params:
            raise UsageError(f"No such {plural(unknown_params):parameter/s}: {quoted_join(unknown_params)}\nDid you mean: {quoted_join(user_cols)}")

        return user_params

    # If the user didn't specify any columns, show any that have more than one
    # unique value.  Note that `nunique()` doesn't count missing values, so a
    # column with no values at all has 0 unique values.  Such columns have
    # nothing to show, and so are treated like those with a single value.
    degenerate_cols = [
            x for x in user_cols
            if df[x].nunique() < 2
    ]
    non_degenerate_cols = [
            x for x in user_cols
            if x not in degenerate_cols
    ]
    if non_degenerate_cols:
        return non_degenerate_cols

    if degenerate_cols:
        raise UsageError(f"Found only degenerate parameters (i.e. with the same value in every well): {quoted_join(degenerate_cols)}")

    raise LayoutError("No experimental parameters found.")

def setup_axes(df, plates, params, style):
    """
    Create a figure with a grid of fixed-size axes: one row per parameter,
    and one column per plate plus a final column for the color bars.

    :returns: A tuple of the figure, the 2D array of axes, and the
        `Dimensions` object describing the extent of the plates.
    """
    from mpl_toolkits.axes_grid1 import Divider
    from mpl_toolkits.axes_grid1.axes_size import Fixed

    # These assumptions let us simplify some code, and should always be true.
    num_plates = len(plates)
    assert num_plates > 0
    num_params = len(params)
    assert num_params > 0

    # Work out how much room the data will need:
    dims = Dimensions(df)
    bar_label_width = guess_param_label_width(df, params)

    # Horizontal divisions (in inches): the left margin, then a plate and some
    # padding for each plate.  The padding after the last plate is replaced by
    # the divisions needed for the color bar and its labels.
    plate_width = style.cell_size * dims.num_cols
    h_divs = [style.left_margin]
    h_divs.extend([plate_width, style.pad_width] * num_plates)
    h_divs[-1:] = [
            style.bar_pad_width,
            style.bar_width,
            style.right_margin + bar_label_width,
    ]

    # Vertical divisions (in inches): the top margin, then a row and some
    # padding for each parameter.  Each row must be tall enough for both the
    # plate and the color bar.  The padding after the last row is replaced by
    # the bottom margin.
    v_divs = [style.top_margin]
    for param in params:
        row_height = max(
                style.cell_size * dims.num_rows,
                style.bar_width * dims.num_values[param],
        )
        v_divs.extend([row_height, style.pad_height])
    v_divs[-1:] = [style.bottom_margin]

    # The figure is exactly as big as all of its divisions put together.
    fig, axes = plt.subplots(
            num_params,
            num_plates + 1,  # +1 for the colorbar axes.
            figsize=(sum(h_divs), sum(v_divs)),
            squeeze=False,
    )

    # Pin each axes to its division.  The vertical divisions are listed from
    # the top down, so they are reversed before being given to the divider.
    # The odd-numbered divisions hold axes; the even-numbered ones are the
    # margins and padding.
    h_sizes = list(map(Fixed, h_divs))
    v_sizes = list(map(Fixed, v_divs[::-1]))
    divider = Divider(fig, (0.0, 0.0, 1, 1), h_sizes, v_sizes, aspect=False)

    for i, j in np.ndindex(num_params, num_plates + 1):
        locator = divider.new_locator(nx=2*j + 1, ny=2*(num_params - i) - 1)
        axes[i,j].set_axes_locator(locator)

    return fig, axes, dims

def setup_color_bar(ax, df, param, cmap):
    """
    Draw the color bar for the given parameter on the given axes.

    :returns: The `Colors` object describing how the values of the parameter
        map to colors, so that the plates can be drawn with the same colors.
    """
    from matplotlib.colorbar import ColorbarBase

    colors = Colors(cmap, df, param)
    bar_kwargs = dict(
            norm=colors.norm,
            cmap=colors.cmap,
            boundaries=colors.boundaries,
    )

    # One labeled tick per value of the parameter.
    color_bar = ColorbarBase(ax, **bar_kwargs)
    color_bar.set_ticks(colors.ticks)
    color_bar.set_ticklabels(colors.ticklabels)

    # Flip the bar so that the first value is at the top.
    ax.invert_yaxis()

    return colors

def guess_param_label_width(df, params):
    """
    Measure how much room the widest color bar label will need.

    The unique values of each parameter are drawn as tick labels on a
    temporary figure, and the widest of those labels is measured.

    :returns: The width of the widest label, in inches, or 0 if there are no
        parameters.
    """
    # I've seen some posts suggesting that this might not work on Macs.  I
    # can't test that, but if this ends up being a problem, I probably need to
    # wrap this in a try/except block and fall back to guessing a width based
    # on the number of characters in the string representation of each label.

    width = 0
    fig, ax = plt.subplots()

    # Always close the temporary figure, even if measuring the labels fails.
    # Otherwise it would stay registered with pyplot and might be displayed
    # alongside the real figure.
    try:
        for param in params:
            labels = df[param].unique()
            ax.set_yticks(range(len(labels)))
            ax.set_yticklabels(labels)

            width = max(width, get_yticklabel_width(fig, ax))
    finally:
        plt.close(fig)

    return width

def get_colormap(name):
    """
    Look up the color map with the given name.

    Names known to colorcet take precedence; any other name is passed on to
    matplotlib.
    """
    try:
        cmap = colorcet.cm[name]
    except KeyError:
        cmap = plt.get_cmap(name)
    return cmap

def get_yticklabel_width(fig, ax):
    """
    Measure the widest y-axis tick label on the given axes.

    :param fig: The figure used to obtain a renderer.
    :param ax: The axes whose y-axis tick labels should be measured.

    :returns: The width of the widest label, in inches, or 0 if the axes has
        no y-axis tick labels.
    """
    # With some backends, getting the renderer like this may trigger a warning
    # and cause matplotlib to drop down to the Agg backend.
    #
    # The `matplotlib.tight_layout` module is no longer importable in recent
    # versions of matplotlib, so prefer asking the canvas for its renderer
    # (which is what that module did for figures that hadn't been drawn yet).
    # Not every canvas can provide a renderer, though, so fall back on the
    # figure itself (a private matplotlib method; NOTE(review): confirm it's
    # present in the versions that need to be supported), and finally on the
    # legacy module.
    canvas = fig.canvas
    if hasattr(canvas, 'get_renderer'):
        renderer = canvas.get_renderer()
    elif hasattr(fig, '_get_renderer'):
        renderer = fig._get_renderer()
    else:
        from matplotlib import tight_layout
        renderer = tight_layout.get_renderer(fig)

    # Window extents are measured in pixels; divide by the DPI to get inches.
    width = max(
            (
                artist.get_window_extent(renderer).width
                for artist in ax.get_yticklabels()
            ),
            default=0,
    )
    dpi = ax.get_figure().get_dpi()

    return width / dpi

_dataclass_kwargs = {}
if sys.version_info >= (3, 10):
    _dataclass_kwargs['kw_only'] = True

@dataclass(**_dataclass_kwargs)
class Style:
    """
    Describe how to plot well layouts.

    Style objects exist to be passed to `show()` or `show_df()`, where they 
    determine various aspects of the plots' appearances.

    .. warning::

        When constructing style objects, use keyword arguments instead of 
        positional arguments.  The order of the arguments is not guaranteed and 
        may change in any minor version of wellmap!  You'll get an immediate 
        error if you try to use positional arguments in python≥3.10, but before 
        then it's possible to shoot yourself in the foot.
    """

    cell_size: float = 0.25
    """
    The size of the boxes representing each well, in inches.
    """

    pad_width: float = 0.20
    """
    The vertical padding between layouts, in inches.
    """

    pad_height: float = 0.20
    """
    The horizontal padding between layouts, in inches.
    """

    bar_width: float = 0.15
    """
    The width of the color bar, in inches.
    """

    bar_pad_width: float = pad_width
    """
    The horizontal padding between the color bar and the nearest layout, in 
    inches.
    """

    top_margin: float = 0.5
    """
    The space between the layouts and the top edge of the figure, in inches.
    """

    left_margin: float = 0.5
    """
    The space between the layouts and the left edge of the figure, in inches.
    """

    right_margin: float = pad_width
    """
    The space between the layouts and the right edge of the figure, in inches.
    """

    bottom_margin: float = pad_height
    """
    The space between the layouts and the bottom edge of the figure, in inches.
    """

    color_scheme: str = 'rainbow'
    """
    The name of the color scheme to use.  Each different value for each 
    different parameter will be assigned a color from this scheme.  Any 
    name understood by either colorcet_ or matplotlib_ can be used.

    .. _matplotlib: https://matplotlib.org/examples/color/colormaps_reference.html
    .. _colorcet: http://colorcet.pyviz.org/
    """

    def __post_init__(self):
        self.params = {}

    def __getitem__(self, param):
        try:
            return self.params[param]
        except KeyError:
            self.params[param] = ps = ParamStyle(self)
            return ps


class ParamStyle:
    """
    The style settings that apply to a single parameter.

    Any attribute that hasn't been set on this object is looked up on the
    style object given to the constructor.
    """
    # It might be worth distinguishing between settings that can/can't be given
    # on a per-parameter basis.  That would involve this class raising an
    # exception when trying to set an invalid attribute.  Right now, anything
    # goes.

    def __init__(self, style):
        self.style = style

    def __getattr__(self, name):
        # This method is only called when normal attribute lookup fails.  If
        # that happens for the `style` attribute itself, then the constructor
        # hasn't run (e.g. the object is in the middle of being copied or
        # unpickled), and trying to delegate would recurse forever.
        if name == 'style':
            raise AttributeError(name)
        return getattr(self.style, name)


class Dimensions:
    """
    The extent of the wells in a layout, and the ticks needed to label them.

    The dimensions are calculated from the *row_i* and *col_j* columns of the
    whole data frame, so they are the same for every plate.
    """

    def __init__(self, df):
        row_indices = df['row_i']
        col_indices = df['col_j']

        # The first row/column that's actually used; everything is plotted
        # relative to this well.
        self.i0 = row_indices.min()
        self.j0 = col_indices.min()

        self.num_rows = row_indices.max() - self.i0 + 1
        self.num_cols = col_indices.max() - self.j0 + 1
        self.shape = self.num_rows, self.num_cols

        # The number of unique values in each column of the data frame.
        self.num_values = df.nunique()

        # Major ticks are centered on each well, and are labeled with the name
        # of the corresponding row/column.
        self.xticks = np.arange(self.num_cols)
        self.yticks = np.arange(self.num_rows)
        self.xticklabels = [
                wellmap.col_from_j(self.j0 + tick)
                for tick in self.xticks
        ]
        self.yticklabels = [
                wellmap.row_from_i(self.i0 + tick)
                for tick in self.yticks
        ]

        # Minor ticks fall on the boundaries between the wells.
        self.xticksminor = np.arange(self.num_cols + 1) - 0.5
        self.yticksminor = np.arange(self.num_rows + 1) - 0.5

class Colors:
    """
    The relationship between the values of a parameter and the colors used to
    represent them.

    Each unique, non-missing value of the parameter is assigned an integer, in
    the order that the values first appear when the wells are sorted by plate,
    row, and column.  Those integers are what actually get plotted, and the
    attributes of this object (``cmap``, ``norm``, ``boundaries``, ``ticks``,
    ``ticklabels``) describe how to color and label them.
    """

    def __init__(self, cmap, df, param):
        # Find the first well with each value of the parameter.
        cols = ['plate', 'row_i', 'col_j']
        rows = df[param].notna()
        labels = (
                df[rows]
                .sort_values(cols)
                .groupby(param, sort=False)
                .head(1)
        )

        self.map = {x: i for i, x in enumerate(labels[param])}

        n = len(self.map)
        self.cmap = cmap
        # Make sure that the range isn't empty, even if there's only one
        # value.
        self.norm = Normalize(vmin=0, vmax=max(n-1, 1))
        self.boundaries = np.arange(n+1) - 0.5
        self.ticks = np.fromiter(self.map.values(), dtype=int, count=n)
        self.ticklabels = list(self.map.keys())

    def transform(self, x):
        """
        Return the integer assigned to the given value, or NaN if the value is
        missing.

        :raises KeyError: if the value is neither missing nor one of the
            values of the parameter.
        """
        import pandas as pd

        try:
            return self.map[x]
        except KeyError:
            # Missing values are deliberately left out of the map.  They
            # aren't always floating-point NaNs (e.g. `None`, `NaT`), so use
            # the same definition of "missing" that was used to build the map.
            if pd.api.types.is_scalar(x) and pd.isna(x):
                return np.nan
            raise


class UsageError(Exception):
    """
    Indicate that the user asked for something that can't be plotted, e.g.
    parameters that don't exist in the layout.
    """

1	#!/usr/bin/env python3
2
3	"""\	4✔
4	Visualize the plate layout described by a wellmap TOML file.
5
6	Usage:
7	wellmap <toml> [<param>...] [-o <path>] [-p] [-c <color>] [-f]
8
9	Arguments:
10	<toml>
11	TOML file describing the plate layout to display. For a complete
12	description of the file format, refer to:
13
14	https://wellmap.readthedocs.io/en/latest/file_format.html
15
16	<param>
17	The name(s) of one or more experimental parameters from the above TOML
18	file to project onto the plate. For example, if the TOML file contains
19	something equivalent to `well.A1.conc = 1`, then "conc" would be a
20	valid parameter name.
21
22	If no names are given, the default is to display any parameters that
23	have at least two different values. For complex layouts, this may
24	result in a figure too big to fit on the screen. The best solution for
25	this (at the moment) is just to specify some parameters to focus on.
26
27	Options:
28	-o --output PATH
29	Output an image of the layout to the given path. The file type is
30	inferred from the file extension. If the path contains a dollar sign
31	(e.g. '$.svg'), the dollar sign will be replaced with the base name of
32	the <toml> path.
33
34	-p --print
35	Print a paper copy of the layout, e.g. to reference when setting up an
36	experiment. The default printer for the system will be used. To see
37	the current default printer, run: `lpstat -d`. To change the default
38	printer, run: `lpoptions -d <printer name>`. When printing, the
39	default color scheme is changed to 'dimgray'. This can still be
40	overridden using the '--color' flag.
41
42	-c --color NAME
43	Use the given color scheme to illustrate which wells have which
44	properties. The given NAME must be one of the color scheme names
45	understood by either `matplotlib` or `colorcet`. See the links below
46	for the full list of supported colors, but some common choices are
47	given below. The default is 'rainbow':
48
49	rainbow: blue, green, yellow, orange, red
50	viridis: purple, green, yellow
51	plasma: purple, red, yellow
52	coolwarm: blue, red
53	tab10: blue, orange, green, red, purple, ...
54	dimgray: gray, black
55
56	Matplotlib colors:
57	https://matplotlib.org/examples/color/colormaps_reference.html
58
59	Colorcet colors:
60	http://colorcet.pyviz.org/
61
62	-f --foreground
63	Don't attempt to return the terminal to the user while the GUI runs.
64	This is meant to be used on systems where the program crashes if run in
65	the background.
66	"""
67
68	import wellmap	4✔
69	import colorcet	4✔
70	import numpy as np	4✔
71	import matplotlib.pyplot as plt	4✔
72	import sys, os	4✔
73
74	from wellmap import LayoutError	4✔
75	from inform import plural	4✔
76	from matplotlib.colors import Normalize	4✔
77	from pathlib import Path	4✔
78	from dataclasses import dataclass	4✔
79	from .util import *	4✔
80
81	def main():	4✔
82	import docopt	4✔
83	from subprocess import Popen, PIPE	4✔
84
85	try:	4✔
86	args = docopt.docopt(__doc__)	4✔
87	toml_path = Path(args['<toml>'])	4✔
88	show_gui = not args['--output'] and not args['--print']	4✔
89
90	if show_gui and not args['--foreground']:	4✔
91	if os.fork() != 0:	×
92	sys.exit()	×
93
94	style = Style()	4✔
95	default_color = 'dimgray' if args['--print'] else 'rainbow'	4✔
96	style.color_scheme = args['--color'] or default_color	4✔
97
98	fig = show(toml_path, args['<param>'], style=style)	4✔
99
100	if args['--output']:	4✔
101	out_path = args['--output'].replace('$', toml_path.stem)	4✔
102	fig.savefig(out_path)	4✔
103	print("Layout written to:", out_path)	4✔
104
105	if args['--print']:	4✔
106	lpr = [	×
107	'lpr',
108	'-o', 'ppi=600',
109	'-o', 'position=top-left',
110	'-o', 'page-top=36', # 72 pt == 1 in
111	'-o', 'page-left=72',
112	]
113	p = Popen(lpr, stdin=PIPE)	×
114	fig.savefig(p.stdin, format='png', dpi=600)	×
115	print("Layout sent to printer.")	×
116
117	if show_gui:	4✔
118	title = str(toml_path)	×
119	if args['<param>']: title += f' [{", ".join(args["<param>"])}]'	×
120	fig.canvas.set_window_title(title)	×
121	plt.show()	×
122
123	except UsageError as err:	4✔
124	print(err)	4✔
125	except LayoutError as err:	4✔
126	err.toml_path = toml_path	4✔
127	print(err)	4✔
128
129	def show(toml_path, params=None, *, style=None):	4✔
130	"""
131	Visualize the given microplate layout.
132
133	It's wise to visualize TOML layouts before doing any analysis, to ensure
134	that all of the wells are correctly annotated. The :prog:`wellmap`
135	command-line program is a useful tool for doing this, but sometimes it's
136	more convenient to make visualizations directly from python (e.g. when
137	working in a jupyter notebook). That's what this function is for.
138
139	:param str,pathlib.Path toml_path:
140	The path to a file describing the layout of one or more plates. See
141	the :doc:`/file_format` page for details about this file.
142
143	:param str,list params:
144	The names of one or more experimental parameters from the above TOML
145	file to visualize. For example, if the TOML file contains something
146	equivalent to ``well.A1.conc = 1``, then "conc" would be a valid
147	parameter name. If not specified, the default is to display any
148	parameters that have at least two different values.
149
150	:param Style style:
151	Settings that control miscellaneous aspects of the plot, e.g. colors,
152	dimensions, etc.
153
154	:rtype: matplotlib.figure.Figure
155	"""
156	df = wellmap.load(toml_path)	4✔
157	return show_df(df, params, style=style)	4✔
158
159	def show_df(df, cols=None, *, style=None):	4✔
160	"""
161	Visualize the microplate layout described by the given data frame.
162
163	Unlike the `show()` function and the :prog:`wellmap` command-line program,
164	this function is not limited to displaying layouts parsed directly from
165	TOML files. Any data frame that specifies a well for each row can be
166	plotted. This provides the means to:
167
168	- Project experimental data onto a layout.
169	- Visualize layouts that weren't generated by wellmap in the first place.
170
171	For example, you could load experimental data into a data frame and use
172	this function to visualize it directly, without ever having to specify a
173	layout. This might be a useful way to get a quick sense for the data.
174
175	:param pandas.DataFrame df:
176	The data frame describing the layout to plot. The data frame must be
177	tidy_: each row must describe a single well, and each column must
178	describe a single aspect of each well. The location of each well must
179	be specified using one or more of the same columns that wellmap uses
180	for that purpose, namely:
181
182	- plate
183	- well
184	- well0
185	- row
186	- col
187	- row_i
188	- col_j
189
190	See `load()` for the exact meanings of these columns. It's not
191	necessary to specify all of these columns, there just needs to be
192	enough information to locate each well. If the plate column is
193	missing, it is assumed that all of the wells are on the same plate. It
194	is also assumed that any redundant columns (e.g. row and row_i)
195	will be consistent with each other.
196
197	Any scalar-valued columns other than these can be plotted.
198
199	:param str,list cols:
200	Which columns to plot onto the layout. The columns used to locate the
201	wells (listed above) cannot be plotted. The default is to include any
202	columns that have at least two different values.
203
204	:param Style style:
205	Settings than control miscellaneous aspects of the plot, e.g. colors,
206	dimensions, etc.
207
208	:rtype: matplotlib.figure.Figure
209	"""
210
211	# The whole architecture of this function is dictated by (what I consider
212	# to be) a small and obscure bug in matplotlib. That bug is: if you are
213	# displaying a figure in the GUI and you use `set_size_inches()`, the whole
214	# GUI will have the given height, but the figure itself will be too short
215	# by the height of the GUI control panel. That control panel has different
216	# heights with different backends (and no way that I know of to query what
217	# its height will be), so `set_size_inches()` is not reliable.
218	#
219	# The only way to reliably control the height of the figure is to provide a
220	# size when constructing it. But that requires knowing the size of the
221	# figure in advance. I would've preferred to set the size at the end,
222	# because by then I know everything that will be in the figure. Instead, I
223	# have to basically work out some things twice (once to figure out how big
224	# they will be, then a second time to actually put them in the figure).
225	#
226	# In particular, I have to work out the colorbar labels twice. These are
227	# the most complicated part of the figure layout, because they come from
228	# the TOML file and could be either very narrow or very wide. So I need to
229	# do a first pass where I plot all the labels on a dummy figure, get their
230	# widths, then allocate enough room for them in the main figure.
231	#
232	# I also need to work out the dimensions of the plates twice, but that's a
233	# simpler calculation.
234
235	style = style or Style()	4✔
236
237	df = require_well_locations(df)	4✔
238	plates = sorted(df['plate'].unique())	4✔
239	params = pick_params(df, cols)	4✔
240
241	fig, axes, dims = setup_axes(df, plates, params, style)	4✔
242
243	try:	4✔
244	for i, param in enumerate(params):	4✔
245	cmap = get_colormap(style[param].color_scheme)	4✔
246	colors = setup_color_bar(axes[i,-1], df, param, cmap)	4✔
247
248	for j, plate in enumerate(plates):	4✔
249	plot_plate(axes[i,j], df, plate, param, style, dims, colors)	4✔
250
251	for i, param in enumerate(params):	4✔
252	axes[i,0].set_ylabel(param)	4✔
253	for j, plate in enumerate(plates):	4✔
254	axes[0,j].set_xlabel(plate)	4✔
255	axes[0,j].xaxis.set_label_position('top')	4✔
256
257	for ax in axes[1:,:-1].flat:	4✔
258	ax.set_xticklabels([])	4✔
259	for ax in axes[:,1:-1].flat:	4✔
260	ax.set_yticklabels([])	4✔
261
262	except:	×
263	plt.close(fig)	×
264	raise	×
265
266	return fig	4✔
267
def plot_plate(ax, df, plate, param, style, dims, colors):
    """
    Draw the values of one parameter on one plate as a heatmap.

    Arguments:
        ax: The matplotlib axes to draw on.
        df: Data frame describing the wells.  The 'plate', 'row_i' and
            'col_j' columns are read, along with the column named by
            `param`.
        plate: Only wells whose 'plate' value equals this are drawn.
        param: The name of the column to visualize.
        style: Not used by this function.
        dims: Provides the matrix shape, the row/column offsets (`i0`,
            `j0`) and the tick positions/labels.
        colors: Provides `transform()` to convert values into color
            indices, plus the `norm` and `cmap` passed to `imshow()`.
    """
    # Fill in a matrix with integers representing each value of the given
    # experimental parameter.  Wells that aren't in the layout stay NaN.
    matrix = np.full(dims.shape, np.nan)
    wells = df.query('plate == @plate')

    # Iterate over the columns directly instead of using `iterrows()`.  The
    # latter coerces every row to a single dtype, which can turn the integer
    # row/column indices into floats (not valid array indices) when all of
    # the columns happen to be numeric.
    for row_i, col_j, value in zip(wells['row_i'], wells['col_j'], wells[param]):
        i = row_i - dims.i0
        j = col_j - dims.j0
        matrix[i, j] = colors.transform(value)

    # Plot a heatmap.
    ax.imshow(
        matrix,
        norm=colors.norm,
        cmap=colors.cmap,
        origin='upper',
        interpolation='nearest',
    )

    # Major ticks label the wells; minor ticks sit on the boundaries between
    # wells and are only used to draw the grid.
    ax.set_xticks(dims.xticks)
    ax.set_yticks(dims.yticks)
    ax.set_xticks(dims.xticksminor, minor=True)
    ax.set_yticks(dims.yticksminor, minor=True)
    ax.set_xticklabels(dims.xticklabels)
    ax.set_yticklabels(dims.yticklabels)
    ax.grid(which='minor')
    ax.tick_params(which='both', axis='both', length=0)
    ax.xaxis.tick_top()
297
def pick_params(df, user_params):
    """
    Decide which experimental parameters to plot.

    Arguments:
        df: Data frame describing the layout.  Any column that isn't one of
            the standard wellmap columns is considered to be a parameter.
        user_params: The name (str) or names (list of str) of the parameters
            requested by the user.  If empty or None, every parameter with
            more than one unique value is chosen.

    Returns:
        A list of column names.

    Raises:
        UsageError: If any requested parameter doesn't exist, or if no
            parameters were requested and every parameter is degenerate.
        LayoutError: If the data frame doesn't contain any parameters.
    """
    if isinstance(user_params, str):
        user_params = [user_params]

    wellmap_cols = ['plate', 'well', 'well0', 'row', 'col', 'row_i', 'col_j', 'path']
    user_cols = [x for x in df.columns if x not in wellmap_cols]

    if user_params:
        # Complain if the user specified any columns that don't exist.

        # Using lists (slower) instead of sets (faster) to maintain the order
        # of the columns in case we want to print an error message.
        unknown_params = [
            x for x in user_params
            if x not in user_cols
        ]
        if unknown_params:
            raise UsageError(f"No such {plural(unknown_params):parameter/s}: {quoted_join(unknown_params)}\nDid you mean: {quoted_join(user_cols)}")

        return user_params

    # If the user didn't specify any columns, show any that have more than one
    # unique value.  Note that `nunique()` ignores missing values, so a column
    # that is entirely missing has zero unique values; such columns are also
    # degenerate, as there'd be nothing to plot.
    degenerate_cols = []
    non_degenerate_cols = []

    for x in user_cols:
        if df[x].nunique() <= 1:
            degenerate_cols.append(x)
        else:
            non_degenerate_cols.append(x)

    if non_degenerate_cols:
        return non_degenerate_cols

    if degenerate_cols:
        raise UsageError(f"Found only degenerate parameters (i.e. with the same value in every well): {quoted_join(degenerate_cols)}")

    raise LayoutError("No experimental parameters found.")
337
def setup_axes(df, plates, params, style):
    """
    Create a figure with one row of axes per parameter and one column per
    plate, plus a final column for the color bars.

    Arguments:
        df: Data frame describing the layout.
        plates: The names of the plates to show.  Must not be empty.
        params: The names of the parameters to show.  Must not be empty.
        style: Provides the cell size, padding, color bar, and margin
            dimensions, all in inches.

    Returns:
        A `(fig, axes, dims)` tuple, where `axes` is a 2D array indexed by
        (parameter, plate) with the color bar axes in the last column, and
        `dims` is the `Dimensions` object calculated from `df`.
    """
    from mpl_toolkits.axes_grid1 import Divider
    from mpl_toolkits.axes_grid1.axes_size import Fixed

    # These assumptions let us simplify some code, and should always be true.
    assert len(plates) > 0
    assert len(params) > 0

    # Determine how much data will be shown in the figure:
    num_plates = len(plates)
    num_params = len(params)
    dims = Dimensions(df)

    bar_label_width = guess_param_label_width(df, params)

    # Define the grid on which the axes will live.  The divisions alternate
    # between padding (even indices) and content (odd indices), starting
    # with the left/top margin.
    h_divs = [
        style.left_margin,
    ]
    for _ in plates:
        h_divs += [
            style.cell_size * dims.num_cols,
            style.pad_width,
        ]
    # Replace the padding after the last plate with the color bar and the
    # right margin.
    h_divs[-1:] = [
        style.bar_pad_width,
        style.bar_width,
        style.right_margin + bar_label_width,
    ]

    v_divs = [
        style.top_margin,
    ]
    for param in params:
        v_divs += [
            # Make sure each row is tall enough for both the plate and the
            # color bar.
            max(
                style.cell_size * dims.num_rows,
                style.bar_width * dims.num_values[param],
            ),
            style.pad_height,
        ]
    # Replace the padding after the last parameter with the bottom margin.
    v_divs[-1:] = [
        style.bottom_margin,
    ]

    # Add up all the divisions to get the width and height of the figure:
    figsize = sum(h_divs), sum(v_divs)

    # Make the figure:
    fig, axes = plt.subplots(
        num_params,
        num_plates + 1,  # +1 for the colorbar axes.
        figsize=figsize,
        squeeze=False,
    )

    # Position the axes.  The vertical divisions are reversed because they
    # were listed from top to bottom above, but are passed to the divider in
    # the opposite order.
    rect = 0.0, 0.0, 1, 1
    h_divs = [Fixed(x) for x in h_divs]
    v_divs = [Fixed(x) for x in reversed(v_divs)]
    divider = Divider(fig, rect, h_divs, v_divs, aspect=False)

    for i in range(num_params):
        for j in range(num_plates + 1):
            # The odd-numbered divisions are the ones that hold content.
            loc = divider.new_locator(nx=2*j + 1, ny=2*(num_params - i) - 1)
            axes[i,j].set_axes_locator(loc)

    return fig, axes, dims
406
def setup_color_bar(ax, df, param, cmap):
    """
    Draw the color bar for the given parameter on the given axes.

    Arguments:
        ax: The matplotlib axes to draw the color bar on.
        df: Data frame describing the layout.
        param: The name of the parameter the color bar is for.
        cmap: The color map to pick colors from.

    Returns:
        The `Colors` object built for this parameter, so that the same
        mapping from values to colors can be reused by the caller.
    """
    from matplotlib.colorbar import ColorbarBase

    colors = Colors(cmap, df, param)

    bar_kwargs = {
        'norm': colors.norm,
        'cmap': colors.cmap,
        'boundaries': colors.boundaries,
    }
    color_bar = ColorbarBase(ax, **bar_kwargs)

    # Put one labeled tick in each segment of the bar.
    color_bar.set_ticks(colors.ticks)
    color_bar.set_ticklabels(colors.ticklabels)

    ax.invert_yaxis()

    return colors
424
def guess_param_label_width(df, params):
    """
    Measure the widest tick label that any of the given parameters would
    produce, by rendering the labels on a temporary figure.

    Arguments:
        df: Data frame describing the layout.
        params: The names of the columns whose unique values will be used
            as labels.

    Returns:
        The width reported by `get_yticklabel_width()` for the widest label,
        or 0 if there are no parameters.
    """
    # I've seen some posts suggesting that this might not work on Macs.  I
    # can't test that, but if this ends up being a problem, I probably need to
    # wrap this in a try/except block and fall back to guessing a width based
    # on the number of characters in the string representation of each label.

    width = 0
    fig, ax = plt.subplots()

    # Make sure the temporary figure gets closed even if something goes wrong
    # while measuring the labels.
    try:
        for param in params:
            labels = df[param].unique()
            ax.set_yticks(range(len(labels)))
            ax.set_yticklabels(labels)

            width = max(width, get_yticklabel_width(fig, ax))
    finally:
        plt.close(fig)

    return width
443
def get_colormap(name):
    """
    Look up a color map by name, checking colorcet first and falling back
    on matplotlib if colorcet doesn't know the name.
    """
    try:
        cmap = colorcet.cm[name]
    except KeyError:
        cmap = plt.get_cmap(name)

    return cmap
449
def get_yticklabel_width(fig, ax):
    """
    Measure the widest y-axis tick label on the given axes.

    Arguments:
        fig: The figure used to obtain a renderer.
        ax: The axes whose y-axis tick labels will be measured.

    Returns:
        The width of the widest label, in pixels divided by the figure's
        DPI (i.e. inches).  0 if the axes have no y-axis tick labels.
    """
    # With some backends, getting the renderer like this may trigger a warning
    # and cause matplotlib to drop down to the Agg backend.
    try:
        from matplotlib.tight_layout import get_renderer
    except ImportError:
        # NOTE(review): newer matplotlib releases no longer seem to expose
        # this module publicly.  Asking the canvas directly is the fallback;
        # confirm that it works for every backend of interest.
        renderer = fig.canvas.get_renderer()
    else:
        renderer = get_renderer(fig)

    # `default=0` avoids a ValueError when there are no labels to measure.
    width = max(
        (
            artist.get_window_extent(renderer).width
            for artist in ax.get_yticklabels()
        ),
        default=0,
    )
    dpi = ax.get_figure().get_dpi()

    return width / dpi
463
# Only require keyword arguments on versions of python where dataclasses
# support doing so.
_dataclass_kwargs = {}
if sys.version_info >= (3, 10):
    _dataclass_kwargs['kw_only'] = True

@dataclass(**_dataclass_kwargs)
class Style:
    """
    Describe how to plot well layouts.

    Style objects exist to be passed to `show()` or `show_df()`, where they
    determine various aspects of the plots' appearances.

    .. warning::

        When constructing style objects, use keyword arguments instead of
        positional arguments.  The order of the arguments is not guaranteed
        and may change in any minor version of wellmap!  You'll get an
        immediate error if you try to use positional arguments in
        python≥3.10, but before then it's possible to shoot yourself in the
        foot.
    """

    cell_size: float = 0.25
    """
    The size of the boxes representing each well, in inches.
    """

    pad_width: float = 0.20
    """
    The horizontal padding between layouts, in inches.
    """

    pad_height: float = 0.20
    """
    The vertical padding between layouts, in inches.
    """

    bar_width: float = 0.15
    """
    The width of the color bar, in inches.
    """

    bar_pad_width: float = pad_width
    """
    The horizontal padding between the color bar and the nearest layout, in
    inches.
    """

    top_margin: float = 0.5
    """
    The space between the layouts and the top edge of the figure, in inches.
    """

    left_margin: float = 0.5
    """
    The space between the layouts and the left edge of the figure, in inches.
    """

    right_margin: float = pad_width
    """
    The space between the layouts and the right edge of the figure, in inches.
    """

    bottom_margin: float = pad_height
    """
    The space between the layouts and the bottom edge of the figure, in inches.
    """

    color_scheme: str = 'rainbow'
    """
    The name of the color scheme to use.  Each different value for each
    different parameter will be assigned a color from this scheme.  Any
    name understood by either colorcet_ or matplotlib_ can be used.

    .. _matplotlib: https://matplotlib.org/examples/color/colormaps_reference.html
    .. _colorcet: http://colorcet.pyviz.org/
    """

    def __post_init__(self):
        # Per-parameter styles, created on demand by `__getitem__()`.
        self.params = {}

    def __getitem__(self, param):
        """
        Return the style for the given parameter, creating it (as a
        `ParamStyle` wrapping this object) the first time it's requested.
        """
        try:
            return self.params[param]
        except KeyError:
            self.params[param] = ps = ParamStyle(self)
            return ps
550
551
class ParamStyle:
    """
    Style settings for a single parameter.

    Any attribute that hasn't been set directly on this object is looked up
    on the style object given to the constructor.
    """
    # It might be worth distinguishing between settings that can/can't be given
    # on a per-parameter basis.  That would involve this class raising an
    # exception when trying to set an invalid attribute.  Right now, anything
    # goes.

    def __init__(self, style):
        self.style = style

    def __getattr__(self, name):
        # This method is only called when normal attribute lookup fails.  If
        # the `style` attribute itself is missing (e.g. while the object is
        # being copied or unpickled, before its state has been restored),
        # trying to access it below would call this method again and recurse
        # without bound.
        if name == 'style':
            raise AttributeError(name)

        return getattr(self.style, name)
563
564
class Dimensions:
    """
    Work out the extent of the wells in a layout, and the ticks needed to
    label them.

    The extent is calculated from the 'row_i' and 'col_j' columns of the
    whole data frame, so it is shared by every plate in the layout.
    """

    def __init__(self, df):
        row_indices = df['row_i']
        col_indices = df['col_j']

        # The top-left corner of the region to plot.
        self.i0 = row_indices.min()
        self.j0 = col_indices.min()

        # The size of the region to plot.
        self.num_rows = row_indices.max() - self.i0 + 1
        self.num_cols = col_indices.max() - self.j0 + 1
        self.shape = self.num_rows, self.num_cols

        # The number of unique values in each column of the data frame.
        self.num_values = df.nunique()

        # Major ticks: one in the middle of each well.
        self.xticks = np.arange(self.num_cols)
        self.yticks = np.arange(self.num_rows)

        # Minor ticks: one on each boundary between wells.
        self.xticksminor = np.arange(self.num_cols + 1) - 0.5
        self.yticksminor = np.arange(self.num_rows + 1) - 0.5

        # Label each major tick with the name of the corresponding column or
        # row, accounting for the offset of the top-left corner.
        self.xticklabels = [
            wellmap.col_from_j(tick + self.j0)
            for tick in self.xticks
        ]
        self.yticklabels = [
            wellmap.row_from_i(tick + self.i0)
            for tick in self.yticks
        ]
589
class Colors:
    """
    Map each value of a parameter to an integer index that can be used to
    pick a color.

    Each distinct non-missing value is numbered in the order it is first
    encountered when the wells are sorted by plate, row, and column.

    Attributes:
        map: Dictionary from values to indices.
        cmap: The color map given to the constructor.
        norm: Normalization spanning the indices.
        boundaries: The edges of the color bar segments, i.e. half-integers
            on either side of each index.
        ticks: The indices, as an integer array.
        ticklabels: The values, in the same order as the indices.
    """

    def __init__(self, cmap, df, param):
        cols = ['plate', 'row_i', 'col_j']
        rows = df[param].notna()

        # Keep only the first well with each value.
        labels = (
            df[rows]
            .sort_values(cols)
            .groupby(param, sort=False)
            .head(1)
        )

        self.map = {x: i for i, x in enumerate(labels[param])}

        n = len(self.map)
        self.cmap = cmap
        # Make sure that `vmin` and `vmax` differ even if there's only one
        # value (or none).
        self.norm = Normalize(vmin=0, vmax=max(n-1, 1))
        self.boundaries = np.arange(n+1) - 0.5
        self.ticks = np.fromiter(self.map.values(), dtype=int, count=n)
        self.ticklabels = list(self.map.keys())

    def transform(self, x):
        """
        Return the index assigned to the given value, or NaN if the value is
        missing.

        Missing values were excluded from the mapping using `notna()`, so
        None and NaN of any floating-point type are treated as missing here.

        Raises:
            KeyError: If the value is neither missing nor in the mapping.
        """
        if x is None:
            return np.nan
        if isinstance(x, (float, np.floating)) and np.isnan(x):
            return np.nan

        return self.map[x]
613
614
class UsageError(Exception):
    """
    Indicate that the parameters requested for plotting can't be shown, e.g.
    because they don't exist or because every parameter is degenerate.
    """

kalekundert / wellmap / 6040338364

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous