14915098141

Committed 08 May 2025 08:03PM UTC coverage: 80.239% (+0.05%) from 80.194%

Build # 14915098141

Build Type

push

github

Committed by

jharwell

Commit Message

feature(#326): Arrow storage

- Start updating docs/code to say "output files" instead of "csv"

- Move flattening to be a platform callback so it can be done before scaffolding
  a batch exp.

- Start hacking at statistics generation to support arrow and CSV. Things seem
  to work with arrow, but need to re-run some imagizing/csv tests to verify
  things aren't broken in other ways.

- Add a placeholder for fleshing out SIERRA's dataflow model, which is a really
  important aspect of usage which currently isn't documented.

- Remove excessive class usage in DataFrame{Reader,Writer}

- Overhaul collation and fix nasty bug where data was only being gathered from 1
  run per sim; no idea how long that has been in there. Added an assert so that
  can't happen again.

Run Details

349 of 385 new or added lines in 28 files covered. (90.65%)

3 existing lines in 3 files now uncovered.

5441 of 6781 relevant lines covered (80.24%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/sierra/core/graphs/scatterplot2D.py

# Copyright 2020 John Harwell, All rights reserved.
#
#  SPDX-License-Identifier: MIT
#
"""
2D scatterplot graph generation classes for stage{4,5}.
"""

# Core packages
import logging
import pathlib

# 3rd party packages
import numpy as np
import sympy
import matplotlib.pyplot as plt

# Project packages
from sierra.core import storage, config, utils


class Scatterplot2D:
    """Generates a 2D scatterplot of rows vs. columns (X vs. Y) from a CSV.

    If the necessary CSV file does not exist, the graph is not generated.

    Attributes:
        input_fpath: Path to the file the plotted data is read from.

        output_fpath: Path the rendered figure is saved to.

        title: Title placed on the graph.

        xlabel: Label for the X axis.

        ylabel: Label for the Y axis.

        xcol: Name of the dataframe column plotted on the X axis.

        ycol: Name of the dataframe column plotted on the Y axis.

        regression: If True, a degree-1 least-squares fit line is overlaid on
                    the scatterplot and its equation is added to the legend.

        text_size: Font size table selected from ``config``; indexed by
                   ``'tick_label'``, ``'xyz_label'``, ``'title'`` and
                   ``'legend_label'``.
    """

    def __init__(self,
                 input_fpath: pathlib.Path,
                 output_fpath: pathlib.Path,
                 title: str,
                 xlabel: str,
                 ylabel: str,
                 xcol: str,
                 ycol: str,
                 large_text: bool = False,
                 regression: bool = False) -> None:

        self.input_fpath = input_fpath
        self.output_fpath = output_fpath
        self.title = title
        self.xlabel = xlabel
        self.ylabel = ylabel
        self.xcol = xcol
        self.ycol = ycol
        self.regression = regression

        # large_text selects between the two font size tables in config.
        if large_text:
            self.text_size = config.kGraphTextSizeLarge
        else:
            self.text_size = config.kGraphTextSizeSmall

        self.logger = logging.getLogger(__name__)

    def generate(self) -> None:
        """Read the input file, render the scatterplot, and save it.

        Returns without doing anything (other than emitting a debug message)
        if the input file does not exist.
        """
        if not utils.path_exists(self.input_fpath):
            self.logger.debug("Not generating 2D scatterplot: %s does not exist",
                              str(self.input_fpath))
            return

        # Read .csv and scaffold graph
        df = storage.df_read(self.input_fpath, 'storage.csv')
        ax = df.plot.scatter(x=self.xcol, y=self.ycol)
        fig = ax.get_figure()

        try:
            # Plot regression line on the same axes as the scatter data
            if self.regression:
                self._plot_regression(df, ax)

            # Plot ticks and labels
            ax.tick_params(labelsize=self.text_size['tick_label'])
            ax.set_xlabel(self.xlabel, fontsize=self.text_size['xyz_label'])
            ax.set_ylabel(self.ylabel, fontsize=self.text_size['xyz_label'])

            # Add title
            ax.set_title(self.title, fontsize=self.text_size['title'])

            # Output figure
            fig.set_size_inches(config.kGraphBaseSize, config.kGraphBaseSize)
            fig.savefig(self.output_fpath,
                        bbox_inches='tight',
                        dpi=config.kGraphDPI)
        finally:
            # Prevent memory accumulation (fig.clf() does not close
            # everything). Done in finally so a failure while labelling or
            # saving does not leave the figure registered with pyplot.
            plt.close(fig)

    def _plot_regression(self, df, ax=None) -> None:
        """Overlay a linear least-squares fit of ``ycol`` vs. ``xcol``.

        The fitted line is evaluated at 50 evenly spaced points between the
        minimum and maximum of the X column, and its equation (coefficients
        rounded to 2 decimal places) is shown in the legend as LaTeX.

        Arguments:
            df: The data to fit; indexed via ``df.loc[:, col]`` and
                ``df[col]``, so presumably a pandas DataFrame.

            ax: The axes to draw on. If None, the current pyplot axes are
                used.
        """
        # Draw on the axes we were given rather than relying on whichever
        # axes pyplot currently considers active.
        target = plt.gca() if ax is None else ax

        # Calculate linear regression line
        coeffs = np.polyfit(x=df.loc[:, self.xcol],
                            y=df.loc[:, self.ycol], deg=1)
        ffit = np.poly1d(coeffs)
        x_new = np.linspace(df[self.xcol].min(), df[self.xcol].max(), 50)
        y_new = ffit(x_new)

        # Plot line and add equation to legend. np.polyfit() returns the
        # highest-order coefficient first, so reverse to pair each
        # coefficient with its power of x.
        xsym = sympy.symbols('x')
        eqn = sum(sympy.S("{:6.2f}".format(v)) * xsym ** i
                  for i, v in enumerate(coeffs[::-1]))
        latex = sympy.printing.latex(eqn)
        target.plot(x_new, y_new, label="${}$".format(latex))
        target.legend(fontsize=self.text_size['legend_label'])


# Names exported by ``from <this module> import *``.
__all__ = ['Scatterplot2D']

1	# Copyright 2020 John Harwell, All rights reserved.
2	#
3	# SPDX-License-Identifier: MIT
4	#
5	"""
6	2D scatterplot graph generation classes for stage{4,5}.
7	"""
8
9	# Core packages
10	import logging	×
11	import pathlib	×
12
13	# 3rd party packages
14	import numpy as np	×
15	import sympy	×
16	import matplotlib.pyplot as plt	×
17
18	# Project packages
19	from sierra.core import storage, config, utils	×
20
21
22	class Scatterplot2D:	×
23	"""Generates a 2D scatterplot of rows vs. colums (X vs. Y) from a CSV.
24
25	If the necessary CSV file does not exist, the graph is not generated.
26
27	"""
28
29	def __init__(self,	×
30	input_fpath: pathlib.Path,
31	output_fpath: pathlib.Path,
32	title: str,
33	xlabel: str,
34	ylabel: str,
35	xcol: str,
36	ycol: str,
37	large_text: bool = False,
38	regression: bool = False) -> None:
39
40	self.input_fpath = input_fpath	×
41	self.output_fpath = output_fpath	×
42	self.title = title	×
43	self.xlabel = xlabel	×
44	self.ylabel = ylabel	×
45	self.xcol = xcol	×
46	self.ycol = ycol	×
47	self.regression = regression	×
48
49	if large_text:	×
50	self.text_size = config.kGraphTextSizeLarge	×
51	else:
52	self.text_size = config.kGraphTextSizeSmall	×
53
54	self.logger = logging.getLogger(__name__)	×
55
56	def generate(self) -> None:	×
57	if not utils.path_exists(self.input_fpath):	×
58	self.logger.debug("Not generating 2D scatterplot: %s does not exist",	×
59	str(self.input_fpath))
60	return	×
61
62	# Read .csv and scaffold graph
NEW 63	df = storage.df_read(self.input_fpath, 'storage.csv')	×
64	ax = df.plot.scatter(x=self.xcol, y=self.ycol)	×
65
66	# Plot regression line
67	if self.regression:	×
68	self._plot_regression(df)	×
69
70	# Plot ticks and labels
71	ax.tick_params(labelsize=self.text_size['tick_label'])	×
72	ax.set_xlabel(self.xlabel, fontsize=self.text_size['xyz_label'])	×
73	ax.set_ylabel(self.ylabel, fontsize=self.text_size['xyz_label'])	×
74
75	# Add title
76	ax.set_title(self.title, fontsize=self.text_size['title'])	×
77
78	# Output figure
79	fig = ax.get_figure()	×
80	fig.set_size_inches(config.kGraphBaseSize, config.kGraphBaseSize)	×
81	fig.savefig(self.output_fpath,	×
82	bbox_inches='tight',
83	dpi=config.kGraphDPI)
84	# Prevent memory accumulation (fig.clf() does not close everything)
85	plt.close(fig)	×
86
87	def _plot_regression(self, df):	×
88	# slope, intercept, r_value, p_value, std_err = stats.linregress(df.loc[:, self.xcol],
89	# df.loc[:, self.ycol])
90	# x_new = np.linspace(df[self.xcol].min(), df[self.xcol].max(), 50)
91	# line = slope * x_new * intercept
92	# plt.plot(x_new, line, 'r', label='y={:.2f}x+{:.2f}'.format(slope, intercept))
93
94	# Calculate linear regression line
95	coeffs = np.polyfit(x=df.loc[:, self.xcol],	×
96	y=df.loc[:, self.ycol], deg=1)
97	ffit = np.poly1d(coeffs)	×
98	x_new = np.linspace(df[self.xcol].min(), df[self.xcol].max(), 50)	×
99	y_new = ffit(x_new)	×
100
101	# Plot line and add equation to legend
102	xsym = sympy.symbols('x')	×
103	eqn = sum(sympy.S("{:6.2f}".format(v)) * xsym **	×
104	i for i, v in enumerate(coeffs[::-1]))
105	latex = sympy.printing.latex(eqn)	×
106	plt.plot(x_new, y_new, label="${}$".format(latex))	×
107	plt.legend(fontsize=self.text_size['legend_label'])	×
108
109
110	__all__ = [	×
111	'Scatterplot2D'
112	]

jharwell / sierra / 14915098141

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous