• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jharwell / sierra / 14915098141

08 May 2025 08:03PM UTC coverage: 80.239% (+0.05%) from 80.194%
14915098141

push

github

jharwell
feature(#326): Arrow storage

- Start updating docs/code to say "output files" instead of "csv"

- Move flattening to be a platform callback so it can be done before scaffolding
  a batch exp.

- Start hacking at statistics generation to support arrow and CSV. Things seem
  to work with arrow, but need to re-run some imagizing/csv tests to verify
  things aren't broken in other ways.

- Add a placeholder for fleshing out SIERRA's dataflow model, which is a really
  important aspect of usage which currently isn't documented.

- Remove excessive class usage in DataFrame{Reader,Writer}

- Overhaul collation and fix nasty bug where data was only being gathered from 1
  run per sim; no idea how long that has been in there. Added an assert so that
  can't happen again.

349 of 385 new or added lines in 28 files covered. (90.65%)

3 existing lines in 3 files now uncovered.

5441 of 6781 relevant lines covered (80.24%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/sierra/core/graphs/scatterplot2D.py
1
# Copyright 2020 John Harwell, All rights reserved.
2
#
3
#  SPDX-License-Identifier: MIT
4
#
5
"""
6
2D scatterplot graph generation classes for stage{4,5}.
7
"""
8

9
# Core packages
10
import logging
×
11
import pathlib
×
12

13
# 3rd party packages
14
import numpy as np
×
15
import sympy
×
16
import matplotlib.pyplot as plt
×
17

18
# Project packages
19
from sierra.core import storage, config, utils
×
20

21

22
class Scatterplot2D:
    """Generates a 2D scatterplot of rows vs. columns (X vs. Y) from a CSV.

    If the necessary CSV file does not exist, the graph is not generated.

    Attributes:

        input_fpath: Path of the file the plotted data is read from.

        output_fpath: Path the rendered figure is saved to.

        title: Title drawn above the graph.

        xlabel: Label for the X axis.

        ylabel: Label for the Y axis.

        xcol: Name of the dataframe column plotted on the X axis.

        ycol: Name of the dataframe column plotted on the Y axis.

        regression: If true, a degree-1 least-squares fit line is drawn over
                    the scatter points and its equation added to the legend.

        text_size: Font size table selected from ``config`` according to the
                   ``large_text`` constructor argument.
    """

    def __init__(self,
                 input_fpath: pathlib.Path,
                 output_fpath: pathlib.Path,
                 title: str,
                 xlabel: str,
                 ylabel: str,
                 xcol: str,
                 ycol: str,
                 large_text: bool = False,
                 regression: bool = False) -> None:

        self.input_fpath = input_fpath
        self.output_fpath = output_fpath
        self.title = title
        self.xlabel = xlabel
        self.ylabel = ylabel
        self.xcol = xcol
        self.ycol = ycol
        self.regression = regression

        if large_text:
            self.text_size = config.kGraphTextSizeLarge
        else:
            self.text_size = config.kGraphTextSizeSmall

        self.logger = logging.getLogger(__name__)

    def generate(self) -> None:
        """Read the input file, draw the scatterplot, and save it to disk.

        If ``input_fpath`` does not exist a debug message is logged and
        nothing is written.
        """
        if not utils.path_exists(self.input_fpath):
            self.logger.debug("Not generating 2D scatterplot: %s does not exist",
                              str(self.input_fpath))
            return

        # Read .csv and scaffold graph
        df = storage.df_read(self.input_fpath, 'storage.csv')
        ax = df.plot.scatter(x=self.xcol, y=self.ycol)
        fig = ax.get_figure()

        try:
            # Plot regression line on the same axes as the scatter points
            if self.regression:
                self._plot_regression(df, ax)

            # Plot ticks and labels
            ax.tick_params(labelsize=self.text_size['tick_label'])
            ax.set_xlabel(self.xlabel, fontsize=self.text_size['xyz_label'])
            ax.set_ylabel(self.ylabel, fontsize=self.text_size['xyz_label'])

            # Add title
            ax.set_title(self.title, fontsize=self.text_size['title'])

            # Output figure
            fig.set_size_inches(config.kGraphBaseSize, config.kGraphBaseSize)
            fig.savefig(self.output_fpath,
                        bbox_inches='tight',
                        dpi=config.kGraphDPI)
        finally:
            # Prevent memory accumulation (fig.clf() does not close
            # everything). Done in ``finally`` so a failure while styling or
            # saving does not leave the figure registered with pyplot.
            plt.close(fig)

    def _plot_regression(self, df, ax=None) -> None:
        """Draw a degree-1 least-squares fit of ``ycol`` vs. ``xcol``.

        The fitted equation is rendered as LaTeX and used as the legend label
        for the line.

        Arguments:

            df: Dataframe containing the ``xcol`` and ``ycol`` columns.

            ax: Axes to draw on. If omitted, pyplot's current axes are used,
                which matches the behavior before this argument existed.
        """
        if ax is None:
            ax = plt.gca()

        # Calculate linear regression line
        coeffs = np.polyfit(x=df.loc[:, self.xcol],
                            y=df.loc[:, self.ycol], deg=1)
        ffit = np.poly1d(coeffs)
        x_new = np.linspace(df[self.xcol].min(), df[self.xcol].max(), 50)
        y_new = ffit(x_new)

        # Plot line and add equation to legend. np.polyfit() returns the
        # highest power first, so the coefficients are reversed to pair each
        # one with its exponent.
        xsym = sympy.symbols('x')
        eqn = sum(sympy.S("{:6.2f}".format(v)) * xsym ** i
                  for i, v in enumerate(coeffs[::-1]))
        latex = sympy.printing.latex(eqn)
        ax.plot(x_new, y_new, label="${}$".format(latex))
        ax.legend(fontsize=self.text_size['legend_label'])
×
108

109

110
# Names exported by a wildcard import of this module.
__all__ = [
    'Scatterplot2D'
]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc