10923202860

Committed 18 Sep 2024 01:29PM UTC coverage: 99.247% (-0.8%) from 100.0%

Build # 10923202860

Build Type

push

github

Committed by

iprafols

Commit Message

yapfed and linted code

Run Details

499 of 505 branches covered (98.81%)

Branch coverage included in aggregate %.

3 of 3 new or added lines in 3 files covered. (100.0%)

8 existing lines in 3 files now uncovered.

1345 of 1353 relevant lines covered (99.41%)

2.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.91

/stacking/stackers/split_stacker.py

""" This module defines the class SplitStacker to compute multiple
stacks splitting on one or more properties of the spectra"""

import logging

from astropy.table import Table
import numpy as np
import pandas as pd

from stacking.errors import StackerError
from stacking.spectrum import Spectrum
from stacking.stacker import Stacker
from stacking.stacker import defaults, accepted_options, required_options
from stacking.stackers.split_stacker_utils import (
    assign_group_multiple_cuts,
    assign_group_one_cut,
    extract_split_cut_sets,
    format_split_on,
    format_splits,
    retreive_group_number,
)
from stacking.utils import (update_accepted_options, update_default_options,
                            update_required_options)

# Supported ways of combining the cuts on the different split variables
VALID_SPLIT_TYPES = [
    # every variable is split independently of the others, hence a given
    # spectrum can enter several splits
    "OR",
    # all the variables are used simultaneously to perform the split, hence
    # a given spectrum can enter only one split
    "AND",
]

# Extend the options inherited from Stacker with the ones specific to
# SplitStacker
accepted_options = update_accepted_options(
    accepted_options,
    [
        "catalogue HDU name or number",
        "specid name",
        "split catalogue name",
        "split on",
        "split cuts",
        "split type",
    ],
)
defaults = update_default_options(
    defaults,
    {
        "split type": "OR",
        "catalogue HDU name or number": "CATALOG",
    },
)
required_options = update_required_options(
    required_options,
    [
        "catalogue HDU name or number",
        "specid name",
        "split catalogue name",
        "split on",
        "split cuts",
    ],
)


class SplitStacker(Stacker):
    """Abstract class to compute multiple stacks splitting on one
    or more properties of the spectra.

    Methods
    -------
    (see Stacker in stacking/stacker.py)
    __init__
    __parse_config
    assing_groups
    read_catalogue
    stack

    Attributes
    ----------
    (see Stacker in stacking/stacker.py)

    catalogue_hdu_name_or_number: str
    Name of the HDU in `split_catalogue_name` that contains the actual catalogue
    to split

    logger: logging.Logger
    Logger object

    groups_info: pd.DataFrame
    DataFrame containing the group information

    num_groups: int
    Number of groups the data is split on

    specid_name: str
    Name of the column containing the identifier SPECID

    split_catalogue: pd.DataFrame
    The catalogue to be split

    split_catalogue_name: str
    Filename of the catalogue to be split

    split_on: list of str
    List of column name(s) to be split

    split_type: "OR" or "AND"
    If "OR", then the split will be performed independently in the different
    variables (a spectrum can enter multiple splits). If "AND", the split will
    be performed using all the different variables (a spectrum can enter at most
    one split)

    splits: list of array of float
    List of intervals to perform the splits.
    Intervals are defined as [intervals[n], intervals[n+1]].
    The lower (upper) limit of the interval is included in (excluded from) the
    interval. Values outside these intervals will be assigned a -1

    stackers: list of Stacker
    Stacker instances that will contain the stacked spectra for each of the groups
    Must be initialized by the child class
    """

    def __init__(self, config, groups_info=None, split_catalogue=None):
        """Initialize class instance

        Arguments
        ---------
        config: configparser.SectionProxy
        Parsed options to initialize class

        groups_info: pd.DataFrame or None - default: None
        If None, then the groups information will be computed upon
        initialization. Otherwise, this must be a pandas DataFrame with the
        previously computed information

        split_catalogue: pd.DataFrame or None - default: None
        If None, then the catalogue will be read from split_catalogue_name.
        Otherwise, this must be a pandas DataFrame with the previously read
        catalogue
        """
        self.logger = logging.getLogger(__name__)
        super().__init__(config)

        self.catalogue_hdu_name_or_number = None
        self.specid_name = None
        self.split_catalogue_name = None
        self.split_on = None
        self.split_type = None
        self.splits = []
        self.__parse_config(config)

        # read the catalogue (unless an already loaded one was passed)
        if split_catalogue is None:
            self.split_catalogue = self.read_catalogue()
        else:
            self.split_catalogue = split_catalogue

        # add groups (unless previously computed groups were passed)
        if groups_info is None:
            self.num_groups = None
            self.groups_info = None
            self.assing_groups()
        else:
            # one row per group in the groups information table
            self.num_groups = groups_info.shape[0]
            self.groups_info = groups_info

        # This needs to be defined in the child class
        self.stackers = []

    def __parse_config(self, config):
        """Parse the configuration options

        Arguments
        ---------
        config: configparser.SectionProxy
        Parsed options to initialize class

        Raise
        -----
        StackerError upon missing required variables
        StackerError if variables are not properly formatted
        StackerError if variables are not coherent
        """
        self.catalogue_hdu_name_or_number = config.get(
            "catalogue HDU name or number")
        if self.catalogue_hdu_name_or_number is None:
            raise StackerError(
                "Missing argument 'catalogue HDU name or number' required by "
                "SplitStacker")

        self.specid_name = config.get("specid name")
        if self.specid_name is None:
            raise StackerError("Missing argument 'specid name' required by "
                               "SplitStacker")

        self.split_catalogue_name = config.get("split catalogue name")
        if self.split_catalogue_name is None:
            raise StackerError(
                "Missing argument 'split catalogue name' required by "
                "SplitStacker")

        split_on = config.get("split on")
        if split_on is None:
            raise StackerError("Missing argument 'split on' required by "
                               "SplitStacker")
        # use any of the following as separators (comma semicolon space)
        self.split_on = format_split_on(split_on)

        self.split_type = config.get("split type")
        if self.split_type is None:
            raise StackerError("Missing argument 'split type' required by "
                               "SplitStacker")
        # the comparison against the valid types is case-insensitive
        self.split_type = self.split_type.upper()
        if self.split_type not in VALID_SPLIT_TYPES:
            # report the option that is actually wrong ('split type')
            raise StackerError(
                "Invalid value for argument 'split type' required by "
                "SplitStacker. Expected one of '" +
                " ".join(VALID_SPLIT_TYPES) +
                f"'. Found: '{self.split_type}'")

        split_cuts = config.get("split cuts")
        if split_cuts is None:
            raise StackerError("Missing argument 'split cuts' required by "
                               "SplitStacker")
        # the splitting on the different quantities is done using ; plus
        # possibly spaces
        split_cuts_sets = extract_split_cut_sets(split_cuts)
        # there must be exactly one set of cuts per splitting variable
        if len(split_cuts_sets) != len(self.split_on):
            raise StackerError(
                "Inconsistency found in reading the splits. The number of "
                f"splitting variables is {len(self.split_on)}, but I found "
                f"{len(split_cuts_sets)} sets of cuts. Read vaues are\n"
                f"'split on' = '{self.split_on}'\n'split cuts' = '{split_cuts}'. "
                "Splitting variables are delimited by a semicolon (;), a comma"
                "(,) or a white space. Cuts sets should be delimited by the "
                "character ';'. Cut values within a given set should be delimited "
                "by commas and/or whitespaces)")
        self.splits = format_splits(split_cuts_sets)

    def assing_groups(self):
        """Assign groups to the catalogue entries. Store the total number of
        groups and the groups information

        If split_type is "OR", then assign one group number per variable in
        the split (one GROUP_{index} column per variable). Else, if split_type
        is "AND", then assign a single group number (column GROUP)

        Raise
        -----
        StackerError if the split type is not supported
        """
        self.logger.progress("Assigning groups")

        self.num_groups = 0
        if self.split_type == "OR":
            groups = []
            for index, variable in enumerate(self.split_on):
                # self.num_groups acts as the offset of the group numbers
                # for this variable: it counts the groups already assigned
                # to the previous variables
                self.split_catalogue[
                    f"GROUP_{index}"] = self.split_catalogue.apply(
                        assign_group_one_cut,
                        axis=1,
                        args=(variable, self.splits[index], self.num_groups),
                    )
                # keep grouping info: one row per consecutive pair of cuts
                groups += [[
                    variable, min_value, max_value, f"GROUP_{index}",
                    group_index + self.num_groups
                ] for group_index, (min_value, max_value) in enumerate(
                    zip(self.splits[index][:-1], self.splits[index][1:]))]
                # update num_groups
                self.num_groups += self.splits[index].size - 1

            self.groups_info = pd.DataFrame(data=groups,
                                            columns=[
                                                "VARIABLE", "MIN_VALUE",
                                                "MAX_VALUE", "COLNAME",
                                                "GROUP_NUM"
                                            ])
        elif self.split_type == "AND":
            # number of intervals defined by the cuts of each variable
            num_intervals = np.array([
                self.splits[index].size - 1
                for index in range(len(self.split_on))
            ])

            self.split_catalogue["GROUP"] = self.split_catalogue.apply(
                assign_group_multiple_cuts,
                axis=1,
                args=(self.split_on, self.splits, num_intervals),
            )

            # one group per combination of intervals; stored as a plain int
            # to match the "OR" case and the documented attribute type
            self.num_groups = int(np.prod(num_intervals))

            groups = []
            for group_number in range(self.num_groups):
                aux_groups = [group_number]
                # decode the group number into one interval index per
                # variable (mixed radix, first variable varies fastest)
                remainder = group_number
                for index, num_intervals_variable in enumerate(num_intervals):
                    variable_index = remainder % num_intervals_variable
                    aux_groups += [
                        self.split_on[index],
                        self.splits[index][variable_index],
                        self.splits[index][variable_index + 1]
                    ]
                    remainder = (remainder -
                                 variable_index) // num_intervals_variable
                groups.append(aux_groups)

            # columns of the data frame
            cols = ["GROUP_NUM"]
            for index in range(len(self.split_on)):
                cols += [
                    f"VARIABLE_{index}", f"MIN_VALUE_{index}",
                    f"MAX_VALUE_{index}"
                ]

            self.groups_info = pd.DataFrame(data=groups, columns=cols)

        # this should never enter unless new split types are not properly added
        else:  # pragma: no cover
            raise StackerError(
                f"Don't know what to do with split type {self.split_type}. "
                "This is one of the supported split types, maybe it "
                "was not properly coded. If you did the change yourself, check "
                "that you added the behaviour of the new mode to method `assing_groups`. "
                "Otherwise contact 'stacking' developpers.")

        self.logger.progress("Groups assigned")

    def read_catalogue(self):
        """Read the catalogue to do the splits

        Only the columns listed in split_on and the column specid_name are
        kept. The latter is renamed to SPECID, and a boolean column IN_STACK
        (initialized to False) is added.

        Return
        -----
        split_catalogue: pd.DataFrame
        The catalogue to be split

        Raise
        -----
        StackerError if file is not found
        StackerError if the HDU is not found by name and cannot be
        interpreted as a number
        """
        self.logger.progress("Reading catalogue from %s",
                             self.split_catalogue_name)
        self.logger.progress("Reading HDU '%s'",
                             self.catalogue_hdu_name_or_number)
        try:
            catalogue = Table.read(self.split_catalogue_name,
                                   hdu=self.catalogue_hdu_name_or_number)
        # we are currently not accessing this as astropy reads the first HDU
        # when it does not find the correct key. However, we do not delete this
        # check as it is currently raising a DeprecationWarning that will soon
        # turn to an error
        except KeyError:  # pragma: no cover
            self.logger.warning(
                "Error reading HDU '%s'. Maybe it was not a name but rather a "
                "number. I will try this and come back to you",
                self.catalogue_hdu_name_or_number)
            try:
                catalogue = Table.read(self.split_catalogue_name,
                                       hdu=int(
                                           self.catalogue_hdu_name_or_number))
            except ValueError as error:
                raise StackerError(
                    "SplitStacker: Problem reading HDU "
                    f"{self.catalogue_hdu_name_or_number}") from error
            self.logger.ok_warning("Catalogue read properly")

        except FileNotFoundError as error:
            raise StackerError("SplitStacker: Could not find catalogue: "
                               f"{self.split_catalogue_name}") from error

        # keep only the columns required to perform the splits
        keep_columns = self.split_on + [self.specid_name]

        split_catalogue = catalogue[keep_columns].to_pandas()
        split_catalogue.rename(columns={self.specid_name: "SPECID"},
                               inplace=True)
        # flag updated in method `stack` for the spectra entering a stack
        split_catalogue["IN_STACK"] = False

        self.logger.progress("Catalogue read")

        return split_catalogue

    def stack(self, spectra):
        """ Stack spectra

        Each stacker in `stackers` is run on the spectra belonging to its
        group. The results are stored in the columns of stacked_flux and
        stacked_weight, and the spectra entering any of the stacks are
        flagged in the IN_STACK column of split_catalogue.

        Arguments
        ---------
        spectra: list of Spectrum
        The spectra to stack

        Raise
        -----
        StackerError if the stackers have not been initialized by the child class
        """
        if len(self.stackers) != self.num_groups:
            raise StackerError(
                f"I expected {self.num_groups} stackers but found "
                f"{len(self.stackers)}. Make sure the member 'stackers' is "
                "properly intialized in the child class")

        # one column per group
        self.stacked_flux = np.zeros(
            (Spectrum.common_wavelength_grid.size, self.num_groups),
            dtype=float)
        self.stacked_weight = np.zeros_like(self.stacked_flux)

        # the SPECID column is not modified in the loop: extract it only once
        catalogue_specids = self.split_catalogue["SPECID"].values

        for group_number, stacker in enumerate(self.stackers):

            # find the column holding the group numbers for this group
            if self.split_type == "OR":
                col = self.groups_info[self.groups_info["GROUP_NUM"] ==
                                       group_number]["COLNAME"].values[0]
            elif self.split_type == "AND":
                col = "GROUP"

            # this should never enter unless new split types are not properly added
            else:  # pragma: no cover
                raise StackerError(
                    f"Don't know what to do with split type {self.split_type}. "
                    "This is one of the supported split types, maybe it "
                    "was not properly coded. If you did the change yourself, check "
                    "that you added the behaviour of the new mode to method `stack`. "
                    "Otherwise contact 'stacking' developpers.")

            # extracted once per group instead of once per spectrum
            catalogue_groups = self.split_catalogue[col].values

            # select the spectra of this particular group
            selected_spectra = [
                spectrum for spectrum in spectra if retreive_group_number(
                    spectrum.specid, catalogue_specids, catalogue_groups) ==
                group_number
            ]

            # run the stack
            stacker.stack(selected_spectra)

            self.stacked_flux[:, group_number] = stacker.stacked_flux
            self.stacked_weight[:, group_number] = stacker.stacked_weight

            # update statistics
            selected_specids = [
                spectrum.specid for spectrum in selected_spectra
            ]
            self.split_catalogue.loc[
                self.split_catalogue["SPECID"].isin(selected_specids),
                "IN_STACK"] = True

1	""" This module defines the class SplitStacker to compute multiple
2	stacks splitting on one or more properties of the spectra"""
3
4	import logging	3✔
5
6	from astropy.table import Table	3✔
7	import numpy as np	3✔
8	import pandas as pd	3✔
9
10	from stacking.errors import StackerError	3✔
11	from stacking.spectrum import Spectrum	3✔
12	from stacking.stacker import Stacker	3✔
13	from stacking.stacker import defaults, accepted_options, required_options	3✔
14	from stacking.stackers.split_stacker_utils import (	3✔
15	assign_group_multiple_cuts,
16	assign_group_one_cut,
17	extract_split_cut_sets,
18	format_split_on,
19	format_splits,
20	retreive_group_number,
21	)
22	from stacking.utils import (update_accepted_options, update_default_options,	3✔
23	update_required_options)
24
25	VALID_SPLIT_TYPES = [	3✔
26	# the split will be performed independently in the different variables,
27	# thus, a spectrum can enter multiple splits
28	"OR",
29	# the split will be performed using all the different variables,
30	# thus, a spectrum can enter only one splits
31	"AND"
32	]
33
34	accepted_options = update_accepted_options(accepted_options, [	3✔
35	"catalogue HDU name or number", "specid name", "split catalogue name",
36	"split on", "split cuts", "split type"
37	])
38	defaults = update_default_options(defaults, {	3✔
39	"split type": "OR",
40	"catalogue HDU name or number": "CATALOG",
41	})
42	required_options = update_required_options(required_options, [	3✔
43	"catalogue HDU name or number", "specid name", "split catalogue name",
44	"split on", "split cuts"
45	])
46
47
48	class SplitStacker(Stacker):	3✔
49	"""Abstract class to compute mulitple stacks splitting on one
50	or more properties of the spectra.
51
52	Methods
53	-------
54	(see Stacker in stacking/stacker.py)
55	__init__
56	__parse_config
57	assing_groups
58	read_catalogue
59	stack
60
61	Attributes
62	----------
63	(see Stacker in stacking/stacker.py)
64
65	catalogue_hdu_name_or_number: str
66	Name of the HDU in `split_catalogue_name` that contains the actual catalogue
67	to split
68
69	logger: logging.Logger
70	Logger object
71
72	groups_info: pd.DataFrame
73	DataFrame containing the group information
74
75	num_groups: int
76	Number of groups the data is split on
77
78	specid_name: str
79	Name of the column containing the identifier SPECID
80
81	split_catalogue: pd.DataFrame
82	The catalogue to be split
83
84	split_catalogue_name: str
85	Filename of the catalogue to be split
86
87	split_on: list of str
88	List of column name(s) to be split
89
90	split_type: "OR" or "AND"
91	If "OR", then the split will be performed independently in the different
92	variables (a spectrum can enter multiple splits). If "AND", the split will
93	be performed using all the different variables (a spectrum can enter at most
94	one split)
95
96	splits: list of array of float
97	List of intervals to perform the splits.
98	Intervals are defined as [intervals[n], intervals[n-1]].
99	The lower (upper) limit of the interval is included in(excluded of) the interval
100	Values outside these intervals will be assinged a -1
101
102	stackers: list of Stacker
103	Stacker instances that will contain the stacked spectra for each of the groups
104	Must be initialized by the child class
105	"""
106
107	def __init__(self, config, groups_info=None, split_catalogue=None):	3✔
108	"""Initialize class instance
109
110	Arguments
111	---------
112	config: configparser.SectionProxy
113	Parsed options to initialize class
114
115	groups_info: pd.DataFrame or None - default: None
116	If not None, then the groups information will be computed upon initialization.
117	Otherwise, this must be pandas DataFrame with the previously computed information
118
119	split_catalogue: pd.DataFrame or None - default: None
120	If not None, then the catalogue will be read from split_catalogue_name
121	Otherwise, this must be pandas DataFrame with the previously read catalogue
122	"""
123	self.logger = logging.getLogger(__name__)	3✔
124	super().__init__(config)	3✔
125
126	self.catalogue_hdu_name_or_number = None	3✔
127	self.specid_name = None	3✔
128	self.split_catalogue_name = None	3✔
129	self.split_on = None	3✔
130	self.split_type = None	3✔
131	self.splits = []	3✔
132	self.__parse_config(config)	3✔
133
134	# read the catalogue
135	if split_catalogue is None:	3!
136	self.split_catalogue = self.read_catalogue()	3✔
137	else:
UNCOV 138	self.split_catalogue = split_catalogue	×
139
140	# add groups
141	if groups_info is None:	3!
142	self.num_groups = None	3✔
143	self.groups_info = None	3✔
144	self.assing_groups()	3✔
145	else:
UNCOV 146	self.num_groups = groups_info.shape[0]	×
UNCOV 147	self.groups_info = groups_info	×
148
149	# This needs to be defined in the child class
150	self.stackers = []	3✔
151
152	def __parse_config(self, config):	3✔
153	"""Parse the configuration options
154
155	Arguments
156	---------
157	config: configparser.SectionProxy
158	Parsed options to initialize class
159
160	Raise
161	-----
162	StackerError upon missing required variables
163	StackerError if variables are not properly formatted
164	StackerError if variables are not coherent
165	"""
166	self.catalogue_hdu_name_or_number = config.get(	3✔
167	"catalogue HDU name or number")
168	if self.catalogue_hdu_name_or_number is None:	3✔
169	raise StackerError(	3✔
170	"Missing argument 'catalogue HDU name or number' required by "
171	"SplitStacker")
172
173	self.specid_name = config.get("specid name")	3✔
174	if self.specid_name is None:	3✔
175	raise StackerError("Missing argument 'specid name' required by "	3✔
176	"SplitStacker")
177
178	self.split_catalogue_name = config.get("split catalogue name")	3✔
179	if self.split_catalogue_name is None:	3✔
180	raise StackerError(	3✔
181	"Missing argument 'split catalogue name' required by "
182	"SplitStacker")
183
184	split_on = config.get("split on")	3✔
185	if split_on is None:	3✔
186	raise StackerError("Missing argument 'split on' required by "	3✔
187	"SplitStacker")
188	# use any of the following as separators (comma semicolon space)
189	self.split_on = format_split_on(split_on)	3✔
190
191	self.split_type = config.get("split type")	3✔
192	if self.split_type is None:	3✔
193	raise StackerError("Missing argument 'split type' required by "	3✔
194	"SplitStacker")
195	self.split_type = self.split_type.upper()	3✔
196	if self.split_type not in VALID_SPLIT_TYPES:	3✔
197	raise StackerError(	3✔
198	"Invalid value for argument 'split on' required by SplitStacker. "
199	"Expected one of '" + " ".join(VALID_SPLIT_TYPES) +
200	f" Found: '{self.split_type}'")
201
202	split_cuts = config.get("split cuts")	3✔
203	if split_cuts is None:	3✔
204	raise StackerError("Missing argument 'split cuts' required by "	3✔
205	"SplitStacker")
206	# the splitting on the different quantities is done using ; plus
207	# possibly spaces
208	split_cuts_sets = extract_split_cut_sets(split_cuts)	3✔
209	if len(split_cuts_sets) != len(self.split_on):	3✔
210	raise StackerError(	3✔
211	"Inconsistency found in reading the splits. The number of "
212	f"splitting variables is {len(self.split_on)}, but I found "
213	f"{len(split_cuts_sets)} sets of cuts. Read vaues are\n"
214	f"'split on' = '{self.split_on}'\n'split cuts' = '{split_cuts}'. "
215	"Splitting variables are delimited by a semicolon (;), a comma"
216	"(,) or a white space. Cuts sets should be delimited by the "
217	"character ';'. Cut values within a given set should be delimited "
218	"by commas and/or whitespaces)")
219	self.splits = format_splits(split_cuts_sets)	3✔
220
221	def assing_groups(self):	3✔
222	"""Assign groups to the catalogue entries. Store the total number of groups
223
224	If split_type is OR-like, then assign one group number per varible in
225	the split. Else, it split_type is AND-like, then assing a single group
226	number
227	"""
228	self.logger.progress("Assigning groups")	3✔
229
230	self.num_groups = 0	3✔
231	if self.split_type == "OR":	3✔
232	groups = []	3✔
233	for index, variable in enumerate(self.split_on):	3✔
234	self.split_catalogue[	3✔
235	f"GROUP_{index}"] = self.split_catalogue.apply(
236	assign_group_one_cut,
237	axis=1,
238	args=(variable, self.splits[index], self.num_groups),
239	)
240	# keep grouping info
241	groups += [[	3✔
242	variable, min_value, max_value, f"GROUP_{index}",
243	group_index + self.num_groups
244	] for group_index, (min_value, max_value) in enumerate(
245	zip(self.splits[index][:-1], self.splits[index][1:]))]
246	# update num_groups
247	self.num_groups += self.splits[index].size - 1	3✔
248
249	self.groups_info = pd.DataFrame(data=groups,	3✔
250	columns=[
251	"VARIABLE", "MIN_VALUE",
252	"MAX_VALUE", "COLNAME",
253	"GROUP_NUM"
254	])
255	elif self.split_type == "AND":	3✔
256	num_intervals = np.array([	3✔
257	self.splits[index].size - 1
258	for index in range(len(self.split_on))
259	])
260
261	self.split_catalogue["GROUP"] = self.split_catalogue.apply(	3✔
262	assign_group_multiple_cuts,
263	axis=1,
264	args=(self.split_on, self.splits, num_intervals),
265	)
266
267	self.num_groups = np.prod(num_intervals)	3✔
268
269	groups = []	3✔
270	for group_number in range(self.num_groups):	3✔
271	aux_groups = [group_number]	3✔
272	for index, num_intervals_variable in enumerate(num_intervals):	3✔
273	variable_index = group_number % num_intervals_variable	3✔
274	aux_groups += [	3✔
275	self.split_on[index],
276	self.splits[index][variable_index],
277	self.splits[index][variable_index + 1]
278	]
279	group_number = (group_number -	3✔
280	variable_index) // num_intervals_variable
281	groups.append(aux_groups)	3✔
282
283	# columns of the data frame
284	cols = ["GROUP_NUM"]	3✔
285	for index in range(len(self.split_on)):	3✔
286	cols += [	3✔
287	f"VARIABLE_{index}", f"MIN_VALUE_{index}",
288	f"MAX_VALUE_{index}"
289	]
290
291	self.groups_info = pd.DataFrame(data=groups, columns=cols)	3✔
292
293	# this should never enter unless new split types are not properly added
294	else: # pragma: no cover
295	raise StackerError(
296	f"Don't know what to do with split type {self.split_type}. "
297	"This is one of the supported split types, maybe it "
298	"was not properly coded. If you did the change yourself, check "
299	"that you added the behaviour of the new mode to method `assing_groups`. "
300	"Otherwise contact 'stacking' developpers.")
301
302	self.logger.progress("Groups assigned")	3✔
303
304	def read_catalogue(self):	3✔
305	"""Read the catalogue to do the splits
306
307	Return
308	-----
309	split_catalogue: pd.DataFrame
310	The catalogue to be split
311
312	Raise
313	-----
314	StackerError if file is not found
315	"""
316	self.logger.progress("Reading catalogue from %s",	3✔
317	self.split_catalogue_name)
318	self.logger.progress("Reading HDU '%s'",	3✔
319	self.catalogue_hdu_name_or_number)
320	try:	3✔
321	catalogue = Table.read(self.split_catalogue_name,	3✔
322	hdu=self.catalogue_hdu_name_or_number)
323	# we are currently not accessing this as astropy reads the first HDU
324	# when it does not find the correct key. However, we do not delete this
325	# check as it is currently raining a DeprecationWarning that will soon
326	# turn to an error
327	except KeyError: # pragma: no cover
328	self.logger.warning(
329	"Error reading HDU '%s'. Maybe it is was a name but rather a "
330	"number. I will try this and come back to you",
331	self.catalogue_hdu_name_or_number)
332	try:
333	catalogue = Table.read(self.split_catalogue_name,
334	hdu=int(
335	self.catalogue_hdu_name_or_number))
336	except ValueError as error:
337	raise StackerError(
338	"SplitStacker: Problem reading HDU "
339	f"{self.catalogue_hdu_name_or_number}") from error
340	self.logger.ok_warning("Catalogue read properly")
341
342	except FileNotFoundError as error:	3✔
343	raise StackerError("SplitStacker: Could not find catalogue: "	3✔
344	f"{self.split_catalogue_name}") from error
345
346	keep_columns = self.split_on + [self.specid_name]	3✔
347
348	split_catalogue = catalogue[keep_columns].to_pandas()	3✔
349	split_catalogue.rename(columns={self.specid_name: "SPECID"},	3✔
350	inplace=True)
351	split_catalogue["IN_STACK"] = False	3✔
352
353	self.logger.progress("Catalogue read")	3✔
354
355	return split_catalogue	3✔
356
357	def stack(self, spectra):	3✔
358	""" Stack spectra
359
360	Arguments
361	---------
362	spectra: list of Spectrum
363	The spectra to stack
364
365	Raise
366	-----
367	StackerError if the stackers have not been intialized by the child class
368	"""
369	if len(self.stackers) != self.num_groups:	3✔
370	raise StackerError(	3✔
371	f"I expected {self.num_groups} stackers but found "
372	f"{len(self.stackers)}. Make sure the member 'stackers' is "
373	"properly intialized in the child class")
374
375	self.stacked_flux = np.zeros(	3✔
376	(Spectrum.common_wavelength_grid.size, self.num_groups),
377	dtype=float)
378	self.stacked_weight = np.zeros_like(self.stacked_flux)	3✔
379
380	for group_number, stacker in enumerate(self.stackers):	3✔
381
382	# select the spectra of this particular groups
383	if self.split_type == "OR":	3✔
384	col = self.groups_info[self.groups_info["GROUP_NUM"] ==	3✔
385	group_number]["COLNAME"].values[0]
386	elif self.split_type == "AND":	3✔
387	col = "GROUP"	3✔
388
389	# this should never enter unless new split types are not properly added
390	else: # pragma: no cover
391	raise StackerError(
392	f"Don't know what to do with split type {self.split_type}. "
393	"This is one of the supported split types, maybe it "
394	"was not properly coded. If you did the change yourself, check "
395	"that you added the behaviour of the new mode to method `stack`. "
396	"Otherwise contact 'stacking' developpers.")
397
398	selected_spectra = [	3✔
399	spectrum for spectrum in spectra if retreive_group_number(
400	spectrum.specid, self.split_catalogue["SPECID"].values,
401	self.split_catalogue[col].values) == group_number
402	]
403
404	# run the stack
405	stacker.stack(selected_spectra)	3✔
406
407	self.stacked_flux[:, group_number] = stacker.stacked_flux	3✔
408	self.stacked_weight[:, group_number] = stacker.stacked_weight	3✔
409
410	# update statistics
411	selected_specids = [	3✔
412	spectrum.specid for spectrum in selected_spectra
413	]
414	self.split_catalogue.loc[	3✔
415	self.split_catalogue["SPECID"].isin(selected_specids),
416	"IN_STACK"] = True

iprafols / stacking / 10923202860

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous