5545066420

Committed 13 Jul 2023 03:36PM UTC coverage: 48.496%. First build

Build # 5545066420

Build Type

Pull #102

github

Committed by

web-flow

Commit Message

Merge 0823ae2a9 into 702d175a8

Pull Request Pull Request #102: Add newer version of pyoorb.py

Run Details

64 of 64 new or added lines in 3 files covered. (100.0%)

3095 of 6382 relevant lines covered (48.5%)

0.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

8.43

/thor/data_processing.py

import warnings

import numpy as np
import pandas as pd
from astropy.time import Time

# Public names exported by a star-import of this module.
__all__ = [
    "UNKNOWN_ID_REGEX",
    "preprocessObservations",
]

# Pattern matched by the placeholder object IDs that preprocessObservations
# assigns to observations with no known object: "u" followed by 12 digits.
UNKNOWN_ID_REGEX = "^u[0-9]{12}$"


def _is_string_like(series):
    """Return True if `series` already has an object or string dtype."""
    return bool(
        pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series)
    )


def _apply_astrometric_errors(preprocessed_observations, astrometric_errors):
    """Create the RA/Dec sigma columns in place from user-supplied defaults."""
    if isinstance(astrometric_errors, list):
        if len(astrometric_errors) != 2:
            raise ValueError("astrometric_errors list is not of length 2.")
        preprocessed_observations["RA_sigma_deg"] = astrometric_errors[0]
        preprocessed_observations["Dec_sigma_deg"] = astrometric_errors[1]
        return

    if isinstance(astrometric_errors, dict):
        # Validate every entry before touching the data frame so a bad entry
        # does not leave the frame half-populated.
        for code, errors in astrometric_errors.items():
            if len(errors) != 2:
                err = (
                    "Astrometric errors for observatory {} should be a list of length 2 with\n"
                    "the 1-sigma astrometric uncertainty in RA as the first element and the\n"
                    "1-sigma astrometric uncertainty in Dec as the second element."
                )
                raise ValueError(err.format(code))

        # Always create the columns (even for an empty dictionary) so that the
        # caller's completeness check can report the observatories that were
        # not configured instead of failing on a missing column.
        preprocessed_observations["RA_sigma_deg"] = np.nan
        preprocessed_observations["Dec_sigma_deg"] = np.nan
        for code, errors in astrometric_errors.items():
            observatory_mask = preprocessed_observations["observatory_code"].isin(
                [code]
            )
            preprocessed_observations.loc[observatory_mask, "RA_sigma_deg"] = errors[0]
            preprocessed_observations.loc[observatory_mask, "Dec_sigma_deg"] = errors[
                1
            ]
        return

    err = (
        "'astrometric_errors' should be one of {None, list, dict}.\n"
        "If None, then the given observations must have the ra_sigma_deg\n"
        "  and dec_sigma_deg columns.\n"
        "If a dictionary, then each observatory code present observations in\n"
        "  the observations must have a corresponding key with a list of length 2\n"
        "  as their values. The first element in the list is assumed to be the 1-sigma\n"
        "  astrometric error in RA, while the second is assumed to be the same but in Dec.\n"
        "If a list, then the first element in the list is assumed to be the 1-sigma\n"
        "  astrometric error in RA, while the second is assumed to be the same but in Dec.\n"
        "  Each observation will be given these errors regardless of if one is present or not.\n"
    )
    raise ValueError(err)


def preprocessObservations(
    observations, column_mapping, astrometric_errors=None, mjd_scale="utc"
):
    """
    Create two separate data frames: one with all observation data needed to run THOR stripped of
    object IDs, and the other mapping each observation ID to an object ID. Observations with no
    known object are given a unique placeholder ID.

    Parameters
    ----------
    observations : `~pandas.DataFrame`
        DataFrame containing at minimum a column of observation IDs, exposure times in MJD (with scale
        set by mjd_scale), RA in degrees, Dec in degrees, 1-sigma error in RA in degrees, 1-sigma error in
        Dec in degrees and the observatory code.
    column_mapping : dict
        Dictionary containing internal column names as keys mapped to column names in the data frame as values.
        Should include the following:
        {# Internal : # External
            "obs_id" : column name or None,
            "mjd" : column name,
            "RA_deg" : column name,
            "Dec_deg" : column name,
            "RA_sigma_deg" : column name or None,
            "Dec_sigma_deg" : column name or None,
            "observatory_code" : column name,
            "obj_id" : column name or None,
            "mag" : optional, column name or None,
            "mag_sigma" : optional, column name or None,
            "filter" : optional, column name or None,
            "astrometric_catalog" : optional, column name or None,
            "night_id" : optional, column name or None,
        }
        Description of columns and their assumed values:
            'obs_id' : column name or None
                Observation IDs as type string. If None, THOR will assign
                an observation ID to each observation.
            'mjd' : column name
                Observation time in MJD, the input time scale can be set with the
                'mjd_scale' parameter. Time scale will be converted if not in UTC.
            'RA_deg' : column name
                Topocentric J2000 Right Ascension in degrees.
            'Dec_deg' : column name
                Topocentric J2000 Declination in degrees.
            'RA_sigma_deg' : column name or None
                 1-sigma astrometric uncertainty in RA in degrees.
                 If None (together with 'Dec_sigma_deg'), THOR will use the
                 'astrometric_errors' parameter to assign errors.
            'Dec_sigma_deg' : column name or None
                 1-sigma astrometric uncertainty in Dec in degrees.
                 If None (together with 'RA_sigma_deg'), THOR will use the
                 'astrometric_errors' parameter to assign errors.
            'observatory_code' : column name
                The MPC observatory code from which each observation was made. THOR currently
                only supports ground-based observatories.
            'obj_id' : column name or None
                If known, the designation in unpacked or packed form. If unknown, object ID should be
                set to 'NaN'. If None, THOR will assume no observations have been associated.
            'mag' : optional, column name or None
                Observed magnitude. Magnitudes are currently unused by THOR but may be convenient to have
                for visual inspection of results.
            'mag_sigma' : optional, column name or None.
                1-sigma photometric uncertainty in magnitudes.
            'filter' : optional, column name or None.
                The bandpass or filter with which the observation was made.
            'astrometric_catalog' : optional, column name or None.
                Astrometric catalog with which astrometric measurements were calibrated. Unused by THOR outside of
                creating ADES files from recoveries and discoveries.
            'night_id' : optional, column_name or None.
                ID representing the night on which an observation was made. Useful for filtering for
                observations on single nights rather than using the observation time.
    astrometric_errors : {None, list, dict}, optional
        Only used when both 'RA_sigma_deg' and 'Dec_sigma_deg' are None (or absent) in column_mapping.
        If a list, it must be of length 2: [RA sigma, Dec sigma], applied to every observation.
        If a dictionary, it maps observatory codes to such length-2 lists; every observatory code
        present in the observations must have an entry.
    mjd_scale : str, optional
        Time scale of the input MJD exposure times ("utc", "tdb", etc...)

    Returns
    -------
    preprocessed_observations : `~pandas.DataFrame`
        DataFrame with observations in the format required by THOR, sorted by observation time.
    preprocessed_associations : `~pandas.DataFrame`
        DataFrame containing associations ('obs_id', 'obj_id'), any observations with no known label
        will be assigned a unique unknown ID with regex pattern "^u[0-9]{12}$".

    Raises
    ------
    ValueError
        If the astrometric_errors parameter is needed but is not of type list or dictionary,
        if the errors are not correctly defined, or if any observation is left without
        astrometric errors.

    Warns
    -----
    UserWarning:
        If the observation ID, object_ID, or astrometric error columns are not
        defined in the column_mapping dictionary, or if existing observation or
        object IDs are not stored as strings.
    """
    # Required columns THOR needs
    cols = [
        "obs_id",
        "mjd",
        "RA_deg",
        "Dec_deg",
        "RA_sigma_deg",
        "Dec_sigma_deg",
        "observatory_code",
        "obj_id",
    ]
    # Optional columns that can be used for filtering
    # and ADES file production
    optional_cols = [
        # ADES Columns
        "mag",
        "mag_sigma",
        "filter",
        "astrometric_catalog",
        # Useful non-ADES columns
        "night_id",
    ]

    # Check if observation IDs need to be assigned
    assign_obs_ids = column_mapping.get("obs_id") is None
    if assign_obs_ids:
        warning = (
            "No observation ID column defined in the column_mapping dictionary.\n"
            "Assigning observation IDs...\n"
        )
        warnings.warn(warning, UserWarning)
        cols.remove("obs_id")

    # Check if object IDs need to be assigned
    assign_obj_ids = column_mapping.get("obj_id") is None
    if assign_obj_ids:
        warning = (
            "No object ID column defined in the column_mapping dictionary.\n"
            "Assuming no observations have been associated with a known object...\n"
        )
        warnings.warn(warning, UserWarning)
        cols.remove("obj_id")

    # Check if astrometric errors need to be added
    use_astrometric_errors = (
        column_mapping.get("RA_sigma_deg") is None
        and column_mapping.get("Dec_sigma_deg") is None
    )
    if use_astrometric_errors:
        warning = (
            "No astrometric error columns defined in the column_mapping dictionary.\n"
            "Using 'astrometric_errors' parameter to assign errors...\n"
        )
        warnings.warn(warning, UserWarning)
        cols.remove("RA_sigma_deg")
        cols.remove("Dec_sigma_deg")

    # Create a copy of the relevant columns in observations.
    # Optional columns are only included when the user mapped them to an
    # actual column name (a None value means "not provided").
    added_cols = [c for c in optional_cols if column_mapping.get(c) is not None]
    obs_cols = [column_mapping[c] for c in cols + added_cols]
    preprocessed_observations = observations[obs_cols].copy()

    # Rename preprocessed observation columns to those expected by THOR
    # (involves inverting the column_mapping dictionary and dropping any
    # None values passed by the user)
    column_mapping_inv = {v: k for k, v in column_mapping.items() if v is not None}
    preprocessed_observations.rename(columns=column_mapping_inv, inplace=True)

    if use_astrometric_errors:
        _apply_astrometric_errors(preprocessed_observations, astrometric_errors)

    # Make sure all observations have astrometric errors
    missing_errors = (
        preprocessed_observations["RA_sigma_deg"].isna()
        | preprocessed_observations["Dec_sigma_deg"].isna()
    )
    missing_codes = preprocessed_observations.loc[
        missing_errors, "observatory_code"
    ].unique()

    if len(missing_codes) > 0:
        err = "Missing astrometric errors for observations from:\n" "  {}\n"
        # Codes are not guaranteed to be strings, so stringify before joining
        raise ValueError(err.format(", ".join(str(code) for code in missing_codes)))

    # Make sure all observations are given in UTC, if not convert to UTC
    if mjd_scale != "utc":
        mjds = Time(
            preprocessed_observations["mjd"].values, format="mjd", scale=mjd_scale
        )
        preprocessed_observations["mjd"] = mjds.utc.mjd

    # Add _utc to mjd column name
    preprocessed_observations.rename(columns={"mjd": "mjd_utc"}, inplace=True)

    # Make sure that the observations are sorted by observation time
    preprocessed_observations.sort_values(
        by=["mjd_utc"], inplace=True, ignore_index=True
    )
    num_observations = len(preprocessed_observations)

    # Assign observation IDs if needed
    if assign_obs_ids:
        preprocessed_observations["obs_id"] = [
            f"obs{i:09d}" for i in range(num_observations)
        ]
    else:
        # Only warn when the column is genuinely not string-like; the cast
        # itself is always applied so the output is consistently strings.
        if not _is_string_like(preprocessed_observations["obs_id"]):
            warn = "Observation IDs should be of type string, converting..."
            warnings.warn(warn)
        preprocessed_observations["obs_id"] = preprocessed_observations[
            "obs_id"
        ].astype(str)

    # Assign object IDs if needed
    if assign_obj_ids:
        # This must match UNKNOWN_ID_REGEX
        preprocessed_observations["obj_id"] = [
            f"u{i:012d}" for i in range(num_observations)
        ]
    else:
        obj_ids = preprocessed_observations["obj_id"]
        if not _is_string_like(obj_ids):
            warn = "Object IDs should be of type string, converting..."
            warnings.warn(warn)

        # Unassociated observations always receive a placeholder ID,
        # independent of the column's dtype.
        unassociated = obj_ids.isna()
        # Cast to object first so string IDs can be written into columns that
        # were read as numeric (e.g. an all-NaN float column).
        obj_ids = obj_ids.astype(object)
        if unassociated.any():
            # This must match UNKNOWN_ID_REGEX
            obj_ids.loc[unassociated] = [
                f"u{i:012d}" for i in range(int(unassociated.sum()))
            ]
        preprocessed_observations["obj_id"] = obj_ids.astype(str)

    # Split observations into two dataframes (make THOR run only on completely blind observations)
    preprocessed_associations = preprocessed_observations[["obs_id", "obj_id"]].copy()
    cols_sorted = [
        "obs_id",
        "mjd_utc",
        "RA_deg",
        "Dec_deg",
        "RA_sigma_deg",
        "Dec_sigma_deg",
        "observatory_code",
    ]
    cols_sorted += added_cols
    preprocessed_observations = preprocessed_observations[cols_sorted]

    return preprocessed_observations, preprocessed_associations

1	import warnings	1✔
2
3	import numpy as np	1✔
4	import pandas as pd	1✔
5	from astropy.time import Time	1✔
6
7	__all__ = [	1✔
8	"UNKNOWN_ID_REGEX",
9	"preprocessObservations",
10	]
11
12	UNKNOWN_ID_REGEX = "^u[0-9]{12}$"	1✔
13
14
15	def preprocessObservations(	1✔
16	observations, column_mapping, astrometric_errors=None, mjd_scale="utc"
17	):
18	"""
19	Create two separate data frames: one with all observation data needed to run THOR stripped of
20	object IDs and the other with known object IDs and attempts to attribute unknown observations to
21	the latest catalog of known objects from the MPC.
22
23	Parameters
24	----------
25	observations : `~pandas.DataFrame`
26	DataFrame containing at minimum a column of observation IDs, exposure times in MJD (with scale
27	set by mjd_scale), RA in degrees, Dec in degrees, 1-sigma error in RA in degrees, 1-sigma error in
28	Dec in degrees and the observatory code.
29	column_mapping : dict
30	Dictionary containing internal column names as keys mapped to column names in the data frame as values.
31	Should include the following:
32	{# Internal : # External
33	"obs_id" : column name or None,
34	"mjd" : column name,
35	"RA_deg" : column name,
36	"Dec_deg" : column name,
37	"RA_sigma_deg" : column name or None,
38	"Dec_sigma_deg" : column name or None,
39	"observatory_code" : column name,
40	"obj_id" : column name or None,
41	"mag" : optional, column name or None,
42	"mag_sigma" : optional, column name or None,
43	"filter" : optional, column name or None,
44	"astrometric_catalog" : optional, column name or None,
45	}
46	Description of columns and their assumed values:
47	'obs_id' : column name or None
48	Observation IDs as type string. If None, THOR will assign
49	an observation ID to each observation.
50	'mjd' : column name
51	Observation time in MJD, the input time scale can be set with the
52	'mjd_scale' parameter. Time scale will be converted if not in UTC.
53	'RA_deg' : column name
54	Topocentric J2000 Right Ascension in degrees.
55	'Dec_deg' : column name
56	Topocentric J2000 Declination in degrees.
57	'RA_sigma_deg' : column name or None
58	1-sigma astrometric uncertainty in RA in degrees.
59	If certain or all observations are missing astrometric errors, use
60	the 'astrometric_errors' parameter to configure defaults for all observatories
61	or for each observatory individually. If None, THOR will use the 'astrometric_error'
62	parameter to assign errors.
63	'Dec_sigma_deg' : column name or None
64	1-sigma astrometric uncertainty in Dec in degrees.
65	If certain or all observations are missing astrometric errors, use
66	the 'astrometric_errors' parameter to configure defaults for all observatories
67	or for each observatory individually. If None, THOR will use the 'astrometric_error'
68	parameter to assign errors.
69	'observatory_code' : column name
70	The MPC observatory code from which each observation was made. THOR currently
71	only supports ground-based observatories.
72	'obj_id' : column name or None
73	If known, the designation in unpacked or packed form. If unknown, object ID should be
74	set to 'NaN'. If None, THOR will assume no observations have been associated.
75	'mag' : optional, column name or None
76	Observed magnitude. Magnitudes are currently unused by THOR but may be convenient to have
77	for visual inspection of results.
78	'mag_sigma' : optional, column name or None.
79	1-sigma photometric uncertainty in magnitudes.
80	'filter' : optional, column name or None.
81	The bandpass or filter with which the observation was made.
82	'astrometric_catalog' : optional, column name or None.
83	Astrometric catalog with which astrometric measurements were calibrated. Unused by THOR outside of
84	creating ADES files from recoveries and discoveries.
85	'night_id' : optional, column_name or None.
86	ID representing the night on which an observation was made. Useful for filter for observations on
87	single nights rather than using the observation time.
88	mjd_scale : str, optional
89	Time scale of the input MJD exposure times ("utc", "tdb", etc...)
90
91	Returns
92	-------
93	preprocessed_observations : `~pandas.DataFrame`
94	DataFrame with observations in the format required by THOR.
95	preprocessed_attributions : `~pandas.DataFrame`
96	DataFrame containing associations, any observations with no known label
97	will be assigned a unique unknown ID with regex pattern "^u[0-9]{12}$".
98
99	Raises
100	------
101	ValueError
102	If the astrometric_errors parameter is not of type list or dictionary,
103	or if the errors are not correctly defined.
104
105	Warns
106	-----
107	UserWarning:
108	If the observation ID, object_ID, or astrometric error columns are not
109	present in the column_mapping dictionary.
110	"""
111	# Required columns THOR needs
112	cols = [	×
113	"obs_id",
114	"mjd",
115	"RA_deg",
116	"Dec_deg",
117	"RA_sigma_deg",
118	"Dec_sigma_deg",
119	"observatory_code",
120	"obj_id",
121	]
122	# Optional columns that can be used for filtering
123	# and ADES file production
124	optional_cols = [	×
125	# ADES Columns
126	"mag",
127	"mag_sigma",
128	"filter",
129	"astrometric_catalog",
130	# Useful non-ADES columns
131	"night_id",
132	]
133
134	# Check if observation IDs need to be assigned
135	assign_obs_ids = False	×
136	if column_mapping["obs_id"] == None:	×
137	warning = (	×
138	"No observation ID column defined in the column_mapping dictionary.\n"
139	"Assigning observation IDs...\n"
140	)
141	warnings.warn(warning, UserWarning)	×
142	assign_obs_ids = True	×
143	cols.remove("obs_id")	×
144
145	# Check if object IDs need to be assigned
146	assign_obj_ids = False	×
147	if column_mapping["obj_id"] == None:	×
148	warning = (	×
149	"No object ID column defined in the column_mapping dictionary.\n"
150	"Assuming no observations have been associated with a known object...\n"
151	)
152	warnings.warn(warning, UserWarning)	×
153	assign_obj_ids = True	×
154	cols.remove("obj_id")	×
155
156	# Check if astrometric errors need to be added
157	use_astrometric_errors = False	×
158	if (column_mapping["RA_sigma_deg"] == None) and (	×
159	column_mapping["Dec_sigma_deg"] == None
160	):
161	warning = (	×
162	"No astrometric error columns defined in the column_mapping dictionary.\n"
163	"Using 'astrometric_errors' parameter to assign errors...\n"
164	)
165	warnings.warn(warning, UserWarning)	×
166	use_astrometric_errors = True	×
167	cols.remove("RA_sigma_deg")	×
168	cols.remove("Dec_sigma_deg")	×
169
170	# Create a copy of the relevant columns in observations
171	# Add any optional columns that may have been provided by the user
172	obs_cols = [column_mapping[c] for c in cols]	×
173	added_cols = []	×
174	for c in optional_cols:	×
175	if c in column_mapping.keys():	×
176	obs_cols.append(column_mapping[c])	×
177	added_cols.append(c)	×
178	preprocessed_observations = observations[obs_cols].copy()	×
179
180	# Rename preprocessed observation columns to those expected by THOR
181	# (involves inverting the column_mapping dictionary and removing any potential
182	# None values passed by the user)
183	column_mapping_inv = {v: k for k, v in column_mapping.items()}	×
184	if None in column_mapping_inv.keys():	×
185	column_mapping_inv.pop(None)	×
186	preprocessed_observations.rename(columns=column_mapping_inv, inplace=True)	×
187
188	if use_astrometric_errors:	×
189	if type(astrometric_errors) == list:	×
190	if len(astrometric_errors) != 2:	×
191	err = "astrometric_errors list is not of length 2."	×
192	else:
193	preprocessed_observations.loc[:, "RA_sigma_deg"] = astrometric_errors[0]	×
194	preprocessed_observations.loc[:, "Dec_sigma_deg"] = astrometric_errors[	×
195	1
196	]
197
198	elif type(astrometric_errors) == dict:	×
199	for code, errors in astrometric_errors.items():	×
200	if len(errors) != 2:	×
201	err = (	×
202	"Astrometric errors for observatory {} should be a list of length 2 with\n"
203	"the 1-sigma astrometric uncertainty in RA as the first element and the\n"
204	"1-sigma astrometric uncertainty in Dec as the second element."
205	)
206	raise ValueError(err.format(code))	×
207	else:
208	observatory_mask = preprocessed_observations[	×
209	"observatory_code"
210	].isin([code])
211	preprocessed_observations.loc[	×
212	observatory_mask, "RA_sigma_deg"
213	] = errors[0]
214	preprocessed_observations.loc[	×
215	observatory_mask, "Dec_sigma_deg"
216	] = errors[1]
217
218	else:
219	err = (	×
220	"'astrometric_errors' should be one of {None, list, dict}.\n"
221	"If None, then the given observations must have the ra_sigma_deg\n"
222	" and dec_sigma_deg columns.\n"
223	"If a dictionary, then each observatory code present observations in\n"
224	" the observations must have a corresponding key with a list of length 2\n"
225	" as their values. The first element in the list is assumed to be the 1-sigma\n"
226	" astrometric error in RA, while the second is assumed to be the same but in Dec.\n"
227	"If a list, then the first element in the list is assumed to be the 1-sigma\n"
228	" astrometric error in RA, while the second is assumed to be the same but in Dec.\n"
229	" Each observation will be given these errors regardless of if one is present or not.\n"
230	)
231	raise ValueError(err)	×
232
233	# Make sure all observations have astrometric errors
234	missing_codes = preprocessed_observations[	×
235	(
236	(preprocessed_observations["RA_sigma_deg"].isna())
237	| (preprocessed_observations["Dec_sigma_deg"].isna())
238	)
239	]["observatory_code"].unique()
240
241	if len(missing_codes) > 0:	×
242	err = "Missing astrometric errors for observations from:\n" " {}\n"	×
243	raise ValueError(err.format(", ".join(missing_codes)))	×
244
245	# Make sure all observations are given in UTC, if not convert to UTC
246	if mjd_scale != "utc":	×
247	mjds = Time(	×
248	preprocessed_observations["mjd"].values, format="mjd", scale=mjd_scale
249	)
250	preprocessed_observations["mjd"] = mjds.utc.mjd	×
251
252	# Add _utc to mjd column name
253	preprocessed_observations.rename(columns={"mjd": "mjd_utc"}, inplace=True)	×
254
255	# Make sure that the observations are sorted by observation time
256	preprocessed_observations.sort_values(	×
257	by=["mjd_utc"], inplace=True, ignore_index=True
258	)
259	# Assign observation IDs if needed
260	if assign_obs_ids:	×
261	preprocessed_observations.loc[:, "obs_id"] = [	×
262	"obs{:09d}".format(i) for i in range(len(preprocessed_observations))
263	]
264	else:
265	if type(preprocessed_observations["obs_id"]) != object:	×
266	warn = "Observation IDs should be of type string, converting..."	×
267	warnings.warn(warn)	×
268	preprocessed_observations["obs_id"] = preprocessed_observations[	×
269	"obs_id"
270	].astype(str)
271
272	# Assign object IDs if needed
273	if assign_obj_ids:	×
274	# This must match UNKNOWN_ID_REGEX
275	preprocessed_observations.loc[:, "obj_id"] = [	×
276	f"u{i:012d}" for i in range(len(preprocessed_observations))
277	]
278	else:
279	if type(preprocessed_observations["obj_id"]) != object:	×
280	warn = "Object IDs should be of type string, converting..."	×
281	warnings.warn(warn)	×
282	num_unassociated = len(	×
283	preprocessed_observations[preprocessed_observations["obj_id"].isna()]
284	)
285	# This must match UNKNOWN_ID_REGEX
286	preprocessed_observations.loc[	×
287	preprocessed_observations["obj_id"].isna(), "obj_id"
288	] = [f"u{i:012d}" for i in range(num_unassociated)]
289	preprocessed_observations["obj_id"] = preprocessed_observations[	×
290	"obj_id"
291	].astype(str)
292
293	# Split observations into two dataframes (make THOR run only on completely blind observations)
294	preprocessed_associations = preprocessed_observations[["obs_id", "obj_id"]].copy()	×
295	cols_sorted = [	×
296	"obs_id",
297	"mjd_utc",
298	"RA_deg",
299	"Dec_deg",
300	"RA_sigma_deg",
301	"Dec_sigma_deg",
302	"observatory_code",
303	]
304	cols_sorted += added_cols	×
305	preprocessed_observations = preprocessed_observations[cols_sorted]	×
306
307	return preprocessed_observations, preprocessed_associations	×

moeyensj / thor / 5545066420

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous