• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

moeyensj / thor / 5545066420

13 Jul 2023 03:36PM UTC coverage: 48.496%. First build
5545066420

Pull #102

github

web-flow
Merge 0823ae2a9 into 702d175a8
Pull Request #102: Add newer version of pyoorb.py

64 of 64 new or added lines in 3 files covered. (100.0%)

3095 of 6382 relevant lines covered (48.5%)

0.48 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

8.43
/thor/data_processing.py
1
import warnings
1✔
2

3
import numpy as np
1✔
4
import pandas as pd
1✔
5
from astropy.time import Time
1✔
6

7
__all__ = [
1✔
8
    "UNKNOWN_ID_REGEX",
9
    "preprocessObservations",
10
]
11

12
# Pattern for the placeholder object IDs that preprocessObservations assigns to
# observations without a known object: the letter "u" followed by 12 digits.
UNKNOWN_ID_REGEX = "^u[0-9]{12}$"
1✔
13

14

15
def preprocessObservations(
    observations, column_mapping, astrometric_errors=None, mjd_scale="utc"
):
    """
    Create two separate data frames: one with all observation data needed to run THOR
    stripped of object IDs, and one with the association of each observation ID to an
    object ID (observations with no known object are given a unique placeholder ID).

    Parameters
    ----------
    observations : `~pandas.DataFrame`
        DataFrame containing at minimum the observation times in MJD (with scale
        set by mjd_scale), RA in degrees, Dec in degrees and the observatory code.
        Observation IDs, object IDs and 1-sigma astrometric errors are read from
        the data frame when mapped, and generated otherwise (see below).
    column_mapping : dict
        Dictionary containing internal column names as keys mapped to column names in
        the data frame as values. Should include the following:
        {# Internal : # External
            "obs_id" : column name or None,
            "mjd" : column name,
            "RA_deg" : column name,
            "Dec_deg" : column name,
            "RA_sigma_deg" : column name or None,
            "Dec_sigma_deg" : column name or None,
            "observatory_code" : column name,
            "obj_id" : column name or None,
            "mag" : optional, column name or None,
            "mag_sigma" : optional, column name or None,
            "filter" : optional, column name or None,
            "astrometric_catalog" : optional, column name or None,
            "night_id" : optional, column name or None,
        }
        A key that is absent is treated the same as a key mapped to None for
        every entry that accepts None.
        Description of columns and their assumed values:
            'obs_id' : column name or None
                Observation IDs as type string (other types are converted to
                string). If None, THOR will assign an observation ID to each
                observation.
            'mjd' : column name
                Observation time in MJD, the input time scale can be set with the
                'mjd_scale' parameter. Time scale will be converted if not in UTC.
            'RA_deg' : column name
                Topocentric J2000 Right Ascension in degrees.
            'Dec_deg' : column name
                Topocentric J2000 Declination in degrees.
            'RA_sigma_deg' : column name or None
                 1-sigma astrometric uncertainty in RA in degrees. Must be mapped
                 (or left as None) together with 'Dec_sigma_deg'. If both are None,
                 the 'astrometric_errors' parameter is used to assign errors.
                 If mapped, every observation must have a non-null value.
            'Dec_sigma_deg' : column name or None
                 1-sigma astrometric uncertainty in Dec in degrees. Same rules as
                 'RA_sigma_deg'.
            'observatory_code' : column name
                The MPC observatory code from which each observation was made. THOR currently
                only supports ground-based observatories.
            'obj_id' : column name or None
                If known, the designation in unpacked or packed form. If unknown, object ID
                should be set to 'NaN'. If None, THOR will assume no observations have been
                associated.
            'mag' : optional, column name or None
                Observed magnitude. Magnitudes are currently unused by THOR but may be
                convenient to have for visual inspection of results.
            'mag_sigma' : optional, column name or None.
                1-sigma photometric uncertainty in magnitudes.
            'filter' : optional, column name or None.
                The bandpass or filter with which the observation was made.
            'astrometric_catalog' : optional, column name or None.
                Astrometric catalog with which astrometric measurements were calibrated.
                Unused by THOR outside of creating ADES files from recoveries and discoveries.
            'night_id' : optional, column_name or None.
                ID representing the night on which an observation was made. Useful for
                filtering observations on single nights rather than using the observation
                time.
    astrometric_errors : list, tuple, dict or None, optional
        Only used when neither 'RA_sigma_deg' nor 'Dec_sigma_deg' is mapped.
        If a list (or tuple) of length 2, the first element is the 1-sigma error in RA
        and the second the 1-sigma error in Dec, applied to every observation.
        If a dictionary, keys are observatory codes and values are length-2 sequences
        of (RA error, Dec error) applied to the observations from that observatory;
        every observatory present in the observations must have an entry.
    mjd_scale : str, optional
        Time scale of the input MJD exposure times ("utc", "tdb", etc...)

    Returns
    -------
    preprocessed_observations : `~pandas.DataFrame`
        DataFrame with observations in the format required by THOR, sorted by
        observation time. Columns are obs_id, mjd_utc, RA_deg, Dec_deg, RA_sigma_deg,
        Dec_sigma_deg, observatory_code followed by any mapped optional columns.
    preprocessed_associations : `~pandas.DataFrame`
        DataFrame containing associations (obs_id, obj_id), any observations with no
        known label will be assigned a unique unknown ID with regex pattern
        "^u[0-9]{12}$".

    Raises
    ------
    ValueError
        If the astrometric_errors parameter is needed but is not a list or dictionary,
        if the errors are not correctly defined, if only one of the two astrometric
        error columns is mapped, or if any observation ends up without astrometric
        errors.

    Warns
    -----
    UserWarning:
        If the observation ID, object ID, or astrometric error columns are not
        mapped in the column_mapping dictionary, or if mapped observation/object IDs
        are not stored as strings and have to be converted.
    """
    # Required columns THOR needs
    cols = [
        "obs_id",
        "mjd",
        "RA_deg",
        "Dec_deg",
        "RA_sigma_deg",
        "Dec_sigma_deg",
        "observatory_code",
        "obj_id",
    ]
    # Optional columns that can be used for filtering
    # and ADES file production
    optional_cols = [
        # ADES Columns
        "mag",
        "mag_sigma",
        "filter",
        "astrometric_catalog",
        # Useful non-ADES columns
        "night_id",
    ]

    # Check if observation IDs need to be assigned
    assign_obs_ids = column_mapping.get("obs_id") is None
    if assign_obs_ids:
        warning = (
            "No observation ID column defined in the column_mapping dictionary.\n"
            "Assigning observation IDs...\n"
        )
        warnings.warn(warning, UserWarning)
        cols.remove("obs_id")

    # Check if object IDs need to be assigned
    assign_obj_ids = column_mapping.get("obj_id") is None
    if assign_obj_ids:
        warning = (
            "No object ID column defined in the column_mapping dictionary.\n"
            "Assuming no observations have been associated with a known object...\n"
        )
        warnings.warn(warning, UserWarning)
        cols.remove("obj_id")

    # Check if astrometric errors need to be added
    ra_sigma_col = column_mapping.get("RA_sigma_deg")
    dec_sigma_col = column_mapping.get("Dec_sigma_deg")
    use_astrometric_errors = ra_sigma_col is None and dec_sigma_col is None
    if use_astrometric_errors:
        warning = (
            "No astrometric error columns defined in the column_mapping dictionary.\n"
            "Using 'astrometric_errors' parameter to assign errors...\n"
        )
        warnings.warn(warning, UserWarning)
        cols.remove("RA_sigma_deg")
        cols.remove("Dec_sigma_deg")
    elif ra_sigma_col is None or dec_sigma_col is None:
        # A half-defined mapping would otherwise surface as an opaque KeyError(None)
        # when selecting columns below.
        err = (
            "Only one of 'RA_sigma_deg' and 'Dec_sigma_deg' is defined in the\n"
            "column_mapping dictionary. Either map both columns or set both to None."
        )
        raise ValueError(err)

    # Optional columns count only when they are mapped to an actual column name;
    # the documented "column name or None" contract allows None placeholders.
    added_cols = [c for c in optional_cols if column_mapping.get(c) is not None]
    selected = cols + added_cols

    # Create a copy of the relevant columns in observations and rename them
    # to the internal names expected by THOR
    preprocessed_observations = observations[
        [column_mapping[c] for c in selected]
    ].copy()
    preprocessed_observations = preprocessed_observations.rename(
        columns={column_mapping[c]: c for c in selected}
    )

    if use_astrometric_errors:
        _assign_astrometric_errors(preprocessed_observations, astrometric_errors)

    # Make sure all observations have astrometric errors
    missing = (
        preprocessed_observations["RA_sigma_deg"].isna()
        | preprocessed_observations["Dec_sigma_deg"].isna()
    )
    missing_codes = preprocessed_observations.loc[missing, "observatory_code"].unique()
    if len(missing_codes) > 0:
        err = "Missing astrometric errors for observations from:\n" "  {}\n"
        # Codes are not guaranteed to be strings, so stringify before joining
        raise ValueError(err.format(", ".join(map(str, missing_codes))))

    # Make sure all observations are given in UTC, if not convert to UTC
    if mjd_scale != "utc":
        mjds = Time(
            preprocessed_observations["mjd"].values, format="mjd", scale=mjd_scale
        )
        preprocessed_observations["mjd"] = mjds.utc.mjd

    # Add _utc to mjd column name
    preprocessed_observations = preprocessed_observations.rename(
        columns={"mjd": "mjd_utc"}
    )

    # Make sure that the observations are sorted by observation time
    preprocessed_observations = preprocessed_observations.sort_values(
        by=["mjd_utc"], ignore_index=True
    )
    num_obs = len(preprocessed_observations)

    # Assign observation IDs if needed
    if assign_obs_ids:
        preprocessed_observations["obs_id"] = [
            "obs{:09d}".format(i) for i in range(num_obs)
        ]
    else:
        # Only warn when the column is genuinely not string-like; the conversion
        # itself is always applied so the output is uniformly of type string.
        if not _holds_strings(preprocessed_observations["obs_id"]):
            warn = "Observation IDs should be of type string, converting..."
            warnings.warn(warn)
        preprocessed_observations["obs_id"] = preprocessed_observations[
            "obs_id"
        ].astype(str)

    # Assign object IDs if needed
    if assign_obj_ids:
        # This must match UNKNOWN_ID_REGEX
        preprocessed_observations["obj_id"] = [f"u{i:012d}" for i in range(num_obs)]
    else:
        if not _holds_strings(preprocessed_observations["obj_id"]):
            warn = "Object IDs should be of type string, converting..."
            warnings.warn(warn)
        # Work on an object-typed copy so string placeholders can be written
        # even when the source column is numeric (e.g. all NaN).
        obj_ids = preprocessed_observations["obj_id"].astype(object)
        unassociated = obj_ids.isna()
        if unassociated.any():
            # This must match UNKNOWN_ID_REGEX
            obj_ids.loc[unassociated] = [
                f"u{i:012d}" for i in range(int(unassociated.sum()))
            ]
        preprocessed_observations["obj_id"] = obj_ids.astype(str)

    # Split observations into two dataframes (make THOR run only on completely blind observations)
    preprocessed_associations = preprocessed_observations[["obs_id", "obj_id"]].copy()
    cols_sorted = [
        "obs_id",
        "mjd_utc",
        "RA_deg",
        "Dec_deg",
        "RA_sigma_deg",
        "Dec_sigma_deg",
        "observatory_code",
    ]
    cols_sorted += added_cols
    preprocessed_observations = preprocessed_observations[cols_sorted]

    return preprocessed_observations, preprocessed_associations


def _holds_strings(series):
    """Return True if the series uses a dtype meant for strings (object or pandas string)."""
    return pd.api.types.is_object_dtype(series.dtype) or isinstance(
        series.dtype, pd.StringDtype
    )


def _assign_astrometric_errors(observations, astrometric_errors):
    """Add RA_sigma_deg / Dec_sigma_deg columns to observations in place from user defaults."""
    if isinstance(astrometric_errors, (list, tuple)):
        if len(astrometric_errors) != 2:
            err = "astrometric_errors list is not of length 2."
            raise ValueError(err)
        observations["RA_sigma_deg"] = astrometric_errors[0]
        observations["Dec_sigma_deg"] = astrometric_errors[1]

    elif isinstance(astrometric_errors, dict):
        # Start from all-missing so observatories without an entry are reported
        # by the caller's missing-error check instead of failing on absent columns.
        observations["RA_sigma_deg"] = float("nan")
        observations["Dec_sigma_deg"] = float("nan")
        for code, errors in astrometric_errors.items():
            if len(errors) != 2:
                err = (
                    "Astrometric errors for observatory {} should be a list of length 2 with\n"
                    "the 1-sigma astrometric uncertainty in RA as the first element and the\n"
                    "1-sigma astrometric uncertainty in Dec as the second element."
                )
                raise ValueError(err.format(code))
            observatory_mask = observations["observatory_code"].isin([code])
            observations.loc[observatory_mask, "RA_sigma_deg"] = errors[0]
            observations.loc[observatory_mask, "Dec_sigma_deg"] = errors[1]

    else:
        err = (
            "'astrometric_errors' should be one of {None, list, dict}.\n"
            "If None, then the given observations must have the ra_sigma_deg\n"
            "  and dec_sigma_deg columns.\n"
            "If a dictionary, then each observatory code present observations in\n"
            "  the observations must have a corresponding key with a list of length 2\n"
            "  as their values. The first element in the list is assumed to be the 1-sigma\n"
            "  astrometric error in RA, while the second is assumed to be the same but in Dec.\n"
            "If a list, then the first element in the list is assumed to be the 1-sigma\n"
            "  astrometric error in RA, while the second is assumed to be the same but in Dec.\n"
            "  Each observation will be given these errors regardless of if one is present or not.\n"
        )
        raise ValueError(err)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc