10947976957

Committed 19 Sep 2024 07:55PM UTC coverage: 30.106% (-0.06%) from 30.164%

Build # 10947976957

Build Type

Pull #2376

github

Committed by

web-flow

Commit Message

Merge 84f44afca into 97b174838

Pull Request Pull Request #2376: Add script to update processing table column layout

Run Details

1 of 95 new or added lines in 2 files covered. (1.05%)

59 existing lines in 1 file now uncovered.

14622 of 48569 relevant lines covered (30.11%)

0.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/py/desispec/scripts/reformat_exptables.py

"""
desispec.scripts.updateexptables
================================

"""
import os
import sys
import numpy as np
import re
import time

from desispec.workflow.exptable import get_exposure_table_path, \
                                       get_exposure_table_name, \
                                       default_obstypes_for_exptable,\
                                       night_to_month, \
                                       get_exposure_table_column_defaults
from desispec.workflow.utils import define_variable_from_environment, listpath, \
                                    pathjoin
from desispec.workflow.tableio import write_table, load_table
from desispec.scripts.exposuretable import create_exposure_tables



def update_exposure_tables(nights=None, night_range=None, path_to_data=None,
                           exp_table_path=None, obstypes=None, orig_filetype='csv',
                           out_filetype='csv',  verbose=False, no_specprod=False,
                           dry_run=False):
    """
    Generates updated exposure tables for the nights requested. Requires
    exposure tables to exist on disk.

    For every requested night that has an existing exposure table, a fresh
    table is generated into a temporary file with create_exposure_tables(),
    compared exposure-by-exposure with the existing table, and any
    non-default values found in the existing table are carried over into the
    fresh one (scalars are copied, array-valued cells are merged). Unless
    dry_run is set, the existing table is renamed to an archived
    ``.replaced-<timestamp>`` file and the merged table is written in its
    place. The temporary file is removed at the end of each night.

    Args:
        nights: str or int. The night(s) to generate exposure tables for.
                            Either a single night, a comma separated string
                            of nights, or 'all' to use every night found in
                            path_to_data.
        night_range: str. comma separated pair of nights in form
                          YYYYMMDD,YYYYMMDD for first_night,last_night
                          specifying the beginning and end of a range of
                          nights to be generated. last_night should be
                          inclusive.
        path_to_data: str. The path to the raw data and request*.json and
                           manifest* files. If None, it is taken from the
                           DESI_SPECTRO_DATA environment variable.
        exp_table_path: str. Full path to where to exposure tables should be
                             saved, WITHOUT the monthly directory included.
        obstypes: str. The exposure OBSTYPE's that you want to include in the
                       exposure table. Can be a comma separated list.
        orig_filetype: str. The file extension (without the '.') of the exposure
                            tables.
        out_filetype: str. The file extension for the outputted exposure tables
                           (without the '.').
        verbose: boolean. Whether to give verbose output information or not.
                          True prints more information.
        no_specprod: boolean. Create exposure table in repository location
                              rather than the SPECPROD location
        dry_run: boolean. If True, print the table that would have been
                          written and a cell-by-cell comparison against the
                          freshly generated table instead of archiving the
                          original and writing the updated file.

    Returns:
        Nothing

    Raises:
        ValueError: if neither nights nor night_range is given, or if
                    night_range is not a comma separated pair of numeric
                    nights.
        AssertionError: if the freshly generated table has fewer rows than
                        the original (restricted to the requested obstypes)
                        or is missing one of its exposures.
    """
    ## Make sure user specified what nights to run on
    if nights is None and night_range is None:
        raise ValueError("Must specify either nights or night_range."
                         +" To process all nights give nights=all")

    ## Define where to find the data
    if path_to_data is None:
        path_to_data = define_variable_from_environment(env_name='DESI_SPECTRO_DATA',
                                                        var_descr="The data path")

    ## Get all nights in 2020's with data
    nights_with_data = list()
    for n in listpath(path_to_data):
        # - nights are 20YYMMDD
        ## Raw string so that \d is a regex token and not an (invalid)
        ## Python string escape
        if re.match(r'^202\d{5}$', n):
            nights_with_data.append(n)

    ## If unspecified or given "all", set nights to all nights with data
    check_night = False
    if nights is None or nights == 'all':
        nights = [int(night) for night in nights_with_data]
        ## No need to check nights since derived from disk
    else:
        ## str() allows a single night to be given as an int, as documented
        nights = [int(val.strip()) for val in str(nights).split(",")]
        ## If nights are specified, make sure we check that there is actually data
        check_night = True
    nights = np.array(nights)

    ## If user specified a night range, cut nights to that range of dates
    if night_range is not None:
        if ',' not in night_range:
            raise ValueError("night_range must be a comma separated pair of "
                             + "nights in form YYYYMMDD,YYYYMMDD")
        nightpair = night_range.split(',')
        if len(nightpair) != 2 or not nightpair[0].isnumeric() \
                or not nightpair[1].isnumeric():
            raise ValueError("night_range must be a comma separated pair of "
                             + "nights in form YYYYMMDD,YYYYMMDD")
        first_night, last_night = nightpair
        nights = nights[np.where(int(first_night)<=nights.astype(int))[0]]
        nights = nights[np.where(int(last_night)>=nights.astype(int))[0]]

    ## Parse the obstypes of the input
    if obstypes is not None:
        obstypes = [ val.strip('\t ') for val in obstypes.split(",") ]
    else:
        obstypes = default_obstypes_for_exptable()

    ## Define where to save the data
    usespecprod = (not no_specprod)
    if exp_table_path is None:
        exp_table_path = get_exposure_table_path(night=None,
                                                 usespecprod=usespecprod)

    ## Tell user the final list of nights and starting looping over them
    print("Nights: ", nights)
    for night in nights:
        if check_night and str(night) not in nights_with_data:
            print(f'Night {night} not in data directory: {path_to_data}. Skipping')
            continue

        ## Define where we should be looking for the exposure tables
        month = night_to_month(night)
        exptab_path = pathjoin(exp_table_path,month)
        orig_name = get_exposure_table_name(night, extension=orig_filetype)
        orig_pathname = pathjoin(exptab_path, orig_name)

        ## If the exposure table doesn't exist, skip, since we are updating
        ## not generating.
        if not os.path.exists(orig_pathname):
            print(f'Could not find exposure table for night={night} at:'
                  + f' {orig_pathname}. Skipping this night.')
            continue

        ## Create a temporary file pathname
        temp_filetype = f"updatetemp.{out_filetype}"
        temp_pathname = orig_pathname.replace(f".{orig_filetype}",
                                              f".{temp_filetype}")

        ## Create a fresh version of the exposure table using the current
        ## code and save it to the temporary pathname
        obstypes_str = ','.join(obstypes)
        create_exposure_tables(nights=str(night), night_range=None,
                               path_to_data=path_to_data,
                               exp_table_path=exp_table_path,
                               obstypes=obstypes_str, exp_filetype=temp_filetype,
                               cameras=None, bad_cameras=None,
                               badamps=None, verbose=verbose,
                               no_specprod=no_specprod, overwrite_files=False)

        ## Load the old and new tables to compare
        newtable = load_table(temp_pathname, tabletype='exptab',
                              use_specprod=usespecprod)
        origtable = load_table(orig_pathname, tabletype='exptab',
                               use_specprod=usespecprod)

        ## Print some useful information and do some sanity checks that
        ## The new table has as much or more data than the old
        print(f"\n\nNumber of rows in original: {len(origtable)}"
              + f", Number of rows in new: {len(newtable)}")

        ## Only compare against original rows whose OBSTYPE was requested,
        ## since the fresh table is generated for those obstypes only
        if 'OBSTYPE' in origtable.colnames \
                and not set(origtable['OBSTYPE']).issubset(set(obstypes)):
            subset_rows = np.array([obs in obstypes for obs in origtable['OBSTYPE']])
            subset_orig = origtable[subset_rows]
        else:
            subset_orig = origtable

        assert len(newtable) >= len(subset_orig), \
               "Tables for given obstypes must greater or equal length"
        assert np.all([exp in newtable['EXPID'] for exp in subset_orig['EXPID']]), \
               "All old exposures of given obstype must be present in the new table"

        ## Go through exposure by exposure and check each columns value
        ## in the new vs the original
        mutual_colnames = [col for col in newtable.colnames if col in origtable.colnames]
        coldefs = get_exposure_table_column_defaults(asdict=True)
        for newloc,expid in enumerate(newtable['EXPID']):
            ## Match to the row in the original table
            origloc = np.where(origtable['EXPID']==expid)[0]
            if len(origloc) > 1:
                print(f"ERROR on night {night}: found more than one exposure"
                      + f" matching expid {expid}")
                continue
            elif len(origloc) == 1:
                origloc = origloc[0]
            else:
                print(f"New exposure identified: {newtable[newloc]}")
                continue
            ## For colnames that the two columns share, compare values.
            for col in mutual_colnames:
                origval = origtable[col][origloc]
                newval = newtable[col][newloc]
                ## Clean up three special cases of bad flags/comments in early data
                if col == 'EXPFLAG'        and 'EFFTIME_ETC' in newtable.colnames and \
                        newtable['EFFTIME_ETC'][newloc] > 0. and 'aborted' in origval:
                    origorigval = origval.copy()
                    origval = origval[np.where(origval != 'aborted')]
                    print("Identified outdated aborted exposure flag. "
                          + "Removing that. Original set: "
                          + f"{origorigval}, Updated origset: {origval}")
                if col == 'COMMENTS' and 'EFFTIME_ETC' in newtable.colnames \
                        and newtable['EFFTIME_ETC'][newloc] > 0. and \
                        'EXPFLAG' in origtable.colnames \
                        and 'aborted' in origtable['EXPFLAG'][origloc]:
                    origorigval = origval.copy()
                    valcheck = np.array([('For EXPTIME:' not in val) for val in origval])
                    origval = origval[valcheck]
                    print("Identified outdated aborted exptime COMMENT. "
                          + "Removing that. Original set: "
                          + f"{origorigval}, Updated origset: {origval}")
                if col == 'HEADERERR' and 'PURPOSE:->' in origval:
                    origorigval = origval.copy()
                    valcheck = (np.array(origval) != 'PURPOSE:->')
                    origval = origval[valcheck]
                    print("Identified outdated PURPOSE null->null HEADERERR."
                          + " Removing that. Original set: "
                          + f"{origorigval}, Updated origset: {origval}")
                ## If columns differ and original isn't a default value,
                ## then take the original user-defined value
                if np.isscalar(origtable[col][origloc]):
                    if origval != coldefs[col] and newval != origval:
                        print(f"Difference detected for Night {night}, exp {expid}, "
                              + f"col {col}: orig={origval}, new={newval}. "
                              + "Taking the original value. ")
                        newtable[col][newloc] = origval
                else:
                    if not np.array_equal(origval, coldefs[col]) and \
                       not np.array_equal(newval, origval):
                        print(f"Difference detected for Night {night}, exp {expid}, "
                              + f"col {col}: orig={origval}, new={newval}. "
                              + "Taking union of the two arrays.")
                        ## Start from the non-empty new entries and append any
                        ## non-empty original entries not already present
                        combined_val = newval[newval != '']
                        for val in origval:
                            if val != '' and val not in newval:
                                combined_val = np.append(combined_val,[val])
                        newtable[col][newloc] = combined_val

        ## If just testing, print the table and a cell-by-cell equality test
        ## for the scalar columns
        ## If not testing, move the original table to an archived filename
        ## and save the updated table to the official exptable pathname
        if dry_run:
            print("\n\nOutput file would have been:")
            newtable.pprint_all()

            names = [col for col in newtable.colnames if col not in ['HEADERERR','EXPFLAG','COMMENTS']]
            t1 = newtable[names]
            t2 = load_table(temp_pathname, tabletype='exptab',
                                           use_specprod=usespecprod)[names]
            t1.values_equal(t2).pprint_all()
        else:
            ftime = time.strftime("%Y%m%d_%Hh%Mm")
            replaced_pathname = orig_pathname.replace(f".{orig_filetype}",
                                                      f".replaced-{ftime}.{orig_filetype}")
            print(f"Moving original file from {orig_pathname} to {replaced_pathname}")
            os.rename(orig_pathname,replaced_pathname)
            time.sleep(0.1)
            out_pathname = orig_pathname.replace(f".{orig_filetype}", f".{out_filetype}")
            write_table(newtable, out_pathname)
            print(f"Updated file saved to {out_pathname}. Original archived as {replaced_pathname}")

        ## Cleanup the temporary table created with the fresh version of the
        ## create_exposure_table script
        os.remove(temp_pathname)
        print(f"Removed the temporary file {temp_pathname}")
        print("\n\n")

        ## Flush the outputs
        sys.stdout.flush()
        sys.stderr.flush()
    print("Exposure table regenerations complete")

1	"""
2	desispec.scripts.updateexptables
3	================================
4
5	"""
6	import os	×
7	import sys	×
8	import numpy as np	×
9	import re	×
10	import time	×
11
UNCOV 12	from desispec.workflow.exptable import get_exposure_table_path, \	×
13	get_exposure_table_name, \
14	default_obstypes_for_exptable,\
15	night_to_month, \
16	get_exposure_table_column_defaults
UNCOV 17	from desispec.workflow.utils import define_variable_from_environment, listpath, \	×
18	pathjoin
UNCOV 19	from desispec.workflow.tableio import write_table, load_table	×
20	from desispec.scripts.exposuretable import create_exposure_tables	×
21
22
23
24	def update_exposure_tables(nights=None, night_range=None, path_to_data=None,	×
25	exp_table_path=None, obstypes=None, orig_filetype='csv',
26	out_filetype='csv', verbose=False, no_specprod=False,
27	dry_run=False):
28	"""
29	Generates updated exposure tables for the nights requested. Requires
30	exposure tables to exist on disk.
31
32	Args:
33	nights: str, int, or comma separated list. The night(s) to generate
34	procesing tables for.
35	night_range: str. comma separated pair of nights in form
36	YYYYMMDD,YYYYMMDD for first_night,last_night
37	specifying the beginning and end of a range of
38	nights to be generated. last_night should be
39	inclusive.
40	path_to_data: str. The path to the raw data and request*.json and
41	manifest* files.
42	exp_table_path: str. Full path to where to exposure tables should be
43	saved, WITHOUT the monthly directory included.
44	obstypes: str. The exposure OBSTYPE's that you want to include in the
45	exposure table. Can be a comma separated list.
46	orig_filetype: str. The file extension (without the '.') of the exposure
47	tables.
48	out_filetype: str. The file extension for the outputted exposure tables
49	(without the '.').
50	verbose: boolean. Whether to give verbose output information or not.
51	True prints more information.
52	no_specprod: boolean. Create exposure table in repository location
53	rather than the SPECPROD location
54
55	Returns:
56	Nothing
57	"""
58	## Make sure user specified what nights to run on
UNCOV 59	if nights is None and night_range is None:	×
UNCOV 60	raise ValueError("Must specify either nights or night_range."	×
61	+" To process all nights give nights=all")
62
63	## Define where to find the data
UNCOV 64	if path_to_data is None:	×
UNCOV 65	path_to_data = define_variable_from_environment(env_name='DESI_SPECTRO_DATA',	×
66	var_descr="The data path")
67
68	## Get all nights in 2020's with data
UNCOV 69	nights_with_data = list()	×
UNCOV 70	for n in listpath(path_to_data):	×
71	# - nights are 20YYMMDD
UNCOV 72	if re.match('^202\d{5}$', n):	×
UNCOV 73	nights_with_data.append(n)	×
74
75	## If unpecified or given "all", set nights to all nights with data
UNCOV 76	check_night = False	×
UNCOV 77	if nights is None or nights == 'all':	×
UNCOV 78	nights = [int(night) for night in nights_with_data]	×
79	## No need to check nights since derived from disk
80	else:
UNCOV 81	nights = [int(val.strip()) for val in nights.split(",")]	×
82	## If nights are specified, make sure we check that there is actually data
83	check_night = True	×
84	nights = np.array(nights)	×
85
86	## If user specified a night range, cut nights to that range of dates
UNCOV 87	if night_range is not None:	×
88	if ',' not in night_range:	×
89	raise ValueError("night_range must be a comma separated pair of "	×
90	+ "nights in form YYYYMMDD,YYYYMMDD")
UNCOV 91	nightpair = night_range.split(',')	×
UNCOV 92	if len(nightpair) != 2 or not nightpair[0].isnumeric() \	×
93	or not nightpair[1].isnumeric():
94	raise ValueError("night_range must be a comma separated pair of "	×
95	+ "nights in form YYYYMMDD,YYYYMMDD")
96	first_night, last_night = nightpair	×
97	nights = nights[np.where(int(first_night)<=nights.astype(int))[0]]	×
UNCOV 98	nights = nights[np.where(int(last_night)>=nights.astype(int))[0]]	×
99
100	## Parse the obstypes of the input
101	if obstypes is not None:	×
102	obstypes = [ val.strip('\t ') for val in obstypes.split(",") ]	×
103	else:
UNCOV 104	obstypes = default_obstypes_for_exptable()	×
105
106	## Define where to save the data
107	usespecprod = (not no_specprod)	×
108	if exp_table_path is None:	×
UNCOV 109	exp_table_path = get_exposure_table_path(night=None,	×
110	usespecprod=usespecprod)
111
112	## Tell user the final list of nights and starting looping over them
113	print("Nights: ", nights)	×
UNCOV 114	for night in nights:	×
115	if check_night and str(night) not in nights_with_data:	×
116	print(f'Night {night} not in data directory: {path_to_data}. Skipping')	×
UNCOV 117	continue	×
118
119	## Define where we should be looking for the exposure tables
120	month = night_to_month(night)	×
121	exptab_path = pathjoin(exp_table_path,month)	×
122	orig_name = get_exposure_table_name(night, extension=orig_filetype)	×
UNCOV 123	orig_pathname = pathjoin(exptab_path, orig_name)	×
124
125	## If the exposure table doesn't exist, skip, since we are updating
126	## not generating.
UNCOV 127	if not os.path.exists(orig_pathname):	×
128	print(f'Could not find exposure table for night={night} at:'	×
129	+ f' {orig_pathname}. Skipping this night.')
UNCOV 130	continue	×
131
132	## Create a temporary file pathname
133	temp_filetype = f"updatetemp.{out_filetype}"	×
UNCOV 134	temp_pathname = orig_pathname.replace(f".{orig_filetype}",	×
135	f".{temp_filetype}")
136
137	## Create a fresh version of the exposure table using the current
138	## code and save it to the temporary pathname
139	obstypes_str = ','.join(obstypes)	×
140	create_exposure_tables(nights=str(night), night_range=None,	×
141	path_to_data=path_to_data,
142	exp_table_path=exp_table_path,
143	obstypes=obstypes_str, exp_filetype=temp_filetype,
144	cameras=None, bad_cameras=None,
145	badamps=None, verbose=verbose,
146	no_specprod=no_specprod, overwrite_files=False)
147
148	## Load the old and new tables to compare
UNCOV 149	newtable = load_table(temp_pathname, tabletype='exptab',	×
150	use_specprod=usespecprod)
151	origtable = load_table(orig_pathname, tabletype='exptab',	×
152	use_specprod=usespecprod)
153
154	## Print some useful information and do some sanity checks that
155	## The new table has as much or more data than the old
UNCOV 156	print(f"\n\nNumber of rows in original: {len(origtable)}"	×
157	+ f", Number of rows in new: {len(newtable)}")
158
UNCOV 159	if 'OBSTYPE' in origtable.colnames \	×
160	and not set(origtable['OBSTYPE']).issubset(set(obstypes)):
UNCOV 161	subset_rows = np.array([obs in obstypes for obs in origtable['OBSTYPE']])	×
UNCOV 162	subset_orig = origtable[subset_rows]	×
163	else:
164	subset_orig = origtable	×
165
UNCOV 166	assert len(newtable) >= len(subset_orig), \	×
167	"Tables for given obstypes must greater or equal length"
UNCOV 168	assert np.all([exp in newtable['EXPID'] for exp in subset_orig['EXPID']]), \	×
169	"All old exposures of given obstype must be present in the new table"
170
171	## Go through exposure by exposure and check each columns value
172	## in the new vs the original
173	mutual_colnames = [col for col in newtable.colnames if col in origtable.colnames]	×
UNCOV 174	coldefs = get_exposure_table_column_defaults(asdict=True)	×
175	for newloc,expid in enumerate(newtable['EXPID']):	×
176	## Match to the row in the original table
UNCOV 177	origloc = np.where(origtable['EXPID']==expid)[0]	×
UNCOV 178	if len(origloc) > 1:	×
UNCOV 179	print(f"ERROR on night {night}: found more than one exposure"	×
180	+ f"matching expid {expid}")
UNCOV 181	continue	×
UNCOV 182	elif len(origloc) == 1:	×
183	origloc = origloc[0]	×
184	else:
185	print(f"New exposure identified: {newtable[newloc]}")	×
186	continue	×
187	## For colnames that the two columns share, compare values.
188	for col in mutual_colnames:	×
UNCOV 189	origval = origtable[col][origloc]	×
190	newval = newtable[col][newloc]	×
191	## Clean up three special cases of bad flags/comments in early data
192	if col == 'EXPFLAG' and 'EFFTIME_ETC' in newtable.colnames and \	×
193	newtable['EFFTIME_ETC'][newloc] > 0. and 'aborted' in origval:
UNCOV 194	origorigval = origval.copy()	×
UNCOV 195	origval = origval[np.where(origval != 'aborted')]	×
UNCOV 196	print("Identified outdated aborted exposure flag. "	×
197	+ "Removing that. Original set: "
198	+ f"{origorigval}, Updated origset: {origval}")
199	if col == 'COMMENTS' and 'EFFTIME_ETC' in newtable.colnames \	×
200	and newtable['EFFTIME_ETC'][newloc] > 0. and \
201	'EXPFLAG' in origtable.colnames \
202	and 'aborted' in origtable['EXPFLAG'][origloc]:
203	origorigval = origval.copy()	×
UNCOV 204	valcheck = np.array([('For EXPTIME:' not in val) for val in origval])	×
205	origval = origval[valcheck]	×
206	print(f"Identified outdated aborted exptime COMMENT."	×
207	+ "Removing that. Original set: "
208	+ f"{origorigval}, Updated origset: {origval}")
209	if col == 'HEADERERR' and 'PURPOSE:->' in origval:	×
210	origorigval = origval.copy()	×
UNCOV 211	valcheck = (np.array(origval) != 'PURPOSE:->')	×
212	origval = origval[valcheck]	×
213	print(f"Identified outdated PURPOSE null->null HEADERERR."	×
214	+ " Removing that. Original set: "
215	+ f"{origorigval}, Updated origset: {origval}")
216	## If columns differ and original isn't a default value,
217	## then take the original user-defined value
218	if np.isscalar(origtable[col][origloc]):	×
219	if origval != coldefs[col] and newval != origval:	×
220	print(f"Difference detected for Night {night}, exp {expid}, "	×
221	+ f"col {col}: orig={origval}, new={newval}. "
222	+ "Taking the original value. ")
223	newtable[col][newloc] = origval	×
224	else:
UNCOV 225	if not np.array_equal(origval, coldefs[col]) and \	×
226	not np.array_equal(newval, origval):
227	print(f"Difference detected for Night {night}, exp {expid}, "	×
228	+ f"col {col}: orig={origval}, new={newval}. "
229	+ "Taking union of the two arrays.")
230	combined_val = newval[newval != '']	×
UNCOV 231	for val in origval:	×
UNCOV 232	if val != '' and val not in newval:	×
233	combined_val = np.append(combined_val,[val])	×
234	newtable[col][newloc] = combined_val	×
235
236	## If just testing, print the table and a cell-by-cell equality test
237	## for the scalar columns
238	## If not testing, move the original table to an archived filename
239	## and save the updated table to the official exptable pathname
UNCOV 240	if dry_run:	×
UNCOV 241	print("\n\nOutput file would have been:")	×
242	newtable.pprint_all()	×
243
244	names = [col for col in newtable.colnames if col not in ['HEADERERR','EXPFLAG','COMMENTS']]	×
UNCOV 245	t1 = newtable[names]	×
UNCOV 246	t2 = load_table(temp_pathname, tabletype='exptab',	×
247	use_specprod=usespecprod)[names]
UNCOV 248	t1.values_equal(t2).pprint_all()	×
249	else:
UNCOV 250	ftime = time.strftime("%Y%m%d_%Hh%Mm")	×
251	replaced_pathname = orig_pathname.replace(f".{orig_filetype}",	×
252	f".replaced-{ftime}.{orig_filetype}")
UNCOV 253	print(f"Moving original file from {orig_pathname} to {replaced_pathname}")	×
254	os.rename(orig_pathname,replaced_pathname)	×
255	time.sleep(0.1)	×
256	out_pathname = orig_pathname.replace(f".{orig_filetype}", f".{out_filetype}")	×
257	write_table(newtable, out_pathname)	×
258	print(f"Updated file saved to {out_pathname}. Original archived as {replaced_pathname}")	×
259
260	## Cleanup the temporary table created with the fresh version of the
261	## create_exposure_table script
UNCOV 262	os.remove(temp_pathname)	×
UNCOV 263	print(f"Removed the temporary file {temp_pathname}")	×
264	print("\n\n")	×
265
266	## Flush the outputs
UNCOV 267	sys.stdout.flush()	×
268	sys.stderr.flush()	×
269	print("Exposure table regenerations complete")	×

desihub / desispec / 10947976957

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous