• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

simonsobs / so3g / 13461218470

21 Feb 2025 04:40PM UTC coverage: 55.602% (-0.05%) from 55.653%
13461218470

push

github

web-flow
Work on support for numpy-2 and python-3.12 (#198)

* Work on support for numpy-2 and python-3.12

- Remove numpy-1 checks / constraints.

- Update wheel workflows and scripts.  Add dependabot workflow and
  combine wheel test / deploy workflows into one.  Remove python-3.8
  support.

- Update README instructions.

- Add pyproject.toml and remove unsupported distutils from setup.py

- Fix healpix_bare.c compiler warnings

- Add missing guard around omp.h inclusion

- Remove deprecated datetime functions

- Bump vendored boost to version 1.87 and compile our own libflac-1.5.0

- Build our own OpenBLAS for macos wheels

* Fix README typo

* Add note on the provenance and local modifications to the healpix_bare.c source

1335 of 2401 relevant lines covered (55.6%)

0.56 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

10.22
/python/hk/cli.py
1
import so3g
1✔
2
from spt3g import core
1✔
3
import numpy as np
1✔
4
import os
1✔
5
import sys
1✔
6
import csv
1✔
7
import argparse
1✔
8

9

10
# Byte-count divisors for the --block-size option; keys are the
# lower-cased unit names accepted by convert_units().
_UNITS = {
    'bytes': 1,
    'kb': 1024,
    'mb': 1024*1024,
    'gb': 1024*1024*1024,
}
16

17
def get_parser():
    """Build the argparse parser for this housekeeping CLI.

    Returns an ArgumentParser with one subcommand per mode
    ('list-files', 'list-provs', 'list-fields'); the selected mode is
    stored in the parsed namespace as ``args.mode``.
    """
    parser = argparse.ArgumentParser(
        epilog="Run '%(prog)s COMMAND --help' to see additional "
        "details and options.")
    cmdsubp = parser.add_subparsers(
        dest='mode')

    # Shared arguments for subprocessors ...
    data_args = argparse.ArgumentParser(add_help=False)
    data_args.add_argument(
        'files', nargs='+', default=None,
        help="One or more G3 files to scan.")
    data_args.add_argument(
        '--recursive', '-r', action='store_true',
        help="All arguments are traversed recursively; only files "
        "with the .g3 extension are scanned.")
    data_args.add_argument(
        '--strip-tokens', default='observatory.feeds',
        help="Tokens to hide in provider and field names. "
        "Pass this as a single .-delimited string.")
    data_args.add_argument(
        '--block-size', '-B', default='b',
        help="Summarize storage in units of bytes,kB,MB,GB (pass b,k,M,G).")
    data_args.add_argument(
        '--sort-size', '-s', action='store_true',
        help="Sort results, if applicable, by size (descending).")

    output_args = argparse.ArgumentParser(add_help=False)
    output_args.add_argument(
        '--csv', help=
        "Store data as CSV to specified filename.")

    # Main "mode" subprocessors.  (The return values of add_parser are
    # not needed here; the subcommand dispatch happens via args.mode.)

    # "list-files"
    cmdsubp.add_parser(
        'list-files',
        parents=[data_args, output_args],
        help="Report per-file stats.",
        usage="""

    %(prog)s [options] FILE [...]

        This module reads each file and reports basic stats such as size and
        whether the stream is valid.
        """)

    # "list-provs"
    cmdsubp.add_parser(
        'list-provs',
        parents=[data_args, output_args],
        help="List all data providers (feeds).",
        usage="""

    %(prog)s [options] FILE [...]

        This module reads all specified files and reports a list of
        all data providers (a.k.a. feeds) encountered in the data,
        along with total data volume and average frame size, per
        provider.
        """)

    # "list-fields"
    cmdsubp.add_parser(
        'list-fields',
        parents=[data_args, output_args],
        help="List all data field names.",
        usage="""

    %(prog)s [options] FILE [...]

        This module reads all specified files and reports a list of
        all data fields with their total sample count.
        """)

    # Done.
    return parser
94

95

96
def get_file_list(args, suffix='.g3'):
    """Return the list of files to scan.

    Without ``--recursive``, ``args.files`` is returned unchanged.
    With ``--recursive``, each entry is walked as a directory tree and
    every file ending in ``suffix`` is collected; the matches under
    each root are sorted before being appended to the result.
    """
    if not args.recursive:
        return args.files
    collected = []
    for root in args.files:
        matches = [os.path.join(base, name)
                   for base, _dirs, names in os.walk(root)
                   for name in names
                   if name.endswith(suffix)]
        collected.extend(sorted(matches))
    return collected
109

110

111
def format_table(rows, header=None, fmts=None, align=None):
    """Return a string with data from rows organized into a text table.

    If you pass header, it must be a list with the same number of
    elements as the first row.

    If you pass fmts or align, it must be a dict where the key is
    either the column index or the corresponding header name.

    For fmts, each dict value must be an anonymous python format
    string, e.g. "{:5.1f}".  For align, each dict value must be either
    'right' or 'left'.

    Default alignment is based on data types in the first row of data
    -- if float or int, it will be right aligned.  Otherwise, left.

    """
    if header is None:
        if not len(rows):
            # No header and no data: nothing to format.  (Previously
            # this path left ncol unbound and raised NameError.)
            return ''
        ncol = len(rows[0])
    else:
        ncol = len(header)

    def dict_to_per_col(data, default=None, col_defaults=None):
        # Change dict into a list, per-column, taking default from
        # col_defaults dict if possible and then from default.
        if data is None:
            data = {}
        if col_defaults is None:
            col_defaults = [default] * ncol
        elif isinstance(col_defaults, dict):
            col_defaults = dict_to_per_col(col_defaults, default)
        output = []
        for i in range(ncol):
            if i in data:
                output.append(data[i])
            elif header is not None and header[i] in data:
                output.append(data[header[i]])
            else:
                output.append(col_defaults[i])
        return output

    fmts = dict_to_per_col(fmts, '{}')
    fmt_align = {}
    for i, fmt in enumerate(fmts):
        if '>' in fmt:
            fmt_align[i] = 'right'
        elif '<' in fmt:
            fmt_align[i] = 'left'
        elif len(rows) and isinstance(rows[0][i], (float, int)):
            fmt_align[i] = 'right'
        else:
            fmt_align[i] = 'left'
    align = dict_to_per_col(align, 'left', fmt_align)

    # First round formatting ...
    rows = [[fmt.format(c) for fmt, c in zip(fmts, row)]
            for row in rows]

    # Now set column widths ...
    col_lens = [max([0 if header is None else len(header[i])]
                    + [len(r[i]) for r in rows])
                for i in range(ncol)]

    # Second round formatting ...
    align_fmts = [('{:<%is}' if al == 'left' else '{:>%is}') % cl
                  for al, cl in zip(align, col_lens)]
    # Bug fix: only prepend the header row when one was provided.
    # Previously [header] + rows put None in the row list, and
    # zip(align_fmts, None) raised TypeError for header=None.
    out_rows = ([header] + rows) if header is not None else rows
    out_rows = [[a.format(c) for a, c in zip(align_fmts, r)]
                for r in out_rows]

    # Insert hline under the header, if present.
    if header is not None:
        line = ['-' * len(c) for c in out_rows[0]]
        out_rows.insert(1, line)

    return '\n'.join([' '.join(r) for r in out_rows])
187

188

189
def convert_units(rows, cols, args):
    """Rescale the byte-count columns of a table of rows.

    Columns whose index appears in ``cols`` are divided by the block
    size requested in ``args.block_size`` (b, k, M, G, or a full unit
    name such as 'kb') and rendered as strings; all other columns pass
    through unchanged.  Returns ``(new_rows, unit_name)``.

    Raises ValueError if the block size string is not recognized.
    """
    unit_name = args.block_size.lower()
    if unit_name == 'b':
        unit_name = 'bytes'
    if unit_name not in _UNITS:
        # Accept single letters (k/m/g) by appending the 'b'.
        unit_name += 'b'
    if unit_name not in _UNITS:
        raise ValueError(f'Cannot interpret {args.block_size} '
                         'as a block size (b,k,M,G).')
    factor = _UNITS[unit_name]
    # Whole bytes print as integers; scaled values get one decimal.
    fmt = '%d' if factor == 1 else '%.1f'
    converted = []
    for row in rows:
        converted.append(tuple(
            (fmt % (value / factor)) if idx in cols else value
            for idx, value in enumerate(row)))
    return converted, unit_name
208

209

210
def write_csv(filename, rows, header=None):
    """Write rows (preceded by an optional header row) to filename as CSV."""
    with open(filename, 'w', newline='') as out:
        writer = csv.writer(out)
        if header is not None:
            writer.writerow(header)
        writer.writerows(list(row) for row in rows)
217

218
def produce_output(rows, header, fmts=None, align=None, csv=False):
    """Emit table data either as CSV or as a printed text table.

    The ``csv`` keyword doubles as the output filename; any falsy
    value means "print a formatted table to stdout instead".
    """
    if not csv:
        print(format_table(rows, fmts=fmts, header=header, align=align))
    else:
        write_csv(csv, rows, header=header)
223

224

225
class TokenCleanser:
    """Callable that strips uninteresting tokens from dotted names.

    Constructed from a '.'-delimited string of tokens to hide;
    calling the instance on a dotted name drops every component that
    matches one of those tokens.
    """
    def __init__(self, boring_tokens):
        # Empty components (e.g. from a leading '.') are discarded.
        self.boring = [tok for tok in boring_tokens.split('.') if tok]

    def __call__(self, name):
        if not self.boring:
            return name
        kept = [tok for tok in name.split('.')
                if tok not in self.boring]
        return '.'.join(kept)
234

235

236
def main(args=None):
    """Entry point for the housekeeping CLI.

    args: list of command-line argument strings (defaults to
    sys.argv[1:]).  Dispatches on the subcommand stored in args.mode.
    """
    if args is None:
        args = sys.argv[1:]

    parser = get_parser()
    args = parser.parse_args(args=args)

    if args.mode == 'list-files':
        # Per-file stats: size on disk, how many bytes were readable,
        # and whether the G3 stream terminated cleanly.
        rows = []
        for filename in get_file_list(args):
            file_size = os.path.getsize(filename)
            clean_exit = True
            # Initialize so a failure on the very first frame does not
            # leave `end` unbound (previously a NameError).
            end = 0
            r = core.G3Reader(filename)
            while True:
                try:
                    f = r.Process(None)
                except Exception:
                    # Narrowed from a bare except: a truncated or
                    # corrupt stream is reported, but KeyboardInterrupt
                    # and SystemExit still propagate.
                    clean_exit = False
                    break
                end = r.tell()
                if f is None or len(f) == 0:
                    break
            rows.append((filename, file_size, end,
                         {True: 'no', False: 'YES'}[clean_exit]))

        if args.sort_size:
            rows = sorted(rows, key=lambda row: -row[1])

        rows, units = convert_units(rows, [1, 2], args)

        header = ['filename', f'size_{units}',
                  f'usable_{units}', 'error']
        produce_output(rows, header, align={1: 'right', 2: 'right'},
                       csv=args.csv)

    elif args.mode == 'list-provs':
        # Accumulate per-provider frame sizes, keyed by the (cleansed)
        # 'address' field of each frame.
        counts = {}
        renamer = TokenCleanser(args.strip_tokens)

        for filename in get_file_list(args):
            r = core.G3Reader(filename)
            while True:
                start = r.tell()
                f = r.Process(None)
                end = r.tell()
                if f is None or len(f) == 0:
                    break
                f = f[0]
                if 'address' in f:
                    key = renamer(f['address'])
                    if key not in counts:
                        counts[key] = []
                    counts[key].append(end - start)

        rows = [(k, np.sum(v).tolist(), np.mean(v))
                for k, v in sorted(counts.items())]

        if args.sort_size:
            rows = sorted(rows, key=lambda row: -row[1])

        rows, units = convert_units(rows, [1, 2], args)
        header = ['provider_name', f'total_{units}', f'frame_{units}']
        produce_output(rows, header, align={1: 'right', 2: 'right'},
                       csv=args.csv)

    elif args.mode == 'list-fields':
        # Count samples per field (address + block key).
        counts = {}
        renamer = TokenCleanser(args.strip_tokens)

        for filename in get_file_list(args):
            r = core.G3Reader(filename)
            while True:
                f = r.Process(None)
                if f is None or len(f) == 0:
                    break
                f = f[0]
                if 'address' in f:
                    addr = renamer(f['address'])
                    for block in f['blocks']:
                        keys = block.keys()
                        n = len(block.times)
                        for k in keys:
                            field = addr + '.' + k
                            if field not in counts:
                                counts[field] = 0
                            counts[field] += n
        header = ['field_name', 'samples']
        rows = sorted(counts.items())

        if args.sort_size:
            rows = sorted(rows, key=lambda row: -row[1])

        produce_output(rows, header, csv=args.csv)

    elif args.mode is None:
        parser.error('Provide a valid mode. (See --help for more.)')

    else:
        print(f'Unimplemented mode "{args.mode}"!')
337

338

339
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc