• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

desihub / desispec / 6513886344

13 Oct 2023 11:07PM UTC coverage: 24.144%. Remained the same
6513886344

Pull #2124

github-actions

akremin
Sort files before printing in desi_exposure_info
Pull Request #2124: Add script that summarizes info about an exposure in a specprod

10781 of 44653 relevant lines covered (24.14%)

0.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/py/desispec/database/duplicates.py
1
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2
# -*- coding: utf-8 -*-
3
"""
×
4
desispec.database.duplicates
5
============================
6

7
Find rows in a file that contain the same value in a certain column.
8
The file in question could potentially be very large.
9
"""
10
import os
×
11
import sys
×
12
import json
×
13
import numpy as np
×
14
from argparse import ArgumentParser
×
15
from astropy.table import Table, Column, MaskedColumn
×
16

17

18
def find_duplicate_rows(data, column):
    """Find rows in `data` for which `column` has the same value.

    Parameters
    ----------
    data : :class:`astropy.table.Table`
        Data set to analyze.
    column : :class:`str`
        Search for duplicates in this column.  The column is treated as
        one-dimensional.

    Returns
    -------
    :class:`dict`
        A mapping of duplicated values of `column` to the (ascending) list
        of row numbers in `data` that hold that value.  Keys are
        :class:`int` when the value is an integer (or a string that parses
        as one) and :class:`str` otherwise.  Values that appear only once
        are not included.
    """
    values = data[column].data
    unique_values, column_indexes, column_counts = np.unique(values, return_inverse=True, return_counts=True)
    duplicate_values = np.nonzero(column_counts > 1)[0]
    map_duplicates_to_rows = dict()
    if duplicate_values.size == 0:
        return map_duplicates_to_rows
    #
    # Group the row numbers by unique value with a single stable sort,
    # instead of rescanning the whole column once per duplicated value.
    # A stable sort keeps the row numbers within each group ascending.
    #
    order = np.argsort(np.ravel(column_indexes), kind='stable')
    stops = np.cumsum(column_counts)
    starts = stops - column_counts
    for i in duplicate_values:
        rows = order[starts[i]:stops[i]]
        map_duplicates_to_rows[_duplicate_key(unique_values[i])] = rows.tolist()
    return map_duplicates_to_rows


def _duplicate_key(value):
    """Convert one unique column value into a key usable in a JSON object.

    Parameters
    ----------
    value : scalar
        A single element of the array returned by :func:`numpy.unique`.

    Returns
    -------
    :class:`int` or :class:`str`
        An integer if `value` is integral or parses as an integer,
        otherwise its string form.
    """
    if isinstance(value, bytes):
        # str(b'abc') would give "b'abc'"; decode so the key is the text itself.
        # backslashreplace keeps distinct byte strings distinct.
        value = value.decode('utf-8', errors='backslashreplace')
    if isinstance(value, (float, np.floating)) and not float(value).is_integer():
        # int() would truncate 1.2 and 1.7 to the same key, and raise
        # OverflowError on infinity, so non-integral floats keep their
        # string form instead.
        return str(value)
    try:
        return int(value)
    except ValueError:
        return str(value)
×
46

47

48
def get_options(*args):
    """Parse command-line options.

    Parameters
    ----------
    args : iterable
        If arguments are passed, use them instead of ``sys.argv``.

    Returns
    -------
    :class:`argparse.Namespace`
        The parsed options, with attributes ``hdu``, ``column`` and
        ``filename``.
    """
    prsr = ArgumentParser(description=("Find rows in a file that contain the same value in a certain column."),
                          prog=os.path.basename(sys.argv[0]))
    prsr.add_argument('-H', '--hdu', action='store', type=int, default=1, dest='hdu', metavar='HDU', help="Read tabular data from HDU (default %(default)s).")
    prsr.add_argument('column', metavar='COL', help='Search for duplicate values in column COL.')
    prsr.add_argument('filename', metavar='FILE', help='Data are in FILE.')
    # Passing None makes argparse fall back to sys.argv[1:], so an empty
    # ``args`` keeps the normal command-line behaviour.
    options = prsr.parse_args(list(args) if args else None)
    return options
×
73

74

75
def main():
    """Entry point for command-line script.

    Reads the table named on the command line, finds duplicated values in
    the requested column, and writes the result as compact JSON to
    ``$SCRATCH/<basename of input file>.json``.

    Returns
    -------
    :class:`int`
        An integer suitable for passing to :func:`sys.exit`: 0 on success,
        1 if the ``SCRATCH`` environment variable is not set.
    """
    options = get_options()
    #
    # Check the output location before reading the (potentially very large)
    # input file, so a missing variable fails immediately with a clear
    # message instead of a KeyError traceback after all the work is done.
    #
    try:
        scratch = os.environ['SCRATCH']
    except KeyError:
        print("The SCRATCH environment variable must be set; it is the directory where output is written.", file=sys.stderr)
        return 1
    output = os.path.join(scratch, os.path.splitext(os.path.basename(options.filename))[0] + '.json')
    data = Table.read(options.filename, hdu=options.hdu)
    map_duplicates_to_rows = find_duplicate_rows(data, options.column)
    with open(output, 'w') as fp:
        # Compact separators keep the file small.
        json.dump(map_duplicates_to_rows, fp, indent=None, separators=(',', ':'))
    return 0
×
90

91

92
if __name__ == '__main__':
    # Run only when executed as a script; the return value of main()
    # becomes the process exit status.
    raise SystemExit(main())
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc