• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PyMassSpec / mh_utils / 22197751771

19 Feb 2026 07:56PM UTC coverage: 88.836%. Remained the same
22197751771

Pull #61

github

web-flow
Merge d1d9a10a3 into 0c01ca17b
Pull Request #61: [repo-helper] Configuration Update

931 of 1048 relevant lines covered (88.84%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.55
/mh_utils/csv_parser/utils.py
1
#!/usr/bin/env python3
2
#
3
#  utils.py
4
"""
5
CSV utility functions.
6

7
.. versionadded:: 0.2.0
8
"""
9
#
10
#  Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
11
#
12
#  Permission is hereby granted, free of charge, to any person obtaining a copy
13
#  of this software and associated documentation files (the "Software"), to deal
14
#  in the Software without restriction, including without limitation the rights
15
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
#  copies of the Software, and to permit persons to whom the Software is
17
#  furnished to do so, subject to the following conditions:
18
#
19
#  The above copyright notice and this permission notice shall be included in all
20
#  copies or substantial portions of the Software.
21
#
22
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
26
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
27
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
28
#  OR OTHER DEALINGS IN THE SOFTWARE.
29
#
30

31
# stdlib
32
from typing import Optional
1✔
33

34
# 3rd party
35
import pandas  # type: ignore
1✔
36
import sdjson
1✔
37
from domdf_python_tools.paths import PathPlus
1✔
38
from domdf_python_tools.typing import PathLike
1✔
39

40
# this package
41
from mh_utils.csv_parser import Sample, SampleList
1✔
42

43
# Public names exported by this module.
__all__ = ["drop_columns", "reorder_columns", "concatenate_json"]
1✔
44

45
# Overwrite the module name reported by ``pandas.DataFrame`` with the top-level package name.
# NOTE(review): presumably so documentation cross-references resolve to ``pandas.DataFrame`` -- confirm.
pandas.DataFrame.__module__ = "pandas"
1✔
46

47

48
def drop_columns(df: pandas.DataFrame, *, axis: int = 1, inplace: bool = True, **kwargs) -> pandas.DataFrame:
    """
    Remove the unwanted columns of a MassHunter CSV file from a :class:`pandas.DataFrame`.

    :param df: The :class:`pandas.DataFrame` to remove the columns from.
    :param axis: The axis the labels are dropped from.
    :param inplace: If true ``df`` itself is modified, otherwise ``df`` is left untouched.
    :param kwargs: Further keyword arguments forwarded to :meth:`pandas.DataFrame.drop`.

    :returns: ``df`` when ``inplace`` is true, otherwise the object returned by
        :meth:`pandas.DataFrame.drop`.
    """

    unwanted_labels = (
        # Columns whose meaning is unknown
        "HMP",
        "KEGG",
        "LMP",
        "METLIN",
        "Notes",
        "Swiss-Prot",
        "CE",
        "Tgt Hit Pos",
        "Score Diff",
        "FV",
        "Saturated",
        "Vol",
        "Cpds/Group",
        "Group",
        "Std Dev",
        "Score (MFE)",
        "Vol %",
        "EIC/TIC% Area",
        "EIC/TIC% Height",
        "TIC% Area",
        "TIC% Height",
        "TWC% Area",
        "TWC% Height",
        "Purity Comments",
        "Purity Result",
        "Purity Value",
        "Score (Frag Coelution)",
        "FIs Conf.",
        "FIs Conf. %",
        "Score (Frag Ratio)",
        "FragMassDiff(ppm)",
        "FIs Eval.",
        "Source",
        "Flags",
        # "DB" columns
        "Mass (DB)",
        "Diff (DB, mDa)",
        "Diff (DB, ppm)",
        "RT (Lib/DB)",
        "RT Diff (Lib/DB)",
        "Score (DB)",
        "Shared (DB)",
        "Unique (DB)",
        # "MFG" columns
        "Diff (MFG, mDa)",
        "Mass (MFG)",
        "Diff (MFG, ppm)",
        "Score (MFG)",
        # "Lib" columns
        "Lib/DB",
        "Score (Lib)",
    )

    dropped = df.drop(list(unwanted_labels), axis=axis, inplace=inplace, **kwargs)

    # For an in-place drop the caller's (now modified) frame is handed back
    # rather than the value returned by ``drop``.
    return df if inplace else dropped
×
132

133

134
def reorder_columns(df: pandas.DataFrame) -> pandas.DataFrame:
    """
    Select the columns of a MassHunter CSV file in the desired output order.

    Columns of ``df`` which are not part of the output order are left out of the result.

    :param df: The :class:`pandas.DataFrame` whose columns are to be reordered.

    :returns: The result of indexing ``df`` with the ordered list of column names.
    """

    # Keep in sync with ``drop_columns``: a column dropped there must not be listed here.
    #
    # Deliberately left out:
    #   "ID Source", "ID Techniques Applied"
    #   "MS/MS Count"  (because blank)
    ordered_labels = (
        "Sample Name",
        "Cpd",
        "CAS",
        "Name",
        "Hits",
        "Abund",
        "Mining Algorithm",
        "Area",
        "Base Peak",
        "Mass",
        "Avg Mass",
        "Score",
        "m/z",
        "m/z (prod.)",
        "RT",
        "Start",
        "End",
        "Width",
        "Diff (Tgt, mDa)",
        "Diff (Tgt, ppm)",
        "Score (Tgt)",
        "Flags (Tgt)",
        "Flag Severity (Tgt)",
        "Flag Severity Code (Tgt)",
        "Mass (Tgt)",
        "RT (Tgt)",
        "RT Diff (Tgt)",
        "Sample Type",
        "Formula",
        "Height",
        "Ions",
        "Polarity",
        "Z Count",
        "Max Z",
        "Min Z",
        "Label",
        "File",
        "Instrument Name",
        "Position",
        "User Name",
        "Acq Method",
        "DA Method",
        "IRM Calibration status",
    )

    selection = list(ordered_labels)
    return df[selection]
1✔
193

194

195
def concatenate_json(*files: PathLike, outfile: Optional[PathLike] = None) -> SampleList:
    r"""
    Merge the samples stored in several JSON files into a single :class:`SampleList`.

    Each file is read with ``sdjson`` and every entry in it is turned into a
    :class:`Sample` by passing the entry's items as keyword arguments.

    :param \*files: The JSON files to read, in the order their samples should appear.
    :param outfile: Where to write the combined samples as JSON.
        If :py:obj:`None` nothing is written.

    :returns: The samples from all of the files.
    """

    combined = SampleList()

    for path in files:
        # TODO: https://github.com/python/mypy/issues/5018
        # If it ever gets fixed
        entries = PathPlus(path).load_json(json_library=sdjson)  # type: ignore

        for entry in entries:
            combined.append(Sample(**entry))

    if outfile is None:
        return combined

    # TODO: https://github.com/python/mypy/issues/5018
    # If it ever gets fixed
    PathPlus(outfile).dump_json(combined, json_library=sdjson, indent=2)  # type: ignore

    return combined
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc