• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PyMassSpec / mh_utils / 22197733821

19 Feb 2026 07:55PM UTC coverage: 88.836%. Remained the same
22197733821

Pull #61

github

web-flow
Merge 87cb99437 into 0c01ca17b
Pull Request #61: [repo-helper] Configuration Update

5 of 6 new or added lines in 2 files covered. (83.33%)

931 of 1048 relevant lines covered (88.84%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.55
/mh_utils/csv_parser/utils.py
1
#!/usr/bin/env python3
2
#
3
#  utils.py
4
"""
5
CSV utility functions.
6

7
.. versionadded:: 0.2.0
8
"""
9
#
10
#  Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
11
#
12
#  Permission is hereby granted, free of charge, to any person obtaining a copy
13
#  of this software and associated documentation files (the "Software"), to deal
14
#  in the Software without restriction, including without limitation the rights
15
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
#  copies of the Software, and to permit persons to whom the Software is
17
#  furnished to do so, subject to the following conditions:
18
#
19
#  The above copyright notice and this permission notice shall be included in all
20
#  copies or substantial portions of the Software.
21
#
22
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
26
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
27
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
28
#  OR OTHER DEALINGS IN THE SOFTWARE.
29
#
30

31
# stdlib
32
from typing import Optional
1✔
33

34
# 3rd party
35
import pandas  # type: ignore
1✔
36
import sdjson
1✔
37
from domdf_python_tools.paths import PathPlus
1✔
38
from domdf_python_tools.typing import PathLike
1✔
39

40
# this package
41
from mh_utils.csv_parser import Sample, SampleList
1✔
42

43
# Names exported by this module when it is star-imported.
__all__ = ["drop_columns", "reorder_columns", "concatenate_json"]
1✔
44

45
# Overwrite the module name that the DataFrame class reports with "pandas".
# NOTE(review): presumably so documentation cross-references point at
# ``pandas.DataFrame`` -- confirm. This mutates the class process-wide at import time.
pandas.DataFrame.__module__ = "pandas"
1✔
46

47

48
def drop_columns(df: pandas.DataFrame, *, axis: int = 1, inplace: bool = True, **kwargs) -> pandas.DataFrame:
        """
        Remove a fixed set of unwanted columns from a MassHunter CSV :class:`pandas.DataFrame`.

        :param df: The :class:`pandas.DataFrame` to remove the columns from.
        :param axis: The axis handed to :meth:`pandas.DataFrame.drop`.
        :param inplace: If true, ``df`` itself is modified and returned;
                otherwise the frame returned by :meth:`pandas.DataFrame.drop` is returned.
        :param kwargs: Extra keyword arguments forwarded to :meth:`pandas.DataFrame.drop`.

        :return: ``df`` when ``inplace`` is true, else the new frame produced by the drop.
        """

        # Columns whose meaning is unknown.
        unidentified = (
                        "HMP", "KEGG", "LMP", "METLIN", "Notes", "Swiss-Prot", "CE",
                        "Tgt Hit Pos", "Score Diff", "FV", "Saturated", "Vol",
                        "Cpds/Group", "Group", "Std Dev", "Score (MFE)", "Vol %",
                        "EIC/TIC% Area", "EIC/TIC% Height",
                        "TIC% Area", "TIC% Height",
                        "TWC% Area", "TWC% Height",
                        "Purity Comments", "Purity Result", "Purity Value",
                        "Score (Frag Coelution)", "FIs Conf.", "FIs Conf. %",
                        "Score (Frag Ratio)", "FragMassDiff(ppm)", "FIs Eval.",
                        "Source", "Flags",
                        )

        # Columns labelled "DB".
        database = (
                        "Mass (DB)", "Diff (DB, mDa)", "Diff (DB, ppm)",
                        "RT (Lib/DB)", "RT Diff (Lib/DB)",
                        "Score (DB)", "Shared (DB)", "Unique (DB)",
                        )

        # Columns labelled "MFG".
        formula_generator = ("Diff (MFG, mDa)", "Mass (MFG)", "Diff (MFG, ppm)", "Score (MFG)")

        # Columns labelled "Lib".
        library = ("Lib/DB", "Score (Lib)")

        # A single flat list of labels, in the same order as the groups above.
        labels = [*unidentified, *database, *formula_generator, *library]

        # With ``inplace=True`` pandas returns ``None`` from ``drop``,
        # so the caller's frame is handed back instead.
        dropped = df.drop(labels, axis=axis, inplace=inplace, **kwargs)
        return df if inplace else dropped
×
127

128

129
def reorder_columns(df: pandas.DataFrame) -> pandas.DataFrame:
        """
        Select the columns of a MassHunter CSV :class:`pandas.DataFrame` in a fixed order.

        Only the columns named below are kept; any other column in ``df`` is left out
        of the result.

        :param df: The :class:`pandas.DataFrame` whose columns are to be reordered.

        :return: The frame obtained by indexing ``df`` with the ordered column list.
        """

        # Keep this in sync with the columns removed by ``drop_columns``:
        # anything deleted there must not be listed here.
        ordered = (
                        "Sample Name", "Cpd", "CAS", "Name", "Hits", "Abund",
                        "Mining Algorithm", "Area", "Base Peak", "Mass", "Avg Mass", "Score",
                        "m/z", "m/z (prod.)",
                        "RT", "Start", "End", "Width",
                        "Diff (Tgt, mDa)", "Diff (Tgt, ppm)", "Score (Tgt)",
                        "Flags (Tgt)", "Flag Severity (Tgt)", "Flag Severity Code (Tgt)",
                        "Mass (Tgt)", "RT (Tgt)", "RT Diff (Tgt)",
                        "Sample Type", "Formula", "Height", "Ions", "Polarity",
                        "Z Count", "Max Z", "Min Z",
                        "Label", "File", "Instrument Name", "Position", "User Name",
                        "Acq Method", "DA Method", "IRM Calibration status",
                        )

        # Deliberately not included:
        #   "ID Source", "ID Techniques Applied"
        #   "MS/MS Count"  (because blank)

        selection = list(ordered)
        return df[selection]
1✔
188

189

190
def concatenate_json(*files: PathLike, outfile: Optional[PathLike] = None) -> SampleList:
        r"""
        Load several JSON files and gather their entries, as :class:`Sample` objects,
        into one :class:`SampleList`.

        :param \*files: The JSON files to read, processed in the order given.
        :param outfile: Where to write the combined list as JSON.
                If :py:obj:`None` nothing is written.

        :return: The combined list of samples.
        """  # noqa: D400

        combined = SampleList()

        for path in files:
                # TODO: https://github.com/python/mypy/issues/5018
                # The ``type: ignore`` can go if that ever gets fixed.
                entries = PathPlus(path).load_json(json_library=sdjson)  # type: ignore

                # Each entry is expanded as keyword arguments to build a Sample.
                for entry in entries:
                        combined.append(Sample(**entry))

        if outfile is None:
                return combined

        # TODO: https://github.com/python/mypy/issues/5018
        # The ``type: ignore`` can go if that ever gets fixed.
        PathPlus(outfile).dump_json(combined, json_library=sdjson, indent=2)  # type: ignore

        return combined
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc