22197733821

Committed 19 Feb 2026 07:55PM UTC coverage: 88.836%. Remained the same

Build # 22197733821

Build Type

Pull #61

github

Committed by

web-flow

Commit Message

Merge 87cb99437 into 0c01ca17b

Pull Request #61: [repo-helper] Configuration Update

Coverage Stats

5 of 6 new or added lines in 2 files covered. (83.33%)

931 of 1048 relevant lines covered (88.84%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.55

/mh_utils/csv_parser/utils.py

#!/usr/bin/env python3
#
#  utils.py
"""
CSV utility functions.

.. versionadded:: 0.2.0
"""
#
#  Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
from typing import Optional

# 3rd party
import pandas  # type: ignore
import sdjson
from domdf_python_tools.paths import PathPlus
from domdf_python_tools.typing import PathLike

# this package
from mh_utils.csv_parser import Sample, SampleList

__all__ = ["drop_columns", "reorder_columns", "concatenate_json"]

pandas.DataFrame.__module__ = "pandas"


def drop_columns(df: pandas.DataFrame, *, axis: int = 1, inplace: bool = True, **kwargs) -> pandas.DataFrame:
        """
        Remove the unwanted MassHunter CSV columns from a DataFrame.

        :param df: The :class:`pandas.DataFrame` to remove the columns from.
        :param axis: The axis given to :meth:`pandas.DataFrame.drop`.
        :param inplace: Whether ``df`` itself is modified. When true, ``df`` is returned;
                otherwise the value returned by :meth:`pandas.DataFrame.drop` is returned.
        :param kwargs: Extra keyword arguments forwarded to :meth:`pandas.DataFrame.drop`.
        """

        # All labels are handed to a single ``drop`` call, grouped here by category.
        labels_to_drop = [
                        # Columns whose meaning is unknown
                        "HMP", "KEGG", "LMP", "METLIN", "Notes", "Swiss-Prot", "CE",
                        "Tgt Hit Pos", "Score Diff", "FV", "Saturated", "Vol",
                        "Cpds/Group", "Group", "Std Dev", "Score (MFE)", "Vol %",
                        "EIC/TIC% Area", "EIC/TIC% Height",
                        "TIC% Area", "TIC% Height",
                        "TWC% Area", "TWC% Height",
                        "Purity Comments", "Purity Result", "Purity Value",
                        "Score (Frag Coelution)", "FIs Conf.", "FIs Conf. %",
                        "Score (Frag Ratio)", "FragMassDiff(ppm)", "FIs Eval.",
                        "Source", "Flags",
                        # Database (DB) columns
                        "Mass (DB)", "Diff (DB, mDa)", "Diff (DB, ppm)",
                        "RT (Lib/DB)", "RT Diff (Lib/DB)",
                        "Score (DB)", "Shared (DB)", "Unique (DB)",
                        # MFG columns
                        "Diff (MFG, mDa)", "Mass (MFG)", "Diff (MFG, ppm)", "Score (MFG)",
                        # Library columns
                        "Lib/DB", "Score (Lib)",
                        ]

        dropped = df.drop(labels_to_drop, axis=axis, inplace=inplace, **kwargs)

        # With ``inplace`` set the caller gets back the frame that was passed in,
        # rather than whatever ``drop`` returned.
        return df if inplace else dropped


def reorder_columns(df: pandas.DataFrame) -> pandas.DataFrame:
        """
        Select the MassHunter CSV columns in the standard output order.

        :param df: The :class:`pandas.DataFrame` whose columns are to be reordered.
        """

        # Keep this in sync with ``drop_columns``: anything dropped there
        # must not be listed here.
        #
        # Deliberately left out:
        #   "ID Source", "ID Techniques Applied"
        #   "MS/MS Count" (because blank)
        column_order = [
                        "Sample Name", "Cpd", "CAS", "Name", "Hits", "Abund",
                        "Mining Algorithm", "Area", "Base Peak", "Mass", "Avg Mass",
                        "Score", "m/z", "m/z (prod.)", "RT", "Start", "End", "Width",
                        "Diff (Tgt, mDa)", "Diff (Tgt, ppm)", "Score (Tgt)",
                        "Flags (Tgt)", "Flag Severity (Tgt)", "Flag Severity Code (Tgt)",
                        "Mass (Tgt)", "RT (Tgt)", "RT Diff (Tgt)",
                        "Sample Type", "Formula", "Height", "Ions", "Polarity",
                        "Z Count", "Max Z", "Min Z", "Label", "File",
                        "Instrument Name", "Position", "User Name",
                        "Acq Method", "DA Method", "IRM Calibration status",
                        ]

        reordered = df[column_order]
        return reordered


def concatenate_json(*files: PathLike, outfile: Optional[PathLike] = None) -> SampleList:
        r"""
        Merge the samples stored in several JSON files into one :class:`SampleList`.

        Each entry loaded from a file is unpacked as keyword arguments to :class:`Sample`,
        and the resulting objects are collected in the order the files were given.

        :param \*files: The files to concatenate.
        :param outfile: The file to save the output as. If :py:obj:`None` no file will be saved.
        """

        combined = SampleList()

        for path in files:
                # TODO: https://github.com/python/mypy/issues/5018
                # If it ever gets fixed
                for record in PathPlus(path).load_json(
                                json_library=sdjson,  # type: ignore
                                ):
                        combined.append(Sample(**record))

        if outfile is None:
                # Nothing to write; just hand back the collected samples.
                return combined

        # TODO: https://github.com/python/mypy/issues/5018
        # If it ever gets fixed
        PathPlus(outfile).dump_json(
                        combined,
                        json_library=sdjson,  # type: ignore
                        indent=2,
                        )

        return combined

1	#!/usr/bin/env python3
2	#
3	# utils.py
4	"""
5	CSV utility functions.
6
7	.. versionadded:: 0.2.0
8	"""
9	#
10	# Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
11	#
12	# Permission is hereby granted, free of charge, to any person obtaining a copy
13	# of this software and associated documentation files (the "Software"), to deal
14	# in the Software without restriction, including without limitation the rights
15	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16	# copies of the Software, and to permit persons to whom the Software is
17	# furnished to do so, subject to the following conditions:
18	#
19	# The above copyright notice and this permission notice shall be included in all
20	# copies or substantial portions of the Software.
21	#
22	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23	# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24	# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25	# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
26	# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
27	# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
28	# OR OTHER DEALINGS IN THE SOFTWARE.
29	#
30
31	# stdlib
32	from typing import Optional	1✔
33
34	# 3rd party
35	import pandas # type: ignore	1✔
36	import sdjson	1✔
37	from domdf_python_tools.paths import PathPlus	1✔
38	from domdf_python_tools.typing import PathLike	1✔
39
40	# this package
41	from mh_utils.csv_parser import Sample, SampleList	1✔
42
43	__all__ = ["drop_columns", "reorder_columns", "concatenate_json"]	1✔
44
45	pandas.DataFrame.__module__ = "pandas"	1✔
46
47
48	def drop_columns(df: pandas.DataFrame, *, axis: int = 1, inplace: bool = True, **kwargs) -> pandas.DataFrame:	1✔
49	"""
50	Drop columns from the MassHunter CSV file.
51
52	:param df: The :class:`pandas.DataFrame` to drop columns in.
53	:param axis: Which axis to drop columns on.
54	:param inplace: Whether to modify the :class:`pandas.DataFrame` in place.
55	:param kwargs: Additional keyword arguments passed to :meth:`pandas.DataFrame.drop`.
56	"""
57
58	# Columns where I have no idea what they represent
59	unknown_cols = [	1✔
60	"HMP",
61	"KEGG",
62	"LMP",
63	"METLIN",
64	"Notes",
65	"Swiss-Prot",
66	"CE",
67	"Tgt Hit Pos",
68	"Score Diff",
69	"FV",
70	"Saturated",
71	"Vol",
72	"Cpds/Group",
73	"Group",
74	"Std Dev",
75	"Score (MFE)",
76	"Vol %",
77	"EIC/TIC% Area",
78	"EIC/TIC% Height",
79	"TIC% Area",
80	"TIC% Height",
81	"TWC% Area",
82	"TWC% Height",
83	"Purity Comments",
84	"Purity Result",
85	"Purity Value",
86	"Score (Frag Coelution)",
87	"FIs Conf.",
88	"FIs Conf. %",
89	"Score (Frag Ratio)",
90	"FragMassDiff(ppm)",
91	"FIs Eval.",
92	"Source",
93	"Flags",
94	]
95
96	db_cols = [	1✔
97	"Mass (DB)",
98	"Diff (DB, mDa)",
99	"Diff (DB, ppm)",
100	"RT (Lib/DB)",
101	"RT Diff (Lib/DB)",
102	"Score (DB)",
103	"Shared (DB)",
104	"Unique (DB)",
105	]
106
107	mfg_cols = [	1✔
108	"Diff (MFG, mDa)",
109	"Mass (MFG)",
110	"Diff (MFG, ppm)",
111	"Score (MFG)",
112	]
113
114	lib_cols = ["Lib/DB", "Score (Lib)"]	1✔
115
116	new_df = df.drop([	1✔
117	*unknown_cols,
118	*db_cols,
119	*mfg_cols,
120	*lib_cols,
121	], axis=axis, inplace=inplace, **kwargs)
122
123	if inplace:	1✔
124	return df	1✔
125	else:
NEW 126	return new_df	×
127
128
129	def reorder_columns(df: pandas.DataFrame) -> pandas.DataFrame:	1✔
130	"""
131	Reorder columns from the MassHunter CSV file.
132
133	:param df: The :class:`pandas.DataFrame` to reorder columns in.
134	"""
135
136	# Make sure to remove columns that got deleted above
137	output_col_order = [	1✔
138	"Sample Name",
139	"Cpd",
140	"CAS",
141	"Name",
142	"Hits",
143	"Abund",
144	"Mining Algorithm",
145	"Area",
146	"Base Peak",
147	"Mass",
148	"Avg Mass",
149	"Score",
150	"m/z",
151	"m/z (prod.)",
152	"RT",
153	"Start",
154	"End",
155	"Width",
156	"Diff (Tgt, mDa)",
157	"Diff (Tgt, ppm)",
158	"Score (Tgt)",
159	"Flags (Tgt)",
160	"Flag Severity (Tgt)",
161	"Flag Severity Code (Tgt)",
162	"Mass (Tgt)",
163	"RT (Tgt)",
164	"RT Diff (Tgt)",
165	"Sample Type",
166	"Formula",
167	"Height",
168	"Ions",
169	"Polarity",
170	"Z Count",
171	"Max Z",
172	"Min Z",
173	"Label",
174	"File",
175	"Instrument Name",
176	"Position",
177	"User Name",
178	"Acq Method",
179	"DA Method",
180	"IRM Calibration status",
181	]
182
183	# Omitted columns
184	# "ID Source", "ID Techniques Applied"
185	# "MS/MS Count", because blank
186
187	return df[output_col_order]	1✔
188
189
190	def concatenate_json(*files: PathLike, outfile: Optional[PathLike] = None) -> SampleList:	1✔
191	r"""
192	Concatenate multiple JSON files together and return a list of :class:`Sample`
193	objects in the concatenated json output.
194
195	:param \*files: The files to concatenate.
196	:param outfile: The file to save the output as. If :py:obj:`None` no file will be saved.
197	""" # noqa: D400
198
199	all_samples = SampleList()	1✔
200
201	for json_file in files:	1✔
202	samples = PathPlus(json_file).load_json(	1✔
203	json_library=sdjson, # type: ignore
204	)
205	# TODO: https://github.com/python/mypy/issues/5018
206	# If it ever gets fixed
207
208	for sample in samples:	1✔
209	all_samples.append(Sample(**sample))	1✔
210
211	if outfile is not None:	1✔
212	PathPlus(outfile).dump_json(	1✔
213	all_samples,
214	json_library=sdjson, # type: ignore
215	indent=2,
216	)
217	# TODO: https://github.com/python/mypy/issues/5018
218	# If it ever gets fixed
219
220	return all_samples	1✔

PyMassSpec / mh_utils / 22197733821

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous