22197751771

Committed 19 Feb 2026 07:56PM UTC coverage: 88.836%. Remained the same

Build # 22197751771

Build Type

Pull #61

github

Committed by

web-flow

Commit Message

Merge d1d9a10a3 into 0c01ca17b

Pull Request Pull Request #61: [repo-helper] Configuration Update

Run Details

931 of 1048 relevant lines covered (88.84%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.55

/mh_utils/csv_parser/utils.py

#!/usr/bin/env python3
#
#  utils.py
"""
CSV utility functions.

.. versionadded:: 0.2.0
"""
#
#  Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
from typing import Optional

# 3rd party
import pandas  # type: ignore
import sdjson
from domdf_python_tools.paths import PathPlus
from domdf_python_tools.typing import PathLike

# this package
from mh_utils.csv_parser import Sample, SampleList

# Public API of this module.
__all__ = ["drop_columns", "reorder_columns", "concatenate_json"]

# Report DataFrame as living in the top-level "pandas" module.
# NOTE(review): presumably so documentation cross-references resolve to
# ``pandas.DataFrame`` rather than its internal module path -- confirm.
pandas.DataFrame.__module__ = "pandas"


def drop_columns(df: pandas.DataFrame, *, axis: int = 1, inplace: bool = True, **kwargs) -> pandas.DataFrame:
        """
        Remove the unwanted MassHunter CSV columns from a :class:`pandas.DataFrame`.

        The set of labels removed is fixed: a group of columns whose meaning is unknown,
        plus the database (DB), molecular formula generator (MFG) and library (Lib) columns.

        When ``inplace`` is true ``df`` itself is modified and returned;
        otherwise ``df`` is left untouched and the frame produced by
        :meth:`pandas.DataFrame.drop` is returned.

        :param df: The :class:`pandas.DataFrame` to remove the columns from.
        :param axis: The axis the labels are dropped from.
        :param inplace: Whether ``df`` is modified in place.
        :param kwargs: Extra keyword arguments forwarded to :meth:`pandas.DataFrame.drop`.
        """

        labels = [
                        # Columns whose meaning is unknown
                        "HMP",
                        "KEGG",
                        "LMP",
                        "METLIN",
                        "Notes",
                        "Swiss-Prot",
                        "CE",
                        "Tgt Hit Pos",
                        "Score Diff",
                        "FV",
                        "Saturated",
                        "Vol",
                        "Cpds/Group",
                        "Group",
                        "Std Dev",
                        "Score (MFE)",
                        "Vol %",
                        "EIC/TIC% Area",
                        "EIC/TIC% Height",
                        "TIC% Area",
                        "TIC% Height",
                        "TWC% Area",
                        "TWC% Height",
                        "Purity Comments",
                        "Purity Result",
                        "Purity Value",
                        "Score (Frag Coelution)",
                        "FIs Conf.",
                        "FIs Conf. %",
                        "Score (Frag Ratio)",
                        "FragMassDiff(ppm)",
                        "FIs Eval.",
                        "Source",
                        "Flags",
                        # DB columns
                        "Mass (DB)",
                        "Diff (DB, mDa)",
                        "Diff (DB, ppm)",
                        "RT (Lib/DB)",
                        "RT Diff (Lib/DB)",
                        "Score (DB)",
                        "Shared (DB)",
                        "Unique (DB)",
                        # MFG columns
                        "Diff (MFG, mDa)",
                        "Mass (MFG)",
                        "Diff (MFG, ppm)",
                        "Score (MFG)",
                        # Lib columns
                        "Lib/DB",
                        "Score (Lib)",
                        ]

        dropped = df.drop(labels, axis=axis, inplace=inplace, **kwargs)

        # The result of an in-place drop is not used; hand back the mutated input instead.
        return df if inplace else dropped


def reorder_columns(df: pandas.DataFrame) -> pandas.DataFrame:
        """
        Select the MassHunter CSV columns in the standard output order.

        Only the columns named in the fixed ordering below are present in the result,
        arranged in that order; any other column of ``df`` is left out.

        :param df: The :class:`pandas.DataFrame` whose columns are to be reordered.
        """

        # Keep this ordering free of the columns removed by ``drop_columns``.
        #
        # Deliberately left out:
        #   "ID Source", "ID Techniques Applied"
        #   "MS/MS Count" (because blank)
        ordered_labels = (
                        "Sample Name",
                        "Cpd",
                        "CAS",
                        "Name",
                        "Hits",
                        "Abund",
                        "Mining Algorithm",
                        "Area",
                        "Base Peak",
                        "Mass",
                        "Avg Mass",
                        "Score",
                        "m/z",
                        "m/z (prod.)",
                        "RT",
                        "Start",
                        "End",
                        "Width",
                        "Diff (Tgt, mDa)",
                        "Diff (Tgt, ppm)",
                        "Score (Tgt)",
                        "Flags (Tgt)",
                        "Flag Severity (Tgt)",
                        "Flag Severity Code (Tgt)",
                        "Mass (Tgt)",
                        "RT (Tgt)",
                        "RT Diff (Tgt)",
                        "Sample Type",
                        "Formula",
                        "Height",
                        "Ions",
                        "Polarity",
                        "Z Count",
                        "Max Z",
                        "Min Z",
                        "Label",
                        "File",
                        "Instrument Name",
                        "Position",
                        "User Name",
                        "Acq Method",
                        "DA Method",
                        "IRM Calibration status",
                        )

        # Index with a list so pandas performs a multi-column selection.
        return df[list(ordered_labels)]


def concatenate_json(*files: PathLike, outfile: Optional[PathLike] = None) -> SampleList:
        r"""
        Merge the samples stored in several JSON files into a single :class:`SampleList`.

        Each file is loaded with ``sdjson``, and every entry it contains is expanded
        as keyword arguments to :class:`Sample`. The samples are collected in the
        order the files are given.

        :param \*files: The JSON files to concatenate.
        :param outfile: The file to write the combined samples to, as JSON.
                If :py:obj:`None` nothing is written.
        """

        combined = SampleList()

        for path in files:
                # TODO: the "type: ignore" can go if https://github.com/python/mypy/issues/5018
                # ever gets fixed
                loaded = PathPlus(path).load_json(json_library=sdjson)  # type: ignore

                for entry in loaded:
                        combined.append(Sample(**entry))

        if outfile is None:
                # Nothing to save; just hand back the collected samples.
                return combined

        # TODO: the "type: ignore" can go if https://github.com/python/mypy/issues/5018
        # ever gets fixed
        PathPlus(outfile).dump_json(combined, json_library=sdjson, indent=2)  # type: ignore

        return combined

1	#!/usr/bin/env python3
2	#
3	# utils.py
4	"""
5	CSV utility functions.
6
7	.. versionadded:: 0.2.0
8	"""
9	#
10	# Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
11	#
12	# Permission is hereby granted, free of charge, to any person obtaining a copy
13	# of this software and associated documentation files (the "Software"), to deal
14	# in the Software without restriction, including without limitation the rights
15	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16	# copies of the Software, and to permit persons to whom the Software is
17	# furnished to do so, subject to the following conditions:
18	#
19	# The above copyright notice and this permission notice shall be included in all
20	# copies or substantial portions of the Software.
21	#
22	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23	# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24	# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25	# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
26	# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
27	# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
28	# OR OTHER DEALINGS IN THE SOFTWARE.
29	#
30
31	# stdlib
32	from typing import Optional	1✔
33
34	# 3rd party
35	import pandas # type: ignore	1✔
36	import sdjson	1✔
37	from domdf_python_tools.paths import PathPlus	1✔
38	from domdf_python_tools.typing import PathLike	1✔
39
40	# this package
41	from mh_utils.csv_parser import Sample, SampleList	1✔
42
43	__all__ = ["drop_columns", "reorder_columns", "concatenate_json"]	1✔
44
45	pandas.DataFrame.__module__ = "pandas"	1✔
46
47
48	def drop_columns(df: pandas.DataFrame, *, axis: int = 1, inplace: bool = True, **kwargs) -> pandas.DataFrame:	1✔
49	"""
50	Drop columns from the MassHunter CSV file.
51
52	:param df: The :class:`pandas.DataFrame` to drop columns in.
53	:param axis: Which axis to drop columns on.
54	:param inplace: Whether to modify the :class:`pandas.DataFrame` in place.
55	:param kwargs: Additional keyword arguments passed to :meth:`pandas.DataFrame.drop`.
56	"""
57
58	# Columns where I have no idea what they represent
59	unknown_cols = [	1✔
60	"HMP",
61	"KEGG",
62	"LMP",
63	"METLIN",
64	"Notes",
65	"Swiss-Prot",
66	"CE",
67	"Tgt Hit Pos",
68	"Score Diff",
69	"FV",
70	"Saturated",
71	"Vol",
72	"Cpds/Group",
73	"Group",
74	"Std Dev",
75	"Score (MFE)",
76	"Vol %",
77	"EIC/TIC% Area",
78	"EIC/TIC% Height",
79	"TIC% Area",
80	"TIC% Height",
81	"TWC% Area",
82	"TWC% Height",
83	"Purity Comments",
84	"Purity Result",
85	"Purity Value",
86	"Score (Frag Coelution)",
87	"FIs Conf.",
88	"FIs Conf. %",
89	"Score (Frag Ratio)",
90	"FragMassDiff(ppm)",
91	"FIs Eval.",
92	"Source",
93	"Flags",
94	]
95
96	db_cols = [	1✔
97	"Mass (DB)",
98	"Diff (DB, mDa)",
99	"Diff (DB, ppm)",
100	"RT (Lib/DB)",
101	"RT Diff (Lib/DB)",
102	"Score (DB)",
103	"Shared (DB)",
104	"Unique (DB)",
105	]
106
107	mfg_cols = [	1✔
108	"Diff (MFG, mDa)",
109	"Mass (MFG)",
110	"Diff (MFG, ppm)",
111	"Score (MFG)",
112	]
113
114	lib_cols = ["Lib/DB", "Score (Lib)"]	1✔
115
116	new_df = df.drop(	1✔
117	[
118	*unknown_cols,
119	*db_cols,
120	*mfg_cols,
121	*lib_cols,
122	],
123	axis=axis,
124	inplace=inplace,
125	**kwargs,
126	)
127
128	if inplace:	1✔
129	return df	1✔
130	else:
131	return new_df	×
132
133
134	def reorder_columns(df: pandas.DataFrame) -> pandas.DataFrame:	1✔
135	"""
136	Reorder columns from the MassHunter CSV file.
137
138	:param df: The :class:`pandas.DataFrame` to reorder columns in.
139	"""
140
141	# Make sure to remove columns that got deleted above
142	output_col_order = [	1✔
143	"Sample Name",
144	"Cpd",
145	"CAS",
146	"Name",
147	"Hits",
148	"Abund",
149	"Mining Algorithm",
150	"Area",
151	"Base Peak",
152	"Mass",
153	"Avg Mass",
154	"Score",
155	"m/z",
156	"m/z (prod.)",
157	"RT",
158	"Start",
159	"End",
160	"Width",
161	"Diff (Tgt, mDa)",
162	"Diff (Tgt, ppm)",
163	"Score (Tgt)",
164	"Flags (Tgt)",
165	"Flag Severity (Tgt)",
166	"Flag Severity Code (Tgt)",
167	"Mass (Tgt)",
168	"RT (Tgt)",
169	"RT Diff (Tgt)",
170	"Sample Type",
171	"Formula",
172	"Height",
173	"Ions",
174	"Polarity",
175	"Z Count",
176	"Max Z",
177	"Min Z",
178	"Label",
179	"File",
180	"Instrument Name",
181	"Position",
182	"User Name",
183	"Acq Method",
184	"DA Method",
185	"IRM Calibration status",
186	]
187
188	# Omitted columns
189	# "ID Source", "ID Techniques Applied"
190	# "MS/MS Count", because blank
191
192	return df[output_col_order]	1✔
193
194
195	def concatenate_json(*files: PathLike, outfile: Optional[PathLike] = None) -> SampleList:	1✔
196	r"""
197	Concatenate multiple JSON files together and return a list of :class:`Sample`
198	objects in the concatenated json output.
199
200	:param \*files: The files to concatenate.
201	:param outfile: The file to save the output as. If :py:obj:`None` no file will be saved.
202	""" # noqa: D400
203
204	all_samples = SampleList()	1✔
205
206	for json_file in files:	1✔
207	samples = PathPlus(json_file).load_json(	1✔
208	json_library=sdjson, # type: ignore
209	)
210	# TODO: https://github.com/python/mypy/issues/5018
211	# If it ever gets fixed
212
213	for sample in samples:	1✔
214	all_samples.append(Sample(**sample))	1✔
215
216	if outfile is not None:	1✔
217	PathPlus(outfile).dump_json(	1✔
218	all_samples,
219	json_library=sdjson, # type: ignore
220	indent=2,
221	)
222	# TODO: https://github.com/python/mypy/issues/5018
223	# If it ever gets fixed
224
225	return all_samples	1✔

PyMassSpec / mh_utils / 22197751771

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous