• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PyMassSpec / mh_utils / 20070565680

09 Dec 2025 04:20PM UTC coverage: 88.836%. Remained the same
20070565680

Pull #56

github

web-flow
Merge 944d3866f into aa7341fec
Pull Request #56: [repo-helper] Configuration Update

931 of 1048 relevant lines covered (88.84%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

64.16
/mh_utils/csv_parser/classes.py
1
#!/usr/bin/env python3
2
#
3
#  classes.py
4
"""
5
Classes to model parts of MassHunter CSV files.
6

7
.. versionadded:: 0.2.0
8
"""
9
#
10
#  Copyright © 2020-2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
11
#
12
#  Permission is hereby granted, free of charge, to any person obtaining a copy
13
#  of this software and associated documentation files (the "Software"), to deal
14
#  in the Software without restriction, including without limitation the rights
15
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
#  copies of the Software, and to permit persons to whom the Software is
17
#  furnished to do so, subject to the following conditions:
18
#
19
#  The above copyright notice and this permission notice shall be included in all
20
#  copies or substantial portions of the Software.
21
#
22
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
26
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
27
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
28
#  OR OTHER DEALINGS IN THE SOFTWARE.
29
#
30

31
# stdlib
32
from collections import OrderedDict
1✔
33
from decimal import Decimal
1✔
34
from typing import Dict, Iterable, List, Optional, Tuple, Type, TypeVar
1✔
35

36
# 3rd party
37
import numpy
1✔
38
import pandas  # type: ignore
1✔
39
import sdjson
1✔
40
from cawdrey import AlphaDict
1✔
41
from domdf_python_tools import doctools
1✔
42
from domdf_python_tools.doctools import prettify_docstrings
1✔
43
from domdf_python_tools.paths import PathPlus
1✔
44
from domdf_python_tools.typing import PathLike
1✔
45

46
# this package
47
from mh_utils import Dictable
1✔
48

49
# Names exported by a star-import of this module.
__all__ = [
                "Sample",
                "Result",
                "SampleList",
                "BaseSamplePropertyDict",
                "SamplesAreaDict",
                "SamplesScoresDict",
                "encode_result_or_sample",
                "encode_set",
                "encode_decimal",
                "_S",
                "_SL",
                "_R",
                ]
63

64
# Report the class under its public import path (``pandas.Series``) wherever
# ``__module__`` is consulted. The attribute was previously misspelt as
# ``__module_`` (one trailing underscore), which merely created an unused
# attribute on the class and left ``__module__`` untouched.
pandas.Series.__module__ = "pandas"

# Type variables for the alternative constructors and for methods returning
# an instance of the (sub)class they were called on.
_S = TypeVar("_S", bound="Sample")
_SL = TypeVar("_SL", bound="SampleList")
_R = TypeVar("_R", bound="Result")
1✔
69

70

71
@prettify_docstrings
class Sample(Dictable):
        """
        Represents a sample in a MassHunter CSV file.

        Every argument except ``results`` is stored unchanged as an attribute of the same name.
        :meth:`from_series` fills them from the CSV columns named below.

        :param sample_name: Taken from the ``Sample Name`` column.
        :param sample_type: Taken from the ``Sample Type`` column.
        :param instrument_name: Taken from the ``Instrument Name`` column.
        :param position: Taken from the ``Position`` column.
        :param user: Taken from the ``User Name`` column.
        :param acq_method: Taken from the ``Acq Method`` column.
        :param da_method: Taken from the ``DA Method`` column.
        :param irm_cal_status: Taken from the ``IRM Calibration status`` column.
        :param filename: Taken from the ``File`` column.
        :param results: The results for this sample. Either a mapping of compound number
                to result, or a list of results (which are keyed by their ``index``).
                Each result may be a :class:`~.Result` or a dictionary of keyword
                arguments from which one is created.

        :raises TypeError: If ``results`` is not :py:obj:`None`, a :class:`dict` or a :class:`list`.
        """

        def __init__(
                        self,
                        sample_name,
                        sample_type,
                        instrument_name,
                        position,
                        user,
                        acq_method,
                        da_method,
                        irm_cal_status,
                        filename,
                        results=None,
                        ):

                self.sample_name = sample_name
                self.sample_type = sample_type
                self.instrument_name = instrument_name
                self.position = position
                self.user = user
                self.acq_method = acq_method
                self.da_method = da_method
                self.irm_cal_status = irm_cal_status
                self.filename = filename

                # Results keyed by compound number.
                self._results: Dict[float, Result] = {}

                if results is None:
                        return

                if isinstance(results, dict):
                        # The mapping's own keys are kept as the compound numbers.
                        for cpd_no, compound in results.items():
                                self._results[cpd_no] = self._to_result(compound)
                elif isinstance(results, list):
                        # A list carries no keys, so each result is stored under its ``index``.
                        for compound in results:
                                result = self._to_result(compound)
                                self._results[result.index] = result
                else:
                        raise TypeError(f"Unknown type for `results`: {type(results)}")

        @staticmethod
        def _to_result(compound):
                """
                Return ``compound`` as a result object, creating a :class:`~.Result` if it is a dictionary.

                :param compound: A result object, or a dictionary of keyword arguments for :class:`~.Result`.
                """

                if isinstance(compound, dict):
                        return Result(**compound)
                return compound

        def add_result(self, result):
                """
                Add a result to the sample.

                The result is stored under its ``index``, replacing any result already stored there.

                :param result:
                """

                self._results[result.index] = result

        @property
        def results_list(self) -> List["Result"]:
                """
                Returns a list of results in the order in which they were identified.

                I.e. sorted by the ``Cpd`` value from the csv export.

                :rtype:

                .. clearpage::
                """

                return [self._results[key] for key in sorted(self._results)]

        def __eq__(self, other) -> bool:
                # Defer to the other operand (and ultimately to identity comparison)
                # rather than implicitly returning ``None`` for unrelated types.
                if not isinstance(other, self.__class__):
                        return NotImplemented

                # Only these four fields identify a sample; the results are not compared.
                return (
                                self.sample_name == other.sample_name and self.sample_type == other.sample_type
                                and self.filename == other.filename and self.acq_method == other.acq_method
                                )

        @classmethod
        def from_series(cls: Type[_S], series) -> _S:
                """
                Construct a :class:`~.Sample` from a :class:`pandas.Series`.

                The new sample has no results.

                :param series: A series indexed by the MassHunter column names.
                :return:
                """

                sample_name = series["Sample Name"]
                sample_type = series["Sample Type"]
                filename = series["File"]
                instrument_name = series["Instrument Name"]
                position = series["Position"]
                user = series["User Name"]
                acq_method = series["Acq Method"]
                da_method = series["DA Method"]
                irm_cal_status = series["IRM Calibration status"]

                return cls(
                                sample_name,
                                sample_type,
                                instrument_name,
                                position,
                                user,
                                acq_method,
                                da_method,
                                irm_cal_status,
                                filename,
                                )

        def __repr__(self):
                return f"Sample({self.sample_name})"

        def to_dict(self):
                """
                Return a dictionary representation of the class.

                The keys match the arguments of the constructor; ``results`` holds :attr:`results_list`.
                """

                return AlphaDict(
                                sample_name=self.sample_name,
                                sample_type=self.sample_type,
                                instrument_name=self.instrument_name,
                                position=self.position,
                                user=self.user,
                                acq_method=self.acq_method,
                                da_method=self.da_method,
                                irm_cal_status=self.irm_cal_status,
                                filename=self.filename,
                                results=self.results_list
                                )
223

224

225
@prettify_docstrings
class Result(Dictable):
        r"""
        Represents a Result in a MassHunter CSV file.

        .. raw:: latex

                \begin{multicols}{2}

        :param cas:
        :param name:
        :param hits:
        :param index:
        :param formula:
        :param score:
        :param abundance:
        :param height:
        :param area:
        :param diff_mDa:
        :param diff_ppm:
        :param rt:
        :param start:
        :param end:
        :param width:
        :param tgt_rt:
        :param rt_diff:
        :param mz:
        :param product_mz:
        :param base_peak:
        :param mass:
        :param average_mass:
        :param tgt_mass:
        :param mining_algorithm:
        :param z_count:
        :param max_z:
        :param min_z:
        :param n_ions:
        :param polarity:
        :param label:
        :param flags:
        :param flag_severity:
        :param flag_severity_code:

        .. raw:: latex

                \end{multicols}
        """

        def __init__(
                        self,
                        cas,
                        name: str,
                        hits,
                        index: int = -1,
                        formula: str = '',
                        score: float = 0.0,
                        abundance: float = 0,
                        height: float = 0,
                        area: float = 0,
                        diff_mDa: float = 0.0,
                        diff_ppm: float = 0.0,
                        rt: float = 0.0,
                        start: float = 0.0,
                        end: float = 0.0,
                        width: float = 0.0,
                        tgt_rt: float = 0.0,
                        rt_diff: float = 0.0,
                        mz: float = 0.0,
                        product_mz: float = 0.0,
                        base_peak: float = 0.0,
                        mass: float = 0.0,
                        average_mass: float = 0.0,
                        tgt_mass: float = 0.0,
                        mining_algorithm: str = '',
                        z_count: int = 0,
                        max_z: int = 0,
                        min_z: int = 0,
                        n_ions: int = 0,
                        polarity: str = '',
                        label: str = '',
                        flags: str = '',
                        flag_severity: str = '',
                        flag_severity_code: int = 0,
                        ):

                # Possibly also AL (ID Source) and AM (ID Techniques Applied)

                # ``cas`` and ``hits`` are stored exactly as given.
                self._cas = cas
                self.name: str = str(name)
                self.hits = hits

                # (attribute name, type the value is coerced to, raw value).
                # The entries are applied in this order.
                coerced_fields = (
                                ("formula", str, formula),
                                ("score", Decimal, score),
                                ("abundance", int, abundance),
                                ("height", int, height),
                                ("area", int, area),
                                ("diff_mDa", Decimal, diff_mDa),
                                ("diff_ppm", Decimal, diff_ppm),
                                ("rt", Decimal, rt),
                                ("start", Decimal, start),
                                ("end", Decimal, end),
                                ("width", Decimal, width),
                                ("tgt_rt", Decimal, tgt_rt),
                                ("rt_diff", Decimal, rt_diff),
                                ("mz", Decimal, mz),
                                ("product_mz", Decimal, product_mz),
                                ("base_peak", Decimal, base_peak),
                                ("mass", Decimal, mass),
                                ("average_mass", Decimal, average_mass),
                                ("tgt_mass", Decimal, tgt_mass),
                                ("mining_algorithm", str, mining_algorithm),
                                ("z_count", int, z_count),
                                ("max_z", int, max_z),
                                ("min_z", int, min_z),
                                ("n_ions", int, n_ions),
                                ("polarity", str, polarity),
                                ("label", str, label),
                                ("flags", str, flags),
                                ("flag_severity", str, flag_severity),
                                ("flag_severity_code", int, flag_severity_code),
                                )

                for attribute, coerce, raw_value in coerced_fields:
                        setattr(self, attribute, coerce(raw_value))

                # Tracks the number of the result in the sample; stored without conversion.
                self.index: int = index

        # The "Score (Tgt)" column is not read.
        @classmethod
        def from_series(cls: Type[_R], series: pandas.Series) -> _R:
                """
                Construct a :class:`~.classes.Result` from a :class:`pandas.Series`.

                :param series: A series indexed by the MassHunter column names.

                :rtype:

                .. clearpage::
                """

                # Columns in the order they are read from the series. Apart from
                # ``Cpd`` (the ``index`` argument) and ``Hits``, which swap places,
                # this is also the order of the constructor's arguments.
                columns = (
                                "CAS",
                                "Name",
                                "Cpd",
                                "Hits",
                                "Formula",
                                "Score",
                                "Abund",
                                "Height",
                                "Area",
                                "Diff (Tgt, mDa)",
                                "Diff (Tgt, ppm)",
                                "RT",
                                "Start",
                                "End",
                                "Width",
                                "RT (Tgt)",
                                "RT Diff (Tgt)",
                                "m/z",
                                "m/z (prod.)",
                                "Base Peak",
                                "Mass",
                                "Avg Mass",
                                "Mass (Tgt)",
                                "Mining Algorithm",
                                "Z Count",
                                "Max Z",
                                "Min Z",
                                "Ions",
                                "Polarity",
                                "Label",
                                "Flags (Tgt)",
                                "Flag Severity (Tgt)",
                                "Flag Severity Code (Tgt)",
                                )

                cas, name, index, hits, *remaining = (series[column] for column in columns)

                return cls(cas, name, hits, index, *remaining)

        def __repr__(self):
                return f"Result({self.name}; {self.formula}; {self.rt}; {self.score})"

        def to_dict(self):
                """
                Return a dictionary representation of the class.

                The keys match the arguments of the constructor.
                """

                # Attributes stored under the same name as their constructor argument.
                # ``cas`` is the exception, as it is kept in ``_cas``.
                public_attributes = (
                                "name",
                                "hits",
                                "formula",
                                "score",
                                "abundance",
                                "height",
                                "area",
                                "diff_mDa",
                                "diff_ppm",
                                "rt",
                                "start",
                                "end",
                                "width",
                                "tgt_rt",
                                "rt_diff",
                                "mz",
                                "product_mz",
                                "base_peak",
                                "mass",
                                "average_mass",
                                "tgt_mass",
                                "mining_algorithm",
                                "z_count",
                                "max_z",
                                "min_z",
                                "n_ions",
                                "polarity",
                                "label",
                                "flags",
                                "flag_severity",
                                "flag_severity_code",
                                "index",
                                )

                return AlphaDict(
                                cas=self._cas,
                                **{attribute: getattr(self, attribute) for attribute in public_attributes},
                                )

        def __eq__(self, other):
                # Results only compare against strings: a case-insensitive match on the name.
                if not isinstance(other, str):
                        return NotImplemented

                return other.casefold() == self.name.casefold()
477

478

479
class SampleList(List[Sample]):
        """
        A list of :class:`mh_utils.csv_parser.classes.Sample` objects.
        """

        @staticmethod
        def _find_result(sample: Sample, compound_name: str) -> Optional["Result"]:
                """
                Return the first result in ``sample`` named ``compound_name``, or :py:obj:`None` if there is none.

                :param sample:
                :param compound_name:
                """

                for result in sample.results_list:
                        if result.name == compound_name:
                                return result

                return None

        @doctools.append_docstring_from(Sample.__init__)
        def add_new_sample(self, *args, **kwargs):
                """
                Add a new sample to the list and return the
                :class:`~classes.Sample` object representing it.

                """  # noqa: D400

                return self.add_sample(Sample(*args, **kwargs))

        def add_sample(self, sample: Sample) -> Sample:
                """
                Add a :class:`~.Sample` object to the list.

                If the list already contains a sample equal to ``sample``, the list is
                left unchanged and the sample already in the list is returned instead.

                :param sample:

                :rtype:

                .. clearpage::
                """

                if sample in self:
                        return self[self.index(sample)]

                self.append(sample)
                return sample

        # def find_sample(self, sample_name: str) -> Optional[Sample]:
        #         if sample_name in self:
        #                 return self[self.index(sample_name)]
        #         else:
        #                 return None

        def add_sample_from_series(self, series: pandas.Series) -> Sample:
                """
                Create a new sample object from a :class:`pandas.series` and add it to the list.

                :returns: The :class:`~classes.Sample` object held in the list, as returned by :meth:`add_sample`.

                :param series:
                """

                return self.add_sample(Sample.from_series(series))

        def sort_samples(self, key: str, reverse: bool = False):
                """
                Sort the list of :class:`~.Samples` in place.

                :param key: The name of the property in the sample to sort by.
                :param reverse: Whether the list should be sorted in reverse order.

                :rtype:

                .. clearpage::
                """

                self.sort(key=lambda samp: getattr(samp, key), reverse=reverse)

        def reorder_samples(self, order_mapping: Dict, key: str = "sample_name"):
                """
                Reorder the list of :class:`~.Samples` in place.

                :param order_mapping: A mapping between sample names and their new position in the list.
                        For example:

                                .. code-block:: python

                                        order_mapping = {
                                                "Propellant 1ug +ve": 0,
                                                "Propellant 1mg +ve": 1,
                                                "Propellant 1ug -ve": 2,
                                                "Propellant 1mg -ve": 3,
                                                }

                :param key: The name of the property in the sample which is looked up in ``order_mapping``.

                :raises KeyError: If the ``key`` property of a sample is not in ``order_mapping``.
                """

                # Ascending order, so the sample mapped to ``0`` comes first. This previously
                # sorted with ``reverse=True``, which put the samples in the opposite order
                # to the positions requested.
                self.sort(key=lambda s: order_mapping[getattr(s, key)])

        def rename_samples(self, rename_mapping: Dict, key: str = "sample_name"):
                r"""
                Rename the samples in the list.

                :param rename_mapping: A mapping between current sample names and their new names.
                        The mapping is not modified.
                :param key: The name of the property in the sample which is looked up in ``rename_mapping``.
                        The new name is always assigned to ``sample_name``.

                Use ``rename_mapping=``\:py:obj:`None` or omit the sample from the ``rename_mapping`` entirely
                to leave the name unchanged.

                For example:

                .. code-block:: python

                        rename_mapping = {
                                "Propellant 1ug +ve": "Alliant Unique 1µg/L +ESI",
                                "Propellant 1mg +ve": "Alliant Unique 1mg/L +ESI",
                                "Propellant 1mg -ve": None,
                                }
                """

                for sample in self:
                        # Look the name up rather than popping it: popping emptied the caller's
                        # mapping and left any later sample with the same key unrenamed.
                        new_name = rename_mapping.get(getattr(sample, key))

                        # Missing entries and falsey values (e.g. ``None``) leave the name unchanged.
                        if new_name:
                                sample.sample_name = new_name

        def get_areas_and_scores(
                        self,
                        compound_name: str,
                        include_none: bool = False,
                        ) -> Tuple[OrderedDict, OrderedDict]:
                """
                Returns two dictionaries: one containing sample names and peak areas for the
                compound with the given name, the other containing sample names and scores.

                Where a sample has several results with that name only the first is used.

                :param compound_name:
                :param include_none: Whether samples where the compound was not found
                        should be included in the results. Their area and score are :py:obj:`None`.
                """  # noqa: D400

                peak_areas: "OrderedDict[str, Optional[float]]" = OrderedDict()
                scores: "OrderedDict[str, Optional[Decimal]]" = OrderedDict()

                for sample in self:
                        result = self._find_result(sample, compound_name)

                        if result is not None:
                                peak_areas[sample.sample_name] = result.area
                                scores[sample.sample_name] = result.score
                        elif include_none:
                                peak_areas[sample.sample_name] = None
                                scores[sample.sample_name] = None

                return peak_areas, scores

        def get_retention_times(self, compound_name: str, include_none: bool = False) -> OrderedDict:
                """
                Returns a dictionary containing sample names and retention times for the
                compound with the given name.

                The retention times are converted to :class:`float`.

                :param compound_name:
                :param include_none: Whether samples where the compound was not found
                        should be included in the results. Their retention time is ``NaN``.
                """  # noqa: D400

                times = OrderedDict()

                for sample in self:
                        result = self._find_result(sample, compound_name)

                        if result is not None:
                                times[sample.sample_name] = float(result.rt)
                        elif include_none:
                                times[sample.sample_name] = numpy.nan

                return times

        def get_peak_areas(self, compound_name: str, include_none: bool = False) -> OrderedDict:
                """
                Returns a dictionary containing sample names and peak areas for the
                compound with the given name.

                :param compound_name:
                :param include_none: Whether samples where the compound was not found
                        should be included in the results.
                """  # noqa: D400

                peak_areas, _ = self.get_areas_and_scores(compound_name, include_none)
                return peak_areas

        def get_areas_for_compounds(
                        self,
                        compound_names: Iterable[str],
                        include_none: bool = False,
                        ) -> "SamplesAreaDict":
                """
                Returns a dictionary containing sample names and peak areas for the
                compounds with the given names.

                :param compound_names:
                :param include_none: Whether samples where none of the specified compounds
                        were found should be included in the results.
                """  # noqa: D400

                all_areas, _ = self.get_areas_and_scores_for_compounds(compound_names, include_none)
                return all_areas

        def get_areas_and_scores_for_compounds(
                        self,
                        compound_names: Iterable[str],
                        include_none: bool = False,
                        ) -> Tuple["SamplesAreaDict", "SamplesScoresDict"]:
                """
                Returns two dictionaries: one containing sample names and peak areas for the
                compounds with the given names, the other containing sample names and scores.

                Each sample name maps to a dictionary of compound names and values.
                A compound which was not found in a sample has a value of :py:obj:`None`.

                :param compound_names:
                :param include_none: Whether samples where none of the specified compounds
                        were found should be included in the results.
                        A sample is treated as having no compounds found when all of its areas are falsey.

                :rtype:

                .. clearpage::
                """  # noqa: D400

                tmp_all_areas = SamplesAreaDict()
                tmp_all_scores = SamplesScoresDict()

                for name in compound_names:
                        # Missing compounds are always requested here, so that every sample
                        # gets an entry for every compound; filtering happens afterwards.
                        # A single call yields both dictionaries from one pass over the samples.
                        areas, scores = self.get_areas_and_scores(name, True)

                        for sample_name, area in areas.items():
                                tmp_all_areas.setdefault(sample_name, {})[name] = area
                                tmp_all_scores.setdefault(sample_name, {})[name] = scores[sample_name]

                if include_none:
                        return tmp_all_areas, tmp_all_scores

                all_areas = SamplesAreaDict()
                all_scores = SamplesScoresDict()

                for sample_name, compound_areas in tmp_all_areas.items():
                        if any(compound_areas.values()):
                                all_areas[sample_name] = compound_areas
                                all_scores[sample_name] = tmp_all_scores[sample_name]

                return all_areas, all_scores

        def get_compounds(self) -> List[str]:
                """
                Returns a list containing the names of the compounds present in the samples in alphabetical order.

                Each name appears once, however many samples it was found in.
                """

                return sorted({result.name for sample in self for result in sample.results_list})

        def get_scores(self, compound_name: str, include_none: bool = False) -> OrderedDict:
                """
                Returns a dictionary containing sample names and scores for the
                compound with the given name.

                :param compound_name:
                :param include_none: Whether samples where the compound was not found
                        should be included in the results.

                :rtype:

                .. clearpage::
                """  # noqa: D400

                _, scores = self.get_areas_and_scores(compound_name, include_none)
                return scores

        def filter(  # noqa: A003  # pylint: disable=redefined-builtin
                self: _SL,
                sample_names: Iterable[str],
                key: str = "sample_name",
                exclude: bool = False,
                ) -> _SL:
                """
                Filter the list to only contain sample_names whose name is in ``sample_names``.

                The list itself is not modified; a new list of the same type is returned.

                :param sample_names: The sample names to include.
                :param key: The name of the property in the sample which is compared with ``sample_names``.
                :param exclude: If :py:obj:`True`, any sample whose name is in ``sample_names``
                        will be excluded from the output, rather than included.
                """

                # A one-shot iterator (e.g. a generator) would be consumed by the first
                # membership test, making every later test fail, so take a copy of its
                # contents. Anything else is used as given.
                if iter(sample_names) is sample_names:
                        sample_names = list(sample_names)

                new_sample_list = self.__class__()

                for sample in self:
                        is_listed = getattr(sample, key) in sample_names

                        # Keep listed samples when including, and unlisted samples when excluding.
                        if is_listed != bool(exclude):
                                new_sample_list.append(sample)

                return new_sample_list

        @property
        def sample_names(self) -> List[str]:
                """
                Returns a list of sample names in the :class:`~.classes.SampleList`.
                """

                return [sample.sample_name for sample in self]

        @classmethod
        def from_json_file(cls: Type[_SL], filename: PathLike, **kwargs) -> _SL:
                r"""
                Construct a :class:`~.classes.SampleList` from JSON file.

                Each item loaded from the file is passed to :class:`~.Sample` as keyword arguments.

                :param filename: The filename of the JSON file.
                :param \*\*kwargs: Keyword arguments passed to :meth:`domdf_python_tools.paths.PathPlus.load_json`.
                """

                all_samples = cls()

                for sample in PathPlus(filename).load_json(
                                json_library=sdjson,  # type: ignore
                                **kwargs,
                                ):
                        all_samples.append(Sample(**sample))

                return all_samples
803

804

805
class BaseSamplePropertyDict(OrderedDict):
        """
        Ordered mapping holding a single property for each of a set of samples.

        Each key is a sample name, and each value is a dictionary mapping
        compound names to the value of the property for that compound.
        """

        @property
        def sample_names(self) -> List[str]:
                """
                The sample names (i.e. the keys) of the :class:`~.BaseSamplePropertyDict`, in order.
                """

                return list(self)

        @property
        def n_samples(self) -> int:
                """
                The number of samples in the :class:`~.BaseSamplePropertyDict`.
                """

                return len(self)

        @property
        def n_compounds(self) -> int:
                """
                The number of compounds in the :class:`~.BaseSamplePropertyDict`.

                Only the first sample is inspected. ``0`` is returned if there are no samples.
                """

                if not self:
                        return 0

                first_sample = next(iter(self.values()))
                return len(first_sample)
837

838

839
class SamplesAreaDict(BaseSamplePropertyDict):
        """
        :class:`collections.OrderedDict` to store area information parsed from MassHunter results CSV files.
        """

        def get_compound_areas(self, compound_name: str) -> List[float]:
                """
                Get the peak areas for the given compound in every sample.

                Samples which have no entry for the compound are skipped, so the returned list
                may be shorter than the number of samples.
                An area of :py:obj:`None` is reported as ``0.0``.

                :param compound_name: The name of the compound to get the areas for.

                :return: The areas, in the order the samples appear in the dictionary.
                """

                areas: List[float] = []

                for compound_areas in self.values():
                        # Look the compound up directly rather than scanning every entry of the sample.
                        # A membership test (not ``try``/``except KeyError``) is used so that a
                        # defaultdict-like value never gains an entry as a side effect.
                        if compound_name in compound_areas:
                                area = compound_areas[compound_name]
                                areas.append(0.0 if area is None else area)

                return areas
862

863

864
class SamplesScoresDict(BaseSamplePropertyDict):
        """
        :class:`collections.OrderedDict` to store score information parsed from MassHunter results CSV files.
        """

        def get_compound_scores(self, compound_name: str) -> List[float]:
                """
                Get the peak scores for the given compound in every sample.

                Samples which have no entry for the compound are skipped, so the returned list
                may be shorter than the number of samples.
                A score of :py:obj:`None` is reported as ``0.0``.

                :param compound_name: The name of the compound to get the scores for.

                :return: The scores, in the order the samples appear in the dictionary.
                """

                scores: List[float] = []

                for compound_scores in self.values():
                        # Look the compound up directly rather than scanning every entry of the sample.
                        # A membership test (not ``try``/``except KeyError``) is used so that a
                        # defaultdict-like value never gains an entry as a side effect.
                        if compound_name in compound_scores:
                                score = compound_scores[compound_name]
                                scores.append(0.0 if score is None else score)

                return scores
887

888

889
@sdjson.encoders.register(Sample)
@sdjson.encoders.register(Result)
def encode_result_or_sample(obj) -> dict:
        """
        ``sdjson`` encoder registered for both ``Sample`` and ``Result``.

        :param obj: The object to encode.

        :return: The result of passing ``obj`` to :class:`dict`.
        """

        return dict(obj)
893

894

895
@sdjson.encoders.register(set)
def encode_set(obj: set) -> list:
        """
        ``sdjson`` encoder registered for :class:`set`.

        :param obj: The set to encode.

        :return: A list of the set's elements, in the set's iteration order.
        """

        return [*obj]
898

899

900
@sdjson.encoders.register(Decimal)
def encode_decimal(obj: Decimal) -> str:
        """
        ``sdjson`` encoder registered for :class:`decimal.Decimal`.

        :param obj: The value to encode.

        :return: The string representation of ``obj``.
        """

        return str(obj)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc