• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

materialsproject / pymatgen / 4075885785

pending completion
4075885785

push

github

Shyue Ping Ong
Merge branch 'master' of github.com:materialsproject/pymatgen

96 of 96 new or added lines in 27 files covered. (100.0%)

81013 of 102710 relevant lines covered (78.88%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.19
/pymatgen/core/composition.py
1
# Copyright (c) Pymatgen Development Team.
2
# Distributed under the terms of the MIT License.
3

4
"""
1✔
5
This module implements a Composition class to represent compositions,
6
and a ChemicalPotential class to represent potentials.
7
"""
8

9
from __future__ import annotations
1✔
10

11
import collections
1✔
12
import os
1✔
13
import re
1✔
14
import string
1✔
15
import warnings
1✔
16
from functools import total_ordering
1✔
17
from itertools import combinations_with_replacement, product
1✔
18
from typing import Generator, Iterator, Union, cast
1✔
19

20
from monty.fractions import gcd, gcd_float
1✔
21
from monty.json import MSONable
1✔
22
from monty.serialization import loadfn
1✔
23

24
from pymatgen.core.periodic_table import DummySpecies, Element, Species, get_el_sp
1✔
25
from pymatgen.core.units import Mass
1✔
26
from pymatgen.util.string import Stringify, formula_double_format
1✔
27

28
SpeciesLike = Union[str, Element, Species, DummySpecies]
1✔
29

30

31
@total_ordering
1✔
32
class Composition(collections.abc.Hashable, collections.abc.Mapping, MSONable, Stringify):
1✔
33
    """
34
    Represents a Composition, which is essentially a {element:amount} mapping
35
    type. Composition is written to be immutable and hashable,
36
    unlike a standard Python dict.
37

38
    Note that the key can be either an Element or a Species. Elements and Species
39
    are treated differently. i.e., a Fe2+ is not the same as a Fe3+ Species and
40
    would be put in separate keys. This differentiation is deliberate to
41
    support using Composition to determine the fraction of a particular Species.
42

43
    Works almost completely like a standard python dictionary, except that
44
    __getitem__ is overridden to return 0 when an element is not found.
45
    (somewhat like a defaultdict, except it is immutable).
46

47
    Also adds more convenience methods relevant to compositions, e.g.,
48
    get_fraction.
49

50
    It should also be noted that many Composition related functionality takes
51
    in a standard string as a convenient input. For example,
52
    even though the internal representation of a Fe2O3 composition is
53
    {Element("Fe"): 2, Element("O"): 3}, you can obtain the amount of Fe
54
    simply by comp["Fe"] instead of the more verbose comp[Element("Fe")].
55

56
    >>> comp = Composition("LiFePO4")
57
    >>> comp.get_atomic_fraction(Element("Li"))
58
    0.14285714285714285
59
    >>> comp.num_atoms
60
    7.0
61
    >>> comp.reduced_formula
62
    'LiFePO4'
63
    >>> comp.formula
64
    'Li1 Fe1 P1 O4'
65
    >>> comp.get_wt_fraction(Element("Li"))
66
    0.04399794666951898
67
    >>> comp.num_atoms
68
    7.0
69
    """
70

71
    # Tolerance in distinguishing different composition amounts.
72
    # 1e-8 is fairly tight, but should cut out most floating point arithmetic
73
    # errors.
74
    amount_tolerance = 1e-8
1✔
75

76
    # Special formula handling for peroxides and certain elements. This is so
77
    # that formula output does not write LiO instead of Li2O2 for example.
78
    special_formulas = {
1✔
79
        "LiO": "Li2O2",
80
        "NaO": "Na2O2",
81
        "KO": "K2O2",
82
        "HO": "H2O2",
83
        "CsO": "Cs2O2",
84
        "RbO": "Rb2O2",
85
        "O": "O2",
86
        "N": "N2",
87
        "F": "F2",
88
        "Cl": "Cl2",
89
        "H": "H2",
90
    }
91

92
    oxi_prob = None  # prior probability of oxidation used by oxi_state_guesses
1✔
93

94
    def __init__(self, *args, strict: bool = False, **kwargs) -> None:
        """
        Build a Composition from dict-like input, keyword amounts, another
        Composition, or a formula string.

        Args:
            *args: One of

                1. A single Composition, whose mapping is reused directly.
                2. A single formula string, e.g. Composition("Li2O"), which is
                   handed to _parse_formula.
                3. Anything accepted by the built-in dict() constructor. Keys
                   are converted with get_el_sp, so {Element("Li"): 2,
                   Element("O"): 1}, {"Li": 2, "O": 1} and {3: 2, 8: 1} all
                   result in a Li2O composition.

            strict: Only allow valid Elements and Species in the Composition
                (checked through the ``valid`` property).
            **kwargs: Keyword initialization similar to a dict, e.g.
                Composition(Li=2, O=1). The special keyword ``allow_negative``
                (default False) is popped from kwargs before anything else and
                controls whether negative amounts are accepted.

        Raises:
            ValueError: If an amount is below -amount_tolerance while
                allow_negative is False, or if strict is True and the
                composition is not valid.
        """
        self.allow_negative = kwargs.pop("allow_negative", False)

        # Recognizing a lone Composition or formula string is much faster than
        # routing it through dict().
        sole_arg = args[0] if len(args) == 1 else None
        if isinstance(sole_arg, Composition):
            elem_map = sole_arg
        elif isinstance(sole_arg, str):
            elem_map = self._parse_formula(sole_arg)  # type: ignore
        else:
            elem_map = dict(*args, **kwargs)  # type: ignore

        tol = Composition.amount_tolerance
        kept = {}
        self._natoms = 0
        for key, amount in elem_map.items():
            if amount < -tol and not self.allow_negative:
                raise ValueError("Amounts in Composition cannot be negative!")
            magnitude = abs(amount)
            # Amounts smaller than the tolerance are dropped entirely.
            if magnitude >= tol:
                kept[get_el_sp(key)] = amount
                self._natoms += magnitude
        self._data = kept

        if strict and not self.valid:
            raise ValueError(f"Composition is not valid, contains: {', '.join(map(str, self.elements))}")
1✔
140

141
    def __getitem__(self, item: SpeciesLike) -> float:
        """
        Return the amount stored for ``item``, or 0 when it is absent.

        Args:
            item: Key identifying an element/species; it is converted with
                get_el_sp before the lookup.

        Raises:
            TypeError: If get_el_sp raises a ValueError for ``item``. The
                original ValueError is attached as the cause.
        """
        # Only the key conversion can raise the ValueError we translate, so
        # keep the try body limited to it.
        try:
            sp = get_el_sp(item)
        except ValueError as ex:
            raise TypeError(f"Invalid key {item}, {type(item)} for Composition\nValueError exception:\n{ex}") from ex
        return self._data.get(sp, 0)
1✔
147

148
    def __len__(self) -> int:
        """Number of distinct species stored in the composition."""
        stored = self._data
        return len(stored)
1✔
150

151
    def __iter__(self) -> Iterator[Species | Element | DummySpecies]:
        """Iterate over the species keys of the composition."""
        return iter(self._data)
1✔
153

154
    def __contains__(self, item) -> bool:
        """
        Test whether ``item`` is one of the species in the composition.

        Args:
            item: Key identifying an element/species; it is converted with
                get_el_sp before the membership test.

        Raises:
            TypeError: If get_el_sp raises a ValueError for ``item``. The
                original ValueError is attached as the cause.
        """
        # Only the key conversion can raise the ValueError we translate, so
        # keep the try body limited to it.
        try:
            sp = get_el_sp(item)
        except ValueError as ex:
            raise TypeError(f"Invalid key {item}, {type(item)} for Composition\nValueError exception:\n{ex}") from ex
        return sp in self._data
×
160

161
    def __eq__(self, other: object) -> bool:
        """
        Defines == for Compositions.

        Two compositions are equal when they hold the same number of species
        and every amount agrees within Composition.amount_tolerance. A plain
        dict is first converted to a Composition so that its keys are
        normalized the same way as in __add__/__sub__; a dict that cannot be
        converted compares unequal instead of raising.

        Returns:
            bool, or NotImplemented when ``other`` is neither a Composition
            nor a dict.
        """
        if not isinstance(other, (Composition, dict)):
            return NotImplemented

        if isinstance(other, dict):
            # Indexing a plain dict with our species keys raises KeyError when
            # a key is missing or spelled differently (e.g. "Fe"). Normalize
            # through the constructor; allow_negative avoids rejecting dicts
            # that carry negative amounts.
            try:
                other = Composition(other, allow_negative=True)
            except (TypeError, ValueError):
                return False

        #  elements with amounts < Composition.amount_tolerance don't show up
        #  in the el_map, so checking len enables us to only check one
        #  composition's elements
        if len(self) != len(other):
            return False

        return all(abs(amt - other[el]) <= Composition.amount_tolerance for el, amt in self.items())
1✔
173

174
    def __ge__(self, other: object) -> bool:
        """
        Defines >= for Compositions. Intended ONLY for establishing a sort
        order (the behavior is probably not what you'd expect).

        The union of both element lists is walked in sorted order; the first
        element whose amounts differ by at least amount_tolerance decides the
        result. If none does, the compositions are treated as equal.
        """
        if not isinstance(other, Composition):
            return NotImplemented

        tol = Composition.amount_tolerance
        for el in sorted({*self.elements, *other.elements}):
            delta = self[el] - other[el]
            if delta <= -tol:
                # other has more of this element
                return False
            if delta >= tol:
                # self has more of this element
                return True
        return True
1✔
188

189
    def __add__(self, other: object) -> Composition:
        """
        Add two compositions species by species. For example, an Fe2O3
        composition + an FeO composition gives a Fe3O4 composition.

        ``other`` may be a Composition or a dict; its keys are converted with
        get_el_sp. The result inherits this composition's allow_negative flag.
        """
        if not isinstance(other, (Composition, dict)):
            return NotImplemented

        # Seed the running totals with our own amounts.
        totals: dict[SpeciesLike, float] = collections.defaultdict(float, self)
        for species, amount in other.items():
            totals[get_el_sp(species)] += amount
        return Composition(totals, allow_negative=self.allow_negative)
1✔
202

203
    def __sub__(self, other: object) -> Composition:
        """
        Subtract a composition species by species. For example, an Fe2O3
        composition - an FeO composition gives an FeO2 composition.

        ``other`` may be a Composition or a dict; its keys are converted with
        get_el_sp. The result inherits this composition's allow_negative flag.

        Raises:
            ValueError if the subtracted composition is greater than the
            original composition in any of its elements, unless allow_negative
            is True
        """
        if not isinstance(other, (Composition, dict)):
            return NotImplemented

        # Seed the running totals with our own amounts.
        remaining: dict[SpeciesLike, float] = collections.defaultdict(float, self)
        for species, amount in other.items():
            remaining[get_el_sp(species)] -= amount
        return Composition(remaining, allow_negative=self.allow_negative)
1✔
221

222
    def __mul__(self, other: object) -> Composition:
        """
        Scale every amount by an integer or a float.
        Fe2O3 * 4 -> Fe8O12
        """
        if not isinstance(other, (int, float)):
            return NotImplemented
        scaled = {el: amt * other for el, amt in self.items()}
        return Composition(scaled, allow_negative=self.allow_negative)
1✔
230

231
    __rmul__ = __mul__
1✔
232

233
    def __truediv__(self, other: object) -> Composition:
        """
        Divide every amount by an integer or a float.
        Fe8O12 / 4 -> Fe2O3
        """
        if not isinstance(other, (int, float)):
            return NotImplemented
        divided = {el: amt / other for el, amt in self.items()}
        return Composition(divided, allow_negative=self.allow_negative)
1✔
237

238
    __div__ = __truediv__
1✔
239

240
    def __hash__(self) -> int:
        """
        Hash based on the chemical system, i.e. only on which species are
        present and not on their amounts.
        """
        species = frozenset(self._data)
        return hash(species)
1✔
245

246
    @property
    def average_electroneg(self) -> float:
        """
        :return: Average electronegativity of the composition, i.e. each
            species' ``X`` weighted by the absolute value of its amount and
            divided by num_atoms.
        """
        weighted_x = sum(el.X * abs(amt) for el, amt in self.items())
        return weighted_x / self.num_atoms
1✔
252

253
    @property
    def total_electrons(self) -> float:
        """
        :return: Total number of electrons in composition, computed as the sum
            of each species' ``Z`` times the absolute value of its amount.
        """
        electrons = sum(el.Z * abs(amt) for el, amt in self.items())
        return electrons
1✔
259

260
    def almost_equals(self, other: Composition, rtol: float = 0.1, atol: float = 1e-8) -> bool:
        """
        Returns true if compositions are equal within a tolerance.

        For every species present in either composition, the amounts a and b
        must satisfy |b - a| <= atol + rtol * (|a| + |b|) / 2.

        Args:
            other (Composition): Other composition to check
            rtol (float): Relative tolerance
            atol (float): Absolute tolerance
        """

        def exceeds_tolerance(sp) -> bool:
            mine, theirs = self[sp], other[sp]
            allowed = atol + rtol * (abs(mine) + abs(theirs)) / 2
            return abs(theirs - mine) > allowed

        species = set(self.elements + other.elements)
        return not any(exceeds_tolerance(sp) for sp in species)
1✔
277

278
    @property
    def is_element(self) -> bool:
        """
        True if composition is an element, i.e. it holds exactly one species.
        """
        n_species = len(self)
        return n_species == 1
1✔
284

285
    def copy(self) -> Composition:
        """
        :return: A copy of the composition that keeps the same allow_negative
            setting.
        """
        negative_ok = self.allow_negative
        return Composition(self, allow_negative=negative_ok)
1✔
290

291
    @property
    def formula(self) -> str:
        """
        Returns a formula string, with elements sorted by electronegativity,
        e.g., Li4 Fe4 P4 O16.
        """
        amounts = self.get_el_amt_dict()

        def electronegativity(symbol):
            return get_el_sp(symbol).X

        parts = []
        for symbol in sorted(amounts, key=electronegativity):
            parts.append(symbol + formula_double_format(amounts[symbol], False))
        return " ".join(parts)
1✔
301

302
    @property
    def alphabetical_formula(self) -> str:
        """
        Returns a formula string, with elements sorted alphabetically,
        e.g., Fe4 Li4 O16 P4.
        """
        tokens = self.formula.split(" ")
        tokens.sort()
        return " ".join(tokens)
1✔
309

310
    @property
    def iupac_formula(self) -> str:
        """
        Returns a formula string, with elements sorted by the iupac
        electronegativity ordering defined in Table VI of "Nomenclature of
        Inorganic Chemistry (IUPAC Recommendations 2005)". This ordering
        effectively follows the groups and rows of the periodic table, except
        the Lanthanides, Actinides and hydrogen. Polyanions are still determined
        based on the true electronegativity of the elements.
        e.g. CH2(SO4)2
        """
        amounts = self.get_el_amt_dict()

        def iupac_rank(symbol):
            return get_el_sp(symbol).iupac_ordering

        parts = []
        for symbol in sorted(amounts, key=iupac_rank):
            parts.append(symbol + formula_double_format(amounts[symbol], False))
        return " ".join(parts)
1✔
325

326
    @property
    def element_composition(self) -> Composition:
        """
        Returns the composition replacing any species by the corresponding
        element. Built from get_el_amt_dict(), so amounts of species sharing a
        symbol are summed.
        """
        per_element = self.get_el_amt_dict()
        return Composition(per_element, allow_negative=self.allow_negative)
1✔
333

334
    @property
    def fractional_composition(self) -> Composition:
        """
        Returns the normalized composition, obtained by dividing every amount
        by the total number of atoms.
        E.g. "Fe2 O3".fractional_composition = "Fe0.4 O0.6".
        """
        total_atoms = self._natoms
        return self / total_atoms
1✔
342

343
    @property
    def reduced_composition(self) -> Composition:
        """
        Returns the reduced composition, i.e. amounts normalized by greatest common denominator.
        E.g. "Fe4 P4 O16".reduced_composition = "Fe P O4".
        """
        reduced, _factor = self.get_reduced_composition_and_factor()
        return reduced
1✔
350

351
    def get_reduced_composition_and_factor(self) -> tuple[Composition, float]:
        """
        Calculates a reduced composition and factor.

        The factor is taken from get_reduced_formula_and_factor() and this
        composition is divided by it.

        Returns:
            A normalized composition and a multiplicative factor, i.e.,
            Li4Fe4P4O16 returns (Composition("LiFePO4"), 4).
        """
        _formula, factor = self.get_reduced_formula_and_factor()
        reduced = self / factor
        return reduced, factor
1✔
361

362
    def get_reduced_formula_and_factor(self, iupac_ordering: bool = False) -> tuple[str, float]:
        """
        Calculates a reduced formula and factor.

        When any amount is not an integer (within amount_tolerance) no
        reduction is attempted: the plain formula without spaces is returned
        with a factor of 1.

        Args:
            iupac_ordering (bool, optional): Whether to order the
                formula by the iupac "electronegativity" series, defined in
                Table VI of "Nomenclature of Inorganic Chemistry (IUPAC
                Recommendations 2005)". This ordering effectively follows
                the groups and rows of the periodic table, except the
                Lanthanides, Actinides and hydrogen. Note that polyanions
                will still be determined based on the true electronegativity of
                the elements.

        Returns:
            A pretty normalized formula and a multiplicative factor, i.e.,
            Li4Fe4P4O16 returns (LiFePO4, 4).
        """
        tol = Composition.amount_tolerance
        integral = all(abs(amt - round(amt)) < tol for amt in self.values())
        if integral:
            int_amounts = {sym: int(round(amt)) for sym, amt in self.get_el_amt_dict().items()}
            formula, factor = reduce_formula(int_amounts, iupac_ordering=iupac_ordering)

            # Peroxides and diatomic elements are written in their doubled
            # form, which halves the multiplicative factor.
            special = Composition.special_formulas
            if formula in special:
                formula = special[formula]
                factor = factor / 2

            return formula, factor

        return self.formula.replace(" ", ""), 1
1✔
391

392
    def get_integer_formula_and_factor(
        self, max_denominator: int = 10000, iupac_ordering: bool = False
    ) -> tuple[str, float]:
        """
        Calculates an integer formula and factor.

        Args:
            max_denominator (int): all amounts in the el:amt dict are
                first converted to a Fraction with this maximum denominator
            iupac_ordering (bool, optional): Whether to order the
                formula by the iupac "electronegativity" series, defined in
                Table VI of "Nomenclature of Inorganic Chemistry (IUPAC
                Recommendations 2005)". This ordering effectively follows
                the groups and rows of the periodic table, except the
                Lanthanides, Actinides and hydrogen. Note that polyanions
                will still be determined based on the true electronegativity of
                the elements.

        Returns:
            A pretty normalized formula and a multiplicative factor, i.e.,
            Li0.5O0.25 returns (Li2O, 0.25). O0.25 returns (O2, 0.125)
        """
        amounts = self.get_el_amt_dict()
        # Common divisor of the (possibly fractional) amounts.
        divisor = gcd_float(list(amounts.values()), 1 / max_denominator)

        int_amounts = {sym: round(amt / divisor) for sym, amt in amounts.items()}
        formula, factor = reduce_formula(int_amounts, iupac_ordering=iupac_ordering)

        # Peroxides and diatomic elements are written in their doubled form,
        # which halves the multiplicative factor.
        special = Composition.special_formulas
        if formula in special:
            formula = special[formula]
            factor = factor / 2
        return formula, factor * divisor
1✔
423

424
    @property
    def reduced_formula(self) -> str:
        """
        Returns a pretty normalized formula, i.e., LiFePO4 instead of
        Li4Fe4P4O16.
        """
        formula, _factor = self.get_reduced_formula_and_factor()
        return formula
1✔
431

432
    @property
    def hill_formula(self) -> str:
        """
        :return: Hill formula. The Hill system (or Hill notation) is a system
        of writing empirical chemical formulas, molecular chemical formulas and
        components of a condensed formula such that the number of carbon atoms
        in a molecule is indicated first, the number of hydrogen atoms next,
        and then the number of all other chemical elements subsequently, in
        alphabetical order of the chemical symbols. When the formula contains
        no carbon, all the elements, including hydrogen, are listed
        alphabetically.
        """
        c = self.element_composition
        elements = sorted(el.symbol for el in c)
        if "C" in elements:
            # Carbon leads, hydrogen (if present) follows, and the remaining
            # symbols keep their alphabetical order.
            leading = ["C", "H"] if "H" in elements else ["C"]
            elements = leading + [el for el in elements if el not in ("C", "H")]

        # Amounts equal to 1 are left implicit.
        formula = [f"{el}{formula_double_format(c[el]) if c[el] != 1 else ''}" for el in elements]
        return " ".join(formula)
1✔
451

452
    @property
    def elements(self) -> list[Element | Species | DummySpecies]:
        """
        Returns a new list of the species keys in this Composition.
        """
        return [*self]
1✔
458

459
    def __str__(self):
        """Space-separated "<species><amount>" tokens built from as_dict(); amounts of 1 are written out."""
        tokens = []
        for species, amount in self.as_dict().items():
            tokens.append(f"{species}{formula_double_format(amount, ignore_ones=False)}")
        return " ".join(tokens)
1✔
461

462
    def to_pretty_string(self) -> str:
        """
        Returns:
            str: Same as output __str__() but with all whitespace removed.
        """
        spaced = str(self)
        return re.sub(r"\s+", "", spaced)
1✔
468

469
    @property
    def num_atoms(self) -> float:
        """
        Total number of atoms in Composition. For negative amounts, sum
        of absolute values. The value is accumulated once in __init__.
        """
        total = self._natoms
        return total
1✔
476

477
    @property
    def weight(self) -> float:
        """
        Total molecular weight of Composition, returned as a Mass in "amu".
        """
        total_mass = sum(amount * el.atomic_mass for el, amount in self.items())
        return Mass(total_mass, "amu")
1✔
483

484
    def get_atomic_fraction(self, el: SpeciesLike) -> float:
        """
        Calculate atomic fraction of an Element or Species.

        Args:
            el (Element/Species): Element or Species to get fraction for.

        Returns:
            Atomic fraction for element el in Composition, i.e. the absolute
            value of its amount divided by the total number of atoms.
        """
        amount = abs(self[el])
        return amount / self._natoms
1✔
495

496
    def get_wt_fraction(self, el: SpeciesLike) -> float:
        """
        Calculate weight fraction of an Element or Species.

        Args:
            el (Element | Species): Element or Species to get fraction for.

        Returns:
            float: Weight fraction for element el in Composition, i.e. its
            atomic mass times the absolute value of its amount, divided by the
            total weight.
        """
        species = get_el_sp(el)
        el_mass = cast(float, species.atomic_mass)
        contribution = el_mass * abs(self[el])
        return contribution / self.weight
1✔
508

509
    def contains_element_type(self, category: str) -> bool:
        """
        Check if Composition contains any elements matching a given category.

        Block categories ("s-block", ...) are matched by testing whether the
        block letter occurs in the element's ``block``; every other category
        is matched through the element's ``is_<category>`` attribute.

        Args:
            category (str): one of "noble_gas", "transition_metal",
                "post_transition_metal", "rare_earth_metal", "metal", "metalloid",
                "alkali", "alkaline", "halogen", "chalcogen", "lanthanoid",
                "actinoid", "quadrupolar", "s-block", "p-block", "d-block", "f-block"

        Returns:
            True if any elements in Composition match category, otherwise False

        Raises:
            ValueError: If category is not one of the allowed values.
        """
        allowed_categories = (
            "noble_gas",
            "transition_metal",
            "post_transition_metal",
            "rare_earth_metal",
            "metal",
            "metalloid",
            "alkali",
            "alkaline",
            "halogen",
            "chalcogen",
            "lanthanoid",
            "actinoid",
            "quadrupolar",
            "s-block",
            "p-block",
            "d-block",
            "f-block",
        )

        if category not in allowed_categories:
            raise ValueError(f"Please pick a category from: {allowed_categories}")

        if "block" in category:
            block_letter = category[0]

            def matches(el) -> bool:
                return block_letter in el.block

        else:
            attr_name = f"is_{category}"

            def matches(el):
                return getattr(el, attr_name)

        return any(matches(el) for el in self.elements)
1✔
548

549
    def _parse_formula(self, formula: str) -> dict[str, float]:
        """
        Parse a formula string into a {symbol: amount} dict.

        The innermost parenthesized group is expanded first (its contents
        multiplied by the trailing factor, if any), and the expanded string is
        parsed again until no parentheses remain.

        Args:
            formula (str): A string formula, e.g. Fe2O3, Li3Fe2(PO4)3

        Returns:
            dict[str, float]: Symbol -> amount mapping for that formula.

        Raises:
            ValueError: If text is left over after all symbol/amount pairs
                have been consumed.

        Notes:
            In the case of Metallofullerene formula (e.g. Y3N@C80),
            the @ mark will be dropped and passed to parser.
        """
        # for Metallofullerene like "Y3N@C80"
        formula = formula.replace("@", "")

        def get_sym_dict(form: str, factor: int | float) -> dict[str, float]:
            """Tally symbol amounts in a parenthesis-free fragment, scaled by factor."""
            tally: dict[str, float] = collections.defaultdict(float)
            leftover = form
            for match in re.finditer(r"([A-Z][a-z]*)\s*([-*\.e\d]*)", form):
                symbol = match.group(1)
                amt_text = match.group(2)
                amt = float(amt_text) if amt_text.strip() != "" else 1.0
                tally[symbol] += amt * factor
                # Strike the consumed text so that anything unparsed remains.
                leftover = leftover.replace(match.group(), "", 1)
            if leftover.strip():
                raise ValueError(f"{leftover} is an invalid formula!")
            return tally

        group = re.search(r"\(([^\(\)]+)\)\s*([\.e\d]*)", formula)
        if group is None:
            return get_sym_dict(formula, 1)

        factor_text = group.group(2)
        factor = float(factor_text) if factor_text != "" else 1.0
        unit_sym_dict = get_sym_dict(group.group(1), factor)
        expanded_sym = "".join(f"{el}{amt}" for el, amt in unit_sym_dict.items())
        expanded_formula = formula.replace(group.group(), expanded_sym)
        return self._parse_formula(expanded_formula)
1✔
587

588
    @property
    def anonymized_formula(self) -> str:
        """
        An anonymized formula. Unique species are arranged in ordering of
        increasing amounts and assigned ascending alphabets. Useful for
        prototyping formulas. For example, all stoichiometric perovskites have
        anonymized_formula ABC3.
        """
        reduced = self.element_composition
        # Test integrality and take the gcd on the per-element amounts that are
        # actually being reduced. Using the per-species amounts of self would
        # under-reduce mixed-valence compositions (e.g. Fe2+ 1, Fe3+ 1, O2- 2).
        element_amounts = list(reduced.values())
        if all(x == int(x) for x in element_amounts):
            reduced /= gcd(*(int(i) for i in element_amounts))

        anon = ""
        for e, amt in zip(string.ascii_uppercase, sorted(reduced.values())):
            if amt == 1:
                amt_str = ""
            elif abs(amt % 1) < 1e-8:
                amt_str = str(int(amt))
            else:
                amt_str = str(amt)
            anon += f"{e}{amt_str}"
        return anon
1✔
610

611
    @property
    def chemical_system(self) -> str:
        """
        Get the chemical system of a Composition, for example "O-Si" for
        SiO2. Chemical system is a string of a list of elements
        sorted alphabetically and joined by dashes, by convention for use
        in database keys.
        """
        symbols = [el.symbol for el in self.elements]
        symbols.sort()
        return "-".join(symbols)
1✔
620

621
    @property
    def valid(self) -> bool:
        """
        Returns True if Composition contains valid elements or species and
        False if the Composition contains any dummy species.
        """
        return all(not isinstance(el, DummySpecies) for el in self.elements)
1✔
628

629
    def __repr__(self) -> str:
        """Return "Comp: " followed by the electronegativity-ordered formula."""
        return f"Comp: {self.formula}"
×
631

632
    @classmethod
    def from_dict(cls, d) -> Composition:
        """
        Creates a composition from a dict generated by as_dict(). Strictly not
        necessary given that the standard constructor already takes in such an
        input, but this method preserves the standard pymatgen API of having
        from_dict methods to reconstitute objects generated by as_dict(). Allows
        for easier introspection.

        Args:
            d (dict): {symbol: amount} dict.
        """
        composition = cls(d)
        return composition
1✔
645

646
    @classmethod
    def from_weight_dict(cls, weight_dict) -> Composition:
        """
        Creates a Composition based on a dict of atomic fractions calculated
        from a dict of weight fractions. Allows for quick creation of the class
        from weight-based notations commonly used in the industry, such as
        Ti6V4Al and Ni60Ti40.

        Each weight is divided by the element's atomic mass and the results
        are normalized by their sum.

        Args:
            weight_dict (dict): {symbol: weight_fraction} dict.

        Returns:
            Composition
        """
        # Weight divided by atomic mass for every element.
        per_mass = {el: val / Element(el).atomic_mass for el, val in weight_dict.items()}
        weight_sum = sum(per_mass.values())
        comp_dict = {el: ratio / weight_sum for el, ratio in per_mass.items()}

        return cls(comp_dict)
1✔
664

665
    def get_el_amt_dict(self) -> dict[str, float]:
        """
        Sum the amounts per ``symbol`` attribute of each species.

        Returns:
            dict[str, float]: element symbol and (unreduced) amount. E.g.
            {"Fe": 4.0, "O":6.0} or {"Fe3+": 4.0, "O2-":6.0}
        """
        per_symbol: dict[str, float] = collections.defaultdict(float)
        for species, amount in self.items():
            per_symbol[species.symbol] += amount
        return per_symbol
1✔
675

676
    def as_dict(self) -> dict[str, float]:
        """
        Sum the amounts per string representation of each species.

        Returns:
            dict with species symbol and (unreduced) amount e.g.,
            {"Fe": 4.0, "O":6.0} or {"Fe3+": 4.0, "O2-":6.0}
        """
        per_species: dict[str, float] = collections.defaultdict(float)
        for species, amount in self.items():
            per_species[str(species)] += amount
        return per_species
1✔
686

687
    @property
    def to_reduced_dict(self) -> dict:
        """
        Returns:
            Dict with element symbol and reduced amount e.g.,
            {"Fe": 2.0, "O":3.0}
        """
        reduced, _factor = self.get_reduced_composition_and_factor()
        return reduced.as_dict()
1✔
695

696
    @property
    def to_weight_dict(self) -> dict:
        """
        Returns:
            Dict with weight fraction of each component
            {"Ti": 0.90, "V": 0.06, "Al": 0.04}
        """
        fractions = {}
        for el in self.elements:
            fractions[str(el)] = self.get_wt_fraction(el)
        return fractions
1✔
704

705
    @property
    def to_data_dict(self) -> dict:
        """
        Returns:
            A dict with many keys and values relating to Composition/Formula,
            including reduced_cell_composition, unit_cell_composition,
            reduced_cell_formula, elements and nelements.
        """
        reduced, _factor = self.get_reduced_composition_and_factor()
        unit_cell = self.as_dict()
        # Keys of the as_dict() mapping double as the element list.
        species_names = list(self.as_dict())
        return {
            "reduced_cell_composition": reduced,
            "unit_cell_composition": unit_cell,
            "reduced_cell_formula": self.reduced_formula,
            "elements": species_names,
            "nelements": len(species_names),
        }
720

721
    def oxi_state_guesses(
        self,
        oxi_states_override: dict | None = None,
        target_charge: float = 0,
        all_oxi_states: bool = False,
        max_sites: int | None = None,
    ) -> list[dict[str, float]]:
        """
        Checks if the composition is charge-balanced and returns back all
        charge-balanced oxidation state combinations. Composition must have
        integer values. Note that more num_atoms in the composition gives
        more degrees of freedom. e.g., if possible oxidation states of
        element X are [2,4] and Y are [-3], then XY is not charge balanced
        but X2Y2 is. Results are returned from most to least probable based
        on ICSD statistics. Use max_sites to improve performance if needed.

        This is a thin wrapper that returns the first item of the result of
        _get_oxid_state_guesses.

        Args:
            oxi_states_override (dict): dict of str->list to override an
                element's common oxidation states, e.g. {"V": [2,3,4,5]}
            target_charge (int): the desired total charge on the structure.
                Default is 0 signifying charge balance.
            all_oxi_states (bool): if True, an element defaults to
                all oxidation states in pymatgen Element.icsd_oxidation_states.
                Otherwise, default is Element.common_oxidation_states. Note
                that the full oxidation state list is *very* inclusive and
                can produce nonsensical results.
            max_sites (int): if possible, will reduce Compositions to at most
                this many sites to speed up oxidation state guesses. If the
                composition cannot be reduced to this many sites a ValueError
                will be raised. Set to -1 to just reduce fully. If set to a
                number less than -1, the formula will be fully reduced but a
                ValueError will be thrown if the number of atoms in the reduced
                formula is greater than abs(max_sites).

        Returns:
            A list of dicts - each dict reports an element symbol and average
                oxidation state across all sites in that composition. If the
                composition is not charge balanced, an empty list is returned.
        """
        results = self._get_oxid_state_guesses(all_oxi_states, max_sites, oxi_states_override, target_charge)
        return results[0]
1✔
761

762
    def replace(self, elem_map: dict[str, str | dict[str, int | float]]) -> Composition:
        """
        Replace elements in a composition. Returns a new Composition, leaving the old one unchanged.

        The elem_map passed in by the caller is not modified.

        Args:
            elem_map (dict[str, str | dict[str, int | float]]): dict of elements or species to swap. E.g.
                {"Li": "Na"} performs a Li for Na substitution. The target can be a {species: factor} dict. For
                example, in Fe2O3 you could map {"Fe": {"Mg": 0.5, "Cu":0.5}} to obtain MgCuO3.

        Returns:
            Composition: New object with elements remapped according to elem_map.
        """
        # warn about substitutions whose source element is absent from this composition
        invalid_elems = [key for key in elem_map if key not in self]
        if invalid_elems:
            warnings.warn(
                "Some elements to be substituted are not present in composition. Please check your input. "
                f"Problematic element = {invalid_elems}; {self}"
            )

        # Keep only the applicable substitutions. A filtered copy is used (rather than
        # popping from elem_map) so that the caller's dict is left untouched.
        applicable = {old: new for old, new in elem_map.items() if old in self}

        # start with elements that remain unchanged (not being substituted)
        new_comp = {elem: amount for elem, amount in self.as_dict().items() if elem not in applicable}

        for old_elem, new_elem in applicable.items():
            amount = self[old_elem]

            # build a dictionary of substitutions to be made
            if isinstance(new_elem, dict):
                subs = {el: factor * amount for el, factor in new_elem.items()}
            else:
                subs = {new_elem: amount}

            # and apply the substitutions to the new composition
            for el, amt in subs.items():
                if el in new_comp:
                    new_comp[el] += amt
                else:
                    new_comp[el] = amt

                # check for ambiguous input (see issue #2553)
                if el in self:
                    warnings.warn(
                        f"Same element ({el}) in both the keys and values of the substitution! "
                        "This can be ambiguous, so be sure to check your result."
                    )

        return Composition(new_comp)
1✔
813

814
    def add_charges_from_oxi_state_guesses(
        self,
        oxi_states_override: dict | None = None,
        target_charge: float = 0,
        all_oxi_states: bool = False,
        max_sites: int | None = None,
    ) -> Composition:
        """
        Decorate the composition with guessed oxidation states.

        The guessing itself is described in `oxi_state_guesses`. Of all the
        solutions found, the highest-ranked one is used, and each site of an
        element receives the oxidation state assigned to it in that solution.

        Args:
            oxi_states_override (dict): str -> list mapping that replaces the
                candidate oxidation states of an element,
                e.g. {"V": [2, 3, 4, 5]}.
            target_charge (float): total charge the solution has to add up to.
                The default of 0 means charge neutrality.
            all_oxi_states (bool): if True, the candidates for an element
                without an override are taken from Element.oxidation_states.
                Otherwise Element.icsd_oxidation_states is used, falling back
                to Element.oxidation_states when that is empty.
            max_sites (int): if given and positive, the composition is reduced
                to at most this many sites before guessing, and a ValueError
                is raised when that is impossible. -1 means "reduce fully".
                A value below -1 also reduces fully, but raises a ValueError
                if the reduced formula has more than abs(max_sites) atoms.

        Returns:
            Composition whose keys are Species carrying the guessed oxidation
            states. If no solution exists, every species of the returned
            Composition has oxidation state 0.
        """
        _, per_site_states = self._get_oxid_state_guesses(
            all_oxi_states, max_sites, oxi_states_override, target_charge
        )

        # No charge-balanced assignment found: fall back to neutral species
        if not per_site_states:
            neutral = {Species(el, 0): amt for el, amt in self.items()}
            return Composition(neutral)

        # One Species per site, taken from the best-ranked solution
        best_solution = per_site_states[0]
        decorated = [Species(el, charge) for el, charges in best_solution.items() for charge in charges]

        # Counting identical species yields the amounts of the new Composition
        return Composition(collections.Counter(decorated))
1✔
867

868
    def remove_charges(self) -> Composition:
        """
        Strip the oxidation states from all species of the Composition.

        Amounts of species that share the same element symbol are summed.

        Returns:
            Composition keyed by plain Elements, for example
            {"Fe3+": 2.0, "O2-":3.0} becomes {"Fe": 2.0, "O":3.0}
        """
        totals: dict[Element, float] = collections.defaultdict(float)
        for species, amount in self.items():
            bare_element = Element(species.symbol)
            totals[bare_element] += amount
        return Composition(totals)
1✔
880

881
    def _get_oxid_state_guesses(self, all_oxi_states, max_sites, oxi_states_override, target_charge):
        """
        Utility operation for guessing oxidation states.

        See `oxi_state_guesses` for full details. This operation does the
        calculation of the most likely oxidation states.

        Args:
            all_oxi_states (bool): if True, the candidate oxidation states of
                an element without an override are Element.oxidation_states.
                Otherwise Element.icsd_oxidation_states is used, falling back
                to Element.oxidation_states when that is empty. Note that the
                full oxidation state list is *very* inclusive and can produce
                nonsensical results.
            max_sites (int): if possible, will reduce Compositions to at most
                this many sites to speed up oxidation state guesses. If the
                composition cannot be reduced to this many sites a ValueError
                will be raised. Set to -1 to just reduce fully. If set to a
                number less than -1, the formula will be fully reduced but a
                ValueError will be thrown if the number of atoms in the reduced
                formula is greater than abs(max_sites).
            oxi_states_override (dict): dict of str->list to override an
                element's candidate oxidation states, e.g. {"V": [2,3,4,5]}.
                None or an empty dict means no override.
            target_charge (float): the desired total charge on the structure.
                0 signifies charge balance.

        Returns:
            A 2-tuple (all_sols, all_oxid_combo). Both members are empty lists
            when no solution exists; otherwise they are tuples of equal length,
            sorted from the highest to the lowest score:

            all_sols: each dict maps an element symbol to its average
                oxidation state across all sites of that element.
            all_oxid_combo: each dict maps an element symbol to the
                best-scoring combination of per-site oxidation states, one
                value per site of that element (e.g. three values for the Fe
                sites of Fe3O4, such as (2, 3, 3)).

        Raises:
            ValueError: if the composition cannot satisfy max_sites, or if it
                contains non-integer amounts.
        """
        comp = self.copy()
        # reduce Composition if necessary
        if max_sites and max_sites < 0:
            comp = self.reduced_composition

            if max_sites < -1 and comp.num_atoms > abs(max_sites):
                raise ValueError(f"Composition {comp} cannot accommodate max_sites setting!")

        elif max_sites and comp.num_atoms > max_sites:
            reduced_comp, reduced_factor = self.get_reduced_composition_and_factor()
            if reduced_factor > 1:
                reduced_comp *= max(1, int(max_sites / reduced_comp.num_atoms))
                comp = reduced_comp  # as close to max_sites as possible
            if comp.num_atoms > max_sites:
                raise ValueError(f"Composition {comp} cannot accommodate max_sites setting!")

        # Load prior probabilities of oxidation states, used to rank solutions.
        # The table is cached on the class so the file is only read once.
        if not Composition.oxi_prob:
            module_dir = os.path.dirname(os.path.abspath(__file__))
            all_data = loadfn(os.path.join(module_dir, "..", "analysis", "icsd_bv.yaml"))
            Composition.oxi_prob = {Species.from_string(sp): data for sp, data in all_data["occurrence"].items()}
        oxi_states_override = oxi_states_override or {}

        # assert: Composition only has integer amounts
        if not all(amt == int(amt) for amt in comp.values()):
            raise ValueError("Charge balance analysis requires integer values in Composition!")

        # for each element, determine all possible sum of oxidations
        # (taking into account nsites for that particular element)
        el_amt = comp.get_el_amt_dict()
        elements = list(el_amt)
        el_sums = []  # matrix: dim1= el_idx, dim2=possible sums (in order of first appearance)
        el_sum_scores = {}  # dict of el_idx -> {sum -> best score}
        el_best_oxid_combo = {}  # dict of el_idx -> {sum -> oxid combo with best score}
        for idx, el in enumerate(elements):
            sums = []
            scores = {}
            best_combos = {}
            el_sums.append(sums)
            el_sum_scores[idx] = scores
            el_best_oxid_combo[idx] = best_combos

            if oxi_states_override.get(el):
                oxids = oxi_states_override[el]
            elif all_oxi_states:
                oxids = Element(el).oxidation_states
            else:
                oxids = Element(el).icsd_oxidation_states or Element(el).oxidation_states

            # get all possible combinations of oxidation states
            # and sum each combination
            for oxid_combo in combinations_with_replacement(oxids, int(el_amt[el])):
                oxid_sum = sum(oxid_combo)

                # Determine how probable is this combo?
                score = sum(Composition.oxi_prob.get(Species(el, o), 0) for o in oxid_combo)

                if oxid_sum not in scores:
                    # First time this sum shows up: list it as a possible option.
                    # `scores` and `sums` always hold the same sums, so the dict
                    # doubles as an O(1) membership test for the list.
                    sums.append(oxid_sum)
                    scores[oxid_sum] = score
                    best_combos[oxid_sum] = oxid_combo
                elif score > scores[oxid_sum]:
                    # A more probable combo for an already known sum
                    scores[oxid_sum] = score
                    best_combos[oxid_sum] = oxid_combo

        # Determine which combination of oxidation states for each element
        #    is the most probable
        all_sols = []  # will contain all solutions
        all_oxid_combo = []  # will contain the best combination of oxidation states for each site
        all_scores = []  # will contain a score for each solution
        for x in product(*el_sums):
            # each x is a trial of one possible oxidation sum for each element
            if sum(x) == target_charge:  # charge balance condition
                # normalize oxid_sum by amount to get avg oxid state
                all_sols.append({el: v / el_amt[el] for el, v in zip(elements, x)})

                # the score of a solution is the sum of its per-element scores
                all_scores.append(sum(el_sum_scores[idx][v] for idx, v in enumerate(x)))

                # collect the combination of oxidation states for each site
                all_oxid_combo.append({e: el_best_oxid_combo[idx][v] for idx, (e, v) in enumerate(zip(elements, x))})

        # sort the solutions by highest to lowest score; sorting on the score
        # alone keeps tied solutions in their order of discovery
        if all_scores:
            ranked = sorted(
                zip(all_scores, all_sols, all_oxid_combo),
                key=lambda triple: triple[0],
                reverse=True,
            )
            all_sols, all_oxid_combo = zip(*((sol, combo) for _, sol, combo in ranked))
        return all_sols, all_oxid_combo
1✔
1012

1013
    @staticmethod
    def ranked_compositions_from_indeterminate_formula(
        fuzzy_formula: str, lock_if_strict: bool = True
    ) -> list[Composition]:
        """
        Suggest Compositions for a formula whose capitalization may be wrong,
        best interpretation first.
        Author: Anubhav Jain

        Args:
            fuzzy_formula (str): A formula string, such as "co2o3" or "MN",
                that may or may not have multiple interpretations
            lock_if_strict (bool): If true, a formula that already parses
                strictly yields only that single interpretation. For example,
                "Co1" will only return "Co1" if true, but will return both
                "Co1" and "C1 O1" if false.

        Returns:
            A ranked list of potential Composition matches
        """
        if lock_if_strict:
            # Strict parsing may fail with a ValueError; in that case fall
            # through to fuzzy matching. On success the exact match wins.
            try:
                exact = Composition(fuzzy_formula)
            except ValueError:
                pass
            else:
                return [exact]

        # a set drops duplicate (composition, points) interpretations
        candidates = set(Composition._comps_from_fuzzy_formula(fuzzy_formula))

        # highest points first, ties broken by the compositions themselves
        ordered = sorted(candidates, key=lambda pair: (pair[1], pair[0]), reverse=True)

        return [composition for composition, _points in ordered]
1✔
1051

1052
    @staticmethod
    def _comps_from_fuzzy_formula(
        fuzzy_formula: str,
        m_dict: dict[str, float] | None = None,
        m_points: int = 0,
        factor: int | float = 1,
    ) -> Generator[tuple[Composition, int], None, None]:
        """
        A recursive helper method for formula parsing that helps in
        interpreting and ranking indeterminate formulas.
        Author: Anubhav Jain

        Args:
            fuzzy_formula (str): A formula string, such as "co2o3" or "MN",
                that may or may not have multiple interpretations.
            m_dict (dict): A symbol:amt dictionary from the previously parsed
                formula.
            m_points: Number of points gained from the previously parsed
                formula.
            factor: Coefficient for this parse, e.g. (PO4)2 will feed in PO4
                as the fuzzy_formula with a coefficient of 2.

        Yields:
            tuple[Composition, int]: one tuple per interpretation, with the first element being a Composition
                and the second element being the number of points awarded that Composition interpretation.
        """
        m_dict = m_dict or {}

        def _parse_chomp_and_rank(m, f, m_dict, m_points):
            """
            A helper method for formula parsing that helps in interpreting and
            ranking indeterminate formulas
            Author: Anubhav Jain

            Args:
                m: A regex match, with the first group being the element and
                    the second group being the amount
                f: The formula part containing the match
                m_dict: A symbol:amt dictionary from the previously parsed
                    formula
                m_points: Number of points gained from the previously parsed
                    formula

            Returns:
                A tuple of (f, m_dict, points) where m_dict now contains data
                from the match and the match has been removed (chomped) from
                the formula f. The "goodness" of the match determines the
                number of points returned for chomping. Returns
                (None, None, None) if no element could be found...
            """
            points = 0
            # Points awarded if the first letter of the element is correctly
            # specified as a capital
            points_first_capital = 100
            # Points awarded if the second letter of the element is correctly
            # specified as lowercase
            points_second_lowercase = 100

            # get element and amount from regex match
            el = m.group(1)
            if len(el) > 2 or len(el) < 1:
                raise ValueError("Invalid element symbol entered!")
            amt = float(m.group(2)) if m.group(2).strip() != "" else 1

            # convert the element string to proper [uppercase,lowercase] format
            # and award points if it is already in that format
            char1 = el[0]
            char2 = el[1] if len(el) > 1 else ""

            if char1 == char1.upper():
                points += points_first_capital
            if char2 and char2 == char2.lower():
                points += points_second_lowercase

            el = char1.upper() + char2.lower()

            # if it's a valid element, chomp and add to the points
            if Element.is_valid_symbol(el):
                if el in m_dict:
                    m_dict[el] += amt * factor
                else:
                    m_dict[el] = amt * factor
                return f.replace(m.group(), "", 1), m_dict, m_points + points

            # else return None
            return None, None, None

        fuzzy_formula = fuzzy_formula.strip()

        if len(fuzzy_formula) == 0:
            # The entire formula has been parsed into m_dict. Return the
            # corresponding Composition and number of points
            if m_dict:
                yield (Composition.from_dict(m_dict), m_points)
            return

        # if there is a parenthesis, remove it and match the remaining stuff
        # with the appropriate factor
        for mp in re.finditer(r"\(([^\(\)]+)\)([\.\d]*)", fuzzy_formula):
            mp_points = m_points
            mp_form = fuzzy_formula.replace(mp.group(), " ", 1)
            mp_dict = dict(m_dict)
            mp_factor = 1 if mp.group(2) == "" else float(mp.group(2))
            # Match the stuff inside the parenthesis with the appropriate
            # factor
            for match in Composition._comps_from_fuzzy_formula(mp.group(1), mp_dict, mp_points, factor=mp_factor):
                only_me = True
                # Match the stuff outside the parentheses and return the
                # sum.
                for match2 in Composition._comps_from_fuzzy_formula(mp_form, mp_dict, mp_points, factor=1):
                    only_me = False
                    yield (match[0] + match2[0], match[1] + match2[1])
                # if the stuff outside the parentheses is nothing, then just
                # return the stuff inside the parentheses
                if only_me:
                    yield match
            # only the first parenthesized group is handled at this level;
            # the recursion above deals with whatever remains
            return

        # Try the single-letter interpretation first, then the two-letter one.
        # [A-Za-z] is used on purpose: the range [A-z] would also match the
        # punctuation characters that sit between "Z" and "a" in ASCII.
        for pattern in (r"([A-Za-z])([\.\d]*)", r"([A-Za-z]{2})([\.\d]*)"):
            symbol_match = re.match(pattern, fuzzy_formula)
            if not symbol_match:
                continue
            # work on a copy so that both interpretations start from the same state
            rest, parsed, pts = _parse_chomp_and_rank(symbol_match, fuzzy_formula, dict(m_dict), m_points)
            if parsed:
                # there was a real match
                yield from Composition._comps_from_fuzzy_formula(rest, parsed, pts, factor)
1✔
1193

1194

1195
def reduce_formula(sym_amt, iupac_ordering: bool = False) -> tuple[str, float]:
    """
    Turn a {symbol: amount} dict into a reduced formula string and the factor
    the amounts were divided by.

    Args:
        sym_amt (dict): {symbol: amount}.
        iupac_ordering (bool, optional): Whether to order the
            formula by the iupac "electronegativity" series, defined in
            Table VI of "Nomenclature of Inorganic Chemistry (IUPAC
            Recommendations 2005)". This ordering effectively follows
            the groups and rows of the periodic table, except the
            Lanthanides, Actinides and hydrogen. Note that polyanions
            will still be determined based on the true electronegativity of
            the elements.

    Returns:
        (reduced_formula, factor).
    """
    # order by electronegativity (symbol as tie-breaker), then drop the
    # symbols whose amount is negligible
    by_electronegativity = sorted(sym_amt, key=lambda sym: [get_el_sp(sym).X, sym])
    syms = [sym for sym in by_electronegativity if abs(sym_amt[sym]) > Composition.amount_tolerance]

    # A common factor is only extracted when every amount is integral
    amounts = sym_amt.values()
    factor = 1
    if all(int(amt) == amt for amt in amounts):
        factor = abs(gcd(*(int(amt) for amt in amounts)))

    # The two most electronegative symbols are treated as a polyanion when
    # their electronegativities are close and they share a common factor
    polyanion = []
    if len(syms) >= 3 and get_el_sp(syms[-1]).X - get_el_sp(syms[-2]).X < 1.65:
        poly_sym_amt = {sym: sym_amt[sym] / factor for sym in syms[-2:]}
        poly_form, poly_factor = reduce_formula(poly_sym_amt, iupac_ordering=iupac_ordering)

        if poly_factor != 1:
            polyanion.append(f"({poly_form}){poly_factor}")

    # symbols absorbed into the polyanion are not listed separately
    if polyanion:
        syms = syms[:-2]

    if iupac_ordering:
        syms = sorted(syms, key=lambda sym: [get_el_sp(sym).iupac_ordering, sym])

    pieces = []
    for sym in syms:
        pieces.append(sym)
        pieces.append(formula_double_format(sym_amt[sym] * 1.0 / factor))

    return "".join(pieces + polyanion), factor  # type: ignore
1✔
1244

1245

1246
class ChemicalPotential(dict, MSONable):
    """
    Class to represent set of chemical potentials. Can be: multiplied/divided by a Number
    multiplied by a Composition (returns an energy) added/subtracted with other ChemicalPotentials.
    """

    def __init__(self, *args, **kwargs):
        """
        Args:
            *args, **kwargs: any valid dict init arguments

        Raises:
            ValueError: if two of the given keys resolve to the same
                element/species.
        """
        d = dict(*args, **kwargs)
        super().__init__((get_el_sp(k), v) for k, v in d.items())
        # keys that collapse onto the same element/species shrink the dict
        if len(d) != len(self):
            raise ValueError("Duplicate potential specified")

    def __mul__(self, other: object) -> ChemicalPotential:
        """Scale every potential by a number."""
        if isinstance(other, (int, float)):
            return ChemicalPotential({k: v * other for k, v in self.items()})
        return NotImplemented

    __rmul__ = __mul__

    def __truediv__(self, other: object) -> ChemicalPotential:
        """Divide every potential by a number."""
        if isinstance(other, (int, float)):
            return ChemicalPotential({k: v / other for k, v in self.items()})
        return NotImplemented

    __div__ = __truediv__

    def __sub__(self, other: object) -> ChemicalPotential:
        """Subtract another ChemicalPotential; a missing key counts as 0."""
        if isinstance(other, ChemicalPotential):
            els = {*self} | {*other}
            return ChemicalPotential({e: self.get(e, 0) - other.get(e, 0) for e in els})
        return NotImplemented

    def __add__(self, other: object) -> ChemicalPotential:
        """Add another ChemicalPotential; a missing key counts as 0."""
        if isinstance(other, ChemicalPotential):
            els = {*self} | {*other}
            return ChemicalPotential({e: self.get(e, 0) + other.get(e, 0) for e in els})
        return NotImplemented

    def get_energy(self, composition: Composition, strict: bool = True) -> float:
        """
        Calculates the energy of a composition.

        Args:
            composition (Composition): input composition
            strict (bool): Whether all potentials must be specified

        Returns:
            Sum over the composition of potential * amount. With strict=False,
            an element without a potential contributes 0.

        Raises:
            ValueError: if strict is True and the composition contains an
                element for which no potential is specified.
        """
        if strict:
            # Any element without a potential is an error. A set difference is
            # used because a strict-superset test misses the case where the
            # potentials also cover elements absent from the composition.
            missing = set(composition) - set(self)
            if missing:
                raise ValueError(f"Potentials not specified for {missing}")
        return sum(self.get(k, 0) * v for k, v in composition.items())

    def __repr__(self):
        return "ChemPots: " + super().__repr__()
×
1303

1304

1305
class CompositionError(Exception):
    """Exception raised for errors related to compositions."""
1307

1308

1309
if __name__ == "__main__":
    # Executed as a script: run the doctests found in this module.
    from doctest import testmod

    testmod()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc