• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

materialsproject / pymatgen / 4075885785

pending completion
4075885785

push

github

Shyue Ping Ong
Merge branch 'master' of github.com:materialsproject/pymatgen

96 of 96 new or added lines in 27 files covered. (100.0%)

81013 of 102710 relevant lines covered (78.88%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.88
/pymatgen/entries/mixing_scheme.py
1
# Copyright (c) Pymatgen Development Team.
2
# Distributed under the terms of the MIT License.
3
"""
1✔
4
This module implements Compatibility corrections for mixing runs of different
5
functionals.
6
"""
7

8
from __future__ import annotations
1✔
9

10
import os
1✔
11
import warnings
1✔
12
from itertools import groupby
1✔
13

14
import numpy as np
1✔
15
import pandas as pd
1✔
16

17
from pymatgen.analysis.phase_diagram import PhaseDiagram
1✔
18
from pymatgen.analysis.structure_matcher import StructureMatcher
1✔
19
from pymatgen.entries.compatibility import (
1✔
20
    Compatibility,
21
    CompatibilityError,
22
    MaterialsProject2020Compatibility,
23
)
24
from pymatgen.entries.computed_entries import (
1✔
25
    ComputedEntry,
26
    ComputedStructureEntry,
27
    ConstantEnergyAdjustment,
28
)
29
from pymatgen.entries.entry_tools import EntrySet
1✔
30

31
# Absolute path of the directory that contains this module.
MODULE_DIR = os.path.dirname(os.path.abspath(__file__))

# Module metadata: authorship and contact.
__author__ = "Ryan Kingsbury"
__email__ = "RKingsbury@lbl.gov"
__copyright__ = "Copyright 2019-2021, The Materials Project"

# Module metadata: release information.
__version__ = "0.1"
__date__ = "October 2021"
38

39

40
class MaterialsProjectDFTMixingScheme(Compatibility):
1✔
41
    """
42
    This class implements the Materials Project mixing scheme, which allows mixing of
43
    energies from different DFT functionals. Note that this should only be used for
44
    VASP calculations using the MaterialsProject parameters (e.g. MPRelaxSet or
45
    MPScanRelaxSet). Using this compatibility scheme on runs with different parameters
46
    may lead to unexpected results.
47

48
    This is the scheme used by the Materials Project to generate Phase Diagrams containing
49
    a mixture of GGA(+U) and R2SCAN calculations. However in principle it can be used to
50
    mix energies from any two functionals.
51
    """
52

53
    def __init__(
        self,
        structure_matcher: StructureMatcher | None = None,
        run_type_1: str = "GGA(+U)",
        run_type_2: str = "R2SCAN",
        compat_1: Compatibility | None = MaterialsProject2020Compatibility(),  # noqa: B008
        compat_2: Compatibility | None = None,
        fuzzy_matching: bool = True,
    ):
        """
        Store the settings used to mix entries from two DFT functionals.

        Nothing is processed here; the constructor only records the StructureMatcher,
        the two run types and the per-run-type Compatibility objects that are used
        later when groups of entries are processed.

        Args:
            structure_matcher (StructureMatcher): StructureMatcher object used to decide
                whether calculations from different functionals describe the same
                material. A default-constructed StructureMatcher is used when omitted.
            run_type_1: The first DFT run_type. Typically this is the majority run type,
                i.e. the "base case" onto which the other calculations are referenced.
                Valid choices are any run_type recognized by Vasprun.run_type, such as
                "LDA", "GGA", "GGA+U", "PBEsol", "SCAN", or "R2SCAN". Entries whose
                run_type is neither run_type_1 nor run_type_2 are ignored.

                The run_type_1 entries provided to process_entries MUST form a complete
                Phase Diagram in order for the mixing scheme to work. If this condition
                is not satisfied, processing the entries will fail.

                The special string "GGA(+U)" (default) treats both GGA and GGA+U
                calculations as a single type. This option exists because GGA/GGA+U
                mixing is already handled by MaterialsProject2020Compatibility.
            run_type_2: The second DFT run_type. Typically this is the run_type that is
                'preferred' but has fewer calculations. If run_type_1 and run_type_2
                calculations exist for all materials, run_type_2 energies will be used
                (hence the 'preferred' status). Entries whose run_type is neither
                run_type_1 nor run_type_2 are ignored.
            compat_1: Compatibility class used to pre-process entries of run_type_1.
                Defaults to a MaterialsProject2020Compatibility instance.
            compat_2: Compatibility class used to pre-process entries of run_type_2.
                Defaults to None.
            fuzzy_matching: Whether to use less strict structure matching logic for
                O2, N2, F2, H2, Cl2, I, Br and H2O. Outputs of DFT relaxations using
                different functionals frequently fail to structure match for these
                materials even though they come from the same original material. Fuzzy
                structure matching considers the materials equivalent if the formula,
                number of sites, and space group are all identical. If there are
                multiple materials of run_type_2 that satisfy these criteria, the one
                with lowest energy is considered to match.

        Raises:
            ValueError: if run_type_1 and run_type_2 are the same string.
        """
        self.name = "MP DFT mixing scheme"
        self.structure_matcher = structure_matcher if structure_matcher else StructureMatcher()

        # Mixing a functional with itself has no meaning, so reject it up front.
        if run_type_1 == run_type_2:
            raise ValueError(
                f"You specified the same run_type {run_type_1} for both run_type_1 and run_type_2. "
                "The mixing scheme is meaningless unless run_type_1 and run_type_2 are different"
            )

        self.run_type_1 = run_type_1
        self.run_type_2 = run_type_2

        # "GGA(+U)" is shorthand for "either GGA or GGA+U"; any other value stands for itself.
        combined_gga = "GGA(+U)"
        self.valid_rtypes_1 = ["GGA", "GGA+U"] if run_type_1 == combined_gga else [run_type_1]
        self.valid_rtypes_2 = ["GGA", "GGA+U"] if run_type_2 == combined_gga else [run_type_2]

        self.compat_1 = compat_1
        self.compat_2 = compat_2
        self.fuzzy_matching = fuzzy_matching
123

124
    def process_entries(
        self,
        entries: ComputedStructureEntry | ComputedEntry | list,
        clean: bool = True,
        verbose: bool = True,
        mixing_state_data=None,
    ):
        """
        Process a sequence of entries with the DFT mixing scheme. Note
        that this method will change the data of the original entries.

        Args:
            entries: ComputedEntry or [ComputedEntry]. Pass all entries as a single list, even if they are
                computed with different functionals or require different preprocessing. This list will
                automatically be filtered based on run_type_1 and run_type_2, and processed according to
                compat_1 and compat_2.

                Note that under typical use, when mixing_state_data=None, the entries MUST be
                ComputedStructureEntry. They will be matched using structure_matcher.

                The scheme cannot work on a single entry: a bare entry or a one-element list
                triggers a warning and an empty list is returned.
            clean: bool, whether to remove any previously-applied energy adjustments.
                If True, all EnergyAdjustment are removed prior to processing the Entry.
                Default is True.
            verbose: bool, whether to print verbose error messages about the mixing scheme. Default is True.
            mixing_state_data: A DataFrame containing information about which Entries
                correspond to the same materials, which are stable on the phase diagrams of
                the respective run_types, etc. If None (default), it will be generated from the
                list of entries using MaterialsProjectDFTMixingScheme.get_mixing_state_data.
                This argument is included to facilitate use of the mixing scheme in high-throughput
                databases where an alternative to get_mixing_state_data is desirable for performance
                reasons. In general, it should always be left at the default value (None) to avoid
                inconsistencies between the mixing state data and the properties of the
                ComputedStructureEntry in entries.

        Returns:
            A list of adjusted entries. Entries in the original list which
            are not compatible are excluded.
        """
        processed_entry_list: list = []

        # We can't operate on single entries in this scheme. A bare entry has no len(),
        # so it is checked explicitly before the length test.
        if isinstance(entries, ComputedEntry) or len(entries) == 1:
            warnings.warn(f"{type(self).__name__} cannot process single entries. Supply a list of entries.")
            return processed_entry_list

        # if clean is True, remove all previous adjustments from the entry
        # this code must be placed before the next block, because we don't want to remove
        # any corrections added by compat_1 or compat_2.
        if clean:
            for entry in entries:
                # Empty the list in place. Removing items one at a time while iterating
                # over the same list skips every other adjustment.
                entry.energy_adjustments.clear()

        entries_type_1, entries_type_2 = self._filter_and_sort_entries(entries, verbose=verbose)

        if mixing_state_data is None:
            if verbose:
                print("  Generating mixing state data from provided entries.")
            mixing_state_data = self.get_mixing_state_data(entries_type_1 + entries_type_2, verbose=False)

        if verbose:
            # how many stable entries from run_type_1 do we have in run_type_2?
            hull_entries_2 = 0
            stable_df = mixing_state_data[mixing_state_data["is_stable_1"]]
            if len(stable_df) > 0:
                hull_entries_2 = sum(stable_df["energy_2"].notna())
            print(
                f"  Entries contain {self.run_type_2} calculations for {hull_entries_2} of {len(stable_df)} "
                f"{self.run_type_1} hull entries."
            )
            if hull_entries_2 == len(stable_df):
                print(f"  {self.run_type_1} energies will be adjusted to the {self.run_type_2} scale")
            else:
                print(f"  {self.run_type_2} energies will be adjusted to the {self.run_type_1} scale")

            if hull_entries_2 > 0:
                print(
                    f"  The energy above hull for {self.run_type_2} materials at compositions with "
                    f"{self.run_type_2} hull entries will be preserved. For other compositions, "
                    f"Energies of {self.run_type_2} materials will be set equal to those of "
                    f"matching {self.run_type_1} materials"
                )

        # the code below is identical to code inside process_entries in the base
        # Compatibility class, except that an extra kwarg is passed to get_adjustments
        for entry in entries_type_1 + entries_type_2:
            ignore_entry = False
            # get the energy adjustments
            try:
                adjustments = self.get_adjustments(entry, mixing_state_data)
            except CompatibilityError as exc:
                # Messages flagged with "WARNING!" are always surfaced as warnings;
                # routine discard notices are only printed in verbose mode.
                if "WARNING!" in str(exc):
                    warnings.warn(str(exc))
                elif verbose:
                    print(f"  {exc}")
                # the entry is excluded from the output
                continue

            for ea in adjustments:
                # Has this correction already been applied?
                if (ea.name, ea.cls, ea.value) in [(ea2.name, ea2.cls, ea2.value) for ea2 in entry.energy_adjustments]:
                    # we already applied this exact correction. Do nothing.
                    pass
                elif (ea.name, ea.cls) in [(ea2.name, ea2.cls) for ea2 in entry.energy_adjustments]:
                    # we already applied a correction with the same name
                    # but a different value. Something is wrong.
                    ignore_entry = True
                    warnings.warn(
                        f"Entry {entry.entry_id} already has an energy adjustment called {ea.name}, but its "
                        f"value differs from the value of {ea.value:.3f} calculated here. This "
                        "Entry will be discarded."
                    )
                else:
                    # Add the correction to the energy_adjustments list
                    entry.energy_adjustments.append(ea)

            if not ignore_entry:
                processed_entry_list.append(entry)

        if verbose:
            count_type_1 = len([e for e in processed_entry_list if e.parameters["run_type"] in self.valid_rtypes_1])
            count_type_2 = len([e for e in processed_entry_list if e.parameters["run_type"] in self.valid_rtypes_2])
            print(
                f"\nProcessing complete. Mixed entries contain {count_type_1} {self.run_type_1} and {count_type_2} "
                f"{self.run_type_2} entries.\n"
            )
            self.display_entries(processed_entry_list)

        return processed_entry_list
252

253
    def get_adjustments(self, entry, mixing_state_data: pd.DataFrame | None = None):
        """
        Returns the corrections applied to a particular entry. Note that get_adjustments is not
        intended to be called directly in the R2SCAN mixing scheme. Call process_entries instead,
        and it will pass the required arguments to get_adjustments.

        Args:
            entry: A ComputedEntry object. The entry must be a member of the list of entries
                used to create mixing_state_data.
            mixing_state_data: A DataFrame containing information about which Entries
                correspond to the same materials, which are stable on the phase diagrams of
                the respective run_types, etc. Can be generated from a list of entries using
                MaterialsProjectDFTMixingScheme.get_mixing_state_data. Although the default is
                None for signature compatibility, passing None raises CompatibilityError.

        Returns:
            [EnergyAdjustment]: Energy adjustments to be applied to entry. The list is empty
                when the entry is kept without any correction.

        Raises:
            CompatibilityError if the DFT mixing scheme cannot be applied to the entry. Messages
                that start with "WARNING!" indicate a problem with the input; the other messages
                report that an entry is discarded as part of normal operation of the scheme.
        """
        adjustments: list[ConstantEnergyAdjustment] = []
        run_type = entry.parameters.get("run_type")

        if mixing_state_data is None:
            raise CompatibilityError(
                "WARNING! `mixing_state_data` DataFrame is None. No energy adjustments will be applied."
            )

        # A missing run_type_1 hull energy is only an error if run_type_1 entries are present at all
        if not all(mixing_state_data["hull_energy_1"].notna()) and any(mixing_state_data["entry_id_1"].notna()):
            raise CompatibilityError(
                f"WARNING! {self.run_type_1} entries do not form a complete PhaseDiagram."
                " No energy adjustments will be applied."
            )

        if run_type not in self.valid_rtypes_1 + self.valid_rtypes_2:
            raise CompatibilityError(
                f"WARNING! Invalid run_type {run_type} for entry {entry.entry_id}. Must be one of "
                f"{self.valid_rtypes_1 + self.valid_rtypes_2}. This entry will be ignored."
            )

        # Verify that the entry is included in the mixing state data
        if (entry.entry_id not in mixing_state_data["entry_id_1"].values) and (
            entry.entry_id not in mixing_state_data["entry_id_2"].values
        ):
            raise CompatibilityError(
                f"WARNING! Discarding {run_type} entry {entry.entry_id} for {entry.composition.formula} "
                f"because it was not found in the mixing state data. This can occur when there are duplicate "
                "structures. In such cases, only the lowest energy entry with that structure appears in the "
                "mixing state data."
            )

        # Verify that the entry's energy has not been modified since mixing state data was generated
        if (entry.energy_per_atom not in mixing_state_data["energy_1"].values) and (
            entry.energy_per_atom not in mixing_state_data["energy_2"].values
        ):
            raise CompatibilityError(
                f"WARNING! Discarding {run_type} entry {entry.entry_id} for {entry.composition.formula} "
                "because it's energy has been modified since the mixing state data was generated."
            )

        # Compute the energy correction for mixing. The correction value depends on how many of the
        # run_type_1 stable entries are present as run_type_2 calculations.
        # entry_id_2 of every row that is stable in run_type_1 (NaN where no run_type_2 match exists)
        stable_ids_2 = mixing_state_data[mixing_state_data["is_stable_1"]]["entry_id_2"]

        # First case - ALL run_type_1 stable entries are present in run_type_2
        # In this scenario we construct the hull using run_type_2 energies. We discard any
        # run_type_1 entries that already exist in run_type_2 and correct other run_type_1
        # energies to have the same e_above_hull on the run_type_2 hull as they had on the run_type_1 hull
        if all(stable_ids_2.notna()):
            if run_type in self.valid_rtypes_2:  # pylint: disable=R1705
                # For run_type_2 entries, there is no correction
                return adjustments

            # Discard GGA ground states whose structures already exist in R2SCAN.
            else:
                df_slice = mixing_state_data[(mixing_state_data["entry_id_1"] == entry.entry_id)]

                if df_slice["entry_id_2"].notna().item():
                    # there is a matching run_type_2 entry, so we will discard this entry
                    if df_slice["is_stable_1"].item():
                        # this is a GGA ground state.
                        raise CompatibilityError(
                            f"Discarding {run_type} entry {entry.entry_id} for {entry.composition.formula} "
                            f"because it is a {self.run_type_1} ground state that matches a {self.run_type_2} "
                            "material."
                        )

                    raise CompatibilityError(
                        f"Discarding {run_type} entry {entry.entry_id} for {entry.composition.formula} "
                        f"because there is a matching {self.run_type_2} material."
                    )

                # If a GGA is not present in R2SCAN, correct its energy to give the same
                # e_above_hull on the R2SCAN hull that it would have on the GGA hull
                hull_energy_1 = df_slice["hull_energy_1"].iloc[0]
                hull_energy_2 = df_slice["hull_energy_2"].iloc[0]
                # hull energies are per atom, the adjustment applies to the whole entry
                correction = (hull_energy_2 - hull_energy_1) * entry.composition.num_atoms

                adjustments.append(
                    ConstantEnergyAdjustment(
                        correction,
                        0.0,
                        name=f"MP {self.run_type_1}/{self.run_type_2} mixing adjustment",
                        cls=self.as_dict(),
                        description=f"Place {self.run_type_1} energy onto the {self.run_type_2} hull",
                    )
                )
                return adjustments

        # Second case - there are run_type_2 energies available for at least some run_type_1
        # stable entries. Here, we can correct run_type_2 energies at certain compositions
        # to preserve their e_above_hull on the run_type_1 hull
        elif any(stable_ids_2.notna()):
            if run_type in self.valid_rtypes_1:  # pylint: disable=R1705
                df_slice = mixing_state_data[mixing_state_data["entry_id_1"] == entry.entry_id]

                if df_slice["entry_id_2"].notna().item():
                    # there is a matching run_type_2 entry. We should discard this entry
                    if df_slice["is_stable_1"].item():
                        # this is a GGA ground state.
                        raise CompatibilityError(
                            f"Discarding {run_type} entry {entry.entry_id} for {entry.composition.formula} "
                            f"because it is a {self.run_type_1} ground state that matches a {self.run_type_2} "
                            "material."
                        )

                    raise CompatibilityError(
                        f"Discarding {run_type} entry {entry.entry_id} for {entry.composition.formula} "
                        f"because there is a matching {self.run_type_2} material"
                    )

                # For other run_type_1 entries, there is no correction
                return adjustments

            else:
                # for run_type_2, determine whether there is a run_type_2 ground state at this composition
                df_slice = mixing_state_data[mixing_state_data["formula"] == entry.composition.reduced_formula]

                if any(df_slice[df_slice["is_stable_1"]]["entry_id_2"].notna()):
                    # there is a run_type_2 entry corresponding to the run_type_1 ground state
                    # adjust the run_type_2 energy to preserve the e_above_hull
                    gs_energy_type_2 = df_slice[df_slice["is_stable_1"]]["energy_2"].item()
                    e_above_hull = entry.energy_per_atom - gs_energy_type_2
                    hull_energy_1 = df_slice["hull_energy_1"].iloc[0]
                    correction = (hull_energy_1 + e_above_hull - entry.energy_per_atom) * entry.composition.num_atoms
                    adjustments.append(
                        ConstantEnergyAdjustment(
                            correction,
                            0.0,
                            name=f"MP {self.run_type_1}/{self.run_type_2} mixing adjustment",
                            cls=self.as_dict(),
                            description=f"Place {self.run_type_2} energy onto the {self.run_type_1} hull",
                        )
                    )
                    return adjustments

                # this composition is not stable in run_type_1. If the run_type_2 entry matches a run_type_1
                # entry, we can adjust the run_type_2 energy to match the run_type_1 energy.
                if any(df_slice[df_slice["entry_id_2"] == entry.entry_id]["entry_id_1"].notna()):
                    # adjust the energy of the run_type_2 entry to match that of the run_type_1 entry
                    type_1_energy = df_slice[df_slice["entry_id_2"] == entry.entry_id]["energy_1"].iloc[0]
                    correction = (type_1_energy - entry.energy_per_atom) * entry.composition.num_atoms
                    adjustments.append(
                        ConstantEnergyAdjustment(
                            correction,
                            0.0,
                            name=f"MP {self.run_type_1}/{self.run_type_2} mixing adjustment",
                            cls=self.as_dict(),
                            description=f"Replace {self.run_type_2} energy with {self.run_type_1} energy",
                        )
                    )
                    return adjustments

                # there is no run_type_1 entry that matches this material, and no ground state. Discard.
                raise CompatibilityError(
                    f"Discarding {run_type} entry {entry.entry_id} for {entry.composition.formula} "
                    f"because there is no matching {self.run_type_1} entry and no {self.run_type_2} "
                    "ground state at this composition."
                )

        # Third case - there are no run_type_2 energies available for any run_type_1
        # ground states. There's no way to use the run_type_2 energies in this case.
        elif all(stable_ids_2.isna()):
            if run_type in self.valid_rtypes_1:
                # nothing to do for run_type_1, return as is
                return adjustments

            # for run_type_2, discard the entry
            raise CompatibilityError(
                f"Discarding {run_type} entry {entry.entry_id} for {entry.composition.formula} "
                f"because there are no {self.run_type_2} ground states at this composition."
            )

        # this statement is here to make pylint happy by guaranteeing a return or raise
        else:
            raise CompatibilityError(
                "WARNING! If you see this Exception it means you have encountered "
                f"an edge case in {type(self).__name__}. Inspect your input carefully and post a bug report."
            )
456

457
    def get_mixing_state_data(self, entries: list[ComputedStructureEntry], verbose: bool = False):
1✔
458
        """
459
        Generate internal state data to be passed to get_adjustments.
460

461
        Args:
462
            entries: The list of ComputedStructureEntry to process. It is assumed that the entries have
463
                already been filtered using _filter_and_sort_entries() to remove any irrelevant run types,
464
                apply compat_1 and compat_2, and confirm that all have unique entry_id.
465

466
        Returns:
467
            DataFrame: A pandas DataFrame that contains information associating structures from
468
                different functionals with specific materials and establishing how many run_type_1
469
                ground states have been computed with run_type_2. The DataFrame contains one row
470
                for each distinct material (Structure), with the following columns:
471
                    formula: str the reduced_formula
472
                    spacegroup: int the spacegroup
473
                    num_sites: int the number of sites in the Structure
474
                    entry_id_1: the entry_id of the run_type_1 entry
475
                    entry_id_2: the entry_id of the run_type_2 entry
476
                    run_type_1: Optional[str] the run_type_1 value
477
                    run_type_2: Optional[str] the run_type_2 value
478
                    energy_1: float or nan the ground state energy in run_type_1 in eV/atom
479
                    energy_2: float or nan the ground state energy in run_type_2 in eV/atom
480
                    is_stable_1: bool whether this material is stable on the run_type_1 PhaseDiagram
481
                    hull_energy_1: float or nan the energy of the run_type_1 hull at this composition in eV/atom
482
                    hull_energy_2: float or nan the energy of the run_type_1 hull at this composition in eV/atom
483
            None: Returns None if the supplied ComputedStructureEntry are insufficient for applying
484
                the mixing scheme.
485
        """
486
        filtered_entries = []
1✔
487

488
        for entry in entries:
1✔
489
            if not isinstance(entry, ComputedStructureEntry):
1✔
490
                warnings.warn(
1✔
491
                    f"Entry {entry.entry_id} is not a ComputedStructureEntry and will be"
492
                    "ignored. The DFT mixing scheme requires structures for"
493
                    " all entries"
494
                )
495
                continue
1✔
496

497
            filtered_entries.append(entry)
1✔
498

499
        # separate by run_type
500
        entries_type_1 = [e for e in filtered_entries if e.parameters["run_type"] in self.valid_rtypes_1]
1✔
501
        entries_type_2 = [e for e in filtered_entries if e.parameters["run_type"] in self.valid_rtypes_2]
1✔
502

503
        # construct PhaseDiagram for each run_type, if possible
504
        pd_type_1, pd_type_2 = None, None
1✔
505
        try:
1✔
506
            pd_type_1 = PhaseDiagram(entries_type_1)
1✔
507
        except ValueError:
1✔
508
            warnings.warn(f"{self.run_type_1} entries do not form a complete PhaseDiagram.")
1✔
509

510
        try:
1✔
511
            pd_type_2 = PhaseDiagram(entries_type_2)
1✔
512
        except ValueError:
1✔
513
            warnings.warn(f"{self.run_type_2} entries do not form a complete PhaseDiagram.")
1✔
514

515
        # Objective: loop through all the entries, group them by structure matching (or fuzzy structure matching
516
        # where relevant). For each group, put a row in a pandas DataFrame with the composition of the run_type_1 entry,
517
        # the run_type_2 entry, whether or not that entry is a ground state (not necessarily on the hull), its energy,
518
        # and the energy of the hull at that composition
519
        all_entries = list(entries_type_1) + list(entries_type_2)
1✔
520
        row_list = []
1✔
521
        columns = [
1✔
522
            "formula",
523
            "spacegroup",
524
            "num_sites",
525
            "is_stable_1",
526
            "entry_id_1",
527
            "entry_id_2",
528
            "run_type_1",
529
            "run_type_2",
530
            "energy_1",
531
            "energy_2",
532
            "hull_energy_1",
533
            "hull_energy_2",
534
        ]
535

536
        def _get_sg(struct) -> int:
1✔
537
            """helper function to get spacegroup with a loose tolerance"""
538
            try:
1✔
539
                return struct.get_space_group_info(symprec=0.1)[1]
1✔
540
            except Exception:
×
541
                return -1
×
542

543
        # loop through all structures
544
        # this logic follows emmet.builders.vasp.materials.MaterialsBuilder.filter_and_group_tasks
545
        structures = []
1✔
546
        for entry in all_entries:
1✔
547
            s = entry.structure
1✔
548
            s.entry_id = entry.entry_id
1✔
549
            structures.append(s)
1✔
550

551
        # First group by composition, then by spacegroup number, then by structure matching
552
        for comp, compgroup in groupby(sorted(structures, key=lambda s: s.composition), key=lambda s: s.composition):
1✔
553
            l_compgroup = list(compgroup)
1✔
554
            # group by spacegroup, then by number of sites (for diatomics) or by structure matching
555
            for sg, pregroup in groupby(sorted(l_compgroup, key=_get_sg), key=_get_sg):
1✔
556
                l_pregroup = list(pregroup)
1✔
557
                if comp.reduced_formula in ["O2", "H2", "Cl2", "F2", "N2", "I", "Br", "H2O"] and self.fuzzy_matching:
1✔
558
                    # group by number of sites
559
                    for n, sitegroup in groupby(
1✔
560
                        sorted(l_pregroup, key=lambda s: s.num_sites), key=lambda s: s.num_sites
561
                    ):
562
                        l_sitegroup = list(sitegroup)
1✔
563
                        row_list.append(
1✔
564
                            self._populate_df_row(l_sitegroup, comp, sg, n, pd_type_1, pd_type_2, all_entries)
565
                        )
566
                else:
567
                    for group in self.structure_matcher.group_structures(l_pregroup):
1✔
568
                        grp = list(group)
1✔
569
                        n = group[0].num_sites
1✔
570
                        # StructureMatcher.group_structures returns a list of lists,
571
                        # so each group should be a list containing matched structures
572
                        row_list.append(self._populate_df_row(grp, comp, sg, n, pd_type_1, pd_type_2, all_entries))
1✔
573

574
        mixing_state_data = pd.DataFrame(row_list, columns=columns)
1✔
575
        mixing_state_data.sort_values(
1✔
576
            ["formula", "energy_1", "spacegroup", "num_sites"], inplace=True, ignore_index=True
577
        )
578

579
        return mixing_state_data
1✔
580

581
    def _filter_and_sort_entries(self, entries, verbose=True):
        """
        Given a single list of entries, separate them by run_type and return two lists, one containing
        only entries of each run_type.

        Entries are dropped (with a warning) when they have no ``parameters["run_type"]``, when their
        run_type is not in ``self.valid_rtypes_1 + self.valid_rtypes_2``, or when their ``entry_id``
        is None. The surviving entries are then processed with ``self.compat_1`` / ``self.compat_2``
        (when set), and run_type_2 entries outside the run_type_1 chemical system are discarded.

        Args:
            entries: iterable of entries exposing ``parameters``, ``entry_id`` and ``composition``.
            verbose: if True, print a summary of each processing step.

        Returns:
            Tuple of two lists: (run_type_1 entries, run_type_2 entries).

        Raises:
            ValueError: if the entries that pass filtering do not all have unique entry_ids.
        """
        # loop-invariant: the full collection of acceptable run_types
        valid_rtypes = self.valid_rtypes_1 + self.valid_rtypes_2
        filtered_entries = []

        for entry in entries:
            run_type = entry.parameters.get("run_type")

            if not run_type:
                warnings.warn(
                    f"Entry {entry.entry_id} is missing parameters.run_type! This field "
                    "is required. This entry will be ignored."
                )
                continue

            if run_type not in valid_rtypes:
                warnings.warn(
                    f"Invalid run_type {run_type} for entry {entry.entry_id}. Must be one of "
                    f"{valid_rtypes}. This entry will be ignored."
                )
                continue

            if entry.entry_id is None:
                warnings.warn(
                    f"Entry_id for {entry.composition.reduced_formula} entry {entry.entry_id} is invalid. "
                    "Unique entry_ids are required for every ComputedStructureEntry. This entry will be ignored."
                )
                continue

            filtered_entries.append(entry)

        # a set collapses duplicates, so a size mismatch means at least one entry_id is repeated
        filtered_entry_ids = {e.entry_id for e in filtered_entries}
        if len(filtered_entry_ids) != len(filtered_entries):
            raise ValueError(
                "The provided ComputedStructureEntry do not all have unique entry_ids."
                " Unique entry_ids are required for every ComputedStructureEntry."
            )

        # separate by run_type
        entries_type_1 = [e for e in filtered_entries if e.parameters["run_type"] in self.valid_rtypes_1]
        entries_type_2 = [e for e in filtered_entries if e.parameters["run_type"] in self.valid_rtypes_2]

        if verbose:
            print(
                f"Processing {len(entries_type_1)} {self.run_type_1} and {len(entries_type_2)} "
                f"{self.run_type_2} entries..."
            )

        # preprocess entries with any corrections
        # make an EntrySet to enable some useful methods like .chemsys and .is_ground_state
        if self.compat_1:
            entries_type_1 = self.compat_1.process_entries(entries_type_1)
            if verbose:
                print(
                    f"  Processed {len(entries_type_1)} compatible {self.run_type_1} entries with "
                    f"{type(self.compat_1).__name__}"
                )
        entries_type_1 = EntrySet(entries_type_1)

        if self.compat_2:
            entries_type_2 = self.compat_2.process_entries(entries_type_2)
            if verbose:
                print(
                    f"  Processed {len(entries_type_2)} compatible {self.run_type_2} entries with "
                    f"{type(self.compat_2).__name__}"
                )
        entries_type_2 = EntrySet(entries_type_2)

        # make sure both sets of entries belong to the same chemical system,
        # assuming there are any run_type_1 entries at all
        if len(entries_type_1.chemsys) > 0:
            chemsys = entries_type_1.chemsys
            if not entries_type_2.chemsys <= entries_type_1.chemsys:
                warnings.warn(
                    f"  {self.run_type_2} entries chemical system {entries_type_2.chemsys} is larger than "
                    f"{self.run_type_1} entries chemical system {entries_type_1.chemsys}. Entries outside the "
                    f"{self.run_type_1} chemical system will be discarded"
                )
                entries_type_2 = entries_type_2.get_subset_in_chemsys(chemsys)
        else:
            # if only run_type_2 entries are present, then they define the chemsys
            chemsys = entries_type_2.chemsys

        if verbose:
            print(f"  Entries belong to the {chemsys} chemical system")

        return list(entries_type_1), list(entries_type_2)
1✔
668

669
    def _populate_df_row(self, struct_group, comp, sg, n, pd_type_1, pd_type_2, all_entries):
1✔
670
        """
671
        helper function to populate a row of the mixing state DataFrame, given
672
        a list of matched structures
673
        """
674
        # within the group of matched structures, keep the lowest energy entry from
675
        # each run_type
676
        entries_type_1 = sorted(
1✔
677
            (
678
                e
679
                for e in all_entries
680
                if e.entry_id in [s.entry_id for s in struct_group] and e.parameters["run_type"] in self.valid_rtypes_1
681
            ),
682
            key=lambda x: x.energy_per_atom,
683
        )
684
        first_entry = entries_type_1[0] if len(entries_type_1) > 0 else None
1✔
685

686
        entries_type_2 = sorted(
1✔
687
            (
688
                e
689
                for e in all_entries
690
                if e.entry_id in [s.entry_id for s in struct_group] and e.parameters["run_type"] in self.valid_rtypes_2
691
            ),
692
            key=lambda x: x.energy_per_atom,
693
        )
694
        second_entry = entries_type_2[0] if len(entries_type_2) > 0 else None
1✔
695

696
        # generate info for the DataFrame
697
        stable_1 = False
1✔
698

699
        id1 = first_entry.entry_id if first_entry else None
1✔
700
        id2 = second_entry.entry_id if second_entry else None
1✔
701
        rt1 = first_entry.parameters["run_type"] if first_entry else None
1✔
702
        rt2 = second_entry.parameters["run_type"] if second_entry else None
1✔
703
        # are the entries the lowest energy at this composition?
704
        energy_1 = first_entry.energy_per_atom if first_entry else np.nan
1✔
705
        energy_2 = second_entry.energy_per_atom if second_entry else np.nan
1✔
706
        # are they stable?
707
        if pd_type_1:
1✔
708
            stable_1 = first_entry in pd_type_1.stable_entries
1✔
709

710
        # get the respective hull energies at this composition, if available
711
        hull_energy_1, hull_energy_2 = np.nan, np.nan
1✔
712
        if pd_type_1:
1✔
713
            hull_energy_1 = pd_type_1.get_hull_energy_per_atom(comp)
1✔
714
        if pd_type_2:
1✔
715
            hull_energy_2 = pd_type_2.get_hull_energy_per_atom(comp)
1✔
716

717
        return [
1✔
718
            comp.reduced_formula,
719
            sg,
720
            n,
721
            stable_1,
722
            id1,
723
            id2,
724
            rt1,
725
            rt2,
726
            energy_1,
727
            energy_2,
728
            hull_energy_1,
729
            hull_energy_2,
730
        ]
731

732
    @staticmethod
    def display_entries(entries):
        """
        Generate a pretty printout of key properties of a list of ComputedEntry.

        Entries are listed sorted by reduced formula and then by energy per atom. If a
        PhaseDiagram cannot be constructed from the entries (ValueError), nothing is printed.

        Args:
            entries: entries to display; each must expose ``entry_id``, ``composition``,
                ``structure``, ``parameters["run_type"]``, ``energy_per_atom`` and ``correction``.

        Returns:
            None
        """
        entries = sorted(entries, key=lambda e: (e.composition.reduced_formula, e.energy_per_atom))
        try:
            # named phase_diagram (not pd) so the module-level pandas alias is not shadowed
            phase_diagram = PhaseDiagram(entries)
        except ValueError:
            return None

        print(
            f"{'entry_id':<12}{'formula':<12}{'spacegroup':<12}{'run_type':<10}{'eV/atom':<8}"
            f"{'corr/atom':<9} {'e_above_hull':<9}"
        )
        for e in entries:
            print(
                f"{e.entry_id:<12}{e.composition.reduced_formula:<12}{e.structure.get_space_group_info()[0]:<12}"
                f"{e.parameters['run_type']:<10}{e.energy_per_atom:<8.3f}"
                f"{e.correction / e.composition.num_atoms:<9.3f} {phase_diagram.get_e_above_hull(e):<9.3f}"
            )
        return None
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc