• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

materialsproject / pymatgen / 4075885785

pending completion
4075885785

push

github

Shyue Ping Ong
Merge branch 'master' of github.com:materialsproject/pymatgen

96 of 96 new or added lines in 27 files covered. (100.0%)

81013 of 102710 relevant lines covered (78.88%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

16.24
/pymatgen/command_line/enumlib_caller.py
1
# Copyright (c) Pymatgen Development Team.
2
# Distributed under the terms of the MIT License.
3

4
"""
1✔
5
This module implements an interface to enumlib, Gus Hart's excellent Fortran
6
code for enumerating derivative structures.
7

8
This module depends on a compiled enumlib with the executables enum.x and
9
makestr.x available in the path. Please download the library at
10
https://github.com/msg-byu/enumlib and follow the instructions in the README to
11
compile these two executables accordingly.
12

13
If you use this module, please cite the following:
14

15
Gus L. W. Hart and Rodney W. Forcade, "Algorithm for generating derivative
16
structures," Phys. Rev. B 77 224115 (26 June 2008)
17

18
Gus L. W. Hart and Rodney W. Forcade, "Generating derivative structures from
19
multilattices: Application to hcp alloys," Phys. Rev. B 80 014120 (July 2009)
20

21
Gus L. W. Hart, Lance J. Nelson, and Rodney W. Forcade, "Generating
22
derivative structures at a fixed concentration," Comp. Mat. Sci. 59
23
101-107 (March 2012)
24

25
Wiley S. Morgan, Gus L. W. Hart, Rodney W. Forcade, "Generating derivative
26
superstructures for systems with high configurational freedom," Comp. Mat.
27
Sci. 136 144-149 (May 2017)
28
"""
29

30
from __future__ import annotations
1✔
31

32
import fractions
1✔
33
import glob
1✔
34
import itertools
1✔
35
import logging
1✔
36
import math
1✔
37
import re
1✔
38
import subprocess
1✔
39
from shutil import which
1✔
40
from threading import Timer
1✔
41

42
import numpy as np
1✔
43
from monty.dev import requires
1✔
44
from monty.fractions import lcm
1✔
45
from monty.tempfile import ScratchDir
1✔
46

47
from pymatgen.core.periodic_table import DummySpecies
1✔
48
from pymatgen.core.sites import PeriodicSite
1✔
49
from pymatgen.core.structure import Structure
1✔
50
from pymatgen.io.vasp.inputs import Poscar
1✔
51
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
1✔
52

53
logger = logging.getLogger(__name__)
1✔
54

55
# Favor the use of the newer "enum.x" by Gus Hart instead of the older
56
# "multienum.x"
57
enum_cmd = which("enum.x") or which("multienum.x")
1✔
58
# prefer makestr.x at present
59
makestr_cmd = which("makestr.x") or which("makeStr.x") or which("makeStr.py")
1✔
60

61

62
@requires(
1✔
63
    enum_cmd and makestr_cmd,
64
    "EnumlibAdaptor requires the executables 'enum.x' or 'multienum.x' "
65
    "and 'makestr.x' or 'makeStr.py' to be in the path. Please download the "
66
    "library at https://github.com/msg-byu/enumlib and follow the instructions "
67
    "in the README to compile these two executables accordingly.",
68
)
69
class EnumlibAdaptor:
1✔
70
    """
71
    An adaptor for enumlib.
72

73
    .. attribute:: structures
74

75
        List of all enumerated structures.
76
    """
77

78
    amount_tol = 1e-5
1✔
79

80
    def __init__(
1✔
81
        self,
82
        structure,
83
        min_cell_size=1,
84
        max_cell_size=1,
85
        symm_prec=0.1,
86
        enum_precision_parameter=0.001,
87
        refine_structure=False,
88
        check_ordered_symmetry=True,
89
        timeout=None,
90
    ):
91
        """
92
        Initializes the adapter with a structure and some parameters.
93

94
        Args:
95
            structure: An input structure.
96
            min_cell_size (int): The minimum cell size wanted. Defaults to 1.
97
            max_cell_size (int): The maximum cell size wanted. Defaults to 1.
98
            symm_prec (float): Symmetry precision. Defaults to 0.1.
99
            enum_precision_parameter (float): Finite precision parameter for
100
                enumlib. Default of 0.001 is usually ok, but you might need to
101
                tweak it for certain cells.
102
            refine_structure (bool): If you are starting from a structure that
103
                has been relaxed via some electronic structure code,
104
                it is usually much better to start with symmetry determination
105
                and then obtain a refined structure. The refined structure have
106
                cell parameters and atomic positions shifted to the expected
107
                symmetry positions, which makes it much less sensitive precision
108
                issues in enumlib. If you are already starting from an
109
                experimental cif, refinement should have already been done and
110
                it is not necessary. Defaults to False.
111
            check_ordered_symmetry (bool): Whether to check the symmetry of
112
                the ordered sites. If the symmetry of the ordered sites is
113
                lower, the lowest symmetry ordered sites is included in the
114
                enumeration. This is important if the ordered sites break
115
                symmetry in a way that is important getting possible
116
                structures. But sometimes including ordered sites
117
                slows down enumeration to the point that it cannot be
118
                completed. Switch to False in those cases. Defaults to True.
119
            timeout (float): If specified, will kill enumlib after specified
120
                time in minutes. This can be useful for gracefully handling
121
                enumerations in a high-throughput context, for some enumerations
122
                which will not terminate in a realistic length of time.
123
        """
124
        if refine_structure:
×
125
            finder = SpacegroupAnalyzer(structure, symm_prec)
×
126
            self.structure = finder.get_refined_structure()
×
127
        else:
128
            self.structure = structure
×
129
        self.min_cell_size = min_cell_size
×
130
        self.max_cell_size = max_cell_size
×
131
        self.symm_prec = symm_prec
×
132
        self.enum_precision_parameter = enum_precision_parameter
×
133
        self.check_ordered_symmetry = check_ordered_symmetry
×
134
        self.timeout = timeout
×
135

136
    def run(self):
1✔
137
        """
138
        Run the enumeration.
139
        """
140
        # Create a temporary directory for working.
141
        with ScratchDir(".") as d:
×
142
            logger.debug(f"Temp dir : {d}")
×
143
            # Generate input files
144
            self._gen_input_file()
×
145
            # Perform the actual enumeration
146
            num_structs = self._run_multienum()
×
147
            # Read in the enumeration output as structures.
148
            if num_structs > 0:
×
149
                self.structures = self._get_structures(num_structs)
×
150
            else:
151
                raise EnumError("Unable to enumerate structure.")
×
152

153
    def _gen_input_file(self):
1✔
154
        """
155
        Generate the necessary struct_enum.in file for enumlib. See enumlib
156
        documentation for details.
157
        """
158
        coord_format = "{:.6f} {:.6f} {:.6f}"
×
159
        # Using symmetry finder, get the symmetrically distinct sites.
160
        fitter = SpacegroupAnalyzer(self.structure, self.symm_prec)
×
161
        symmetrized_structure = fitter.get_symmetrized_structure()
×
162
        logger.debug(
×
163
            f"Spacegroup {fitter.get_space_group_symbol()} ({fitter.get_space_group_number()}) "
164
            f"with {len(symmetrized_structure.equivalent_sites)} distinct sites"
165
        )
166
        """
167
        Enumlib doesn"t work when the number of species get too large. To
168
        simplify matters, we generate the input file only with disordered sites
169
        and exclude the ordered sites from the enumeration. The fact that
170
        different disordered sites with the exact same species may belong to
171
        different equivalent sites is dealt with by having determined the
172
        spacegroup earlier and labelling the species differently.
173
        """
174
        # index_species and index_amounts store mappings between the indices
175
        # used in the enum input file, and the actual species and amounts.
176
        index_species = []
×
177
        index_amounts = []
×
178

179
        # Stores the ordered sites, which are not enumerated.
180
        ordered_sites = []
×
181
        disordered_sites = []
×
182
        coord_str = []
×
183
        for sites in symmetrized_structure.equivalent_sites:
×
184
            if sites[0].is_ordered:
×
185
                ordered_sites.append(sites)
×
186
            else:
187
                sp_label = []
×
188
                species = dict(sites[0].species.items())
×
189
                if sum(species.values()) < 1 - EnumlibAdaptor.amount_tol:
×
190
                    # Let us first make add a dummy element for every single
191
                    # site whose total occupancies don't sum to 1.
192
                    species[DummySpecies("X")] = 1 - sum(species.values())
×
193
                for sp, amt in species.items():
×
194
                    if sp not in index_species:
×
195
                        index_species.append(sp)
×
196
                        sp_label.append(len(index_species) - 1)
×
197
                        index_amounts.append(amt * len(sites))
×
198
                    else:
199
                        ind = index_species.index(sp)
×
200
                        sp_label.append(ind)
×
201
                        index_amounts[ind] += amt * len(sites)
×
202
                sp_label = "/".join(f"{i}" for i in sorted(sp_label))
×
203
                for site in sites:
×
204
                    coord_str.append(f"{coord_format.format(*site.coords)} {sp_label}")
×
205
                disordered_sites.append(sites)
×
206

207
        def get_sg_info(ss):
×
208
            finder = SpacegroupAnalyzer(Structure.from_sites(ss), self.symm_prec)
×
209
            return finder.get_space_group_number()
×
210

211
        target_sgnum = get_sg_info(symmetrized_structure.sites)
×
212
        curr_sites = list(itertools.chain.from_iterable(disordered_sites))
×
213
        sgnum = get_sg_info(curr_sites)
×
214
        ordered_sites = sorted(ordered_sites, key=lambda sites: len(sites))
×
215
        logger.debug(f"Disordered sites has sg # {sgnum}")
×
216
        self.ordered_sites = []
×
217

218
        # progressively add ordered sites to our disordered sites
219
        # until we match the symmetry of our input structure
220
        if self.check_ordered_symmetry:
×
221
            while sgnum != target_sgnum and len(ordered_sites) > 0:
×
222
                sites = ordered_sites.pop(0)
×
223
                temp_sites = list(curr_sites) + sites
×
224
                new_sgnum = get_sg_info(temp_sites)
×
225
                if sgnum != new_sgnum:
×
226
                    logger.debug(f"Adding {sites[0].specie} in enum. New sg # {new_sgnum}")
×
227
                    index_species.append(sites[0].specie)
×
228
                    index_amounts.append(len(sites))
×
229
                    sp_label = len(index_species) - 1
×
230
                    for site in sites:
×
231
                        coord_str.append(f"{coord_format.format(*site.coords)} {sp_label}")
×
232
                    disordered_sites.append(sites)
×
233
                    curr_sites = temp_sites
×
234
                    sgnum = new_sgnum
×
235
                else:
236
                    self.ordered_sites.extend(sites)
×
237

238
        for sites in ordered_sites:
×
239
            self.ordered_sites.extend(sites)
×
240

241
        self.index_species = index_species
×
242

243
        lattice = self.structure.lattice
×
244

245
        output = [self.structure.formula, "bulk"]
×
246
        for vec in lattice.matrix:
×
247
            output.append(coord_format.format(*vec))
×
248
        output.append(f"{len(index_species)}")
×
249
        output.append(f"{len(coord_str)}")
×
250
        output.extend(coord_str)
×
251

252
        output.append(f"{self.min_cell_size} {self.max_cell_size}")
×
253
        output.append(str(self.enum_precision_parameter))
×
254
        output.append("full")
×
255

256
        ndisordered = sum(len(s) for s in disordered_sites)
×
257
        base = int(
×
258
            ndisordered
259
            * lcm(
260
                *(
261
                    f.limit_denominator(ndisordered * self.max_cell_size).denominator
262
                    for f in map(fractions.Fraction, index_amounts)
263
                )
264
            )
265
        )
266

267
        # This multiplicative factor of 10 is to prevent having too small bases
268
        # which can lead to rounding issues in the next step.
269
        # An old bug was that a base was set to 8, with a conc of 0.4:0.6. That
270
        # resulted in a range that overlaps and a conc of 0.5 satisfying this
271
        # enumeration. See Cu7Te5.cif test file.
272
        base *= 10
×
273

274
        # base = ndisordered #10 ** int(math.ceil(math.log10(ndisordered)))
275
        # To get a reasonable number of structures, we fix concentrations to the
276
        # range expected in the original structure.
277
        total_amounts = sum(index_amounts)
×
278
        for amt in index_amounts:
×
279
            conc = amt / total_amounts
×
280

281
            if abs(conc * base - round(conc * base)) < 1e-5:
×
282
                output.append(f"{int(round(conc * base))} {int(round(conc * base))} {base}")
×
283
            else:
284
                min_conc = int(math.floor(conc * base))
×
285
                output.append(f"{min_conc - 1} {min_conc + 1} {base}")
×
286
        output.append("")
×
287
        logger.debug("Generated input file:\n{}".format("\n".join(output)))
×
288
        with open("struct_enum.in", "w") as f:
×
289
            f.write("\n".join(output))
×
290

291
    def _run_multienum(self):
1✔
292
        with subprocess.Popen([enum_cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, close_fds=True) as p:
×
293
            if self.timeout:
×
294
                timed_out = False
×
295
                timer = Timer(self.timeout * 60, lambda p: p.kill(), [p])
×
296

297
                try:
×
298
                    timer.start()
×
299
                    output = p.communicate()[0].decode("utf-8")
×
300
                finally:
301
                    if not timer.is_alive():
×
302
                        timed_out = True
×
303
                    timer.cancel()
×
304

305
                if timed_out:
×
306
                    raise TimeoutError("Enumeration took too long.")
×
307

308
            else:
309
                output = p.communicate()[0].decode("utf-8")
×
310

311
        count = 0
×
312
        start_count = False
×
313
        for line in output.strip().split("\n"):
×
314
            if line.strip().endswith("RunTot"):
×
315
                start_count = True
×
316
            elif start_count and re.match(r"\d+\s+.*", line.strip()):
×
317
                count = int(line.split()[-1])
×
318
        logger.debug(f"Enumeration resulted in {count} structures")
×
319
        return count
×
320

321
    def _get_structures(self, num_structs):
1✔
322
        structs = []
×
323

324
        if ".py" in makestr_cmd:
×
325
            options = ["-input", "struct_enum.out", str(1), str(num_structs)]
×
326
        else:
327
            options = ["struct_enum.out", str(0), str(num_structs - 1)]
×
328

329
        with subprocess.Popen(
×
330
            [makestr_cmd] + options,
331
            stdout=subprocess.PIPE,
332
            stdin=subprocess.PIPE,
333
            close_fds=True,
334
        ) as rs:
335
            stdout, stderr = rs.communicate()
×
336
        if stderr:
×
337
            logger.warning(stderr.decode())
×
338

339
        # sites retrieved from enumlib will lack site properties
340
        # to ensure consistency, we keep track of what site properties
341
        # are missing and set them to None
342
        # TODO: improve this by mapping ordered structure to original
343
        # disordered structure, and retrieving correct site properties
344
        disordered_site_properties = {}
×
345

346
        if len(self.ordered_sites) > 0:
×
347
            original_latt = self.ordered_sites[0].lattice
×
348
            # Need to strip sites of site_properties, which would otherwise
349
            # result in an index error. Hence Structure is reconstructed in
350
            # the next step.
351
            site_properties = {}
×
352
            for site in self.ordered_sites:
×
353
                for k, v in site.properties.items():
×
354
                    disordered_site_properties[k] = None
×
355
                    if k in site_properties:
×
356
                        site_properties[k].append(v)
×
357
                    else:
358
                        site_properties[k] = [v]
×
359
            ordered_structure = Structure(
×
360
                original_latt,
361
                [site.species for site in self.ordered_sites],
362
                [site.frac_coords for site in self.ordered_sites],
363
                site_properties=site_properties,
364
            )
365
            inv_org_latt = np.linalg.inv(original_latt.matrix)
×
366
        else:
367
            ordered_structure = None  # to fix pylint E0601
×
368
            inv_org_latt = None
×
369

370
        for file in glob.glob("vasp.*"):
×
371
            with open(file) as f:
×
372
                data = f.read()
×
373
                data = re.sub(r"scale factor", "1", data)
×
374
                data = re.sub(r"(\d+)-(\d+)", r"\1 -\2", data)
×
375
                poscar = Poscar.from_string(data, self.index_species)
×
376
                sub_structure = poscar.structure
×
377
                # Enumeration may have resulted in a super lattice. We need to
378
                # find the mapping from the new lattice to the old lattice, and
379
                # perform supercell construction if necessary.
380
                new_latt = sub_structure.lattice
×
381

382
                sites = []
×
383

384
                if len(self.ordered_sites) > 0:
×
385
                    transformation = np.dot(new_latt.matrix, inv_org_latt)
×
386
                    transformation = [[int(round(cell)) for cell in row] for row in transformation]
×
387
                    logger.debug(f"Supercell matrix: {transformation}")
×
388
                    s = ordered_structure * transformation
×
389
                    sites.extend([site.to_unit_cell() for site in s])
×
390
                    super_latt = sites[-1].lattice
×
391
                else:
392
                    super_latt = new_latt
×
393

394
                for site in sub_structure:
×
395
                    if site.specie.symbol != "X":  # We exclude vacancies.
×
396
                        sites.append(
×
397
                            PeriodicSite(
398
                                site.species,
399
                                site.frac_coords,
400
                                super_latt,
401
                                to_unit_cell=True,
402
                                properties=disordered_site_properties,
403
                            )
404
                        )
405
                    else:
406
                        logger.debug("Skipping sites that include species X.")
×
407
                structs.append(Structure.from_sites(sorted(sites)))
×
408

409
        logger.debug(f"Read in a total of {num_structs} structures.")
×
410
        return structs
×
411

412

413
class EnumError(BaseException):
1✔
414
    """
415
    Error subclass for enumeration errors.
416
    """
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc