4075885785

Build Type

push

github

Committed by Shyue Ping Ong

Commit Message

Merge branch 'master' of github.com:materialsproject/pymatgen

Run Details

96 of 96 new or added lines in 27 files covered. (100.0%)

81013 of 102710 relevant lines covered (78.88%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

16.24

/pymatgen/command_line/enumlib_caller.py

# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

"""
This module implements an interface to enumlib, Gus Hart's excellent Fortran
code for enumerating derivative structures.

This module depends on a compiled enumlib with the executables enum.x and
makestr.x available in the path. Please download the library at
https://github.com/msg-byu/enumlib and follow the instructions in the README to
compile these two executables accordingly.

If you use this module, please cite the following:

Gus L. W. Hart and Rodney W. Forcade, "Algorithm for generating derivative
structures," Phys. Rev. B 77 224115 (26 June 2008)

Gus L. W. Hart and Rodney W. Forcade, "Generating derivative structures from
multilattices: Application to hcp alloys," Phys. Rev. B 80 014120 (July 2009)

Gus L. W. Hart, Lance J. Nelson, and Rodney W. Forcade, "Generating
derivative structures at a fixed concentration," Comp. Mat. Sci. 59
101-107 (March 2012)

Wiley S. Morgan, Gus L. W. Hart, Rodney W. Forcade, "Generating derivative
superstructures for systems with high configurational freedom," Comp. Mat.
Sci. 136 144-149 (May 2017)
"""

from __future__ import annotations

import fractions
import glob
import itertools
import logging
import math
import re
import subprocess
from shutil import which
from threading import Timer

import numpy as np
from monty.dev import requires
from monty.fractions import lcm
from monty.tempfile import ScratchDir

from pymatgen.core.periodic_table import DummySpecies
from pymatgen.core.sites import PeriodicSite
from pymatgen.core.structure import Structure
from pymatgen.io.vasp.inputs import Poscar
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

logger = logging.getLogger(__name__)

# Favor the use of the newer "enum.x" by Gus Hart instead of the older
# "multienum.x"
enum_cmd = which("enum.x") or which("multienum.x")
# prefer makestr.x at present
makestr_cmd = which("makestr.x") or which("makeStr.x") or which("makeStr.py")


@requires(
    enum_cmd and makestr_cmd,
    "EnumlibAdaptor requires the executables 'enum.x' or 'multienum.x' "
    "and 'makestr.x' or 'makeStr.py' to be in the path. Please download the "
    "library at https://github.com/msg-byu/enumlib and follow the instructions "
    "in the README to compile these two executables accordingly.",
)
class EnumlibAdaptor:
    """
    An adaptor for enumlib.

    .. attribute:: structures

        List of all enumerated structures.
    """

    amount_tol = 1e-5

    def __init__(
        self,
        structure,
        min_cell_size=1,
        max_cell_size=1,
        symm_prec=0.1,
        enum_precision_parameter=0.001,
        refine_structure=False,
        check_ordered_symmetry=True,
        timeout=None,
    ):
        """
        Initializes the adapter with a structure and some parameters.

        Args:
            structure: An input structure.
            min_cell_size (int): The minimum cell size wanted. Defaults to 1.
            max_cell_size (int): The maximum cell size wanted. Defaults to 1.
            symm_prec (float): Symmetry precision. Defaults to 0.1.
            enum_precision_parameter (float): Finite precision parameter for
                enumlib. Default of 0.001 is usually ok, but you might need to
                tweak it for certain cells.
            refine_structure (bool): If you are starting from a structure that
                has been relaxed via some electronic structure code,
                it is usually much better to start with symmetry determination
                and then obtain a refined structure. The refined structure have
                cell parameters and atomic positions shifted to the expected
                symmetry positions, which makes it much less sensitive precision
                issues in enumlib. If you are already starting from an
                experimental cif, refinement should have already been done and
                it is not necessary. Defaults to False.
            check_ordered_symmetry (bool): Whether to check the symmetry of
                the ordered sites. If the symmetry of the ordered sites is
                lower, the lowest symmetry ordered sites is included in the
                enumeration. This is important if the ordered sites break
                symmetry in a way that is important getting possible
                structures. But sometimes including ordered sites
                slows down enumeration to the point that it cannot be
                completed. Switch to False in those cases. Defaults to True.
            timeout (float): If specified, will kill enumlib after specified
                time in minutes. This can be useful for gracefully handling
                enumerations in a high-throughput context, for some enumerations
                which will not terminate in a realistic length of time.
        """
        if refine_structure:
            finder = SpacegroupAnalyzer(structure, symm_prec)
            self.structure = finder.get_refined_structure()
        else:
            self.structure = structure
        self.min_cell_size = min_cell_size
        self.max_cell_size = max_cell_size
        self.symm_prec = symm_prec
        self.enum_precision_parameter = enum_precision_parameter
        self.check_ordered_symmetry = check_ordered_symmetry
        self.timeout = timeout

    def run(self):
        """
        Run the enumeration.
        """
        # Create a temporary directory for working.
        with ScratchDir(".") as d:
            logger.debug(f"Temp dir : {d}")
            # Generate input files
            self._gen_input_file()
            # Perform the actual enumeration
            num_structs = self._run_multienum()
            # Read in the enumeration output as structures.
            if num_structs > 0:
                self.structures = self._get_structures(num_structs)
            else:
                raise EnumError("Unable to enumerate structure.")

    def _gen_input_file(self):
        """
        Generate the necessary struct_enum.in file for enumlib. See enumlib
        documentation for details.
        """
        coord_format = "{:.6f} {:.6f} {:.6f}"
        # Using symmetry finder, get the symmetrically distinct sites.
        fitter = SpacegroupAnalyzer(self.structure, self.symm_prec)
        symmetrized_structure = fitter.get_symmetrized_structure()
        logger.debug(
            f"Spacegroup {fitter.get_space_group_symbol()} ({fitter.get_space_group_number()}) "
            f"with {len(symmetrized_structure.equivalent_sites)} distinct sites"
        )
        """
        Enumlib doesn"t work when the number of species get too large. To
        simplify matters, we generate the input file only with disordered sites
        and exclude the ordered sites from the enumeration. The fact that
        different disordered sites with the exact same species may belong to
        different equivalent sites is dealt with by having determined the
        spacegroup earlier and labelling the species differently.
        """
        # index_species and index_amounts store mappings between the indices
        # used in the enum input file, and the actual species and amounts.
        index_species = []
        index_amounts = []

        # Stores the ordered sites, which are not enumerated.
        ordered_sites = []
        disordered_sites = []
        coord_str = []
        for sites in symmetrized_structure.equivalent_sites:
            if sites[0].is_ordered:
                ordered_sites.append(sites)
            else:
                sp_label = []
                species = dict(sites[0].species.items())
                if sum(species.values()) < 1 - EnumlibAdaptor.amount_tol:
                    # Let us first make add a dummy element for every single
                    # site whose total occupancies don't sum to 1.
                    species[DummySpecies("X")] = 1 - sum(species.values())
                for sp, amt in species.items():
                    if sp not in index_species:
                        index_species.append(sp)
                        sp_label.append(len(index_species) - 1)
                        index_amounts.append(amt * len(sites))
                    else:
                        ind = index_species.index(sp)
                        sp_label.append(ind)
                        index_amounts[ind] += amt * len(sites)
                sp_label = "/".join(f"{i}" for i in sorted(sp_label))
                for site in sites:
                    coord_str.append(f"{coord_format.format(*site.coords)} {sp_label}")
                disordered_sites.append(sites)

        def get_sg_info(ss):
            finder = SpacegroupAnalyzer(Structure.from_sites(ss), self.symm_prec)
            return finder.get_space_group_number()

        target_sgnum = get_sg_info(symmetrized_structure.sites)
        curr_sites = list(itertools.chain.from_iterable(disordered_sites))
        sgnum = get_sg_info(curr_sites)
        ordered_sites = sorted(ordered_sites, key=lambda sites: len(sites))
        logger.debug(f"Disordered sites has sg # {sgnum}")
        self.ordered_sites = []

        # progressively add ordered sites to our disordered sites
        # until we match the symmetry of our input structure
        if self.check_ordered_symmetry:
            while sgnum != target_sgnum and len(ordered_sites) > 0:
                sites = ordered_sites.pop(0)
                temp_sites = list(curr_sites) + sites
                new_sgnum = get_sg_info(temp_sites)
                if sgnum != new_sgnum:
                    logger.debug(f"Adding {sites[0].specie} in enum. New sg # {new_sgnum}")
                    index_species.append(sites[0].specie)
                    index_amounts.append(len(sites))
                    sp_label = len(index_species) - 1
                    for site in sites:
                        coord_str.append(f"{coord_format.format(*site.coords)} {sp_label}")
                    disordered_sites.append(sites)
                    curr_sites = temp_sites
                    sgnum = new_sgnum
                else:
                    self.ordered_sites.extend(sites)

        for sites in ordered_sites:
            self.ordered_sites.extend(sites)

        self.index_species = index_species

        lattice = self.structure.lattice

        output = [self.structure.formula, "bulk"]
        for vec in lattice.matrix:
            output.append(coord_format.format(*vec))
        output.append(f"{len(index_species)}")
        output.append(f"{len(coord_str)}")
        output.extend(coord_str)

        output.append(f"{self.min_cell_size} {self.max_cell_size}")
        output.append(str(self.enum_precision_parameter))
        output.append("full")

        ndisordered = sum(len(s) for s in disordered_sites)
        base = int(
            ndisordered
            * lcm(
                *(
                    f.limit_denominator(ndisordered * self.max_cell_size).denominator
                    for f in map(fractions.Fraction, index_amounts)
                )
            )
        )

        # This multiplicative factor of 10 is to prevent having too small bases
        # which can lead to rounding issues in the next step.
        # An old bug was that a base was set to 8, with a conc of 0.4:0.6. That
        # resulted in a range that overlaps and a conc of 0.5 satisfying this
        # enumeration. See Cu7Te5.cif test file.
        base *= 10

        # base = ndisordered #10 ** int(math.ceil(math.log10(ndisordered)))
        # To get a reasonable number of structures, we fix concentrations to the
        # range expected in the original structure.
        total_amounts = sum(index_amounts)
        for amt in index_amounts:
            conc = amt / total_amounts

            if abs(conc * base - round(conc * base)) < 1e-5:
                output.append(f"{int(round(conc * base))} {int(round(conc * base))} {base}")
            else:
                min_conc = int(math.floor(conc * base))
                output.append(f"{min_conc - 1} {min_conc + 1} {base}")
        output.append("")
        logger.debug("Generated input file:\n{}".format("\n".join(output)))
        with open("struct_enum.in", "w") as f:
            f.write("\n".join(output))

    def _run_multienum(self):
        with subprocess.Popen([enum_cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, close_fds=True) as p:
            if self.timeout:
                timed_out = False
                timer = Timer(self.timeout * 60, lambda p: p.kill(), [p])

                try:
                    timer.start()
                    output = p.communicate()[0].decode("utf-8")
                finally:
                    if not timer.is_alive():
                        timed_out = True
                    timer.cancel()

                if timed_out:
                    raise TimeoutError("Enumeration took too long.")

            else:
                output = p.communicate()[0].decode("utf-8")

        count = 0
        start_count = False
        for line in output.strip().split("\n"):
            if line.strip().endswith("RunTot"):
                start_count = True
            elif start_count and re.match(r"\d+\s+.*", line.strip()):
                count = int(line.split()[-1])
        logger.debug(f"Enumeration resulted in {count} structures")
        return count

    def _get_structures(self, num_structs):
        structs = []

        if ".py" in makestr_cmd:
            options = ["-input", "struct_enum.out", str(1), str(num_structs)]
        else:
            options = ["struct_enum.out", str(0), str(num_structs - 1)]

        with subprocess.Popen(
            [makestr_cmd] + options,
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            close_fds=True,
        ) as rs:
            stdout, stderr = rs.communicate()
        if stderr:
            logger.warning(stderr.decode())

        # sites retrieved from enumlib will lack site properties
        # to ensure consistency, we keep track of what site properties
        # are missing and set them to None
        # TODO: improve this by mapping ordered structure to original
        # disordered structure, and retrieving correct site properties
        disordered_site_properties = {}

        if len(self.ordered_sites) > 0:
            original_latt = self.ordered_sites[0].lattice
            # Need to strip sites of site_properties, which would otherwise
            # result in an index error. Hence Structure is reconstructed in
            # the next step.
            site_properties = {}
            for site in self.ordered_sites:
                for k, v in site.properties.items():
                    disordered_site_properties[k] = None
                    if k in site_properties:
                        site_properties[k].append(v)
                    else:
                        site_properties[k] = [v]
            ordered_structure = Structure(
                original_latt,
                [site.species for site in self.ordered_sites],
                [site.frac_coords for site in self.ordered_sites],
                site_properties=site_properties,
            )
            inv_org_latt = np.linalg.inv(original_latt.matrix)
        else:
            ordered_structure = None  # to fix pylint E0601
            inv_org_latt = None

        for file in glob.glob("vasp.*"):
            with open(file) as f:
                data = f.read()
                data = re.sub(r"scale factor", "1", data)
                data = re.sub(r"(\d+)-(\d+)", r"\1 -\2", data)
                poscar = Poscar.from_string(data, self.index_species)
                sub_structure = poscar.structure
                # Enumeration may have resulted in a super lattice. We need to
                # find the mapping from the new lattice to the old lattice, and
                # perform supercell construction if necessary.
                new_latt = sub_structure.lattice

                sites = []

                if len(self.ordered_sites) > 0:
                    transformation = np.dot(new_latt.matrix, inv_org_latt)
                    transformation = [[int(round(cell)) for cell in row] for row in transformation]
                    logger.debug(f"Supercell matrix: {transformation}")
                    s = ordered_structure * transformation
                    sites.extend([site.to_unit_cell() for site in s])
                    super_latt = sites[-1].lattice
                else:
                    super_latt = new_latt

                for site in sub_structure:
                    if site.specie.symbol != "X":  # We exclude vacancies.
                        sites.append(
                            PeriodicSite(
                                site.species,
                                site.frac_coords,
                                super_latt,
                                to_unit_cell=True,
                                properties=disordered_site_properties,
                            )
                        )
                    else:
                        logger.debug("Skipping sites that include species X.")
                structs.append(Structure.from_sites(sorted(sites)))

        logger.debug(f"Read in a total of {num_structs} structures.")
        return structs


class EnumError(BaseException):
    """
    Error subclass for enumeration errors.
    """

1	# Copyright (c) Pymatgen Development Team.
2	# Distributed under the terms of the MIT License.
3
4	"""	1✔
5	This module implements an interface to enumlib, Gus Hart's excellent Fortran
6	code for enumerating derivative structures.
7
8	This module depends on a compiled enumlib with the executables enum.x and
9	makestr.x available in the path. Please download the library at
10	https://github.com/msg-byu/enumlib and follow the instructions in the README to
11	compile these two executables accordingly.
12
13	If you use this module, please cite the following:
14
15	Gus L. W. Hart and Rodney W. Forcade, "Algorithm for generating derivative
16	structures," Phys. Rev. B 77 224115 (26 June 2008)
17
18	Gus L. W. Hart and Rodney W. Forcade, "Generating derivative structures from
19	multilattices: Application to hcp alloys," Phys. Rev. B 80 014120 (July 2009)
20
21	Gus L. W. Hart, Lance J. Nelson, and Rodney W. Forcade, "Generating
22	derivative structures at a fixed concentration," Comp. Mat. Sci. 59
23	101-107 (March 2012)
24
25	Wiley S. Morgan, Gus L. W. Hart, Rodney W. Forcade, "Generating derivative
26	superstructures for systems with high configurational freedom," Comp. Mat.
27	Sci. 136 144-149 (May 2017)
28	"""
29
30	from __future__ import annotations	1✔
31
32	import fractions	1✔
33	import glob	1✔
34	import itertools	1✔
35	import logging	1✔
36	import math	1✔
37	import re	1✔
38	import subprocess	1✔
39	from shutil import which	1✔
40	from threading import Timer	1✔
41
42	import numpy as np	1✔
43	from monty.dev import requires	1✔
44	from monty.fractions import lcm	1✔
45	from monty.tempfile import ScratchDir	1✔
46
47	from pymatgen.core.periodic_table import DummySpecies	1✔
48	from pymatgen.core.sites import PeriodicSite	1✔
49	from pymatgen.core.structure import Structure	1✔
50	from pymatgen.io.vasp.inputs import Poscar	1✔
51	from pymatgen.symmetry.analyzer import SpacegroupAnalyzer	1✔
52
53	logger = logging.getLogger(__name__)	1✔
54
55	# Favor the use of the newer "enum.x" by Gus Hart instead of the older
56	# "multienum.x"
57	enum_cmd = which("enum.x") or which("multienum.x")	1✔
58	# prefer makestr.x at present
59	makestr_cmd = which("makestr.x") or which("makeStr.x") or which("makeStr.py")	1✔
60
61
62	@requires(	1✔
63	enum_cmd and makestr_cmd,
64	"EnumlibAdaptor requires the executables 'enum.x' or 'multienum.x' "
65	"and 'makestr.x' or 'makeStr.py' to be in the path. Please download the "
66	"library at https://github.com/msg-byu/enumlib and follow the instructions "
67	"in the README to compile these two executables accordingly.",
68	)
69	class EnumlibAdaptor:	1✔
70	"""
71	An adaptor for enumlib.
72
73	.. attribute:: structures
74
75	List of all enumerated structures.
76	"""
77
78	amount_tol = 1e-5	1✔
79
80	def __init__(	1✔
81	self,
82	structure,
83	min_cell_size=1,
84	max_cell_size=1,
85	symm_prec=0.1,
86	enum_precision_parameter=0.001,
87	refine_structure=False,
88	check_ordered_symmetry=True,
89	timeout=None,
90	):
91	"""
92	Initializes the adapter with a structure and some parameters.
93
94	Args:
95	structure: An input structure.
96	min_cell_size (int): The minimum cell size wanted. Defaults to 1.
97	max_cell_size (int): The maximum cell size wanted. Defaults to 1.
98	symm_prec (float): Symmetry precision. Defaults to 0.1.
99	enum_precision_parameter (float): Finite precision parameter for
100	enumlib. Default of 0.001 is usually ok, but you might need to
101	tweak it for certain cells.
102	refine_structure (bool): If you are starting from a structure that
103	has been relaxed via some electronic structure code,
104	it is usually much better to start with symmetry determination
105	and then obtain a refined structure. The refined structure have
106	cell parameters and atomic positions shifted to the expected
107	symmetry positions, which makes it much less sensitive precision
108	issues in enumlib. If you are already starting from an
109	experimental cif, refinement should have already been done and
110	it is not necessary. Defaults to False.
111	check_ordered_symmetry (bool): Whether to check the symmetry of
112	the ordered sites. If the symmetry of the ordered sites is
113	lower, the lowest symmetry ordered sites is included in the
114	enumeration. This is important if the ordered sites break
115	symmetry in a way that is important getting possible
116	structures. But sometimes including ordered sites
117	slows down enumeration to the point that it cannot be
118	completed. Switch to False in those cases. Defaults to True.
119	timeout (float): If specified, will kill enumlib after specified
120	time in minutes. This can be useful for gracefully handling
121	enumerations in a high-throughput context, for some enumerations
122	which will not terminate in a realistic length of time.
123	"""
124	if refine_structure:	×
125	finder = SpacegroupAnalyzer(structure, symm_prec)	×
126	self.structure = finder.get_refined_structure()	×
127	else:
128	self.structure = structure	×
129	self.min_cell_size = min_cell_size	×
130	self.max_cell_size = max_cell_size	×
131	self.symm_prec = symm_prec	×
132	self.enum_precision_parameter = enum_precision_parameter	×
133	self.check_ordered_symmetry = check_ordered_symmetry	×
134	self.timeout = timeout	×
135
136	def run(self):	1✔
137	"""
138	Run the enumeration.
139	"""
140	# Create a temporary directory for working.
141	with ScratchDir(".") as d:	×
142	logger.debug(f"Temp dir : {d}")	×
143	# Generate input files
144	self._gen_input_file()	×
145	# Perform the actual enumeration
146	num_structs = self._run_multienum()	×
147	# Read in the enumeration output as structures.
148	if num_structs > 0:	×
149	self.structures = self._get_structures(num_structs)	×
150	else:
151	raise EnumError("Unable to enumerate structure.")	×
152
153	def _gen_input_file(self):	1✔
154	"""
155	Generate the necessary struct_enum.in file for enumlib. See enumlib
156	documentation for details.
157	"""
158	coord_format = "{:.6f} {:.6f} {:.6f}"	×
159	# Using symmetry finder, get the symmetrically distinct sites.
160	fitter = SpacegroupAnalyzer(self.structure, self.symm_prec)	×
161	symmetrized_structure = fitter.get_symmetrized_structure()	×
162	logger.debug(	×
163	f"Spacegroup {fitter.get_space_group_symbol()} ({fitter.get_space_group_number()}) "
164	f"with {len(symmetrized_structure.equivalent_sites)} distinct sites"
165	)
166	"""
167	Enumlib doesn"t work when the number of species get too large. To
168	simplify matters, we generate the input file only with disordered sites
169	and exclude the ordered sites from the enumeration. The fact that
170	different disordered sites with the exact same species may belong to
171	different equivalent sites is dealt with by having determined the
172	spacegroup earlier and labelling the species differently.
173	"""
174	# index_species and index_amounts store mappings between the indices
175	# used in the enum input file, and the actual species and amounts.
176	index_species = []	×
177	index_amounts = []	×
178
179	# Stores the ordered sites, which are not enumerated.
180	ordered_sites = []	×
181	disordered_sites = []	×
182	coord_str = []	×
183	for sites in symmetrized_structure.equivalent_sites:	×
184	if sites[0].is_ordered:	×
185	ordered_sites.append(sites)	×
186	else:
187	sp_label = []	×
188	species = dict(sites[0].species.items())	×
189	if sum(species.values()) < 1 - EnumlibAdaptor.amount_tol:	×
190	# Let us first make add a dummy element for every single
191	# site whose total occupancies don't sum to 1.
192	species[DummySpecies("X")] = 1 - sum(species.values())	×
193	for sp, amt in species.items():	×
194	if sp not in index_species:	×
195	index_species.append(sp)	×
196	sp_label.append(len(index_species) - 1)	×
197	index_amounts.append(amt * len(sites))	×
198	else:
199	ind = index_species.index(sp)	×
200	sp_label.append(ind)	×
201	index_amounts[ind] += amt * len(sites)	×
202	sp_label = "/".join(f"{i}" for i in sorted(sp_label))	×
203	for site in sites:	×
204	coord_str.append(f"{coord_format.format(*site.coords)} {sp_label}")	×
205	disordered_sites.append(sites)	×
206
207	def get_sg_info(ss):	×
208	finder = SpacegroupAnalyzer(Structure.from_sites(ss), self.symm_prec)	×
209	return finder.get_space_group_number()	×
210
211	target_sgnum = get_sg_info(symmetrized_structure.sites)	×
212	curr_sites = list(itertools.chain.from_iterable(disordered_sites))	×
213	sgnum = get_sg_info(curr_sites)	×
214	ordered_sites = sorted(ordered_sites, key=lambda sites: len(sites))	×
215	logger.debug(f"Disordered sites has sg # {sgnum}")	×
216	self.ordered_sites = []	×
217
218	# progressively add ordered sites to our disordered sites
219	# until we match the symmetry of our input structure
220	if self.check_ordered_symmetry:	×
221	while sgnum != target_sgnum and len(ordered_sites) > 0:	×
222	sites = ordered_sites.pop(0)	×
223	temp_sites = list(curr_sites) + sites	×
224	new_sgnum = get_sg_info(temp_sites)	×
225	if sgnum != new_sgnum:	×
226	logger.debug(f"Adding {sites[0].specie} in enum. New sg # {new_sgnum}")	×
227	index_species.append(sites[0].specie)	×
228	index_amounts.append(len(sites))	×
229	sp_label = len(index_species) - 1	×
230	for site in sites:	×
231	coord_str.append(f"{coord_format.format(*site.coords)} {sp_label}")	×
232	disordered_sites.append(sites)	×
233	curr_sites = temp_sites	×
234	sgnum = new_sgnum	×
235	else:
236	self.ordered_sites.extend(sites)	×
237
238	for sites in ordered_sites:	×
239	self.ordered_sites.extend(sites)	×
240
241	self.index_species = index_species	×
242
243	lattice = self.structure.lattice	×
244
245	output = [self.structure.formula, "bulk"]	×
246	for vec in lattice.matrix:	×
247	output.append(coord_format.format(*vec))	×
248	output.append(f"{len(index_species)}")	×
249	output.append(f"{len(coord_str)}")	×
250	output.extend(coord_str)	×
251
252	output.append(f"{self.min_cell_size} {self.max_cell_size}")	×
253	output.append(str(self.enum_precision_parameter))	×
254	output.append("full")	×
255
256	ndisordered = sum(len(s) for s in disordered_sites)	×
257	base = int(	×
258	ndisordered
259	* lcm(
260	*(
261	f.limit_denominator(ndisordered * self.max_cell_size).denominator
262	for f in map(fractions.Fraction, index_amounts)
263	)
264	)
265	)
266
267	# This multiplicative factor of 10 is to prevent having too small bases
268	# which can lead to rounding issues in the next step.
269	# An old bug was that a base was set to 8, with a conc of 0.4:0.6. That
270	# resulted in a range that overlaps and a conc of 0.5 satisfying this
271	# enumeration. See Cu7Te5.cif test file.
272	base *= 10	×
273
274	# base = ndisordered #10 ** int(math.ceil(math.log10(ndisordered)))
275	# To get a reasonable number of structures, we fix concentrations to the
276	# range expected in the original structure.
277	total_amounts = sum(index_amounts)	×
278	for amt in index_amounts:	×
279	conc = amt / total_amounts	×
280
281	if abs(conc * base - round(conc * base)) < 1e-5:	×
282	output.append(f"{int(round(conc * base))} {int(round(conc * base))} {base}")	×
283	else:
284	min_conc = int(math.floor(conc * base))	×
285	output.append(f"{min_conc - 1} {min_conc + 1} {base}")	×
286	output.append("")	×
287	logger.debug("Generated input file:\n{}".format("\n".join(output)))	×
288	with open("struct_enum.in", "w") as f:	×
289	f.write("\n".join(output))	×
290
291	def _run_multienum(self):	1✔
292	with subprocess.Popen([enum_cmd], stdout=subprocess.PIPE, stdin=subprocess.PIPE, close_fds=True) as p:	×
293	if self.timeout:	×
294	timed_out = False	×
295	timer = Timer(self.timeout * 60, lambda p: p.kill(), [p])	×
296
297	try:	×
298	timer.start()	×
299	output = p.communicate()[0].decode("utf-8")	×
300	finally:
301	if not timer.is_alive():	×
302	timed_out = True	×
303	timer.cancel()	×
304
305	if timed_out:	×
306	raise TimeoutError("Enumeration took too long.")	×
307
308	else:
309	output = p.communicate()[0].decode("utf-8")	×
310
311	count = 0	×
312	start_count = False	×
313	for line in output.strip().split("\n"):	×
314	if line.strip().endswith("RunTot"):	×
315	start_count = True	×
316	elif start_count and re.match(r"\d+\s+.*", line.strip()):	×
317	count = int(line.split()[-1])	×
318	logger.debug(f"Enumeration resulted in {count} structures")	×
319	return count	×
320
321	def _get_structures(self, num_structs):	1✔
322	structs = []	×
323
324	if ".py" in makestr_cmd:	×
325	options = ["-input", "struct_enum.out", str(1), str(num_structs)]	×
326	else:
327	options = ["struct_enum.out", str(0), str(num_structs - 1)]	×
328
329	with subprocess.Popen(	×
330	[makestr_cmd] + options,
331	stdout=subprocess.PIPE,
332	stdin=subprocess.PIPE,
333	close_fds=True,
334	) as rs:
335	stdout, stderr = rs.communicate()	×
336	if stderr:	×
337	logger.warning(stderr.decode())	×
338
339	# sites retrieved from enumlib will lack site properties
340	# to ensure consistency, we keep track of what site properties
341	# are missing and set them to None
342	# TODO: improve this by mapping ordered structure to original
343	# disordered structure, and retrieving correct site properties
344	disordered_site_properties = {}	×
345
346	if len(self.ordered_sites) > 0:	×
347	original_latt = self.ordered_sites[0].lattice	×
348	# Need to strip sites of site_properties, which would otherwise
349	# result in an index error. Hence Structure is reconstructed in
350	# the next step.
351	site_properties = {}	×
352	for site in self.ordered_sites:	×
353	for k, v in site.properties.items():	×
354	disordered_site_properties[k] = None	×
355	if k in site_properties:	×
356	site_properties[k].append(v)	×
357	else:
358	site_properties[k] = [v]	×
359	ordered_structure = Structure(	×
360	original_latt,
361	[site.species for site in self.ordered_sites],
362	[site.frac_coords for site in self.ordered_sites],
363	site_properties=site_properties,
364	)
365	inv_org_latt = np.linalg.inv(original_latt.matrix)	×
366	else:
367	ordered_structure = None # to fix pylint E0601	×
368	inv_org_latt = None	×
369
370	for file in glob.glob("vasp.*"):	×
371	with open(file) as f:	×
372	data = f.read()	×
373	data = re.sub(r"scale factor", "1", data)	×
374	data = re.sub(r"(\d+)-(\d+)", r"\1 -\2", data)	×
375	poscar = Poscar.from_string(data, self.index_species)	×
376	sub_structure = poscar.structure	×
377	# Enumeration may have resulted in a super lattice. We need to
378	# find the mapping from the new lattice to the old lattice, and
379	# perform supercell construction if necessary.
380	new_latt = sub_structure.lattice	×
381
382	sites = []	×
383
384	if len(self.ordered_sites) > 0:	×
385	transformation = np.dot(new_latt.matrix, inv_org_latt)	×
386	transformation = [[int(round(cell)) for cell in row] for row in transformation]	×
387	logger.debug(f"Supercell matrix: {transformation}")	×
388	s = ordered_structure * transformation	×
389	sites.extend([site.to_unit_cell() for site in s])	×
390	super_latt = sites[-1].lattice	×
391	else:
392	super_latt = new_latt	×
393
394	for site in sub_structure:	×
395	if site.specie.symbol != "X": # We exclude vacancies.	×
396	sites.append(	×
397	PeriodicSite(
398	site.species,
399	site.frac_coords,
400	super_latt,
401	to_unit_cell=True,
402	properties=disordered_site_properties,
403	)
404	)
405	else:
406	logger.debug("Skipping sites that include species X.")	×
407	structs.append(Structure.from_sites(sorted(sites)))	×
408
409	logger.debug(f"Read in a total of {num_structs} structures.")	×
410	return structs	×
411
412
413	class EnumError(BaseException):	1✔
414	"""
415	Error subclass for enumeration errors.
416	"""

materialsproject / pymatgen / 4075885785

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous