5729289971

pending completion

Build # 5729289971

Build Type

Pull #47

github-actions

Committed by

web-flow

Commit Message

Merge 63e68d88b into de8f8fd69

Pull Request Pull Request #47: Update interstitial class

Run Details

51 of 51 new or added lines in 1 file covered. (100.0%)

2209 of 2543 relevant lines covered (86.87%)

0.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.24

/structuretoolkit/analyse/spatial.py

# coding: utf-8
# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
# Distributed under the terms of "New BSD License", see the LICENSE file.

import numpy as np
from scipy.sparse import coo_matrix
from scipy.spatial import ConvexHull, Delaunay, Voronoi

from structuretoolkit.analyse.neighbors import get_neighborhood, get_neighbors
from structuretoolkit.common.helper import (
    get_average_of_unique_labels,
    get_extended_positions,
    get_vertical_length,
    get_wrapped_coordinates,
)
from sklearn.cluster import DBSCAN

# Module metadata (authorship, license holder, version and maintenance status).
__author__ = "Joerg Neugebauer, Sam Waseda"
__copyright__ = (
    "Copyright 2021, Max-Planck-Institut für Eisenforschung GmbH - "
    "Computational Materials Design (CM) Department"
)
__version__ = "1.0"
__maintainer__ = "Sam Waseda"
__email__ = "waseda@mpie.de"
__status__ = "production"
__date__ = "Sep 1, 2017"


def get_mean_positions(positions, cell, pbc, labels):
    """
    This function calculates the average position(-s) across periodic boundary conditions according
    to the labels

    Args:
        positions (numpy.ndarray (n, 3)): Coordinates to be averaged. Any array-like is accepted;
            it is converted to a float array internally and the input is not modified.
        cell (numpy.ndarray (3, 3)): Cell dimensions
        pbc (numpy.ndarray (3,)): Periodic boundary conditions (in boolean)
        labels (numpy.ndarray (n,)): labels according to which the atoms are grouped

    Returns:
        (numpy.ndarray): mean positions, one row per unique label, ordered like the sorted
            unique labels
    """
    # Work on a float array: with integer input the accumulation at the end would run on an
    # integer buffer, and a plain list could not be fancy-indexed below.
    positions = np.asarray(positions, dtype=float)
    # Translate labels to integer enumeration (0, 1, 2, ... etc.) and get, in a single pass,
    # the index of the first member of each label as well as the member counts
    _, first_index, labels, counts = np.unique(
        labels, return_index=True, return_inverse=True, return_counts=True
    )
    # Reference point for each unique label (fancy indexing returns a copy)
    mean_positions = positions[first_index]
    # Displacement vectors from the reference points to all points carrying the same label
    displacements = positions - mean_positions[labels]
    # Account for pbc: go to relative coordinates, remove whole-cell shifts along the
    # periodic directions, then go back to Cartesian coordinates
    displacements = np.einsum("ji,nj->ni", np.linalg.inv(cell), displacements)
    displacements[:, pbc] -= np.rint(displacements)[:, pbc]
    displacements = np.einsum("ji,nj->ni", cell, displacements)
    # Add average displacement vector of each label to the reference point
    np.add.at(mean_positions, labels, (displacements.T / counts[labels]).T)
    return mean_positions


def create_gridpoints(structure, n_gridpoints_per_angstrom=5):
    """
    Create a regular mesh of points spanning the cell of the structure.

    Args:
        structure: Structure whose `cell` defines the mesh
        n_gridpoints_per_angstrom (int): Point density; it is multiplied by the lengths returned
            by `get_vertical_length` and truncated to integers to get the number of points per
            direction

    Returns:
        (numpy.ndarray (n, 3)): Mesh points; relative coordinates in [0, 1) are multiplied by
            `structure.cell`
    """
    lengths = get_vertical_length(structure=structure)
    counts = (n_gridpoints_per_angstrom * lengths).astype(int)
    # One axis of relative coordinates per cell direction, end point excluded
    axes = [np.linspace(0, 1, counts[direction], endpoint=False) for direction in range(3)]
    mesh = np.meshgrid(*axes)
    relative = np.stack(mesh, axis=-1).reshape(-1, 3)
    return np.einsum("ji,nj->ni", structure.cell, relative)


def remove_too_close(positions, structure, min_distance=1):
    """
    Drop positions whose nearest neighbor in the structure is not farther than `min_distance`.

    Args:
        positions (numpy.ndarray): Candidate positions
        structure: Structure in which the nearest neighbor of each position is searched
        min_distance (float): Only positions with a nearest-neighbor distance strictly larger
            than this value are kept

    Returns:
        (numpy.ndarray): Remaining positions
    """
    nearest = get_neighborhood(
        structure=structure, positions=positions, num_neighbors=1
    )
    far_enough = nearest.distances.flatten() > min_distance
    return positions[far_enough]


def set_to_high_symmetry_points(
    positions, structure, neigh, decimals=4, max_iterations=10
):
    """
    Shift positions by the mean of their neighbor vectors until that mean vanishes.

    In every iteration the positions are moved by the average neighbor vector, wrapped via
    `get_wrapped_coordinates` and reduced to unique entries (compared after rounding).

    Args:
        positions (numpy.ndarray (n, 3)): Starting positions. The array passed in is not
            modified; if the first iteration already converges, the same array is returned.
        structure: Structure handed to `get_wrapped_coordinates`
        neigh: Object providing `get_neighborhood(positions)`; the object returned by that call
            must provide `vecs` and `get_neighborhood` itself
        decimals (int): Number of decimals used for rounding when duplicates are removed
        max_iterations (int): Maximum number of shift iterations

    Returns:
        (numpy.ndarray): Positions for which the mean neighbor vector is (numerically) zero

    Raises:
        ValueError: If the mean neighbor vectors do not vanish within `max_iterations`
    """
    for _ in range(max_iterations):
        neigh = neigh.get_neighborhood(positions)
        dx = np.mean(neigh.vecs, axis=-2)
        if np.allclose(dx, 0):
            return positions
        # Out-of-place addition: `positions += dx` would overwrite the caller's array
        positions = positions + dx
        positions = get_wrapped_coordinates(structure=structure, positions=positions)
        # Keep one representative of positions that coincide after rounding
        unique_indices = np.unique(
            np.round(positions, decimals=decimals), axis=0, return_index=True
        )[1]
        positions = positions[unique_indices]
    raise ValueError("High symmetry points could not be detected")


def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio, min_samples):
    """
    Cluster positions by Steinhardt parameters and distance spread and return the cluster with
    the smallest mean spread.

    Args:
        positions (numpy.ndarray (n, 3)): Candidate positions
        neigh: Object providing `distances` and `get_neighborhood(positions)`; the object
            returned by that call must provide `get_steinhardt_parameter` and `distances`
        l_values (list): Orders of the Steinhardt parameters used as descriptors
        q_eps (float): `eps` of the DBSCAN clustering in descriptor space
        var_ratio (float): Factor multiplied to the standard deviation of the neighbor distances
            before it is appended to the descriptors
        min_samples (int/None): `min_samples` of DBSCAN. If `None`, the smaller of 5 and
            `len(neigh.distances)` (evaluated on the `neigh` passed in) is used

    Returns:
        (numpy.ndarray): Positions of the cluster with the smallest mean distance spread

    Raises:
        ValueError: If DBSCAN assigns every position to noise, so that no cluster exists
    """
    if min_samples is None:
        min_samples = min(len(neigh.distances), 5)
    neigh = neigh.get_neighborhood(positions)
    Q_values = np.array([neigh.get_steinhardt_parameter(ll) for ll in l_values])
    db = DBSCAN(q_eps, min_samples=min_samples)
    var = np.std(neigh.distances, axis=-1)
    descriptors = np.concatenate((Q_values, [var * var_ratio]), axis=0)
    labels = db.fit_predict(descriptors.T)
    # Negative labels mark noise and do not form a cluster
    cluster_labels = np.unique(labels[labels >= 0])
    if len(cluster_labels) == 0:
        raise ValueError(
            "No cluster found: all positions were classified as noise. "
            "Try a larger q_eps or a smaller min_samples"
        )
    var_mean = np.array([np.mean(var[labels == ll]) for ll in cluster_labels])
    # Translate the argmin index back to the label instead of assuming they coincide
    return positions[labels == cluster_labels[np.argmin(var_mean)]]


class Interstitials:
    """
    Class to search for interstitial sites

    This class internally does the following steps:

        0. Initialize grid points (or Voronoi vertices) which are considered as
            interstitial site candidates.
        1. Eliminate points within a distance from the nearest neighboring atoms as
            given by `min_distance`
        2. Shift interstitial candidates to the nearest symmetric points with respect to the
            neighboring atom sites/vertices.
        3. Cluster interstitial candidates to avoid point overlapping.
        4. Cluster interstitial candidates by their Steinhardt parameters (cf. `l_values` for
            the values of the spherical harmonics) and the spread of the distances and
            take the group with the smallest average distance spread

    The interstitial sites can be obtained via `positions`

    In complex structures (i.e. grain boundary, dislocation etc.), the default parameters
    should be chosen properly. In order to see other quantities, which potentially
    characterize interstitial sites, see the following class methods:

        - `get_variances()`
        - `get_distances()`
        - `get_steinhardt_parameters()`
        - `get_volumes()`
        - `get_areas()`

    These methods evaluate `neigh`. `neigh` refers to the interstitial sites only after
    `positions` has been evaluated once; before that it holds the object returned by
    `get_neighbors` for the structure itself.

    Troubleshooting:

    Identifying interstitial sites is not a very easy task. The algorithm employed here will
    probably do a good job, but if it fails, it might be good to look at the following points

    - The intermediate results can be accessed via `run_workflow` by specifying the step number.
    - The most vulnerable point is the last step, clustering by Steinhardt parameters. Take a
        look at the step before and after. If the interstitial sites are present in the step
        before the clustering by Steinhardt parameters, you might want to change the values of
        `q_eps` and `var_ratio`. It might make a difference to use `l_values` as well.
    - It might fail to find sites if the box is very small. In that case it might make sense to
        set `min_samples` very low (you can take 1)
    """

    def __init__(
        self,
        structure,
        num_neighbors,
        n_gridpoints_per_angstrom=5,
        min_distance=1,
        use_voronoi=False,
        x_eps=0.1,
        l_values=np.arange(2, 13),
        q_eps=0.3,
        var_ratio=5,
        min_samples=None,
        neigh_args=None,
        **args
    ):
        """

        Args:
            structure: Structure in which the interstitial sites are searched
            num_neighbors (int): Number of neighbors/vertices to consider for the interstitial
                sites. By definition, tetrahedral sites should have 4 vertices and octahedral
                sites 6.
            n_gridpoints_per_angstrom (int): Number of grid points per angstrom for the
                initialization of the interstitial candidates. The finer the mesh (i.e. the larger
                the value), the likelier it is to find the correct sites but then also it becomes
                computationally more expensive. Ignored if `use_voronoi` is set to `True`
            min_distance (float): Minimum distance from the nearest neighboring atoms to the
                positions for them to be considered as interstitial site candidates. Set
                `min_distance` to 0 if no point should be removed.
            use_voronoi (bool): Use Voronoi vertices for the initial interstitial candidate
                positions instead of grid points.
            x_eps (float): eps value for the clustering of interstitial candidate positions
            l_values (list): list of values for the Steinhardt parameter values for the
                classification of the interstitial candidate points
            q_eps (float): eps value for the clustering of interstitial candidate points based
                on Steinhardt parameters and distance spread. This might play a crucial role
                in identifying the correct interstitial sites
            var_ratio (float): factor to be multiplied to the distance spread values in order to
                give them a larger weight.
            min_samples (int/None): `min_sample` in the point clustering.
            neigh_args (dict/None): arguments to be added to `get_neighbors`. `None` (default)
                is equivalent to an empty dictionary.
            **args: Further keyword arguments are accepted but not used.
        """
        # `None` instead of a shared `{}` default avoids a mutable default argument
        if neigh_args is None:
            neigh_args = {}
        if use_voronoi:
            self.initial_positions = get_voronoi_vertices(structure)
        else:
            self.initial_positions = create_gridpoints(
                structure=structure, n_gridpoints_per_angstrom=n_gridpoints_per_angstrom
            )
        self._neigh = get_neighbors(
            structure=structure, num_neighbors=num_neighbors, **neigh_args
        )
        # Each step is called as f(positions=..., **args); see `run_workflow`.
        # The steps keep a reference to the neighbor object created above, also after
        # `positions` has replaced `self._neigh`.
        self.workflow = [
            {
                "f": remove_too_close,
                "args": {"structure": structure, "min_distance": min_distance},
            },
            {
                "f": set_to_high_symmetry_points,
                "args": {"structure": structure, "neigh": self.neigh},
            },
            {
                "f": lambda **args: get_cluster_positions(structure, **args),
                "args": {"eps": x_eps},
            },
            {
                "f": cluster_by_steinhardt,
                "args": {
                    "neigh": self.neigh,
                    "l_values": l_values,
                    "q_eps": q_eps,
                    "var_ratio": var_ratio,
                    "min_samples": min_samples,
                },
            },
        ]
        self._positions = None
        self.structure = structure

    def run_workflow(self, positions=None, steps=-1):
        """
        Apply the workflow steps one after another to the positions.

        Args:
            positions (numpy.ndarray/None): Starting positions. Default: a copy of
                `initial_positions`
            steps (int): Index (starting from 0) of the last workflow step to be executed. With
                a value that matches no step index, such as the default -1, all steps are
                executed.

        Returns:
            (numpy.ndarray): Positions after the last executed step
        """
        if positions is None:
            positions = self.initial_positions.copy()
        for ii, ww in enumerate(self.workflow):
            positions = ww["f"](positions=positions, **ww["args"])
            if ii == steps:
                return positions
        return positions

    @property
    def neigh(self):
        """
        Neighborhood information of each interstitial candidate and their surrounding atoms. E.g.
        `class.neigh.distances[0][0]` gives the distance from the first interstitial candidate to
        its nearest neighboring atoms. The functionalities of `neigh` follow those of
        `structuretoolkit.analyse.neighbors`.

        Until `positions` has been evaluated, this is the object returned by `get_neighbors`
        for the structure.
        """
        return self._neigh

    @property
    def positions(self):
        """
        Interstitial positions, i.e. the result of the full workflow.

        The workflow is run on first access and the result is stored; at the same time `neigh`
        is replaced by the neighborhood of the resulting positions.
        """
        if self._positions is None:
            self._positions = self.run_workflow()
            self._neigh = self.neigh.get_neighborhood(self._positions)
        return self._positions

    @property
    def hull(self):
        """
        Convex hulls spanned by the neighbor vectors in `neigh.vecs`, one per entry. They are
        mainly used for the volume and area calculation of each interstitial candidate. For
        more info, see `get_volumes` and `get_areas`.
        """
        return [ConvexHull(v) for v in self.neigh.vecs]

    def get_variances(self):
        """
        Get the spread of neighboring distances, computed as the standard deviation
        (`numpy.std`) of `neigh.distances`. Since interstitial sites are mostly in symmetric
        sites, the values tend to be small. In the case of fcc, both tetrahedral and
        octahedral sites as well as tetrahedral sites in bcc should have the value of 0.

        Returns:
            (numpy.array (n,)) Standard deviation of the neighbor distances
        """
        return np.std(self.neigh.distances, axis=-1)

    def get_distances(self, function_to_apply=np.min):
        """
        Get per-position return values of a given function for the neighbors.

        Args:
            function_to_apply (function): Function to apply to the distance array; it is called
                with `axis=-1`. Default is numpy.min

        Returns:
            (numpy.array (n,)) Function values on the distance array
        """
        return function_to_apply(self.neigh.distances, axis=-1)

    def get_steinhardt_parameters(self, l):
        """
        Args:
            l (int/numpy.array): Order of Steinhardt parameter

        Returns:
            (numpy.array (n,)) Steinhardt parameter values
        """
        return self.neigh.get_steinhardt_parameter(l=l)

    def get_volumes(self):
        """
        Returns:
            (numpy.array (n,)): Convex hull volume of each site.
        """
        return np.array([h.volume for h in self.hull])

    def get_areas(self):
        """
        Returns:
            (numpy.array (n,)): Convex hull area of each site.
        """
        return np.array([h.area for h in self.hull])


def get_interstitials(
    structure,
    num_neighbors,
    n_gridpoints_per_angstrom=5,
    min_distance=1,
    use_voronoi=False,
    x_eps=0.1,
    l_values=np.arange(2, 13),
    q_eps=0.3,
    var_ratio=5,
    min_samples=None,
    neigh_args={},
    **args
):
    # Function-style entry point: every option is forwarded unchanged to `Interstitials`,
    # and the created instance is returned.
    options = {
        "structure": structure,
        "num_neighbors": num_neighbors,
        "n_gridpoints_per_angstrom": n_gridpoints_per_angstrom,
        "min_distance": min_distance,
        "use_voronoi": use_voronoi,
        "x_eps": x_eps,
        "l_values": l_values,
        "q_eps": q_eps,
        "var_ratio": var_ratio,
        "min_samples": min_samples,
        "neigh_args": neigh_args,
    }
    return Interstitials(**options, **args)


# The function reuses the documentation of the class and of its constructor; in the class
# docstring the word "Class" is replaced by "Function".
get_interstitials.__doc__ = (
    Interstitials.__doc__.replace("Class", "Function") + Interstitials.__init__.__doc__
)


def get_layers(
    structure,
    distance_threshold=0.01,
    id_list=None,
    wrap_atoms=True,
    planes=None,
    cluster_method=None,
):
    """
    Get an array of layer numbers.

    Args:
        structure: Structure whose atoms are assigned to layers
        distance_threshold (float): Distance below which two points are
            considered to belong to the same layer. For detailed
            description: sklearn.cluster.AgglomerativeClustering
        id_list (list/numpy.ndarray): List of atoms for which the layers
            should be considered.
        wrap_atoms (bool): Whether to consider periodic boundary conditions according to the box definition or not.
            If set to `False`, atoms lying on opposite box boundaries are considered to belong to different layers,
            regardless of whether the box itself has the periodic boundary condition in this direction or not.
            If `planes` is not `None` and `wrap_atoms` is `True`, this tag has the same effect as calling
            `get_layers()` after calling `center_coordinates_in_unit_cell()`
        planes (list/numpy.ndarray): Planes along which the layers are calculated. Planes are
            given in vectors, i.e. [1, 0, 0] gives the layers along the x-axis. Default planes
            are orthogonal unit vectors: [[1, 0, 0], [0, 1, 0], [0, 0, 1]]. If you have a
            tilted box and want to calculate the layers along the directions of the cell
            vectors, use `planes=np.linalg.inv(structure.cell).T`. Whatever values are
            inserted, they are internally normalized, so whether [1, 0, 0] is entered or
            [2, 0, 0], the results will be the same.
        cluster_method (scikit-learn cluster algorithm): if given overrides the clustering method used, must be an
            instance of a cluster algorithm from scikit-learn (or compatible interface)

    Returns: Array of layer numbers (same shape as structure.positions)

    Raises:
        ValueError: If `distance_threshold` is not positive or `id_list` is empty

    Example I - how to get the number of layers in each direction:

    >>> structure = Project('.').create_structure('Fe', 'bcc', 2.83).repeat(5)
    >>> print('Numbers of layers:', np.max(structure.analyse.get_layers(), axis=0)+1)

    Example II - get layers of only one species:

    >>> print('Iron layers:', structure.analyse.get_layers(
    ...       id_list=structure.select_index('Fe')))

    The clustering algorithm can be changed with the cluster_method argument

    >>> from sklearn.cluster import DBSCAN
    >>> layers = structure.analyse.get_layers(cluster_method=DBSCAN())
    """
    if distance_threshold <= 0:
        raise ValueError("distance_threshold must be a positive float")
    if id_list is not None and len(id_list) == 0:
        raise ValueError("id_list must contain at least one id")

    # With wrapping and default planes the clustering runs on the positions returned by
    # `get_extended_positions`; `origin` holds the index returned alongside each of them
    use_extended = wrap_atoms and planes is None
    if use_extended:
        coords, origin = get_extended_positions(
            structure=structure, width=distance_threshold, return_indices=True
        )
        if id_list is not None:
            selected = np.arange(len(structure))[np.array(id_list)]
            keep = np.any(selected[:, np.newaxis] == origin[np.newaxis, :], axis=0)
            coords = coords[keep]
            origin = origin[keep]
    else:
        coords = structure.positions
        if id_list is not None:
            coords = coords[id_list]
        if wrap_atoms:
            coords = get_wrapped_coordinates(structure=structure, positions=coords)

    if planes is not None:
        # Project the positions onto the normalized plane vectors
        directions = np.asarray(planes).reshape(-1, 3)
        inverse_norms = 1 / np.linalg.norm(directions, axis=-1)
        coords = np.einsum("ij,i,nj->ni", directions, inverse_norms, coords)

    if cluster_method is None:
        from sklearn.cluster import AgglomerativeClustering

        cluster_method = AgglomerativeClustering(
            linkage="complete",
            n_clusters=None,
            distance_threshold=distance_threshold,
        )

    layers = []
    for axis, column in enumerate(coords.T):
        fitted = cluster_method.fit(column.reshape(-1, 1))
        # Renumber the clusters by the coordinate of the first member of each cluster
        _, first_member = np.unique(fitted.labels_, return_index=True)
        rank = column[first_member].argsort().argsort()
        layer_ids = rank[fitted.labels_]
        if use_extended and structure.pbc[axis]:
            centers = get_average_of_unique_labels(layer_ids, coords)
            relative_centers = np.einsum(
                "ji,nj->ni", np.linalg.inv(structure.cell), centers
            )
            # Layers whose mean position lies within the box
            layer_in_box = np.all(
                np.absolute(relative_centers - 0.5 + 1.0e-8) < 0.5, axis=-1
            )
            kept_layers = np.unique(layer_ids)[layer_in_box]
            point_in_box = np.any(
                layer_ids[:, None] == kept_layers[None, :],
                axis=-1,
            )
            # One entry per distinct original index, sorted by that index
            _, representative = np.unique(origin[point_in_box], return_index=True)
            layer_ids = layer_ids[point_in_box]
            layer_ids -= np.min(layer_ids)
            layer_ids = layer_ids[representative]
        layers.append(layer_ids)

    if planes is not None and len(np.asarray(planes).shape) == 1:
        return np.asarray(layers).flatten()
    return np.vstack(layers).T


def get_voronoi_vertices(
    structure, epsilon=2.5e-4, distance_threshold=0, width_buffer=10
):
    """
    Get voronoi vertices of the box.

    Args:
        structure: Structure for which the Voronoi vertices are computed
        epsilon (float): displacement to add to avoid wrapping of atoms at borders
        distance_threshold (float): distance below which two vertices are considered as one.
            Agglomerative clustering algorithm (sklearn) is employed. Final positions are given
            as the average positions of clusters.
        width_buffer (float): width of the layer to be added to account for pbc.

    Returns:
        numpy.ndarray: 3d-array of vertices

    This function detects octahedral and tetrahedral sites in fcc; in bcc it detects tetrahedral
    sites. In defects (e.g. vacancy, dislocation, grain boundary etc.), it gives a list of
    positions interstitial atoms might want to occupy. In order for this to be more successful,
    it might make sense to look at the distance between the voronoi vertices and their nearest
    neighboring atoms via:

    >>> voronoi_vertices = structure_of_your_choice.analyse.get_voronoi_vertices()
    >>> neigh = structure_of_your_choice.get_neighborhood(voronoi_vertices)
    >>> print(neigh.distances.min(axis=-1))

    """
    shifted_atoms = (
        get_extended_positions(structure=structure, width=width_buffer) + epsilon
    )
    vertices = Voronoi(shifted_atoms).vertices
    if distance_threshold > 0:
        from sklearn.cluster import AgglomerativeClustering

        merger = AgglomerativeClustering(
            linkage="single", distance_threshold=distance_threshold, n_clusters=None
        )
        merger.fit(vertices)
        vertices = get_average_of_unique_labels(merger.labels_, vertices)
    # Keep only vertices that wrapping leaves (nearly) unchanged
    wrapped = get_wrapped_coordinates(
        structure=structure, positions=vertices, epsilon=0
    )
    unchanged = np.linalg.norm(vertices - wrapped, axis=-1) < epsilon
    # Undo the shift applied to the atoms above
    return vertices[unchanged] - epsilon


def _get_neighbors(
    structure,
    position_interpreter,
    data_field: str,
    width_buffer: float = 10,
) -> np.ndarray:
    """
    Shared implementation of the Voronoi/Delaunay neighbor search.

    Args:
        structure: Structure to analyse
        position_interpreter: Callable applied to the extended positions (e.g. `Voronoi` or
            `Delaunay`)
        data_field (str): Attribute of the interpreter result holding groups of point indices
        width_buffer (float): Width of the layer to be added to account for pbc.

    Returns:
        (numpy.ndarray): Index groups, translated to the indices returned by
            `get_extended_positions`, restricted to groups with at least one member whose
            position is unchanged by `get_wrapped_coordinates`
    """
    extended, origin = get_extended_positions(
        structure=structure, width=width_buffer, return_indices=True
    )
    groups = getattr(position_interpreter(extended), data_field)
    member_positions = extended[groups]
    wrapped = get_wrapped_coordinates(structure=structure, positions=member_positions)
    # A member counts as unchanged when all of its coordinates survive wrapping
    member_unchanged = np.isclose(wrapped, member_positions).all(axis=-1)
    group_selected = member_unchanged.any(axis=-1)
    return origin[groups[group_selected]]


def get_voronoi_neighbors(structure, width_buffer: float = 10) -> np.ndarray:
    """
    Get pairs of atom indices sharing the same Voronoi vertices/areas.

    The pairs are read from the `ridge_points` of `scipy.spatial.Voronoi`.

    Args:
        structure: Structure to analyse
        width_buffer (float): Width of the layer to be added to account for pbc.

    Returns:
        pairs (ndarray): Pair indices
    """
    return _get_neighbors(structure, Voronoi, "ridge_points", width_buffer)


def get_delaunay_neighbors(structure, width_buffer: float = 10.0) -> np.ndarray:
    """
    Get indices of atoms sharing the same Delaunay tetrahedrons (commonly known as Delaunay
    triangles), i.e. indices of neighboring atoms, which form a tetrahedron, in which no other
    atom exists.

    The groups are read from the `simplices` of `scipy.spatial.Delaunay`.

    Args:
        structure: Structure to analyse
        width_buffer (float): Width of the layer to be added to account for pbc.

    Returns:
        pairs (ndarray): Delaunay neighbor indices
    """
    return _get_neighbors(structure, Delaunay, "simplices", width_buffer)


def get_cluster_positions(
    structure, positions=None, eps=1, buffer_width=None, return_labels=False
):
    """
    Cluster positions according to the distances. Clustering algorithm uses DBSCAN:

    https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html

    Example I:

    ```
    positions = get_cluster_positions(structure, eps=2)
    ```

    This example should return the atom positions, if no two atoms lie within a distance of 2.
    If there are at least two atoms which lie within a distance of 2, their entries will be
    replaced by their mean position.

    Example II:

    ```
    print(get_cluster_positions(structure, [3*[0.], 3*[1.]], eps=3))
    ```

    This returns `[0.5, 0.5, 0.5]` (if the cell is large enough)

    Args:
        structure: Structure providing the cell, the periodic boundary conditions and the
            default positions
        positions (numpy.ndarray): Positions to consider. Default: atom positions
        eps (float): The maximum distance between two samples for one to be considered as in
            the neighborhood of the other.
        buffer_width (float): Buffer width to consider across the periodic boundary
            conditions. If too small, it is possible that atoms that are meant to belong
            together across PBC are missed. Default: Same as eps
        return_labels (bool): Whether to return the labels given according to the grouping
            together with the mean positions

    Returns:
        positions (numpy.ndarray): Mean positions
        label (numpy.ndarray): Labels of the positions (returned when `return_labels = True`)
    """
    if positions is None:
        positions = structure.positions
    else:
        positions = np.array(positions)
    width = eps if buffer_width is None else buffer_width
    extended, origin = get_extended_positions(
        structure=structure,
        width=width,
        return_indices=True,
        positions=positions,
    )
    # min_samples=1: every point belongs to some cluster
    image_labels = DBSCAN(eps=eps, min_samples=1).fit_predict(extended)
    # For every original position take the largest label found among its entries
    rows = np.arange(len(image_labels))
    label_table = coo_matrix((image_labels, (rows, origin)))
    labels = label_table.max(axis=0).toarray().flatten()
    # Renumber the labels consecutively
    _, labels = np.unique(labels, return_inverse=True)
    mean_positions = get_mean_positions(
        positions, structure.cell, structure.pbc, labels
    )
    if not return_labels:
        return mean_positions
    return mean_positions, labels

pyiron / structuretoolkit / 5729289971

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous