• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pyiron / structuretoolkit / 5796414896

pending completion
5796414896

Pull #47

github-actions

web-flow
Merge 55425c70c into f90d30dfc
Pull Request #47: Update interstitial class

59 of 59 new or added lines in 2 files covered. (100.0%)

2219 of 2553 relevant lines covered (86.92%)

0.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.83
/structuretoolkit/analyse/spatial.py
1
# coding: utf-8
2
# Copyright (c) Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
3
# Distributed under the terms of "New BSD License", see the LICENSE file.
4

5
import numpy as np
1✔
6
from scipy.sparse import coo_matrix
1✔
7
from scipy.spatial import ConvexHull, Delaunay, Voronoi
1✔
8

9
from structuretoolkit.analyse.neighbors import get_neighborhood, get_neighbors
1✔
10
from structuretoolkit.common.helper import (
1✔
11
    get_average_of_unique_labels,
12
    get_extended_positions,
13
    get_vertical_length,
14
    get_wrapped_coordinates,
15
)
16

17
__author__ = "Joerg Neugebauer, Sam Waseda"
1✔
18
__copyright__ = (
1✔
19
    "Copyright 2021, Max-Planck-Institut für Eisenforschung GmbH - "
20
    "Computational Materials Design (CM) Department"
21
)
22
__version__ = "1.0"
1✔
23
__maintainer__ = "Sam Waseda"
1✔
24
__email__ = "waseda@mpie.de"
1✔
25
__status__ = "production"
1✔
26
__date__ = "Sep 1, 2017"
1✔
27

28

29
def get_mean_positions(positions, cell, pbc, labels):
1✔
30
    """
31
    This function calculates the average position(-s) across periodic boundary conditions according
32
    to the labels
33

34
    Args:
35
        positions (numpy.ndarray (n, 3)): Coordinates to be averaged
36
        cell (numpy.ndarray (3, 3)): Cell dimensions
37
        pbc (numpy.ndarray (3,)): Periodic boundary conditions (in boolean)
38
        labels (numpy.ndarray (n,)): labels according to which the atoms are grouped
39

40
    Returns:
41
        (numpy.ndarray): mean positions
42
    """
43
    # Translate labels to integer enumeration (0, 1, 2, ... etc.) and get their counts
44
    _, labels, counts = np.unique(labels, return_inverse=True, return_counts=True)
1✔
45
    # Get reference point for each unique label
46
    mean_positions = positions[np.unique(labels, return_index=True)[1]]
1✔
47
    # Get displacement vectors from reference points to all other points for the same labels
48
    all_positions = positions - mean_positions[labels]
1✔
49
    # Account for pbc
50
    all_positions = np.einsum("ji,nj->ni", np.linalg.inv(cell), all_positions)
1✔
51
    all_positions[:, pbc] -= np.rint(all_positions)[:, pbc]
1✔
52
    all_positions = np.einsum("ji,nj->ni", cell, all_positions)
1✔
53
    # Add average displacement vector of each label to the reference point
54
    np.add.at(mean_positions, labels, (all_positions.T / counts[labels]).T)
1✔
55
    return mean_positions
1✔
56

57

58
def create_gridpoints(structure, n_gridpoints_per_angstrom=5):
1✔
59
    cell = get_vertical_length(structure=structure)
1✔
60
    n_points = (n_gridpoints_per_angstrom * cell).astype(int)
1✔
61
    positions = np.meshgrid(
1✔
62
        *[np.linspace(0, 1, n_points[i], endpoint=False) for i in range(3)]
63
    )
64
    positions = np.stack(positions, axis=-1).reshape(-1, 3)
1✔
65
    return np.einsum("ji,nj->ni", structure.cell, positions)
1✔
66

67

68
def remove_too_close(positions, structure, min_distance=1):
1✔
69
    neigh = get_neighborhood(structure=structure, positions=positions, num_neighbors=1)
1✔
70
    return positions[neigh.distances.flatten() > min_distance]
1✔
71

72

73
def set_to_high_symmetry_points(positions, structure, neigh, decimals=4):
1✔
74
    for _ in range(10):
1✔
75
        neigh = neigh.get_neighborhood(positions)
1✔
76
        dx = np.mean(neigh.vecs, axis=-2)
1✔
77
        if np.allclose(dx, 0):
1✔
78
            return positions
1✔
79
        positions += dx
1✔
80
        positions = get_wrapped_coordinates(structure=structure, positions=positions)
1✔
81
        unique_indices = np.unique(
1✔
82
            np.round(positions, decimals=decimals), axis=0, return_index=True
83
        )[1]
84
        positions = positions[unique_indices]
1✔
85
    raise ValueError("High symmetry points could not be detected")
×
86

87

88
def cluster_by_steinhardt(positions, neigh, l_values, q_eps, var_ratio, min_samples):
1✔
89
    """
90
    Clusters candidate positions via Steinhardt parameters and the variance in distances to host atoms.
91
    
92
    The cluster that has the lowest variance is returned, i.e. those positions that have the most "regular" coordination polyhedra.
93
    
94
    Args:
95
        positions (array): candidate positions
96
        neigh (Neighbor): neighborhood information of the candidate positions
97
        l_values (list of int): which steinhardt parameters to use for clustering
98
        q_eps (float): maximum intercluster distance in steinhardt parameters for DBSCAN clustering
99
        var_ratio (float): multiplier to make steinhardt's and distance variance numerically comparable
100
        min_samples (int): minimum size of clusters
101
    
102
    Returns:
103
         array:  Positions of the most likely interstitial sites
104
    """
105
    from sklearn.cluster import DBSCAN
1✔
106

107
    if min_samples is None:
1✔
108
        min_samples = min(len(neigh.distances), 5)
1✔
109
    neigh = neigh.get_neighborhood(positions)
1✔
110
    Q_values = np.array([neigh.get_steinhardt_parameter(ll) for ll in l_values])
1✔
111
    db = DBSCAN(q_eps, min_samples=min_samples)
1✔
112
    var = np.std(neigh.distances, axis=-1)
1✔
113
    descriptors = np.concatenate((Q_values, [var * var_ratio]), axis=0)
1✔
114
    labels = db.fit_predict(descriptors.T)
1✔
115
    var_mean = np.array(
1✔
116
        [np.mean(var[labels == ll]) for ll in np.unique(labels) if ll >= 0]
117
    )
118
    return positions[labels == np.argmin(var_mean)]
1✔
119

120

121
class Interstitials:
1✔
122
    """
123
    Class to search for interstitial sites
124

125
    This class internally does the following steps:
126

127
        0. Initialize grid points (or Voronoi vertices) which are considered as
128
            interstitial site candidates.
129
        1. Eliminate points within a distance from the nearest neighboring atoms as
130
            given by `min_distance`
131
        2. Shift interstitial candidates to the nearest symmetric points with respect to the
132
            neighboring atom sites/vertices.
133
        3. Cluster interstitial candidate positions to avoid point overlapping.
134
        4. Cluster interstitial candidates by their Steinhardt parameters (cf. `l_values` for
135
            the values of the spherical harmonics) and the variance of the distances and
136
            take the group with the smallest average distance variance
137

138
    The interstitial sites can be obtained via `positions`
139

140
    In complex structures (i.e. grain boundary, dislocation etc.), the default parameters
141
    should be chosen properly. In order to see other quantities, which potentially
142
    characterize interstitial sites, see the following class methods:
143

144
        - `get_variances()`
145
        - `get_distances()`
146
        - `get_steinhardt_parameters()`
147
        - `get_volumes()`
148
        - `get_areas()`
149

150
    Troubleshooting:
151

152
    Identifying interstitial sites is not a very easy task. The algorithm employed here will
153
    probably do a good job, but if it fails, it might be good to look at the following points
154

155
    - The intermediate results can be accessed via `run_workflow` by specifying the step number.
156
    - The most vulnerable point is the last step, clustering by Steinhardt parameters. Take a
157
        look at the step before and after. If the interstitial sites are present in the step
158
        before the clustering by Steinhardt parameters, you might want to change the values of
159
        `q_eps` and `var_ratio`. It might make a difference to use `l_values` as well.
160
    - It might fail to find sites if the box is very small. In that case it might make sense to
161
        set `min_samples` very low (you can take 1)
162
    """
163

164
    def __init__(
1✔
165
        self,
166
        structure,
167
        num_neighbors,
168
        n_gridpoints_per_angstrom=5,
169
        min_distance=1,
170
        use_voronoi=False,
171
        x_eps=0.1,
172
        l_values=np.arange(2, 13),
173
        q_eps=0.3,
174
        var_ratio=5,
175
        min_samples=None,
176
        neigh_args={},
177
        **args
178
    ):
179
        """
180

181
        Args:
182
            num_neighbors (int): Number of neighbors/vertices to consider for the interstitial
183
                sites. By definition, tetrahedral sites should have 4 vertices and octahedral
184
                sites 6.
185
            n_gridpoints_per_angstrom (int): Number of grid points per angstrom for the
186
                initialization of the interstitial candidates. The finer the mesh (i.e. the larger
187
                the value), the likelier it is to find the correct sites but then also it becomes
188
                computationally more expensive. Ignored if `use_voronoi` is set to `True`
189
            min_distance (float): Minimum distance from the nearest neighboring atoms to the
190
                positions for them to be considered as interstitial site candidates. Set
191
                `min_distance` to 0 if no point should be removed.
192
            use_voronoi (bool): Use Voronoi vertices for the initial interstitial candidate
193
                positions instead of grid points.
194
            x_eps (bool): eps value for the clustering of interstitial candidate positions
195
            l_values (list): list of values for the Steinhardt parameter values for the
196
                classification of the interstitial candidate points
197
            q_eps (float): eps value for the clustering of interstitial candidate points based
198
                on Steinhardt parameters and distance variances. This might play a crucial role
199
                in identifying the correct interstitial sites
200
            var_ratio (float): factor to be multiplied to the variance values in order to give
201
                a larger weight to the variances.
202
            min_samples (int/None): `min_sample` in the point clustering.
203
            neigh_args (dict): arguments to be added to `get_neighbors`
204
        """
205
        if use_voronoi:
1✔
206
            self.initial_positions = get_voronoi_vertices(structure)
×
207
        else:
208
            self.initial_positions = create_gridpoints(
1✔
209
                structure=structure, n_gridpoints_per_angstrom=n_gridpoints_per_angstrom
210
            )
211
        self._neigh = get_neighbors(
1✔
212
            structure=structure, num_neighbors=num_neighbors, **neigh_args
213
        )
214
        self.workflow = [
1✔
215
            {
216
                "f": remove_too_close,
217
                "args": {"structure": structure, "min_distance": min_distance},
218
            },
219
            {
220
                "f": set_to_high_symmetry_points,
221
                "args": {"structure": structure, "neigh": self.neigh},
222
            },
223
            {
224
                "f": lambda **args: get_cluster_positions(structure, **args),
225
                "args": {"eps": x_eps},
226
            },
227
            {
228
                "f": cluster_by_steinhardt,
229
                "args": {
230
                    "neigh": self.neigh,
231
                    "l_values": l_values,
232
                    "q_eps": q_eps,
233
                    "var_ratio": var_ratio,
234
                    "min_samples": min_samples,
235
                },
236
            },
237
        ]
238
        self._positions = None
1✔
239
        self.structure = structure
1✔
240

241
    def run_workflow(self, positions=None, steps=-1):
1✔
242
        if positions is None:
1✔
243
            positions = self.initial_positions.copy()
1✔
244
        for ii, ww in enumerate(self.workflow):
1✔
245
            positions = ww["f"](positions=positions, **ww["args"])
1✔
246
            if ii == steps:
1✔
247
                return positions
1✔
248
        return positions
1✔
249

250
    @property
1✔
251
    def neigh(self):
1✔
252
        """
253
        Neighborhood information of each interstitial candidate and their surrounding atoms. E.g.
254
        `class.neigh.distances[0][0]` gives the distance from the first interstitial candidate to
255
        its nearest neighboring atoms. The functionalities of `neigh` follow those of
256
        `structuretoolkit.analyse.neighbors`.
257
        """
258
        return self._neigh
1✔
259

260
    @property
1✔
261
    def positions(self):
1✔
262
        if self._positions is None:
1✔
263
            self._positions = self.run_workflow()
1✔
264
            self._neigh = self.neigh.get_neighborhood(self._positions)
1✔
265
        return self._positions
1✔
266

267
    @property
1✔
268
    def hull(self):
1✔
269
        """
270
        Convex hull of each atom. It is mainly used for the volume and area calculation of each
271
        interstitial candidate. For more info, see `get_volumes` and `get_areas`.
272
        """
273
        return [ConvexHull(v) for v in self.neigh.vecs]
1✔
274

275
    def get_variances(self):
1✔
276
        """
277
        Get variance of neighboring distances. Since interstitial sites are mostly in symmetric
278
        sites, the variance values tend to be small. In the case of fcc, both tetrahedral and
279
        octahedral sites as well as tetrahedral sites in bcc should have the value of 0.
280

281
        Returns:
282
            (numpy.array (n,)) Variance values
283
        """
284
        return np.std(self.neigh.distances, axis=-1)
1✔
285

286
    def get_distances(self, function_to_apply=np.min):
1✔
287
        """
288
        Get per-position return values of a given function for the neighbors.
289

290
        Args:
291
            function_to_apply (function): Function to apply to the distance array. Default is
292
                numpy.minimum
293

294
        Returns:
295
            (numpy.array (n,)) Function values on the distance array
296
        """
297
        return function_to_apply(self.neigh.distances, axis=-1)
1✔
298

299
    def get_steinhardt_parameters(self, l):
1✔
300
        """
301
        Args:
302
            l (int/numpy.array): Order of Steinhardt parameter
303

304
        Returns:
305
            (numpy.array (n,)) Steinhardt parameter values
306
        """
307
        return self.neigh.get_steinhardt_parameter(l=l)
1✔
308

309
    def get_volumes(self):
1✔
310
        """
311
        Returns:
312
            (numpy.array (n,)): Convex hull volume of each site.
313
        """
314
        return np.array([h.volume for h in self.hull])
1✔
315

316
    def get_areas(self):
1✔
317
        """
318
        Returns:
319
            (numpy.array (n,)): Convex hull area of each site.
320
        """
321
        return np.array([h.area for h in self.hull])
1✔
322

323

324
def get_interstitials(
1✔
325
    structure,
326
    num_neighbors,
327
    n_gridpoints_per_angstrom=5,
328
    min_distance=1,
329
    use_voronoi=False,
330
    x_eps=0.1,
331
    l_values=np.arange(2, 13),
332
    q_eps=0.3,
333
    var_ratio=5,
334
    min_samples=None,
335
    neigh_args={},
336
    **args
337
):
338
    return Interstitials(
1✔
339
        structure=structure,
340
        num_neighbors=num_neighbors,
341
        n_gridpoints_per_angstrom=n_gridpoints_per_angstrom,
342
        min_distance=min_distance,
343
        use_voronoi=use_voronoi,
344
        x_eps=x_eps,
345
        l_values=l_values,
346
        q_eps=q_eps,
347
        var_ratio=var_ratio,
348
        min_samples=min_samples,
349
        neigh_args=neigh_args,
350
        **args
351
    )
352

353

354
get_interstitials.__doc__ = (
1✔
355
    Interstitials.__doc__.replace("Class", "Function") + Interstitials.__init__.__doc__
356
)
357

358

359
def get_layers(
1✔
360
    structure,
361
    distance_threshold=0.01,
362
    id_list=None,
363
    wrap_atoms=True,
364
    planes=None,
365
    cluster_method=None,
366
):
367
    """
368
    Get an array of layer numbers.
369

370
    Args:
371
        distance_threshold (float): Distance below which two points are
372
            considered to belong to the same layer. For detailed
373
            description: sklearn.cluster.AgglomerativeClustering
374
        id_list (list/numpy.ndarray): List of atoms for which the layers
375
            should be considered.
376
        wrap_atoms (bool): Whether to consider periodic boundary conditions according to the box definition or not.
377
            If set to `False`, atoms lying on opposite box boundaries are considered to belong to different layers,
378
            regardless of whether the box itself has the periodic boundary condition in this direction or not.
379
            If `planes` is not `None` and `wrap_atoms` is `True`, this tag has the same effect as calling
380
            `get_layers()` after calling `center_coordinates_in_unit_cell()`
381
        planes (list/numpy.ndarray): Planes along which the layers are calculated. Planes are
382
            given in vectors, i.e. [1, 0, 0] gives the layers along the x-axis. Default planes
383
            are orthogonal unit vectors: [[1, 0, 0], [0, 1, 0], [0, 0, 1]]. If you have a
384
            tilted box and want to calculate the layers along the directions of the cell
385
            vectors, use `planes=np.linalg.inv(structure.cell).T`. Whatever values are
386
            inserted, they are internally normalized, so whether [1, 0, 0] is entered or
387
            [2, 0, 0], the results will be the same.
388
        cluster_method (scikit-learn cluster algorithm): if given overrides the clustering method used, must be an
389
            instance of a cluster algorithm from scikit-learn (or compatible interface)
390

391
    Returns: Array of layer numbers (same shape as structure.positions)
392

393
    Example I - how to get the number of layers in each direction:
394

395
    >>> structure = Project('.').create_structure('Fe', 'bcc', 2.83).repeat(5)
396
    >>> print('Numbers of layers:', np.max(structure.analyse.get_layers(), axis=0)+1)
397

398
    Example II - get layers of only one species:
399

400
    >>> print('Iron layers:', structure.analyse.get_layers(
401
    ...       id_list=structure.select_index('Fe')))
402

403
    The clustering algorithm can be changed with the cluster_method argument
404

405
    >>> from sklearn.cluster import DBSCAN
406
    >>> layers = structure.analyse.get_layers(cluster_method=DBSCAN())
407
    """
408
    if distance_threshold <= 0:
1✔
409
        raise ValueError("distance_threshold must be a positive float")
1✔
410
    if id_list is not None and len(id_list) == 0:
1✔
411
        raise ValueError("id_list must contain at least one id")
1✔
412
    if wrap_atoms and planes is None:
1✔
413
        positions, indices = get_extended_positions(
1✔
414
            structure=structure, width=distance_threshold, return_indices=True
415
        )
416
        if id_list is not None:
1✔
417
            id_list = np.arange(len(structure))[np.array(id_list)]
1✔
418
            id_list = np.any(id_list[:, np.newaxis] == indices[np.newaxis, :], axis=0)
1✔
419
            positions = positions[id_list]
1✔
420
            indices = indices[id_list]
1✔
421
    else:
422
        positions = structure.positions
1✔
423
        if id_list is not None:
1✔
424
            positions = positions[id_list]
1✔
425
        if wrap_atoms:
1✔
426
            positions = get_wrapped_coordinates(
1✔
427
                structure=structure, positions=positions
428
            )
429
    if planes is not None:
1✔
430
        mat = np.asarray(planes).reshape(-1, 3)
1✔
431
        positions = np.einsum(
1✔
432
            "ij,i,nj->ni", mat, 1 / np.linalg.norm(mat, axis=-1), positions
433
        )
434
    if cluster_method is None:
1✔
435
        from sklearn.cluster import AgglomerativeClustering
1✔
436

437
        cluster_method = AgglomerativeClustering(
1✔
438
            linkage="complete",
439
            n_clusters=None,
440
            distance_threshold=distance_threshold,
441
        )
442
    layers = []
1✔
443
    for ii, x in enumerate(positions.T):
1✔
444
        cluster = cluster_method.fit(x.reshape(-1, 1))
1✔
445
        first_occurrences = np.unique(cluster.labels_, return_index=True)[1]
1✔
446
        permutation = x[first_occurrences].argsort().argsort()
1✔
447
        labels = permutation[cluster.labels_]
1✔
448
        if wrap_atoms and planes is None and structure.pbc[ii]:
1✔
449
            mean_positions = get_average_of_unique_labels(labels, positions)
1✔
450
            scaled_positions = np.einsum(
1✔
451
                "ji,nj->ni", np.linalg.inv(structure.cell), mean_positions
452
            )
453
            unique_inside_box = np.all(
1✔
454
                np.absolute(scaled_positions - 0.5 + 1.0e-8) < 0.5, axis=-1
455
            )
456
            arr_inside_box = np.any(
1✔
457
                labels[:, None] == np.unique(labels)[unique_inside_box][None, :],
458
                axis=-1,
459
            )
460
            first_occurences = np.unique(indices[arr_inside_box], return_index=True)[1]
1✔
461
            labels = labels[arr_inside_box]
1✔
462
            labels -= np.min(labels)
1✔
463
            labels = labels[first_occurences]
1✔
464
        layers.append(labels)
1✔
465
    if planes is not None and len(np.asarray(planes).shape) == 1:
1✔
466
        return np.asarray(layers).flatten()
1✔
467
    return np.vstack(layers).T
1✔
468

469

470
def get_voronoi_vertices(
1✔
471
    structure, epsilon=2.5e-4, distance_threshold=0, width_buffer=10
472
):
473
    """
474
    Get voronoi vertices of the box.
475

476
    Args:
477
        epsilon (float): displacement to add to avoid wrapping of atoms at borders
478
        distance_threshold (float): distance below which two vertices are considered as one.
479
            Agglomerative clustering algorithm (sklearn) is employed. Final positions are given
480
            as the average positions of clusters.
481
        width_buffer (float): width of the layer to be added to account for pbc.
482

483
    Returns:
484
        numpy.ndarray: 3d-array of vertices
485

486
    This function detect octahedral and tetrahedral sites in fcc; in bcc it detects tetrahedral
487
    sites. In defects (e.g. vacancy, dislocation, grain boundary etc.), it gives a list of
488
    positions interstitial atoms might want to occupy. In order for this to be more successful,
489
    it might make sense to look at the distance between the voronoi vertices and their nearest
490
    neighboring atoms via:
491

492
    >>> voronoi_vertices = structure_of_your_choice.analyse.get_voronoi_vertices()
493
    >>> neigh = structure_of_your_choice.get_neighborhood(voronoi_vertices)
494
    >>> print(neigh.distances.min(axis=-1))
495

496
    """
497
    voro = Voronoi(
1✔
498
        get_extended_positions(structure=structure, width=width_buffer) + epsilon
499
    )
500
    xx = voro.vertices
1✔
501
    if distance_threshold > 0:
1✔
502
        from sklearn.cluster import AgglomerativeClustering
1✔
503

504
        cluster = AgglomerativeClustering(
1✔
505
            linkage="single", distance_threshold=distance_threshold, n_clusters=None
506
        )
507
        cluster.fit(xx)
1✔
508
        xx = get_average_of_unique_labels(cluster.labels_, xx)
1✔
509
    xx = xx[
1✔
510
        np.linalg.norm(
511
            xx - get_wrapped_coordinates(structure=structure, positions=xx, epsilon=0),
512
            axis=-1,
513
        )
514
        < epsilon
515
    ]
516
    return xx - epsilon
1✔
517

518

519
def _get_neighbors(
1✔
520
    structure,
521
    position_interpreter,
522
    data_field: str,
523
    width_buffer: float = 10,
524
) -> np.ndarray:
525
    positions, indices = get_extended_positions(
1✔
526
        structure=structure, width=width_buffer, return_indices=True
527
    )
528
    interpretation = position_interpreter(positions)
1✔
529
    data = getattr(interpretation, data_field)
1✔
530
    x = positions[data]
1✔
531
    return indices[
1✔
532
        data[
533
            np.isclose(get_wrapped_coordinates(structure=structure, positions=x), x)
534
            .all(axis=-1)
535
            .any(axis=-1)
536
        ]
537
    ]
538

539

540
def get_voronoi_neighbors(structure, width_buffer: float = 10) -> np.ndarray:
1✔
541
    """
542
    Get pairs of atom indices sharing the same Voronoi vertices/areas.
543

544
    Args:
545
        width_buffer (float): Width of the layer to be added to account for pbc.
546

547
    Returns:
548
        pairs (ndarray): Pair indices
549
    """
550
    return _get_neighbors(
1✔
551
        structure=structure,
552
        position_interpreter=Voronoi,
553
        data_field="ridge_points",
554
        width_buffer=width_buffer,
555
    )
556

557

558
def get_delaunay_neighbors(structure, width_buffer: float = 10.0) -> np.ndarray:
1✔
559
    """
560
    Get indices of atoms sharing the same Delaunay tetrahedrons (commonly known as Delaunay
561
    triangles), i.e. indices of neighboring atoms, which form a tetrahedron, in which no other
562
    atom exists.
563

564
    Args:
565
        width_buffer (float): Width of the layer to be added to account for pbc.
566

567
    Returns:
568
        pairs (ndarray): Delaunay neighbor indices
569
    """
570
    return _get_neighbors(
1✔
571
        structure=structure,
572
        position_interpreter=Delaunay,
573
        data_field="simplices",
574
        width_buffer=width_buffer,
575
    )
576

577

578
def get_cluster_positions(
1✔
579
    structure, positions=None, eps=1, buffer_width=None, return_labels=False
580
):
581
    """
582
    Cluster positions according to the distances. Clustering algorithm uses DBSCAN:
583

584
    https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html
585

586
    Example I:
587

588
    ```
589
    analyse = Analyze(some_ase_structure)
590
    positions = analyse.cluster_points(eps=2)
591
    ```
592

593
    This example should return the atom positions, if no two atoms lie within a distance of 2.
594
    If there are at least two atoms which lie within a distance of 2, their entries will be
595
    replaced by their mean position.
596

597
    Example II:
598

599
    ```
600
    analyse = Analyze(some_ase_structure)
601
    print(analyse.cluster_positions([3*[0.], 3*[1.]], eps=3))
602
    ```
603

604
    This returns `[0.5, 0.5, 0.5]` (if the cell is large enough)
605

606
    Args:
607
        positions (numpy.ndarray): Positions to consider. Default: atom positions
608
        eps (float): The maximum distance between two samples for one to be considered as in
609
            the neighborhood of the other.
610
        buffer_width (float): Buffer width to consider across the periodic boundary
611
            conditions. If too small, it is possible that atoms that are meant to belong
612
            together across PBC are missed. Default: Same as eps
613
        return_labels (bool): Whether to return the labels given according to the grouping
614
            together with the mean positions
615

616
    Returns:
617
        positions (numpy.ndarray): Mean positions
618
        label (numpy.ndarray): Labels of the positions (returned when `return_labels = True`)
619
    """
620
    from sklearn.cluster import DBSCAN
1✔
621

622
    positions = structure.positions if positions is None else np.array(positions)
1✔
623
    if buffer_width is None:
1✔
624
        buffer_width = eps
1✔
625
    extended_positions, indices = get_extended_positions(
1✔
626
        structure=structure,
627
        width=buffer_width,
628
        return_indices=True,
629
        positions=positions,
630
    )
631
    labels = DBSCAN(eps=eps, min_samples=1).fit_predict(extended_positions)
1✔
632
    coo = coo_matrix((labels, (np.arange(len(labels)), indices)))
1✔
633
    labels = coo.max(axis=0).toarray().flatten()
1✔
634
    # make labels look nicer
635
    labels = np.unique(labels, return_inverse=True)[1]
1✔
636
    mean_positions = get_mean_positions(
1✔
637
        positions, structure.cell, structure.pbc, labels
638
    )
639
    if return_labels:
1✔
640
        return mean_positions, labels
1✔
641
    return mean_positions
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc