• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

GMPavanLab / SOAPify / 4607244921

pending completion
4607244921

push

github

GitHub
Adding examples (#77)

11 of 91 new or added lines in 5 files covered. (12.09%)

1 existing line in 1 file now uncovered.

830 of 929 relevant lines covered (89.34%)

2.68 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.83
/src/SOAPify/utils.py
1
"""Utilities submodule, cointains unclassified support functions
3✔
2
Author: Daniele Rapetti"""
3
from itertools import combinations_with_replacement
3✔
4
import numpy
3✔
5
from ase.data import atomic_numbers
3✔
6
import h5py
3✔
7

8

9
def _SOAPpstr(l, Z, n, Zp, np) -> str:
3✔
10
    if atomic_numbers[Z] < atomic_numbers[Zp]:
3✔
11
        Z, Zp = Zp, Z
3✔
12
        n, np = np, n
3✔
13
    return f"{l}_{Z}{n}_{Zp}{np}"
3✔
14

15

16
def getdscribeSOAPMapping(
3✔
17
    lmax: int, nmax: int, species: "list[str]", crossover: bool = True
18
) -> numpy.ndarray:
19
    """returns how dscribe saves the SOAP results
20

21
       return a list of string with the identities of the data returned from
22
       dscribe, see the note in
23
       https://singroup.github.io/dscribe/1.2.x/tutorials/descriptors/soap.html
24

25
    Args:
26
        lmax (int): the lmax specified in the calculation.
27
        nmax (int): the nmax specified in the calculation.
28
        species (list[str]): the list of atomic species.
29
        crossover (bool):
30
            if True, the SOAP descriptors are generated for the mixed species.
31
            Defaults to True.
32

33
    Returns:
34
        numpy.ndarray:
35
        an array of strings with the mapping of the output of the analysis
36
    """
37
    species = orderByZ(species)
3✔
38
    pdscribe = []
3✔
39
    for Z in species:
3✔
40
        for Zp in species:
3✔
41
            if not crossover and Z != Zp:
3✔
42
                continue
3✔
43
            for l in range(lmax + 1):
3✔
44
                for n in range(nmax):
3✔
45
                    for np in range(nmax):
3✔
46
                        if (np, atomic_numbers[Zp]) >= (n, atomic_numbers[Z]):
3✔
47
                            pdscribe.append(_SOAPpstr(l, Z, n, Zp, np))
3✔
48
    return numpy.array(pdscribe)
3✔
49

50

51
def _getRSindex(nmax: int, species: "list[str]") -> numpy.ndarray:
3✔
52
    """Support function for quippy"""
53
    rsIndex = numpy.zeros((2, nmax * len(species)), dtype=numpy.int32)
3✔
54
    i = 0
3✔
55
    for iSpecies in range(len(species)):
3✔
56
        for na in range(nmax):
3✔
57
            rsIndex[:, i] = na, iSpecies
3✔
58
            i += 1
3✔
59
    return rsIndex
3✔
60

61

62
def getquippySOAPMapping(
3✔
63
    lmax: int, nmax: int, species: "list[str]", diagonalRadial: bool = False
64
) -> numpy.ndarray:
65
    """returns how quippi saves the SOAP results
66

67

68
        return a list of string with the identities of the data returned from quippy,
69
        see https://github.com/libAtoms/GAP/blob/main/descriptors.f95#L7588
70

71
    Args:
72
        lmax (int): the lmax specified in the calculation.
73
        nmax (int): the nmax specified in the calculation.
74
        species (list[str]): the list of atomic species.
75
        diagonalRadial (bool):
76
            if True, Only return the n1=n2 elements of the power spectrum.
77
            **NOT IMPLEMENTED**. Defaults to False.
78

79
    Returns:
80
        numpy.ndarray:
81
        an array of strings with the mapping of the output of the analysis
82
    """
83
    species = orderByZ(species)
3✔
84
    rsIndex = _getRSindex(nmax, species)
3✔
85
    pquippy = []
3✔
86
    for ia in range(len(species) * nmax):
3✔
87
        np = rsIndex[0, ia]
3✔
88
        Zp = species[rsIndex[1, ia]]
3✔
89
        for jb in range(ia + 1):  # ia is  in the range
3✔
90
            n = rsIndex[0, jb]
3✔
91
            Z = species[rsIndex[1, jb]]
3✔
92
            # if diagonalRadial and np != n:
93
            #    continue
94

95
            for l in range(lmax + 1):
3✔
96
                pquippy.append(_SOAPpstr(l, Z, n, Zp, np))
3✔
97
    return numpy.array(pquippy)
3✔
98

99

100
def orderByZ(species: "list[str]") -> "list[str]":
3✔
101
    """Orders the list of species by their atomic number
102

103
    Args:
104
        species (list[str]): the list of atomic species to be ordered
105

106
    Returns:
107
        list[str]: the ordered list of atomic species
108
    """
109
    return sorted(species, key=lambda x: atomic_numbers[x])
3✔
110

111

112
def getAddressesQuippyLikeDscribe(
3✔
113
    lmax: int, nmax: int, species: "list[str]"
114
) -> numpy.ndarray:
115
    """create a support bdarray to reorder the quippy output in a dscribe fashion
116

117
        Given the lmax and nmax of a SOAP calculation and the species of the atoms
118
        returns an array of idexes for reordering the quippy results as the dscribe results
119

120
    Args:
121
        lmax (int): the lmax specified in the calculation.
122
        nmax (int): the nmax specified in the calculation.
123
        species (list[str]): the list of atomic species.
124

125
        Returns:
126
            numpy.ndarray: an array of indexes
127
    """
128
    species = orderByZ(species)
3✔
129
    nsp = len(species)
3✔
130
    addresses = numpy.zeros(
3✔
131
        (lmax + 1) * ((nmax * nsp) * (nmax * nsp + 1)) // 2, dtype=int
132
    )
133
    quippyOrder = getquippySOAPMapping(lmax, nmax, species)
3✔
134
    dscribeOrder = getdscribeSOAPMapping(lmax, nmax, species)
3✔
135
    for i, _ in enumerate(addresses):
3✔
136
        addresses[i] = numpy.where(quippyOrder == dscribeOrder[i])[0][0]
3✔
137
    return addresses
3✔
138

139

140
def normalizeArray(x: numpy.ndarray) -> numpy.ndarray:
3✔
141
    """Normalizes the futher axis of the given array
142

143
    (eg. in an array of shape (100,50,3) normalizes all the  5000 3D vectors)
144

145
    Args:
146
        x (numpy.ndarray): the array to be normalized
147

148
    Returns:
149
        numpy.ndarray: the normalized array
150
    """
151
    norm = numpy.linalg.norm(x, axis=-1, keepdims=True)
3✔
152
    norm[norm == 0] = 1
3✔
153
    return x / norm
3✔
154

155

156
def getSlicesFromAttrs(attrs: dict) -> "tuple(list,dict)":
3✔
157
    """returns the positional slices for the calculated fingerprints
158

159
        Given the attributes of from a SOAP dataset returns the slices of the
160
        SOAP vector that contains the pair information
161

162
    Args:
163
        attrs (dict): the attributes of the SOAP dataset
164

165
    Returns:
166
        tuple(list,dict):
167
        the slices of the SOAP vector to be extracted and the atomic types
168
    """
169
    species = attrs["species"]
3✔
170
    slices = {}
3✔
171
    for symbol1 in species:
3✔
172
        for symbol2 in species:
3✔
173
            if f"species_location_{symbol1}-{symbol2}" in attrs:
3✔
174
                slices[symbol1 + symbol2] = slice(
3✔
175
                    attrs[f"species_location_{symbol1}-{symbol2}"][0],
176
                    attrs[f"species_location_{symbol1}-{symbol2}"][1],
177
                )
178

179
    return species, slices
3✔
180

181

182
def _getIndexesForFillSOAPVectorFromdscribeSameSpecies(
3✔
183
    lMax: int,
184
    nMax: int,
185
) -> numpy.ndarray:
186
    """returns the indexes of the SOAP vector to be reordered
187

188
    given lMax and nMax returns the indexes of the SOAP vector to be
189
    reordered to fill a  complete vector.
190

191
    useful to calculate the correct distances between the SOAP vectors
192

193
    Args:
194
        lMax (int): the lmax specified in the calculation.
195
        nMax (int): the nmax specified in the calculation.
196

197
    Returns:
198
        numpy.ndarray: the array of the indexes in the correct order
199
    """
200

201
    completeData = numpy.zeros(((lMax + 1), nMax, nMax), dtype=int)
3✔
202
    limitedID = 0
3✔
203
    for l in range(lMax + 1):
3✔
204
        for n in range(nMax):
3✔
205
            for nP in range(n, nMax):
3✔
206
                completeData[l, n, nP] = limitedID
3✔
207
                completeData[l, nP, n] = limitedID
3✔
208
                limitedID += 1
3✔
209
    return completeData.reshape(-1)
3✔
210

211

212
def _getIndexesForFillSOAPVectorFromdscribe(
3✔
213
    lMax: int,
214
    nMax: int,
215
    atomTypes: list = None,
216
    atomicSlices: dict = None,
217
) -> numpy.ndarray:
218
    """Given the data of a SOAP calculation from dscribe returns the SOAP power
219
        spectrum with also the symmetric part explicitly stored, see the note in
220
        https://singroup.github.io/dscribe/1.2.x/tutorials/descriptors/soap.html
221

222
        No controls are implemented on the shape of the soapFromdscribe vector.
223

224
    Args:
225
        soapFromdscribe (numpy.ndarray):
226
            the result of the SOAP calculation from the dscribe utility
227
        lmax (int):
228
            the l_max specified in the calculation. Defaults to 8.
229
        nMax (int):
230
            the n_max specified in the calculation. Defaults to 8.
231
        atomTypes (list[str]):
232
            the list of atomic species. Defaults to [None].
233
        atomicSlices (dict):
234
            the slices of the SOAP vector relative to che atomic species combinations.
235
            Defaults to None.
236

237
    Returns:
238
        numpy.ndarray:
239
            The full soap spectrum, with the symmetric part sored explicitly
240
    """
241
    if atomTypes is None:
3✔
242
        atomTypes = [None]
×
243
    if atomTypes == [None]:
3✔
244
        return _getIndexesForFillSOAPVectorFromdscribeSameSpecies(lMax, nMax)
3✔
245
    nOfFeatures = (lMax + 1) * nMax * nMax
3✔
246
    nofCombinations = len(list(combinations_with_replacement(atomTypes, 2)))
3✔
247
    completeData = numpy.zeros(nOfFeatures * nofCombinations, dtype=int)
3✔
248
    combinationID = 0
3✔
249
    for i, symbol1 in enumerate(atomTypes):
3✔
250
        for j in range(i, len(atomTypes)):
3✔
251
            symbol2 = atomTypes[j]
3✔
252
            completeID = combinationID * nOfFeatures
3✔
253
            completeSlice = slice(
3✔
254
                completeID,
255
                completeID + nOfFeatures,
256
            )
257
            if symbol1 == symbol2:
3✔
258
                completeData[completeSlice] = (
3✔
259
                    _getIndexesForFillSOAPVectorFromdscribeSameSpecies(lMax, nMax)
260
                    + atomicSlices[symbol1 + symbol2].start
261
                )
262
            else:
263
                completeData[completeSlice] = (
3✔
264
                    numpy.arange(nOfFeatures, dtype=int)
265
                    + atomicSlices[symbol1 + symbol2].start
266
                )
267
            combinationID += 1
3✔
268
    return completeData
3✔
269

270

271
def fillSOAPVectorFromdscribe(
3✔
272
    soapFromdscribe: numpy.ndarray,
273
    lMax: int,
274
    nMax: int,
275
    atomTypes: list = None,
276
    atomicSlices: dict = None,
277
) -> numpy.ndarray:
278
    """Given the result of a SOAP calculation from dscribe returns the SOAP power spectrum
279
        with also the symmetric part explicitly stored, see the note in
280
        https://singroup.github.io/dscribe/1.2.x/tutorials/descriptors/soap.html
281

282
        No controls are implemented on the shape of the soapFromdscribe vector.
283

284
    Args:
285
        soapFromdscribe (numpy.ndarray):
286
            the result of the SOAP calculation from the dscribe utility
287
        lMax (int):
288
            the l_max specified in the calculation.
289
        nMax (int):
290
            the n_max specified in the calculation.
291

292
    Returns:
293
        numpy.ndarray:
294
            The full soap spectrum, with the symmetric part sored explicitly
295
    """
296
    if atomTypes is None:
3✔
297
        atomTypes = [None]
3✔
298
    upperDiag = int((lMax + 1) * nMax * (nMax + 1) / 2)
3✔
299
    fullmat = nMax * nMax * (lMax + 1)
3✔
300
    limitedSOAPdim = upperDiag * len(atomTypes) + fullmat * int(
3✔
301
        (len(atomTypes) - 1) * len(atomTypes) / 2
302
    )
303
    # enforcing the order of the atomTypes
304
    if len(atomTypes) > 1:
3✔
305
        atomTypes = list(atomTypes)
3✔
306
        atomTypes.sort(key=lambda x: atomic_numbers[x])
3✔
307

308
    if soapFromdscribe.shape[-1] != limitedSOAPdim:
3✔
309
        raise ValueError(
3✔
310
            "fillSOAPVectorFromdscribe: the given soap vector do not have the expected dimensions"
311
        )
312
    indexes = _getIndexesForFillSOAPVectorFromdscribe(
3✔
313
        lMax, nMax, atomTypes, atomicSlices
314
    )
315
    if len(soapFromdscribe.shape) == 1:
3✔
316
        return soapFromdscribe[indexes]
3✔
317
    if len(soapFromdscribe.shape) == 2:
3✔
318
        return soapFromdscribe[:, indexes]
3✔
319
    if len(soapFromdscribe.shape) == 3:
3✔
320
        return soapFromdscribe[:, :, indexes]
3✔
321

322
    raise ValueError(
3✔
323
        "fillSOAPVectorFromdscribe: cannot convert array with len(shape) >=3"
324
    )
325

326

327
def getSOAPSettings(fitsetData: h5py.Dataset) -> dict:
3✔
328
    """Gets the settings of the SOAP calculation
329

330
        you can feed directly this output to :func:`fillSOAPVectorFromdscribe`
331

332
        #TODO: make tests for this
333
    Args:
334
        fitsetData (h5py.Dataset): A soap dataset with attributes
335

336
    Returns:
337
        dict: a dictionary with the following components:
338
            - **nMax**
339
            - **lMax**
340
            - **atomTypes**
341
            - **atomicSlices**
342

343
    """
NEW
344
    lmax = fitsetData.attrs["l_max"]
×
NEW
345
    nmax = fitsetData.attrs["n_max"]
×
NEW
346
    symbols, atomicSlices = getSlicesFromAttrs(fitsetData.attrs)
×
347

NEW
348
    return {
×
349
        "nMax": nmax,
350
        "lMax": lmax,
351
        "atomTypes": symbols,
352
        "atomicSlices": atomicSlices,
353
    }
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc