• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zincware / MDSuite / 3999396905

pending completion
3999396905

push

github-actions

GitHub
[merge before other PRs] ruff updates (#580)

960 of 1311 branches covered (73.23%)

Branch coverage included in aggregate %.

15 of 15 new or added lines in 11 files covered. (100.0%)

4034 of 4930 relevant lines covered (81.83%)

3.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.65
/mdsuite/calculators/radial_distribution_function.py
1
"""
2
MDSuite: A Zincwarecode package.
3

4
License
5
-------
6
This program and the accompanying materials are made available under the terms
7
of the Eclipse Public License v2.0 which accompanies this distribution, and is
8
available at https://www.eclipse.org/legal/epl-v20.html
9

10
SPDX-License-Identifier: EPL-2.0
11

12
Copyright Contributors to the Zincwarecode Project.
13

14
Contact Information
15
-------------------
16
email: zincwarecode@gmail.com
17
github: https://github.com/zincware
18
web: https://zincwarecode.com/
19

20
Citation
21
--------
22
If you use this module please cite us with:
23

24
Summary
25
-------
26
MDSuite module for the computation of the radial distribution function (RDF). An RDF
27
describes the probability of finding a particle of species b at a distance r of
28
species a.
29
"""
30
from __future__ import annotations
4✔
31

32
import itertools
4✔
33
import logging
4✔
34
from abc import ABC
4✔
35
from dataclasses import dataclass
4✔
36
from timeit import default_timer as timer
4✔
37
from typing import Union
4✔
38

39
import numpy as np
4✔
40
import tensorflow as tf
4✔
41

42
# Import user packages
43
from tqdm import tqdm
4✔
44

45
from mdsuite.calculators.calculator import call
4✔
46
from mdsuite.calculators.trajectory_calculator import TrajectoryCalculator
4✔
47
from mdsuite.database.mdsuite_properties import mdsuite_properties
4✔
48
from mdsuite.utils.linalg import (
4✔
49
    apply_minimum_image,
50
    apply_system_cutoff,
51
    get_partial_triu_indices,
52
)
53
from mdsuite.utils.meta_functions import join_path, split_array
4✔
54

55
log = logging.getLogger(__name__)
4✔
56

57

58
@dataclass
4✔
59
class Args:
3✔
60
    """Data class for the saved properties."""
61

62
    number_of_bins: int
4✔
63
    number_of_configurations: int
4✔
64
    correlation_time: int
4✔
65
    atom_selection: np.s_
4✔
66
    data_range: int
4✔
67
    cutoff: float
4✔
68
    start: int
4✔
69
    stop: int
4✔
70
    species: list
4✔
71
    molecules: bool
4✔
72

73

74
class RadialDistributionFunction(TrajectoryCalculator, ABC):
4✔
75
    """
76
    Class for the calculation of the radial distribution function.
77

78
    Attributes
79
    ----------
80
    experiment :  object
81
            Experiment class to call from
82
    data_range :
83
            Number of configurations to use in each ensemble
84
    x_label : str
85
            X label of the tensor_values when plotted
86
    y_label : str
87
            Y label of the tensor_values when plotted
88
    analysis_name : str
89
            Name of the analysis
90
    loaded_property : str
91
            Property loaded from the database_path for the analysis
92
    minibatch: int, default None
93
            Size of an individual minibatch, if set. By default mini-batching is not
94
            applied
95

96
    See Also
97
    --------
98
    mdsuite.calculators.calculator.Calculator class
99

100
    Examples
101
    --------
102
    .. code-block:: python
103

104
        project = mdsuite.Project()
105
        project.run.RadialDistributionFunction(number_of_configurations=500)
106

107
    """
108

109
    def __init__(self, **kwargs):
4✔
110
        """
111
        Constructor for the RDF calculator.
112

113
        Attributes
114
        ----------
115
        kwargs: see RunComputation class for all the passed arguments
116
        """
117
        super().__init__(**kwargs)
4✔
118

119
        self.scale_function = {
4✔
120
            "quadratic": {"outer_scale_factor": 10, "inner_scale_factor": 5}
121
        }
122
        self.loaded_property = mdsuite_properties.positions
4✔
123
        self.x_label = r"$$r / nm$$"
4✔
124
        self.y_label = r"$$g(r)$$"
4✔
125
        self.analysis_name = "Radial_Distribution_Function"
4✔
126
        self.result_series_keys = ["x", "y"]
4✔
127

128
        self._dtype = tf.float32
4✔
129

130
        self.rdf_minibatch = None
4✔
131
        self.use_tf_function = None
4✔
132
        self.override_n_batches = None
4✔
133
        self.index_list = None
4✔
134
        self.sample_configurations = None
4✔
135
        self.key_list = None
4✔
136
        self.rdf = None
4✔
137

138
    @call
4✔
139
    def __call__(
4✔
140
        self,
141
        plot: bool = True,
142
        number_of_bins: int = None,
143
        cutoff: float = None,
144
        save: bool = True,
145
        start: int = 0,
146
        stop: int = None,
147
        number_of_configurations: int = 500,
148
        atom_selection: Union[np.s_, dict] = np.s_[:],
149
        minibatch: int = -1,
150
        species: list = None,
151
        molecules: bool = False,
152
        **kwargs,
153
    ):
154
        """
155
        Compute the RDF with the given user parameters.
156

157
        Parameters
158
        ----------
159
        plot: bool
160
            Plot the RDF after the computation
161
        number_of_bins: int
162
            The number of bins for the RDF histogram
163
        species : list
164
            A list of species to study.
165
        cutoff: float
166
            The cutoff value for the RDF. Default is half the box size
167
        save: bool
168
            save the data
169
        start: int
170
            Starting position in the database. All values before start will be
171
            ignored.
172
        stop: int
173
            Stopping position in the database. All values after stop will be
174
            ignored.
175
        number_of_configurations: int
176
            The number of uniformly sampled configuration between start and
177
            stop to be used for the RDF.
178
        atom_selection : Union[np.s_, dict]
179
                Atoms to be used in the analysis.
180
        minibatch: int
181
            Size of a minibatch over atoms in the batch over configurations.
182
            Decrease this value if you run into memory
183
            issues. Increase this value for better performance.
184
        molecules: bool
185
            If true, the molecules will be analyzed rather than the atoms.
186
        kwargs:
187
            overide_n_batches: int
188
                    override the automatic batch size calculation
189
            use_tf_function : bool
190
                    If true, tf.function is used in the calculation.
191
        """
192
        # set args that will affect the computation result
193
        self.args = Args(
4✔
194
            number_of_bins=number_of_bins,
195
            cutoff=cutoff,
196
            start=start,
197
            stop=stop,
198
            atom_selection=atom_selection,
199
            data_range=1,
200
            correlation_time=1,
201
            molecules=molecules,
202
            species=species,
203
            number_of_configurations=number_of_configurations,
204
        )
205
        # args parsing that will not affect the computation result
206
        # usually performance or plotting
207
        self.rdf_minibatch = minibatch
4✔
208
        self.plot = plot
4✔
209

210
        # kwargs parsing
211
        self.use_tf_function = kwargs.pop("use_tf_function", False)
4✔
212
        self.override_n_batches = kwargs.get("batches")
4✔
213
        self.tqdm_limit = kwargs.pop("tqdm", 10)
4✔
214

215
    def check_input(self):
4✔
216
        """
217
        Check the input of the call method and store defaults if needed.
218

219
        Returns
220
        -------
221
        Updates class attributes if required.
222
        """
223
        if self.args.stop is None:
4!
224
            self.args.stop = self.experiment.number_of_configurations - 1
4✔
225

226
        if self.args.cutoff is None:
4✔
227
            self.args.cutoff = (
4✔
228
                self.experiment.box_array[0] / 2 - 0.1
229
            )  # set cutoff to half box size if none set
230

231
        if self.args.number_of_configurations == -1:
4!
232
            self.args.number_of_configurations = (
×
233
                self.experiment.number_of_configurations - 1
234
            )
235

236
        if self.rdf_minibatch == -1:
4!
237
            self.rdf_minibatch = self.args.number_of_configurations
4✔
238

239
        if self.args.number_of_bins is None:
4✔
240
            self.args.number_of_bins = int(
4✔
241
                self.args.cutoff / 0.01
242
            )  # default is 1/100th of an angstrom
243

244
        # Get the correct species out.
245
        if self.args.species is None:
4✔
246
            if self.args.molecules:
4✔
247
                self.args.species = list(self.experiment.molecules)
4✔
248
            else:
249
                self.args.species = list(self.experiment.species)
4✔
250
        self._initialize_rdf_parameters()
4✔
251

252
    def _initialize_rdf_parameters(self):
4✔
253
        """
254
        Initialize the RDF parameters.
255

256
        Returns
257
        -------
258
        Updates class attributes.
259
        """
260
        self.bin_range = [0, self.args.cutoff]
4✔
261
        self.index_list = list(range(len(self.args.species)))
4✔
262
        # Get the indices of the species
263

264
        self.sample_configurations = np.linspace(
4✔
265
            self.args.start,
266
            self.args.stop,
267
            self.args.number_of_configurations,
268
            dtype=int,
269
        )  # choose sampled configurations
270

271
        # Generate the tuples e.g ('Na', 'Cl'), ('Na', 'Na')
272
        self.key_list = [
4✔
273
            self._get_species_names(x)
274
            for x in list(itertools.combinations_with_replacement(self.index_list, r=2))
275
        ]
276

277
        self.rdf = {
4✔
278
            name: np.zeros(self.args.number_of_bins) for name in self.key_list
279
        }  # instantiate the rdf tuples
280

281
    def _get_species_names(self, species_tuple: tuple) -> str:
4✔
282
        """
283
        Get the correct names of the species being studied.
284

285
        Parameters
286
        ----------
287
        species_tuple : tuple
288
                The species tuple i.e (1, 2) corresponding to the rdf being calculated
289

290
        Returns
291
        -------
292
        names : str
293
                Prefix for the saved file
294
        """
295
        arg_1 = self.args.species[species_tuple[0]]
4✔
296
        arg_2 = self.args.species[species_tuple[1]]
4✔
297
        return f"{arg_1}_{arg_2}"
4✔
298

299
    def _calculate_prefactor(self, species: Union[str, tuple] = None):
4✔
300
        """
301
        Calculate the relevant prefactor for the analysis.
302

303
        Parameters
304
        ----------
305
        species : str
306
                The species tuple of the RDF being studied, e.g. Na_Na
307
        """
308
        species_scale_factor = 1
4✔
309
        species_split = species.split("_")
4✔
310
        if species_split[0] == species_split[1]:
4✔
311
            species_scale_factor = 2
4✔
312

313
        if self.args.molecules:
4✔
314
            # Density of all atoms / total volume
315
            rho = (
4✔
316
                self.experiment.molecules[species_split[1]].n_particles
317
                / self.experiment.volume
318
            )
319
            numerator = species_scale_factor
4✔
320
            denominator = (
4✔
321
                self.args.number_of_configurations
322
                * rho
323
                * self.ideal_correction
324
                * self.experiment.molecules[species_split[0]].n_particles
325
            )
326
        else:
327
            if isinstance(self.args.atom_selection, dict):
4!
328
                n_species_0 = len(self.args.atom_selection[species_split[0]])
×
329
                n_species_1 = len(self.args.atom_selection[species_split[1]])
×
330
            else:
331
                n_species_0 = self.experiment.species[species_split[0]].n_particles
4✔
332
                n_species_1 = self.experiment.species[species_split[1]].n_particles
4✔
333

334
            # Density of all atoms / total volume
335
            rho = n_species_1 / self.experiment.volume
4✔
336
            numerator = species_scale_factor
4✔
337
            denominator = (
4✔
338
                self.args.number_of_configurations
339
                * rho
340
                * self.ideal_correction
341
                * n_species_0
342
            )
343
        prefactor = numerator / denominator
4✔
344

345
        return prefactor
4✔
346

347
    def _calculate_radial_distribution_functions(self):
4✔
348
        """
349
        Take the calculated histograms and apply the correct pre-factor to them to get
350
        the correct RDF.
351

352
        Returns
353
        -------
354
        Updates the class state with the full RDF for each desired species pair.
355
        """
356
        # Compute the true RDF for each species combination.
357
        self.rdf.update(
4✔
358
            {key: np.array(val.numpy(), dtype=float) for key, val in self.rdf.items()}
359
        )
360

361
        for names in self.key_list:
4✔
362
            self.selected_species = names.split("_")
4✔
363
            # TODO use selected_species instead of names, it is more clear
364
            prefactor = self._calculate_prefactor(names)  # calculate the prefactor
4✔
365

366
            self.rdf.update(
4✔
367
                {names: self.rdf.get(names) * prefactor}
368
            )  # Apply the prefactor
369
            log.debug("Writing RDF to database!")
4✔
370

371
            x_data = self._ang_to_nm(
4✔
372
                np.linspace(0.0, self.args.cutoff, self.args.number_of_bins)
373
            )
374
            y_data = self.rdf.get(names)
4✔
375

376
            # self.data_range = self.number_of_configurations
377
            data = {
4✔
378
                self.result_series_keys[0]: x_data.tolist(),
379
                self.result_series_keys[1]: y_data.tolist(),
380
            }
381

382
            self.queue_data(data=data, subjects=self.selected_species)
4✔
383

384
    def _ang_to_nm(self, data_in: np.ndarray) -> np.ndarray:
4✔
385
        """
386
        Convert Angstroms to nm.
387

388
        Returns
389
        -------
390
        data_out : np.ndarray
391
                data_in converted to nm
392
        """
393
        return (self.experiment.units.length / 1e-9) * data_in
4✔
394

395
    def _correct_batch_properties(self):
4✔
396
        """
397
        We must fix the batch size parameters set by the parent class.
398

399
        Returns
400
        -------
401
        Updates the parent class.
402
        """
403
        if self.batch_size > self.args.number_of_configurations:
4!
404
            self.batch_size = self.args.number_of_configurations
×
405
            self.n_batches = 1
×
406
        else:
407
            self.n_batches = int(self.args.number_of_configurations / self.batch_size)
4✔
408

409
        if self.override_n_batches is not None:
4!
410
            self.n_batches = self.override_n_batches
×
411

412
        if self.minibatch:
4!
413
            self.batch_size = 1
×
414
            self.n_batches = self.args.number_of_configurations
×
415
            self.memory_manager.atom_batch_size = None
×
416
            self.memory_manager.n_atom_batches = None
×
417
            self.memory_manager.atom_remainder = None
×
418
            self.minibatch = False
×
419

420
        self.remainder = 0
4✔
421

422
    def run_minibatch_loop(self, atoms, stop, n_atoms, minibatch_start, positions_tensor):
4✔
423
        """
424
        Run a minibatch loop.
425

426
        Parameters
427
        ----------
428
        atoms : tf.Tensor
429
        stop : int
430
        n_atoms : int
431
        minibatch_start : int
432
        positions_tensor : tf.Tensor
433

434
        """
435
        # Compute the number of atoms and configurations in the batch.
436
        atoms_per_batch, batch_size, _ = tf.shape(atoms)
4✔
437

438
        # Compute the indices
439
        stop += atoms_per_batch
4✔
440
        start_time = timer()
4✔
441
        indices = get_partial_triu_indices(n_atoms, atoms_per_batch, minibatch_start)
4✔
442
        log.debug(f"Calculating indices took {timer() - start_time} s")
4✔
443

444
        # Compute the d_ij matrix.
445
        start_time = timer()
4✔
446
        d_ij = self.get_dij(
4✔
447
            indices,
448
            positions_tensor,
449
            atoms,
450
            tf.cast(self.experiment.box_array, dtype=self.dtype),
451
        )
452
        exec_time = timer() - start_time
4✔
453
        atom_pairs_per_second = (
4✔
454
            tf.cast(tf.shape(indices)[1], dtype=self.dtype) / exec_time / 10**6
455
        )
456
        atom_pairs_per_second *= tf.cast(batch_size, dtype=self.dtype)
4✔
457
        log.debug(
4✔
458
            f"Computing d_ij took {exec_time} s "
459
            f"({atom_pairs_per_second:.1f} million atom pairs / s)"
460
        )
461

462
        # Compute the rdf for the minibatch
463
        start_time = timer()
4✔
464
        minibatch_rdf = self.compute_species_values(indices, minibatch_start, d_ij)
4✔
465
        log.debug(f"Computing species values took {timer() - start_time} s")
4✔
466

467
        minibatch_start = stop
4✔
468
        return minibatch_rdf, minibatch_start, stop
4✔
469

470
    def compute_species_values(
4✔
471
        self, indices: tf.Tensor, start_batch, d_ij: tf.Tensor
472
    ) -> dict:
473
        """
474
        Compute species-wise histograms.
475

476
        Parameters
477
        ----------
478
        indices : tf.Tensor
479
                indices of the d_ij distances in the shape (x, 2)
480
        start_batch :
481
                starts from 0 and increments by atoms_per_batch every batch
482
        d_ij : tf.Tensor
483
                d_ij matrix in the shape (x, batches) where x comes from the triu
484
                computation
485
        start_batch : int
486
                Atom index within a single configuration from to start the computation
487
                based on the current minibatch that is being computed.
488

489
        Returns
490
        -------
491
        rdf : dict
492
                Dict of rdf values for each combination of species, e.g.:
493
                {'H-O': tf.Tensor(...), 'H-H': ..., 'O-O': ...}
494
        """
495
        rdf = {
4✔
496
            name: tf.zeros(self.args.number_of_bins, dtype=tf.int32)
497
            for name in self.key_list
498
        }
499
        indices = tf.transpose(indices)
4✔
500

501
        particles_list = self.particles_list
4✔
502
        for tuples in itertools.combinations_with_replacement(self.index_list, 2):
4✔
503
            names = self._get_species_names(tuples)
4✔
504
            start_ = tf.stack(
4✔
505
                [
506
                    sum(particles_list[: tuples[0]]) - start_batch,
507
                    sum(particles_list[: tuples[1]]),
508
                ],
509
                axis=0,
510
            )
511
            stop_ = start_ + tf.constant(
4✔
512
                [particles_list[tuples[0]], particles_list[tuples[1]]]
513
            )
514

515
            rdf[names] = self.bin_minibatch(
4✔
516
                start_,
517
                stop_,
518
                indices,
519
                d_ij,
520
                tf.cast(self.bin_range, dtype=self.dtype),
521
                tf.cast(self.args.number_of_bins, dtype=tf.int32),
522
                tf.cast(self.args.cutoff, dtype=self.dtype),
523
            )
524
        return rdf
4✔
525

526
    def plot_data(self, data):
4✔
527
        """Plot the RDF data."""
528
        for selected_species, val in data.items():
×
529
            self.run_visualization(
×
530
                x_data=np.array(val[self.result_series_keys[0]]),
531
                y_data=np.array(val[self.result_series_keys[1]]),
532
                title=selected_species,
533
            )
534

535
    def _format_data(self, batch: tf.Tensor, keys: list) -> tf.Tensor:
4✔
536
        """
537
        Format the loaded data for use in the rdf calculator.
538

539
        The RDF requires a reshaped dataset. The generator will load a default
540
        dict oriented type. This method restructures the data to be used in the
541
        calculator.
542

543
        Parameters
544
        ----------
545
        batch : tf.Tensor
546
                A batch of data to transform.
547
        keys : list
548
                Dict keys to extract from the data.
549

550
        Returns
551
        -------
552
        data : tf.Tensor
553
                data tensor of the shape (n_atoms * n_species, n_configurations, 3)
554

555
        """
556
        formatted_data = []
4✔
557
        for item in keys:
4✔
558
            formatted_data.append(batch[item])
4✔
559

560
        if len(self.args.species) == 1:
4✔
561
            return tf.cast(formatted_data[0], dtype=self.dtype)
4✔
562
        else:
563
            return tf.cast(tf.concat(formatted_data, axis=0), dtype=self.dtype)
4✔
564

565
    def prepare_computation(self):
4✔
566
        """
567
        Run the steps necessary to prepare for the RDF computation.
568

569
        Returns
570
        -------
571
        dict_keys : list
572
                dict keys to use when selecting data from the output.
573
        split_arr : np.ndarray
574
                Array of indices to load from the database split into sub-arrays which
575
                fulfill the necessary batch size.
576
        batch_tqdm : bool
577
                If true, the main tqdm loop over batches is disabled and only the
578
                mini-batch loop will be displayed.
579
        """
580
        path_list = [
4✔
581
            join_path(item, self.loaded_property.name) for item in self.args.species
582
        ]
583
        self._prepare_managers(path_list)
4✔
584

585
        # batch loop correction
586
        self._correct_batch_properties()
4✔
587

588
        # Get the correct dict keys.
589
        dict_keys = []
4✔
590
        for item in self.args.species:
4✔
591
            dict_keys.append(str.encode(join_path(item, self.loaded_property.name)))
4✔
592

593
        # Split the configurations into batches.
594
        split_arr = np.array_split(self.sample_configurations, self.n_batches)
4✔
595

596
        # Turn off the tqdm for certain scenarios.
597
        batch_tqdm = self.tqdm_limit > self.n_batches
4✔
598

599
        return dict_keys, split_arr, batch_tqdm
4✔
600

601
    @staticmethod
4✔
602
    def combine_dictionaries(dict_a: dict, dict_b: dict):
3✔
603
        """
604
        Combine two dictionaries in a tf.function call.
605

606
        Parameters
607
        ----------
608
        dict_a : dict
609
        dict_b : dict
610
        """
611
        out = {}
4✔
612
        for key in dict_a:
4✔
613
            out[key] = dict_a[key] + dict_b[key]
4✔
614
        return out
4✔
615

616
    @staticmethod
4✔
617
    @tf.function(experimental_relax_shapes=True)
4✔
618
    def bin_minibatch(
3✔
619
        start, stop, indices, d_ij, bin_range, number_of_bins, cutoff
620
    ) -> tf.Tensor:
621
        """
622
        Compute the minibatch histogram.
623

624
        Parameters
625
        ----------
626
        start : list
627
        stop : list
628
        indices : tf.Tensor
629
        d_ij : tf.Tensor
630
        bin_range
631
        number_of_bins : int
632
        cutoff : float
633
                Cutoff to enforce on the distance tensor.
634
        """
635
        # select the indices that are within the boundaries of the current species /
636
        # molecule
637
        mask_1 = (indices[:, 0] > start[0]) & (indices[:, 0] < stop[0])
×
638
        mask_2 = (indices[:, 1] > start[1]) & (indices[:, 1] < stop[1])
×
639

640
        values_species = tf.boolean_mask(d_ij, mask_1 & mask_2, axis=0)
×
641
        values = apply_system_cutoff(values_species, cutoff)
×
642
        bin_data = tf.histogram_fixed_width(
×
643
            values=values, value_range=bin_range, nbins=number_of_bins
644
        )
645
        return bin_data
×
646

647
    @staticmethod
4✔
648
    @tf.function(experimental_relax_shapes=True)
4✔
649
    def get_dij(indices, positions_tensor, atoms, box_array):
3✔
650
        """
651
        Compute the distance matrix for the minibatch.
652

653
        Parameters
654
        ----------
655
        indices : tf.Tensor
656
        positions_tensor : tf.Tensor
657
        atoms : tf.Tensor
658
        box_array : tf.Tensor
659

660
        """
661
        start_time = timer()
×
662
        log.debug(f"Calculating indices took {timer() - start_time} s")
×
663

664
        # apply the mask to this, to only get the triu values and don't compute
665
        # anything twice
666
        start_time = timer()
×
667
        _positions = tf.gather(positions_tensor, indices[1], axis=0)
×
668
        log.debug(f"Gathering positions_tensor took {timer() - start_time} s")
×
669

670
        # for atoms_per_batch > 1, flatten the array according to the positions
671
        start_time = timer()
×
672
        atoms_position = tf.gather(atoms, indices[0], axis=0)
×
673
        log.debug(f"Gathering atoms took {timer() - start_time} s")
×
674

675
        start_time = timer()
×
676
        r_ij = _positions - atoms_position
×
677
        log.debug(f"Computing r_ij took {timer() - start_time} s")
×
678

679
        # apply minimum image convention
680
        start_time = timer()
×
681
        if box_array is not None:
×
682
            r_ij = apply_minimum_image(r_ij, box_array)
×
683
        log.debug(f"Applying minimum image convention took {timer() - start_time} s")
×
684

685
        start_time = timer()
×
686
        d_ij = tf.linalg.norm(r_ij, axis=-1)
×
687
        log.debug(f"Computing d_ij took {timer() - start_time} s")
×
688

689
        return d_ij
×
690

691
    @property
4✔
692
    def particles_list(self):
3✔
693
        """
694
        List of number of atoms of each species being studied.
695

696
        Returns
697
        -------
698
        -------.
699

700
        """
701
        if self.args.molecules:
4✔
702
            particles_list = [
4✔
703
                self.experiment.molecules[item].n_particles
704
                for item in self.experiment.molecules
705
            ]
706
        else:
707
            if isinstance(self.args.atom_selection, dict):
4!
708
                particles_list = [
×
709
                    len(self.args.atom_selection[item]) for item in self.args.species
710
                ]
711
            else:
712
                particles_list = [
4✔
713
                    self.experiment.species[item].n_particles
714
                    for item in self.args.species
715
                ]
716

717
        return particles_list
4✔
718

719
    @property
4✔
720
    def ideal_correction(self) -> float:
3✔
721
        """
722
        Get the correct ideal gas term.
723

724
        In the case of a cutoff value greater than half of the box size, the ideal gas
725
        term of the experiment must be corrected due to the lack of spherical symmetry
726
        in the experiment.
727

728
        Returns
729
        -------
730
        correction : float
731
                Correct ideal gas term for the RDF prefactor
732
        """
733
        # TODO make it a property
734
        def _spherical_symmetry(data: np.array) -> np.array:
4✔
735
            """
736
            Operation to perform for full spherical symmetry.
737

738
            Parameters
739
            ----------
740
            data : np.array
741
                    tensor_values on which to operate
742
            Returns
743
            -------
744
            function_values : np.array
745
                    result of the operation
746
            """
747
            return 4 * np.pi * (data**2)
4✔
748

749
        def _correction_1(data: np.array) -> np.array:
4✔
750
            """
751
            First correction to ideal gas.
752

753
            tensor_values : np.array
754
                    tensor_values on which to operate
755
            Returns
756
            -------
757
            function_values : np.array
758
                    result of the operation
759

760
            """
761
            return 2 * np.pi * data * (3 - 4 * data)
×
762

763
        def _correction_2(data: np.array) -> np.array:
4✔
764
            """
765
            Second correction to ideal gas.
766

767
            tensor_values : np.array
768
                    tensor_values on which to operate
769
            Returns
770
            -------
771
            function_values : np.array
772
                    result of the operation
773

774
            """
775
            arctan_1 = np.arctan(np.sqrt(4 * (data**2) - 2))
×
776
            arctan_2 = (
×
777
                8
778
                * data
779
                * np.arctan(
780
                    (2 * data * (4 * (data**2) - 3))
781
                    / (np.sqrt(4 * (data**2) - 2) * (4 * (data**2) + 1))
782
                )
783
            )
784

785
            return 2 * data * (3 * np.pi - 12 * arctan_1 + arctan_2)
×
786

787
        def _piecewise(data: np.array) -> np.array:
4✔
788
            """
789
            Return a piecewise operation on a set of tensor_values
790
            Parameters
791
            ----------
792
            data : np.array
793
                    tensor_values on which to operate.
794

795
            Returns
796
            -------
797
            scaled_data : np.array
798
                    tensor_values that has been operated on.
799
            """
800
            # Boundaries on the ideal gsa correction. These go to 73% over half the box
801
            # size, the most for a cubic box.
802
            lower_bound = self.experiment.box_array[0] / 2
4✔
803
            middle_bound = np.sqrt(2) * self.experiment.box_array[0] / 2
4✔
804

805
            # split the tensor_values into parts
806
            split_1 = list(split_array(data, data <= lower_bound))
4✔
807
            if len(split_1) == 1:
4!
808
                return _spherical_symmetry(split_1[0])
4✔
809
            else:
810
                split_2 = list(split_array(split_1[1], split_1[1] < middle_bound))
×
811
                if len(split_2) == 1:
×
812
                    return np.concatenate(
×
813
                        (_spherical_symmetry(split_1[0]), _correction_1(split_2[0]))
814
                    )
815
                else:
816
                    return np.concatenate(
×
817
                        (
818
                            _spherical_symmetry(split_1[0]),
819
                            _correction_1(split_2[0]),
820
                            _correction_2(split_2[1]),
821
                        )
822
                    )
823

824
        bin_width = self.args.cutoff / self.args.number_of_bins
4✔
825
        bin_edges = np.linspace(0.0, self.args.cutoff, self.args.number_of_bins)
4✔
826

827
        return _piecewise(np.array(bin_edges)) * bin_width
4✔
828

829
    def run_calculator(self):
4✔
830
        """
831
        Run the analysis.
832

833
        Returns
834
        -------
835

836
        """
837
        self.check_input()
4✔
838

839
        dict_keys, split_arr, batch_tqm = self.prepare_computation()
4✔
840

841
        # Get the batch dataset
842
        batch_ds = self.get_batch_dataset(
4✔
843
            subject_list=self.args.species, loop_array=split_arr, correct=True
844
        )
845

846
        # Loop over the batches.
847
        for idx, batch in tqdm(
4✔
848
            enumerate(batch_ds), ncols=70, disable=batch_tqm, total=self.n_batches
849
        ):
850
            # Reformat the data.
851
            log.debug("Reformatting data.")
4✔
852
            positions_tensor = self._format_data(batch=batch, keys=dict_keys)
4✔
853

854
            # Create a new dataset to loop over.
855
            log.debug("Creating dataset.")
4✔
856
            per_atoms_ds = tf.data.Dataset.from_tensor_slices(positions_tensor)
4✔
857
            n_atoms = tf.shape(positions_tensor)[0]
4✔
858

859
            # Start the computation.
860
            log.debug("Beginning calculation.")
4✔
861
            minibatch_start = tf.constant(0)
4✔
862
            stop = tf.constant(0)
4✔
863
            rdf = {
4✔
864
                name: tf.zeros(self.args.number_of_bins, dtype=tf.int32)
865
                for name in self.key_list
866
            }
867

868
            for atoms in tqdm(
4✔
869
                per_atoms_ds.batch(self.rdf_minibatch).prefetch(tf.data.AUTOTUNE),
870
                ncols=70,
871
                disable=not batch_tqm,
872
                desc=f"Running mini batch loop {idx + 1} / {self.n_batches}",
873
            ):
874
                # Compute the minibatch update
875
                minibatch_rdf, minibatch_start, stop = self.run_minibatch_loop(
4✔
876
                    atoms, stop, n_atoms, minibatch_start, positions_tensor
877
                )
878

879
                # Update the rdf.
880
                start_time = timer()
4✔
881
                rdf = self.combine_dictionaries(rdf, minibatch_rdf)
4✔
882
                log.debug(f"Updating dictionaries took {timer() - start_time} s")
4✔
883

884
            # Update the class before the next batch.
885
            for key in self.rdf:
4✔
886
                self.rdf[key] += rdf[key]
4✔
887

888
        self._calculate_radial_distribution_functions()
4✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc