• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

materialsproject / pymatgen / 4075885785

pending completion
4075885785

push

github

Shyue Ping Ong
Merge branch 'master' of github.com:materialsproject/pymatgen

96 of 96 new or added lines in 27 files covered. (100.0%)

81013 of 102710 relevant lines covered (78.88%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

59.38
/pymatgen/apps/borg/queen.py
1
# Copyright (c) Pymatgen Development Team.
2
# Distributed under the terms of the MIT License.
3

4
"""
1✔
5
This module defines the BorgQueen class, which manages drones to assimilate
6
data using Python's multiprocessing.
7
"""
8

9
from __future__ import annotations
1✔
10

11
import json
1✔
12
import logging
1✔
13
import os
1✔
14
from multiprocessing import Manager, Pool
1✔
15

16
from monty.io import zopen
1✔
17
from monty.json import MontyDecoder, MontyEncoder
1✔
18

19
logger = logging.getLogger("BorgQueen")
1✔
20

21

22
class BorgQueen:
    """
    The Borg Queen controls the drones to assimilate data in an entire
    directory tree. Uses multiprocessing to speed up things considerably. It
    also contains convenience methods to save and load data between sessions.
    """

    def __init__(self, drone, rootpath=None, number_of_drones=1):
        """
        Args:
            drone (Drone): An implementation of
                :class:`pymatgen.apps.borg.hive.AbstractDrone` to use for
                assimilation.
            rootpath (str): The root directory to start assimilation. Leave it
                as None if you want to do assimilation later, or are using the
                BorgQueen to load previously assimilated data.
            number_of_drones (int): Number of drones (worker processes) to
                parallelize over. With a value greater than 1 the directory
                tree is assimilated with a process pool; otherwise it is
                assimilated serially in the current process.
        """
        self._drone = drone
        self._num_drones = number_of_drones
        self._data = []

        if rootpath:
            if number_of_drones > 1:
                self.parallel_assimilate(rootpath)
            else:
                self.serial_assimilate(rootpath)

    def _find_valid_paths(self, rootpath):
        """
        Walk rootpath and collect every path the drone reports as valid.

        Args:
            rootpath (str): The root directory to scan.

        Returns:
            list: Concatenation of the drone's ``get_valid_paths`` results for
            each ``(parent, subdirs, files)`` entry yielded by ``os.walk``.
        """
        valid_paths = []
        for entry in os.walk(rootpath):
            valid_paths.extend(self._drone.get_valid_paths(entry))
        return valid_paths

    def parallel_assimilate(self, rootpath):
        """
        Assimilate the entire subdirectory structure in rootpath using a pool
        of ``number_of_drones`` worker processes.

        Args:
            rootpath (str): The root directory to start assimilation from.
        """
        logger.info("Scanning for valid paths...")
        valid_paths = self._find_valid_paths(rootpath)
        logger.info(f"{len(valid_paths)} valid paths found.")

        # The manager runs a separate server process; the context manager
        # guarantees it is shut down even if a worker raises.
        with Manager() as manager:
            data = manager.list()
            status = manager.dict()
            status["count"] = 0
            status["total"] = len(valid_paths)
            with Pool(self._num_drones) as pool:
                pool.map(
                    order_assimilation,
                    [(path, self._drone, data, status) for path in valid_paths],
                )
            # Copy the results out of the proxy while the manager is alive.
            serialized = list(data)

        # Workers hand results back as JSON strings; decode them here.
        self._data.extend(json.loads(d, cls=MontyDecoder) for d in serialized)

    def serial_assimilate(self, rootpath):
        """
        Assimilate the entire subdirectory structure in rootpath serially.

        Args:
            rootpath (str): The root directory to start assimilation from.
        """
        valid_paths = self._find_valid_paths(rootpath)
        total = len(valid_paths)
        for count, path in enumerate(valid_paths, start=1):
            # NOTE(review): every result is kept here, including falsy ones,
            # whereas order_assimilation (parallel mode) drops falsy results.
            self._data.append(self._drone.assimilate(path))
            logger.info(f"{count}/{total} ({count / total :.2%}) done")

    def get_data(self):
        """
        Returns a list of assimilated objects.
        """
        return self._data

    def save_data(self, filename):
        """
        Save the assimilated data to a file.

        Args:
            filename (str): filename to save the assimilated data to. Note
                that if the filename ends with gz or bz2, the relevant gzip
                or bz2 compression will be applied.
        """
        with zopen(filename, "wt") as f:
            json.dump(list(self._data), f, cls=MontyEncoder)

    def load_data(self, filename):
        """
        Load assimilated data from a file, replacing any data currently held.

        Args:
            filename (str): filename to load the assimilated data from.
        """
        with zopen(filename, "rt") as f:
            self._data = json.load(f, cls=MontyDecoder)
1✔
119

120

121
def order_assimilation(args):
    """
    Internal helper for BorgQueen: assimilate one path inside a worker process.

    Args:
        args (tuple): ``(path, drone, data, status)`` where ``drone`` provides
            ``assimilate(path)``, ``data`` is a shared list that receives the
            JSON-serialized result, and ``status`` is a shared mapping with
            ``"count"`` and ``"total"`` entries used for progress logging.

    Note:
        The ``"count"`` update is a separate read and write, so it is only a
        progress indicator and not a synchronized counter.
    """
    target_path, worker, results, progress = args

    assimilated = worker.assimilate(target_path)
    # Falsy results are dropped rather than serialized.
    if assimilated:
        serialized = json.dumps(assimilated, cls=MontyEncoder)
        results.append(serialized)

    progress["count"] = progress["count"] + 1
    count, total = progress["count"], progress["total"]
    logger.info(f"{count}/{total} ({count / total :.2%}) done")
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc