• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

DeepRank / deeprank-core / 4075652401

pending completion
4075652401

Pull #330

github

GitHub
Merge 45ea1393e into d73e8c34f
Pull Request #330: fix: data generation threading locked

1046 of 1331 branches covered (78.59%)

Branch coverage included in aggregate %.

36 of 36 new or added lines in 2 files covered. (100.0%)

2949 of 3482 relevant lines covered (84.69%)

0.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

79.21
/deeprankcore/utils/grid.py
1
"""
2
This module holds the classes that are used when working with a 3D grid.
3
"""
4

5

6
from enum import Enum
1✔
7
from typing import Dict, Union, List
1✔
8
import numpy as np
1✔
9
import h5py
1✔
10
import itertools
1✔
11
from scipy.signal import bspline
1✔
12

13
from deeprankcore.domain import gridstorage
1✔
14

15

16
class MapMethod(Enum):
    """This holds the value of either one of 4 grid mapping methods.

    A mapping method determines how feature point values are divided over the grid points.
    Each member is identified by a distinct integer value.
    """

    # Members are compared by identity/equality when a mapping method is selected;
    # the integer values below only need to stay unique.
    GAUSSIAN = 1
    FAST_GAUSSIAN = 2
    BSP_LINE = 3
    NEAREST_NEIGHBOURS = 4
1✔
25

26

27
class GridSettings:
    """Objects of this class hold the settings to build a grid.

    The grid is basically a multi-divided 3D cube with
    the following properties:

     - sizes: x, y, z sizes of the box in Å
     - points_counts: the number of points on the x, y, z edges of the cube
     - resolutions: the size in Å of one x, y, z edge subdivision. Also the distance between two points on the edge.
    """

    def __init__(
        self,
        points_counts: List[int],
        sizes: List[float]
    ):
        """Store the per-axis point counts and box sizes.

        Args:
            points_counts: number of points along the x, y and z edges (exactly three values).
            sizes: lengths of the x, y and z edges (exactly three values).

        Raises:
            ValueError: if either argument does not hold exactly three values.
        """

        # Raise explicitly rather than ``assert``: assertions are stripped
        # when Python runs with -O, which would silently skip the validation.
        if len(points_counts) != 3:
            raise ValueError(
                f"points_counts must hold exactly 3 values, got {len(points_counts)}"
            )
        if len(sizes) != 3:
            raise ValueError(
                f"sizes must hold exactly 3 values, got {len(sizes)}"
            )

        self._points_counts = points_counts
        self._sizes = sizes

    @property
    def resolutions(self) -> List[float]:
        """Per-axis edge size divided by the per-axis number of points."""
        return [
            size / count
            for size, count in zip(self._sizes, self._points_counts)
        ]

    @property
    def sizes(self) -> List[float]:
        """The x, y, z sizes as passed to the constructor."""
        return self._sizes

    @property
    def points_counts(self) -> List[int]:
        """The x, y, z point counts as passed to the constructor."""
        return self._points_counts
1✔
59

60

61
class Grid:
    """An instance of this class holds everything that the grid is made of:
    - coordinates of points
    - names of features
    - feature values on each point
    """

    def __init__(self, id_: str, center: List[float], settings: "GridSettings"):
        """Build the grid points around ``center`` and start with no features.

        Args:
            id_: identifier of the grid, used as the group name in ``to_hdf5``.
            center: x, y, z coordinates the grid is positioned around.
            settings: provides ``sizes``, ``points_counts`` and ``resolutions`` per axis.
        """
        self.id = id_

        self._center = np.array(center)

        self._settings = settings

        self._set_mesh(self._center, settings)

        self._features = {}

    def _set_mesh(self, center: np.ndarray, settings: "GridSettings"):
        "builds the grid points"

        axis_points = []
        for axis in range(3):
            count = settings.points_counts[axis]
            lowest = center[axis] - settings.sizes[axis] / 2
            highest = lowest + (count - 1.0) * settings.resolutions[axis]
            axis_points.append(np.linspace(lowest, highest, num=count))

        self._xs, self._ys, self._zs = axis_points

        # Matrix ("ij") indexing: every grid array has shape (nx, ny, nz) and
        # e.g. xgrid[i, j, k] == xs[i]. This yields the same arrays as calling
        # meshgrid(ys, xs, zs) with the default cartesian indexing.
        self._xgrid, self._ygrid, self._zgrid = np.meshgrid(
            self._xs, self._ys, self._zs, indexing="ij"
        )

    @property
    def center(self) -> np.ndarray:
        return self._center

    @property
    def xs(self) -> np.ndarray:
        return self._xs

    @property
    def xgrid(self) -> np.ndarray:
        return self._xgrid

    @property
    def ys(self) -> np.ndarray:
        return self._ys

    @property
    def ygrid(self) -> np.ndarray:
        return self._ygrid

    @property
    def zs(self) -> np.ndarray:
        return self._zs

    @property
    def zgrid(self) -> np.ndarray:
        return self._zgrid

    @property
    def features(self) -> Dict[str, np.ndarray]:
        return self._features

    def add_feature_values(self, feature_name: str, data: np.ndarray):
        """Makes sure feature values per grid point get stored.

        This method may be called repeatedly to add on to existing grid point values.
        The first array stored under a name is kept by reference and later calls
        add to it in place.
        """

        if feature_name not in self._features:
            self._features[feature_name] = data
        else:
            self._features[feature_name] += data

    def _get_distances(self, position: np.ndarray) -> np.ndarray:
        "Euclidean distance from the given position to every grid point"

        return np.sqrt(
            np.square(self.xgrid - position[0])
            + np.square(self.ygrid - position[1])
            + np.square(self.zgrid - position[2])
        )

    def _get_mapped_feature_gaussian(
        self, position: np.ndarray, value: float
    ) -> np.ndarray:
        "Spreads the value over all grid points as value * exp(-beta * distance)"

        beta = 1.0

        return value * np.exp(-beta * self._get_distances(position))

    def _get_mapped_feature_fast_gaussian(
        self, position: np.ndarray, value: float
    ) -> np.ndarray:
        "Like the gaussian mapping, but grid points at or beyond the cutoff distance stay zero"

        beta = 1.0
        cutoff = 5.0 * beta

        distances = self._get_distances(position)
        within_cutoff = distances < cutoff

        data = np.zeros(distances.shape)
        data[within_cutoff] = value * np.exp(-beta * distances[within_cutoff])

        return data

    def _get_mapped_feature_bsp_line(
        self, position: np.ndarray, value: float
    ) -> np.ndarray:
        "Maps the value using the product of per-axis B-spline basis functions"

        order = 4

        # The grid has no single resolution: scale every axis by its own one.
        resolution_x, resolution_y, resolution_z = self._settings.resolutions

        # NOTE(review): `bspline` comes from the module-level
        # `from scipy.signal import bspline`; confirm that the installed
        # SciPy version still provides it.
        bsp_data = (
            bspline((self.xgrid - position[0]) / resolution_x, order)
            * bspline((self.ygrid - position[1]) / resolution_y, order)
            * bspline((self.zgrid - position[2]) / resolution_z, order)
        )

        return value * bsp_data

    @staticmethod
    def _get_axis_neighbour_weights(axis_points: np.ndarray, coordinate: float):
        """Finds the (at most) two axis points nearest to the coordinate.

        Returns:
            the indices of those points and their weights, which sum to one.
            The nearer point gets the larger weight.
        """

        distances = np.abs(axis_points - coordinate)
        nearest = np.argsort(distances)[:2]
        nearest_distances = distances[nearest]
        total = np.sum(nearest_distances)

        if nearest.size < 2 or total == 0.0:
            # fewer than two points on the axis, or both coincide with the
            # coordinate: share the weight equally instead of dividing by zero
            weights = np.ones(nearest.size) / max(nearest.size, 1)
        else:
            # each point is weighted by the distance to the *other* point,
            # so that the closest point contributes most
            weights = nearest_distances[::-1] / total

        return nearest, weights

    def _get_mapped_feature_nearest_neighbour(
        self, position: np.ndarray, value: float
    ) -> np.ndarray:
        """Divides the value over the grid points surrounding the position.

        Per axis the two nearest grid coordinates are weighted by proximity.
        The weight of a grid point is the product of its three axis weights,
        so that the mapped values add up to the input value.
        """

        fx, fy, fz = position

        # each axis must be compared against its own coordinate
        neighbours_x = zip(*self._get_axis_neighbour_weights(self.xs, fx))
        neighbours_y = zip(*self._get_axis_neighbour_weights(self.ys, fy))
        neighbours_z = zip(*self._get_axis_neighbour_weights(self.zs, fz))

        neighbour_data = np.zeros(
            (self.xs.shape[0], self.ys.shape[0], self.zs.shape[0])
        )

        for (index_x, weight_x), (index_y, weight_y), (index_z, weight_z) in itertools.product(
            neighbours_x, neighbours_y, neighbours_z
        ):
            neighbour_data[index_x, index_y, index_z] = weight_x * weight_y * weight_z * value

        return neighbour_data

    def _get_atomic_density_koes(self, position: np.ndarray, vanderwaals_radius: float) -> np.ndarray:
        """
        Function to map individual atomic density on the grid.
        The formula is equation (1) of the Koes paper
        Protein-Ligand Scoring with Convolutional NN Arxiv:1612.02751v1

        Returns:
            the mapped density
        """

        distances = self._get_distances(position)

        density_data = np.zeros(distances.shape)

        indices_close = distances < vanderwaals_radius
        indices_far = (distances >= vanderwaals_radius) & (distances < 1.5 * vanderwaals_radius)

        e_squared = np.square(np.e)

        # within the radius: gaussian term
        density_data[indices_close] = np.exp(-2.0 * np.square(distances[indices_close]) / np.square(vanderwaals_radius))

        # between 1.0 and 1.5 times the radius: quadratic term; beyond that the density stays zero
        density_data[indices_far] = 4.0 / e_squared / np.square(vanderwaals_radius) * np.square(distances[indices_far]) - \
                                    12.0 / e_squared / vanderwaals_radius * distances[indices_far] + \
                                    9.0 / e_squared

        return density_data

    def map_feature(
        self,
        position: np.ndarray,
        feature_name: str,
        feature_value: Union[np.ndarray, float],
        method: "MapMethod",
    ):
        """
        Maps point feature data at a given position to the grid, using the given method.
        The feature_value should either be a single number or a one-dimensional array

        A single number is stored under feature_name, the values of an array are
        stored under feature_name followed by a three-digit index.

        Raises:
            ValueError: if the given method is not supported
        """

        # pick the mapping function first, so that an unsupported method
        # fails before anything is stored
        if method == MapMethod.GAUSSIAN:
            get_mapped_feature = self._get_mapped_feature_gaussian

        elif method == MapMethod.FAST_GAUSSIAN:
            get_mapped_feature = self._get_mapped_feature_fast_gaussian

        elif method == MapMethod.NEAREST_NEIGHBOURS:
            get_mapped_feature = self._get_mapped_feature_nearest_neighbour

        else:
            # NOTE: MapMethod.BSP_LINE ends up here too, its mapping is not enabled
            raise ValueError(f"unsupported map method: {method}")

        # determine whether we're dealing with a single number or multiple numbers:
        if np.ndim(feature_value) == 0:
            # covers python numbers as well as numpy scalars
            index_names_values = [(feature_name, float(feature_value))]
        else:
            index_names_values = [
                (f"{feature_name}_{index:03d}", value)
                for index, value in enumerate(feature_value)
            ]

        # map the data to the grid
        for index_name, value in index_names_values:
            self.add_feature_values(index_name, get_mapped_feature(position, value))

    def to_hdf5(self, hdf5_path: str):
        "Write the grid data to hdf5, according to deeprank standards."

        # the file is opened in append mode, so existing content is kept
        with h5py.File(hdf5_path, "a") as hdf5_file:

            # create a group to hold everything
            grid_group = hdf5_file.require_group(self.id)

            # store grid points
            points_group = grid_group.require_group("grid_points")
            points_group.create_dataset("x", data=self.xs)
            points_group.create_dataset("y", data=self.ys)
            points_group.create_dataset("z", data=self.zs)
            points_group.create_dataset("center", data=self.center)

            # store grid features
            features_group = grid_group.require_group(gridstorage.MAPPED_FEATURES)
            for feature_name, feature_data in self.features.items():

                feature_group = features_group.require_group(feature_name)
                feature_group.create_dataset(
                    gridstorage.FEATURE_VALUE,
                    data=feature_data,
                    compression="lzf",
                    chunks=True,
                )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc