• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

BlueBrain / libsonata / 7006020532

27 Nov 2023 01:57PM UTC coverage: 93.861% (-0.1%) from 93.961%
7006020532

Pull #314

github

1uc
Refactor `edge_index::resolve`.

The point is to split reading of the dataset into a separate function, and then
make `resolve` safe for collective IO (assuming the newly introduced function
is).

The overload for reading a single `nodeID` is removed as it's unused now.
Pull Request #314: Refactor edge_index::resolve.

26 of 27 new or added lines in 2 files covered. (96.3%)

3 existing lines in 1 file now uncovered.

1850 of 1971 relevant lines covered (93.86%)

81.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.34
/src/edge_index.cpp
1
/*************************************************************************
2
 * Copyright (C) 2018-2020 Blue Brain Project
3
 *
4
 * This file is part of 'libsonata', distributed under the terms
5
 * of the GNU Lesser General Public License version 3.
6
 *
7
 * See top-level COPYING.LESSER and COPYING files for details.
8
 *************************************************************************/
9

10
#include "edge_index.h"
11

12
#include <bbp/sonata/common.h>
13

14
#include <array>
15
#include <cstdint>
16
#include <set>
17
#include <unordered_map>
18
#include <vector>
19

20
#include "read_bulk.hpp"
21
#include "read_canonical_selection.hpp"
22

23
namespace bbp {
24
namespace sonata {
25
namespace edge_index {
26

27
namespace {
28

29
using RawIndex = std::vector<std::array<uint64_t, 2>>;
30

31
const char* const SOURCE_NODE_ID_DSET = "source_node_id";
32
const char* const TARGET_NODE_ID_DSET = "target_node_id";
33

34
const char* const INDEX_GROUP = "indices";
35
const char* const SOURCE_INDEX_GROUP = "indices/source_to_target";
36
const char* const TARGET_INDEX_GROUP = "indices/target_to_source";
37
const char* const NODE_ID_TO_RANGES_DSET = "node_id_to_ranges";
38
const char* const RANGE_TO_EDGE_ID_DSET = "range_to_edge_id";
39

40
}  // unnamed namespace
41

42

43
// Return the group stored at SOURCE_INDEX_GROUP below `h5Root`.
//
// Throws SonataError when `h5Root` has no such group.
const HighFive::Group sourceIndex(const HighFive::Group& h5Root) {
    if (h5Root.exist(SOURCE_INDEX_GROUP)) {
        return h5Root.getGroup(SOURCE_INDEX_GROUP);
    }
    throw SonataError("No source index group found");
}
49

50

51
// Return the group stored at TARGET_INDEX_GROUP below `h5Root`.
//
// Throws SonataError when `h5Root` has no such group.
const HighFive::Group targetIndex(const HighFive::Group& h5Root) {
    if (h5Root.exist(TARGET_INDEX_GROUP)) {
        return h5Root.getGroup(TARGET_INDEX_GROUP);
    }
    throw SonataError("No target index group found");
}
57

58
// Look up `nodeIDs` in the two-level index stored in `indexGroup` and return
// the matching edge IDs as a Selection.
//
// Steps:
//   1. Read rows of NODE_ID_TO_RANGES_DSET for the distinct, in-range node IDs.
//   2. Use the (merged) rows from step 1 to read rows of RANGE_TO_EDGE_ID_DSET.
//   3. Merge those rows and hand them to Selection.
//
// NOTE(review): `readCanonicalSelection` and `sortAndMerge` are defined in
// other headers; the `RawIndex{{0, 2}}` argument presumably selects both
// columns of each row -- confirm against read_canonical_selection.hpp.
Selection resolve(const HighFive::Group& indexGroup, const std::vector<NodeID>& nodeIDs) {
    auto nodeToRanges = indexGroup.getDataSet(NODE_ID_TO_RANGES_DSET);
    auto nodeCount = nodeToRanges.getSpace().getDimensions()[0];

    // Work on a private copy: the caller's vector is neither sorted nor
    // deduplicated in place.
    std::vector<NodeID> uniqueIds(nodeIDs.begin(), nodeIDs.end());

    // IDs at or beyond the first dimension of the dataset are silently
    // dropped; SYN2 used to return an empty range for out-of-range node IDs.
    bulk_read::detail::erase_if(uniqueIds, [nodeCount](auto id) { return !(id < nodeCount); });

    std::sort(uniqueIds.begin(), uniqueIds.end());
    const auto firstDuplicate = std::unique(uniqueIds.begin(), uniqueIds.end());
    uniqueIds.erase(firstDuplicate, uniqueIds.end());

    auto requestedNodes = Selection::fromValues(uniqueIds);
    auto firstLevel = detail::readCanonicalSelection<std::array<uint64_t, 2>>(
        nodeToRanges, requestedNodes.ranges(), RawIndex{{0, 2}});

    // A row whose start is not strictly below its end carries no entries.
    bulk_read::detail::erase_if(firstLevel,
                                [](const auto& row) { return !(row[0] < row[1]); });

    // TODO check that the spec allows us to optimize this.
    firstLevel = bulk_read::sortAndMerge(firstLevel);

    auto secondLevel = detail::readCanonicalSelection<std::array<uint64_t, 2>>(
        indexGroup.getDataSet(RANGE_TO_EDGE_ID_DSET), firstLevel, RawIndex{{0, 2}});

    secondLevel = bulk_read::sortAndMerge(secondLevel);

    // `Selection::Ranges` has a different element type than `RawIndex`, so
    // the rows are converted one at a time.
    Selection::Ranges edgeRanges;
    edgeRanges.reserve(secondLevel.size());
    for (size_t i = 0; i < secondLevel.size(); ++i) {
        const auto& row = secondLevel[i];
        edgeRanges.emplace_back(row[0], row[1]);
    }

    return Selection(std::move(edgeRanges));
}
98

99

100
namespace {
101

102
std::unordered_map<NodeID, RawIndex> _groupNodeRanges(const std::vector<NodeID>& nodeIDs) {
4✔
103
    std::unordered_map<NodeID, RawIndex> result;
4✔
104

105
    if (nodeIDs.empty()) {
4✔
UNCOV
106
        return result;
×
107
    }
108

109
    uint64_t rangeStart = 0;
4✔
110
    NodeID lastNodeID = nodeIDs[rangeStart];
4✔
111
    for (uint64_t i = 1; i < nodeIDs.size(); ++i) {
24✔
112
        if (nodeIDs[i] != lastNodeID) {
20✔
113
            result[lastNodeID].push_back({rangeStart, i});
12✔
114
            rangeStart = i;
12✔
115
            lastNodeID = nodeIDs[rangeStart];
12✔
116
        }
117
    }
118

119
    result[lastNodeID].push_back({rangeStart, nodeIDs.size()});
4✔
120

121
    return result;
4✔
122
}
123

124

125
// Read the whole dataset called `name` under `h5Root` into a vector of node IDs.
std::vector<NodeID> _readNodeIDs(const HighFive::Group& h5Root, const std::string& name) {
    std::vector<NodeID> nodeIds;
    auto dataset = h5Root.getDataSet(name);
    dataset.read(nodeIds);
    return nodeIds;
}
130

131

132
void _writeIndexDataset(const RawIndex& data, const std::string& name, HighFive::Group& h5Group) {
8✔
133
    auto dset = h5Group.createDataSet<uint64_t>(name, HighFive::DataSpace::From(data));
8✔
134
    dset.write(data);
8✔
135
}
8✔
136

137

138
void _writeIndexGroup(const std::vector<NodeID>& nodeIDs,
4✔
139
                      uint64_t nodeCount,
140
                      HighFive::Group& h5Root,
141
                      const std::string& name) {
142
    auto indexGroup = h5Root.createGroup(name);
8✔
143

144
    auto nodeToRanges = _groupNodeRanges(nodeIDs);
8✔
145
    const auto rangeCount =
146
        std::accumulate(nodeToRanges.begin(),
4✔
147
                        nodeToRanges.end(),
148
                        uint64_t{0},
149
                        [](uint64_t total, decltype(nodeToRanges)::const_reference item) {
12✔
150
                            return total + item.second.size();
12✔
151
                        });
152

153
    RawIndex primaryIndex;
8✔
154
    RawIndex secondaryIndex;
4✔
155

156
    primaryIndex.reserve(nodeCount);
4✔
157
    secondaryIndex.reserve(rangeCount);
4✔
158

159
    uint64_t offset = 0;
4✔
160
    for (NodeID nodeID = 0; nodeID < nodeCount; ++nodeID) {
20✔
161
        const auto it = nodeToRanges.find(nodeID);
16✔
162
        if (it == nodeToRanges.end()) {
16✔
163
            primaryIndex.push_back({offset, offset});
4✔
164
        } else {
165
            auto& ranges = it->second;
12✔
166
            primaryIndex.push_back({offset, offset + ranges.size()});
12✔
167
            offset += ranges.size();
12✔
168
            std::move(ranges.begin(), ranges.end(), std::back_inserter(secondaryIndex));
12✔
169
        }
170
    }
171

172
    _writeIndexDataset(primaryIndex, NODE_ID_TO_RANGES_DSET, indexGroup);
4✔
173
    _writeIndexDataset(secondaryIndex, RANGE_TO_EDGE_ID_DSET, indexGroup);
4✔
174
}
4✔
175

176
}  // unnamed namespace
177

178

179
void write(HighFive::Group& h5Root,
6✔
180
           uint64_t sourceNodeCount,
181
           uint64_t targetNodeCount,
182
           bool overwrite) {
183
    if (h5Root.exist(INDEX_GROUP)) {
6✔
184
        if (overwrite) {
4✔
185
            // TODO: remove INDEX_GROUP
186
            throw SonataError("Index overwrite not implemented yet");
2✔
187
        } else {
188
            throw SonataError("Index group already exists");
2✔
189
        }
190
    }
191

192
    try {
193
        _writeIndexGroup(_readNodeIDs(h5Root, SOURCE_NODE_ID_DSET),
2✔
194
                         sourceNodeCount,
195
                         h5Root,
196
                         SOURCE_INDEX_GROUP);
197
        _writeIndexGroup(_readNodeIDs(h5Root, TARGET_NODE_ID_DSET),
2✔
198
                         targetNodeCount,
199
                         h5Root,
200
                         TARGET_INDEX_GROUP);
UNCOV
201
    } catch (...) {
×
202
        try {
203
            // TODO: remove INDEX_GROUP
204
        } catch (...) {
205
        }
UNCOV
206
        throw;
×
207
    }
208
}
2✔
209

210
}  // namespace edge_index
211
}  // namespace sonata
212
}  // namespace bbp
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc