• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

BlueBrain / libsonata / 7029770837

29 Nov 2023 08:15AM UTC coverage: 93.327% (-0.6%) from 93.924%
7029770837

Pull #307

github

1uc
Implement `Hdf5Reader` API and default.

This commit introduces the API for an Hdf5Reader. This reader abstracts the
process of opening HDF5 files, and reading a `libsonata.Selection` from a
dataset.

The default reader calls the existing `_readSelection`.
Pull Request #307: Inject dataset reading via `Hdf5Reader`.

80 of 95 new or added lines in 10 files covered. (84.21%)

7 existing lines in 2 files now uncovered.

1888 of 2023 relevant lines covered (93.33%)

80.43 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.9
/src/edge_index.cpp
1
/*************************************************************************
2
 * Copyright (C) 2018-2020 Blue Brain Project
3
 *
4
 * This file is part of 'libsonata', distributed under the terms
5
 * of the GNU Lesser General Public License version 3.
6
 *
7
 * See top-level COPYING.LESSER and COPYING files for details.
8
 *************************************************************************/
9

10
#include "edge_index.h"
11

12
#include <bbp/sonata/common.h>
13

14
#include <array>
15
#include <cstdint>
16
#include <set>
17
#include <unordered_map>
18
#include <vector>
19

20
#include "read_bulk.hpp"
21

22
namespace bbp {
23
namespace sonata {
24
namespace edge_index {
25

26
namespace {

// In-memory form of an index dataset: a list of two-element rows of unsigned
// 64-bit values. The code in this file uses each row as a `[begin, end)` pair.
using RawIndex = std::vector<std::array<uint64_t, 2>>;

// Datasets holding, for every edge, the ID of its source / target node.
// They are read when the indices are written.
constexpr const char* SOURCE_NODE_ID_DSET = "source_node_id";
constexpr const char* TARGET_NODE_ID_DSET = "target_node_id";

// Group holding all indices, and the two sub-groups stored inside it.
constexpr const char* INDEX_GROUP = "indices";
constexpr const char* SOURCE_INDEX_GROUP = "indices/source_to_target";
constexpr const char* TARGET_INDEX_GROUP = "indices/target_to_source";

// The two datasets found inside each index sub-group.
constexpr const char* NODE_ID_TO_RANGES_DSET = "node_id_to_ranges";
constexpr const char* RANGE_TO_EDGE_ID_DSET = "range_to_edge_id";

}  // unnamed namespace
40

41

42
/// Return the `indices/source_to_target` group found below `h5Root`.
///
/// \throws SonataError if `h5Root` contains no such group.
const HighFive::Group sourceIndex(const HighFive::Group& h5Root) {
    if (h5Root.exist(SOURCE_INDEX_GROUP)) {
        return h5Root.getGroup(SOURCE_INDEX_GROUP);
    }

    throw SonataError("No source index group found");
}
48

49

50
/// Return the `indices/target_to_source` group found below `h5Root`.
///
/// \throws SonataError if `h5Root` contains no such group.
const HighFive::Group targetIndex(const HighFive::Group& h5Root) {
    if (h5Root.exist(TARGET_INDEX_GROUP)) {
        return h5Root.getGroup(TARGET_INDEX_GROUP);
    }

    throw SonataError("No target index group found");
}
56

57
/// Look up the edges attached to `nodeIDs` through a two-level index.
///
/// The rows of `node_id_to_ranges` selected by the node IDs are used as
/// ranges into `range_to_edge_id`; the rows read from there become the ranges
/// of the returned `Selection`.
///
/// \param indexGroup group containing the `node_id_to_ranges` and
///                   `range_to_edge_id` datasets.
/// \param nodeIDs    node IDs to look up; any order is accepted, and IDs not
///                   covered by `node_id_to_ranges` are ignored.
/// \param reader     performs the actual dataset reads.
/// \return selection built from the ranges read from `range_to_edge_id`.
Selection resolve(const HighFive::Group& indexGroup,
                  const std::vector<NodeID>& nodeIDs,
                  const Hdf5Reader& reader) {
    using IndexRow = std::array<uint64_t, 2>;

    auto nodeToRanges = indexGroup.getDataSet(NODE_ID_TO_RANGES_DSET);
    auto nodeCount = nodeToRanges.getSpace().getDimensions()[0];

    // SYN2 used to return an empty range for an out-of-range node ID; keep
    // that behaviour by dropping every ID the index does not cover.
    auto isOutOfRange = [nodeCount](auto id) { return id >= nodeCount; };

    auto lookupIds = nodeIDs;
    bulk_read::detail::erase_if(lookupIds, isOutOfRange);
    std::sort(lookupIds.begin(), lookupIds.end());

    // First level: one row per requested node.
    // NOTE(review): `RawIndex{{0, 2}}` presumably selects both columns of the
    // dataset -- confirm against `Hdf5Reader::readSelection`.
    auto nodeSelection = Selection::fromValues(lookupIds);
    auto primaryRange =
        reader.readSelection<IndexRow>(nodeToRanges, nodeSelection, RawIndex{{0, 2}});

    // Rows with `start >= end` are invalid (or empty) and must not be used
    // to address the second level.
    auto isInvalidRange = [](const auto& row) { return row[0] >= row[1]; };
    bulk_read::detail::erase_if(primaryRange, isInvalidRange);

    primaryRange = bulk_read::sortAndMerge(primaryRange);

    // Second level: the rows of `range_to_edge_id` addressed by the first.
    auto secondaryRange = reader.readSelection<IndexRow>(
        indexGroup.getDataSet(RANGE_TO_EDGE_ID_DSET), primaryRange, RawIndex{{0, 2}});

    // Sort and eliminate empty ranges.
    secondaryRange = bulk_read::sortAndMerge(secondaryRange);

    // `Selection::Ranges` has a different element type than `RawIndex`, so
    // the rows are converted one by one.
    Selection::Ranges edgeIds;
    edgeIds.reserve(secondaryRange.size());
    for (const auto& row : secondaryRange) {
        edgeIds.emplace_back(row[0], row[1]);
    }

    return Selection(std::move(edgeIds));
}
97

NEW
UNCOV
98
/// Single-node convenience overload: looks up `nodeID` by forwarding a
/// one-element list to the vector overload of `resolve`.
Selection resolve(const HighFive::Group& indexGroup,
                  const NodeID nodeID,
                  const Hdf5Reader& reader) {
    const std::vector<NodeID> singleNode(1, nodeID);
    return resolve(indexGroup, singleNode, reader);
}
103

104

105
namespace {
106

107
std::unordered_map<NodeID, RawIndex> _groupNodeRanges(const std::vector<NodeID>& nodeIDs) {
4✔
108
    std::unordered_map<NodeID, RawIndex> result;
4✔
109

110
    if (nodeIDs.empty()) {
4✔
UNCOV
111
        return result;
×
112
    }
113

114
    uint64_t rangeStart = 0;
4✔
115
    NodeID lastNodeID = nodeIDs[rangeStart];
4✔
116
    for (uint64_t i = 1; i < nodeIDs.size(); ++i) {
24✔
117
        if (nodeIDs[i] != lastNodeID) {
20✔
118
            result[lastNodeID].push_back({rangeStart, i});
12✔
119
            rangeStart = i;
12✔
120
            lastNodeID = nodeIDs[rangeStart];
12✔
121
        }
122
    }
123

124
    result[lastNodeID].push_back({rangeStart, nodeIDs.size()});
4✔
125

126
    return result;
4✔
127
}
128

129

130
// Use only in the writing code below. General purpose reading should use the
131
// Hdf5Reader interface.
132
std::vector<NodeID> _readNodeIDs(const HighFive::Group& h5Root, const std::string& name) {
4✔
133
    std::vector<NodeID> result;
4✔
134
    h5Root.getDataSet(name).read(result);
4✔
135
    return result;
4✔
136
}
137

138

139
void _writeIndexDataset(const RawIndex& data, const std::string& name, HighFive::Group& h5Group) {
8✔
140
    auto dset = h5Group.createDataSet<uint64_t>(name, HighFive::DataSpace::From(data));
8✔
141
    dset.write(data);
8✔
142
}
8✔
143

144

145
void _writeIndexGroup(const std::vector<NodeID>& nodeIDs,
4✔
146
                      uint64_t nodeCount,
147
                      HighFive::Group& h5Root,
148
                      const std::string& name) {
149
    auto indexGroup = h5Root.createGroup(name);
8✔
150

151
    auto nodeToRanges = _groupNodeRanges(nodeIDs);
8✔
152
    const auto rangeCount =
153
        std::accumulate(nodeToRanges.begin(),
4✔
154
                        nodeToRanges.end(),
155
                        uint64_t{0},
156
                        [](uint64_t total, decltype(nodeToRanges)::const_reference item) {
12✔
157
                            return total + item.second.size();
12✔
158
                        });
159

160
    RawIndex primaryIndex;
8✔
161
    RawIndex secondaryIndex;
4✔
162

163
    primaryIndex.reserve(nodeCount);
4✔
164
    secondaryIndex.reserve(rangeCount);
4✔
165

166
    uint64_t offset = 0;
4✔
167
    for (NodeID nodeID = 0; nodeID < nodeCount; ++nodeID) {
20✔
168
        const auto it = nodeToRanges.find(nodeID);
16✔
169
        if (it == nodeToRanges.end()) {
16✔
170
            primaryIndex.push_back({offset, offset});
4✔
171
        } else {
172
            auto& ranges = it->second;
12✔
173
            primaryIndex.push_back({offset, offset + ranges.size()});
12✔
174
            offset += ranges.size();
12✔
175
            std::move(ranges.begin(), ranges.end(), std::back_inserter(secondaryIndex));
12✔
176
        }
177
    }
178

179
    _writeIndexDataset(primaryIndex, NODE_ID_TO_RANGES_DSET, indexGroup);
4✔
180
    _writeIndexDataset(secondaryIndex, RANGE_TO_EDGE_ID_DSET, indexGroup);
4✔
181
}
4✔
182

183
}  // unnamed namespace
184

185

186
void write(HighFive::Group& h5Root,
6✔
187
           uint64_t sourceNodeCount,
188
           uint64_t targetNodeCount,
189
           bool overwrite) {
190
    if (h5Root.exist(INDEX_GROUP)) {
6✔
191
        if (overwrite) {
4✔
192
            // TODO: remove INDEX_GROUP
193
            throw SonataError("Index overwrite not implemented yet");
2✔
194
        } else {
195
            throw SonataError("Index group already exists");
2✔
196
        }
197
    }
198

199
    try {
200
        _writeIndexGroup(_readNodeIDs(h5Root, SOURCE_NODE_ID_DSET),
2✔
201
                         sourceNodeCount,
202
                         h5Root,
203
                         SOURCE_INDEX_GROUP);
204
        _writeIndexGroup(_readNodeIDs(h5Root, TARGET_NODE_ID_DSET),
2✔
205
                         targetNodeCount,
206
                         h5Root,
207
                         TARGET_INDEX_GROUP);
UNCOV
208
    } catch (...) {
×
209
        try {
210
            // TODO: remove INDEX_GROUP
211
        } catch (...) {
212
        }
UNCOV
213
        throw;
×
214
    }
215
}
2✔
216

217
}  // namespace edge_index
218
}  // namespace sonata
219
}  // namespace bbp
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc