• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

networkit / networkit / 22852203844

09 Mar 2026 11:53AM UTC coverage: 79.439% (-0.05%) from 79.493%
22852203844

push

github

web-flow
Merge pull request #1385 from Schwarf/fix/binary_reader_writer_deleted_nodes

Fix binary I/O for graphs with deleted nodes

44 of 86 new or added lines in 3 files covered. (51.16%)

1 existing line in 1 file now uncovered.

29575 of 37230 relevant lines covered (79.44%)

2273396.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.61
/networkit/cpp/io/NetworkitBinaryReader.cpp
1
/*
2
 * NetworkitBinaryReader.cpp
3
 *
4
 * Author: Charmaine Ndolo <charmaine.ndolo@b-tu.de>
5
 *
6
 */
7

8
#include <atomic>
9
#include <fstream>
10

11
#include <networkit/auxiliary/Log.hpp>
12
#include <networkit/io/MemoryMappedFile.hpp>
13
#include <networkit/io/NetworkitBinaryGraph.hpp>
14
#include <networkit/io/NetworkitBinaryReader.hpp>
15

16
const char *accessData(const NetworKit::MemoryMappedFile &source) {
19✔
17
    return source.cbegin();
19✔
18
}
19

20
const char *accessData(const std::vector<uint8_t> &source) {
2✔
21
    return (char *)source.data();
2✔
22
}
23

24
namespace NetworKit {
25

26
Graph NetworkitBinaryReader::read(std::string_view path) {
19✔
27
    MemoryMappedFile mmfile(path);
19✔
28
    Graph G = readData(mmfile);
19✔
29
    return G;
38✔
30
}
19✔
31

32
Graph NetworkitBinaryReader::readFromBuffer(const std::vector<uint8_t> &data) {
2✔
33
    return readData(data);
2✔
34
}
35

36
template <class T>
37
Graph NetworkitBinaryReader::readData(const T &source) {
42✔
38
    nkbg::Header header;
42✔
39
    nkbg::WeightFormat weightFormat;
40

41
    const char *startIt = accessData(source);
42✔
42
    const char *it = startIt;
42✔
43
    auto readHeader = [&]() {
63✔
44
        memcpy(&header.magic, it, sizeof(uint64_t));
21✔
45
        it += sizeof(uint64_t);
21✔
46
        memcpy(&header.checksum, it, sizeof(uint64_t));
21✔
47
        it += sizeof(uint64_t);
21✔
48
        memcpy(&header.features, it, sizeof(uint64_t));
21✔
49
        it += sizeof(uint64_t);
21✔
50
        if (!memcmp("nkbg002", header.magic, 8)) {
21✔
51
            version = 2;
×
52
        } else if (!memcmp("nkbg003", header.magic, 8)) {
21✔
UNCOV
53
            version = 3;
×
54
        } else if (!memcmp("nkbg004", header.magic, 8)) {
21✔
55
            version = 4;
21✔
56
        } else {
57
            throw std::runtime_error("Reader expected another magic value");
×
58
        }
59
        directed = (header.features & nkbg::DIR_MASK);
21✔
60
        weightFormat = static_cast<nkbg::WeightFormat>((header.features & nkbg::WGHT_MASK)
21✔
61
                                                       >> nkbg::WGHT_SHIFT);
21✔
62
        indexed = false;
21✔
63
        if (version >= 3) {
21✔
64
            indexed = (header.features & nkbg::INDEX_MASK) >> nkbg::INDEX_SHIFT;
21✔
65
        }
66
        memcpy(&header.nodes, it, sizeof(uint64_t));
21✔
67
        it += sizeof(uint64_t);
21✔
68
        memcpy(&header.chunks, it, sizeof(uint64_t));
21✔
69
        it += sizeof(uint64_t);
21✔
70
        memcpy(&header.offsetBaseData, it, sizeof(uint64_t));
21✔
71
        it += sizeof(uint64_t);
21✔
72
        memcpy(&header.offsetAdjLists, it, sizeof(uint64_t));
21✔
73
        it += sizeof(uint64_t);
21✔
74
        memcpy(&header.offsetAdjTranspose, it, sizeof(uint64_t));
21✔
75
        it += sizeof(uint64_t);
21✔
76
        memcpy(&header.offsetWeightLists, it, sizeof(uint64_t));
21✔
77
        it += sizeof(uint64_t);
21✔
78
        memcpy(&header.offsetWeightTranspose, it, sizeof(uint64_t));
21✔
79
        it += sizeof(uint64_t);
21✔
80
        if (version > 2) {
21✔
81
            memcpy(&header.offsetAdjIdLists, it, sizeof(uint64_t));
21✔
82
            it += sizeof(uint64_t);
21✔
83
            memcpy(&header.offsetAdjIdTranspose, it, sizeof(uint64_t));
21✔
84
            it += sizeof(uint64_t);
21✔
85
        }
86
    };
87

88
    readHeader();
42✔
89
    nodes = header.nodes;
42✔
90
    DEBUG("# nodes here = ", nodes);
42✔
91
    chunks = header.chunks;
42✔
92
    DEBUG("# chunks here = ", chunks);
42✔
93
    if (weightFormat == nkbg::WeightFormat::NONE) {
42✔
94
        weighted = false;
22✔
95
    } else {
96
        weighted = true;
20✔
97
    }
98
    Graph G(nodes, weighted, directed);
42✔
99

100
    if (indexed)
42✔
101
        G.indexEdges();
16✔
102
    if (nodes == 0) {
42✔
103
        return G;
4✔
104
    }
105
    // Read base data.
106
    std::vector<uint8_t> nodeFlags;
38✔
107
    const char *baseIt = startIt + header.offsetBaseData;
38✔
108
    for (uint64_t i = 0; i < nodes; i++) {
30,276✔
109
        uint8_t flag;
110
        memcpy(&flag, baseIt, sizeof(uint8_t));
30,238✔
111
        baseIt += sizeof(uint8_t);
30,238✔
112
        if (flag & nkbg::DELETED_BIT) {
30,238✔
113
            G.removeNode(i);
14✔
114
        }
115
    }
116

117
    std::vector<uint64_t> firstVert;
38✔
118
    firstVert.push_back(0);
38✔
119
    for (uint64_t ch = 1; ch < chunks; ch++) {
666✔
120
        uint64_t first;
121
        memcpy(&first, baseIt, sizeof(uint64_t));
628✔
122
        baseIt += sizeof(uint64_t);
628✔
123
        firstVert.push_back(first);
628✔
124
    }
125
    firstVert.push_back(nodes);
38✔
126
    // Read adjacency lists.
127
    const char *adjIt = startIt + header.offsetAdjLists;
38✔
128
    const char *transpIt = startIt + header.offsetAdjTranspose;
38✔
129
    const char *adjWghtIt = startIt + header.offsetWeightLists;
38✔
130
    const char *transpWghtIt = startIt + header.offsetWeightTranspose;
38✔
131
    const char *adjIdIt = (version > 2) ? startIt + header.offsetAdjIdLists : nullptr;
38✔
132
    const char *transpIdIt = (version > 2) ? startIt + header.offsetAdjIdTranspose : nullptr;
38✔
133
    uint64_t adjListSize;
134
    memcpy(&adjListSize, adjIt + (chunks - 1) * sizeof(uint64_t), sizeof(uint64_t));
38✔
135
    uint64_t transposeListSize;
136
    memcpy(&transposeListSize, transpIt + (chunks - 1) * sizeof(uint64_t), sizeof(uint64_t));
38✔
137

138
    if (!directed) {
38✔
139
        assert(adjListSize == transposeListSize);
24✔
140
    }
141
    G.setEdgeCount(unsafe, adjListSize);
38✔
142

143
    std::atomic<count> selfLoops{0};
38✔
144
    edgeid omega = 0;
38✔
145

146
    auto constructGraph = [&](uint64_t c) {
371✔
147
        node vertex = firstVert[c];
333✔
148
        uint64_t off = 0;
333✔
149
        uint64_t transpOff = 0;
333✔
150
        uint64_t wghtOff = 0;
333✔
151
        uint64_t transWghtOff = 0;
333✔
152
        uint64_t indexOff = 0;
333✔
153
        uint64_t transIndexOff = 0;
333✔
154
        if (vertex) {
333✔
155
            memcpy(&off, adjIt + (c - 1) * sizeof(uint64_t), sizeof(uint64_t));
314✔
156
            memcpy(&transpOff, transpIt + (c - 1) * sizeof(uint64_t), sizeof(uint64_t));
314✔
157
            memcpy(&wghtOff, adjWghtIt + (c - 1) * sizeof(uint64_t), sizeof(uint64_t));
314✔
158
            memcpy(&transWghtOff, transpWghtIt + (c - 1) * sizeof(uint64_t), sizeof(uint64_t));
314✔
159
            if (indexed) {
314✔
160
                memcpy(&indexOff, adjIdIt + (c - 1) * sizeof(uint64_t), sizeof(uint64_t));
126✔
161
                memcpy(&transIndexOff, transpIdIt + (c - 1) * sizeof(uint64_t), sizeof(uint64_t));
126✔
162
            }
163
        }
164

165
        off += (chunks - 1) * sizeof(uint64_t) + sizeof(uint64_t);
333✔
166
        transpOff += (chunks - 1) * sizeof(uint64_t) + sizeof(uint64_t);
333✔
167
        wghtOff += (chunks - 1) * sizeof(uint64_t);
333✔
168
        transWghtOff += (chunks - 1) * sizeof(uint64_t);
333✔
169
        indexOff += (chunks - 1) * sizeof(uint64_t);
333✔
170
        transIndexOff += (chunks - 1) * sizeof(uint64_t);
333✔
171
        uint64_t n = firstVert[c + 1] - firstVert[c];
333✔
172

173
        // Helpers to consume weights/ids even if we skip graph construction for a deleted node.
174
        auto consumeAdjWeight = [&]() {
333✔
NEW
175
            switch (weightFormat) {
×
NEW
176
            case nkbg::WeightFormat::VARINT:
×
177
            case nkbg::WeightFormat::SIGNED_VARINT: {
178
                uint64_t tmp;
NEW
179
                wghtOff +=
×
NEW
180
                    nkbg::varIntDecode(reinterpret_cast<const uint8_t *>(adjWghtIt + wghtOff), tmp);
×
NEW
181
            } break;
×
NEW
182
            case nkbg::WeightFormat::DOUBLE:
×
NEW
183
                wghtOff += sizeof(double);
×
NEW
184
                break;
×
NEW
185
            case nkbg::WeightFormat::FLOAT:
×
NEW
186
                wghtOff += sizeof(float);
×
NEW
187
                break;
×
NEW
188
            case nkbg::WeightFormat::NONE:
×
NEW
189
                break;
×
190
            }
191
        };
192

193
        auto consumeTranspWeight = [&]() {
333✔
NEW
194
            switch (weightFormat) {
×
NEW
195
            case nkbg::WeightFormat::VARINT:
×
196
            case nkbg::WeightFormat::SIGNED_VARINT: {
197
                uint64_t tmp;
NEW
198
                transWghtOff += nkbg::varIntDecode(
×
NEW
199
                    reinterpret_cast<const uint8_t *>(transpWghtIt + transWghtOff), tmp);
×
NEW
200
            } break;
×
NEW
201
            case nkbg::WeightFormat::DOUBLE:
×
NEW
202
                transWghtOff += sizeof(double);
×
NEW
203
                break;
×
NEW
204
            case nkbg::WeightFormat::FLOAT:
×
NEW
205
                transWghtOff += sizeof(float);
×
NEW
206
                break;
×
NEW
207
            case nkbg::WeightFormat::NONE:
×
NEW
208
                break;
×
209
            }
210
        };
211

212
        auto consumeAdjId = [&]() {
333✔
NEW
213
            if (!indexed)
×
NEW
214
                return;
×
215
            edgeid tmpId;
NEW
216
            indexOff +=
×
NEW
217
                nkbg::varIntDecode(reinterpret_cast<const uint8_t *>(adjIdIt + indexOff), tmpId);
×
218
        };
219

220
        auto consumeTranspId = [&]() {
333✔
NEW
221
            if (!indexed)
×
NEW
222
                return;
×
223
            edgeid tmpId;
NEW
224
            transIndexOff += nkbg::varIntDecode(
×
NEW
225
                reinterpret_cast<const uint8_t *>(transpIdIt + transIndexOff), tmpId);
×
226
        };
227
        uint64_t ignored = 0;
333✔
228
        for (uint64_t i = 0; i < n; i++) {
15,452✔
229
            uint64_t curr = vertex + i;
15,119✔
230
            uint64_t outNbrs;
231
            off += nkbg::varIntDecode(reinterpret_cast<const uint8_t *>(adjIt + off), outNbrs);
15,119✔
232
            uint64_t inNbrs;
233
            transpOff +=
15,119✔
234
                nkbg::varIntDecode(reinterpret_cast<const uint8_t *>(transpIt + transpOff), inNbrs);
15,119✔
235

236
            // If the node doesn't exist (hole), we must NOT preallocate/add edges,
237
            // but we MUST still consume the encoded neighbor/weight/id payload to keep offsets
238
            // aligned.
239
            if (!G.hasNode(curr)) {
15,119✔
240
                for (uint64_t j = 0; j < outNbrs; ++j) {
7✔
NEW
241
                    off +=
×
NEW
242
                        nkbg::varIntDecode(reinterpret_cast<const uint8_t *>(adjIt + off), ignored);
×
NEW
243
                    consumeAdjWeight();
×
NEW
244
                    consumeAdjId();
×
245
                }
246
                for (uint64_t j = 0; j < inNbrs; ++j) {
7✔
NEW
247
                    transpOff += nkbg::varIntDecode(
×
NEW
248
                        reinterpret_cast<const uint8_t *>(transpIt + transpOff), ignored);
×
NEW
249
                    consumeTranspWeight();
×
NEW
250
                    consumeTranspId();
×
251
                }
252
                continue;
7✔
253
            }
7✔
254

255
            if (!directed) {
15,112✔
256
                G.preallocateUndirected(curr, outNbrs + inNbrs);
486✔
257
            } else {
258
                G.preallocateDirected(curr, outNbrs, inNbrs);
14,626✔
259
            }
260
            // Read adjacency lists.
261
            for (uint64_t j = 0; j < outNbrs; j++) {
234,482✔
262
                edgeid id = 0;
219,370✔
263
                uint64_t add;
264
                double weight = defaultEdgeWeight;
219,370✔
265
                off += nkbg::varIntDecode(reinterpret_cast<const uint8_t *>(adjIt + off), add);
219,370✔
266
                switch (weightFormat) {
219,370✔
267
                case nkbg::WeightFormat::VARINT: {
×
268
                    uint64_t unsignedWeight;
269
                    wghtOff += nkbg::varIntDecode(
×
270
                        reinterpret_cast<const uint8_t *>(adjWghtIt + wghtOff), unsignedWeight);
×
271
                    weight = unsignedWeight;
×
272
                } break;
×
273
                case nkbg::WeightFormat::DOUBLE:
6,411✔
274
                    memcpy(&weight, adjWghtIt + wghtOff, sizeof(double));
6,411✔
275
                    wghtOff += sizeof(double);
6,411✔
276
                    break;
6,411✔
277
                case nkbg::WeightFormat::SIGNED_VARINT: {
18✔
278
                    uint64_t unsignedWeight;
279
                    wghtOff += nkbg::varIntDecode(
36✔
280
                        reinterpret_cast<const uint8_t *>(adjWghtIt + wghtOff), unsignedWeight);
18✔
281
                    weight = nkbg::zigzagDecode(unsignedWeight);
18✔
282
                } break;
18✔
283
                case nkbg::WeightFormat::FLOAT: {
25✔
284
                    float floatWeight;
285
                    memcpy(&floatWeight, adjWghtIt + wghtOff, sizeof(float));
25✔
286
                    wghtOff += sizeof(float);
25✔
287
                    weight = floatWeight;
25✔
288
                } break;
25✔
289
                case nkbg::WeightFormat::NONE:
212,916✔
290
                    break;
212,916✔
291
                }
292
                if (indexed) {
219,370✔
293
                    indexOff += nkbg::varIntDecode(
217,216✔
294
                        reinterpret_cast<const uint8_t *>(adjIdIt + indexOff), id);
108,608✔
295
                    if (id > omega)
108,608✔
296
                        omega = id;
383✔
297
                }
298
                if (!directed) {
219,370✔
299
                    if (!G.addPartialEdge(unsafe, curr, add, weight, id, true))
5,565✔
300
                        WARN("Not adding edge ", curr, "-", add, " since it is already present.");
×
301
                } else {
302
                    if (!G.addPartialOutEdge(unsafe, curr, add, weight, id, true))
213,805✔
303
                        WARN("Not adding edge ", curr, "-", add, " since it is already present.");
×
304
                }
305
                if (curr == add) {
219,370✔
306
                    selfLoops.fetch_add(1, std::memory_order_relaxed);
10✔
307
                }
308
            }
309
            // Read transpose lists.
310
            for (uint64_t j = 0; j < inNbrs; j++) {
234,482✔
311
                uint64_t add;
312
                edgeid id = 0;
219,370✔
313
                double weight = defaultEdgeWeight;
219,370✔
314
                transpOff += nkbg::varIntDecode(
438,740✔
315
                    reinterpret_cast<const uint8_t *>(transpIt + transpOff), add);
219,370✔
316
                switch (weightFormat) {
219,370✔
317
                case nkbg::WeightFormat::VARINT: {
×
318
                    uint64_t unsignedWeight;
319
                    transWghtOff += nkbg::varIntDecode(
×
320
                        reinterpret_cast<const uint8_t *>(transpWghtIt + transWghtOff),
×
321
                        unsignedWeight);
322
                    weight = unsignedWeight;
×
323
                } break;
×
324
                case nkbg::WeightFormat::DOUBLE:
6,411✔
325
                    memcpy(&weight, transpWghtIt + transWghtOff, sizeof(double));
6,411✔
326
                    transWghtOff += sizeof(double);
6,411✔
327
                    break;
6,411✔
328
                case nkbg::WeightFormat::SIGNED_VARINT: {
18✔
329
                    uint64_t unsignedWeight;
330
                    transWghtOff += nkbg::varIntDecode(
36✔
331
                        reinterpret_cast<const uint8_t *>(transpWghtIt + transWghtOff),
18✔
332
                        unsignedWeight);
333
                    weight = nkbg::zigzagDecode(unsignedWeight);
18✔
334
                } break;
18✔
335
                case nkbg::WeightFormat::FLOAT: {
25✔
336
                    float floatWeight;
337
                    memcpy(&floatWeight, transpWghtIt + transWghtOff, sizeof(float));
25✔
338
                    transWghtOff += sizeof(float);
25✔
339
                    weight = floatWeight;
25✔
340
                } break;
25✔
341
                case nkbg::WeightFormat::NONE:
212,916✔
342
                    break;
212,916✔
343
                }
344
                if (indexed) {
219,370✔
345
                    transIndexOff += nkbg::varIntDecode(
217,216✔
346
                        reinterpret_cast<const uint8_t *>(transpIdIt + transIndexOff), id);
108,608✔
347
                    if (id > omega)
108,608✔
348
                        omega = id;
85✔
349
                }
350
                if (!directed) {
219,370✔
351
                    if (curr != add) {
5,565✔
352
                        if (G.addPartialEdge(unsafe, curr, add, weight, id, true))
5,560✔
353
                            WARN("Not adding edge ", curr, "-", add,
5,560✔
354
                                 " since it is already present.");
355
                    }
356
                } else {
357
                    if (!G.addPartialInEdge(unsafe, curr, add, weight, id, true))
213,805✔
358
                        WARN("Not adding edge ", curr, "-", add, " since it is already present.");
×
359
                }
360
            }
361
        }
362
    };
363

364
// create graph
365
#pragma omp parallel for
38✔
366
    for (omp_index c = 0; c < static_cast<omp_index>(chunks); c++) {
367
        constructGraph(c);
368
    }
369
    G.setNumberOfSelfLoops(unsafe, selfLoops);
38✔
370
    if (indexed)
38✔
371
        G.setUpperEdgeIdBound(unsafe, omega);
14✔
372
    return G;
38✔
373
}
38✔
374
} // namespace NetworKit
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc