• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

llnl / dftracer-utils / 23529483807

25 Mar 2026 07:17AM UTC coverage: 48.515% (-1.6%) from 50.098%
23529483807

Pull #57

github

web-flow
Merge 5b1e117ad into 38f9f3616
Pull Request #57: feat(comparator): add pairwise traces comparator

18829 of 49412 branches covered (38.11%)

Branch coverage included in aggregate %.

1584 of 1933 new or added lines in 14 files covered. (81.95%)

3552 existing lines in 135 files now uncovered.

18474 of 27477 relevant lines covered (67.23%)

241072.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.33
/src/dftracer/utils/python/indexer.cpp
1
#include <dftracer/utils/core/runtime.h>
2
#include <dftracer/utils/python/indexer.h>
3
#include <dftracer/utils/python/indexer_checkpoint.h>
4
#include <dftracer/utils/python/runtime.h>
5
#include <dftracer/utils/utilities/indexer/index_builder_utility.h>
6
#include <dftracer/utils/utilities/indexer/index_database.h>
7
#include <dftracer/utils/utilities/indexer/internal/helpers.h>
8
#include <structmember.h>
9

10
#include <cstring>
11

12
/**
 * tp_dealloc for Indexer.
 *
 * Destroys the native indexer handle when one is present, drops the
 * references held on the path strings and the runtime object, then hands the
 * object's memory back through the type's tp_free slot.
 */
static void Indexer_dealloc(IndexerObject *self) {
    if (self->handle != NULL) {
        dft_indexer_destroy(self->handle);
    }

    // Each of these members may still be NULL, hence the X variant.
    Py_XDECREF(self->gz_path);
    Py_XDECREF(self->idx_path);
    Py_XDECREF(self->runtime_obj);

    PyTypeObject *tp = Py_TYPE(self);
    tp->tp_free(reinterpret_cast<PyObject *>(self));
}
49✔
21

22
/**
 * tp_new for Indexer.
 *
 * Allocates the object through the type's tp_alloc slot and puts every field
 * into its "not yet initialised" state. `args` and `kwds` are not inspected
 * here.
 *
 * Returns the new object, or NULL when allocation fails.
 */
static PyObject *Indexer_new(PyTypeObject *type, PyObject *args,
                             PyObject *kwds) {
    IndexerObject *obj = (IndexerObject *)type->tp_alloc(type, 0);
    if (obj == NULL) {
        return NULL;
    }

    obj->handle = NULL;
    obj->gz_path = NULL;
    obj->idx_path = NULL;
    obj->runtime_obj = NULL;

    obj->checkpoint_size = 0;
    obj->build_bloom = 0;
    obj->build_manifest = 0;
    obj->index_threshold = 8 * 1024 * 1024;

    return (PyObject *)obj;
}
38

39
/**
 * tp_init for Indexer.
 *
 * Parses the constructor arguments, records the paths and build options on
 * `self`, and creates the native indexer handle.
 *
 * Python-level arguments (see kwlist):
 *   gz_path (str)            required.
 *   idx_path (str or None)   optional; when omitted or None the index path is
 *                            gz_path + ".idx".
 *   checkpoint_size (int)    optional, must be >= 0.
 *   force_rebuild (bool)     optional, forwarded to dft_indexer_create().
 *   build_bloom (bool)       optional, stored for build().
 *   build_manifest (bool)    optional, stored for build().
 *   index_threshold (int)    optional, must be >= 0, stored on the object.
 *   runtime                  optional; a RuntimeType instance, an object whose
 *                            `_native` attribute is one, or None.
 *
 * Returns 0 on success and -1 with a Python exception set on failure
 * (TypeError for a bad `runtime`, ValueError for a negative size,
 * RuntimeError when the native handle cannot be created).
 */
static int Indexer_init(IndexerObject *self, PyObject *args, PyObject *kwds) {
    static const char *kwlist[] = {
        "gz_path",         "idx_path",    "checkpoint_size",
        "force_rebuild",   "build_bloom", "build_manifest",
        "index_threshold", "runtime",     NULL};
    const char *gz_path;
    const char *idx_path = NULL;
    // The "n" format unit stores a Py_ssize_t, so the parse targets must have
    // exactly that type; they are converted to unsigned after sign checks.
    Py_ssize_t checkpoint_size_arg = static_cast<Py_ssize_t>(
        dftracer::utils::constants::indexer::DEFAULT_CHECKPOINT_SIZE);
    Py_ssize_t index_threshold_arg = 8 * 1024 * 1024;
    int force_rebuild = 0;
    int build_bloom = 0;
    int build_manifest = 0;
    PyObject *runtime_arg = NULL;

    // "z" (instead of "s") lets callers pass idx_path=None explicitly, which
    // is what the type's docstring advertises.
    if (!PyArg_ParseTupleAndKeywords(
            args, kwds, "s|znpppnO", (char **)kwlist, &gz_path, &idx_path,
            &checkpoint_size_arg, &force_rebuild, &build_bloom,
            &build_manifest, &index_threshold_arg, &runtime_arg)) {
        return -1;
    }

    if (checkpoint_size_arg < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "checkpoint_size must be non-negative");
        return -1;
    }
    if (index_threshold_arg < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "index_threshold must be non-negative");
        return -1;
    }
    const std::uint64_t checkpoint_size =
        static_cast<std::uint64_t>(checkpoint_size_arg);
    const std::uint64_t index_threshold =
        static_cast<std::uint64_t>(index_threshold_arg);

    // Resolve the runtime argument into an owned reference (or NULL).
    PyObject *runtime_ref = NULL;
    if (runtime_arg && runtime_arg != Py_None) {
        if (PyObject_TypeCheck(runtime_arg, &RuntimeType)) {
            Py_INCREF(runtime_arg);
            runtime_ref = runtime_arg;
        } else {
            // PyObject_GetAttrString returns a new reference that is either
            // kept as the runtime or released below.
            PyObject *native = PyObject_GetAttrString(runtime_arg, "_native");
            if (native && PyObject_TypeCheck(native, &RuntimeType)) {
                runtime_ref = native;
            } else {
                Py_XDECREF(native);
                PyErr_SetString(PyExc_TypeError,
                                "runtime must be a Runtime instance or None");
                return -1;
            }
        }
    }

    // __init__ can be invoked more than once on the same object; release
    // whatever an earlier call left behind so nothing is leaked.
    if (self->handle) {
        dft_indexer_destroy(self->handle);
        self->handle = NULL;
    }
    Py_CLEAR(self->gz_path);
    Py_CLEAR(self->idx_path);
    Py_CLEAR(self->runtime_obj);

    self->runtime_obj = runtime_ref;

    self->gz_path = PyUnicode_FromString(gz_path);
    if (!self->gz_path) {
        return -1;
    }

    if (idx_path) {
        self->idx_path = PyUnicode_FromString(idx_path);
    } else {
        // Derive the default from the already-validated gz_path object
        // instead of creating (and not NULL-checking) a second temporary.
        self->idx_path = PyUnicode_FromFormat("%U.idx", self->gz_path);
    }

    if (!self->idx_path) {
        // Py_CLEAR (not Py_DECREF) so the deallocator does not release the
        // same reference a second time.
        Py_CLEAR(self->gz_path);
        return -1;
    }

    self->checkpoint_size = checkpoint_size;
    self->build_bloom = build_bloom;
    self->build_manifest = build_manifest;
    self->index_threshold = index_threshold;

    const char *idx_path_str = PyUnicode_AsUTF8(self->idx_path);
    if (!idx_path_str) {
        return -1;
    }

    self->handle = dft_indexer_create(gz_path, idx_path_str, checkpoint_size,
                                      force_rebuild);
    if (!self->handle) {
        PyErr_SetString(PyExc_RuntimeError, "Failed to create indexer");
        return -1;
    }

    return 0;
}
49✔
115

116
/**
 * Pick the Runtime that build() should submit work to.
 *
 * When the Indexer holds a runtime object, the Runtime stored inside it is
 * returned; otherwise the result of get_default_runtime() is returned.
 */
static dftracer::utils::Runtime *get_indexer_runtime(IndexerObject *self) {
    if (self->runtime_obj == NULL) {
        return get_default_runtime();
    }

    RuntimeObject *wrapper = (RuntimeObject *)self->runtime_obj;
    return wrapper->runtime.get();
}
43✔
122

123
/**
 * Indexer.build(): build or rebuild the index for the configured gzip file.
 *
 * Assembles an IndexBuildConfig from the options stored on `self`, submits an
 * IndexBuilderUtility coroutine to the selected Runtime and blocks until it
 * finishes. The GIL is released while waiting.
 *
 * Returns None on success. Returns NULL with RuntimeError set when the
 * indexer is not initialised, no runtime is available, a C++ exception is
 * thrown, or the build result reports failure.
 */
static PyObject *Indexer_build(IndexerObject *self,
                               PyObject *Py_UNUSED(ignored)) {
    if (!self->handle) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    using namespace dftracer::utils;
    using namespace dftracer::utils::utilities::indexer;

    // Releases the GIL for the lifetime of the guard. Doing this through a
    // destructor guarantees the GIL is re-acquired during stack unwinding,
    // i.e. before any catch handler touches the Python C API.
    struct GilReleaseGuard {
        PyThreadState *saved;
        GilReleaseGuard() : saved(PyEval_SaveThread()) {}
        ~GilReleaseGuard() { PyEval_RestoreThread(saved); }
        GilReleaseGuard(const GilReleaseGuard &) = delete;
        GilReleaseGuard &operator=(const GilReleaseGuard &) = delete;
    };

    const char *gz = PyUnicode_AsUTF8(self->gz_path);
    const char *idx = PyUnicode_AsUTF8(self->idx_path);
    if (!gz || !idx) {
        return NULL;
    }

    IndexBuildResult build_result;

    try {
        // NOTE(review): the threshold is hard-coded to 0 here and
        // self->index_threshold is not consulted - confirm this is intended.
        auto config = IndexBuildConfig::for_file(gz)
                          .with_checkpoint_size(
                              static_cast<std::size_t>(self->checkpoint_size))
                          .with_bloom(self->build_bloom != 0)
                          .with_manifest(self->build_manifest != 0)
                          .with_index_threshold(0);

        // Use the directory part of the index path (everything before the
        // last '/') as the index directory, when there is one.
        std::string idx_str(idx);
        auto pos = idx_str.find_last_of('/');
        if (pos != std::string::npos) {
            config.with_index_dir(idx_str.substr(0, pos));
        }

        Runtime *rt = get_indexer_runtime(self);
        if (!rt) {
            PyErr_SetString(PyExc_RuntimeError, "No runtime available");
            return NULL;
        }

        auto build_coro =
            [](IndexBuildConfig cfg) -> coro::CoroTask<IndexBuildResult> {
            IndexBuilderUtility builder;
            co_return co_await builder.process(cfg);
        };

        {
            GilReleaseGuard released;
            auto handle = rt->submit(build_coro(config), "indexer-build");
            build_result = handle.get();
        }
    } catch (const std::exception &e) {
        PyErr_SetString(PyExc_RuntimeError, e.what());
        return NULL;
    } catch (...) {
        // A C++ exception must never propagate into the interpreter.
        PyErr_SetString(PyExc_RuntimeError,
                        "Unknown error while building index");
        return NULL;
    }

    if (!build_result.success) {
        PyErr_SetString(PyExc_RuntimeError, build_result.error_message.c_str());
        return NULL;
    }

    Py_RETURN_NONE;
}
43✔
178

179
/**
 * Indexer.need_rebuild(): report dft_indexer_need_rebuild() as a Python bool.
 *
 * Raises RuntimeError when the native handle has not been created.
 */
static PyObject *Indexer_need_rebuild(IndexerObject *self,
                                      PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    const int needs_rebuild = dft_indexer_need_rebuild(self->handle);
    return PyBool_FromLong(needs_rebuild);
}
18✔
189

190
/**
 * Indexer.exists(): report dft_indexer_exists() as a Python bool.
 *
 * Raises RuntimeError when the native handle has not been created.
 */
static PyObject *Indexer_exists(IndexerObject *self,
                                PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    const int present = dft_indexer_exists(self->handle);
    return PyBool_FromLong(present);
}
×
200

201
/**
 * Indexer.get_max_bytes(): return dft_indexer_get_max_bytes() as a Python
 * int.
 *
 * Raises RuntimeError when the native handle has not been created.
 */
static PyObject *Indexer_get_max_bytes(IndexerObject *self,
                                       PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    const uint64_t max_bytes = dft_indexer_get_max_bytes(self->handle);
    return PyLong_FromUnsignedLongLong(max_bytes);
}
3✔
211

212
/**
 * Indexer.get_num_lines(): return dft_indexer_get_num_lines() as a Python
 * int.
 *
 * Raises RuntimeError when the native handle has not been created.
 */
static PyObject *Indexer_get_num_lines(IndexerObject *self,
                                       PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    const uint64_t line_count = dft_indexer_get_num_lines(self->handle);
    return PyLong_FromUnsignedLongLong(line_count);
}
2✔
222

223
/**
 * Indexer.find_checkpoint(offset): look up a checkpoint for an uncompressed
 * byte offset via dft_indexer_find_checkpoint().
 *
 * Args (Python): offset (int), must be >= 0.
 *
 * Returns a new IndexerCheckpoint object holding a copy of the checkpoint
 * when the lookup reports a match, or None when it does not. Returns NULL
 * with an exception set when the indexer is not initialised (RuntimeError),
 * the argument cannot be parsed, the offset is negative (ValueError), or the
 * result object cannot be allocated.
 */
static PyObject *Indexer_find_checkpoint(IndexerObject *self, PyObject *args) {
    if (!self->handle) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    // "n" stores a Py_ssize_t; parse into that type and convert only after
    // ruling out negative values, which would otherwise wrap to huge offsets.
    Py_ssize_t offset_arg = 0;
    if (!PyArg_ParseTuple(args, "n", &offset_arg)) {
        return NULL;
    }
    if (offset_arg < 0) {
        PyErr_SetString(PyExc_ValueError, "offset must be non-negative");
        return NULL;
    }
    const std::size_t target_offset = static_cast<std::size_t>(offset_arg);

    dft_indexer_checkpoint_t checkpoint;
    int found =
        dft_indexer_find_checkpoint(self->handle, target_offset, &checkpoint);

    if (!found) {
        Py_RETURN_NONE;
    }

    // Create IndexerCheckpoint object
    IndexerCheckpointObject *cp_obj =
        (IndexerCheckpointObject *)IndexerCheckpoint_new(&IndexerCheckpointType,
                                                         NULL, NULL);
    if (!cp_obj) {
        return NULL;
    }

    cp_obj->checkpoint = checkpoint;
    return (PyObject *)cp_obj;
}
3✔
253

254
/**
 * Indexer.get_checkpoints(): return every checkpoint as a Python list of
 * IndexerCheckpoint objects.
 *
 * The native array obtained from dft_indexer_get_checkpoints() is copied
 * element by element into new Python objects and then released with
 * dft_indexer_free_checkpoints(). When the native call reports a non-zero
 * status or yields no array, an empty list is returned.
 *
 * Returns NULL with an exception set when the indexer is not initialised or
 * a Python allocation fails.
 */
static PyObject *Indexer_get_checkpoints(IndexerObject *self,
                                         PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    dft_indexer_checkpoint_t *entries = NULL;
    std::size_t total = 0;
    const int status =
        dft_indexer_get_checkpoints(self->handle, &entries, &total);

    if (status != 0 || entries == NULL) {
        dft_indexer_free_checkpoints(entries, total);
        return PyList_New(0);
    }

    PyObject *out = PyList_New(total);
    if (out != NULL) {
        for (std::size_t idx = 0; idx < total; ++idx) {
            IndexerCheckpointObject *item =
                (IndexerCheckpointObject *)IndexerCheckpoint_new(
                    &IndexerCheckpointType, NULL, NULL);
            if (item == NULL) {
                // Abandon the partially filled list; the native array is
                // still released below.
                Py_DECREF(out);
                out = NULL;
                break;
            }
            item->checkpoint = entries[idx];
            // PyList_SetItem takes over the reference to `item`.
            PyList_SetItem(out, idx, (PyObject *)item);
        }
    }

    dft_indexer_free_checkpoints(entries, total);
    return out;
}
2✔
294

295
/**
 * Getter for Indexer.has_bloom.
 *
 * Opens the index database at idx_path, looks up the file-info id for the
 * logical path derived from gz_path, and returns True when that id is
 * non-negative and the database reports bloom data for it. Any C++ exception
 * raised during the lookup is treated as "no bloom data" and yields False.
 *
 * Returns False when the object's paths were never set. Returns NULL with
 * the Python exception set when a path cannot be converted to UTF-8.
 */
static PyObject *Indexer_has_bloom(IndexerObject *self, void *closure) {
    (void)closure;

    // The paths are NULL when __init__ never ran or failed early; passing
    // NULL to PyUnicode_AsUTF8 would crash.
    if (!self->idx_path || !self->gz_path) {
        Py_RETURN_FALSE;
    }

    // On failure PyUnicode_AsUTF8 has set an exception; propagate it rather
    // than returning a value while an exception is pending.
    const char *idx = PyUnicode_AsUTF8(self->idx_path);
    if (!idx) {
        return NULL;
    }
    const char *gz = PyUnicode_AsUTF8(self->gz_path);
    if (!gz) {
        return NULL;
    }

    try {
        using namespace dftracer::utils::utilities::indexer;
        using namespace dftracer::utils::utilities::indexer::internal;
        IndexDatabase db(idx);
        std::string logical = get_logical_path(gz);
        int fid = db.get_file_info_id(logical);
        if (fid >= 0 && db.has_bloom_data(fid)) {
            Py_RETURN_TRUE;
        }
    } catch (...) {
        // Best effort: a failed lookup is reported as False.
    }
    Py_RETURN_FALSE;
}
6✔
314

315
/**
 * Getter for Indexer.has_manifest.
 *
 * Opens the index database at idx_path, looks up the file-info id for the
 * logical path derived from gz_path, and returns True when that id is
 * non-negative and the database reports manifest data for it. Any C++
 * exception raised during the lookup is treated as "no manifest data" and
 * yields False.
 *
 * Returns False when the object's paths were never set. Returns NULL with
 * the Python exception set when a path cannot be converted to UTF-8.
 */
static PyObject *Indexer_has_manifest(IndexerObject *self, void *closure) {
    (void)closure;

    // The paths are NULL when __init__ never ran or failed early; passing
    // NULL to PyUnicode_AsUTF8 would crash.
    if (!self->idx_path || !self->gz_path) {
        Py_RETURN_FALSE;
    }

    // On failure PyUnicode_AsUTF8 has set an exception; propagate it rather
    // than returning a value while an exception is pending.
    const char *idx = PyUnicode_AsUTF8(self->idx_path);
    if (!idx) {
        return NULL;
    }
    const char *gz = PyUnicode_AsUTF8(self->gz_path);
    if (!gz) {
        return NULL;
    }

    try {
        using namespace dftracer::utils::utilities::indexer;
        using namespace dftracer::utils::utilities::indexer::internal;
        IndexDatabase db(idx);
        std::string logical = get_logical_path(gz);
        int fid = db.get_file_info_id(logical);
        if (fid >= 0 && db.has_manifest_data(fid)) {
            Py_RETURN_TRUE;
        }
    } catch (...) {
        // Best effort: a failed lookup is reported as False.
    }
    Py_RETURN_FALSE;
}
5✔
334

335
/**
 * Getter for Indexer.gz_path.
 *
 * Returns a new reference to the stored path string, or None when the path
 * was never set (for example when __init__ did not run or failed).
 */
static PyObject *Indexer_gz_path(IndexerObject *self, void *closure) {
    (void)closure;

    // Py_INCREF on a NULL member would crash the interpreter.
    if (!self->gz_path) {
        Py_RETURN_NONE;
    }
    Py_INCREF(self->gz_path);
    return self->gz_path;
}
339

340
/**
 * Getter for Indexer.idx_path.
 *
 * Returns a new reference to the stored index path string, or None when the
 * path was never set (for example when __init__ did not run or failed).
 */
static PyObject *Indexer_idx_path(IndexerObject *self, void *closure) {
    (void)closure;

    // Py_INCREF on a NULL member would crash the interpreter.
    if (!self->idx_path) {
        Py_RETURN_NONE;
    }
    Py_INCREF(self->idx_path);
    return self->idx_path;
}
344

345
/**
 * Getter for Indexer.checkpoint_size: the stored checkpoint size as a Python
 * int.
 */
static PyObject *Indexer_checkpoint_size(IndexerObject *self, void *closure) {
    const unsigned long long size_in_bytes = self->checkpoint_size;
    return PyLong_FromUnsignedLongLong(size_in_bytes);
}
348

349
/**
 * Indexer.__enter__(): return the Indexer itself (as a new reference) so it
 * can be bound by a `with` statement.
 */
static PyObject *Indexer_enter(IndexerObject *self,
                               PyObject *Py_UNUSED(ignored)) {
    PyObject *me = (PyObject *)self;
    Py_INCREF(me);
    return me;
}
354

355
/**
 * Indexer.__exit__(...): performs no work and returns None; neither `self`
 * nor the arguments are inspected.
 */
static PyObject *Indexer_exit(IndexerObject *self, PyObject *args) {
    PyObject *none = Py_None;
    Py_INCREF(none);
    return none;
}
358

359
/* Method table for the Indexer type: name, C implementation, calling
 * convention and docstring for each Python-visible method. */
static PyMethodDef Indexer_methods[] = {
    {"build", reinterpret_cast<PyCFunction>(Indexer_build), METH_NOARGS,
     "build()\n"
     "--\n"
     "\n"
     "Build or rebuild the index.\n"},

    /* Queries answered by the native handle. */
    {"need_rebuild", reinterpret_cast<PyCFunction>(Indexer_need_rebuild),
     METH_NOARGS, "Check if a rebuild is needed."},
    {"exists", reinterpret_cast<PyCFunction>(Indexer_exists), METH_NOARGS,
     "Check if the index file exists."},
    {"get_max_bytes", reinterpret_cast<PyCFunction>(Indexer_get_max_bytes),
     METH_NOARGS, "Get the maximum uncompressed bytes in the indexed file."},
    {"get_num_lines", reinterpret_cast<PyCFunction>(Indexer_get_num_lines),
     METH_NOARGS, "Get the total number of lines in the indexed file."},

    /* Checkpoint access. */
    {"find_checkpoint", reinterpret_cast<PyCFunction>(Indexer_find_checkpoint),
     METH_VARARGS,
     "Find the best checkpoint for a given uncompressed offset.\n"
     "\n"
     "Args:\n"
     "    offset (int): Uncompressed byte offset.\n"},
    {"get_checkpoints", reinterpret_cast<PyCFunction>(Indexer_get_checkpoints),
     METH_NOARGS, "Get all checkpoints for this file as a list."},

    /* Context-manager protocol. */
    {"__enter__", reinterpret_cast<PyCFunction>(Indexer_enter), METH_NOARGS,
     "Enter the runtime context for the with statement."},
    {"__exit__", reinterpret_cast<PyCFunction>(Indexer_exit), METH_VARARGS,
     "Exit the runtime context for the with statement."},

    {NULL, NULL, 0, NULL} /* Sentinel */
};
386

387
/* Read-only attributes of the Indexer type. Every entry has a getter only
 * (setter slot is NULL) and no closure data. */
static PyGetSetDef Indexer_getsetters[] = {
    {"gz_path", reinterpret_cast<getter>(Indexer_gz_path), NULL,
     "Path to the gzip file", NULL},
    {"idx_path", reinterpret_cast<getter>(Indexer_idx_path), NULL,
     "Path to the index file", NULL},
    {"checkpoint_size", reinterpret_cast<getter>(Indexer_checkpoint_size),
     NULL, "Checkpoint size in bytes", NULL},
    {"has_bloom", reinterpret_cast<getter>(Indexer_has_bloom), NULL,
     "Whether bloom data exists in index", NULL},
    {"has_manifest", reinterpret_cast<getter>(Indexer_has_manifest), NULL,
     "Whether manifest data exists in index", NULL},
    {NULL, NULL, NULL, NULL, NULL} /* Sentinel */
};
399

400
/* Docstring for the Indexer type; the first paragraph up to the "--" marker
 * is the text signature. */
static const char Indexer_type_doc[] =
    "Indexer(gz_path: str, idx_path: str | None = None,\n"
    "       checkpoint_size: int = 1048576,\n"
    "       force_rebuild: bool = False, build_bloom: bool = False,\n"
    "       build_manifest: bool = False,\n"
    "       index_threshold: int = 8388608,\n"
    "       runtime: Runtime | None = None)\n"
    "--\n"
    "\n"
    "Indexer for creating and managing gzip file indices.\n"
    "\n"
    "Args:\n"
    "    gz_path (str): Path to the gzip trace file.\n"
    "    idx_path (str or None): Path to the index file. If None,\n"
    "        uses gz_path + \".idx\".\n"
    "    checkpoint_size (int): Checkpoint size in bytes for index\n"
    "        building (default 1 MB).\n"
    "    force_rebuild (bool): If True, rebuild the index even if it\n"
    "        exists.\n"
    "    build_bloom (bool): If True, build bloom filter data in the\n"
    "        index.\n"
    "    build_manifest (bool): If True, build manifest data in the\n"
    "        index.\n"
    "    index_threshold (int): Skip indexing for files smaller than\n"
    "        this (default 8 MB).\n"
    "    runtime (Runtime or None): Runtime instance for thread pool\n"
    "        control. If None, uses the default global Runtime.\n";

/* Static type object for Indexer. Slots are filled positionally; every slot
 * that is not listed with a real value is left at 0. */
PyTypeObject IndexerType = {
    PyVarObject_HEAD_INIT(NULL, 0) "indexer.Indexer", /* tp_name */
    sizeof(IndexerObject),                            /* tp_basicsize */
    0,                                                /* tp_itemsize */
    reinterpret_cast<destructor>(Indexer_dealloc),    /* tp_dealloc */
    0,                                        /* tp_vectorcall_offset */
    0,                                        /* tp_getattr */
    0,                                        /* tp_setattr */
    0,                                        /* tp_as_async */
    0,                                        /* tp_repr */
    0,                                        /* tp_as_number */
    0,                                        /* tp_as_sequence */
    0,                                        /* tp_as_mapping */
    0,                                        /* tp_hash */
    0,                                        /* tp_call */
    0,                                        /* tp_str */
    0,                                        /* tp_getattro */
    0,                                        /* tp_setattro */
    0,                                        /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Indexer_type_doc,                         /* tp_doc */
    0,                                        /* tp_traverse */
    0,                                        /* tp_clear */
    0,                                        /* tp_richcompare */
    0,                                        /* tp_weaklistoffset */
    0,                                        /* tp_iter */
    0,                                        /* tp_iternext */
    Indexer_methods,                          /* tp_methods */
    0,                                        /* tp_members */
    Indexer_getsetters,                       /* tp_getset */
    0,                                        /* tp_base */
    0,                                        /* tp_dict */
    0,                                        /* tp_descr_get */
    0,                                        /* tp_descr_set */
    0,                                        /* tp_dictoffset */
    reinterpret_cast<initproc>(Indexer_init), /* tp_init */
    0,                                        /* tp_alloc */
    Indexer_new,                              /* tp_new */
};
464

465
/**
 * Finalise IndexerType and register it on module `m` under the name
 * "Indexer".
 *
 * Returns 0 on success, -1 when PyType_Ready() or PyModule_AddObject()
 * fails.
 */
int init_indexer(PyObject *m) {
    if (PyType_Ready(&IndexerType) < 0) {
        return -1;
    }

    PyObject *type_obj = (PyObject *)&IndexerType;

    // PyModule_AddObject takes over this reference only when it succeeds.
    Py_INCREF(type_obj);
    if (PyModule_AddObject(m, "Indexer", type_obj) >= 0) {
        return 0;
    }

    Py_DECREF(type_obj);
    // NOTE(review): this releases a reference to the module passed in by the
    // caller - confirm the caller does not also release `m` on failure.
    Py_DECREF(m);
    return -1;
}
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc