• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

llnl / dftracer-utils / 23531917822

25 Mar 2026 08:31AM UTC coverage: 50.205% (+0.1%) from 50.098%
23531917822

push

github

rayandrew
chore(docs): update utility behavior descriptions

19928 of 51702 branches covered (38.54%)

Branch coverage included in aggregate %.

17727 of 23300 relevant lines covered (76.08%)

474762.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

61.09
/src/dftracer/utils/python/indexer.cpp
1
#include <dftracer/utils/core/runtime.h>
2
#include <dftracer/utils/python/indexer.h>
3
#include <dftracer/utils/python/indexer_checkpoint.h>
4
#include <dftracer/utils/python/runtime.h>
5
#include <dftracer/utils/utilities/indexer/index_builder_utility.h>
6
#include <dftracer/utils/utilities/indexer/index_database.h>
7
#include <dftracer/utils/utilities/indexer/internal/helpers.h>
8
#include <structmember.h>
9

10
#include <cstring>
11

12
/*
 * tp_dealloc slot for Indexer.
 *
 * Destroys the native indexer handle when one exists, drops the references
 * held on the two path strings and the optional runtime object, and finally
 * hands the memory back through the type's tp_free.
 */
static void Indexer_dealloc(IndexerObject *self) {
    if (self->handle != NULL) {
        dft_indexer_destroy(self->handle);
    }

    /* Members may still be NULL if __init__ never completed. */
    Py_XDECREF(self->gz_path);
    Py_XDECREF(self->idx_path);
    Py_XDECREF(self->runtime_obj);

    PyObject *as_object = (PyObject *)self;
    Py_TYPE(self)->tp_free(as_object);
}
124✔
21

22
/*
 * tp_new slot for Indexer.
 *
 * Allocates the object through tp_alloc and puts every member into a known
 * "not yet initialised" state so that tp_dealloc is safe even when __init__
 * is never run. Returns NULL when allocation fails.
 */
static PyObject *Indexer_new(PyTypeObject *type, PyObject *args,
                             PyObject *kwds) {
    IndexerObject *obj = (IndexerObject *)type->tp_alloc(type, 0);
    if (obj == NULL) {
        return (PyObject *)obj;
    }

    obj->handle = NULL;
    obj->gz_path = NULL;
    obj->idx_path = NULL;
    obj->checkpoint_size = 0;
    obj->build_bloom = 0;
    obj->build_manifest = 0;
    obj->index_threshold =
        dftracer::utils::constants::indexer::DEFAULT_INDEX_SIZE_THRESHOLD;
    obj->runtime_obj = NULL;

    return (PyObject *)obj;
}
39

40
/*
 * tp_init slot for Indexer.
 *
 * Parses (gz_path, idx_path=None, checkpoint_size, force_rebuild,
 * build_bloom, build_manifest, index_threshold, runtime=None), stores the
 * paths and build options on the object and creates the native indexer
 * handle via dft_indexer_create().
 *
 * When idx_path is omitted or None, the index path is gz_path + ".idx".
 * `runtime` may be a Runtime instance, an object whose `_native` attribute is
 * a Runtime instance, or None.
 *
 * Returns 0 on success and -1 with a Python exception set on failure:
 *   - TypeError    bad argument types / runtime is not a Runtime
 *   - ValueError   negative checkpoint_size or index_threshold
 *   - RuntimeError dft_indexer_create() returned NULL
 */
static int Indexer_init(IndexerObject *self, PyObject *args, PyObject *kwds) {
    static const char *kwlist[] = {
        "gz_path",         "idx_path",    "checkpoint_size",
        "force_rebuild",   "build_bloom", "build_manifest",
        "index_threshold", "runtime",     NULL};
    const char *gz_path = NULL;
    const char *idx_path = NULL;
    /* The "n" format unit stores a Py_ssize_t, so parse into that exact type
     * and convert to the unsigned storage type only after validation. */
    Py_ssize_t checkpoint_size_arg = static_cast<Py_ssize_t>(
        dftracer::utils::constants::indexer::DEFAULT_CHECKPOINT_SIZE);
    int force_rebuild = 0;
    int build_bloom = 0;
    int build_manifest = 0;
    Py_ssize_t index_threshold_arg = static_cast<Py_ssize_t>(
        dftracer::utils::constants::indexer::DEFAULT_INDEX_SIZE_THRESHOLD);
    PyObject *runtime_arg = NULL;

    /* "z" (not "s") for idx_path so that an explicit None is accepted, as the
     * type's docstring advertises. */
    if (!PyArg_ParseTupleAndKeywords(
            args, kwds, "s|znpppnO", (char **)kwlist, &gz_path, &idx_path,
            &checkpoint_size_arg, &force_rebuild, &build_bloom,
            &build_manifest, &index_threshold_arg, &runtime_arg)) {
        return -1;
    }

    if (checkpoint_size_arg < 0 || index_threshold_arg < 0) {
        PyErr_SetString(
            PyExc_ValueError,
            "checkpoint_size and index_threshold must be non-negative");
        return -1;
    }
    const std::uint64_t checkpoint_size =
        static_cast<std::uint64_t>(checkpoint_size_arg);
    const std::uint64_t index_threshold =
        static_cast<std::uint64_t>(index_threshold_arg);

    /* Resolve the runtime into an owned reference (or NULL for the default). */
    PyObject *runtime_ref = NULL;
    if (runtime_arg && runtime_arg != Py_None) {
        if (PyObject_TypeCheck(runtime_arg, &RuntimeType)) {
            Py_INCREF(runtime_arg);
            runtime_ref = runtime_arg;
        } else {
            PyObject *native = PyObject_GetAttrString(runtime_arg, "_native");
            if (native && PyObject_TypeCheck(native, &RuntimeType)) {
                runtime_ref = native; /* already a new reference */
            } else {
                Py_XDECREF(native);
                PyErr_SetString(PyExc_TypeError,
                                "runtime must be a Runtime instance or None");
                return -1;
            }
        }
    }

    /* Build both path objects as locals first; nothing is stored on `self`
     * until every fallible step has succeeded, so a failure can never leave
     * a dangling pointer behind for tp_dealloc to decref again. */
    PyObject *gz_obj = PyUnicode_FromString(gz_path);
    PyObject *idx_obj = NULL;
    if (gz_obj) {
        idx_obj = idx_path ? PyUnicode_FromString(idx_path)
                           : PyUnicode_FromFormat("%U.idx", gz_obj);
    }
    const char *idx_path_str = idx_obj ? PyUnicode_AsUTF8(idx_obj) : NULL;
    if (!idx_path_str) {
        Py_XDECREF(gz_obj);
        Py_XDECREF(idx_obj);
        Py_XDECREF(runtime_ref);
        return -1;
    }

    /* Commit. __init__ may legally be called more than once, so release
     * whatever a previous call stored; swap before DECREF so the members are
     * always valid while arbitrary code may run. */
    PyObject *old_gz = self->gz_path;
    PyObject *old_idx = self->idx_path;
    PyObject *old_runtime = self->runtime_obj;
    self->gz_path = gz_obj;
    self->idx_path = idx_obj;
    self->runtime_obj = runtime_ref;
    Py_XDECREF(old_gz);
    Py_XDECREF(old_idx);
    Py_XDECREF(old_runtime);

    self->checkpoint_size = checkpoint_size;
    self->build_bloom = build_bloom;
    self->build_manifest = build_manifest;
    self->index_threshold = index_threshold;

    if (self->handle) {
        dft_indexer_destroy(self->handle);
        self->handle = NULL;
    }

    /* idx_path_str points into idx_obj, which `self` now keeps alive. */
    self->handle = dft_indexer_create(gz_path, idx_path_str, checkpoint_size,
                                      force_rebuild);
    if (!self->handle) {
        PyErr_SetString(PyExc_RuntimeError, "Failed to create indexer");
        return -1;
    }

    return 0;
}
62✔
117

118
/*
 * Pick the Runtime an Indexer should submit work to: the one wrapped by the
 * object's runtime_obj when set, otherwise whatever get_default_runtime()
 * returns.
 */
static dftracer::utils::Runtime *get_indexer_runtime(IndexerObject *self) {
    if (!self->runtime_obj) {
        return get_default_runtime();
    }
    RuntimeObject *wrapper = (RuntimeObject *)self->runtime_obj;
    return wrapper->runtime.get();
}
56✔
124

125
/*
 * Indexer.build(): build (or rebuild) the index for gz_path.
 *
 * Assembles an IndexBuildConfig from the options stored on the object, uses
 * the directory part of idx_path (everything before the last '/') as the
 * index directory when there is one, submits an IndexBuilderUtility
 * coroutine to the selected Runtime and blocks until it finishes. The GIL is
 * released while waiting.
 *
 * Returns None on success. Raises RuntimeError when the indexer has no
 * native handle, when the build throws, or when the build result reports
 * failure.
 */
static PyObject *Indexer_build(IndexerObject *self,
                               PyObject *Py_UNUSED(ignored)) {
    if (!self->handle) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    using namespace dftracer::utils;
    using namespace dftracer::utils::utilities::indexer;

    const char *gz = PyUnicode_AsUTF8(self->gz_path);
    const char *idx = PyUnicode_AsUTF8(self->idx_path);
    if (!gz || !idx) {
        return NULL;
    }

    auto config = IndexBuildConfig::for_file(gz)
                      .with_checkpoint_size(
                          static_cast<std::size_t>(self->checkpoint_size))
                      .with_bloom(self->build_bloom != 0)
                      .with_manifest(self->build_manifest != 0)
                      .with_index_threshold(
                          static_cast<std::size_t>(self->index_threshold));

    std::string idx_str(idx);
    auto pos = idx_str.find_last_of('/');
    if (pos != std::string::npos) {
        config.with_index_dir(idx_str.substr(0, pos));
    }

    Runtime *rt = get_indexer_runtime(self);
    IndexBuildResult build_result;

    auto build_coro =
        [](IndexBuildConfig cfg) -> coro::CoroTask<IndexBuildResult> {
        IndexBuilderUtility builder;
        co_return co_await builder.process(cfg);
    };

    /* Release the GIL by hand instead of Py_BEGIN/END_ALLOW_THREADS: those
     * macros restore the thread state only on normal fall-through, so a C++
     * exception would reach the handler with the GIL still released. Catch
     * everything while the GIL is released, remember the message, and touch
     * the Python error state only after the GIL has been reacquired. */
    bool threw = false;
    std::string thrown_message;

    PyThreadState *saved_state = PyEval_SaveThread();
    try {
        auto handle = rt->submit(build_coro(config), "indexer-build");
        build_result = handle.get();
    } catch (const std::exception &e) {
        threw = true;
        thrown_message = e.what();
    } catch (...) {
        /* Never let a foreign exception unwind into the interpreter. */
        threw = true;
        thrown_message = "Unknown error while building index";
    }
    PyEval_RestoreThread(saved_state);

    if (threw) {
        PyErr_SetString(PyExc_RuntimeError, thrown_message.c_str());
        return NULL;
    }

    if (!build_result.success) {
        PyErr_SetString(PyExc_RuntimeError, build_result.error_message.c_str());
        return NULL;
    }

    Py_RETURN_NONE;
}
112✔
181

182
/*
 * Indexer.need_rebuild(): return the truthiness of
 * dft_indexer_need_rebuild() for this indexer as a Python bool.
 * Raises RuntimeError when no native handle exists.
 */
static PyObject *Indexer_need_rebuild(IndexerObject *self,
                                      PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    return PyBool_FromLong(dft_indexer_need_rebuild(self->handle));
}
18✔
192

193
/*
 * Indexer.exists(): return the truthiness of dft_indexer_exists() for this
 * indexer as a Python bool. Raises RuntimeError when no native handle
 * exists.
 */
static PyObject *Indexer_exists(IndexerObject *self,
                                PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    return PyBool_FromLong(dft_indexer_exists(self->handle));
}
203

204
/*
 * Indexer.get_max_bytes(): return dft_indexer_get_max_bytes() as a Python
 * int. Raises RuntimeError when no native handle exists.
 */
static PyObject *Indexer_get_max_bytes(IndexerObject *self,
                                       PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    const uint64_t max_bytes = dft_indexer_get_max_bytes(self->handle);
    return PyLong_FromUnsignedLongLong(max_bytes);
}
3✔
214

215
/*
 * Indexer.get_num_lines(): return dft_indexer_get_num_lines() as a Python
 * int. Raises RuntimeError when no native handle exists.
 */
static PyObject *Indexer_get_num_lines(IndexerObject *self,
                                       PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    const uint64_t line_count = dft_indexer_get_num_lines(self->handle);
    return PyLong_FromUnsignedLongLong(line_count);
}
2✔
225

226
/*
 * Indexer.find_checkpoint(offset): look up a checkpoint for an uncompressed
 * byte offset via dft_indexer_find_checkpoint().
 *
 * Returns a new IndexerCheckpoint holding a copy of the native record, or
 * None when the lookup reports nothing found.
 *
 * Raises RuntimeError when no native handle exists, TypeError when the
 * argument is not an integer, and ValueError when the offset is negative.
 */
static PyObject *Indexer_find_checkpoint(IndexerObject *self, PyObject *args) {
    if (!self->handle) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    /* "n" writes a Py_ssize_t; parsing straight into a size_t would turn a
     * negative argument into a huge offset without any diagnostic. */
    Py_ssize_t offset_arg = 0;
    if (!PyArg_ParseTuple(args, "n", &offset_arg)) {
        return NULL;
    }
    if (offset_arg < 0) {
        PyErr_SetString(PyExc_ValueError, "offset must be non-negative");
        return NULL;
    }
    const std::size_t target_offset = static_cast<std::size_t>(offset_arg);

    dft_indexer_checkpoint_t checkpoint = {};
    int found =
        dft_indexer_find_checkpoint(self->handle, target_offset, &checkpoint);

    if (!found) {
        Py_RETURN_NONE;
    }

    // Create IndexerCheckpoint object
    IndexerCheckpointObject *cp_obj =
        (IndexerCheckpointObject *)IndexerCheckpoint_new(&IndexerCheckpointType,
                                                         NULL, NULL);
    if (!cp_obj) {
        return NULL;
    }

    cp_obj->checkpoint = checkpoint;
    return (PyObject *)cp_obj;
}
3✔
256

257
/*
 * Indexer.get_checkpoints(): return every checkpoint reported by
 * dft_indexer_get_checkpoints() as a list of IndexerCheckpoint objects.
 *
 * A non-zero status or a NULL array from the native call yields an empty
 * list rather than an exception. The native array is always released with
 * dft_indexer_free_checkpoints() before returning. Raises RuntimeError when
 * no native handle exists; returns NULL if a Python allocation fails.
 */
static PyObject *Indexer_get_checkpoints(IndexerObject *self,
                                         PyObject *Py_UNUSED(ignored)) {
    if (self->handle == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "Indexer not initialized");
        return NULL;
    }

    dft_indexer_checkpoint_t *native_items = NULL;
    std::size_t item_count = 0;

    const int status =
        dft_indexer_get_checkpoints(self->handle, &native_items, &item_count);
    if (status != 0 || native_items == NULL) {
        dft_indexer_free_checkpoints(native_items, item_count);
        return PyList_New(0);
    }

    PyObject *py_list = PyList_New(item_count);
    if (py_list == NULL) {
        dft_indexer_free_checkpoints(native_items, item_count);
        return NULL;
    }

    std::size_t index = 0;
    while (index < item_count) {
        PyObject *raw = IndexerCheckpoint_new(&IndexerCheckpointType, NULL, NULL);
        IndexerCheckpointObject *wrapped = (IndexerCheckpointObject *)raw;
        if (wrapped == NULL) {
            Py_DECREF(py_list);
            dft_indexer_free_checkpoints(native_items, item_count);
            return NULL;
        }
        wrapped->checkpoint = native_items[index];
        /* PyList_SetItem takes over the reference to the new object. */
        PyList_SetItem(py_list, index, (PyObject *)wrapped);
        ++index;
    }

    dft_indexer_free_checkpoints(native_items, item_count);
    return py_list;
}
2✔
297

298
/*
 * Getter for Indexer.has_bloom.
 *
 * Opens an IndexDatabase on idx_path, maps gz_path to its logical path with
 * get_logical_path(), looks up the file id and returns True only when that
 * id is non-negative and has_bloom_data() reports true for it.
 *
 * This is a best-effort probe: unset paths, a failed path conversion, or any
 * C++ exception all yield False rather than raising.
 */
static PyObject *Indexer_has_bloom(IndexerObject *self, void *closure) {
    /* Both members stay NULL when __init__ never ran or failed early;
     * PyUnicode_AsUTF8 must not be handed a NULL object. */
    if (!self->idx_path || !self->gz_path) {
        Py_RETURN_FALSE;
    }

    const char *idx = PyUnicode_AsUTF8(self->idx_path);
    const char *gz = idx ? PyUnicode_AsUTF8(self->gz_path) : NULL;
    if (!idx || !gz) {
        /* A failed conversion leaves an exception set; returning a value
         * with it pending would surface as SystemError in the caller. */
        PyErr_Clear();
        Py_RETURN_FALSE;
    }
    try {
        using namespace dftracer::utils::utilities::indexer;
        using namespace dftracer::utils::utilities::indexer::internal;
        IndexDatabase db(idx);
        std::string logical = get_logical_path(gz);
        int fid = db.get_file_info_id(logical);
        if (fid >= 0 && db.has_bloom_data(fid)) {
            Py_RETURN_TRUE;
        }
    } catch (...) {
        /* Deliberately swallowed: treated the same as "no bloom data". */
    }
    Py_RETURN_FALSE;
}
16✔
317

318
/*
 * Getter for Indexer.has_manifest.
 *
 * Opens an IndexDatabase on idx_path, maps gz_path to its logical path with
 * get_logical_path(), looks up the file id and returns True only when that
 * id is non-negative and has_manifest_data() reports true for it.
 *
 * This is a best-effort probe: unset paths, a failed path conversion, or any
 * C++ exception all yield False rather than raising.
 */
static PyObject *Indexer_has_manifest(IndexerObject *self, void *closure) {
    /* Both members stay NULL when __init__ never ran or failed early;
     * PyUnicode_AsUTF8 must not be handed a NULL object. */
    if (!self->idx_path || !self->gz_path) {
        Py_RETURN_FALSE;
    }

    const char *idx = PyUnicode_AsUTF8(self->idx_path);
    const char *gz = idx ? PyUnicode_AsUTF8(self->gz_path) : NULL;
    if (!idx || !gz) {
        /* A failed conversion leaves an exception set; returning a value
         * with it pending would surface as SystemError in the caller. */
        PyErr_Clear();
        Py_RETURN_FALSE;
    }
    try {
        using namespace dftracer::utils::utilities::indexer;
        using namespace dftracer::utils::utilities::indexer::internal;
        IndexDatabase db(idx);
        std::string logical = get_logical_path(gz);
        int fid = db.get_file_info_id(logical);
        if (fid >= 0 && db.has_manifest_data(fid)) {
            Py_RETURN_TRUE;
        }
    } catch (...) {
        /* Deliberately swallowed: treated the same as "no manifest data". */
    }
    Py_RETURN_FALSE;
}
10✔
337

338
/*
 * Getter for Indexer.gz_path: returns a new reference to the stored path
 * string, or None when the path was never set (object created without a
 * successful __init__), instead of dereferencing a NULL member.
 */
static PyObject *Indexer_gz_path(IndexerObject *self, void *closure) {
    if (!self->gz_path) {
        Py_RETURN_NONE;
    }
    Py_INCREF(self->gz_path);
    return self->gz_path;
}
342

343
/*
 * Getter for Indexer.idx_path: returns a new reference to the stored index
 * path string, or None when the path was never set (object created without
 * a successful __init__), instead of dereferencing a NULL member.
 */
static PyObject *Indexer_idx_path(IndexerObject *self, void *closure) {
    if (!self->idx_path) {
        Py_RETURN_NONE;
    }
    Py_INCREF(self->idx_path);
    return self->idx_path;
}
347

348
/* Getter for Indexer.checkpoint_size: the stored value as a Python int. */
static PyObject *Indexer_checkpoint_size(IndexerObject *self, void *closure) {
    PyObject *as_int = PyLong_FromUnsignedLongLong(self->checkpoint_size);
    return as_int;
}
351

352
/*
 * Indexer.__enter__(): hands back the indexer itself (as a new reference) so
 * it can be bound by a `with` statement.
 */
static PyObject *Indexer_enter(IndexerObject *self,
                               PyObject *Py_UNUSED(ignored)) {
    PyObject *me = (PyObject *)self;
    Py_INCREF(me);
    return me;
}
357

358
/*
 * Indexer.__exit__(*exc_info): does no cleanup and returns None, so an
 * exception raised inside the `with` body is not suppressed.
 */
static PyObject *Indexer_exit(IndexerObject *self, PyObject *args) {
    /* Nothing is released here; the arguments are ignored. */
    Py_RETURN_NONE;
}
361

362
/* Method table for Indexer: {name, implementation, calling convention, doc}. */
static PyMethodDef Indexer_methods[] = {
    /* Index construction and status. */
    {"build", reinterpret_cast<PyCFunction>(Indexer_build), METH_NOARGS,
     "build()\n--\n\nBuild or rebuild the index.\n"},
    {"need_rebuild", reinterpret_cast<PyCFunction>(Indexer_need_rebuild),
     METH_NOARGS, "Check if a rebuild is needed."},
    {"exists", reinterpret_cast<PyCFunction>(Indexer_exists), METH_NOARGS,
     "Check if the index file exists."},

    /* Queries against the native indexer handle. */
    {"get_max_bytes", reinterpret_cast<PyCFunction>(Indexer_get_max_bytes),
     METH_NOARGS, "Get the maximum uncompressed bytes in the indexed file."},
    {"get_num_lines", reinterpret_cast<PyCFunction>(Indexer_get_num_lines),
     METH_NOARGS, "Get the total number of lines in the indexed file."},
    {"find_checkpoint", reinterpret_cast<PyCFunction>(Indexer_find_checkpoint),
     METH_VARARGS,
     "Find the best checkpoint for a given uncompressed offset.\n\nArgs:\n"
     "    offset (int): Uncompressed byte offset.\n"},
    {"get_checkpoints", reinterpret_cast<PyCFunction>(Indexer_get_checkpoints),
     METH_NOARGS, "Get all checkpoints for this file as a list."},

    /* Context-manager protocol. */
    {"__enter__", reinterpret_cast<PyCFunction>(Indexer_enter), METH_NOARGS,
     "Enter the runtime context for the with statement."},
    {"__exit__", reinterpret_cast<PyCFunction>(Indexer_exit), METH_VARARGS,
     "Exit the runtime context for the with statement."},

    {NULL, NULL, 0, NULL} /* Sentinel */
};
389

390
/*
 * Attribute table for Indexer: {name, getter, setter, doc, closure}.
 * Every entry has a NULL setter, i.e. all attributes are read-only.
 */
static PyGetSetDef Indexer_getsetters[] = {
    {"gz_path", reinterpret_cast<getter>(Indexer_gz_path), NULL,
     "Path to the gzip file", NULL},
    {"idx_path", reinterpret_cast<getter>(Indexer_idx_path), NULL,
     "Path to the index file", NULL},
    {"checkpoint_size", reinterpret_cast<getter>(Indexer_checkpoint_size), NULL,
     "Checkpoint size in bytes", NULL},
    {"has_bloom", reinterpret_cast<getter>(Indexer_has_bloom), NULL,
     "Whether bloom data exists in index", NULL},
    {"has_manifest", reinterpret_cast<getter>(Indexer_has_manifest), NULL,
     "Whether manifest data exists in index", NULL},
    {NULL, NULL, NULL, NULL, NULL} /* Sentinel */
};
402

403
/*
 * Type object for indexer.Indexer.
 *
 * Slots are filled positionally, so their order must match PyTypeObject
 * exactly; only the slots named in the trailing comments are used, the rest
 * are left zero for PyType_Ready() to inherit or default.
 */
PyTypeObject IndexerType = {
    PyVarObject_HEAD_INIT(NULL, 0) "indexer.Indexer", /* tp_name */
    sizeof(IndexerObject),                            /* tp_basicsize */
    0,                                                /* tp_itemsize */

    /* Lifetime and basic protocol slots. */
    (destructor)Indexer_dealloc, /* tp_dealloc */
    0,                           /* tp_vectorcall_offset */
    0,                           /* tp_getattr */
    0,                           /* tp_setattr */
    0,                           /* tp_as_async */
    0,                           /* tp_repr */
    0,                           /* tp_as_number */
    0,                           /* tp_as_sequence */
    0,                           /* tp_as_mapping */
    0,                           /* tp_hash */
    0,                           /* tp_call */
    0,                           /* tp_str */
    0,                           /* tp_getattro */
    0,                           /* tp_setattro */
    0,                           /* tp_as_buffer */

    /* Subclassable from Python. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */

    /* Signature line, "--" separator, then the user-facing description. */
    "Indexer(gz_path: str, idx_path: str | None = None,\n"
    "       checkpoint_size: int = 1048576,\n"
    "       force_rebuild: bool = False, build_bloom: bool = False,\n"
    "       build_manifest: bool = False,\n"
    "       index_threshold: int = 8388608,\n"
    "       runtime: Runtime | None = None)\n"
    "--\n"
    "\n"
    "Indexer for creating and managing gzip file indices.\n"
    "\n"
    "Args:\n"
    "    gz_path (str): Path to the gzip trace file.\n"
    "    idx_path (str or None): Path to the index file. If None,\n"
    "        uses gz_path + \".idx\".\n"
    "    checkpoint_size (int): Checkpoint size in bytes for index\n"
    "        building (default 1 MB).\n"
    "    force_rebuild (bool): If True, rebuild the index even if it\n"
    "        exists.\n"
    "    build_bloom (bool): If True, build bloom filter data in the\n"
    "        index.\n"
    "    build_manifest (bool): If True, build manifest data in the\n"
    "        index.\n"
    "    index_threshold (int): Skip indexing for files smaller than\n"
    "        this (default 8 MB).\n"
    "    runtime (Runtime or None): Runtime instance for thread pool\n"
    "        control. If None, uses the default global Runtime.\n", /* tp_doc */

    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */

    /* Python-visible API. */
    Indexer_methods,    /* tp_methods */
    0,                  /* tp_members */
    Indexer_getsetters, /* tp_getset */

    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */

    /* Construction. */
    (initproc)Indexer_init, /* tp_init */
    0,                      /* tp_alloc */
    Indexer_new,            /* tp_new */
};
467

468
/*
 * Finalise IndexerType and expose it on module `m` under the name "Indexer".
 *
 * Returns 0 on success and -1 when PyType_Ready() or PyModule_AddObject()
 * fails.
 */
int init_indexer(PyObject *m) {
    if (PyType_Ready(&IndexerType) < 0) {
        return -1;
    }

    /* PyModule_AddObject steals this reference only when it succeeds. */
    PyObject *type_obj = (PyObject *)&IndexerType;
    Py_INCREF(type_obj);
    if (PyModule_AddObject(m, "Indexer", type_obj) >= 0) {
        return 0;
    }

    Py_DECREF(type_obj);
    /* NOTE(review): this also drops a reference to `m`, which presumably
     * belongs to the caller — confirm the caller does not release it again. */
    Py_DECREF(m);
    return -1;
}
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc