• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

llnl / dftracer-utils / 26043728131

18 May 2026 03:37PM UTC coverage: 51.706% (-0.4%) from 52.076%
26043728131

push

github

hariharan-devarajan
feat(perf): performance improvements for parallel reading, indexing, and aggregation

Indexer
- Streaming parse-and-emit worker pipeline with bounded memory usage
- Concurrent SST artifact ingestion with staging support
- Gzip member slicing for parallel indexing
- Lazy decoding for compressed value counts
- Bypass DOM wrapper for indexer hot path (simdjson on_demand)
- Decoupled write workers from parse workers
- --rebuild-summaries flag and optimized root summary rebuild

Aggregator / MPI
- Task-based DAG execution for aggregator pipeline
- Shared staging for multi-node artifact relocation
- Per-node thread scaling to avoid oversubscription
- Unified distributed aggregation tracking, removed manifest consolidation
- Deterministic aggregation and intra-file parallelism

Trace reader / query
- Compiled predicate evaluation for AND-of-EQ queries
- Uniform-match shortcut for AND-of-EQ queries
- Line-range support for work items and checkpoint processing
- Optimized chunk pruning and checkpoint handling

Replay
- Pipelined replay with coroutines and channels
- JsonParser-based trace processing
- Optimized string handling and i/o buffering

Organize / writer / dft
- Parallel slice creation and merging in organize visitor
- Inline indexer in organize
- Gzip member tracking in writer
- Coroutine-based event dispatcher with extracted parse logic
- Batch flushing in organize visitor

Arrow / call_tree
- Optimized arrow conversion
- Arrow IPC support and improved save/load in call_tree

Build / infrastructure
- zlib-ng option, system simdjson fallback
- cgroup v1/v2 memory limit detection
- Auto-computed per-file memory estimates and batch sizes
- CI: perf branch trigger, formatting

Docs
- Rewritten indexer and trace reader API references

35907 of 90345 branches covered (39.74%)

Branch coverage included in aggregate %.

16869 of 21880 new or added lines in 137 files covered. (77.1%)

273 existing lines in 39 files now uncovered.

32021 of 41028 relevant lines covered (78.05%)

13164.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.08
/src/dftracer/utils/python/arrow_helpers.cpp
1
#include <dftracer/utils/core/common/config.h>
2
#ifdef DFTRACER_UTILS_ENABLE_ARROW
3

4
#define PY_SSIZE_T_CLEAN
5
#include <Python.h>
6
#include <dftracer/utils/python/arrow_helpers.h>
7
#include <dftracer/utils/python/trace_reader_iterator.h>
8

9
namespace dftracer::utils::python {
10

11
// Wrap an ArrowExportResult in a newly allocated ArrowBatchCapsuleObject.
//
// If result.valid() is false, sets a Python RuntimeError and returns NULL.
// If tp_alloc returns NULL, returns NULL without setting an error here.
// Otherwise the result is moved into a heap-allocated ArrowExportResult that
// is stored in cap->result, and the capsule is returned as a PyObject*.
// NOTE(review): the `new` here has no matching `delete` in this function;
// presumably the capsule type's dealloc releases cap->result -- confirm.
PyObject *wrap_arrow_result(ArrowExportResult result) {
44✔
12
    if (!result.valid()) {
44!
13
        PyErr_SetString(PyExc_RuntimeError,
×
14
                        "Cannot wrap invalid ArrowExportResult");
15
        return NULL;
×
16
    }
17

18
    auto *cap = (ArrowBatchCapsuleObject *)ArrowBatchCapsuleType.tp_alloc(
44✔
19
        &ArrowBatchCapsuleType, 0);
20
    if (!cap) return NULL;
44✔
21

22
    cap->result = new ArrowExportResult(std::move(result));
44!
23
    return (PyObject *)cap;
44✔
24
}
22✔
25

26
// Build a Python `dftracer.utils.arrow.ArrowTable` from `batch_list`.
//
// Imports the module "dftracer.utils.arrow", looks up its "ArrowTable"
// attribute, and calls it with `batch_list` as the only argument; the value
// of that call is returned unchanged (so NULL if the call failed).
//
// Reference handling: when `batch_list` is non-NULL, this function drops one
// reference to it on every path (import failure, attribute lookup failure,
// and after the call), so the caller hands over its reference.
// When `batch_list` is NULL, a RuntimeError is set and NULL is returned.
PyObject *wrap_arrow_table(PyObject *batch_list) {
42✔
27
    if (!batch_list) {
42✔
28
        PyErr_SetString(PyExc_RuntimeError, "batch_list is NULL");
×
29
        return NULL;
×
30
    }
31

32
    PyObject *mod = PyImport_ImportModule("dftracer.utils.arrow");
42✔
33
    if (!mod) {
42✔
34
        Py_DECREF(batch_list);
35
        return NULL;
×
36
    }
37

38
    PyObject *cls = PyObject_GetAttrString(mod, "ArrowTable");
42✔
39
    Py_DECREF(mod);
21✔
40
    if (!cls) {
42✔
41
        Py_DECREF(batch_list);
42
        return NULL;
×
43
    }
44

45
    PyObject *table = PyObject_CallFunctionObjArgs(cls, batch_list, NULL);
42✔
46
    Py_DECREF(cls);
21✔
47
    Py_DECREF(batch_list);
21✔
48
    return table;
42✔
49
}
21✔
50

51
// Build a Python `dftracer.utils.arrow.ArrowTable` from `stream_obj`.
//
// Follows the same steps as wrap_arrow_table: imports
// "dftracer.utils.arrow", fetches "ArrowTable", and calls it with
// `stream_obj` as the only argument, returning the call's value unchanged
// (so NULL if the call failed).
//
// Reference handling: when `stream_obj` is non-NULL, one reference to it is
// dropped on every path (import failure, attribute lookup failure, and after
// the call). When `stream_obj` is NULL, a RuntimeError is set and NULL is
// returned.
PyObject *wrap_arrow_stream_table(PyObject *stream_obj) {
26✔
52
    if (!stream_obj) {
26✔
NEW
53
        PyErr_SetString(PyExc_RuntimeError, "stream_obj is NULL");
×
NEW
54
        return NULL;
×
55
    }
56

57
    PyObject *mod = PyImport_ImportModule("dftracer.utils.arrow");
26✔
58
    if (!mod) {
26✔
59
        Py_DECREF(stream_obj);
NEW
60
        return NULL;
×
61
    }
62

63
    PyObject *cls = PyObject_GetAttrString(mod, "ArrowTable");
26✔
64
    Py_DECREF(mod);
13✔
65
    if (!cls) {
26✔
66
        Py_DECREF(stream_obj);
NEW
67
        return NULL;
×
68
    }
69

70
    PyObject *table = PyObject_CallFunctionObjArgs(cls, stream_obj, NULL);
26✔
71
    Py_DECREF(cls);
13✔
72
    Py_DECREF(stream_obj);
13✔
73
    return table;
26✔
74
}
13✔
75

76
// Convert a single ArrowExportResult into an ArrowTable Python object.
//
// Wraps `result` with wrap_arrow_result, places the resulting capsule in a
// new one-element list, and passes that list to wrap_arrow_table, whose
// return value is returned as-is. Returns NULL if the capsule or the list
// cannot be created; the capsule is released if list creation fails.
// The list takes over the capsule reference, and wrap_arrow_table releases
// the list, so no extra cleanup is done here on the final path.
PyObject *arrow_result_to_table(ArrowExportResult result) {
12✔
77
    PyObject *capsule = wrap_arrow_result(std::move(result));
12!
78
    if (!capsule) return NULL;
12✔
79

80
    PyObject *list = PyList_New(1);
12✔
81
    if (!list) {
12✔
82
        Py_DECREF(capsule);
83
        return NULL;
×
84
    }
85
    PyList_SET_ITEM(list, 0, capsule);  // steals ref
12✔
86

87
    return wrap_arrow_table(list);
12✔
88
}
6✔
89

90
}  // namespace dftracer::utils::python
91

92
#endif  // DFTRACER_UTILS_ENABLE_ARROW
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc