• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 13706216795

06 Mar 2025 07:04PM UTC coverage: 83.55% (+1.3%) from 82.289%
13706216795

push

github

web-flow
Merge LLVM 19 support (#23)

* Initial LLVM 19 port

* Pass all supported tests for LLVM 19

* Custom CI workflow for LLVM 19

* Add LLVM snapshot GPG key

* Reformat

* DIRootType endless recursion fix (#18)

* Parse static members (#19)

* Negative range counts for arrays set to 0 (#20)

* Adjust branch names again

---------

Co-authored-by: ahueck <hueck.alexander@gmail.com>

957 of 1374 branches covered (69.65%)

Branch coverage included in aggregate %.

51 of 51 new or added lines in 4 files covered. (100.0%)

1 existing line in 1 file now uncovered.

1877 of 2018 relevant lines covered (93.01%)

3537.26 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.0
/lib/type/GEP.cpp
1
//  llvm-dimeta library
2
//  Copyright (c) 2022-2025 llvm-dimeta authors
3
//  Distributed under the BSD 3-Clause license.
4
//  (See accompanying file LICENSE)
5
//  SPDX-License-Identifier: BSD-3-Clause
6
//
7

8
#include "GEP.h"
9

10
#include "DIVisitorUtil.h"
11
#include "support/Logger.h"
12

13
#include "llvm/ADT/APInt.h"
14
#include "llvm/ADT/DenseMapInfo.h"
15
#include "llvm/ADT/STLExtras.h"
16
#include "llvm/ADT/SmallVector.h"
17
#include "llvm/ADT/iterator.h"
18
#include "llvm/ADT/iterator_range.h"
19
#include "llvm/BinaryFormat/Dwarf.h"
20
#include "llvm/IR/Constants.h"
21
#include "llvm/IR/DebugInfoMetadata.h"
22
#include "llvm/IR/Metadata.h"
23
#include "llvm/IR/Operator.h"
24
#include "llvm/IR/Type.h"
25
#include "llvm/IR/Use.h"
200✔
26
#include "llvm/Support/Casting.h"
27
#include "llvm/Support/Debug.h"
28
#include "llvm/Support/raw_ostream.h"
29

30
#include <DIVisitor.h>
31
#include <algorithm>
32
#include <cassert>
33
#include <cstddef>
34
#include <cstdint>
35
#include <iterator>
36
#include <optional>
37

38
namespace dimeta::gep {
39

40
namespace util {
41
inline bool is_byte_indexing(const llvm::GEPOperator* gep) {
1,638✔
42
  return gep->getSourceElementType()->isIntegerTy(8);
1,638✔
43
}
44

45
inline bool is_first_non_zero_indexing(const llvm::GEPOperator* gep) {
544✔
46
  if (auto const_idx = llvm::dyn_cast<llvm::ConstantInt>((*gep->idx_begin()).get())) {
926!
47
    const int64_t index = const_idx->getValue().getSExtValue();
474✔
48
    return index > 0;
474✔
49
  }
382✔
50
  return false;
70✔
51
}
544✔
52
}  // namespace util
53

54
struct GepIndices {
544✔
55
  const llvm::GEPOperator* gep;
56
  llvm::SmallVector<uint64_t, 4> indices_;
57
  bool skipped{false};
544✔
58
  bool is_byte_access{false};
544✔
59
  using Iter = llvm::SmallVector<uint64_t, 4>::const_iterator;
60

61
  inline llvm::iterator_range<Iter> indices() const {
456✔
62
    return llvm::iterator_range<Iter>(indices_);
456✔
63
  }
64

65
  inline size_t size() const {
998✔
66
    return indices_.size();
998✔
67
  }
68

69
  inline bool empty() const {
544✔
70
    return size() == 0;
544✔
71
  }
72

73
  inline bool skipped_first() const {
474✔
74
    return skipped;
474✔
75
  }
76

77
  inline bool byte_access() const {
966✔
78
    return is_byte_access;
966✔
79
  }
80

81
  static GepIndices create(const llvm::GEPOperator* inst, bool skip_first = true);
82
};
83

84
GepIndices GepIndices::create(const llvm::GEPOperator* inst, bool skip_first) {
544✔
85
  GepIndices gep_ind;
544✔
86
  gep_ind.gep            = inst;
544✔
87
  gep_ind.skipped        = skip_first;
544✔
88
  gep_ind.is_byte_access = util::is_byte_indexing(gep_ind.gep);
544✔
89

90
#if LLVM_VERSION_MAJOR > 12
91
  for (const auto& index : inst->indices()) {
1,406✔
92
#else
93
  for (const auto& index : llvm::make_range(inst->idx_begin(), inst->idx_end())) {
376✔
94
#endif
95
    LOG_DEBUG("Iter " << skip_first << " with " << *index.get())
96
    if (skip_first) {
1,238✔
97
      skip_first = false;
404✔
98
      continue;
508✔
99
    }
100
    if (auto const_idx = llvm::dyn_cast<llvm::ConstantInt>(index.get())) {
1,294!
101
      const int64_t index = const_idx->getValue().getSExtValue();
730✔
102
      gep_ind.indices_.emplace_back(index);
730✔
103
    }
730✔
104
  }
1,134!
105
  return gep_ind;
438✔
106
}
544!
107

108
inline llvm::raw_ostream& operator<<(llvm::raw_ostream& os, const GepIndices& indices) {
109
  const auto& vec = indices.indices_;
110
  if (vec.empty()) {
111
    os << "[]";
112
    return os;
113
  }
114
  const auto* begin = std::begin(vec);
115
  os << "[" << *begin;
116
  std::for_each(std::next(begin), std::end(vec), [&](const auto value) {
117
    os << ", ";
118
    os << value;
119
  });
120
  os << "]";
121
  return os;
122
}
123

124
llvm::DINode* select_non_zero_element(llvm::DINode* element, llvm::DINode* next_element) {
312✔
125
  // used to detect the empty base class optimization
126
  auto derived_type_member      = llvm::dyn_cast<llvm::DIDerivedType>(element);
312✔
127
  auto next_derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(next_element);
312✔
128
  if (derived_type_member != nullptr && next_derived_type_member != nullptr) {
312!
129
    LOG_DEBUG("Non-null elements")
130
    if (derived_type_member->getOffsetInBits() == next_derived_type_member->getOffsetInBits()) {
308!
131
      LOG_DEBUG("Same offset detected: " << log::ditype_str(derived_type_member) << " and "
132
                                         << log::ditype_str(next_derived_type_member))
133
      return next_element;
2✔
134
    }
135
  }
246✔
136
  return element;
310✔
137
}
312✔
138

139
namespace detail {
140

141
template <typename UnlessFn>
142
auto find_non_derived_type_unless(llvm::DIType* root, UnlessFn&& unless) {
632✔
143
  llvm::DIType* type = root;
632✔
144
  while (type && llvm::isa<llvm::DIDerivedType>(type)) {
656!
145
    if (unless(type)) {
220!
146
      break;
156✔
147
    }
148
    auto ditype = llvm::dyn_cast<llvm::DIDerivedType>(type);
24✔
149
    type        = ditype->getBaseType();
24✔
150
  }
24✔
151
  return type;
1,264✔
152
}
632✔
153

154
inline bool is_pointer_like(const llvm::DIType& di_type) {
220✔
155
  if (const auto* type = llvm::dyn_cast<llvm::DIDerivedType>(&di_type)) {
400!
156
    return type->getTag() == llvm::dwarf::DW_TAG_array_type || type->getTag() == llvm::dwarf::DW_TAG_reference_type ||
440!
157
           type->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
220✔
158
           type->getTag() == llvm::dwarf::DW_TAG_ptr_to_member_type;
24✔
159
  }
160
  return false;
×
161
}
220✔
162

163
}  // namespace detail
164

165
auto find_non_derived_type_unless_ptr(llvm::DIType* root) {
632✔
166
  return detail::find_non_derived_type_unless(root, [](auto* val) { return detail::is_pointer_like(*val); });
852✔
167
}
168

169
auto find_non_derived_type(llvm::DIType* root) {
×
170
  return detail::find_non_derived_type_unless(root, [](auto* val) { return false; });
×
171
}
172

173
llvm::DICompositeType* skip_first_gep_access(llvm::DICompositeType* composite_type) {
256✔
174
  const auto select_next_member = [&](llvm::DICompositeType* base) -> std::optional<llvm::DIType*> {
274✔
175
    auto elems = base->getElements();
18✔
176
    if (elems.empty()) {
18!
177
      return {};
×
178
    }
179
    auto element = *base->getElements().begin();
18✔
180
    if (elems.size() > 1) {
18!
181
      auto next_element = *(std::next(base->getElements().begin()));
×
182
      element           = select_non_zero_element(element, next_element);
×
183
    }
×
184

185
    return find_non_derived_type_unless_ptr(llvm::dyn_cast<llvm::DIType>(element));
18✔
186
  };
18✔
187

188
  const auto should_iterate_next_member = [](auto* composite_type) {
530✔
189
    auto elem_count = llvm::count_if(composite_type->getElements(),
274✔
190
                                     [](const auto& elem) { return !llvm::isa<llvm::DISubprogram>(elem); });
988✔
191
    if (elem_count > 1) {
274✔
192
      auto element      = *composite_type->getElements().begin();
256✔
193
      auto next_element = *(std::next(composite_type->getElements().begin()));
256✔
194
      element           = select_non_zero_element(element, next_element);
256✔
195
      return element == next_element;
256✔
196
    }
208✔
197

198
    return elem_count == 1;
18✔
199
  };
274✔
200

201
  while (should_iterate_next_member(composite_type)) {
274✔
202
    auto next_di = select_next_member(composite_type);
18✔
203
    if (!next_di) {
18!
204
      break;
×
205
    }
206
    if (!llvm::isa<llvm::DICompositeType>(next_di.value())) {
18!
207
      break;
×
208
    }
209
    composite_type = llvm::dyn_cast<llvm::DICompositeType>(next_di.value());
18✔
210
  }
18!
211

212
  return composite_type;
464✔
213
}
256✔
214

215
GepIndexToType iterate_gep_index(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
456✔
216
  const auto has_next_idx = [&gep_indices](size_t pos) { return pos + 1 < gep_indices.size(); };
874✔
217

218
  LOG_DEBUG("Iterate over gep: " << gep_indices);
219

220
  for (const auto& enum_index : llvm::enumerate(gep_indices.indices())) {
1,070!
221
    const auto index  = enum_index.value();
614✔
222
    const auto& elems = composite_type->getElements();
614✔
223
    assert(elems.size() > index);
614!
224

225
    auto element = elems[index];
614✔
226
    if (!llvm::isa<llvm::DIDerivedType>(element)) {
614!
227
      LOG_DEBUG("Index shows to non-derived type: " << log::ditype_str(element))
228
      // TODO, if only one index, and this triggers, go first element all the way down?
229
      // maybe also check for class type (not structs etc.)
230
    }
231

232
    if (index == 0 && elems.size() > 1) {
614✔
233
      LOG_DEBUG("Check zero-size pattern")
234
      auto next_element = elems[index + 1];
56✔
235
      element           = select_non_zero_element(element, next_element);
56✔
236
    }
56✔
237

238
    LOG_DEBUG(" element: " << log::ditype_str(element))
239

240
    if (auto derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(element)) {
1,228!
241
      auto member_type = find_non_derived_type_unless_ptr(derived_type_member->getBaseType());
614✔
242

243
      LOG_DEBUG("Looking at " << log::ditype_str(member_type))
244

245
      if (auto composite_member_type = llvm::dyn_cast<llvm::DICompositeType>(member_type)) {
964✔
246
        if (composite_member_type->getTag() == llvm::dwarf::DW_TAG_class_type ||
418!
247
            composite_member_type->getTag() == llvm::dwarf::DW_TAG_structure_type) {
418✔
248
          // maybe need to recurse into!
249
          if (has_next_idx(enum_index.index())) {
274✔
250
            composite_type = composite_member_type;
116✔
251
            continue;
158✔
252
          }
253
        }
60✔
254
        if (composite_member_type->getTag() == llvm::dwarf::DW_TAG_array_type) {
260✔
255
          if (has_next_idx(enum_index.index())) {
186✔
256
            // At end of gep instruction, return basetype:
257
            return GepIndexToType{composite_member_type->getBaseType(), derived_type_member};
80✔
258
          }
259
          // maybe need to recurse into tag_array_type (of non-basic type...)
260
        }
90✔
261
      }
150✔
262

263
      return GepIndexToType{member_type, derived_type_member};
376✔
264
    }
614✔
265
  }
614!
266
  return {};
×
267
}
294✔
268

269
struct DestructureGepIndex : visitor::DINodeVisitor<DestructureGepIndex> {
270
  explicit DestructureGepIndex(const size_t index) : index{index} {
18✔
271
  }
18✔
272

273
  [[nodiscard]] std::optional<GepIndexToType> result() const {
18✔
274
    return this->outermost_candidate;
18✔
275
  }
276

277
  bool visitCompositeType(const llvm::DICompositeType* ty) {
78✔
278
    LOG_DEBUG("visitCompositeType: " << ty->getName() << " index: " << index << " offset base: " << this->offset_base);
279

280
    for (auto* element : ty->getElements()) {
250✔
281
      if (auto* derived_ty = llvm::dyn_cast<llvm::DIDerivedType>(element)) {
326✔
282
        assert(derived_ty->getTag() == llvm::dwarf::DW_TAG_member && "Expected member element in composite ty");
308!
283
        LOG_DEBUG("looking @ member: " << derived_ty->getName() << " offset: " << derived_ty->getOffsetInBits() / 8
284
                                       << " size: " << derived_ty->getSizeInBits() / 8);
285

286
        if (const auto offset = this->offset_base + (derived_ty->getOffsetInBits() / 8);
154✔
287
            index >= offset && index < offset + derived_ty->getSizeInBits() / 8) {
208✔
288
          LOG_DEBUG("saving candidate member");
289
          if (const auto* base_ty = derived_ty->getBaseType(); base_ty)
108!
290
            LOG_DEBUG(" base ty: " << base_ty->getName());
54✔
291

292
          this->outermost_candidate.emplace(GepIndexToType{derived_ty->getBaseType(), derived_ty});
54✔
293

294
          // We should only ever be able to recurse into one composite type where the offset condition holds, so
295
          // save the offset base for that member.
296
          if (const auto* comp_ty = llvm::dyn_cast<llvm::DICompositeType>(derived_ty->getBaseType()); comp_ty) {
108!
297
            LOG_DEBUG("setting offset base to: " << offset);
298
            this->offset_base = offset;
54✔
299
          }
54✔
300
        }
54✔
301
      }
154✔
302
    }
172✔
303

304
    return true;
78✔
305
  }
306

307
 private:
308
  size_t index;
309
  size_t offset_base{};
18✔
310
  std::optional<GepIndexToType> outermost_candidate{};
18✔
311
};
312

313
GepIndexToType resolve_gep_index_to_type(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
544✔
314
  if (gep_indices.empty()) {
544✔
315
    // this triggers for composite (-array) access without constant index, see "heap_milc_struct_mock.c":
316
    LOG_DEBUG("Gep indices empty")
317
    return GepIndexToType{composite_type};
104✔
318
  }
319

320
  if (gep_indices.byte_access()) {
492✔
321
    // Test heap_tachyon_mock_image.c for llvm 12:
322
    // This mostly applies to llvm <= 12?
323
    LOG_DEBUG("Gep indices are byte access: " << gep_indices)
324
    const auto& elems = composite_type->getElements();
36✔
325
    assert(elems.size() > 0 && "Need at least one member for gep byte-based access");
70!
326
    assert(gep_indices.size() == 1 && "Byte access is only supported for one byte index value");
70!
327
    for (auto element : elems) {
122✔
328
      if (auto ditype = llvm::dyn_cast<llvm::DIDerivedType>(element)) {
172!
329
        assert(ditype->getTag() == llvm::dwarf::DW_TAG_member && "A member is expected here");
168!
330
        const auto offset_bytes = ditype->getOffsetInBits() / 8;
86✔
331
        if (offset_bytes == gep_indices.indices_[0]) {
86✔
332
          LOG_DEBUG("got match for: " << ditype->getName());
333
          return GepIndexToType{ditype->getBaseType(), ditype};
18✔
334
        }
335
      }
84✔
336
    }
84✔
337
  }
36!
338

339
  if (gep_indices.skipped_first() && gep_indices.indices_[0] != 0) {
474✔
340
    // This assumes that a single (and only single) first 0 skips through to the first element with more than one
341
    // member: struct A { struct B { struct C { int, int } } } -> would skip to "struct C" for gep [0 1]
342
    LOG_DEBUG("Skip single member nested of: " << log::ditype_str(composite_type))
343
    composite_type = skip_first_gep_access(composite_type);
256✔
344
    LOG_DEBUG("Result of skip: " << log::ditype_str(composite_type))
345
  }
256✔
346

347
  if (gep_indices.byte_access()) {
474✔
348
    DestructureGepIndex visitor{gep_indices.indices_[0]};
18✔
349
    visitor.traverseCompositeType(composite_type);
18✔
350

351
    // TODO: Should this function really return `GepIndexToType` instead of an optional?
352
    return visitor.result().value();
18✔
353
  } else {
18✔
354
    return iterate_gep_index(composite_type, gep_indices);
456✔
355
  }
356
}
544✔
357

358
GepIndexToType extract_gep_dereferenced_type(llvm::DIType* root, const llvm::GEPOperator& inst) {
834✔
359
  using namespace llvm;
360

361
  const auto gep_src = inst.getSourceElementType();
834✔
362

363
  if (gep_src->isPointerTy()) {
834✔
364
    LOG_DEBUG("Gep to ptr " << log::ditype_str(root));
365
    // The commented code is used in conjunction with load/store reset (non-basic!, e.g., reset_load_related):
366
    //    if (auto* type_behind_ptr = llvm::dyn_cast<llvm::DIDerivedType>(root)) {
367
    //      assert((type_behind_ptr->getTag() == llvm::dwarf::DW_TAG_pointer_type) && "Expected a DI pointer type.");
368
    //      return type_behind_ptr->getBaseType();
369
    //    }
370
    // if (!llvm::isa<llvm::DICompositeType>(root)) {
371
    return GepIndexToType{root};
404✔
372
    // }
373
  }
374

375
  if (gep_src->isArrayTy()) {
632✔
376
    if (auto composite_type = llvm::dyn_cast<llvm::DICompositeType>(root)) {
148!
377
      auto base_type = composite_type->getBaseType();
82✔
378
      LOG_DEBUG("Gep to array of DI composite, with base type " << log::ditype_str(base_type));
379
      return GepIndexToType{base_type};
82✔
380
    }
82✔
381
    LOG_DEBUG("Gep to array " << log::ditype_str(root));
382
    return GepIndexToType{root};
×
383
  }
384

385
  const auto find_composite = [](llvm::DIType* root) {
1,100✔
386
    assert(root != nullptr && "Root type should be non-null");
994!
387
    llvm::DIType* type = root;
550✔
388
    while (llvm::isa<llvm::DIDerivedType>(type)) {
1,062✔
389
      const auto ditype = llvm::dyn_cast<llvm::DIDerivedType>(type);
512✔
390
      auto next_type    = ditype->getBaseType();
512✔
391
      if (next_type != nullptr) {
512!
392
        type = next_type;
512✔
393
      } else {
414✔
394
        break;
×
395
      }
396
    }
512!
397
    return type;
1,100✔
398
  };
550✔
399

400
  auto* const base_ty = find_composite(root);
550✔
401

402
  auto* const ty             = dyn_cast<DIDerivedType>(root);
550✔
403
  auto* const composite_type = llvm::dyn_cast<DICompositeType>(base_ty);
550✔
404
  // TODO: This check seems like a bad idea but I'm not really sure how to do it properly, I reckon we need *some*
405
  //       heuristic to detect "fake-array" types though (e.g. gep/array_composite_s.c)
406
  if (util::is_byte_indexing(&inst) &&
582✔
407
      (!composite_type || (ty && ty->getBaseType()->getTag() == dwarf::DW_TAG_pointer_type))) {
42✔
408
    return GepIndexToType{root};
12✔
409
  }
410

411
  assert(composite_type != nullptr && "Root should be a struct-like type. A");
982!
412

413
  if (composite_type->isForwardDecl()) {
544!
414
    LOG_DEBUG("Forward declared composite type cannot be resolved " << log::ditype_str(composite_type))
415
    // TODO: Make some error code for such a case, this is hit by `cpp/heap_vector_operator_opt.cpp` and looking at
416
    //       the DI type, it doesn't seem to have a way to get the underlying type so...
UNCOV
417
    return GepIndexToType{};
×
418
  }
419

420
  LOG_DEBUG("Gep to DI composite: " << log::ditype_str(composite_type))
421
  bool skip_first{!util::is_first_non_zero_indexing(&inst)};
544✔
422
  if (util::is_byte_indexing(&inst)) {
544✔
423
    LOG_DEBUG("Access based on i8 ptr, assuming byte offsetting into composite member")
424
    skip_first = false;  // We do not skip over byte index values (likely != 0)
34✔
425
  }
36✔
426

427
  auto accessed_ditype = resolve_gep_index_to_type(composite_type, GepIndices::create(&inst, skip_first));
544✔
428

429
  return accessed_ditype;
216✔
430
}
834✔
431

432
}  // namespace dimeta::gep
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc