• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 13884111619

16 Mar 2025 01:59PM UTC coverage: 83.576%. First build
13884111619

push

github

web-flow
GEP refactor for EBO (#33)

1040 of 1499 branches covered (69.38%)

Branch coverage included in aggregate %.

46 of 47 new or added lines in 2 files covered. (97.87%)

1942 of 2069 relevant lines covered (93.86%)

3892.67 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.38
/lib/type/GEP.cpp
1
//  llvm-dimeta library
2
//  Copyright (c) 2022-2025 llvm-dimeta authors
3
//  Distributed under the BSD 3-Clause license.
4
//  (See accompanying file LICENSE)
5
//  SPDX-License-Identifier: BSD-3-Clause
6
//
7

8
#include "GEP.h"
9

10
#include "DIVisitor.h"
11
#include "DIVisitorUtil.h"
12
#include "support/Logger.h"
13

14
#include "llvm/ADT/APInt.h"
15
#include "llvm/ADT/DenseMapInfo.h"
16
#include "llvm/ADT/STLExtras.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/ADT/iterator.h"
19
#include "llvm/ADT/iterator_range.h"
20
#include "llvm/BinaryFormat/Dwarf.h"
21
#include "llvm/IR/Constants.h"
22
#include "llvm/IR/DebugInfoMetadata.h"
23
#include "llvm/IR/Metadata.h"
24
#include "llvm/IR/Operator.h"
25
#include "llvm/IR/Type.h"
200✔
26
#include "llvm/IR/Use.h"
27
#include "llvm/Support/Casting.h"
28
#include "llvm/Support/Debug.h"
29
#include "llvm/Support/raw_ostream.h"
30

31
#include <algorithm>
32
#include <cassert>
33
#include <cstddef>
34
#include <cstdint>
35
#include <iterator>
36
#include <optional>
37

38
namespace dimeta::gep {
39

40
namespace util {
41
inline bool is_byte_indexing(const llvm::GEPOperator* gep) {
2,146✔
42
  return gep->getSourceElementType()->isIntegerTy(8);
2,146✔
43
}
44

45
inline bool is_first_non_zero_indexing(const llvm::GEPOperator* gep) {
712✔
46
  if (auto* const_idx = llvm::dyn_cast<llvm::ConstantInt>((*gep->idx_begin()).get())) {
1,230!
47
    const int64_t index = const_idx->getValue().getSExtValue();
642✔
48
    return index > 0;
642✔
49
  }
518✔
50
  return false;
70✔
51
}
712✔
52
}  // namespace util
53

54
struct GepIndices {
712✔
55
  const llvm::GEPOperator* gep;
56
  llvm::SmallVector<uint64_t, 4> indices_;
57
  bool skipped{false};
712✔
58
  bool is_byte_access{false};
712✔
59
  using Iter = llvm::SmallVector<uint64_t, 4>::const_iterator;
60

61
  inline llvm::iterator_range<Iter> indices() const {
610✔
62
    return llvm::iterator_range<Iter>(indices_);
610✔
63
  }
64

65
  inline size_t size() const {
1,176✔
66
    return indices_.size();
1,176✔
67
  }
68

69
  inline bool empty() const {
712✔
70
    return size() == 0;
712✔
71
  }
72

73
  inline bool skipped_first() const {
610✔
74
    return skipped;
610✔
75
  }
76

77
  inline bool byte_access() const {
660✔
78
    return is_byte_access;
660✔
79
  }
80

81
  static GepIndices create(const llvm::GEPOperator* inst, bool skip_first = true);
82
};
83

84
GepIndices GepIndices::create(const llvm::GEPOperator* inst, bool skip_first) {
712✔
85
  GepIndices gep_ind;
712✔
86
  gep_ind.gep            = inst;
712✔
87
  gep_ind.skipped        = skip_first;
712✔
88
  gep_ind.is_byte_access = util::is_byte_indexing(gep_ind.gep);
712✔
89

90
#if LLVM_VERSION_MAJOR > 12
91
  for (const auto& index : inst->indices()) {
1,806✔
92
#else
93
  for (const auto& index : llvm::make_range(inst->idx_begin(), inst->idx_end())) {
478✔
94
#endif
95
    LOG_DEBUG("Iter " << skip_first << " with " << *index.get())
96
    if (skip_first) {
1,572✔
97
      skip_first = false;
526✔
98
      continue;
662✔
99
    }
100
    if (auto* const_idx = llvm::dyn_cast<llvm::ConstantInt>(index.get())) {
1,616!
101
      const int64_t index = const_idx->getValue().getSExtValue();
910✔
102
      gep_ind.indices_.emplace_back(index);
910✔
103
    }
910✔
104
  }
1,436!
105
  return gep_ind;
574✔
106
}
712!
107

108
inline llvm::raw_ostream& operator<<(llvm::raw_ostream& os, const GepIndices& indices) {
109
  const auto& vec = indices.indices_;
110
  if (vec.empty()) {
111
    os << "[]";
112
    return os;
113
  }
114
  const auto* begin = std::begin(vec);
115
  os << "[" << *begin;
116
  std::for_each(std::next(begin), std::end(vec), [&](const auto value) {
117
    os << ", ";
118
    os << value;
119
  });
120
  os << "]";
121
  return os;
122
}
123

124
namespace detail {
125

126
llvm::DINode* select_non_zero_element(llvm::DINode* element, llvm::DINode* next_element) {
100✔
127
  // used to detect the empty base class optimization
128
  auto* derived_type_member      = llvm::dyn_cast<llvm::DIDerivedType>(element);
100✔
129
  auto* next_derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(next_element);
100✔
130
  if (derived_type_member != nullptr && next_derived_type_member != nullptr) {
100!
131
    LOG_DEBUG("Non-null elements")
132
    if (derived_type_member->getOffsetInBits() == next_derived_type_member->getOffsetInBits()) {
96✔
133
      LOG_DEBUG("Same offset detected: " << log::ditype_str(derived_type_member) << " and "
134
                                         << log::ditype_str(next_derived_type_member))
135
      return next_element;
32✔
136
    }
137
  }
48✔
138
  return element;
68✔
139
}
100✔
140

141
template <typename UnlessFn>
142
auto find_non_derived_type_unless(llvm::DIType* root, UnlessFn&& unless) {
1,818✔
143
  llvm::DIType* type = root;
1,818✔
144
  while (type && llvm::isa<llvm::DIDerivedType>(type)) {
3,376!
145
    if (unless(type)) {
1,878!
146
      break;
254✔
147
    }
148
    auto* ditype = llvm::dyn_cast<llvm::DIDerivedType>(type);
1,558✔
149
    type         = ditype->getBaseType();
1,558✔
150
  }
1,270✔
151
  return type;
3,636✔
152
}
1,818✔
153

154
inline bool is_pointer_like(const llvm::DIType& di_type) {
348✔
155
  if (const auto* type = llvm::dyn_cast<llvm::DIDerivedType>(&di_type)) {
630!
156
    return type->getTag() == llvm::dwarf::DW_TAG_array_type || type->getTag() == llvm::dwarf::DW_TAG_reference_type ||
696!
157
           type->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
348✔
158
           type->getTag() == llvm::dwarf::DW_TAG_ptr_to_member_type;
32✔
159
  }
160
  return false;
×
161
}
348✔
162

163
inline bool is_non_static_member(llvm::DINode* elem) {
1,628✔
164
  return elem->getTag() == llvm::dwarf::DW_TAG_member &&
3,016✔
165
         llvm::cast<llvm::DIType>(elem)->getFlags() != llvm::DINode::DIFlags::FlagStaticMember;
1,388✔
166
}
167

168
inline bool is_ebo_inherited_composite(llvm::DINode* dinode) {
2,592✔
169
  if (auto* derived = llvm::dyn_cast<llvm::DIDerivedType>(dinode)) {
4,412!
170
    if (derived->getTag() != llvm::dwarf::DW_TAG_inheritance) {
2,344✔
171
      return false;
2,178✔
172
    }
173

174
    LOG_DEBUG(log::ditype_str(derived))
175
    auto* base = llvm::dyn_cast<llvm::DICompositeType>(derived->getBaseType());
166✔
176
    if (!base) {
166!
177
      LOG_DEBUG("Is not a composite inheritance " << log::ditype_str(derived))
178
      return false;
2✔
179
    }
180
    const bool has_sized_member =
294✔
181
        llvm::any_of(base->getElements(), [](llvm::DINode* elem) { return detail::is_non_static_member(elem); });
478✔
182
    LOG_DEBUG("Has sized mem " << has_sized_member)
183
    return !has_sized_member;
164✔
184
  }
132✔
185

186
  return false;
248✔
187
}
2,592✔
188

189
}  // namespace detail
190

191
auto find_non_derived_type_unless_ptr(llvm::DIType* root) {
806✔
192
  return detail::find_non_derived_type_unless(root, [](auto* val) { return detail::is_pointer_like(*val); });
1,154✔
193
}
194

195
auto find_non_derived_type(llvm::DIType* root) {
1,008✔
196
  return detail::find_non_derived_type_unless(root, [](auto* val) { return false; });
2,514✔
197
}
198

199
llvm::DICompositeType* skip_first_gep_access(llvm::DICompositeType* composite_type) {
346✔
200
  using namespace detail;
201
  const auto select_next_member = [&](llvm::DICompositeType* base) -> std::optional<llvm::DIType*> {
372✔
202
    auto elems = base->getElements();
26✔
203
    if (elems.empty()) {
26!
204
      return {};
×
205
    }
206
    auto* element = *base->getElements().begin();
26✔
207
    if (elems.size() > 1) {
26!
208
      auto* next_element = *(std::next(base->getElements().begin()));
×
NEW
209
      element            = detail::select_non_zero_element(element, next_element);
×
210
    }
×
211

212
    return find_non_derived_type_unless_ptr(llvm::dyn_cast<llvm::DIType>(element));
26✔
213
  };
26✔
214

215
  const auto should_iterate_next_member = [&](auto* composite_type) {
718✔
216
    const auto has_members =
678✔
217
        llvm::count_if(composite_type->getElements(), [](const auto& elem) { return is_non_static_member(elem); }) > 1;
1,686✔
218
    return !static_cast<bool>(has_members);
678✔
219
  };
306✔
220

221
  while (should_iterate_next_member(composite_type)) {
372✔
222
    auto next_di = select_next_member(composite_type);
26✔
223
    if (!next_di || !llvm::isa<llvm::DICompositeType>(next_di.value())) {
26!
224
      LOG_DEBUG("Did not find next member")
225
      break;
×
226
    }
227
    composite_type = llvm::dyn_cast<llvm::DICompositeType>(next_di.value());
26✔
228
    LOG_DEBUG("Found next " << log::ditype_str(composite_type))
229
  }
26!
230

231
  return composite_type;
626✔
232
}
346✔
233

234
GepIndexToType iterate_gep_index(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
610✔
235
  const auto has_next_gep_idx = [&gep_indices](size_t pos) { return pos + 1 < gep_indices.size(); };
1,074✔
236

237
  LOG_DEBUG("Iterate over gep: " << gep_indices);
238

239
  const auto is_static_member = [](const llvm::DINode* node) {
1,450✔
240
    if (const auto* derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(node)) {
1,492!
241
      return derived_type_member->isStaticMember();
840✔
242
    }
243
    return false;
×
244
  };
840✔
245

246
  for (const auto& enum_index : llvm::enumerate(gep_indices.indices())) {
1,390!
247
    auto gep_index       = enum_index.value();
780✔
248
    const auto& elements = composite_type->getElements();
780✔
249
    assert(elements.size() > gep_index);
780!
250

251
    auto* element = elements[gep_index];
780✔
252

253
    if (gep_index == 0 && elements.size() > 1) {
780✔
254
      // e.g., LLVM-14: cpp/heap_lhs_function_opt.cpp: vector gep is [0 0 0 0 ...] -> never recurse into EBO
255
      LOG_DEBUG("Check zero-size pattern for " << log::ditype_str(composite_type))
256
      auto* next_element = elements[1];
100✔
257
      element            = detail::select_non_zero_element(element, next_element);
100✔
258
    }
100✔
259

260
    const auto ebo_inheritance_offset = llvm::count_if(
1,384✔
261
        composite_type->getElements(), [&](auto* dinode) { return detail::is_ebo_inherited_composite(dinode); });
3,372✔
262

263
    if (gep_index > 0 && ebo_inheritance_offset > 0 && (gep_index) < elements.size()) {
780!
264
      LOG_DEBUG("EBO offset needed " << ebo_inheritance_offset)
265
      gep_index += ebo_inheritance_offset;
40✔
266
      element = elements[gep_index];
40✔
267
    }
40✔
268

269
    if (!llvm::isa<llvm::DIDerivedType>(element)) {
780!
270
      LOG_DEBUG("Index shows to non-derived type: " << log::ditype_str(element))
271
      // TODO, if only one index, and this triggers, go first element all the way down?
272
      // maybe also check for class type (not structs etc.)
273
    }
274

275
    while (gep_index < elements.size() && is_static_member(element)) {
840!
276
      LOG_DEBUG("Skipping static member of composite " << log::ditype_str(element))
277
      element = elements[++gep_index];
60✔
278
    }
279

280
    LOG_DEBUG(" element: " << log::ditype_str(element))
281

282
    if (auto* derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(element)) {
1,560!
283
      auto* member_type = find_non_derived_type_unless_ptr(derived_type_member->getBaseType());
780✔
284

285
      LOG_DEBUG("Looking at " << log::ditype_str(member_type))
286

287
      if (auto* composite_member_type = llvm::dyn_cast<llvm::DICompositeType>(member_type)) {
1,170✔
288
        if (composite_member_type->getTag() == llvm::dwarf::DW_TAG_class_type ||
464!
289
            composite_member_type->getTag() == llvm::dwarf::DW_TAG_structure_type) {
464✔
290
          // maybe need to recurse into!
291
          if (has_next_gep_idx(enum_index.index())) {
320✔
292
            composite_type = composite_member_type;
122✔
293
            continue;
170✔
294
          }
295
        }
88✔
296
        if (composite_member_type->getTag() == llvm::dwarf::DW_TAG_array_type) {
294✔
297
          if (has_next_gep_idx(enum_index.index())) {
186✔
298
            // At end of gep instruction, return basetype:
299
            return GepIndexToType{composite_member_type->getBaseType(), derived_type_member};
80✔
300
          }
301
          // maybe need to recurse into tag_array_type (of non-basic type...)
302
        }
90✔
303
      }
178✔
304

305
      return GepIndexToType{member_type, derived_type_member};
530✔
306
    }
780✔
307
  }
780!
308
  return {};
×
309
}
390✔
310

311
GepIndexToType resolve_gep_index_to_type(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
712✔
312
  if (gep_indices.empty()) {
712✔
313
    // this triggers for composite (-array) access without constant index, see "heap_milc_struct_mock.c":
314
    LOG_DEBUG("Gep indices empty")
315
    return GepIndexToType{composite_type};
104✔
316
  }
317

318
  if (gep_indices.byte_access()) {
660✔
319
    LOG_DEBUG("Trying to resolve byte access based on offset " << gep_indices.indices_[0])
320
    auto result = visitor::util::resolve_byte_offset_to_member_of(composite_type, gep_indices.indices_[0]);
50✔
321
    if (result) {
50✔
322
      return GepIndexToType{result->type_of_member, result->member};
46✔
323
    }
324
    return GepIndexToType{composite_type};
8✔
325
  }
50✔
326

327
  if (gep_indices.skipped_first() && gep_indices.indices_[0] != 0) {
610!
328
    // This assumes that a single (and only single) first 0 skips through to the first element with more than one
329
    // member: struct A { struct B { struct C { int, int } } } -> would skip to "struct C" for gep [0 1]
330
    // see test gep/global_nested.c
331
    LOG_DEBUG("Skip single member nested of: " << log::ditype_str(composite_type))
332
    composite_type = skip_first_gep_access(composite_type);
346✔
333
    LOG_DEBUG("Result of skip: " << log::ditype_str(composite_type))
334
  }
346✔
335

336
  return iterate_gep_index(composite_type, gep_indices);
610✔
337
}
712✔
338

339
std::optional<GepIndexToType> try_resolve_inlined_operator(const llvm::GEPOperator* gep) {
4✔
340
  const auto* const load = llvm::dyn_cast<llvm::Instruction>(gep->getPointerOperand());
4✔
341
  if (!load) {
4!
342
    return {};
×
343
  }
344

345
  const auto* const sub_prog = llvm::dyn_cast<llvm::DISubprogram>(load->getDebugLoc().getScope());
4✔
346
  assert(sub_prog && "Scope does not represent a subprogram");
8!
347

348
  // see cpp/heap_vector_operator.cpp: vector::operator[] returns a reference, that we skip here:
349
  const auto remove_ref = [&](auto* di_type) {
8✔
350
    auto node = detail::find_non_derived_type_unless(di_type, [](auto* node) {
28✔
351
      if (const auto* type = llvm::dyn_cast<llvm::DIDerivedType>(node)) {
48!
352
        return type->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
24✔
353
               type->getTag() == llvm::dwarf::DW_TAG_ptr_to_member_type;
20✔
354
      }
355
      return false;
×
356
    });
24✔
357
    return node;
8✔
358
  };
4✔
359

360
  if (auto* sub_program_type = sub_prog->getType()) {
8!
361
    if (sub_program_type->getTypeArray().size() > 0) {
4!
362
      return {GepIndexToType{remove_ref(*sub_program_type->getTypeArray().begin())}};
8✔
363
    }
364
  }
365
  LOG_DEBUG("Could not detect inlined operator")
366
  return {};
×
367
}
4✔
368

369
GepIndexToType extract_gep_dereferenced_type(llvm::DIType* root, const llvm::GEPOperator& inst) {
1,008✔
370
  using namespace llvm;
371

372
  auto* const gep_src = inst.getSourceElementType();
1,008✔
373

374
  auto* const base_ty        = find_non_derived_type(root);
1,008✔
375
  auto* const composite_type = llvm::dyn_cast_or_null<DICompositeType>(base_ty);
1,008✔
376
  // see test cpp/heap_vector_opt.cpp: GEP on pointer (of inlined operator[])
377
  const bool may_be_inlined_operator = (composite_type != nullptr) && composite_type->isForwardDecl();
1,008✔
378

379
  if (gep_src->isPointerTy() && !may_be_inlined_operator) {
1,008✔
380
    LOG_DEBUG("Gep to ptr " << log::ditype_str(root));
381
    return GepIndexToType{root};
408✔
382
  }
383

384
  if (gep_src->isArrayTy()) {
804✔
385
    if (composite_type != nullptr) {
82!
386
      auto* base_type = composite_type->getBaseType();
82✔
387
      LOG_DEBUG("Gep to array of DI composite, with base type " << log::ditype_str(base_type));
388
      return GepIndexToType{base_type};
82✔
389
    }
82✔
390
    LOG_DEBUG("Gep to array " << log::ditype_str(root));
391
    return GepIndexToType{root};
×
392
  }
393

394
  auto* const derived_root = llvm::dyn_cast<DIDerivedType>(root);
722✔
395
  const bool is_pointer_target =
1,202✔
396
      (derived_root != nullptr) && derived_root->getBaseType()->getTag() == dwarf::DW_TAG_pointer_type;
722✔
397
  // TODO: This check seems like a bad idea but I'm not really sure how to do it properly, I reckon we need *some*
398
  //       heuristic to detect "fake-array" types though (e.g. gep/array_composite_s.c)
399
  if (util::is_byte_indexing(&inst) && (!composite_type || is_pointer_target)) {
722✔
400
    LOG_DEBUG("Gep with byte offset to pointer-like : " << log::ditype_str(root))
401
    return GepIndexToType{root};
12✔
402
  }
403

404
  assert(composite_type != nullptr && "Root should be a struct-like type.");
1,294!
405

406
  if (composite_type->isForwardDecl()) {
716✔
407
    LOG_DEBUG("Trying to resolve forward-declared composite type " << log::ditype_str(composite_type))
408
    return try_resolve_inlined_operator(&inst).value_or(GepIndexToType{root});
8✔
409
  }
410

411
  LOG_DEBUG("Gep to DI composite: " << log::ditype_str(composite_type))
412
  bool skip_first{!util::is_first_non_zero_indexing(&inst)};
712✔
413
  if (util::is_byte_indexing(&inst)) {
712✔
414
    LOG_DEBUG("Access based on i8 ptr, assuming byte offsetting into composite member")
415
    skip_first = false;  // We do not skip over byte index values (likely != 0)
48✔
416
  }
50✔
417

418
  auto accessed_ditype = resolve_gep_index_to_type(composite_type, GepIndices::create(&inst, skip_first));
712✔
419

420
  return accessed_ditype;
280✔
421
}
1,146✔
422

423
}  // namespace dimeta::gep
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc