• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 13811884945

12 Mar 2025 12:55PM UTC coverage: 83.51% (-0.1%) from 83.612%
13811884945

push

github

web-flow
Refactor IR type extraction (#28)

1007 of 1455 branches covered (69.21%)

Branch coverage included in aggregate %.

250 of 255 new or added lines in 6 files covered. (98.04%)

8 existing lines in 4 files now uncovered.

1905 of 2032 relevant lines covered (93.75%)

3689.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.38
/lib/type/GEP.cpp
1
//  llvm-dimeta library
2
//  Copyright (c) 2022-2025 llvm-dimeta authors
3
//  Distributed under the BSD 3-Clause license.
4
//  (See accompanying file LICENSE)
5
//  SPDX-License-Identifier: BSD-3-Clause
6
//
7

8
#include "GEP.h"
9

10
#include "DIVisitor.h"
11
#include "DIVisitorUtil.h"
12
#include "support/Logger.h"
13

14
#include "llvm/ADT/APInt.h"
15
#include "llvm/ADT/DenseMapInfo.h"
16
#include "llvm/ADT/STLExtras.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/ADT/iterator.h"
19
#include "llvm/ADT/iterator_range.h"
20
#include "llvm/BinaryFormat/Dwarf.h"
21
#include "llvm/IR/Constants.h"
22
#include "llvm/IR/DebugInfoMetadata.h"
23
#include "llvm/IR/Metadata.h"
24
#include "llvm/IR/Operator.h"
25
#include "llvm/IR/Type.h"
200✔
26
#include "llvm/IR/Use.h"
27
#include "llvm/Support/Casting.h"
28
#include "llvm/Support/Debug.h"
29
#include "llvm/Support/raw_ostream.h"
30

31
#include <algorithm>
32
#include <cassert>
33
#include <cstddef>
34
#include <cstdint>
35
#include <iterator>
36
#include <optional>
37

38
namespace dimeta::gep {
39

40
namespace util {
41
inline bool is_byte_indexing(const llvm::GEPOperator* gep) {
1,804✔
42
  return gep->getSourceElementType()->isIntegerTy(8);
1,804✔
43
}
44

45
inline bool is_first_non_zero_indexing(const llvm::GEPOperator* gep) {
598✔
46
  if (auto* const_idx = llvm::dyn_cast<llvm::ConstantInt>((*gep->idx_begin()).get())) {
1,024!
47
    const int64_t index = const_idx->getValue().getSExtValue();
528✔
48
    return index > 0;
528✔
49
  }
426✔
50
  return false;
70✔
51
}
598✔
52
}  // namespace util
53

54
struct GepIndices {
598✔
55
  const llvm::GEPOperator* gep;
56
  llvm::SmallVector<uint64_t, 4> indices_;
57
  bool skipped{false};
598✔
58
  bool is_byte_access{false};
598✔
59
  using Iter = llvm::SmallVector<uint64_t, 4>::const_iterator;
60

61
  inline llvm::iterator_range<Iter> indices() const {
504✔
62
    return llvm::iterator_range<Iter>(indices_);
504✔
63
  }
64

65
  inline size_t size() const {
1,062✔
66
    return indices_.size();
1,062✔
67
  }
68

69
  inline bool empty() const {
598✔
70
    return size() == 0;
598✔
71
  }
72

73
  inline bool skipped_first() const {
546✔
74
    return skipped;
546✔
75
  }
76

77
  inline bool byte_access() const {
546✔
78
    return is_byte_access;
546✔
79
  }
80

81
  static GepIndices create(const llvm::GEPOperator* inst, bool skip_first = true);
82
};
83

84
GepIndices GepIndices::create(const llvm::GEPOperator* inst, bool skip_first) {
598✔
85
  GepIndices gep_ind;
598✔
86
  gep_ind.gep            = inst;
598✔
87
  gep_ind.skipped        = skip_first;
598✔
88
  gep_ind.is_byte_access = util::is_byte_indexing(gep_ind.gep);
598✔
89

90
#if LLVM_VERSION_MAJOR > 12
91
  for (const auto& index : inst->indices()) {
1,538✔
92
#else
93
  for (const auto& index : llvm::make_range(inst->idx_begin(), inst->idx_end())) {
412✔
94
#endif
95
    LOG_DEBUG("Iter " << skip_first << " with " << *index.get())
96
    if (skip_first) {
1,352✔
97
      skip_first = false;
442✔
98
      continue;
556✔
99
    }
100
    if (auto* const_idx = llvm::dyn_cast<llvm::ConstantInt>(index.get())) {
1,410!
101
      const int64_t index = const_idx->getValue().getSExtValue();
796✔
102
      gep_ind.indices_.emplace_back(index);
796✔
103
    }
796✔
104
  }
1,238!
105
  return gep_ind;
482✔
106
}
598!
107

108
inline llvm::raw_ostream& operator<<(llvm::raw_ostream& os, const GepIndices& indices) {
109
  const auto& vec = indices.indices_;
110
  if (vec.empty()) {
111
    os << "[]";
112
    return os;
113
  }
114
  const auto* begin = std::begin(vec);
115
  os << "[" << *begin;
116
  std::for_each(std::next(begin), std::end(vec), [&](const auto value) {
117
    os << ", ";
118
    os << value;
119
  });
120
  os << "]";
121
  return os;
122
}
123

124
llvm::DINode* select_non_zero_element(llvm::DINode* element, llvm::DINode* next_element) {
340✔
125
  // used to detect the empty base class optimization
126
  auto* derived_type_member      = llvm::dyn_cast<llvm::DIDerivedType>(element);
340✔
127
  auto* next_derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(next_element);
340✔
128
  if (derived_type_member != nullptr && next_derived_type_member != nullptr) {
340!
129
    LOG_DEBUG("Non-null elements")
130
    if (derived_type_member->getOffsetInBits() == next_derived_type_member->getOffsetInBits()) {
336!
131
      LOG_DEBUG("Same offset detected: " << log::ditype_str(derived_type_member) << " and "
132
                                         << log::ditype_str(next_derived_type_member))
133
      return next_element;
2✔
134
    }
135
  }
268✔
136
  return element;
338✔
137
}
340✔
138

139
namespace detail {
140

141
template <typename UnlessFn>
142
auto find_non_derived_type_unless(llvm::DIType* root, UnlessFn&& unless) {
1,598✔
143
  llvm::DIType* type = root;
1,598✔
144
  while (type && llvm::isa<llvm::DIDerivedType>(type)) {
3,042!
145
    if (unless(type)) {
1,658!
146
      break;
170✔
147
    }
148
    auto* ditype = llvm::dyn_cast<llvm::DIDerivedType>(type);
1,444✔
149
    type         = ditype->getBaseType();
1,444✔
150
  }
1,178✔
151
  return type;
3,196✔
152
}
1,598✔
153

154
inline bool is_pointer_like(const llvm::DIType& di_type) {
242✔
155
  if (const auto* type = llvm::dyn_cast<llvm::DIDerivedType>(&di_type)) {
440!
156
    return type->getTag() == llvm::dwarf::DW_TAG_array_type || type->getTag() == llvm::dwarf::DW_TAG_reference_type ||
484!
157
           type->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
242✔
158
           type->getTag() == llvm::dwarf::DW_TAG_ptr_to_member_type;
32✔
159
  }
160
  return false;
×
161
}
242✔
162

163
}  // namespace detail
164

165
auto find_non_derived_type_unless_ptr(llvm::DIType* root) {
700✔
166
  return detail::find_non_derived_type_unless(root, [](auto* val) { return detail::is_pointer_like(*val); });
942✔
167
}
168

169
auto find_non_derived_type(llvm::DIType* root) {
894✔
170
  return detail::find_non_derived_type_unless(root, [](auto* val) { return false; });
2,286✔
171
}
172

173
llvm::DICompositeType* skip_first_gep_access(llvm::DICompositeType* composite_type) {
270✔
174
  const auto select_next_member = [&](llvm::DICompositeType* base) -> std::optional<llvm::DIType*> {
296✔
175
    auto elems = base->getElements();
26✔
176
    if (elems.empty()) {
26!
177
      return {};
×
178
    }
179
    auto* element = *base->getElements().begin();
26✔
180
    if (elems.size() > 1) {
26!
NEW
181
      auto* next_element = *(std::next(base->getElements().begin()));
×
NEW
182
      element            = select_non_zero_element(element, next_element);
×
UNCOV
183
    }
×
184

185
    return find_non_derived_type_unless_ptr(llvm::dyn_cast<llvm::DIType>(element));
26✔
186
  };
26✔
187

188
  const auto should_iterate_next_member = [](auto* composite_type) {
566✔
189
    auto elem_count = llvm::count_if(composite_type->getElements(),
296✔
190
                                     [](const auto& elem) { return !llvm::isa<llvm::DISubprogram>(elem); });
1,024✔
191
    if (elem_count > 1) {
296✔
192
      auto element      = *composite_type->getElements().begin();
270✔
193
      auto next_element = *(std::next(composite_type->getElements().begin()));
270✔
194
      element           = select_non_zero_element(element, next_element);
270✔
195
      return element == next_element;
270✔
196
    }
220✔
197

198
    return elem_count == 1;
26✔
199
  };
296✔
200

201
  while (should_iterate_next_member(composite_type)) {
296✔
202
    auto next_di = select_next_member(composite_type);
26✔
203
    if (!next_di) {
26!
204
      break;
×
205
    }
206
    if (!llvm::isa<llvm::DICompositeType>(next_di.value())) {
26!
207
      break;
×
208
    }
209
    composite_type = llvm::dyn_cast<llvm::DICompositeType>(next_di.value());
26✔
210
  }
26!
211

212
  return composite_type;
490✔
213
}
270✔
214

215
GepIndexToType iterate_gep_index(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
504✔
216
  const auto has_next_idx = [&gep_indices](size_t pos) { return pos + 1 < gep_indices.size(); };
968✔
217

218
  LOG_DEBUG("Iterate over gep: " << gep_indices);
219

220
  for (const auto& enum_index : llvm::enumerate(gep_indices.indices())) {
1,178!
221
    const auto index  = enum_index.value();
674✔
222
    const auto& elems = composite_type->getElements();
674✔
223
    assert(elems.size() > index);
674!
224

225
    auto* element = elems[index];
674✔
226
    if (!llvm::isa<llvm::DIDerivedType>(element)) {
674!
227
      LOG_DEBUG("Index shows to non-derived type: " << log::ditype_str(element))
228
      // TODO, if only one index, and this triggers, go first element all the way down?
229
      // maybe also check for class type (not structs etc.)
230
    }
231

232
    if (index == 0 && elems.size() > 1) {
674✔
233
      LOG_DEBUG("Check zero-size pattern")
234
      auto* next_element = elems[index + 1];
70✔
235
      element            = select_non_zero_element(element, next_element);
70✔
236
    }
70✔
237

238
    LOG_DEBUG(" element: " << log::ditype_str(element))
239

240
    if (auto* derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(element)) {
1,348!
241
      auto* member_type = find_non_derived_type_unless_ptr(derived_type_member->getBaseType());
674✔
242

243
      LOG_DEBUG("Looking at " << log::ditype_str(member_type))
244

245
      if (auto* composite_member_type = llvm::dyn_cast<llvm::DICompositeType>(member_type)) {
1,064✔
246
        if (composite_member_type->getTag() == llvm::dwarf::DW_TAG_class_type ||
464!
247
            composite_member_type->getTag() == llvm::dwarf::DW_TAG_structure_type) {
464✔
248
          // maybe need to recurse into!
249
          if (has_next_idx(enum_index.index())) {
320✔
250
            composite_type = composite_member_type;
122✔
251
            continue;
170✔
252
          }
253
        }
88✔
254
        if (composite_member_type->getTag() == llvm::dwarf::DW_TAG_array_type) {
294✔
255
          if (has_next_idx(enum_index.index())) {
186✔
256
            // At end of gep instruction, return basetype:
257
            return GepIndexToType{composite_member_type->getBaseType(), derived_type_member};
80✔
258
          }
259
          // maybe need to recurse into tag_array_type (of non-basic type...)
260
        }
90✔
261
      }
178✔
262

263
      return GepIndexToType{member_type, derived_type_member};
424✔
264
    }
674✔
265
  }
674!
266
  return {};
×
267
}
324✔
268

269
GepIndexToType resolve_gep_index_to_type(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
598✔
270
  if (gep_indices.empty()) {
598✔
271
    // this triggers for composite (-array) access without constant index, see "heap_milc_struct_mock.c":
272
    LOG_DEBUG("Gep indices empty")
273
    return GepIndexToType{composite_type};
104✔
274
  }
275

276
  if (gep_indices.skipped_first() && gep_indices.indices_[0] != 0) {
546✔
277
    // This assumes that a single (and only single) first 0 skips through to the first element with more than one
278
    // member: struct A { struct B { struct C { int, int } } } -> would skip to "struct C" for gep [0 1]
279
    LOG_DEBUG("Skip single member nested of: " << log::ditype_str(composite_type))
280
    composite_type = skip_first_gep_access(composite_type);
270✔
281
    LOG_DEBUG("Result of skip: " << log::ditype_str(composite_type))
282
  }
270✔
283

284
  if (gep_indices.byte_access()) {
546✔
285
    LOG_DEBUG("Trying to resolve byte access based on offset " << gep_indices.indices_[0])
286
    auto result = visitor::util::resolve_byte_offset_to_member_of(composite_type, gep_indices.indices_[0]);
42✔
287
    if (result) {
42✔
288
      return GepIndexToType{result->type_of_member, result->member};
38✔
289
    }
290
    // TODO: Should this function really return `GepIndexToType` instead of an optional?
291
    return GepIndexToType{composite_type};  // visitor.result().value_or(GepIndexToType{composite_type});
8✔
292
  }
42✔
293

294
  return iterate_gep_index(composite_type, gep_indices);
504✔
295
}
598✔
296

297
std::optional<GepIndexToType> try_resolve_inlined_operator(const llvm::GEPOperator* gep) {
4✔
298
  const auto* const load = llvm::dyn_cast<llvm::Instruction>(gep->getPointerOperand());
4✔
299
  if (!load) {
4!
UNCOV
300
    return {};
×
301
  }
302

303
  const auto* const sub_prog = llvm::dyn_cast<llvm::DISubprogram>(load->getDebugLoc().getScope());
4✔
304
  assert(sub_prog && "Scope does not represent a subprogram");
8!
305

306
  // see cpp/heap_vector_operator.cpp: vector::operator[] returns a reference, that we skip here:
307
  const auto remove_ref = [&](auto* di_type) {
8✔
308
    auto node = detail::find_non_derived_type_unless(di_type, [](auto* node) {
28✔
309
      if (const auto* type = llvm::dyn_cast<llvm::DIDerivedType>(node)) {
48!
310
        return type->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
24✔
311
               type->getTag() == llvm::dwarf::DW_TAG_ptr_to_member_type;
20✔
312
      }
NEW
313
      return false;
×
314
    });
24✔
315
    return node;
8✔
316
  };
4✔
317

318
  if (auto* sub_program_type = sub_prog->getType()) {
8!
319
    if (sub_program_type->getTypeArray().size() > 0) {
4!
320
      return {GepIndexToType{remove_ref(*sub_program_type->getTypeArray().begin())}};
8✔
321
    }
322
  }
323
  LOG_DEBUG("Could not detect inlined operator")
NEW
324
  return {};
×
325
}
4✔
326

327
GepIndexToType extract_gep_dereferenced_type(llvm::DIType* root, const llvm::GEPOperator& inst) {
894✔
328
  using namespace llvm;
329

330
  auto* const gep_src = inst.getSourceElementType();
894✔
331

332
  auto* const base_ty        = find_non_derived_type(root);
894✔
333
  auto* const composite_type = llvm::dyn_cast_or_null<DICompositeType>(base_ty);
894✔
334
  // see test cpp/heap_vector_opt.cpp: GEP on pointer (of inlined operator[])
335
  const bool may_be_inlined_operator = (composite_type != nullptr) && composite_type->isForwardDecl();
894✔
336

337
  if (gep_src->isPointerTy() && !may_be_inlined_operator) {
894✔
338
    LOG_DEBUG("Gep to ptr " << log::ditype_str(root));
339
    return GepIndexToType{root};
408✔
340
  }
341

342
  if (gep_src->isArrayTy()) {
690✔
343
    if (composite_type != nullptr) {
82!
344
      auto* base_type = composite_type->getBaseType();
82✔
345
      LOG_DEBUG("Gep to array of DI composite, with base type " << log::ditype_str(base_type));
346
      return GepIndexToType{base_type};
82✔
347
    }
82✔
348
    LOG_DEBUG("Gep to array " << log::ditype_str(root));
349
    return GepIndexToType{root};
×
350
  }
351

352
  auto* const derived_root = llvm::dyn_cast<DIDerivedType>(root);
608✔
353
  const bool is_pointer_target =
996✔
354
      (derived_root != nullptr) && derived_root->getBaseType()->getTag() == dwarf::DW_TAG_pointer_type;
608✔
355
  // TODO: This check seems like a bad idea but I'm not really sure how to do it properly, I reckon we need *some*
356
  //       heuristic to detect "fake-array" types though (e.g. gep/array_composite_s.c)
357
  if (util::is_byte_indexing(&inst) && (!composite_type || is_pointer_target)) {
608✔
358
    LOG_DEBUG("Gep with byte offset to pointer-like : " << log::ditype_str(root))
359
    return GepIndexToType{root};
12✔
360
  }
361

362
  assert(composite_type != nullptr && "Root should be a struct-like type.");
1,088!
363

364
  if (composite_type->isForwardDecl()) {
602✔
365
    LOG_DEBUG("Trying to resolve forward-declared composite type " << log::ditype_str(composite_type))
366
    return try_resolve_inlined_operator(&inst).value_or(GepIndexToType{root});
8✔
367
  }
368

369
  LOG_DEBUG("Gep to DI composite: " << log::ditype_str(composite_type))
370
  bool skip_first{!util::is_first_non_zero_indexing(&inst)};
598✔
371
  if (util::is_byte_indexing(&inst)) {
598✔
372
    LOG_DEBUG("Access based on i8 ptr, assuming byte offsetting into composite member")
373
    skip_first = false;  // We do not skip over byte index values (likely != 0)
40✔
374
  }
42✔
375

376
  auto accessed_ditype = resolve_gep_index_to_type(composite_type, GepIndices::create(&inst, skip_first));
598✔
377

378
  return accessed_ditype;
236✔
379
}
1,010✔
380

381
}  // namespace dimeta::gep
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc