• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 13933486559

18 Mar 2025 09:06PM UTC coverage: 83.197% (-0.4%) from 83.637%
13933486559

push

github

web-flow
Refactoring (#35)

1027 of 1482 branches covered (69.3%)

Branch coverage included in aggregate %.

86 of 87 new or added lines in 10 files covered. (98.85%)

17 existing lines in 7 files now uncovered.

1924 of 2065 relevant lines covered (93.17%)

3950.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.37
/lib/type/GEP.cpp
1
//  llvm-dimeta library
2
//  Copyright (c) 2022-2025 llvm-dimeta authors
3
//  Distributed under the BSD 3-Clause license.
4
//  (See accompanying file LICENSE)
5
//  SPDX-License-Identifier: BSD-3-Clause
6
//
7

8
#include "GEP.h"
9

10
#include "DIUtil.h"
11
#include "support/Logger.h"
12

13
#include "llvm/ADT/APInt.h"
14
#include "llvm/ADT/STLExtras.h"
15
#include "llvm/ADT/SmallVector.h"
16
#include "llvm/ADT/iterator_range.h"
17
#include "llvm/BinaryFormat/Dwarf.h"
18
#include "llvm/IR/Constants.h"
19
#include "llvm/IR/DebugInfoMetadata.h"
20
#include "llvm/IR/Metadata.h"
21
#include "llvm/IR/Operator.h"
22
#include "llvm/IR/Type.h"
23
#include "llvm/IR/Use.h"
208✔
24
#include "llvm/Support/Casting.h"
25
#include "llvm/Support/Debug.h"
26
#include "llvm/Support/raw_ostream.h"
27

28
#include <algorithm>
29
#include <cassert>
30
#include <cstddef>
31
#include <cstdint>
32
#include <iterator>
33
#include <optional>
34

35
namespace dimeta::gep {
36

37
namespace util {
38
// Returns true if the source element type of the given GEP is an 8-bit integer (i8).
inline bool is_byte_indexing(const llvm::GEPOperator* gep) {
  auto* const source_element_type = gep->getSourceElementType();
  return source_element_type->isIntegerTy(8);
}
41

42
// Returns true if the first index operand of the GEP is a constant integer whose
// sign-extended value is greater than zero. Returns false if the GEP has no index
// operands, if the first index is not a constant, or if it is zero or negative.
inline bool is_first_non_zero_indexing(const llvm::GEPOperator* gep) {
  // A GEP without any index operand is valid IR; idx_begin() must not be dereferenced then.
  if (!gep->hasIndices()) {
    return false;
  }
  if (auto* const_idx = llvm::dyn_cast<llvm::ConstantInt>((*gep->idx_begin()).get())) {
    const int64_t index = const_idx->getValue().getSExtValue();
    return index > 0;
  }
  return false;
}
716✔
49
}  // namespace util
50

51
struct GepIndices {
716✔
52
  const llvm::GEPOperator* gep;
53
  llvm::SmallVector<uint64_t, 4> indices_;
54
  bool skipped{false};
716✔
55
  bool is_byte_access{false};
716✔
56
  using Iter = llvm::SmallVector<uint64_t, 4>::const_iterator;
57

58
  llvm::iterator_range<Iter> indices() const {
616✔
59
    return llvm::iterator_range<Iter>(indices_);
616✔
60
  }
61

62
  size_t size() const {
1,186✔
63
    return indices_.size();
1,186✔
64
  }
65

66
  bool empty() const {
716✔
67
    return size() == 0;
716✔
68
  }
69

70
  bool skipped_first() const {
616✔
71
    return skipped;
616✔
72
  }
73

74
  bool byte_access() const {
664✔
75
    return is_byte_access;
664✔
76
  }
77

78
  static GepIndices create(const llvm::GEPOperator* inst, bool skip_first = true);
79
};
80

81
// Builds a GepIndices object for the given GEP operator.
// If skip_first is set, the first index operand is not recorded. Of the remaining
// operands, only constant integers are recorded (as their sign-extended value);
// non-constant index operands are dropped.
GepIndices GepIndices::create(const llvm::GEPOperator* inst, bool skip_first) {
  GepIndices collected;
  collected.gep            = inst;
  collected.skipped        = skip_first;
  collected.is_byte_access = util::is_byte_indexing(inst);

#if LLVM_VERSION_MAJOR > 12
  const auto index_operands = inst->indices();
#else
  const auto index_operands = llvm::make_range(inst->idx_begin(), inst->idx_end());
#endif

  bool drop_pending = skip_first;
  for (const auto& operand : index_operands) {
    LOG_DEBUG("Iter " << drop_pending << " with " << *operand.get())
    if (drop_pending) {
      // Only the very first operand is dropped.
      drop_pending = false;
      continue;
    }
    auto* constant = llvm::dyn_cast<llvm::ConstantInt>(operand.get());
    if (constant == nullptr) {
      continue;
    }
    const int64_t value = constant->getValue().getSExtValue();
    collected.indices_.emplace_back(value);
  }
  return collected;
}
716!
104

105
// Prints the collected indices as a bracketed, comma-separated list, e.g. "[0, 1]";
// an empty index list is printed as "[]".
inline llvm::raw_ostream& operator<<(llvm::raw_ostream& os, const GepIndices& indices) {
  os << "[";
  const char* separator = "";
  for (const auto value : indices.indices_) {
    os << separator << value;
    separator = ", ";
  }
  os << "]";
  return os;
}
120

121
namespace detail {
122

123
// Used to detect the empty base class optimization: if both nodes are derived types
// that report the same bit offset, the second node is selected; otherwise the first.
llvm::DINode* select_non_zero_element(llvm::DINode* element, llvm::DINode* next_element) {
  auto* current_member   = llvm::dyn_cast<llvm::DIDerivedType>(element);
  auto* following_member = llvm::dyn_cast<llvm::DIDerivedType>(next_element);

  if (current_member == nullptr || following_member == nullptr) {
    return element;
  }

  LOG_DEBUG("Non-null elements")
  const bool shares_offset = current_member->getOffsetInBits() == following_member->getOffsetInBits();
  if (!shares_offset) {
    return element;
  }

  LOG_DEBUG("Same offset detected: " << log::ditype_str(current_member) << " and "
                                     << log::ditype_str(following_member))
  return next_element;
}
108✔
137

138
template <typename UnlessFn>
139
auto find_non_derived_type_unless(llvm::DIType* root, UnlessFn&& unless) {
1,842✔
140
  llvm::DIType* type = root;
1,842✔
141
  while (type && llvm::isa<llvm::DIDerivedType>(type)) {
3,432!
142
    if (unless(type)) {
1,918!
143
      break;
328✔
144
    }
145
    auto* ditype = llvm::dyn_cast<llvm::DIDerivedType>(type);
1,590✔
146
    type         = ditype->getBaseType();
1,590✔
147
  }
1,590✔
148
  return type;
3,684✔
149
}
1,842✔
150

151
// Returns true if the node is an inheritance entry (DW_TAG_inheritance) whose base
// type is a composite type without any non-static member.
inline bool is_ebo_inherited_composite(llvm::DINode* dinode) {
  auto* inheritance = llvm::dyn_cast<llvm::DIDerivedType>(dinode);
  if (inheritance == nullptr) {
    return false;
  }
  if (inheritance->getTag() != llvm::dwarf::DW_TAG_inheritance) {
    return false;
  }

  LOG_DEBUG(log::ditype_str(inheritance))
  auto* parent = llvm::dyn_cast<llvm::DICompositeType>(inheritance->getBaseType());
  if (parent == nullptr) {
    LOG_DEBUG("Is not a composite inheritance " << log::ditype_str(inheritance))
    return false;
  }

  const auto is_sized_member = [](llvm::DINode* member) { return di::util::is_non_static_member(*member); };
  const bool has_sized_member = llvm::any_of(parent->getElements(), is_sized_member);
  LOG_DEBUG("Has sized mem " << has_sized_member)
  return !has_sized_member;
}
2,824✔
171

172
}  // namespace detail
173

174
// Follows the derived-type chain of root, but stops early at the first type that
// di::util::is_pointer_like reports as pointer-like.
auto find_non_derived_type_unless_ptr(llvm::DIType* root) {
  const auto stop_at_pointer_like = [](auto* candidate) { return di::util::is_pointer_like(*candidate); };
  return detail::find_non_derived_type_unless(root, stop_at_pointer_like);
}
177

178
// Follows the derived-type chain of root to its end, i.e., to the first type that
// is null or not a DIDerivedType.
auto find_non_derived_type(llvm::DIType* root) {
  const auto never_stop = [](auto*) { return false; };
  return detail::find_non_derived_type_unless(root, never_stop);
}
181

182
// Descends through nested composites as long as the current composite has exactly
// one non-static member and the selected leading element resolves to a composite type.
// Returns the composite type reached; this is the input if no descent was possible.
llvm::DICompositeType* skip_first_gep_access(llvm::DICompositeType* composite_type) {
  // Resolves the type behind the leading element of `base` (taking the second element
  // instead if both share the same offset). May return nullptr, e.g., if the selected
  // element is not a DIType.
  const auto select_next_member = [](llvm::DICompositeType* base) -> llvm::DIType* {
    auto composite_elements = base->getElements();
    auto* element           = composite_elements[0];
    if (composite_elements.size() > 1) {
      auto* next_element = composite_elements[1];
      element            = detail::select_non_zero_element(element, next_element);
    }

    return find_non_derived_type_unless_ptr(llvm::dyn_cast<llvm::DIType>(element));
  };

  // Exactly one non-static member implies a non-empty element list for select_next_member.
  const auto should_iterate_next_member = [](auto* composite) {
    const auto count_members =
        llvm::count_if(composite->getElements(), [](const auto* elem) { return di::util::is_non_static_member(*elem); });
    return count_members == 1;
  };

  while (should_iterate_next_member(composite_type)) {
    // dyn_cast_or_null: the resolved type can be null, which llvm::isa<> does not accept.
    auto* next_composite = llvm::dyn_cast_or_null<llvm::DICompositeType>(select_next_member(composite_type));
    if (next_composite == nullptr) {
      LOG_DEBUG("Did not find next member")
      break;
    }
    composite_type = next_composite;
    LOG_DEBUG("Found next " << log::ditype_str(composite_type))
  }

  return composite_type;
}
350✔
213

214
// Walks the collected GEP indices through the element list of composite_type.
// For each index, the addressed element is determined (adjusting for same-offset
// leading elements, EBO inheritance entries and static members). If the element's
// type is a struct/class and a further index follows, the walk continues inside that
// type; otherwise the resolved member type and the member node are returned.
// Returns a default-constructed GepIndexToType if no member could be resolved,
// including when an index would leave the element list.
GepIndexToType iterate_gep_index(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
  const auto has_next_gep_idx = [&gep_indices](size_t pos) { return pos + 1 < gep_indices.size(); };

  LOG_DEBUG("Iterate over gep: " << gep_indices);

  const auto is_static_member = [](const llvm::DINode* node) {
    if (const auto* derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(node)) {
      return derived_type_member->isStaticMember();
    }
    return false;
  };

  for (const auto& enum_index : llvm::enumerate(gep_indices.indices())) {
    auto gep_index       = enum_index.value();
    const auto& elements = composite_type->getElements();
    assert(elements.size() > gep_index);
    if (gep_index >= elements.size()) {
      // Also guard builds without assertions against an out-of-bounds element access.
      LOG_DEBUG("Gep index " << gep_index << " exceeds elements of " << log::ditype_str(composite_type))
      return {};
    }

    auto* element = elements[gep_index];

    if (gep_index == 0 && elements.size() > 1) {
      // e.g., LLVM-14: cpp/heap_lhs_function_opt.cpp: vector gep is [0 0 0 0 ...] -> never recurse into EBO
      LOG_DEBUG("Check zero-size pattern for " << log::ditype_str(composite_type))
      auto* next_element = elements[1];
      element            = detail::select_non_zero_element(element, next_element);
    }

    const auto ebo_inheritance_offset =
        llvm::count_if(elements, [&](auto* dinode) { return detail::is_ebo_inherited_composite(dinode); });

    if (gep_index > 0 && ebo_inheritance_offset > 0) {
      LOG_DEBUG("EBO offset needed " << ebo_inheritance_offset)
      const auto shifted_index = gep_index + static_cast<uint64_t>(ebo_inheritance_offset);
      if (shifted_index >= elements.size()) {
        // The shifted index must stay inside the element list.
        LOG_DEBUG("EBO-adjusted index " << shifted_index << " exceeds elements of " << log::ditype_str(composite_type))
        return {};
      }
      gep_index = shifted_index;
      element   = elements[gep_index];
    }

    if (!llvm::isa<llvm::DIDerivedType>(element)) {
      LOG_DEBUG("Index shows to non-derived type: " << log::ditype_str(element))
      // TODO, if only one index, and this triggers, go first element all the way down?
      // maybe also check for class type (not structs etc.)
    }

    while (is_static_member(element)) {
      LOG_DEBUG("Skipping static member of composite " << log::ditype_str(element))
      if (gep_index + 1 >= elements.size()) {
        // Only static members remain: do not read past the last element.
        LOG_DEBUG("No non-static member follows in " << log::ditype_str(composite_type))
        return {};
      }
      element = elements[++gep_index];
    }

    LOG_DEBUG(" element: " << log::ditype_str(element))

    if (auto* derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(element)) {
      auto* member_type = find_non_derived_type_unless_ptr(derived_type_member->getBaseType());

      LOG_DEBUG("Looking at " << log::ditype_str(member_type))

      // member_type is null if the derived-type chain ends without a type.
      if (auto* composite_member_type = llvm::dyn_cast_or_null<llvm::DICompositeType>(member_type)) {
        if (composite_member_type->getTag() == llvm::dwarf::DW_TAG_class_type ||
            composite_member_type->getTag() == llvm::dwarf::DW_TAG_structure_type) {
          // A further index follows: continue the walk inside the struct/class member.
          if (has_next_gep_idx(enum_index.index())) {
            composite_type = composite_member_type;
            continue;
          }
        }
        if (composite_member_type->getTag() == llvm::dwarf::DW_TAG_array_type) {
          if (has_next_gep_idx(enum_index.index())) {
            // A further index follows for the array member: return the array's base type.
            return GepIndexToType{composite_member_type->getBaseType(), derived_type_member};
          }
          // maybe need to recurse into tag_array_type (of non-basic type...)
        }
      }

      return GepIndexToType{member_type, derived_type_member};
    }
  }
  return {};
}
396✔
290

291
// Resolves the collected GEP indices against composite_type:
// - without indices, the composite type itself is returned;
// - for byte (i8) access, the first index is treated as a byte offset into the composite;
// - otherwise the indices are walked via iterate_gep_index, optionally after skipping
//   through single-member nested composites.
GepIndexToType resolve_gep_index_to_type(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
  if (gep_indices.empty()) {
    // this triggers for composite (-array) access without constant index, see "heap_milc_struct_mock.c":
    LOG_DEBUG("Gep indices empty")
    return GepIndexToType{composite_type};
  }

  if (gep_indices.byte_access()) {
    LOG_DEBUG("Trying to resolve byte access based on offset " << gep_indices.indices_[0])
    auto member_at_offset = di::util::resolve_byte_offset_to_member_of(composite_type, gep_indices.indices_[0]);
    if (!member_at_offset) {
      return GepIndexToType{composite_type};
    }
    return GepIndexToType{member_at_offset->type_of_member, member_at_offset->member};
  }

  const bool skip_nested = gep_indices.skipped_first() && gep_indices.indices_[0] != 0;
  if (skip_nested) {
    // This assumes that a single (and only single) first 0 skips through to the first element with more than one
    // member: struct A { struct B { struct C { int, int } } } -> would skip to "struct C" for gep [0 1]
    // see test gep/global_nested.c
    LOG_DEBUG("Skip single member nested of: " << log::ditype_str(composite_type))
    auto* const skipped_to = skip_first_gep_access(composite_type);
    const bool did_skip    = skipped_to != composite_type;
    // required for
    // - LLVM-18: gep/global_nested & gep/param_first_nested_padding.cpp
    // - LLVM-19: gep/global_nested.c
    composite_type = skipped_to;
    if (did_skip) {
      LOG_DEBUG("Result of skip: " << log::ditype_str(composite_type))
    }
  }

  return iterate_gep_index(composite_type, gep_indices);
}
716✔
324

325
// Tries to resolve the type for a GEP whose pointer operand is an instruction, based on
// the subprogram that the instruction's debug location belongs to: the first entry of
// the subprogram's type array is taken and stripped of derived types up to the first
// pointer or pointer-to-member type.
// Returns an empty optional if the pointer operand is not an instruction, carries no
// debug location, its scope has no enclosing subprogram, or the subprogram has no
// (non-empty) type array.
std::optional<GepIndexToType> try_resolve_inlined_operator(const llvm::GEPOperator* gep) {
  const auto* const load = llvm::dyn_cast<llvm::Instruction>(gep->getPointerOperand());
  if (!load) {
    return {};
  }

  const auto& debug_loc = load->getDebugLoc();
  if (!debug_loc) {
    // getScope() must not be queried on an invalid debug location.
    LOG_DEBUG("Pointer operand has no debug location")
    return {};
  }

  // The scope may be a nested scope (e.g., a lexical block): resolve the enclosing subprogram.
  const auto* const scope    = llvm::dyn_cast_or_null<llvm::DILocalScope>(debug_loc.getScope());
  const auto* const sub_prog = (scope != nullptr) ? scope->getSubprogram() : nullptr;
  if (sub_prog == nullptr) {
    LOG_DEBUG("Scope does not represent a subprogram")
    return {};
  }

  // see cpp/heap_vector_operator.cpp: vector::operator[] returns a reference, that we skip here:
  const auto remove_ref = [](auto* di_type) {
    return detail::find_non_derived_type_unless(di_type, [](auto* candidate) {
      if (const auto* type = llvm::dyn_cast<llvm::DIDerivedType>(candidate)) {
        return type->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
               type->getTag() == llvm::dwarf::DW_TAG_ptr_to_member_type;
      }
      return false;
    });
  };

  if (auto* sub_program_type = sub_prog->getType()) {
    if (sub_program_type->getTypeArray().size() > 0) {
      return {GepIndexToType{remove_ref(*sub_program_type->getTypeArray().begin())}};
    }
  }
  LOG_DEBUG("Could not detect inlined operator")
  return {};
}
8✔
354

355
// Determines the debug type that the given GEP accesses, starting from the debug type
// `root` of the GEP's pointer operand. Depending on the GEP source element type this is
// - root itself (pointer source type, array without composite, byte offset into a
//   non-composite or pointer target),
// - the base type of the composite (array source type),
// - the result of try_resolve_inlined_operator (forward-declared composite), or
// - the member resolved from the GEP indices via resolve_gep_index_to_type.
GepIndexToType extract_gep_dereferenced_type(llvm::DIType* root, const llvm::GEPOperator& inst) {
  using namespace llvm;

  auto* const gep_src = inst.getSourceElementType();

  auto* const base_ty        = find_non_derived_type(root);
  auto* const composite_type = llvm::dyn_cast_or_null<DICompositeType>(base_ty);
  // see test cpp/heap_vector_opt.cpp: GEP on pointer (of inlined operator[])
  const bool may_be_inlined_operator = (composite_type != nullptr) && composite_type->isForwardDecl();

  if (gep_src->isPointerTy() && !may_be_inlined_operator) {
    LOG_DEBUG("Gep to ptr " << log::ditype_str(root));
    return GepIndexToType{root};
  }

  if (gep_src->isArrayTy()) {
    if (composite_type != nullptr) {
      auto* base_type = composite_type->getBaseType();
      LOG_DEBUG("Gep to array of DI composite, with base type " << log::ditype_str(base_type));
      return GepIndexToType{base_type};
    }
    LOG_DEBUG("Gep to array " << log::ditype_str(root));
    return GepIndexToType{root};
  }

  // root is handled as possibly null above (find_non_derived_type), so stay null-tolerant here.
  auto* const derived_root = llvm::dyn_cast_or_null<DIDerivedType>(root);
  // The base type of a derived type can be null (e.g., a pointer to void): check before use.
  auto* const root_base_type   = (derived_root != nullptr) ? derived_root->getBaseType() : nullptr;
  const bool is_pointer_target = (root_base_type != nullptr) && root_base_type->getTag() == dwarf::DW_TAG_pointer_type;
  const bool is_byte_gep       = util::is_byte_indexing(&inst);
  // TODO: This check seems like a bad idea but I'm not really sure how to do it properly, I reckon we need *some*
  //       heuristic to detect "fake-array" types though (e.g. gep/array_composite_s.c)
  if (is_byte_gep && (!composite_type || is_pointer_target)) {
    LOG_DEBUG("Gep with byte offset to pointer-like : " << log::ditype_str(root))
    return GepIndexToType{root};
  }

  assert(composite_type != nullptr && "Root should be a struct-like type.");

  if (composite_type->isForwardDecl()) {
    LOG_DEBUG("Trying to resolve forward-declared composite type " << log::ditype_str(composite_type))
    return try_resolve_inlined_operator(&inst).value_or(GepIndexToType{root});
  }

  LOG_DEBUG("Gep to DI composite: " << log::ditype_str(composite_type))
  bool skip_first{!util::is_first_non_zero_indexing(&inst)};
  if (is_byte_gep) {
    LOG_DEBUG("Access based on i8 ptr, assuming byte offsetting into composite member")
    skip_first = false;  // We do not skip over byte index values (likely != 0)
  }

  return resolve_gep_index_to_type(composite_type, GepIndices::create(&inst, skip_first));
}
1,018✔
408

409
}  // namespace dimeta::gep
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc