• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 17204706958

25 Aug 2025 09:20AM UTC coverage: 72.695% (-10.7%) from 83.355%
17204706958

Pull #45

github

web-flow
Merge 047c32aa1 into c60847ea6
Pull Request #45: Disable TBAA for all cases

1700 of 2971 branches covered (57.22%)

Branch coverage included in aggregate %.

205 of 211 new or added lines in 7 files covered. (97.16%)

57 existing lines in 7 files now uncovered.

2219 of 2420 relevant lines covered (91.69%)

7453.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.09
/lib/type/GEP.cpp
1
//  llvm-dimeta library
2
//  Copyright (c) 2022-2025 llvm-dimeta authors
3
//  Distributed under the BSD 3-Clause license.
4
//  (See accompanying file LICENSE)
5
//  SPDX-License-Identifier: BSD-3-Clause
6
//
7

8
#include "GEP.h"
9

10
#include "DIUtil.h"
11
#include "support/Logger.h"
12

13
#include "llvm/ADT/APInt.h"
14
#include "llvm/ADT/STLExtras.h"
15
#include "llvm/ADT/SmallVector.h"
16
#include "llvm/ADT/iterator_range.h"
17
#include "llvm/BinaryFormat/Dwarf.h"
18
#include "llvm/IR/Constants.h"
19
#include "llvm/IR/DebugInfoMetadata.h"
20
#include "llvm/IR/Metadata.h"
21
#include "llvm/IR/Operator.h"
22
#include "llvm/IR/Type.h"
23
#include "llvm/IR/Use.h"
286✔
24
#include "llvm/Support/Casting.h"
25
#include "llvm/Support/Debug.h"
26
#include "llvm/Support/raw_ostream.h"
27

28
#include <algorithm>
29
#include <cassert>
30
#include <cstddef>
31
#include <cstdint>
32
#include <iterator>
33
#include <llvm/IR/Instruction.h>
34
#include <llvm/IR/Instructions.h>
35
#include <optional>
36

37
namespace dimeta::gep {
38

39
namespace util {
40
// Returns true if the source element type of `gep` is an 8-bit integer type (i8).
inline bool is_byte_indexing(const llvm::GEPOperator* gep) {
  const llvm::Type* const source_elem_type = gep->getSourceElementType();
  return source_elem_type->isIntegerTy(8);
}
43

44
// Returns true if the first index operand of `gep` is an llvm::ConstantInt whose sign-extended value is
// strictly greater than zero.
// Returns false for a non-constant first index, for a zero or negative constant, and for a GEP that has no
// index operand at all.
// NOTE(review): despite the name ("non zero"), negative constants are reported as false -- confirm intended.
inline bool is_first_non_zero_indexing(const llvm::GEPOperator* gep) {
  if (!gep->hasIndices()) {
    // Without index operands idx_begin() == idx_end(); dereferencing it would read past the operand list.
    return false;
  }

  const auto* const_idx = llvm::dyn_cast<llvm::ConstantInt>(gep->idx_begin()->get());
  if (const_idx == nullptr) {
    return false;
  }

  const int64_t index = const_idx->getValue().getSExtValue();
  return index > 0;
}
1,490✔
51
}  // namespace util
52

53
struct GepIndices {
1,490✔
54
  const llvm::GEPOperator* gep;
55
  llvm::SmallVector<uint64_t, 4> indices_;
56
  bool skipped{false};
1,490✔
57
  bool is_byte_access{false};
1,490✔
58
  using Iter = llvm::SmallVector<uint64_t, 4>::const_iterator;
59

60
  llvm::iterator_range<Iter> indices() const {
1,170✔
61
    return llvm::iterator_range<Iter>(indices_);
1,170✔
62
  }
63

64
  size_t size() const {
3,672✔
65
    return indices_.size();
3,672✔
66
  }
67

68
  bool empty() const {
2,980✔
69
    return size() == 0;
2,980✔
70
  }
71

72
  bool skipped_first() const {
1,170✔
73
    return skipped;
1,170✔
74
  }
75

76
  bool byte_access() const {
1,390✔
77
    return is_byte_access;
1,390✔
78
  }
79

80
  static GepIndices create(const llvm::GEPOperator* inst, bool skip_first = true);
81
};
82

83
// Collects the constant index operands of `inst` into a GepIndices object.
//
//  inst:       GEP whose index operands are inspected.
//  skip_first: if true, the first index operand is not recorded (regardless of whether it is constant).
//
// Index operands that are not an llvm::ConstantInt are left out, so the result can hold fewer entries than
// the GEP has indices -- or none at all. The previous revision had a (disabled) experiment that pushed a 0
// index in the "has indices but nothing recorded" case, based on heap_milc_mrecv_mock.c with optimization;
// that dead branch also called inst->indices() outside the LLVM-version guard the loop needed for it.
GepIndices GepIndices::create(const llvm::GEPOperator* inst, bool skip_first) {
  GepIndices gep_ind;
  gep_ind.gep            = inst;
  gep_ind.skipped        = skip_first;
  gep_ind.is_byte_access = util::is_byte_indexing(inst);

  // idx_begin()/idx_end() are used for every LLVM version, instead of switching to indices() via the
  // preprocessor; both describe the same operand range.
  for (const auto& index : llvm::make_range(inst->idx_begin(), inst->idx_end())) {
    LOG_DEBUG("Iter " << skip_first << " with " << *index.get())
    if (skip_first) {
      skip_first = false;
      continue;
    }
    if (const auto* const_idx = llvm::dyn_cast<llvm::ConstantInt>(index.get())) {
      // The sign-extended value is stored in an unsigned container; a negative constant wraps around.
      const int64_t index_value = const_idx->getValue().getSExtValue();
      gep_ind.indices_.emplace_back(index_value);
    }
  }

  return gep_ind;
}
1,490!
114

115
// Prints the recorded index values as a bracketed, comma-separated list, e.g. "[0, 2]"; "[]" if there are none.
inline llvm::raw_ostream& operator<<(llvm::raw_ostream& os, const GepIndices& indices) {
  os << "[";
  bool needs_separator = false;
  for (const uint64_t value : indices.indices_) {
    if (needs_separator) {
      os << ", ";
    }
    os << value;
    needs_separator = true;
  }
  os << "]";
  return os;
}
130

131
namespace detail {
132

133
// Chooses between two adjacent composite elements; used to detect the empty base class optimization.
// Returns `next_element` if both nodes are llvm::DIDerivedType and share the same bit offset,
// otherwise returns `element`.
llvm::DINode* select_non_zero_element(llvm::DINode* element, llvm::DINode* next_element) {
  auto* current_member   = llvm::dyn_cast<llvm::DIDerivedType>(element);
  auto* following_member = llvm::dyn_cast<llvm::DIDerivedType>(next_element);

  if (current_member == nullptr || following_member == nullptr) {
    return element;
  }

  LOG_DEBUG("Non-null elements")
  const bool shares_offset = current_member->getOffsetInBits() == following_member->getOffsetInBits();
  if (!shares_offset) {
    return element;
  }

  LOG_DEBUG("Same offset detected: " << log::ditype_str(current_member) << " and "
                                     << log::ditype_str(following_member))
  return next_element;
}
174✔
147

148
template <typename UnlessFn>
149
auto find_non_derived_type_unless(llvm::DIType* root, UnlessFn&& unless) {
3,752✔
150
  llvm::DIType* type = root;
3,752✔
151
  while (type && llvm::isa<llvm::DIDerivedType>(type)) {
7,113!
152
    if (unless(type)) {
4,061!
153
      break;
700✔
154
    }
155
    auto* ditype = llvm::dyn_cast<llvm::DIDerivedType>(type);
3,361✔
156
    type         = ditype->getBaseType();
3,361✔
157
  }
3,361✔
158
  return type;
7,504✔
159
}
3,752✔
160

161
// Returns true if `dinode` is a DW_TAG_inheritance entry whose base is a composite type that has no element
// for which di::util::is_non_static_member() holds.
// Returns false for any other node, including an inheritance entry whose base type is missing or is not a
// composite type.
inline bool is_ebo_inherited_composite(llvm::DINode* dinode) {
  auto* derived = llvm::dyn_cast<llvm::DIDerivedType>(dinode);
  if (derived == nullptr || derived->getTag() != llvm::dwarf::DW_TAG_inheritance) {
    return false;
  }

  LOG_DEBUG(log::ditype_str(derived))
  // dyn_cast_or_null: a missing base type must reach the check below instead of tripping inside dyn_cast.
  auto* base = llvm::dyn_cast_or_null<llvm::DICompositeType>(derived->getBaseType());
  if (base == nullptr) {
    LOG_DEBUG("Is not a composite inheritance " << log::ditype_str(derived))
    return false;
  }

  const bool has_sized_member =
      llvm::any_of(base->getElements(), [](llvm::DINode* elem) { return di::util::is_non_static_member(*elem); });
  LOG_DEBUG("Has sized mem " << has_sized_member)
  return !has_sized_member;
}
4,102✔
181

182
}  // namespace detail
183

184
// Walks the derived-type chain of `root`, stopping at the first type for which di::util::is_pointer_like() holds.
auto find_non_derived_type_unless_ptr(llvm::DIType* root) {
  const auto stop_at_pointer_like = [](auto* candidate) { return di::util::is_pointer_like(*candidate); };
  return detail::find_non_derived_type_unless(root, stop_at_pointer_like);
}
187

188
// Walks the complete derived-type chain of `root` and returns the first type that is not an
// llvm::DIDerivedType (nullptr if the chain ends without one).
auto find_non_derived_type(llvm::DIType* root) {
  const auto never_stop = [](auto*) { return false; };
  return detail::find_non_derived_type_unless(root, never_stop);
}
191

192
// Descends through composites that have exactly one non-static member (as counted by
// di::util::is_non_static_member) and returns the first composite for which that no longer holds.
// The descent also stops, returning the composite reached so far, if the selected element does not resolve
// to a composite type.
llvm::DICompositeType* skip_first_gep_access(llvm::DICompositeType* composite_type) {
  // Selects the element to descend into and strips its derived-type chain (stopping at pointer-like types).
  // Returns nullptr if the element is not a DIType or its chain ends without a type.
  const auto select_next_member = [](llvm::DICompositeType* base) -> llvm::DIType* {
    auto composite_elements = base->getElements();
    auto* element           = composite_elements[0];
    if (composite_elements.size() > 1) {
      element = detail::select_non_zero_element(element, composite_elements[1]);
    }
    return find_non_derived_type_unless_ptr(llvm::dyn_cast<llvm::DIType>(element));
  };

  const auto has_single_non_static_member = [](llvm::DICompositeType* candidate) {
    const auto count_members =
        llvm::count_if(candidate->getElements(), [](const auto* elem) { return di::util::is_non_static_member(*elem); });
    return count_members == 1;
  };

  while (has_single_non_static_member(composite_type)) {
    // dyn_cast_or_null: the selected member can be nullptr (see select_next_member); the previous
    // optional-based check never caught that case and ran llvm::isa on a null pointer.
    auto* next_composite = llvm::dyn_cast_or_null<llvm::DICompositeType>(select_next_member(composite_type));
    if (next_composite == nullptr) {
      LOG_DEBUG("Did not find next member")
      break;
    }
    composite_type = next_composite;
    LOG_DEBUG("Found next " << log::ditype_str(composite_type))
  }

  return composite_type;
}
680✔
223

224
// Maps the recorded GEP indices onto the elements of `composite_type` and returns the accessed member.
//
// For each index, the matching element of the current composite is selected, adjusted for
//  - the "same offset" pattern handled by detail::select_non_zero_element() (index 0 only),
//  - inheritance entries counted by detail::is_ebo_inherited_composite() (index > 0 only),
//  - static members, which are skipped.
// If the member resolves to a class/struct and another index follows, iteration continues inside it.
// If it resolves to an array and another index follows, the array's base type is returned.
// Otherwise the member's resolved type is returned together with the member node.
//
// Returns an empty GepIndexToType if no member could be selected, which includes an index (after
// adjustment) that does not address an element of the composite.
GepIndexToType iterate_gep_index(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
  const auto has_next_gep_idx = [&gep_indices](size_t pos) { return pos + 1 < gep_indices.size(); };

  LOG_DEBUG("Iterate over gep: " << gep_indices);

  const auto is_static_member = [](const llvm::DINode* node) {
    if (const auto* derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(node)) {
      return derived_type_member->isStaticMember();
    }
    return false;
  };

  for (const auto& enum_index : llvm::enumerate(gep_indices.indices())) {
    auto gep_index       = enum_index.value();
    const auto& elements = composite_type->getElements();
    assert(elements.size() > gep_index);
    if (gep_index >= elements.size()) {
      // Release-build counterpart of the assert above: never index past the element list.
      LOG_DEBUG("Gep index exceeds element count of " << log::ditype_str(composite_type))
      return {};
    }

    auto* element = elements[gep_index];

    if (gep_index == 0 && elements.size() > 1) {
      // e.g., LLVM-14: cpp/heap_lhs_function_opt.cpp: vector gep is [0 0 0 0 ...] -> never recurse into EBO
      LOG_DEBUG("Check zero-size pattern for " << log::ditype_str(composite_type))
      element = detail::select_non_zero_element(element, elements[1]);
    }

    const auto ebo_inheritance_offset = llvm::count_if(
        composite_type->getElements(), [&](auto* dinode) { return detail::is_ebo_inherited_composite(dinode); });

    // The bounds check covers the index *after* the offset is applied; checking only the unadjusted index
    // (as before) allowed reading past the element list.
    if (gep_index > 0 && ebo_inheritance_offset > 0 &&
        gep_index + static_cast<uint64_t>(ebo_inheritance_offset) < elements.size()) {
      LOG_DEBUG("EBO offset needed " << ebo_inheritance_offset)
      gep_index += ebo_inheritance_offset;
      element = elements[gep_index];
    }

    if (!llvm::isa<llvm::DIDerivedType>(element)) {
      LOG_DEBUG("Index shows to non-derived type: " << log::ditype_str(element))
      // TODO, if only one index, and this triggers, go first element all the way down?
      // maybe also check for class type (not structs etc.)
    }

    while (is_static_member(element)) {
      if (gep_index + 1 >= elements.size()) {
        // Only static members remain; there is nothing this index could refer to.
        LOG_DEBUG("No non-static member follows " << log::ditype_str(element))
        return {};
      }
      LOG_DEBUG("Skipping static member of composite " << log::ditype_str(element))
      element = elements[++gep_index];
    }

    LOG_DEBUG(" element[" << gep_index << "]: " << log::ditype_str(element))

    if (auto* derived_type_member = llvm::dyn_cast<llvm::DIDerivedType>(element)) {
      auto* member_type = find_non_derived_type_unless_ptr(derived_type_member->getBaseType());

      LOG_DEBUG("Looking at " << log::ditype_str(member_type))

      // member_type is nullptr if the member's type chain ends without a type.
      if (auto* composite_member_type = llvm::dyn_cast_or_null<llvm::DICompositeType>(member_type)) {
        const auto member_tag     = composite_member_type->getTag();
        const bool is_record_type = member_tag == llvm::dwarf::DW_TAG_class_type ||  //
                                    member_tag == llvm::dwarf::DW_TAG_structure_type;
        if (is_record_type && has_next_gep_idx(enum_index.index())) {
          // The next index addresses a member of this nested class/struct.
          composite_type = composite_member_type;
          continue;
        }
        if (member_tag == llvm::dwarf::DW_TAG_array_type && has_next_gep_idx(enum_index.index())) {
          LOG_DEBUG("Found array that is indexed with next index")
          // At end of gep instruction, return basetype:
          return GepIndexToType{
              composite_member_type->getBaseType(),
              derived_type_member,
          };
          // maybe need to recurse into tag_array_type (of non-basic type...)
        }
      }

      return GepIndexToType{member_type, derived_type_member};
    }
  }
  return {};
}
450✔
304

305
// Resolves the recorded GEP indices against `composite_type`.
//  - No recorded index: returns the composite itself.
//  - Byte access: looks up the member at byte offset indices_[0] via
//    di::util::resolve_byte_offset_to_member_of(); returns the composite itself if that lookup fails.
//  - Otherwise: delegates to iterate_gep_index(), after optionally descending through single-member
//    composites (see below).
GepIndexToType resolve_gep_index_to_type(llvm::DICompositeType* composite_type, const GepIndices& gep_indices) {
  if (gep_indices.size() == 0) {
    // this triggers for composite (-array) access without constant index, see "heap_milc_struct_mock.c":
    LOG_DEBUG("Gep indices empty")
    return GepIndexToType{composite_type};
  }

  const auto first_index = gep_indices.indices_[0];

  if (gep_indices.byte_access()) {
    LOG_DEBUG("Trying to resolve byte access based on offset " << first_index)
    if (auto member_at_offset = di::util::resolve_byte_offset_to_member_of(composite_type, first_index)) {
      return GepIndexToType{member_at_offset->type_of_member, member_at_offset->member};
    }
    return GepIndexToType{composite_type};
  }

  const bool descend_single_members = gep_indices.skipped_first() && first_index != 0;
  if (descend_single_members) {
    // This assumes that a single (and only single) first 0 skips through to the first element with more than one
    // member: struct A { struct B { struct C { int, int } } } -> would skip to "struct C" for gep [0 1]
    // see test gep/global_nested.c
    LOG_DEBUG("Skip single member nested of: " << log::ditype_str(composite_type))
    auto* const skipped_to = skip_first_gep_access(composite_type);
    if (skipped_to != composite_type) {
      // required for
      // - LLVM-18: gep/global_nested & gep/param_first_nested_padding.cpp
      // - LLVM-19: gep/global_nested.c
      composite_type = skipped_to;
      LOG_DEBUG("Result of skip: " << log::ditype_str(composite_type))
    }
  }

  return iterate_gep_index(composite_type, gep_indices);
}
1,490✔
338

339
// Returns the debug location attached to `gep` if that location carries an inlined-at entry.
// If `gep` is not an instruction, its pointer operand is consulted instead (provided it is an instruction).
// Returns an empty optional if no instruction is found, if the instruction has no debug location, or if the
// location is not inlined.
std::optional<llvm::DebugLoc> try_resolve_inlined_debug_loc(const llvm::GEPOperator* gep) {
  const llvm::Instruction* location_source = llvm::dyn_cast<llvm::Instruction>(gep);
  if (location_source == nullptr) {
    location_source = llvm::dyn_cast<llvm::Instruction>(gep->getPointerOperand());
  }
  if (location_source == nullptr) {
    LOG_DEBUG("No load for GEP found")
    return std::nullopt;
  }

  const llvm::DebugLoc& location = location_source->getDebugLoc();
  if (!location) {
    return std::nullopt;
  }

  if (location->getInlinedAt() == nullptr) {
    LOG_DEBUG("GEP not inlined")
    return std::nullopt;
  }

  return location;
}
2,280✔
360

361
// Tries to derive the accessed type from the subprogram a GEP was inlined from.
// If the GEP has an inlined debug location (see try_resolve_inlined_debug_loc) and the enclosing subprogram
// has a non-void return type, that return type is returned with its derived-type chain stripped up to (but
// not including) the first pointer or pointer-to-member type.
// Returns an empty optional if there is no inlined location, no enclosing subprogram can be determined, or
// the subprogram has no return type.
std::optional<GepIndexToType> try_resolve_inlined_operator(const llvm::GEPOperator* gep) {
  auto debug_loc = try_resolve_inlined_debug_loc(gep);

  if (!debug_loc) {
    return {};
  }

  // The scope of a debug location is a local scope that need not be the subprogram itself (it can be, e.g.,
  // a lexical block nested in it). Casting it directly to DISubprogram yields nullptr in that case, which
  // was previously only asserted and then dereferenced. getSubprogram() walks up to the enclosing subprogram.
  const auto* const local_scope = llvm::dyn_cast_or_null<llvm::DILocalScope>(debug_loc->getScope());
  const auto* const sub_prog    = (local_scope != nullptr) ? local_scope->getSubprogram() : nullptr;
  if (sub_prog == nullptr) {
    LOG_DEBUG("Scope does not belong to a subprogram")
    return {};
  }

  LOG_DEBUG("Looking at " << log::ditype_str(sub_prog))

  // see cpp/heap_vector_operator.cpp: vector::operator[] returns a reference, that we skip here:
  const auto remove_ref = [&](auto* di_type) {
    auto node = detail::find_non_derived_type_unless(di_type, [](auto* node) {
      if (const auto* type = llvm::dyn_cast<llvm::DIDerivedType>(node)) {
        return type->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
               type->getTag() == llvm::dwarf::DW_TAG_ptr_to_member_type;
      }
      return false;
    });
    return node;
  };

  if (auto* sub_program_type = sub_prog->getType()) {
    // Has return type (not void)? The first entry of the type array is the return type.
    if (sub_program_type->getTypeArray().size() > 0 && (*sub_program_type->getTypeArray().begin() != nullptr)) {
      auto* result = remove_ref(*sub_program_type->getTypeArray().begin());
      LOG_DEBUG("Found candidate " << log::ditype_str(*sub_program_type->getTypeArray().begin()) << " with final type "
                                   << log::ditype_str(result))
      return {GepIndexToType{result}};
    }
  }

  LOG_DEBUG("Could not detect inlined operator")
  return {};
}
2,280✔
398

399
// Determines the debug-info type accessed by the GEP `inst`, given the debug-info type `root` of its base.
//
// Resolution order:
//  1. If the GEP stems from an inlined subprogram with a return type, that (stripped) return type is used
//     (see try_resolve_inlined_operator).
//  2. GEPs whose source element type is a pointer (unless the root composite is a forward declaration) or an
//     array return `root` unchanged.
//  3. Byte-indexed (i8) GEPs on a root that is not a composite, or whose base type is a pointer, return `root`.
//  4. A forward-declared composite returns `root`.
//  5. Otherwise the GEP indices are resolved against the composite type (see resolve_gep_index_to_type).
GepIndexToType extract_gep_dereferenced_type(llvm::DIType* root, const llvm::GEPOperator& inst) {
  auto* const gep_src = inst.getSourceElementType();

  auto* const base_ty        = find_non_derived_type(root);
  auto* const composite_type = llvm::dyn_cast_or_null<llvm::DICompositeType>(base_ty);
  // see test cpp/heap_vector_opt.cpp: GEP on pointer (of inlined operator[])
  const bool may_be_inlined_operator = (composite_type != nullptr) && composite_type->isForwardDecl();

  if (auto inlined_result = try_resolve_inlined_operator(&inst)) {
    return inlined_result.value();
  }

  if (gep_src->isPointerTy() && !may_be_inlined_operator) {
    LOG_DEBUG("Gep to ptr " << log::ditype_str(root));
    return GepIndexToType{root};
  }

  if (gep_src->isArrayTy()) {
    if (composite_type != nullptr) {
      LOG_DEBUG("Gep to array of DI composite, with base type " << log::ditype_str(composite_type->getBaseType()));
    }
    LOG_DEBUG("Gep to array " << log::ditype_str(root));
    return GepIndexToType{root};
  }

  // A derived type may have no base type; guard before reading its tag.
  auto* const derived_root     = llvm::dyn_cast_or_null<llvm::DIDerivedType>(root);
  auto* const root_base        = (derived_root != nullptr) ? derived_root->getBaseType() : nullptr;
  const bool is_pointer_target = (root_base != nullptr) && root_base->getTag() == llvm::dwarf::DW_TAG_pointer_type;

  const bool byte_indexing = util::is_byte_indexing(&inst);
  // TODO: This check seems like a bad idea but I'm not really sure how to do it properly, I reckon we need *some*
  //       heuristic to detect "fake-array" types though (e.g. gep/array_composite_s.c)
  if (byte_indexing && (!composite_type || is_pointer_target)) {
    LOG_DEBUG("Gep with byte offset to pointer-like : " << log::ditype_str(root))
    return GepIndexToType{root};
  }

  assert(composite_type != nullptr && "Root should be a struct-like type.");
  if (composite_type == nullptr) {
    // Release-build counterpart of the assert above: fall back to the root type instead of dereferencing null.
    LOG_DEBUG("Root is not a struct-like type: " << log::ditype_str(root))
    return GepIndexToType{root};
  }

  if (composite_type->isForwardDecl()) {
    LOG_DEBUG("Trying to resolve forward-declared composite type " << log::ditype_str(composite_type))
    // Inlined-operator resolution for this GEP was already attempted above and found nothing; repeating it
    // cannot produce a different result, so the root type is returned directly.
    return GepIndexToType{root};
  }

  LOG_DEBUG("Gep to DI composite: " << log::ditype_str(composite_type))
  bool skip_first{!util::is_first_non_zero_indexing(&inst)};
  if (byte_indexing) {
    LOG_DEBUG("Access based on i8 ptr, assuming byte offsetting into composite member")
    skip_first = false;  // We do not skip over byte index values (likely != 0)
  }

  return resolve_gep_index_to_type(composite_type, GepIndices::create(&inst, skip_first));
}
2,244✔
459

460
}  // namespace dimeta::gep
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc