• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 19337095106

13 Nov 2025 03:41PM UTC coverage: 72.7% (-10.7%) from 83.355%
19337095106

push

github

web-flow
Make TBAA optional (#45)

- No default TBAA usage
- LLVM 20 support

1701 of 2973 branches covered (57.21%)

Branch coverage included in aggregate %.

207 of 213 new or added lines in 8 files covered. (97.18%)

56 existing lines in 6 files now uncovered.

2219 of 2419 relevant lines covered (91.73%)

7456.57 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.51
/lib/type/DITypeExtractor.cpp
1

2
//  llvm-dimeta library
3
//  Copyright (c) 2022-2025 llvm-dimeta authors
4
//  Distributed under the BSD 3-Clause license.
5
//  (See accompanying file LICENSE)
6
//  SPDX-License-Identifier: BSD-3-Clause
7
//
8

9
#include "DIFinder.h"
10
#include "DIRootType.h"
11
#include "DIUtil.h"
12
#include "DataflowAnalysis.h"
13
#include "DefUseAnalysis.h"
14
#include "GEP.h"
15
#include "TBAA.h"
16
#include "Util.h"
17
#include "ValuePath.h"
18
#include "support/Logger.h"
19

20
#include "llvm/ADT/ArrayRef.h"
21
#include "llvm/ADT/STLExtras.h"
22
#include "llvm/ADT/SmallVector.h"
23
#include "llvm/ADT/ilist_iterator.h"
24
#include "llvm/BinaryFormat/Dwarf.h"
25
#include "llvm/Config/llvm-config.h"
26
#include "llvm/IR/Argument.h"
27
#include "llvm/IR/Constants.h"
28
#include "llvm/IR/DebugInfoMetadata.h"
29
#include "llvm/IR/Function.h"
30
#include "llvm/IR/GlobalVariable.h"
31
#include "llvm/IR/InstIterator.h"
32
#include "llvm/IR/InstrTypes.h"
33
#include "llvm/IR/Instructions.h"
34
#include "llvm/IR/IntrinsicInst.h"
35
#include "llvm/IR/Metadata.h"
36
#include "llvm/IR/Operator.h"
37
#include "llvm/IR/Value.h"
38
#include "llvm/Support/Casting.h"
39
#include "llvm/Support/Debug.h"
40
#include "llvm/Support/ErrorHandling.h"
41
#include "llvm/Support/raw_ostream.h"
42

43
#include <cassert>
44
#include <iterator>
45
#include <optional>
46
#include <type_traits>
47
#include <unordered_map>
48
#include <utility>
49

50
namespace dimeta::type {
51

52
namespace reset {
53

54
// Lookup table keyed by GEP operator, yielding a derived debug-info type.
// NOTE(review): going by the alias name the mapped value is presumably the DI member accessed by the GEP;
// the alias is not referenced by the code visible in this file section - confirm before relying on it.
using GepToDIMemberMap = std::unordered_map<const llvm::GEPOperator*, llvm::DIDerivedType*>;
55

56
namespace detail {
57

58
template <typename T, typename InstTy>
59
std::optional<const T*> get_operand_to(const InstTy* memory_instruction) {
11,832✔
60
  static_assert(std::is_same_v<InstTy, llvm::StoreInst> || std::is_same_v<InstTy, llvm::LoadInst>,
61
                "Expected load or store instruction");
62
  const auto* memory_target = memory_instruction->getPointerOperand();
11,832✔
63
  if (llvm::isa<T>(memory_target)) {
11,832✔
64
    return llvm::dyn_cast<T>(memory_target);
2,655✔
65
  }
66

67
  if (auto bcast = llvm::dyn_cast<llvm::BitCastInst>(memory_target)) {
9,823!
68
    if (llvm::isa<T>(bcast->getOperand(0))) {
646!
69
      return llvm::dyn_cast<T>(bcast->getOperand(0));
106✔
70
    }
71
  }
540✔
72

73
  return {};
9,071✔
74
}
11,832✔
75

76
// True if at least one index operand of `gep` is not an llvm::ConstantInt.
bool is_array_gep_with_non_const_indices(const llvm::GetElementPtrInst* gep) {
#if LLVM_VERSION_MAJOR > 12
  auto index_range = gep->indices();
#else
  auto index_range = llvm::make_range(gep->idx_begin(), gep->idx_end());
#endif
  const auto is_dynamic_index = [](const auto& index) { return !llvm::isa<llvm::ConstantInt>(index.get()); };
  return llvm::any_of(index_range, is_dynamic_index);
}
1,246✔
89

90
// Classifies `gep` as an array access: either it has a non-constant index, or its source element
// type is an array type. A null `gep` is never an array access.
bool is_array_gep(const llvm::GetElementPtrInst* gep) {
  if (gep == nullptr) {
    return false;
  }
  if (detail::is_array_gep_with_non_const_indices(gep)) {
    return true;
  }
  return gep->getSourceElementType()->isArrayTy();
}
474✔
96

97
}  // namespace detail
98

99
template <typename T>
100
bool store_to(const llvm::StoreInst* store) {
6,984✔
101
  return detail::get_operand_to<T>(store).has_value();
6,984✔
102
}
103

104
template <typename T>
105
bool load_to(const llvm::LoadInst* load) {
3,420✔
106
  return detail::get_operand_to<T>(load).has_value();
3,420✔
107
}
108

109
// Checks whether the loaded value feeds an array-like GEP.
// Only the first GEP instruction found among the users decides the result; without any GEP user
// the answer is false.
bool load_for_array_gep(const llvm::LoadInst* load) {
  for (const llvm::User* load_user : load->users()) {
    if (!llvm::isa<llvm::GetElementPtrInst>(load_user)) {
      continue;
    }
    return detail::is_array_gep(llvm::cast<llvm::GetElementPtrInst>(load_user));
  }
  return false;
}
184✔
117

118
// Checks whether the load reads from an address produced by an array-like GEP instruction.
bool load_of_array_gep(const llvm::LoadInst* load) {
  const llvm::Value* address = load->getPointerOperand();
  // A non-GEP address yields nullptr here, which is_array_gep treats as "not an array GEP".
  const auto* address_gep = llvm::dyn_cast<llvm::GetElementPtrInst>(address);
  return detail::is_array_gep(address_gep);
}
121

122
// True if the store writes to a GEP instruction (directly or through one bitcast) that has at
// least one non-constant index.
bool store_to_array_gep(const llvm::StoreInst* store) {
  const auto target_gep = detail::get_operand_to<llvm::GetElementPtrInst>(store);
  return target_gep.has_value() && detail::is_array_gep_with_non_const_indices(*target_gep);
}
1,428✔
129

130
namespace dipath {
131

132
struct IRMapping {
133
  const llvm::Value* value{nullptr};
134
  llvm::DIType* mapped{nullptr};
135
  std::string reason;
136
};
137

138
struct ValueToDiPath {
139
  llvm::SmallVector<IRMapping, 8> path_to_ditype;
140

141
  void emplace_back(const llvm::Value* val, llvm::DIType* mapped_di_type, const std::string reason = "") {
9,711✔
142
    path_to_ditype.emplace_back(IRMapping{val, mapped_di_type, std::move(reason)});
9,711!
143
  }
9,711✔
144

145
  std::optional<llvm::DIType*> final_type() const {
2,576✔
146
    if (path_to_ditype.empty()) {
2,576!
NEW
147
      return {};
×
148
    }
149
    const auto& ditype = path_to_ditype.back();
2,576✔
150
    return ditype.mapped != nullptr ? std::optional{ditype.mapped} : std::nullopt;
2,576!
151
  }
2,576✔
152
};
153

NEW
154
// Prints the recorded path as "[{IR: ...; DI: ...} --> {IR: ...; DI: ..., Reason: "..."}]".
// An empty path prints as "[]". Output is only produced when DIMETA_LOG_LEVEL > 2; otherwise the
// stream is returned untouched.
llvm::raw_ostream& operator<<(llvm::raw_ostream& os, const ValueToDiPath& vdp) {
#if DIMETA_LOG_LEVEL > 2  // FIXME: For coverage
  const auto print_mapping = [&os](const IRMapping& mapping) {
    os << "{IR: ";
    if (mapping.value) {
      mapping.value->print(os, true);
    } else {
      os << "null";
    }

    os << "; DI: ";
    if (mapping.mapped) {
      os << log::ditype_str(mapping.mapped);
    } else {
      os << "null";
    }

    if (!mapping.reason.empty()) {
      os << ", Reason: \"" << mapping.reason << "\"";
    }
    // Always close the entry; previously the brace was only emitted together with a reason.
    os << "}";
  };

  os << "[";
  bool is_first = true;
  for (const IRMapping& mapping : vdp.path_to_ditype) {
    if (!is_first) {
      os << " --> ";
    }
    is_first = false;
    print_mapping(mapping);
  }
  os << "]";
#endif
  return os;
}
197

198
}  // namespace dipath
199

200
// Adjusts the current debug-info type for a load instruction on the value path.
//
// Steps, in order:
//  1. Loads from a global or alloca leave the type unchanged.
//  2. For an array member, the element type of the member's array type is returned.
//  3. If the type is not a member, or the load is the start of the path, and the load is not
//     array-GEP related, the type at byte offset 0 of the desugared type is tried.
//  4. For a derived type, the base type is returned (with DIMETA_USE_TBAA == 1, a TBAA-resolved
//     type is preferred when the base is a composite).
//  5. Otherwise the type is returned unchanged.
//
// The returned optional may be empty or may hold a null pointer (e.g. a derived type without base type).
std::optional<llvm::DIType*> reset_load_related_basic(const dataflow::ValuePath& path, llvm::DIType* type_to_reset,
                                                      const llvm::LoadInst* load) {
  auto* type = type_to_reset;

  if (load_to<llvm::GlobalVariable>(load) || load_to<llvm::AllocaInst>(load)) {
    LOG_DEBUG("Do not reset DIType based on load to global,alloca")
    return type;
  }

  if (di::util::is_array_member(*type)) {
    // Guard both casts: a failed dyn_cast yields nullptr and must not be dereferenced.
    if (auto* array_member = llvm::dyn_cast<llvm::DIDerivedType>(type)) {
      auto* base_type = array_member->getBaseType();
      LOG_DEBUG("Load of array-like " << log::ditype_str(base_type))
      // auto type_de = di::util::desugar(*base_type, 1);
      if (auto* array_type = llvm::dyn_cast_or_null<llvm::DICompositeType>(base_type)) {
        if (auto* underlying_type = array_type->getBaseType()) {
          return underlying_type;
        }
      }
    }
  }

  const auto try_resolve_to_first_member = [&](llvm::DIType* candidate_type) -> std::optional<llvm::DIType*> {
    auto comp = di::util::desugar(*candidate_type);
    LOG_DEBUG("Desugared load to " << log::ditype_str(comp.value_or(nullptr)));
    if (!comp) {
      return {};
    }
    LOG_DEBUG("Loading first pointer member?");
    auto result = di::util::resolve_byte_offset_to_member_of(comp.value(), 0);
    if (result) {
      LOG_DEBUG("Return type of load " << log::ditype_str(result->type_of_member.value_or(nullptr)));
      return result->type_of_member;
    }
    return {};
  };

  // a (last?) load to a GEP of a composite likely loads the first member in an optimized context:
  const bool last_load     = path.start_value().value_or(nullptr) == load;
  const bool is_not_member = !di::util::is_member(*type);
  if (is_not_member || last_load) {
    const bool is_not_arg_load  = !load_to<llvm::Argument>(load);
    const bool is_not_array_gep = !load_of_array_gep(load) && !load_for_array_gep(load);
    if (is_not_array_gep && (is_not_arg_load || last_load)) {
      if (auto resolved = try_resolve_to_first_member(type)) {
        return resolved.value();
      }
    }
  }

  if (auto* maybe_ptr_to_type = llvm::dyn_cast<llvm::DIDerivedType>(type)) {
    if (di::util::is_pointer(*maybe_ptr_to_type)) {
      LOG_DEBUG("Load of pointer-like " << log::ditype_str(maybe_ptr_to_type))
    }
    auto* base_type = maybe_ptr_to_type->getBaseType();

#if DIMETA_USE_TBAA == 1
    // The base type of a derived type can be null (e.g. pointer to void); dyn_cast must not see null.
    if (auto* composite = llvm::dyn_cast_or_null<llvm::DICompositeType>(base_type)) {
      LOG_DEBUG("Have ptr to composite " << log::ditype_str(composite))
      auto type_tbaa = tbaa::resolve_tbaa(base_type, *load);
      if (type_tbaa) {
        return type_tbaa;
      }
    }
#endif
    return base_type;
  }

  return type;
}
1,595✔
266

267
// Adjusts the current debug-info type for a store instruction on the value path.
//
// Steps, in order:
//  1. Stores to a global or alloca leave the type unchanged.
//  2. An array type resolves to its element (base) type.
//  3. For non-array-members whose type desugars, and where the store targets a load, an argument,
//     a GEP with non-constant indices, or the type is a non-static member with non-pointer-like
//     base: resolve to the type at byte offset 0 (with DIMETA_USE_TBAA == 1 a "_vptr" member keeps
//     the type unchanged so TBAA can handle it).
//  4. Non-derived types are returned unchanged.
//  5. Array members resolve to the array element type; other non-static members to their base type.
//  6. A pointer to pointer loses one pointer level.
//  7. Otherwise the type is returned unchanged.
std::optional<llvm::DIType*> reset_store_related_basic(const dataflow::ValuePath&, llvm::DIType* type_to_reset,
                                                       const llvm::StoreInst* store_inst) {
  auto* type = type_to_reset;

  if (store_to<llvm::GlobalVariable>(store_inst) || store_to<llvm::AllocaInst>(store_inst)) {
    // Relevant in "heap_lulesh_mock_char.cpp"
    LOG_DEBUG("Store to alloca/global, return " << log::ditype_str(type))
    return type;
  }

  if (di::util::is_array(*type)) {
    return llvm::cast<llvm::DICompositeType>(type)->getBaseType();
  }

  if (!di::util::is_array_member(*type)) {
    // !di::util::is_array(*type) &&
    const auto is_non_pointer_member = [&](auto& type_) {
      return di::util::is_non_static_member(type_) &&
             !di::util::is_pointer_like(*llvm::cast<llvm::DIDerivedType>(&type_)->getBaseType());
    };

    auto desugared_composite = di::util::desugar(*type);
    LOG_DEBUG("Desugared " << log::ditype_str(desugared_composite.value_or(nullptr)))

    const bool is_load_target     = store_to<llvm::LoadInst>(store_inst);
    const bool is_arg_target      = store_to<llvm::Argument>(store_inst);
    const bool is_array_gep_store = store_to_array_gep(store_inst);
    const bool is_non_ptr_member  = is_non_pointer_member(*type);

    if (desugared_composite && (is_non_ptr_member || is_array_gep_store || is_arg_target || is_load_target)) {
      LOG_DEBUG("Storing to first pointer member?")
      auto result = di::util::resolve_byte_offset_to_member_of(desugared_composite.value(), 0);
      if (result) {
#if DIMETA_USE_TBAA == 1
        if (result->member) {
          const auto member_name = result->member.value()->getName();
          const bool is_vptr     = dimeta::util::starts_with_any_of(member_name, "_vptr");
          // Let this be handled by TBAA if available, see test 10_lulesh_ad_tbaa_static_member.ll
          if (is_vptr) {
            return type;
          }
        }
#endif
        LOG_DEBUG("Return type of store " << log::ditype_str(result->type_of_member.value_or(nullptr)))
        return result->type_of_member;
      }
    }
  }

  auto* derived_type = llvm::dyn_cast<llvm::DIDerivedType>(type);
  if (derived_type == nullptr) {
    LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
    return type;
  }

  if (di::util::is_array_member(*type)) {
    auto* member_base               = derived_type->getBaseType();
    const bool is_array_type_member = member_base->getTag() == llvm::dwarf::DW_TAG_array_type;
    if (is_array_type_member) {
      return llvm::cast<llvm::DICompositeType>(member_base)->getBaseType();
    }
  }

  if (di::util::is_non_static_member(*derived_type)) {
    auto* member_base = derived_type->getBaseType();
    return member_base;
  }

  if (di::util::is_pointer(*derived_type)) {
    // The pointee type can be null (e.g. pointer to void), which plain dyn_cast does not accept.
    if (auto* may_be_ptr_to_ptr = llvm::dyn_cast_or_null<llvm::DIDerivedType>(derived_type->getBaseType())) {
      // Pointer to pointer by default remove one level for RHS assignment type w.r.t. store:
      const auto is_ptr_to_ptr = di::util::is_pointer(*may_be_ptr_to_ptr);
      if (is_ptr_to_ptr) {
        LOG_DEBUG("Store to ptr-ptr, return " << log::ditype_str(may_be_ptr_to_ptr))
        return may_be_ptr_to_ptr;
      }
    }
  }

  LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
  return type;
}
2,080✔
350

351
template <typename Iter>
352
std::optional<llvm::DIType*> reset_ditype(llvm::DIType* type_to_reset, const dataflow::ValuePath& path,
9,015✔
353
                                          const Iter& path_iter, dipath::ValueToDiPath& logged_dipath) {
354
  std::optional<llvm::DIType*> type = type_to_reset;
9,015✔
355

356
  const auto& current_value = path_iter;
9,015✔
357
  LOG_DEBUG("Type to reset: " << log::ditype_str(*type));
358
  LOG_DEBUG(">> based on IR: " << **current_value);
359

360
  if (llvm::isa<llvm::GEPOperator>(*current_value)) {
9,015✔
361
    LOG_DEBUG("Reset based on GEP")
362
    auto* gep             = llvm::cast<llvm::GEPOperator>(*current_value);
2,244✔
363
    const auto gep_result = gep::extract_gep_dereferenced_type(type.value(), *gep);
2,244✔
364
    if (gep_result.member && !gep_result.use_type) {
2,244!
365
      LOG_DEBUG("Using gep member type result")
366
      type = gep_result.member;
1,354✔
367
    } else {
1,354✔
368
      type = gep_result.type;
890✔
369
    }
370
  } else if (const auto* load = llvm::dyn_cast<llvm::LoadInst>(*current_value)) {
15,786✔
371
    LOG_DEBUG("Reset based on load")
372
    type = reset::reset_load_related_basic(path, type.value(), load);
1,595✔
373
  } else if (const auto* store_inst = llvm::dyn_cast<llvm::StoreInst>(*current_value)) {
11,947✔
374
    LOG_DEBUG("Reset based on store")
375
    type = reset::reset_store_related_basic(path, type.value(), store_inst);
2,080✔
376
  } else {
2,080✔
377
    LOG_DEBUG(">> skipping");
378
    logged_dipath.emplace_back(*current_value, type.value_or(nullptr));
3,096!
379
    return type;
3,096✔
380
  }
381

382
  logged_dipath.emplace_back(*current_value, type.value_or(nullptr));
5,919!
383

384
  return type;
5,919✔
385
}
9,015✔
386

387
}  // namespace reset
388

389
// Determines the debug-info type for the value path of a call.
//
// A root type is looked up first; without one, an empty optional is returned. The path is then
// walked in reverse order and every step may refine the type; a step that yields no type keeps
// the previous one. With DIMETA_USE_TBAA == 1 and a store as path start, a TBAA-resolved type is
// appended as the final step. The result is the type of the last recorded step (empty if that step
// has no type or no step was recorded).
std::optional<llvm::DIType*> find_type(const dataflow::CallValuePath& call_path) {
  auto type = root::find_type_root(call_path);

  if (!type) {
    LOG_DEBUG("find_type_root failed to find a type for path " << call_path.path)
    return {};
  }

  reset::dipath::ValueToDiPath dipath;

  const auto path_end = call_path.path.path_to_value.rend();
  for (auto path_iter = call_path.path.path_to_value.rbegin(); path_iter != path_end; ++path_iter) {
    LOG_DEBUG("Extracted type: " << log::ditype_str(*type));
    // value_or() falls back to the previous type, so `type` always holds a value after this line;
    // no emptiness check is needed inside the loop.
    type = reset::reset_ditype(type.value(), call_path.path, path_iter, dipath).value_or(type.value());
    LOG_DEBUG("reset_ditype result " << log::ditype_str(type.value_or(nullptr)) << "\n")
  }

#if DIMETA_USE_TBAA == 1
  // If last node is a store inst, try to extract type via TBAA.
  // start_value() is an optional; fall back to nullptr instead of dereferencing it unchecked.
  const auto* const start_node =
      llvm::dyn_cast_or_null<llvm::StoreInst>(call_path.path.start_value().value_or(nullptr));
  if (start_node) {
    auto type_tbaa = tbaa::resolve_tbaa(type.value(), *start_node);
    if (type_tbaa) {
      dipath.emplace_back(start_node, type_tbaa.value(), "TBAA");
    }
  }
#endif

  LOG_DEBUG("Final mapping\n" << dipath)

  return dipath.final_type();
}
2,602✔
426
}  // namespace dimeta::type
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc