• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 19598487515

22 Nov 2025 04:54PM UTC coverage: 73.626% (-9.7%) from 83.355%
19598487515

Pull #48

github

web-flow
Merge cefb3414f into e3aac00b8
Pull Request #48: Release 0.4

1746 of 2973 branches covered (58.73%)

Branch coverage included in aggregate %.

207 of 213 new or added lines in 8 files covered. (97.18%)

54 existing lines in 5 files now uncovered.

2232 of 2430 relevant lines covered (91.85%)

8946.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

82.74
/lib/type/DITypeExtractor.cpp
1

2
//  llvm-dimeta library
3
//  Copyright (c) 2022-2025 llvm-dimeta authors
4
//  Distributed under the BSD 3-Clause license.
5
//  (See accompanying file LICENSE)
6
//  SPDX-License-Identifier: BSD-3-Clause
7
//
8

9
#include "DIFinder.h"
10
#include "DIRootType.h"
11
#include "DIUtil.h"
12
#include "DataflowAnalysis.h"
13
#include "DefUseAnalysis.h"
14
#include "GEP.h"
15
#include "TBAA.h"
16
#include "Util.h"
17
#include "ValuePath.h"
18
#include "support/Logger.h"
19

20
#include "llvm/ADT/ArrayRef.h"
21
#include "llvm/ADT/STLExtras.h"
22
#include "llvm/ADT/SmallVector.h"
23
#include "llvm/ADT/ilist_iterator.h"
24
#include "llvm/BinaryFormat/Dwarf.h"
25
#include "llvm/Config/llvm-config.h"
26
#include "llvm/IR/Argument.h"
27
#include "llvm/IR/Constants.h"
28
#include "llvm/IR/DebugInfoMetadata.h"
29
#include "llvm/IR/Function.h"
30
#include "llvm/IR/GlobalVariable.h"
31
#include "llvm/IR/InstIterator.h"
32
#include "llvm/IR/InstrTypes.h"
33
#include "llvm/IR/Instructions.h"
34
#include "llvm/IR/IntrinsicInst.h"
35
#include "llvm/IR/Metadata.h"
36
#include "llvm/IR/Operator.h"
37
#include "llvm/IR/Value.h"
38
#include "llvm/Support/Casting.h"
39
#include "llvm/Support/Debug.h"
40
#include "llvm/Support/ErrorHandling.h"
41
#include "llvm/Support/raw_ostream.h"
42

43
#include <cassert>
44
#include <iterator>
45
#include <optional>
46
#include <type_traits>
47
#include <unordered_map>
48
#include <utility>
49

50
namespace dimeta::type {
51

52
namespace reset {
53

54
using GepToDIMemberMap = std::unordered_map<const llvm::GEPOperator*, llvm::DIDerivedType*>;
55

56
namespace detail {
57

58
template <typename T, typename InstTy>
59
std::optional<const T*> get_operand_to(const InstTy* memory_instruction) {
14,396✔
60
  static_assert(std::is_same_v<InstTy, llvm::StoreInst> || std::is_same_v<InstTy, llvm::LoadInst>,
61
                "Expected load or store instruction");
62
  const auto* memory_target = memory_instruction->getPointerOperand();
14,396✔
63
  if (llvm::isa<T>(memory_target)) {
14,396✔
64
    return llvm::dyn_cast<T>(memory_target);
3,253✔
65
  }
66

67
  if (auto bcast = llvm::dyn_cast<llvm::BitCastInst>(memory_target)) {
11,789!
68
    if (llvm::isa<T>(bcast->getOperand(0))) {
646!
69
      return llvm::dyn_cast<T>(bcast->getOperand(0));
106✔
70
    }
71
  }
540✔
72

73
  return {};
11,037✔
74
}
14,396✔
75

76
// Returns true if at least one index operand of `gep` is not an llvm::ConstantInt.
bool is_array_gep_with_non_const_indices(const llvm::GetElementPtrInst* gep) {
#if LLVM_VERSION_MAJOR > 12
  auto index_operands = gep->indices();
#else
  // Older LLVM versions: build the index range from the begin/end iterators.
  auto index_operands = llvm::make_range(gep->idx_begin(), gep->idx_end());
#endif
  return llvm::any_of(index_operands,
                      [](const auto& operand) { return !llvm::isa<llvm::ConstantInt>(operand.get()); });
}
1,518✔
89

90
// Classifies `gep` as an array access: true if it has any non-constant index, or if its
// source element type is an array type. A null `gep` yields false.
bool is_array_gep(const llvm::GetElementPtrInst* gep) {
  if (gep == nullptr) {
    return false;
  }
  if (detail::is_array_gep_with_non_const_indices(gep)) {
    return true;
  }
  return gep->getSourceElementType()->isArrayTy();
}
578✔
96

97
}  // namespace detail
98

99
template <typename T>
100
bool store_to(const llvm::StoreInst* store) {
8,496✔
101
  return detail::get_operand_to<T>(store).has_value();
8,496✔
102
}
103

104
template <typename T>
105
bool load_to(const llvm::LoadInst* load) {
4,164✔
106
  return detail::get_operand_to<T>(load).has_value();
4,164✔
107
}
108

109
// Checks whether the loaded value feeds an array-like GEP.
// Only the first GEP instruction found among the users of `load` is classified; if no
// user is a GEP instruction, the result is false.
bool load_for_array_gep(const llvm::LoadInst* load) {
  for (const auto* load_user : load->users()) {
    const auto* user_gep = llvm::dyn_cast<llvm::GetElementPtrInst>(load_user);
    if (user_gep == nullptr) {
      continue;
    }
    // The first GEP user decides the result; later users are not inspected.
    return detail::is_array_gep(user_gep);
  }
  return false;
}
220✔
117

118
// True if the pointer operand of `load` is a GEP instruction classified as an array access.
bool load_of_array_gep(const llvm::LoadInst* load) {
  // dyn_cast yields null for non-GEP operands, which is_array_gep maps to false.
  const auto* pointer_gep = llvm::dyn_cast<llvm::GetElementPtrInst>(load->getPointerOperand());
  return detail::is_array_gep(pointer_gep);
}
121

122
// True if `store` writes to a GEP instruction (directly, or through one bitcast) that has
// at least one non-constant index.
bool store_to_array_gep(const llvm::StoreInst* store) {
  const auto target_gep = detail::get_operand_to<llvm::GetElementPtrInst>(store);
  return target_gep.has_value() && detail::is_array_gep_with_non_const_indices(*target_gep);
}
1,736✔
129

130
namespace dipath {
131

132
struct IRMapping {
133
  const llvm::Value* value{nullptr};
134
  llvm::DIType* mapped{nullptr};
135
  std::string reason;
136
};
137

138
struct ValueToDiPath {
139
  llvm::SmallVector<IRMapping, 8> path_to_ditype;
140

141
  void emplace_back(const llvm::Value* val, llvm::DIType* mapped_di_type, const std::string reason = "") {
11,835✔
142
    path_to_ditype.emplace_back(IRMapping{val, mapped_di_type, std::move(reason)});
11,835!
143
  }
11,835✔
144

145
  std::optional<llvm::DIType*> final_type() const {
3,132✔
146
    if (path_to_ditype.empty()) {
3,132!
NEW
147
      return {};
×
148
    }
149
    const auto& ditype = path_to_ditype.back();
3,132✔
150
    return ditype.mapped != nullptr ? std::optional{ditype.mapped} : std::nullopt;
3,132!
151
  }
3,132✔
152
};
153

NEW
154
// Prints the mapping trace as "[{IR: ...; DI: ...} --> {IR: ...; DI: ..., Reason: "..."}]".
// An empty trace prints as "[]". Output is only produced if DIMETA_LOG_LEVEL > 2; otherwise
// the stream is returned untouched.
llvm::raw_ostream& operator<<(llvm::raw_ostream& os, const ValueToDiPath& vdp) {
#if DIMETA_LOG_LEVEL > 2  // FIXME: For coverage
  const auto& mappings = vdp.path_to_ditype;
  if (mappings.empty()) {
    os << "[]";
    return os;
  }

  const auto mapping_to_string = [](const IRMapping& mapping) -> std::string {
    std::string str_buffer;
    llvm::raw_string_ostream stream(str_buffer);

    stream << "{IR: ";
    if (mapping.value) {
      mapping.value->print(stream, true);
    } else {
      stream << "null";
    }

    stream << "; DI: ";
    if (mapping.mapped) {
      stream << log::ditype_str(mapping.mapped);
    } else {
      stream << "null";
    }

    if (!mapping.reason.empty()) {
      stream << ", Reason: \"" << mapping.reason << "\"";
    }
    // Always close the entry; previously the brace was only emitted together with a reason,
    // which left entries without a reason unbalanced.
    stream << "}";
    return stream.str();
  };

  os << "[";
  bool is_first_entry = true;
  for (const IRMapping& mapping_item : mappings) {
    if (!is_first_entry) {
      os << " --> ";
    }
    os << mapping_to_string(mapping_item);
    is_first_entry = false;
  }
  os << "]";
#endif
  return os;
}
197

198
}  // namespace dipath
199

200
// Adjusts the current debug type `type_to_reset` for a load instruction on the value path.
//
// Resolution order, first match wins:
//  1. Load directly from a global or alloca: type is returned unchanged.
//  2. Type is an array member: the element type of the member's array type is returned (if set).
//  3. Type is not a member, or `load` is the start value of `path`: unless the load is
//     array-GEP related (or loads from an argument and is not the start value), the type is
//     desugared and the member at byte offset 0 is tried.
//  4. Type is a derived type: its base type is returned (with DIMETA_USE_TBAA == 1, TBAA
//     metadata of the load is consulted first if the base type is a composite).
//  5. Otherwise the type is returned unchanged.
//
// NOTE(review): the returned optional may contain a null DIType* if a derived type has no
// base type (presumably e.g. a void pointer) - confirm that callers handle this.
std::optional<llvm::DIType*> reset_load_related_basic(const dataflow::ValuePath& path, llvm::DIType* type_to_reset,
                                                      const llvm::LoadInst* load) {
  auto* type = type_to_reset;

  if (load_to<llvm::GlobalVariable>(load) || load_to<llvm::AllocaInst>(load)) {
    LOG_DEBUG("Do not reset DIType based on load to global,alloca")
    return type;
  }

  if (di::util::is_array_member(*type)) {
    // Guarded casts: a failed dyn_cast result must not be dereferenced, and the base type
    // may be null, which plain dyn_cast does not accept.
    if (auto* array_member = llvm::dyn_cast<llvm::DIDerivedType>(type)) {
      auto* base_type = array_member->getBaseType();
      LOG_DEBUG("Load of array-like " << log::ditype_str(base_type))
      if (auto* array_type = llvm::dyn_cast_or_null<llvm::DICompositeType>(base_type)) {
        if (auto* underlying_type = array_type->getBaseType()) {
          return underlying_type;
        }
      }
    }
  }

  // Desugars the candidate and resolves the member located at byte offset 0, if any.
  const auto try_resolve_to_first_member = [&](llvm::DIType* candidate_type) -> std::optional<llvm::DIType*> {
    auto comp = di::util::desugar(*candidate_type);
    LOG_DEBUG("Desugared load to " << log::ditype_str(comp.value_or(nullptr)));
    if (!comp) {
      return {};
    }
    LOG_DEBUG("Loading first pointer member?");
    auto result = di::util::resolve_byte_offset_to_member_of(comp.value(), 0);
    if (result) {
      LOG_DEBUG("Return type of load " << log::ditype_str(result->type_of_member.value_or(nullptr)));
      return result->type_of_member;
    }
    return {};
  };

  // a (last?) load to a GEP of a composite likely loads the first member in an optimized context:
  const bool last_load     = path.start_value().value_or(nullptr) == load;
  const bool is_not_member = !di::util::is_member(*type);
  if (is_not_member || last_load) {
    const bool is_not_arg_load  = !load_to<llvm::Argument>(load);
    const bool is_not_array_gep = !load_of_array_gep(load) && !load_for_array_gep(load);
    if (is_not_array_gep && (is_not_arg_load || last_load)) {
      if (auto resolved = try_resolve_to_first_member(type)) {
        return resolved.value();
      }
    }
  }

  if (auto* maybe_ptr_to_type = llvm::dyn_cast<llvm::DIDerivedType>(type)) {
    if (di::util::is_pointer(*maybe_ptr_to_type)) {
      LOG_DEBUG("Load of pointer-like " << log::ditype_str(maybe_ptr_to_type))
    }
    auto* base_type = maybe_ptr_to_type->getBaseType();

#if DIMETA_USE_TBAA == 1
    // The base type can be null; dyn_cast_or_null avoids casting a null pointer.
    if (auto* composite = llvm::dyn_cast_or_null<llvm::DICompositeType>(base_type)) {
      LOG_DEBUG("Have ptr to composite " << log::ditype_str(composite))
      auto type_tbaa = tbaa::resolve_tbaa(base_type, *load);
      if (type_tbaa) {
        return type_tbaa;
      }
    }
#endif
    return base_type;
  }

  return type;
}
1,941✔
266

267
// Adjusts the current debug type `type_to_reset` for a store instruction on the value path.
//
// Resolution order, first match wins:
//  1. Store directly to a global or alloca: type is returned unchanged.
//  2. Type is an array: its element (base) type is returned.
//  3. Type is not an array member, desugars to a composite, and the store is to a
//     non-pointer member / array GEP / argument / load: the member at byte offset 0 is
//     returned (with DIMETA_USE_TBAA == 1, a "_vptr" member keeps the type unchanged).
//  4. Type is not a derived type: returned unchanged.
//  5. Array member with array-typed base: element type of that array.
//  6. Non-static member: the member's base type.
//  7. Pointer to pointer: one pointer level is removed.
//  8. Otherwise the type is returned unchanged.
std::optional<llvm::DIType*> reset_store_related_basic(const dataflow::ValuePath&, llvm::DIType* type_to_reset,
                                                       const llvm::StoreInst* store_inst) {
  auto* type = type_to_reset;

  if (store_to<llvm::GlobalVariable>(store_inst) || store_to<llvm::AllocaInst>(store_inst)) {
    // Relevant in "heap_lulesh_mock_char.cpp"
    LOG_DEBUG("Store to alloca/global, return " << log::ditype_str(type))
    return type;
  }

  if (di::util::is_array(*type)) {
    return llvm::cast<llvm::DICompositeType>(type)->getBaseType();
  }

  if (!di::util::is_array_member(*type)) {
    // A non-static member whose own type is not pointer-like.
    const auto is_non_pointer_member = [&](auto& type_) {
      return di::util::is_non_static_member(type_) &&
             !di::util::is_pointer_like(*llvm::cast<llvm::DIDerivedType>(&type_)->getBaseType());
    };

    auto desugared_composite = di::util::desugar(*type);
    LOG_DEBUG("Desugared " << log::ditype_str(desugared_composite.value_or(nullptr)))

    const bool is_load_target     = store_to<llvm::LoadInst>(store_inst);
    const bool is_arg_target      = store_to<llvm::Argument>(store_inst);
    const bool is_array_gep_store = store_to_array_gep(store_inst);
    const bool is_non_ptr_member  = is_non_pointer_member(*type);

    if (desugared_composite && (is_non_ptr_member || is_array_gep_store || is_arg_target || is_load_target)) {
      LOG_DEBUG("Storing to first pointer member?")
      auto result = di::util::resolve_byte_offset_to_member_of(desugared_composite.value(), 0);
      if (result) {
#if DIMETA_USE_TBAA == 1
        if (result->member) {
          const auto member_name = result->member.value()->getName();
          const bool is_vptr     = dimeta::util::starts_with_any_of(member_name, "_vptr");
          // Let this be handled by TBAA if available, see test 10_lulesh_ad_tbaa_static_member.ll
          if (is_vptr) {
            return type;
          }
        }
#endif
        LOG_DEBUG("Return type of store " << log::ditype_str(result->type_of_member.value_or(nullptr)))
        return result->type_of_member;
      }
    }
  }

  // Single dyn_cast instead of an isa check followed by a cast.
  auto* derived_type = llvm::dyn_cast<llvm::DIDerivedType>(type);
  if (derived_type == nullptr) {
    LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
    return type;
  }

  if (di::util::is_array_member(*type)) {
    auto* member_base               = derived_type->getBaseType();
    const bool is_array_type_member = member_base->getTag() == llvm::dwarf::DW_TAG_array_type;
    if (is_array_type_member) {
      return llvm::cast<llvm::DICompositeType>(member_base)->getBaseType();
    }
  }

  if (di::util::is_non_static_member(*derived_type)) {
    auto* member_base = derived_type->getBaseType();
    return member_base;
  }

  if (di::util::is_pointer(*derived_type)) {
    // The pointee type can be null (presumably for void pointers); dyn_cast_or_null avoids
    // casting a null pointer.
    if (auto* may_be_ptr_to_ptr = llvm::dyn_cast_or_null<llvm::DIDerivedType>(derived_type->getBaseType())) {
      // Pointer to pointer by default remove one level for RHS assignment type w.r.t. store:
      const auto is_ptr_to_ptr = di::util::is_pointer(*may_be_ptr_to_ptr);
      if (is_ptr_to_ptr) {
        LOG_DEBUG("Store to ptr-ptr, return " << log::ditype_str(may_be_ptr_to_ptr))
        return may_be_ptr_to_ptr;
      }
    }
  }

  LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
  return type;
}
2,532✔
350

351
template <typename Iter>
352
std::optional<llvm::DIType*> reset_ditype(llvm::DIType* type_to_reset, const dataflow::ValuePath& path,
10,917✔
353
                                          const Iter& path_iter, dipath::ValueToDiPath& logged_dipath) {
354
  std::optional<llvm::DIType*> type = type_to_reset;
10,917✔
355

356
  const auto& current_value = path_iter;
10,917✔
357
  LOG_DEBUG("Type to reset: " << log::ditype_str(*type));
358
  LOG_DEBUG(">> based on IR: " << **current_value);
359

360
  if (llvm::isa<llvm::GEPOperator>(*current_value)) {
10,917✔
361
    LOG_DEBUG("Reset based on GEP")
362
    auto* gep             = llvm::cast<llvm::GEPOperator>(*current_value);
2,740✔
363
    const auto gep_result = gep::extract_gep_dereferenced_type(type.value(), *gep);
2,740✔
364
    if (gep_result.member && !gep_result.use_type) {
2,740!
365
      LOG_DEBUG("Using gep member type result")
366
      type = gep_result.member;
1,654✔
367
    } else {
1,654✔
368
      type = gep_result.type;
1,086✔
369
    }
370
  } else if (const auto* load = llvm::dyn_cast<llvm::LoadInst>(*current_value)) {
19,094✔
371
    LOG_DEBUG("Reset based on load")
372
    type = reset::reset_load_related_basic(path, type.value(), load);
1,941✔
373
  } else if (const auto* store_inst = llvm::dyn_cast<llvm::StoreInst>(*current_value)) {
14,413✔
374
    LOG_DEBUG("Reset based on store")
375
    type = reset::reset_store_related_basic(path, type.value(), store_inst);
2,532✔
376
  } else {
2,532✔
377
    LOG_DEBUG(">> skipping");
378
    logged_dipath.emplace_back(*current_value, type.value_or(nullptr));
3,704!
379
    return type;
3,704✔
380
  }
381

382
  logged_dipath.emplace_back(*current_value, type.value_or(nullptr));
7,213!
383

384
  return type;
7,213✔
385
}
10,917✔
386

387
}  // namespace reset
388

389
// Resolves the debug-info type for the value described by `call_path`.
//
// Starting from the root type found by root::find_type_root, the value path is walked in
// reverse and the type is refined per IR value via reset::reset_ditype. With
// DIMETA_USE_TBAA == 1 and a store as start value of the path, a TBAA-based result is
// appended as the final mapping. The returned type is the one of the last recorded mapping.
//
// Returns an empty optional if no root type was found, if no mapping was recorded, or if
// the last mapping has a null type.
std::optional<llvm::DIType*> find_type(const dataflow::CallValuePath& call_path) {
  auto type = root::find_type_root(call_path);

  if (!type) {
    LOG_DEBUG("find_type_root failed to find a type for path " << call_path.path)
    return {};
  }

  reset::dipath::ValueToDiPath dipath;

  const auto path_end = call_path.path.path_to_value.rend();
  for (auto path_iter = call_path.path.path_to_value.rbegin(); path_iter != path_end; ++path_iter) {
    LOG_DEBUG("Extracted type: " << log::ditype_str(*type));
    // If the step yields no type, keep the previous one.
    type = reset::reset_ditype(type.value(), call_path.path, path_iter, dipath).value_or(type.value());
    LOG_DEBUG("reset_ditype result " << log::ditype_str(type.value_or(nullptr)) << "\n")
    // NOTE(review): after the value_or assignment above this looks unreachable - confirm and remove.
    if (!type) {
      break;
    }
  }

#if DIMETA_USE_TBAA == 1
  if (type) {
    // If last node is a store inst, try to extract type via TBAA.
    // value_or(nullptr) avoids dereferencing an empty start_value() optional.
    const auto* const start_node =
        llvm::dyn_cast_or_null<llvm::StoreInst>(call_path.path.start_value().value_or(nullptr));
    if (start_node) {
      // A store is always an instruction, a plain upcast is sufficient here.
      auto type_tbaa = tbaa::resolve_tbaa(type.value(), static_cast<const llvm::Instruction&>(*start_node));
      if (type_tbaa) {
        dipath.emplace_back(start_node, type_tbaa.value(), "TBAA");
      }
    }
  }
#endif

  LOG_DEBUG("Final mapping\n" << dipath)

  return dipath.final_type();
}
3,162✔
426
}  // namespace dimeta::type
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc