• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 13811884945

12 Mar 2025 12:55PM UTC coverage: 83.51% (-0.1%) from 83.612%
13811884945

push

github

web-flow
Refactor IR type extraction (#28)

1007 of 1455 branches covered (69.21%)

Branch coverage included in aggregate %.

250 of 255 new or added lines in 6 files covered. (98.04%)

8 existing lines in 4 files now uncovered.

1905 of 2032 relevant lines covered (93.75%)

3689.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.75
/lib/type/DITypeExtractor.cpp
1

2
//  llvm-dimeta library
3
//  Copyright (c) 2022-2025 llvm-dimeta authors
4
//  Distributed under the BSD 3-Clause license.
5
//  (See accompanying file LICENSE)
6
//  SPDX-License-Identifier: BSD-3-Clause
7
//
8

9
#include "DIFinder.h"
10
#include "DIRootType.h"
11
#include "DataflowAnalysis.h"
12
#include "DefUseAnalysis.h"
13
#include "GEP.h"
14
#include "TBAA.h"
15
#include "Util.h"
16
#include "ValuePath.h"
17
#include "support/Logger.h"
18

19
#include "llvm/ADT/ArrayRef.h"
20
#include "llvm/ADT/STLExtras.h"
21
#include "llvm/ADT/SmallVector.h"
22
#include "llvm/ADT/ilist_iterator.h"
23
#include "llvm/BinaryFormat/Dwarf.h"
24
#include "llvm/Config/llvm-config.h"
25
#include "llvm/IR/Argument.h"
26
#include "llvm/IR/Constants.h"
27
#include "llvm/IR/DebugInfoMetadata.h"
28
#include "llvm/IR/Function.h"
29
#include "llvm/IR/GlobalVariable.h"
30
#include "llvm/IR/InstIterator.h"
31
#include "llvm/IR/InstrTypes.h"
32
#include "llvm/IR/Instructions.h"
33
#include "llvm/IR/IntrinsicInst.h"
34
#include "llvm/IR/Metadata.h"
35
#include "llvm/IR/Operator.h"
36
#include "llvm/IR/Value.h"
37
#include "llvm/Support/Casting.h"
38
#include "llvm/Support/Debug.h"
39
#include "llvm/Support/ErrorHandling.h"
40
#include "llvm/Support/raw_ostream.h"
41

42
#include <cassert>
43
#include <iterator>
44
#include <type_traits>
45
#include <unordered_map>
46

47
namespace dimeta::type {
48

49
namespace reset {
50

51
// Lookup table type keyed by GEP operator, with a DIDerivedType as mapped value.
// NOTE(review): this alias is not referenced anywhere in the visible part of this file -
// presumably it maps a GEP to the struct member it addresses; confirm it is still needed.
using GepToDIMemberMap = std::unordered_map<const llvm::GEPOperator*, llvm::DIDerivedType*>;
52

53
namespace detail {
54

55
template <typename T, typename InstTy>
56
std::optional<const T*> get_operand_to(const InstTy* memory_instruction) {
2,900✔
57
  static_assert(std::is_same_v<InstTy, llvm::StoreInst> || std::is_same_v<InstTy, llvm::LoadInst>,
58
                "Expected load or store instruction");
59
  const auto* memory_target = memory_instruction->getPointerOperand();
2,900✔
60
  if (llvm::isa<T>(memory_target)) {
2,900!
61
    return llvm::dyn_cast<T>(memory_target);
494✔
62
  }
63

64
  if (auto bcast = llvm::dyn_cast<llvm::BitCastInst>(memory_target)) {
2,618!
65
    if (llvm::isa<T>(bcast->getOperand(0))) {
212!
UNCOV
66
      return llvm::dyn_cast<T>(bcast->getOperand(0));
×
67
    }
68
  }
104✔
69

70
  return {};
2,406✔
71
}
2,900✔
72

73
}  // namespace detail
74

75
template <typename T>
76
bool store_to(const llvm::StoreInst* store) {
1,790✔
77
  return detail::get_operand_to<T>(store).has_value();
1,790✔
78
}
79

80
template <typename T>
81
bool load_to(const llvm::LoadInst* store) {
1,110✔
82
  return detail::get_operand_to<T>(store).has_value();
1,110✔
83
}
84

85
std::optional<llvm::DIType*> reset_load_related_basic(const dataflow::ValuePath& path, llvm::DIType* type_to_reset,
600✔
86
                                                      const llvm::LoadInst* load) {
87
  auto type = type_to_reset;
600✔
88

89
  if (load_to<llvm::GlobalVariable>(load) || load_to<llvm::AllocaInst>(load)) {
600✔
90
    LOG_DEBUG("Do not reset DIType based on load to global,alloca")
91
    return type;
362✔
92
  }
93

94
  if (auto* maybe_ptr_to_type = llvm::dyn_cast<llvm::DIDerivedType>(type)) {
434!
95
    if ((maybe_ptr_to_type->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
238✔
96
         maybe_ptr_to_type->getTag() == llvm::dwarf::DW_TAG_reference_type)) {
62✔
97
      LOG_DEBUG("Load of pointer-like " << log::ditype_str(maybe_ptr_to_type))
98
    }
154✔
99

100
    auto base_type = maybe_ptr_to_type->getBaseType();
238✔
101

102
    if (auto* composite = llvm::dyn_cast<llvm::DICompositeType>(base_type)) {
296✔
103
      LOG_DEBUG("Have ptr to composite " << log::ditype_str(composite))
104
      auto type_tbaa = tbaa::resolve_tbaa(base_type, *load);
70✔
105
      if (type_tbaa) {
70!
106
        return type_tbaa;
70✔
107
      }
108
    }
109
    return base_type;
168✔
110
  }
238✔
111

UNCOV
112
  return type;
×
113
}
600✔
114

115
// Adjusts the DIType tracked along a value path when a store instruction is visited.
//
// Returns, in order of precedence:
//  - the unchanged type for a store to a global variable or an alloca;
//  - the unchanged type if it is null, not a DIDerivedType, or a derived type without
//    a base type (e.g. pointer to void);
//  - for a DW_TAG_member: the member's base type, or the element type when the member
//    is an array;
//  - for a pointer/reference to another pointer/reference: that inner pointer type
//    (one indirection removed);
//  - for a pointer/reference to a composite whose store address comes from a load:
//    the base type of the composite's first element, provided the composite has
//    elements and the first one is a DIDerivedType;
//  - otherwise the unchanged type.
//
// All casts on values that may legitimately be null use the null-tolerant variants,
// and the first-member access is guarded at runtime (not only by an assert), so
// forward-declared/empty composites do not index out of bounds in release builds.
std::optional<llvm::DIType*> reset_store_related_basic(const dataflow::ValuePath&, llvm::DIType* type_to_reset,
                                                       const llvm::StoreInst* store_inst) {
  auto* type = type_to_reset;

  if (store_to<llvm::GlobalVariable>(store_inst) || store_to<llvm::AllocaInst>(store_inst)) {
    // Relevant in "heap_lulesh_mock_char.cpp"
    LOG_DEBUG("Store to alloca/global, return " << log::ditype_str(type))
    return type;
  }

  auto* derived_type = llvm::dyn_cast_or_null<llvm::DIDerivedType>(type);
  if (derived_type == nullptr) {
    LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
    return type;
  }

  auto* base_type = derived_type->getBaseType();
  if (base_type == nullptr) {
    // Nothing below can be derived without a base type (e.g. void pointee).
    LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
    return type;
  }

  const auto tag = derived_type->getTag();

  if (tag == llvm::dwarf::DW_TAG_member) {
    // Need to look at base type for array-type member of struct. Tests w.r.t. gep:
    // 1. array_composite.c
    // 2. array_composite_offset_zero.c
    // 3. array_composite_sub_offset_zero.c
    // 4. global_nested.c
    auto* array_member = llvm::dyn_cast<llvm::DICompositeType>(base_type);
    if (array_member != nullptr && array_member->getTag() == llvm::dwarf::DW_TAG_array_type) {
      return array_member->getBaseType();
    }
    return base_type;
  }

  const bool is_pointer =
      tag == llvm::dwarf::DW_TAG_pointer_type || tag == llvm::dwarf::DW_TAG_reference_type;
  if (is_pointer) {
    if (auto* may_be_ptr_to_ptr = llvm::dyn_cast<llvm::DIDerivedType>(base_type)) {
      // Pointer to pointer by default remove one level for RHS assignment type w.r.t. store:
      const auto is_ptr_to_ptr = may_be_ptr_to_ptr->getTag() == llvm::dwarf::DW_TAG_pointer_type ||
                                 may_be_ptr_to_ptr->getTag() == llvm::dwarf::DW_TAG_reference_type;
      if (is_ptr_to_ptr) {
        LOG_DEBUG("Store to ptr-ptr, return " << log::ditype_str(may_be_ptr_to_ptr))
        return may_be_ptr_to_ptr;
      }
    }

    if (auto* ptr_to_composite = llvm::dyn_cast<llvm::DICompositeType>(base_type)) {
      if (store_to<llvm::LoadInst>(store_inst)) {
        // Triggers for "heap_lhs_obj_opt.c" (llvm 14/15)
        // A store to a composite is assumed to be a store to its first member. That
        // only works if there is a first member and it is a derived type; otherwise
        // fall through and keep the current type.
        auto composite_members             = ptr_to_composite->getElements();
        llvm::DIDerivedType* first_member = nullptr;
        if (!composite_members.empty()) {
          first_member = llvm::dyn_cast_or_null<llvm::DIDerivedType>(composite_members[0]);
        }
        if (first_member != nullptr) {
          auto* store_di_target = first_member->getBaseType();
          LOG_DEBUG("Store to a 'load of a composite type', assume first member as target "
                    << log::ditype_str(store_di_target))
          return store_di_target;
        }
      }
    }
  }

  LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
  return type;
}
876✔
175

176
template <typename Iter, typename Iter2>
177
std::optional<llvm::DIType*> reset_ditype(llvm::DIType* type_to_reset, const dataflow::ValuePath& path,
2,830✔
178
                                          const Iter& path_iter, const Iter2&) {
179
  std::optional<llvm::DIType*> type = type_to_reset;
2,830✔
180
  if (!type) {
2,830!
181
    LOG_DEBUG("No type to reset!")
182
    return {};
×
183
  }
184

185
  auto next_value = path_iter;
2,830✔
186
  LOG_DEBUG("Type to reset: " << log::ditype_str(*type));
187
  LOG_DEBUG(">> based on IR: " << **next_value);
188

189
  if (const auto* load = llvm::dyn_cast<llvm::LoadInst>(*next_value)) {
3,316✔
190
    // Re-set the DIType from the gep, if presence of:
191
    // - a load after a gep is likely the first element of the composite type
192
    // - a load also resolves to the basetype w.r.t. an array composite
193
    LOG_DEBUG("Reset based on load")
194
    return reset::reset_load_related_basic(path, type.value(), load);
600✔
195
  }
196

197
  if (const auto* store_inst = llvm::dyn_cast<llvm::StoreInst>(*next_value)) {
2,932✔
198
    // - a store with a ditype(array) is likely the first element of the array
199
    LOG_DEBUG("Reset based on store")
200
    return reset::reset_store_related_basic(path, type.value(), store_inst);
876✔
201
  }
202

203
  LOG_DEBUG(">> skipping");
204

205
  return type;
1,354✔
206
}
2,830✔
207

208
}  // namespace reset
209

210
// Determines the DIType for a call value path:
//  1. obtain the root type via root::find_type_root (empty result -> empty optional);
//  2. walk the recorded path in reverse; GEP operators refine the type through
//     gep::extract_gep_dereferenced_type (a member result wins over the plain type),
//     every other element goes through reset::reset_ditype. The walk stops as soon
//     as no type is available anymore;
//  3. if a type is left and the path's start value is a store instruction, a
//     successful tbaa::resolve_tbaa result replaces the type.
std::optional<llvm::DIType*> find_type(const dataflow::CallValuePath& call_path) {
  auto type = root::find_type_root(call_path);
  if (!type) {
    LOG_DEBUG("find_type_root failed to find a type for path " << call_path.path)
    return {};
  }

  const auto& value_path = call_path.path.path_to_value;
  const auto path_end    = value_path.rend();
  for (auto path_iter = value_path.rbegin(); path_iter != path_end && type; ++path_iter) {
    if (auto* gep = llvm::dyn_cast<llvm::GEPOperator>(*path_iter)) {
      LOG_DEBUG("Path iter gep for extraction is currently " << *gep);
      // TODO: Maybe we could somehow get more info on the underlying type from the dataflow path
      //       if this returns an empty result due to forward decls?
      const auto gep_result = gep::extract_gep_dereferenced_type(type.value(), *gep);
      type                  = gep_result.type;
      if (gep_result.member) {
        LOG_DEBUG("Using gep member type result")
        type = gep_result.member;
      }
      LOG_DEBUG("Gep reset type is " << log::ditype_str(type.value_or(nullptr)) << "\n")
    } else {
      LOG_DEBUG("Extracted type w.r.t. gep: " << log::ditype_str(*type));
      auto* const current_type = type.value();
      type = reset::reset_ditype(current_type, call_path.path, path_iter, path_end).value_or(current_type);
      LOG_DEBUG("reset_ditype result " << log::ditype_str(type.value_or(nullptr)) << "\n")
    }
  }

  if (!type) {
    return type;
  }

  // If last node is a store inst, try to extract type via TBAA
  const auto* const start_node = llvm::dyn_cast_or_null<llvm::StoreInst>(*call_path.path.start_value());
  if (start_node == nullptr) {
    return type;
  }

  auto type_tbaa = tbaa::resolve_tbaa(type.value(), *llvm::dyn_cast<llvm::Instruction>(start_node));
  if (type_tbaa) {
    type = type_tbaa.value();
  }

  return type;
}
1,036✔
255
}  // namespace dimeta::type
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc