• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 13807411164

12 Mar 2025 08:58AM UTC coverage: 83.51%. First build
13807411164

Pull #27

github

web-flow
Merge 47999c9ef into f9fbafbd4
Pull Request #27: TBAA rework

1007 of 1455 branches covered (69.21%)

Branch coverage included in aggregate %.

204 of 205 new or added lines in 4 files covered. (99.51%)

1905 of 2032 relevant lines covered (93.75%)

3689.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.63
/lib/type/TBAA.cpp
1
//  llvm-dimeta library
2
//  Copyright (c) 2022-2025 llvm-dimeta authors
3
//  Distributed under the BSD 3-Clause license.
4
//  (See accompanying file LICENSE)
5
//  SPDX-License-Identifier: BSD-3-Clause
6
//
7

8
#include "TBAA.h"
9

10
#include "DIVisitor.h"
11
#include "DIVisitorUtil.h"
12
#include "DefUseAnalysis.h"
13
#include "support/Logger.h"
14

15
#include "llvm/ADT/StringRef.h"
16
#include "llvm/BinaryFormat/Dwarf.h"
17
#include "llvm/IR/Constants.h"
18
#include "llvm/IR/DebugInfoMetadata.h"
19
#include "llvm/IR/Instruction.h"
20
#include "llvm/IR/Instructions.h"
21
#include "llvm/IR/Metadata.h"
22
#include "llvm/Support/Casting.h"
23
#include "llvm/Support/raw_ostream.h"
24

25
#include <assert.h>
26
#include <cstddef>
27
#include <iterator>
28
#include <llvm/ADT/STLExtras.h>
29
#include <llvm/ADT/SmallVector.h>
30
#include <optional>
31
#include <string>
32
#include <string_view>
33

34
namespace dimeta::tbaa {
35

36
struct TBAAHandle {
37
  llvm::MDNode* base_ty;
38
  llvm::MDNode* access_ty;
39
  llvm::ConstantInt* offset;
40
  size_t offset_value{};
940✔
41

42
  static std::optional<TBAAHandle> create(const llvm::Instruction& store) {
946✔
43
    auto* access = store.getMetadata(llvm::StringRef{"tbaa"});
946✔
44
    if (!access) {
946✔
45
      return {};
6✔
46
    }
47

48
    return TBAAHandle(*access);
940✔
49
  }
946✔
50

51
  llvm::StringRef base_name() const {
238✔
52
    return llvm::dyn_cast<llvm::MDString>(base_ty->getOperand(0))->getString();
238✔
53
  }
54

55
  llvm::StringRef access_name() const {
762✔
56
    return llvm::dyn_cast<llvm::MDString>(access_ty->getOperand(0))->getString();
762✔
57
  }
58

59
  bool access_is_ptr() const {
762✔
60
    return access_name() == "any pointer";
762✔
61
  }
62

63
  bool base_is_ptr() const {
64
    return base_name() == "any pointer";
65
  }
66

67
  void reset_base(llvm::MDNode* new_base_ty) {
4✔
68
    LOG_DEBUG("Reset base_type " << log::ditype_str(base_ty) << " to " << log::ditype_str(new_base_ty))
69
    base_ty = new_base_ty;
4✔
70
  }
4✔
71

72
  void subtract_offset(size_t offset_) {
4✔
73
    LOG_DEBUG("Reset offset value " << offset_value << " with subtraction of " << offset_)
74
    assert(offset_ <= offset_value && "New offset should not be greater than current one");
8!
75
    offset_value -= offset_;
4✔
76
  }
4✔
77

78
 private:
79
  explicit TBAAHandle(llvm::MDNode& tbaa_node) {
940✔
80
    base_ty       = llvm::dyn_cast<llvm::MDNode>(tbaa_node.getOperand(0));
940✔
81
    access_ty     = llvm::dyn_cast<llvm::MDNode>(tbaa_node.getOperand(1));
940✔
82
    auto value_md = llvm::dyn_cast<llvm::ValueAsMetadata>(tbaa_node.getOperand(2))->getValue();
940✔
83
    assert(value_md && "Offset value may not be null.");
1,694!
84
    offset       = llvm::dyn_cast<llvm::ConstantInt>(value_md);
940✔
85
    offset_value = offset->getLimitedValue();
940✔
86
  }
940✔
87
};
88

89
namespace helper {
90

91
inline size_t num_composite_members(const llvm::DICompositeType* composite) {
92✔
92
  const auto num_members = llvm::count_if(composite->getElements(), [](const auto* node) {
326✔
93
    if (auto derived = llvm::dyn_cast<llvm::DIDerivedType>(node)) {
408!
94
      return derived->getTag() == llvm::dwarf::DW_TAG_member;
210✔
95
    }
96
    return false;
24✔
97
  });
234✔
98
  return num_members;
168✔
99
}
76✔
100

101
inline llvm::SmallVector<llvm::DIDerivedType*, 4> composite_members(const llvm::DICompositeType* composite) {
36✔
102
  llvm::SmallVector<llvm::DIDerivedType*, 4> members;
36✔
103
  for (auto* member : composite->getElements()) {
78✔
104
    if (auto* derived = llvm::dyn_cast<llvm::DIDerivedType>(member)) {
78!
105
      if (derived->getTag() == llvm::dwarf::DW_TAG_member) {
42!
106
        members.push_back(derived);
42✔
107
      }
42✔
108
    }
36✔
109
  }
36✔
110

111
  return members;
30✔
112
}
36!
113

114
class FindMatchingMember : public visitor::DINodeVisitor<FindMatchingMember> {
115
  llvm::StringRef composite_name_;
116

117
 public:
118
  std::optional<llvm::DICompositeType*> result{};
44✔
119
  explicit FindMatchingMember(llvm::StringRef composite_name) : composite_name_(composite_name) {
44✔
120
  }
44✔
121

122
  bool visitCompositeType(const llvm::DICompositeType* type) {
104✔
123
    const auto id = [](const auto* type) -> llvm::StringRef {
300✔
124
      if (type->getIdentifier().empty()) {
104✔
125
        return type->getName();
60✔
126
      }
127
      return type->getIdentifier();
44✔
128
    }(type);
196✔
129

130
    if (id == composite_name_) {
104✔
131
      result = const_cast<llvm::DICompositeType*>(type);
36✔
132
      return false;
36✔
133
    }
134

135
    return true;
68✔
136
  }
104✔
137
};
138

139
std::optional<llvm::DICompositeType*> find_composite_with_name(llvm::StringRef composite_name,
44✔
140
                                                               const llvm::DICompositeType* type) {
141
  FindMatchingMember finder{composite_name};
44✔
142
  finder.traverseCompositeType(type);
44✔
143
  return finder.result;
44✔
144
}
44✔
145

146
inline size_t tbaa_operand_count(llvm::MDNode* type_node) {
92✔
147
  // Given, e.g., {!"_ZTSNSt12_Vector_baseIPiSaIS0_EE17_Vector_impl_dataE", !749, i64 0, !749, i64 8, !749, i64 16}
148
  // returns number of [type, offsets] pairs: 3
149
  const auto num = type_node->getNumOperands();
92✔
150
  assert(num > 0 && "Operand count must be > 0");
168!
151
  return (num - 1) / 2;
168✔
152
}
76✔
153

154
inline std::optional<llvm::StringRef> tbaa_operand_name(llvm::MDNode* type_node) {
68✔
155
  if (type_node->getNumOperands() == 0) {
68!
156
    return {};
×
157
  }
158
  return llvm::dyn_cast<llvm::MDString>(type_node->getOperand(0))->getString();
68✔
159
}
68✔
160

161
struct TBAADestructure {
162
  llvm::StringRef name;
163
  size_t offset_base_{};
164
  std::optional<llvm::MDNode*> outermost_candidate_{};
8✔
165

166
  bool traverse_tbaa_nodes(llvm::MDNode* new_base_ty) {
68✔
167
    const auto tbaa_name = tbaa_operand_name(new_base_ty).value_or("");
68✔
168
    if (!tbaa_name.empty() && name == tbaa_name) {
68!
169
      LOG_DEBUG("Match found " << log::ditype_str(new_base_ty))
170
      outermost_candidate_.emplace(new_base_ty);
4✔
171
      return true;
4✔
172
    }
173

174
    const auto* end_node = std::prev(new_base_ty->op_end());
64✔
175
    for (const auto* iter = new_base_ty->op_begin(); iter != end_node; ++iter) {
184✔
176
      const auto& operand = *iter;
120✔
177

178
      if (auto* mdnode = llvm::dyn_cast<llvm::MDNode>(operand)) {
180✔
179
        auto result = traverse_tbaa_nodes(mdnode);
60✔
180

181
        if (result) {
60✔
182
          // we found a matching node, we need to set the offset now to re-calculate later
183
          const auto* offset = std::next(iter);
4✔
184
          if (auto* value_md = llvm::dyn_cast<llvm::ValueAsMetadata>(*offset)) {
8!
185
            if (auto* current_offset = llvm::dyn_cast<llvm::ConstantInt>(value_md->getValue())) {
8!
186
              this->offset_base_ += current_offset->getLimitedValue();
4✔
187
            }
4✔
188
          }
4✔
189
          break;
190
        }
4✔
191
      }
60✔
192
    }
120✔
193

194
    return false;
64✔
195
  }
68✔
196
};
197

198
inline std::pair<std::optional<llvm::MDNode*>, size_t> tbaa_sub_node_matches_name(llvm::StringRef name,
8✔
199
                                                                                  llvm::MDNode* new_base_ty) {
200
  TBAADestructure tbaa{name};
8✔
201
  const auto found = tbaa.traverse_tbaa_nodes(new_base_ty);
8✔
202
  LOG_DEBUG("Found " << log::ditype_str(tbaa.outermost_candidate_.value_or(nullptr)) << " at offset "
203
                     << tbaa.offset_base_)
204
  return {tbaa.outermost_candidate_, tbaa.offset_base_};
8✔
205
}
8✔
206

207
}  // namespace helper
208

209
bool composite_fits_tbaa(const llvm::DICompositeType* composite, const TBAAHandle& tbaa) {
56✔
210
  const auto num_members = helper::num_composite_members(composite);
56✔
211
  const auto tbaa_count  = helper::tbaa_operand_count(tbaa.base_ty);
56✔
212
  if (num_members != tbaa_count) {
56✔
213
    return false;
20✔
214
  }
215

216
  LOG_DEBUG("Type element size " << num_members << " vs. TBAA " << tbaa_count)
217

218
  const auto elements  = helper::composite_members(composite);
36✔
219
  int element_position = 0;  // Incremented for every TBAA constant int entry
30✔
220
  // Loop simply checks if the byte offsets are the same (TODO also compare types!)
221
  for (const auto& tbaa_operand : tbaa.base_ty->operands()) {
156!
222
    if (auto* value_md = llvm::dyn_cast<llvm::ValueAsMetadata>(tbaa_operand)) {
162!
223
      if (auto* current_offset = llvm::dyn_cast<llvm::ConstantInt>(value_md->getValue())) {
84!
224
        auto* current_member = elements[element_position];
42✔
225
        if (!current_offset->equalsInt(current_member->getOffsetInBits() / 8)) {
42!
NEW
226
          return false;
×
227
        }
228
        ++element_position;
66✔
229
      }
42!
230
    }
36✔
231
  }
120!
232

233
  return true;
36✔
234
}
56✔
235

236
std::optional<llvm::DIType*> tbaa_resolver(llvm::DIType* root, TBAAHandle& tbaa) {
284✔
237
  const auto find_composite = [](llvm::DIType* root) {
568✔
238
    llvm::DIType* type = root;
284✔
239
    while (type && llvm::isa<llvm::DIDerivedType>(type)) {
602!
240
      auto ditype = llvm::dyn_cast<llvm::DIDerivedType>(type);
318✔
241
      type        = ditype->getBaseType();
318✔
242
    }
252✔
243
    return type;
568✔
244
  };
284✔
245

246
  auto* maybe_composite = find_composite(root);
284✔
247
  if (!maybe_composite || !llvm::isa<llvm::DICompositeType>(maybe_composite)) {
284!
248
    LOG_DEBUG("Returning: DIType is not composite required for TBAA descend.")
249
    return root;
154✔
250
  }
251

252
  auto* composite = llvm::dyn_cast<llvm::DICompositeType>(maybe_composite);
130✔
253

254
  LOG_DEBUG("Found maybe legible composite node: " << log::ditype_str(composite))
255

256
  // Cpp TBAA uses identifier, in C we use the name:
257
  const auto struct_name = [](const auto* composite) {
370✔
258
    auto name = std::string{composite->getIdentifier()};
130✔
259
    if (name.empty()) {
130✔
260
      name = std::string{composite->getName()};
112✔
261
    }
112✔
262
    return name;
130✔
263
  }(composite);
240✔
264

265
  if (struct_name != tbaa.base_name() && !tbaa.base_name().empty()) {
130✔
266
    LOG_DEBUG("Names differ. Name of struct: \"" << struct_name << "\" vs name of TBAA \"" << tbaa.base_name() << "\".")
267
    auto result_composite = helper::find_composite_with_name(tbaa.base_name(), composite);
44✔
268

269
    if (result_composite) {
44✔
270
      LOG_DEBUG("Found matching sub member " << log::ditype_str(result_composite.value()))
271
      composite = result_composite.value();  // const_cast<llvm::DICompositeType*>(result_composite.value());
36✔
272
      const auto num_members = helper::num_composite_members(composite);
36✔
273
      const auto num_tbaa    = helper::tbaa_operand_count(tbaa.base_ty);
36✔
274
      if (num_members != num_tbaa) {
36!
275
        LOG_DEBUG("Mismatch between sub member element count and TBAA base type count "
276
                  << log::ditype_str(tbaa.base_ty))
277
        return root;
×
278
      }
279
    } else {
36!
280
      LOG_DEBUG("Did not find matching sub member: " << struct_name)
281
      const auto [new_base, calculated_offset] = helper::tbaa_sub_node_matches_name(struct_name, tbaa.base_ty);
8✔
282
      if (!new_base) {
8✔
283
        return root;
4✔
284
      }
285
      LOG_DEBUG("New TBAA base " << log::ditype_str(new_base.value()) << " with negative offset " << calculated_offset)
286
      tbaa.reset_base(new_base.value());
8✔
287
      tbaa.subtract_offset(calculated_offset);
8✔
288
    }
8✔
289
  }
44✔
290

291
  if (struct_name.empty()) {
126✔
292
    LOG_DEBUG("No name match possible")
293
    // Here no name matching is possible, is TBAA node referring to current DI type? We need to verify:
294
    if (!composite_fits_tbaa(composite, tbaa)) {
56✔
295
      LOG_DEBUG("TBAA of anonymous struct has different offsets to DIComposite type!")
296
      return root;
20✔
297
    }
298
  }
30✔
299

300
  assert(tbaa.access_is_ptr() && "TBAA access should be pointer");
196!
301

302
  LOG_DEBUG("TBAA tree data.")
303
  LOG_DEBUG("  From ditype: " << log::ditype_str(composite))
304
  LOG_DEBUG("  From TBAA: " << log::ditype_str(tbaa.base_ty))
305
  LOG_DEBUG("  At offset: " << tbaa.offset_value)
306

307
  auto result = visitor::util::resolve_byte_offset_to_member_of(composite, tbaa.offset_value);
106✔
308
  if (result) {
106✔
309
    return result->type_of_member;
86✔
310
  }
311
  return {};
20✔
312
}
284✔
313

314
std::optional<llvm::DIType*> resolve_tbaa(llvm::DIType* root, const llvm::Instruction& instruction) {
946✔
315
  LOG_DEBUG("Resolve TBAA of instruction '" << instruction << "' with ditype: " << log::ditype_str(root))
316

317
  auto tbaa = TBAAHandle::create(instruction);
946✔
318
  if (!tbaa) {
946✔
319
    LOG_DEBUG("Requires TBAA metadata in LLVM IR.")
320
    return root;
6✔
321
  }
322

323
  // assign any ptr to any ptr, e.g., struct A** a; a[0] = malloc(struct A):
324
  if (tbaa->base_ty == tbaa->access_ty && tbaa->access_is_ptr()) {
940!
325
    LOG_DEBUG("No work: TBAA base type is same as access type (both ptr).")
326
    return root;
656✔
327
  }
328

329
  const auto node = tbaa_resolver(root, tbaa.value());
284✔
330
  if (node.has_value() && node.value() != root) {
284✔
331
    LOG_DEBUG("Determined new type with TBAA")
332
  }
74✔
333

334
  return node;
284✔
335
}
946✔
336

337
}  // namespace dimeta::tbaa
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc