• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tudasc / TypeART / 13528988609

25 Feb 2025 07:06PM UTC coverage: 88.854% (-1.9%) from 90.735%
13528988609

Pull #163

github

web-flow
Merge e4a2d80f6 into d2e14acc5
Pull Request #163: LLVM 18 support

974 of 1122 new or added lines in 38 files covered. (86.81%)

30 existing lines in 6 files now uncovered.

4201 of 4728 relevant lines covered (88.85%)

190054.62 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.85
/lib/passes/analysis/MemOpVisitor.cpp
1
// TypeART library
2
//
3
// Copyright (c) 2017-2025 TypeART Authors
4
// Distributed under the BSD 3-Clause license.
5
// (See accompanying file LICENSE.txt or copy at
6
// https://opensource.org/licenses/BSD-3-Clause)
7
//
8
// Project home: https://github.com/tudasc/TypeART
9
//
10
// SPDX-License-Identifier: BSD-3-Clause
11
//
12

13
#include "MemOpVisitor.h"
14

15
#include "analysis/MemOpData.h"
16
#include "compat/CallSite.h"
17
#include "configuration/Configuration.h"
18
#include "support/ConfigurationBase.h"
19
#include "support/Error.h"
20
#include "support/Logger.h"
21
#include "support/TypeUtil.h"
22
#include "support/Util.h"
23

24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallPtrSet.h"
26
#include "llvm/ADT/StringRef.h"
27

28
#include <llvm/IR/Instruction.h>
29
#include <llvm/Support/Error.h>
30
#include <type_traits>
31

32
#if LLVM_VERSION_MAJOR >= 12
33
#include "llvm/Analysis/ValueTracking.h"  // llvm::findAllocaForValue
34
#else
35
#include "llvm/Transforms/Utils/Local.h"  // llvm::findAllocaForValue
36
#endif
37
#include "llvm/IR/Constants.h"
38
#include "llvm/IR/Function.h"
39
#include "llvm/IR/InstrTypes.h"
1,654✔
40
#include "llvm/IR/Instructions.h"
41
#include "llvm/IR/Module.h"
42
#include "llvm/IR/Value.h"
43
#include "llvm/Support/Casting.h"
44
#include "llvm/Support/raw_ostream.h"
45

46
#include <cstddef>
47
#include <optional>
48

49
namespace typeart::analysis {
50

51
using namespace llvm;
52

53
MemOpVisitor::MemOpVisitor() : MemOpVisitor(true, true) {
×
54
}
×
55

56
MemOpVisitor::MemOpVisitor(const config::Configuration& config)
2,350✔
57
    : MemOpVisitor(config[config::ConfigStdArgs::stack], config[config::ConfigStdArgs::heap]) {
2,350✔
58
}
2,350✔
59
MemOpVisitor::MemOpVisitor(bool stack, bool heap) : collect_allocas(stack), collect_heap(heap) {
4,004✔
60
}
1,654✔
61

62
// Runs the instruction visitor over `function` and afterwards attaches every
// recorded lifetime-start intrinsic to the AllocaData entry whose alloca it
// was resolved to. Allocas with more than one lifetime start are logged.
void MemOpVisitor::collect(llvm::Function& function) {
  visit(function);

  for (auto& [start_marker, marked_alloca] : lifetime_starts) {
    // Structured bindings cannot be captured directly in C++17, hence the init-capture.
    const auto refers_to_marked = [wanted = marked_alloca](const AllocaData& candidate) {
      return candidate.alloca == wanted;
    };
    const auto match = llvm::find_if(allocas, refers_to_marked);
    if (match == allocas.end()) {
      continue;
    }
    match->lifetime_start.insert(start_marker);
  }

  for (const auto& alloc : allocas) {
    if (alloc.lifetime_start.size() <= 1) {
      continue;
    }
    LOG_DEBUG("Lifetime: " << alloc.lifetime_start.size());
    LOG_DEBUG(*alloc.alloca);
    for (auto* lifetime : alloc.lifetime_start) {
      LOG_DEBUG(*lifetime);
    }
  }
}
17,670✔
83

84
// Appends one GlobalData entry per global variable of `module` to `globals`.
void MemOpVisitor::collectGlobals(Module& module) {
  for (auto& global_variable : module.globals()) {
    globals.push_back(GlobalData{&global_variable});
  }
}
2,350✔
89

90
// Classifies a call/invoke by the name of its directly called function:
// names found in the allocation table are forwarded to visitMallocLike,
// names found in the deallocation table to visitFreeLike. Indirect calls
// (no statically known callee) and unknown names are ignored. Does nothing
// when heap collection is disabled.
void MemOpVisitor::visitCallBase(llvm::CallBase& cb) {
  if (!collect_heap) {
    return;
  }

  const auto* callee = cb.getCalledFunction();

  const auto lookupKind = [callee](const auto& kind_by_name) -> std::optional<MemOpKind> {
    if (callee == nullptr) {
      // TODO handle calls through, e.g., function pointers? - seems infeasible
      // LOG_INFO("Encountered indirect call, skipping.");
      return std::nullopt;
    }
    const auto callee_name = callee->getName().str();
    const auto entry       = kind_by_name.find(callee_name);
    if (entry == kind_by_name.end()) {
      return std::nullopt;
    }
    return {(*entry).second};
  };

  if (const auto alloc_kind = lookupKind(mem_operations.allocs())) {
    visitMallocLike(cb, alloc_kind.value());
    return;
  }
  if (const auto dealloc_kind = lookupKind(mem_operations.deallocs())) {
    visitFreeLike(cb, dealloc_kind.value());
  }
}
94,185✔
116

117
template <class InstTy>
118
std::optional<InstTy*> getSingleUserAs(llvm::Instruction* value) {
168✔
119
  auto users            = value->users();
168✔
120
  const auto num_stores = llvm::count_if(users, [](llvm::User* use) { return llvm::isa<InstTy>(*use); });
396✔
121
  RETURN_NONE_IF((num_stores == 0), "Expected a single store on call \"{0}\". It has no users!", *value);
168✔
122

123
  const auto num_asan_call = llvm::count_if(users, [](llvm::User* user) {
396✔
124
    CallSite csite(user);
228✔
125
    if (!(csite.isCall() || csite.isInvoke()) || csite.getCalledFunction() == nullptr) {
228✔
126
      return false;
201✔
127
    }
128
    const auto name = csite.getCalledFunction()->getName();
27✔
129
    return util::starts_with_any_of(name, "__asan");
27✔
130
  });
228✔
131

132
  RETURN_NONE_IF(num_asan_call > 1, "Expected one ASAN call for array cookie.");
168✔
133

134
  auto* target_instruction =
168✔
135
      dyn_cast<InstTy>(*llvm::find_if(users, [](llvm::User* use) { return llvm::isa<InstTy>(*use); }));
396✔
136

137
  if constexpr (std::is_same_v<InstTy, llvm::StoreInst>) {
138
    // if (llvm::isa<CallBase>(value)) {
139
    RETURN_NONE_IF((target_instruction->getValueOperand() == value),
99✔
140
                   "Did not expect malloc-like \"{0}\" as store value operand.", *value);
141
    // }
142
  }
143

144
  if (num_asan_call != 0) {
165✔
145
    const auto* asan_call = dyn_cast<CallBase>(*llvm::find_if(users, [](llvm::User* user) {
63✔
146
      CallSite csite(user);
36✔
147
      if (!(csite.isCall() || csite.isInvoke()) || csite.getCalledFunction() == nullptr) {
36✔
148
        return false;
9✔
149
      }
150
      const auto name = csite.getCalledFunction()->getName();
27✔
151
      return util::starts_with_any_of(name, "__asan");
12,467✔
152
    }));
36✔
153
    if constexpr (std::is_same_v<InstTy, llvm::StoreInst>) {
154
      RETURN_NONE_IF(target_instruction->getPointerOperand() != asan_call->getArgOperand(0),
27✔
155
                     "Expected a single user on value \"{0}\" but found multiple potential candidates!", *value);
156
    } else {
157
      if constexpr (std::is_same_v<InstTy, llvm::BitCastInst>) {
158
        RETURN_NONE_IF(target_instruction != asan_call->getArgOperand(0),
159
                       "Expected a single user on value \"{0}\" but found multiple potential candidates!", *value);
160
      }
161
    }
162
  }
27✔
163

164
  return {target_instruction};
165✔
165
}
168✔
166

167
// GEP instructions that directly use the result of a malloc-like call (filled by collectRelevantMallocUsers).
using MallocGeps   = SmallPtrSet<GetElementPtrInst*, 2>;
168
// Bitcast instructions associated with a malloc-like call (filled by collectRelevantMallocUsers).
using MallocBcasts = SmallPtrSet<BitCastInst*, 4>;
169

170
// Scans the users of the allocation call `ci` and returns
//  - all GEP instructions using the call directly, and
//  - all bitcasts that either use the call directly or use a load from an
//    address the call is involved in a store with.
std::pair<MallocGeps, MallocBcasts> collectRelevantMallocUsers(llvm::CallBase& ci) {
  MallocGeps geps;
  MallocBcasts bcasts;

  for (auto* call_user : ci.users()) {
    if (auto* direct_cast = dyn_cast<BitCastInst>(call_user)) {
      // Simple case: Pointer is immediately casted
      bcasts.insert(direct_cast);
    } else if (auto* store = dyn_cast<StoreInst>(call_user)) {
      // Pointer is first stored, then loaded and subsequently casted
      auto* store_address = store->getPointerOperand();
      for (auto* address_user : store_address->users()) {  // TODO: Ensure that load occurs after store?
        auto* reload = dyn_cast<LoadInst>(address_user);
        if (reload == nullptr) {
          continue;
        }
        for (auto* reload_user : reload->users()) {
          if (auto* reload_cast = dyn_cast<BitCastInst>(reload_user)) {
            // LOG_MSG(*reload_cast)
            bcasts.insert(reload_cast);
          }
        }
      }
    } else if (auto* gep = dyn_cast<GetElementPtrInst>(call_user)) {
      // GEP indicates that an array cookie is added to the allocation. (Fixes #13)
      geps.insert(gep);
    }
  }

  return {geps, bcasts};
}
1,324✔
199

200
std::optional<ArrayCookieData> handleUnpaddedArrayCookie(llvm::CallBase& ci, const MallocGeps& geps,
90✔
201
                                                         MallocBcasts& bcasts, BitCastInst*& primary_cast) {
202
  using namespace util::type;
203
#if LLVM_VERSION_MAJOR < 15
204
  // We expect only the bitcast to size_t for the array cookie store.
205
  RETURN_NONE_IF(bcasts.size() != 1, "Couldn't identify bitcast instruction of an unpadded array cookie!");
57✔
206
  auto cookie_bcast = *bcasts.begin();
57✔
207
  RETURN_NONE_IF(!isi64Ptr(cookie_bcast->getDestTy()), "Found non-i64Ptr bitcast instruction for an array cookie!");
57✔
208

209
  auto cookie_store = getSingleUserAs<StoreInst>(cookie_bcast);
57✔
210
  RETURN_ON_NONE(cookie_store);
57✔
211

212
  auto array_gep = *geps.begin();
57✔
213
  RETURN_NONE_IF(array_gep->getNumIndices() != 1, "Found multidimensional array cookie gep!");
57✔
214

215
  auto array_bcast = getSingleUserAs<BitCastInst>(array_gep);
57✔
216
  RETURN_ON_NONE(array_bcast);
57✔
217

218
  bcasts.insert(*array_bcast);
57✔
219
  primary_cast = *array_bcast;
57✔
220
#else
221
  auto cookie_store = getSingleUserAs<StoreInst>(&ci);
33✔
222
  RETURN_ON_NONE(cookie_store);
33✔
223
  // RETURN_NONE_IF(cookie_store.get()->getValueOperand() == &ci, "Cookie store has CallBase as value operand.")
224
  auto array_gep = *geps.begin();
30✔
225
  RETURN_NONE_IF(array_gep->getNumIndices() != 1, "Found multidimensional array cookie gep!");
30✔
226
#endif
227
  return {ArrayCookieData{*cookie_store, array_gep}};
87✔
228
}
90✔
229

230
std::optional<ArrayCookieData> handlePaddedArrayCookie(llvm::CallBase& ci, const MallocGeps& geps, MallocBcasts& bcasts,
9✔
231
                                                       BitCastInst*& primary_cast) {
232
  using namespace util::type;
233
#if LLVM_VERSION_MAJOR < 15
234
  // We expect bitcasts only after the GEP instructions in this case.
235
  RETURN_NONE_IF(!bcasts.empty(), "Found unrelated bitcast instructions on a padded array cookie!");
6✔
236

237
  auto gep_it     = geps.begin();
6✔
238
  auto array_gep  = *gep_it++;
6✔
239
  auto cookie_gep = *gep_it++;
6✔
240

241
  auto cookie_bcast = getSingleUserAs<BitCastInst>(cookie_gep);
6✔
242
  RETURN_ON_NONE(cookie_bcast);
6✔
243
  RETURN_NONE_IF(!isi64Ptr((*cookie_bcast)->getDestTy()), "Found non-i64Ptr bitcast instruction for an array cookie!");
6✔
244

245
  auto cookie_store = getSingleUserAs<StoreInst>(*cookie_bcast);
6✔
246
  RETURN_ON_NONE(cookie_store);
6✔
247
  RETURN_NONE_IF(array_gep->getNumIndices() != 1, "Found multidimensional array cookie gep!");
6✔
248

249
  auto array_bcast = getSingleUserAs<BitCastInst>(array_gep);
6✔
250
  RETURN_ON_NONE(array_bcast);
6✔
251

252
  bcasts.insert(*array_bcast);
6✔
253
  primary_cast = *array_bcast;
6✔
254
#else
255
  auto gep_it       = geps.begin();
3✔
256
  auto array_gep    = *gep_it++;
3✔
257
  auto cookie_gep   = *gep_it++;
3✔
258
  auto cookie_store = getSingleUserAs<StoreInst>(cookie_gep);
3✔
259
  RETURN_ON_NONE(cookie_store);
3✔
260
  RETURN_NONE_IF(array_gep->getNumIndices() != 1, "Found multidimensional array cookie gep!");
3✔
261
#endif
262
  return {ArrayCookieData{*cookie_store, array_gep}};
9✔
263
}
9✔
264

265
std::optional<ArrayCookieData> handleArrayCookie(llvm::CallBase& ci, const MallocGeps& geps, MallocBcasts& bcasts,
1,324✔
266
                                                 BitCastInst*& primary_cast) {
267
  if (geps.size() == 1) {
1,324✔
268
    return handleUnpaddedArrayCookie(ci, geps, bcasts, primary_cast);
90✔
269
  } else if (geps.size() == 2) {
1,234✔
270
    return handlePaddedArrayCookie(ci, geps, bcasts, primary_cast);
9✔
271
  } else if (geps.size() > 2) {
1,225✔
272
    // Found a case where the address of an allocation is used more than two
273
    // times as an argument to a GEP instruction. This is unexpected as at most
274
    // two GEPs, for calculating the offsets of an array cookie itself and the
275
    // array pointer, are expected.
NEW
276
    auto exit_on_error = llvm::ExitOnError{"Array Cookie Detection failed!"};
×
NEW
277
    auto err           = "Expected at most two GEP instructions!";
×
278
    LOG_FATAL(err);
×
279
    exit_on_error({error::make_string_error(err)});
×
NEW
280
    return {};
×
UNCOV
281
  }
×
282
  return {};
1,225✔
283
}
1,324✔
284

285
// Records a malloc-like call: gathers its GEP/bitcast users, resolves a
// potential array cookie and appends the resulting MallocData to `mallocs`.
// The primary cast defaults to the first collected bitcast (null when none was
// collected); handleArrayCookie receives it by reference and may replace it.
void MemOpVisitor::visitMallocLike(llvm::CallBase& ci, MemOpKind k) {
  auto relevant_users = collectRelevantMallocUsers(ci);
  auto& geps          = relevant_users.first;
  auto& bcasts        = relevant_users.second;

  BitCastInst* primary_cast = nullptr;
  if (!bcasts.empty()) {
    primary_cast = *bcasts.begin();
  }

  auto array_cookie = handleArrayCookie(ci, geps, bcasts, primary_cast);
  if (primary_cast == nullptr) {
    LOG_DEBUG("Primary bitcast null: " << ci)
  }

  const bool is_invoke = isa<InvokeInst>(ci);
  mallocs.push_back(MallocData{&ci, array_cookie, primary_cast, bcasts, k, is_invoke});
}
1,324✔
294

295
// Records a free-like call in `frees`. The kind passed in is replaced by the
// kind registered for the callee name, if there is one. When the first call
// argument is a GEP instruction it is stored as the array-cookie GEP.
void MemOpVisitor::visitFreeLike(llvm::CallBase& ci, MemOpKind k) {
  //  LOG_DEBUG(ci.getCalledFunction()->getName());
  MemOpKind kind = k;

  // FIXME is that superfluous?
  if (const auto* callee = ci.getCalledFunction(); callee != nullptr) {
    if (const auto registered_kind = mem_operations.deallocKind(callee->getName())) {
      kind = registered_kind.value();
    }
  }

  std::optional<llvm::GetElementPtrInst*> array_cookie_gep;
  if (auto* freed_gep = dyn_cast<GetElementPtrInst>(ci.getArgOperand(0))) {
    array_cookie_gep = freed_gep;
  }

  const bool is_invoke = isa<InvokeInst>(ci);
  frees.emplace_back(FreeData{&ci, array_cookie_gep, kind, is_invoke});
}
1,072✔
311

312
// void MemOpVisitor::visitIntrinsicInst(llvm::IntrinsicInst& ii) {
313
//
314
//}
315

316
// Records an alloca in `allocas` (only when stack collection is enabled).
// A constant array-size operand is stored as the element count; any other
// operand marks the alloca as a VLA with a recorded count of 0.
void MemOpVisitor::visitAllocaInst(llvm::AllocaInst& ai) {
  if (!collect_allocas) {
    return;
  }
  //  LOG_DEBUG("Found alloca " << ai);
  const auto* constant_count = llvm::dyn_cast<ConstantInt>(ai.getArraySize());
  const bool is_vla          = constant_count == nullptr;

  size_t arraySize{0};
  if (!is_vla) {
    arraySize = constant_count->getZExtValue();
  }

  allocas.push_back({&ai, arraySize, is_vla});
  //  LOG_DEBUG("Alloca: " << util::dump(ai) << " -> lifetime marker: " << util::dump(lifetimes));
}
33,076✔
333

334
// Remembers every lifetime-start intrinsic whose operand 1 can be traced back
// to an alloca via llvm::findAllocaForValue. The (intrinsic, alloca) pairs are
// appended to `lifetime_starts` and consumed by collect().
void MemOpVisitor::visitIntrinsicInst(llvm::IntrinsicInst& inst) {
  if (inst.getIntrinsicID() != Intrinsic::lifetime_start) {
    return;
  }

  auto* const marked_value = inst.getOperand(1);
#if LLVM_VERSION_MAJOR >= 12
  auto* alloca = llvm::findAllocaForValue(marked_value);
#else
  DenseMap<Value*, AllocaInst*> alloca_for_value;
  auto* alloca = llvm::findAllocaForValue(marked_value, alloca_for_value);
#endif
  if (alloca == nullptr) {
    return;
  }
  lifetime_starts.emplace_back(&inst, alloca);
}
127,864✔
347

348
void MemOpVisitor::clear() {
17,670✔
349
  allocas.clear();
17,670✔
350
  mallocs.clear();
17,670✔
351
  frees.clear();
17,670✔
352
}
17,670✔
353

354
}  // namespace typeart::analysis
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc