• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ahueck / llvm-dimeta / 23300805560

19 Mar 2026 02:49PM UTC coverage: 73.206% (-0.4%) from 73.626%
23300805560

push

github

web-flow
Initial Fortran Support (#49)

2176 of 3670 branches covered (59.29%)

Branch coverage included in aggregate %.

388 of 445 new or added lines in 15 files covered. (87.19%)

19 existing lines in 5 files now uncovered.

2578 of 2824 relevant lines covered (91.29%)

14323.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.43
/lib/type/DITypeExtractor.cpp
1

2
//  llvm-dimeta library
3
//  Copyright (c) 2022-2025 llvm-dimeta authors
4
//  Distributed under the BSD 3-Clause license.
5
//  (See accompanying file LICENSE)
6
//  SPDX-License-Identifier: BSD-3-Clause
7
//
8

9
#include "DIFinder.h"
10
#include "DIFortranTypeExtractor.h"
11
#include "DIPath.h"
12
#include "DIRootType.h"
13
#include "DIUtil.h"
14
#include "DataflowAnalysis.h"
15
#include "DefUseAnalysis.h"
16
#include "DimetaData.h"
17
#include "GEP.h"
18
#include "TBAA.h"
19
#include "Util.h"
20
#include "ValuePath.h"
21
#include "support/Logger.h"
22

23
#include "llvm/ADT/ArrayRef.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/ilist_iterator.h"
27
#include "llvm/BinaryFormat/Dwarf.h"
28
#include "llvm/Config/llvm-config.h"
29
#include "llvm/IR/Argument.h"
30
#include "llvm/IR/Constants.h"
31
#include "llvm/IR/DebugInfoMetadata.h"
32
#include "llvm/IR/Function.h"
33
#include "llvm/IR/GlobalVariable.h"
34
#include "llvm/IR/InstIterator.h"
35
#include "llvm/IR/InstrTypes.h"
36
#include "llvm/IR/Instructions.h"
37
#include "llvm/IR/IntrinsicInst.h"
38
#include "llvm/IR/Metadata.h"
39
#include "llvm/IR/Operator.h"
40
#include "llvm/IR/Value.h"
41
#include "llvm/Support/Casting.h"
42
#include "llvm/Support/Debug.h"
43
#include "llvm/Support/ErrorHandling.h"
44
#include "llvm/Support/raw_ostream.h"
45

46
#include <cassert>
47
#include <iterator>
48
#include <optional>
49
#include <type_traits>
50
#include <unordered_map>
51
#include <utility>
52

53
namespace dimeta::type {
54

55
namespace reset {
56

57
using GepToDIMemberMap = std::unordered_map<const llvm::GEPOperator*, llvm::DIDerivedType*>;
58

59
namespace detail {
60

61
template <typename T, typename InstTy>
62
std::optional<const T*> get_operand_to(const InstTy* memory_instruction) {
27,590✔
63
  static_assert(std::is_same_v<InstTy, llvm::StoreInst> || std::is_same_v<InstTy, llvm::LoadInst>,
64
                "Expected load or store instruction");
65
  const auto* memory_target = memory_instruction->getPointerOperand();
27,590✔
66
  if (llvm::isa<T>(memory_target)) {
27,590✔
67
    return llvm::dyn_cast<T>(memory_target);
6,953✔
68
  }
69

70
  if (auto bcast = llvm::dyn_cast<llvm::BitCastInst>(memory_target)) {
21,755!
71
    if (llvm::isa<T>(bcast->getOperand(0))) {
1,118!
72
      return llvm::dyn_cast<T>(bcast->getOperand(0));
222✔
73
    }
74
  }
896✔
75

76
  return {};
20,415✔
77
}
27,590✔
78

79
bool is_array_gep_with_non_const_indices(const llvm::GetElementPtrInst* gep) {
2,856✔
80
#if LLVM_VERSION_MAJOR > 12
81
  auto indices = gep->indices();
2,856✔
82
#else
83
  auto indices = llvm::make_range(gep->idx_begin(), gep->idx_end());
84
#endif
85
  for (const auto& index : indices) {
7,038✔
86
    if (!llvm::isa<llvm::ConstantInt>(index.get())) {
4,182✔
87
      return true;
1,064✔
88
    }
89
  }
4,182✔
90
  return false;
1,792✔
91
}
2,856✔
92

93
bool is_array_gep(const llvm::GetElementPtrInst* gep) {
750✔
94
  if (!gep) {
750✔
95
    return false;
328✔
96
  }
97
  return detail::is_array_gep_with_non_const_indices(gep) || gep->getSourceElementType()->isArrayTy();
422✔
98
}
750✔
99

100
}  // namespace detail
101

102
template <typename T>
103
bool store_to(const llvm::StoreInst* store) {
14,916✔
104
  return detail::get_operand_to<T>(store).has_value();
14,916✔
105
}
106

107
template <typename T>
108
bool load_to(const llvm::LoadInst* load) {
7,754✔
109
  return detail::get_operand_to<T>(load).has_value();
7,754✔
110
}
111

UNCOV
112
bool load_for_array_gep(const llvm::LoadInst* load) {
×
UNCOV
113
  for (const auto* user : load->users()) {
×
UNCOV
114
    if (const auto* gep = llvm::dyn_cast<llvm::GetElementPtrInst>(user)) {
×
UNCOV
115
      return detail::is_array_gep(gep);
×
116
    }
UNCOV
117
  }
×
UNCOV
118
  return false;
×
UNCOV
119
}
×
120

121
bool load_of_array_gep(const llvm::LoadInst* load) {
750✔
122
  return detail::is_array_gep(llvm::dyn_cast<llvm::GetElementPtrInst>(load->getPointerOperand()));
750✔
123
}
124

125
bool store_to_array_gep(const llvm::StoreInst* store) {
3,232✔
126
  auto gep = detail::get_operand_to<llvm::GetElementPtrInst>(store);
3,232✔
127
  if (!gep) {
3,232✔
128
    return false;
798✔
129
  }
130
  return detail::is_array_gep_with_non_const_indices(gep.value());
2,434✔
131
}
3,232✔
132

133
std::optional<llvm::DIType*> reset_load_related_basic(const dataflow::ValuePath& path, llvm::DIType* type_to_reset,
3,619✔
134
                                                      const llvm::LoadInst* load) {
135
  auto* type = type_to_reset;
3,619✔
136

137
  if (load_to<llvm::GlobalVariable>(load) || load_to<llvm::AllocaInst>(load)) {
3,619✔
138
    LOG_DEBUG("Do not reset DIType based on load to global,alloca")
139
    return type;
1,931✔
140
  }
141

142
  // Fortran test 11_...F90:
143
  if (auto gep = detail::get_operand_to<llvm::GetElementPtrInst>(load)) {
3,064✔
144
    const bool fortran_descriptor = fortran::is_fortran_descriptor(gep.value()->getSourceElementType());
1,376✔
145
    if (fortran_descriptor) {
1,376✔
146
      // auto comp   = di::util::desugar(*type);
147
      // auto result = di::util::resolve_byte_offset_to_member_of(comp.value(), 0);
148
      // return result->type_of_member;
149
      return type;
72✔
150
    }
151
  }
1,376✔
152

153
  if (di::util::is_array_member(*type)) {
1,616✔
154
    auto* base_type = llvm::dyn_cast<llvm::DIDerivedType>(type)->getBaseType();
188✔
155
    LOG_DEBUG("Load of array-like " << log::ditype_str(base_type))
156
    // auto type_de = di::util::desugar(*base_type, 1);
157
    if (auto underlying_type = llvm::dyn_cast<llvm::DICompositeType>(base_type)->getBaseType()) {
376!
158
      return underlying_type;
188✔
159
    }
160
  }
188!
161

162
  const auto try_resolve_to_first_member = [&](llvm::DIType* candidate_type) -> std::optional<llvm::DIType*> {
1,866✔
163
    auto comp = di::util::desugar(*candidate_type);
438✔
164
    LOG_DEBUG("Desugared load to " << log::ditype_str(comp.value_or(nullptr)));
165
    if (!comp) {
438✔
166
      return {};
114✔
167
    }
168
    LOG_DEBUG("Loading first pointer member?");
169
    auto result = di::util::resolve_byte_offset_to_member_of(comp.value(), 0);
324✔
170
    if (result) {
324✔
171
      LOG_DEBUG("Return type of load " << log::ditype_str(result->type_of_member.value_or(nullptr)));
172
      return result->type_of_member;
282✔
173
    }
174
    return {};
42✔
175
  };
438✔
176

177
  // a (last?) load to a GEP of a composite likely loads the first member in an optimized context:
178
  const bool last_load     = path.start_value().value_or(nullptr) == load;
1,428✔
179
  const bool is_not_member = !di::util::is_member(*type);
1,428✔
180
  if (is_not_member || last_load) {
1,428✔
181
    const bool is_not_arg_load = !load_to<llvm::Argument>(load);
750✔
182
    // test Fortran 17 (first allocate): SROA optimization: load on argument selects first member of struct:
183
    // const bool is_not_array_gep = !load_of_array_gep(load) && !load_for_array_gep(load);
184
    if (!load_of_array_gep(load)) {  // && (is_not_arg_load || last_load)) {
750✔
185
      if (auto resolved = try_resolve_to_first_member(type)) {
720✔
186
        return resolved.value();
282✔
187
      }
188
    }
156✔
189
  }
750✔
190

191
  if (auto* maybe_ptr_to_type = llvm::dyn_cast<llvm::DIDerivedType>(type)) {
2,292!
192
    if (di::util::is_pointer(*maybe_ptr_to_type)) {
1,146✔
193
      LOG_DEBUG("Load of pointer-like " << log::ditype_str(maybe_ptr_to_type))
194
    }
390✔
195
    auto* base_type = maybe_ptr_to_type->getBaseType();
1,146✔
196

197
#if DIMETA_USE_TBAA == 1
198
    if (auto* composite = llvm::dyn_cast<llvm::DICompositeType>(base_type)) {
450✔
199
      LOG_DEBUG("Have ptr to composite " << log::ditype_str(composite))
200
      auto type_tbaa = tbaa::resolve_tbaa(base_type, *load);
20✔
201
      if (type_tbaa) {
20✔
202
        return type_tbaa;
10✔
203
      }
204
    }
14✔
205
#endif
206
    return base_type;
1,136✔
207
  }
1,146✔
208

209
  return type;
×
210
}
3,619✔
211

212
std::optional<llvm::DIType*> reset_store_related_basic(const dataflow::ValuePath&, llvm::DIType* type_to_reset,
4,250✔
213
                                                       const llvm::StoreInst* store_inst) {
214
  auto* type = type_to_reset;
4,250✔
215

216
  if (store_to<llvm::GlobalVariable>(store_inst) || store_to<llvm::AllocaInst>(store_inst)) {
4,250✔
217
    // Relevant in "heap_lulesh_mock_char.cpp"
218
    LOG_DEBUG("Store to alloca/global, return " << log::ditype_str(type))
219
    return type;
420✔
220
  }
221

222
  if (di::util::is_array(*type)) {
3,830✔
223
    return llvm::cast<llvm::DICompositeType>(type)->getBaseType();
134✔
224
  }
225

226
  if (!di::util::is_array_member(*type)) {
3,696✔
227
    // !di::util::is_array(*type) &&
228
    const auto is_non_pointer_member = [&](auto& type_) {
6,464✔
229
      return di::util::is_non_static_member(type_) &&
5,138✔
230
             !di::util::is_pointer_like(*llvm::cast<llvm::DIDerivedType>(&type_)->getBaseType());
1,906✔
231
    };
232

233
    auto desugared_composite = di::util::desugar(*type);
3,232✔
234
    LOG_DEBUG("Desugared " << log::ditype_str(desugared_composite.value_or(nullptr)))
235

236
    const bool is_load_target     = store_to<llvm::LoadInst>(store_inst);
3,232✔
237
    const bool is_arg_target      = store_to<llvm::Argument>(store_inst);
3,232✔
238
    const bool is_array_gep_store = store_to_array_gep(store_inst);
3,232✔
239
    const bool is_non_ptr_member  = is_non_pointer_member(*type);
3,232✔
240

241
    if (desugared_composite && (is_non_ptr_member || is_array_gep_store || is_arg_target || is_load_target)) {
3,232✔
242
      LOG_DEBUG("Storing to first pointer member?")
243
      auto result = di::util::resolve_byte_offset_to_member_of(desugared_composite.value(), 0);
372✔
244
      if (result) {
372✔
245
#if DIMETA_USE_TBAA == 1
246
        if (result->member) {
146!
247
          const auto member_name = result->member.value()->getName();
146✔
248
          const bool is_vptr     = dimeta::util::starts_with_any_of(member_name, "_vptr");
146✔
249
          // Let this be handled by TBAA is available, see test 10_lulesh_ad_tbaa_static_member.ll
250
          if (is_vptr) {
146✔
251
            return type;
6✔
252
          }
253
        }
146✔
254
#endif
255
        LOG_DEBUG("Return type of store " << log::ditype_str(result->type_of_member.value_or(nullptr)))
256
        return result->type_of_member;
340✔
257
      }
258
    }
372✔
259
  }
3,232✔
260

261
  if (!llvm::isa<llvm::DIDerivedType>(type)) {
3,350✔
262
    LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
263
    return type;
16✔
264
  }
265

266
  auto* derived_type = llvm::cast<llvm::DIDerivedType>(type);
3,334✔
267

268
  if (di::util::is_array_member(*type)) {
3,334✔
269
    auto* member_base               = derived_type->getBaseType();
464✔
270
    const bool is_array_type_member = di::util::is_array(*member_base);
464✔
271
    if (is_array_type_member) {
464!
272
      LOG_DEBUG("Store to member with type array, looks through to base type of array")
273
      // Fortran: test 17, 18: with optim, we do not detect the tag "array" otherwise:
274
      auto base_of_member = llvm::cast<llvm::DICompositeType>(member_base)->getBaseType();
464✔
275
      if (di::util::is_pointer(*base_of_member)) {
464✔
276
        return base_of_member;
416✔
277
      }
278
      return member_base;
48✔
279
      // return llvm::cast<llvm::DICompositeType>(member_base)->getBaseType();
280
    }
464✔
281
  }
464!
282

283
  if (di::util::is_non_static_member(*derived_type)) {
2,870✔
284
    auto* member_base = derived_type->getBaseType();
1,900✔
285
    return member_base;
1,900✔
286
  }
1,900✔
287

288
  if (di::util::is_pointer(*derived_type)) {
970✔
289
    if (auto* may_be_ptr_to_ptr = llvm::dyn_cast<llvm::DIDerivedType>(derived_type->getBaseType())) {
1,784✔
290
      // Pointer to pointer by default remove one level for RHS assignment type w.r.t. store:
291
      const auto is_ptr_to_ptr = di::util::is_pointer(*may_be_ptr_to_ptr);
840✔
292
      if (is_ptr_to_ptr) {
840!
293
        LOG_DEBUG("Store to ptr-ptr, return " << log::ditype_str(may_be_ptr_to_ptr))
294
        return may_be_ptr_to_ptr;
840✔
295
      }
296
    }
840!
297
  }
104✔
298

299
  LOG_DEBUG("Store resolved, return " << log::ditype_str(type))
300
  return type;
130✔
301
}
4,250✔
302

303
template <typename Iter>
304
std::optional<llvm::DIType*> reset_ditype(llvm::DIType* type_to_reset, const dataflow::ValuePath& path,
20,373✔
305
                                          const Iter& path_iter, dipath::ValueToDiPath& logged_dipath) {
306
  std::optional<llvm::DIType*> type = type_to_reset;
20,373✔
307

308
  const auto& current_value = path_iter;
20,373✔
309
  LOG_DEBUG("Type to reset: " << log::ditype_str(*type));
310
  LOG_DEBUG(">> based on IR: " << **current_value);
311

312
  if (llvm::isa<llvm::GEPOperator>(*current_value)) {
20,373✔
313
    LOG_DEBUG("Reset based on GEP")
314
    const llvm::GEPOperator* gep_op = llvm::cast<llvm::GEPOperator>(*current_value);
6,252✔
315
    const bool fortran_descriptor   = fortran::is_fortran_descriptor(gep_op->getSourceElementType());
6,252✔
316
    if (!fortran_descriptor) {
6,252✔
317
      const auto gep_result = gep::extract_gep_dereferenced_type(type.value(), *gep_op);
6,180✔
318
      if (gep_result.member && !gep_result.use_type) {
6,180!
319
        LOG_DEBUG("Using gep member type result")
320
        type = gep_result.member;
4,322✔
321
      } else if (gep_result.type) {
6,180!
322
        LOG_DEBUG("Using gep type result")
323
        type = gep_result.type;
1,858✔
324
      }
1,858✔
325
    } else {
6,180✔
326
      // Fortran test 11_...F90:
327
      LOG_DEBUG("Skipping GEP, Fortran descriptor")
328
    }
329
  } else if (const auto* load = llvm::dyn_cast<llvm::LoadInst>(*current_value)) {
34,494✔
330
    LOG_DEBUG("Reset based on load")
331
    type = reset::reset_load_related_basic(path, type.value(), load);
3,619✔
332
  } else if (const auto* store_inst = llvm::dyn_cast<llvm::StoreInst>(*current_value)) {
24,623✔
333
    LOG_DEBUG("Reset based on store")
334
    type = reset::reset_store_related_basic(path, type.value(), store_inst);
4,250✔
335
  } else {
4,250✔
336
    LOG_DEBUG(">> skipping");
337
    // logged_dipath.emplace_back(*current_value, type.value_or(nullptr));
338
    // return type;
339
  }
340

341
  logged_dipath.emplace_back(*current_value, type.value_or(nullptr));
20,373!
342

343
  return type;
344
}
20,373✔
345

346
}  // namespace reset
347

348
std::optional<llvm::DIType*> find_type(const dataflow::CallValuePath& call_path) {
5,264✔
349
  auto type = root::find_type_root(call_path);
5,264✔
350

351
  if (!type) {
5,264✔
352
    LOG_DEBUG("find_type_root failed to find a type for path " << call_path.path)
353
    return {};
42✔
354
  }
355

356
  LOG_DEBUG("IR path to analyze " << call_path.path)
357

358
  if (call_path.call) {
5,222✔
359
    const auto function       = call_path.call.value()->getCalledFunction();
4,782✔
360
    const auto fortran_handle = function ? util::starts_with_any_of(function->getName(), "_FortranAAllocatableAllocate",
4,782!
361
                                                                    "_FortranAPointerAllocate")
362
                                         : false;
363
    if (fortran_handle) {
4,782✔
364
      LOG_DEBUG("Fortran handle found " << function->getName())
365
      return fortran::extract(call_path, type);
120✔
366
    }
367
  }
4,782✔
368

369
  dipath::ValueToDiPath dipath;
5,102✔
370

371
  const auto path_end = call_path.path.path_to_value.rend();
5,102!
372
  for (auto path_iter = call_path.path.path_to_value.rbegin(); path_iter != path_end; ++path_iter) {
25,475!
373
    LOG_DEBUG("Extracted type: " << log::ditype_str(*type));
374
    type = reset::reset_ditype(type.value(), call_path.path, path_iter, dipath).value_or(type.value());
20,373!
375
    LOG_DEBUG("reset_ditype result " << log::ditype_str(type.value_or(nullptr)) << "\n")
376
    if (!type) {
20,373!
377
      break;
×
378
    }
379
  }
20,373✔
380

381
#if DIMETA_USE_TBAA == 1
382
  if (type) {
2,008!
383
    // If last node is a store inst, try to extract type via TBAA
384
    const auto* const start_node = llvm::dyn_cast_or_null<llvm::StoreInst>(*call_path.path.start_value());
2,008!
385
    if (start_node) {
2,008✔
386
      auto type_tbaa = tbaa::resolve_tbaa(type.value(), *llvm::dyn_cast<llvm::Instruction>(start_node));
1,666!
387
      if (type_tbaa) {
1,666✔
388
        dipath.emplace_back(start_node, type_tbaa.value(), "TBAA");
1,646!
389
      }
1,646✔
390
    }
1,666✔
391
  }
2,008✔
392
#endif
393

394
  LOG_DEBUG("Final mapping\n" << dipath)
395

396
  return dipath.final_type();
5,102!
397
}
5,264✔
398
}  // namespace dimeta::type
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc