• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 27873992769

20 Jun 2026 02:26PM UTC coverage: 61.751% (-0.08%) from 61.832%
27873992769

Pull #789

github

web-flow
Merge fe80e61aa into b7103c21a
Pull Request #789: Device Residency

81 of 204 new or added lines in 6 files covered. (39.71%)

26 existing lines in 2 files now uncovered.

37023 of 59955 relevant lines covered (61.75%)

1015.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

46.27
/opt/src/passes/offloading/cuda_library_node_rewriter_pass.cpp
1
#include "sdfg/passes/offloading/cuda_library_node_rewriter_pass.h"
2
#include <optional>
3

4
#include "sdfg/data_flow/library_node.h"
5
#include "sdfg/data_flow/library_nodes/math/math.h"
6
#include "sdfg/data_flow/library_nodes/stdlib/memcpy.h"
7
#include "sdfg/data_flow/library_nodes/stdlib/memset.h"
8
#include "sdfg/structured_sdfg.h"
9
#include "sdfg/symbolic/symbolic.h"
10
#include "sdfg/targets/cuda/cuda.h"
11

12
namespace sdfg {
13
namespace cuda {
14

15
std::optional<data_flow::ImplementationType> CudaLibraryNodeRewriter::
16
    try_library_node_implementation(const data_flow::LibraryNode& lib_node, types::PrimitiveType data_type) {
1✔
17
    if (data_type == types::PrimitiveType::Float || data_type == types::PrimitiveType::Double) {
1✔
18
        if (lib_node.code() == math::blas::LibraryNodeType_GEMM.value()) {
1✔
19
            auto& gemm_node = static_cast<const math::blas::GEMMNode&>(lib_node);
×
20
            return try_cublas_gemm_node_implementation(gemm_node, data_type);
×
21
        } else if (lib_node.code() == math::blas::LibraryNodeType_BatchedGEMM.value()) {
1✔
22
            return cuda::ImplementationType_CUDAWithTransfers;
1✔
23
        } else if (lib_node.code() == math::blas::LibraryNodeType_DOT.value()) {
1✔
24
            return cuda::ImplementationType_CUDAWithTransfers;
×
25
        } else if (lib_node.code() == math::blas::LibraryNodeType_BatchedGEMM.value()) {
×
26
            auto& batched_gemm_node = static_cast<const math::blas::BatchedGEMMNode&>(lib_node);
×
27
            return try_cublas_batched_gemm_node_implementation(batched_gemm_node, data_type);
×
28
        } else {
×
29
            return std::nullopt;
×
30
        }
×
31
    } else {
1✔
32
        return std::nullopt;
×
33
    }
×
34
}
1✔
35

36
std::optional<data_flow::ImplementationType> CudaLibraryNodeRewriter::
37
    try_cublas_gemm_node_implementation(const math::blas::GEMMNode& gemm_node, types::PrimitiveType data_type) {
×
38
    // Heuristic: Avoid using CUBLAS for very small matrix multiplications
39
    if (symbolic::eq(gemm_node.m(), symbolic::one()) || symbolic::eq(gemm_node.n(), symbolic::one()) ||
×
UNCOV
40
        symbolic::eq(gemm_node.k(), symbolic::one())) {
×
UNCOV
41
        return std::nullopt;
×
UNCOV
42
    }
×
UNCOV
43
    return cuda::ImplementationType_CUDAWithTransfers;
×
UNCOV
44
}
×
45

46
std::optional<data_flow::ImplementationType> CudaLibraryNodeRewriter::
47
    try_memset_implementation(const ::sdfg::stdlib::MemsetNode& memset_node) {
2✔
48
    return cuda::ImplementationType_CUDAWithTransfers;
2✔
49
}
2✔
50

51
std::optional<data_flow::ImplementationType> CudaLibraryNodeRewriter::
UNCOV
52
    try_memcpy_implementation(const ::sdfg::stdlib::MemcpyNode& memcpy_node) {
×
53
    return cuda::ImplementationType_CUDAWithTransfers;
×
UNCOV
54
}
×
55

56
std::optional<data_flow::ImplementationType> CudaLibraryNodeRewriter::try_cublas_batched_gemm_node_implementation(
57
    const math::blas::BatchedGEMMNode& batched_gemm_node, types::PrimitiveType data_type
58
) {
×
59
    // Heuristic: Avoid using CUBLAS for very small matrix multiplications
60
    auto m = batched_gemm_node.m();
×
61
    auto n = batched_gemm_node.n();
×
62
    auto k = batched_gemm_node.k();
×
63
    auto size = symbolic::mul(symbolic::mul(m, n), k);
×
UNCOV
64
    if (symbolic::eq(size, symbolic::one())) {
×
UNCOV
65
        return std::nullopt;
×
UNCOV
66
    }
×
UNCOV
67
    return cuda::ImplementationType_CUDAWithTransfers;
×
UNCOV
68
}
×
69

70
// Constructs the rewriter by forwarding the builder and the analysis manager to the
// StructuredSDFGVisitor base class; no additional state is initialised here.
CudaLibraryNodeRewriter::CudaLibraryNodeRewriter(
    builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager
)
    : visitor::StructuredSDFGVisitor(builder, analysis_manager) {
}
3✔
73

74
// Visits one block and updates the implementation type of the library nodes it contains.
//
// Every node of the block's dataflow graph is tested, independently and in this order,
// against BLASNode, MemsetNode and MemcpyNode. For each match the corresponding try_*
// function is consulted, and its result (if any) is written to the node's
// implementation_type(). Nodes of any other kind are left untouched.
//
// Always returns false.
bool CudaLibraryNodeRewriter::accept(structured_control_flow::Block& node) {
    // Writes the selection to the node; an empty optional leaves the node as it is.
    const auto assign_if_selected = [](auto& target,
                                       const std::optional<data_flow::ImplementationType>& selection) {
        if (selection.has_value()) {
            target.implementation_type() = selection.value();
        }
    };

    auto& graph = node.dataflow();
    for (auto& candidate : graph.nodes()) {
        auto* blas_node = dynamic_cast<math::blas::BLASNode*>(&candidate);
        if (blas_node != nullptr) {
            assign_if_selected(
                *blas_node, try_library_node_implementation(*blas_node, blas_node->scalar_primitive())
            );
        }

        auto* memset_node = dynamic_cast<::sdfg::stdlib::MemsetNode*>(&candidate);
        if (memset_node != nullptr) {
            assign_if_selected(*memset_node, try_memset_implementation(*memset_node));
        }

        auto* memcpy_node = dynamic_cast<::sdfg::stdlib::MemcpyNode*>(&candidate);
        if (memcpy_node != nullptr) {
            assign_if_selected(*memcpy_node, try_memcpy_implementation(*memcpy_node));
        }
    }
    return false;
}
3✔
99

100
} // namespace cuda
101
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc