• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 19830125009

01 Dec 2025 04:38PM UTC coverage: 61.822% (-0.06%) from 61.885%
19830125009

push

github

web-flow
Merge pull request #369 from daisytuner/cleaner-flop-analysis-api

Cleaner API to FlopAnalysis to hide its internal artifacts when we on…

11 of 56 new or added lines in 7 files covered. (19.64%)

3 existing lines in 2 files now uncovered.

11254 of 18204 relevant lines covered (61.82%)

110.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

30.36
/src/data_flow/library_nodes/math/blas/dot.cpp
1
#include "sdfg/data_flow/library_nodes/math/blas/dot.h"
2
#include <stdexcept>
3
#include <string>
4

5
#include "sdfg/analysis/analysis.h"
6
#include "sdfg/builder/structured_sdfg_builder.h"
7

8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/symbolic/symbolic.h"
10

11
namespace sdfg {
12
namespace math {
13
namespace blas {
14

15
DotNode::DotNode(
1✔
16
    size_t element_id,
17
    const DebugInfo& debug_info,
18
    const graph::Vertex vertex,
19
    data_flow::DataFlowGraph& parent,
20
    const data_flow::ImplementationType& implementation_type,
21
    const BLAS_Precision& precision,
22
    symbolic::Expression n,
23
    symbolic::Expression incx,
24
    symbolic::Expression incy
25
)
26
    : BLASNode(
1✔
27
          element_id, debug_info, vertex, parent, LibraryNodeType_DOT, {"_out"}, {"x", "y"}, implementation_type, precision
1✔
28
      ),
29
      n_(n), incx_(incx), incy_(incy) {}
1✔
30

31
/// Returns the stored symbolic expression `n` (used as the loop bound in expand()).
symbolic::Expression DotNode::n() const {
    return n_;
}
×
32

33
/// Returns the stored symbolic expression `incx` (multiplied with the loop index to subscript `x` in expand()).
symbolic::Expression DotNode::incx() const {
    return incx_;
}
×
34

35
/// Returns the stored symbolic expression `incy` (multiplied with the loop index to subscript `y` in expand()).
symbolic::Expression DotNode::incy() const {
    return incy_;
}
×
36

37
/// Validation hook for the DOT node; currently performs no checks.
void DotNode::validate(const Function& function) const {
    // Nothing is validated here.
}
×
38

39
bool DotNode::expand(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
1✔
40
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
1✔
41

42
    auto& dataflow = this->get_parent();
1✔
43
    auto& block = static_cast<structured_control_flow::Block&>(*dataflow.get_parent());
1✔
44
    auto& parent = static_cast<structured_control_flow::Sequence&>(*scope_analysis.parent_scope(&block));
1✔
45
    int index = parent.index(block);
1✔
46
    auto& transition = parent.at(index).second;
1✔
47

48
    const data_flow::Memlet* iedge_x = nullptr;
1✔
49
    const data_flow::Memlet* iedge_y = nullptr;
1✔
50
    for (const auto& iedge : dataflow.in_edges(*this)) {
3✔
51
        if (iedge.dst_conn() == "x") {
2✔
52
            iedge_x = &iedge;
1✔
53
        } else if (iedge.dst_conn() == "y") {
2✔
54
            iedge_y = &iedge;
1✔
55
        }
1✔
56
    }
57

58
    const data_flow::Memlet* oedge_res = nullptr;
1✔
59
    for (const auto& oedge : dataflow.out_edges(*this)) {
1✔
60
        if (oedge.src_conn() == "_out") {
1✔
61
            oedge_res = &oedge;
1✔
62
            break;
1✔
63
        }
64
    }
65

66
    // Check if legal
67
    auto& input_node_x = static_cast<const data_flow::AccessNode&>(iedge_x->src());
1✔
68
    auto& input_node_y = static_cast<const data_flow::AccessNode&>(iedge_y->src());
1✔
69
    auto& output_node_res = static_cast<const data_flow::AccessNode&>(oedge_res->dst());
1✔
70
    if (dataflow.in_degree(input_node_x) != 0 || dataflow.in_degree(input_node_y) != 0 ||
1✔
71
        dataflow.out_degree(output_node_res) != 0) {
1✔
72
        return false;
×
73
    }
74

75
    auto& new_sequence = builder.add_sequence_before(parent, block, transition.assignments(), block.debug_info());
1✔
76

77
    std::string loop_var = builder.find_new_name("_i");
1✔
78
    builder.add_container(loop_var, types::Scalar(types::PrimitiveType::UInt64));
1✔
79

80
    auto loop_indvar = symbolic::symbol(loop_var);
1✔
81
    auto loop_init = symbolic::integer(0);
1✔
82
    auto loop_condition = symbolic::Lt(loop_indvar, this->n_);
1✔
83
    auto loop_update = symbolic::add(loop_indvar, symbolic::integer(1));
1✔
84

85
    auto& loop =
1✔
86
        builder.add_for(new_sequence, loop_indvar, loop_condition, loop_init, loop_update, {}, block.debug_info());
1✔
87
    auto& body = loop.root();
1✔
88

89
    auto& new_block = builder.add_block(body);
1✔
90

91
    auto& res_in = builder.add_access(new_block, output_node_res.data());
1✔
92
    auto& res_out = builder.add_access(new_block, output_node_res.data());
1✔
93
    auto& x = builder.add_access(new_block, input_node_x.data());
1✔
94
    auto& y = builder.add_access(new_block, input_node_y.data());
1✔
95

96
    auto& tasklet = builder.add_tasklet(new_block, data_flow::TaskletCode::fp_fma, "_out", {"_in1", "_in2", "_in3"});
1✔
97

98
    builder.add_computational_memlet(
2✔
99
        new_block,
1✔
100
        x,
1✔
101
        tasklet,
1✔
102
        "_in1",
1✔
103
        {symbolic::mul(loop_indvar, this->incx_)},
1✔
104
        iedge_x->base_type(),
1✔
105
        iedge_x->debug_info()
1✔
106
    );
107
    builder.add_computational_memlet(
2✔
108
        new_block,
1✔
109
        y,
1✔
110
        tasklet,
1✔
111
        "_in2",
1✔
112
        {symbolic::mul(loop_indvar, this->incy_)},
1✔
113
        iedge_y->base_type(),
1✔
114
        iedge_y->debug_info()
1✔
115
    );
116
    builder
2✔
117
        .add_computational_memlet(new_block, res_in, tasklet, "_in3", {}, oedge_res->base_type(), oedge_res->debug_info());
1✔
118
    builder.add_computational_memlet(
2✔
119
        new_block, tasklet, "_out", res_out, {}, oedge_res->base_type(), oedge_res->debug_info()
1✔
120
    );
121

122
    // Clean up
123
    builder.remove_memlet(block, *iedge_x);
1✔
124
    builder.remove_memlet(block, *iedge_y);
1✔
125
    builder.remove_memlet(block, *oedge_res);
1✔
126
    builder.remove_node(block, input_node_x);
1✔
127
    builder.remove_node(block, input_node_y);
1✔
128
    builder.remove_node(block, output_node_res);
1✔
129
    builder.remove_node(block, *this);
1✔
130
    builder.remove_child(parent, index + 1);
1✔
131

132
    return true;
1✔
133
}
1✔
134

NEW
135
/**
 * Symbolic floating-point operation count of the dot product:
 * n multiplications plus (n - 1) additions.
 */
symbolic::Expression DotNode::flop() const {
    const auto additions = symbolic::sub(this->n_, symbolic::one());
    return symbolic::add(this->n_, additions);
}
×
140

141
std::unique_ptr<data_flow::DataFlowNode> DotNode::
142
    clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {
×
143
    auto node_clone = std::unique_ptr<DotNode>(new DotNode(
×
144
        element_id,
×
145
        this->debug_info(),
×
146
        vertex,
×
147
        parent,
×
148
        this->implementation_type_,
×
149
        this->precision_,
×
150
        this->n_,
×
151
        this->incx_,
×
152
        this->incy_
×
153
    ));
154
    return std::move(node_clone);
×
155
}
×
156

157
/**
 * Serializes a DOT library node to JSON.
 *
 * Writes the keys "code", "precision", "n", "incx" and "incy"; the symbolic
 * expressions are converted with JSONSerializer::expression.
 *
 * @param library_node  Node to serialize; it is cast to DotNode without a check.
 */
nlohmann::json DotNodeSerializer::serialize(const data_flow::LibraryNode& library_node) {
    const auto& dot_node = static_cast<const DotNode&>(library_node);
    serializer::JSONSerializer expression_writer;

    nlohmann::json result;
    result["code"] = dot_node.code().value();
    result["precision"] = dot_node.precision();
    result["n"] = expression_writer.expression(dot_node.n());
    result["incx"] = expression_writer.expression(dot_node.incx());
    result["incy"] = expression_writer.expression(dot_node.incy());
    return result;
}
×
170

171
/**
 * Rebuilds a DOT library node from its JSON form and adds it to `parent`.
 *
 * Reads the keys "code", "debug_info", "precision", "n", "incx", "incy" and
 * "implementation_type". All of them are read with at(), so a missing key
 * raises the JSON library's exception instead of invoking undefined behaviour
 * when assertions are compiled out.
 *
 * @param j        JSON object describing the node.
 * @param builder  Builder that creates the node.
 * @param parent   Block that receives the node.
 * @return Reference to the newly added library node.
 * @throws std::runtime_error if "code" does not match LibraryNodeType_DOT.
 */
data_flow::LibraryNode& DotNodeSerializer::deserialize(
    const nlohmann::json& j, builder::StructuredSDFGBuilder& builder, structured_control_flow::Block& parent
) {
    // Assertions for required fields
    assert(j.contains("element_id"));
    assert(j.contains("code"));
    assert(j.contains("debug_info"));

    const auto code = j.at("code").get<std::string>();
    if (code != LibraryNodeType_DOT.value()) {
        throw std::runtime_error("Invalid library node code");
    }

    // Extract debug info using JSONSerializer
    sdfg::serializer::JSONSerializer serializer;
    DebugInfo debug_info = serializer.json_to_debug_info(j.at("debug_info"));

    auto precision = j.at("precision").get<BLAS_Precision>();
    auto n = symbolic::parse(j.at("n"));
    auto incx = symbolic::parse(j.at("incx"));
    auto incy = symbolic::parse(j.at("incy"));

    auto implementation_type = j.at("implementation_type").get<std::string>();

    return builder.add_library_node<DotNode>(parent, debug_info, implementation_type, precision, n, incx, incy);
}
×
197

198
/// Constructs the CBLAS dispatcher; every argument is forwarded to codegen::LibraryNodeDispatcher.
DotNodeDispatcher_BLAS::DotNodeDispatcher_BLAS(
    codegen::LanguageExtension& language_extension,
    const Function& function,
    const data_flow::DataFlowGraph& data_flow_graph,
    const DotNode& node
)
    : codegen::LibraryNodeDispatcher(language_extension, function, data_flow_graph, node) {
}
×
205

206
void DotNodeDispatcher_BLAS::dispatch_code(
×
207
    codegen::PrettyPrinter& stream,
208
    codegen::PrettyPrinter& globals_stream,
209
    codegen::CodeSnippetFactory& library_snippet_factory
210
) {
211
    stream << "{" << std::endl;
×
212
    stream.setIndent(stream.indent() + 4);
×
213

214
    auto& dot_node = static_cast<const DotNode&>(this->node_);
×
215

216
    sdfg::types::Scalar base_type(types::PrimitiveType::Void);
×
217
    BLAS_Precision precision = dot_node.precision();
×
218
    switch (precision) {
×
219
        case BLAS_Precision::h:
220
            base_type = types::Scalar(types::PrimitiveType::Half);
×
221
            break;
×
222
        case BLAS_Precision::s:
223
            base_type = types::Scalar(types::PrimitiveType::Float);
×
224
            break;
×
225
        case BLAS_Precision::d:
226
            base_type = types::Scalar(types::PrimitiveType::Double);
×
227
            break;
×
228
        default:
229
            throw std::runtime_error("Invalid BLAS_Precision value");
×
230
    }
231

232
    stream << dot_node.outputs().at(0) << " = ";
×
233
    stream << "cblas_" << BLAS_Precision_to_string(precision) << "dot(";
×
234
    stream.setIndent(stream.indent() + 4);
×
235
    stream << this->language_extension_.expression(dot_node.n());
×
236
    stream << ", ";
×
237
    stream << dot_node.inputs().at(0);
×
238
    stream << ", ";
×
239
    stream << this->language_extension_.expression(dot_node.incx());
×
240
    stream << ", ";
×
241
    stream << dot_node.inputs().at(1);
×
242
    stream << ", ";
×
243
    stream << this->language_extension_.expression(dot_node.incy());
×
244
    stream.setIndent(stream.indent() - 4);
×
245
    stream << ");" << std::endl;
×
246

247
    stream.setIndent(stream.indent() - 4);
×
248
    stream << "}" << std::endl;
×
249
}
×
250

251
/// Constructs the cuBLAS dispatcher that also emits host/device transfers; every argument is forwarded to codegen::LibraryNodeDispatcher.
DotNodeDispatcher_CUBLASWithTransfers::DotNodeDispatcher_CUBLASWithTransfers(
    codegen::LanguageExtension& language_extension,
    const Function& function,
    const data_flow::DataFlowGraph& data_flow_graph,
    const DotNode& node
)
    : codegen::LibraryNodeDispatcher(language_extension, function, data_flow_graph, node) {
}
×
258

259
void DotNodeDispatcher_CUBLASWithTransfers::dispatch_code(
×
260
    codegen::PrettyPrinter& stream,
261
    codegen::PrettyPrinter& globals_stream,
262
    codegen::CodeSnippetFactory& library_snippet_factory
263
) {
264
    auto& dot_node = static_cast<const DotNode&>(this->node_);
×
265

266
    globals_stream << "#include <cuda.h>" << std::endl;
×
267
    globals_stream << "#include <cublas_v2.h>" << std::endl;
×
268

269
    std::string type, type2;
×
270
    switch (dot_node.precision()) {
×
271
        case s:
272
            type = "float";
×
273
            type2 = "S";
×
274
            break;
×
275
        case d:
276
            type = "double";
×
277
            type2 = "D";
×
278
            break;
×
279
        default:
280
            throw std::runtime_error("Invalid precision for CUBLAS DOT node");
×
281
    }
282

283
    const std::string x_size =
284
        this->language_extension_.expression(
×
285
            symbolic::add(symbolic::mul(symbolic::sub(dot_node.n(), symbolic::one()), dot_node.incx()), symbolic::one())
×
286
        ) +
×
287
        " * sizeof(" + type + ")";
×
288
    const std::string y_size =
289
        this->language_extension_.expression(
×
290
            symbolic::add(symbolic::mul(symbolic::sub(dot_node.n(), symbolic::one()), dot_node.incy()), symbolic::one())
×
291
        ) +
×
292
        " * sizeof(" + type + ")";
×
293

294
    stream << type << " *dx, *dy;" << std::endl;
×
295
    stream << "cudaMalloc(&dx, " << x_size << ");" << std::endl;
×
296
    stream << "cudaMalloc(&dy, " << y_size << ");" << std::endl;
×
297

298
    stream << "cudaMemcpy(dx, x, " << x_size << ", cudaMemcpyHostToDevice);" << std::endl;
×
299
    stream << "cudaMemcpy(dy, y, " << y_size << ", cudaMemcpyHostToDevice);" << std::endl;
×
300

301
    stream << "cublasStatus_t err;" << std::endl;
×
302
    stream << "cublasHandle_t handle;" << std::endl;
×
303
    stream << "err = cublasCreate(&handle);" << std::endl;
×
304
    stream << "if (err != CUBLAS_STATUS_SUCCESS) {" << std::endl;
×
305
    stream.setIndent(stream.indent() + 4);
×
306
    stream << this->language_extension_.external_prefix() << "exit(1);" << std::endl;
×
307
    stream.setIndent(stream.indent() - 4);
×
308
    stream << "}" << std::endl;
×
309
    stream << "err = cublas" << type2 << "dot(handle, " << this->language_extension_.expression(dot_node.n())
×
310
           << ", dx, " << this->language_extension_.expression(dot_node.incx()) << ", dy, "
×
311
           << this->language_extension_.expression(dot_node.incy()) << ", &_out);" << std::endl;
×
312
    stream << "if (err != CUBLAS_STATUS_SUCCESS) {" << std::endl;
×
313
    stream.setIndent(stream.indent() + 4);
×
314
    stream << this->language_extension_.external_prefix() << "exit(1);" << std::endl;
×
315
    stream.setIndent(stream.indent() - 4);
×
316
    stream << "}" << std::endl;
×
317
    stream << "err = cublasDestroy(handle);" << std::endl;
×
318
    stream << "if (err != CUBLAS_STATUS_SUCCESS) {" << std::endl;
×
319
    stream.setIndent(stream.indent() + 4);
×
320
    stream << this->language_extension_.external_prefix() << "exit(1);" << std::endl;
×
321
    stream.setIndent(stream.indent() - 4);
×
322
    stream << "}" << std::endl;
×
323

324
    stream << "cudaFree(dx);" << std::endl;
×
325
    stream << "cudaFree(dy);" << std::endl;
×
326
}
×
327

328
/// Constructs the cuBLAS dispatcher that emits no transfers; every argument is forwarded to codegen::LibraryNodeDispatcher.
DotNodeDispatcher_CUBLASWithoutTransfers::DotNodeDispatcher_CUBLASWithoutTransfers(
    codegen::LanguageExtension& language_extension,
    const Function& function,
    const data_flow::DataFlowGraph& data_flow_graph,
    const DotNode& node
)
    : codegen::LibraryNodeDispatcher(language_extension, function, data_flow_graph, node) {
}
×
335

336
void DotNodeDispatcher_CUBLASWithoutTransfers::dispatch_code(
×
337
    codegen::PrettyPrinter& stream,
338
    codegen::PrettyPrinter& globals_stream,
339
    codegen::CodeSnippetFactory& library_snippet_factory
340
) {
341
    auto& dot_node = static_cast<const DotNode&>(this->node_);
×
342

343
    globals_stream << "#include <cuda.h>" << std::endl;
×
344
    globals_stream << "#include <cublas_v2.h>" << std::endl;
×
345

346
    stream << "cublasStatus_t err;" << std::endl;
×
347
    stream << "cublasHandle_t handle;" << std::endl;
×
348
    stream << "err = cublasCreate(&handle);" << std::endl;
×
349
    stream << "if (err != CUBLAS_STATUS_SUCCESS) {" << std::endl;
×
350
    stream.setIndent(stream.indent() + 4);
×
351
    stream << this->language_extension_.external_prefix() << "exit(1);" << std::endl;
×
352
    stream.setIndent(stream.indent() - 4);
×
353
    stream << "}" << std::endl;
×
354
    stream << "err = cublas";
×
355
    switch (dot_node.precision()) {
×
356
        case s:
357
            stream << "S";
×
358
            break;
×
359
        case d:
360
            stream << "D";
×
361
            break;
×
362
        default:
363
            throw std::runtime_error("Invalid precision for CUBLAS DOT node");
×
364
    }
365
    stream << "dot(handle, " << this->language_extension_.expression(dot_node.n()) << ", x, "
×
366
           << this->language_extension_.expression(dot_node.incx()) << ", y, "
×
367
           << this->language_extension_.expression(dot_node.incy()) << ", &_out);" << std::endl;
×
368
    stream << "if (err != CUBLAS_STATUS_SUCCESS) {" << std::endl;
×
369
    stream.setIndent(stream.indent() + 4);
×
370
    stream << this->language_extension_.external_prefix() << "exit(1);" << std::endl;
×
371
    stream.setIndent(stream.indent() - 4);
×
372
    stream << "}" << std::endl;
×
373
    stream << "err = cublasDestroy(handle);" << std::endl;
×
374
    stream << "if (err != CUBLAS_STATUS_SUCCESS) {" << std::endl;
×
375
    stream.setIndent(stream.indent() + 4);
×
376
    stream << this->language_extension_.external_prefix() << "exit(1);" << std::endl;
×
377
    stream.setIndent(stream.indent() - 4);
×
378
    stream << "}" << std::endl;
×
379
}
×
380

381
} // namespace blas
382
} // namespace math
383
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc