• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

celerity / celerity-runtime / 11609757631

31 Oct 2024 09:55AM UTC coverage: 95.249% (+0.05%) from 95.198%
11609757631

Pull #295

github

fknorr
Update benchmark results for new TDAG structure
Pull Request #295: Remove cross-thread task lookup by `task_id`

3034 of 3420 branches covered (88.71%)

Branch coverage included in aggregate %.

152 of 152 new or added lines in 15 files covered. (100.0%)

1 existing line in 1 file now uncovered.

6730 of 6831 relevant lines covered (98.52%)

1464480.12 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.51
/src/print_graph.cc
1
#include "print_graph.h"
2

3
#include <fmt/format.h>
4

5
#include "access_modes.h"
6
#include "grid.h"
7
#include "instruction_graph.h"
8
#include "log.h"
9
#include "print_utils.h"
10
#include "recorders.h"
11
#include "task.h"
12

13
#include <map>
14
#include <set>
15
#include <unordered_map>
16

17
namespace celerity::detail {
18

19
const char* dependency_style(const dependency_kind kind, const dependency_origin origin) {
348✔
20
        if(kind == dependency_kind::anti_dep) return "color=limegreen";
348✔
21
        switch(origin) {
330✔
22
        case dependency_origin::collective_group_serialization: return "color=blue";
4✔
23
        case dependency_origin::execution_front: return "color=orange";
130✔
24
        case dependency_origin::last_epoch: return "color=orchid";
73✔
25
        default: return "";
123✔
26
        }
27
}
28

29
const char* task_type_string(const task_type tt) {
285✔
30
        switch(tt) {
285!
31
        case task_type::epoch: return "epoch";
118✔
32
        case task_type::host_compute: return "host-compute";
18✔
33
        case task_type::device_compute: return "device-compute";
97✔
34
        case task_type::collective: return "collective host";
8✔
35
        case task_type::master_node: return "master-node host";
4✔
36
        case task_type::horizon: return "horizon";
36✔
37
        case task_type::fence: return "fence";
4✔
38
        default: return "unknown";
×
39
        }
40
}
41

42
void format_requirements(std::string& label, const reduction_list& reductions, const access_list& accesses, const side_effect_map& side_effects,
198✔
43
    const access_mode reduction_init_mode) {
44
        for(const auto& [rid, bid, buffer_name, init_from_buffer] : reductions) {
211✔
45
                auto rmode = init_from_buffer ? reduction_init_mode : sycl::access::mode::discard_write;
13✔
46
                const region scalar_region(box<3>({0, 0, 0}, {1, 1, 1}));
13✔
47
                const std::string bl = utils::escape_for_dot_label(utils::make_buffer_debug_label(bid, buffer_name));
13✔
48
                fmt::format_to(std::back_inserter(label), "<br/>(R{}) <i>{}</i> {} {}", rid, detail::access::mode_traits::name(rmode), bl, scalar_region);
26✔
49
        }
13✔
50

51
        for(const auto& [bid, buffer_name, mode, req] : accesses) {
343✔
52
                const std::string bl = utils::escape_for_dot_label(utils::make_buffer_debug_label(bid, buffer_name));
145✔
53
                // While uncommon, we do support chunks that don't require access to a particular buffer at all.
54
                if(!req.empty()) { fmt::format_to(std::back_inserter(label), "<br/><i>{}</i> {} {}", detail::access::mode_traits::name(mode), bl, req); }
290!
55
        }
145✔
56

57
        for(const auto& [hoid, order] : side_effects) {
198!
58
                fmt::format_to(std::back_inserter(label), "<br/><i>affect</i> H{}", hoid);
×
59
        }
60
}
198✔
61

62
std::string get_task_label(const task_record& tsk) {
128✔
63
        std::string label;
128✔
64
        fmt::format_to(std::back_inserter(label), "T{}", tsk.tid);
128✔
65
        if(!tsk.debug_name.empty()) { fmt::format_to(std::back_inserter(label), " \"{}\"", utils::escape_for_dot_label(tsk.debug_name)); }
171✔
66

67
        fmt::format_to(std::back_inserter(label), "<br/><b>{}</b>", task_type_string(tsk.type));
256✔
68
        if(tsk.type == task_type::host_compute || tsk.type == task_type::device_compute) {
128✔
69
                fmt::format_to(std::back_inserter(label), " {}", subrange<3>{tsk.geometry.global_offset, tsk.geometry.global_size});
118✔
70
        } else if(tsk.type == task_type::collective) {
69!
71
                fmt::format_to(std::back_inserter(label), " in CG{}", tsk.cgid);
×
72
        }
73

74
        format_requirements(label, tsk.reductions, tsk.accesses, tsk.side_effect_map, access_mode::read_write);
128✔
75

76
        return label;
128✔
77
}
×
78

79
/// Returns the opening of a DOT digraph named `G` whose graph label is `title`.
///
/// The returned string leaves the graph's brace open; callers append the graph body and the closing "}".
std::string make_graph_preamble(const std::string& title) {
        // "{{" is the fmt escape for a literal opening brace.
        std::string preamble = fmt::format("digraph G{{label=<{}>;pad=0.2;", title);
        return preamble;
}
67✔
80

81
std::string print_task_graph(const task_recorder& recorder, const std::string& title) {
18✔
82
        std::string dot = make_graph_preamble(title);
18✔
83

84
        CELERITY_DEBUG("print_task_graph, {} entries", recorder.get_tasks().size());
18✔
85

86
        for(const auto& tsk : recorder.get_tasks()) {
146✔
87
                const char* shape = tsk.type == task_type::epoch || tsk.type == task_type::horizon ? "ellipse" : "box style=rounded";
128✔
88
                fmt::format_to(std::back_inserter(dot), "{}[shape={} label=<{}>];", tsk.tid, shape, get_task_label(tsk));
256✔
89
                for(auto d : tsk.dependencies) {
261✔
90
                        fmt::format_to(std::back_inserter(dot), "{}->{}[{}];", d.node, tsk.tid, dependency_style(d.kind, d.origin));
266✔
91
                }
92
        }
93

94
        dot += "}";
18✔
95
        return dot;
18✔
96
}
×
97

98
/// Returns the HTML-like label text for an epoch command with the given action.
///
/// Any action other than the four handled here ends up in utils::unreachable().
const char* print_epoch_label(epoch_action action) {
        if(action == epoch_action::none) { return "<b>epoch</b>"; }
        if(action == epoch_action::init) { return "<b>epoch</b> (init)"; }
        if(action == epoch_action::barrier) { return "<b>epoch</b> (barrier)"; }
        if(action == epoch_action::shutdown) { return "<b>epoch</b> (shutdown)"; }
        utils::unreachable(); // LCOV_EXCL_LINE
}
107

108
std::string print_command_graph(const node_id local_nid, const command_recorder& recorder, const std::string& title) {
22✔
109
        std::string main_dot;
22✔
110
        std::map<task_id, std::string> task_subgraph_dot; // this map must be ordered!
22✔
111

112
        const auto local_to_global_id = [local_nid](uint64_t id) {
22✔
113
                // IDs in the DOT language may not start with a digit (unless the whole thing is a numeral)
114
                return fmt::format("id_{}_{}", local_nid, id);
772✔
115
        };
22✔
116

117
        const auto get_subgraph = [&](const task_command_record& task_cmd) {
22✔
118
                if(!task_subgraph_dot.contains(task_cmd.tid)) {
157!
119
                        std::string task_label;
157✔
120
                        fmt::format_to(std::back_inserter(task_label), "T{} ", task_cmd.tid);
157✔
121
                        if(!task_cmd.debug_name.empty()) { fmt::format_to(std::back_inserter(task_label), "\"{}\" ", utils::escape_for_dot_label(task_cmd.debug_name)); }
193✔
122
                        task_label += "(";
157✔
123
                        task_label += task_type_string(task_cmd.type);
157✔
124
                        if(task_cmd.type == task_type::collective) { fmt::format_to(std::back_inserter(task_label), " on CG{}", task_cmd.cgid); }
157✔
125
                        task_label += ")";
157✔
126

127
                        task_subgraph_dot.emplace(task_cmd.tid,
314✔
128
                            fmt::format("subgraph cluster_{}{{label=<<font color=\"#606060\">{}</font>>;color=darkgray;", local_to_global_id(task_cmd.tid), task_label));
314✔
129
                }
157✔
130
                return &task_subgraph_dot[task_cmd.tid];
157✔
131
        };
22✔
132

133
        const auto get_buffer_label = [](const buffer_id bid, const std::string& debug_name) {
22✔
134
                return utils::escape_for_dot_label(utils::make_buffer_debug_label(bid, debug_name));
32✔
135
        };
136

137
        // we want to iterate over our command records in a sorted order, without moving everything around, and we aren't in C++20 (yet)
138
        std::vector<const command_record*> sorted_cmd_pointers;
22✔
139
        for(const auto& cmd : recorder.get_graph_nodes()) {
207✔
140
                sorted_cmd_pointers.push_back(cmd.get());
185✔
141
        }
142
        std::sort(sorted_cmd_pointers.begin(), sorted_cmd_pointers.end(), [](const command_record* a, const command_record* b) { return a->id < b->id; });
348✔
143

144
        for(const auto& cmd : sorted_cmd_pointers) {
207✔
145
                std::string* output = &main_dot;
185✔
146
                if(utils::isa<task_command_record>(cmd)) { output = get_subgraph(dynamic_cast<const task_command_record&>(*cmd)); }
185!
147
                auto back = std::back_inserter(*output);
185✔
148

149
                const auto begin_node = [&](const command_record& cmd, const std::string_view& shape, const std::string_view& color) {
185✔
150
                        fmt::format_to(back, "{}[color={},shape={},label=<C{} on N{}<br/>", local_to_global_id(cmd.id), color, shape, cmd.id, local_nid);
370✔
151
                };
185✔
152
                const auto end_node = [&] { fmt::format_to(back, ">];"); };
370✔
153

154
                const auto add_reduction_id_if_reduction = [&](const transfer_id trid) {
185✔
155
                        if(trid.rid != 0) { fmt::format_to(back, "(R{}) ", trid.rid); }
22✔
156
                };
207✔
157
                const auto list_completed_reductions = [&](const std::vector<reduction_id>& completed_reductions) {
185✔
158
                        for(const auto rid : completed_reductions) {
97✔
159
                                fmt::format_to(back, "<br/>completed R{}", rid);
10✔
160
                        }
161
                };
87✔
162

163
                matchbox::match(
185✔
164
                    *cmd,
165
                    [&](const push_command_record& pcmd) {
185✔
166
                            begin_node(pcmd, "ellipse", "deeppink2");
12✔
167
                            add_reduction_id_if_reduction(pcmd.trid);
12✔
168
                            fmt::format_to(back, "<b>push</b> {}", pcmd.trid);
12✔
169
                            if(!pcmd.buffer_name.empty()) { fmt::format_to(back, " {}", utils::escape_for_dot_label(pcmd.buffer_name)); }
12!
170
                            fmt::format_to(back, "<br/>");
12✔
171
                            for(size_t i = 0; i < pcmd.target_regions.size(); ++i) {
40✔
172
                                    const auto& [nid, region] = pcmd.target_regions[i];
28✔
173
                                    fmt::format_to(back, "{} to N{}", region, nid);
28✔
174
                                    if(i < pcmd.target_regions.size() + 1) { *output += "<br/>"; }
28!
175
                            }
176
                            end_node();
12✔
177
                    },
12✔
178
                    [&](const await_push_command_record& apcmd) {
370✔
179
                            begin_node(apcmd, "ellipse", "deeppink2");
10✔
180
                            add_reduction_id_if_reduction(apcmd.trid);
10✔
181
                            fmt::format_to(back, "<b>await push</b> {} <br/>{} {}", apcmd.trid, get_buffer_label(apcmd.trid.bid, apcmd.buffer_name), apcmd.await_region);
20✔
182
                            end_node();
10✔
183
                    },
10✔
184
                    [&](const reduction_command_record& rcmd) {
370✔
185
                            begin_node(rcmd, "ellipse", "blue");
6✔
186
                            const region scalar_region(box<3>({0, 0, 0}, {1, 1, 1}));
6✔
187
                            fmt::format_to(back, "<b>reduction</b> R{}<br/> {} {}", rcmd.rid, get_buffer_label(rcmd.bid, rcmd.buffer_name), scalar_region);
12✔
188
                            if(!rcmd.has_local_contribution) { *output += "<br/>(no local contribution)"; }
6!
189
                            end_node();
6✔
190
                    },
12✔
191
                    [&](const epoch_command_record& ecmd) {
370✔
192
                            begin_node(ecmd, "box", "black");
68✔
193
                            *output += print_epoch_label(ecmd.action);
68✔
194
                            list_completed_reductions(ecmd.completed_reductions);
68✔
195
                            end_node();
68✔
196
                    },
68✔
197
                    [&](const horizon_command_record& hcmd) {
370✔
198
                            begin_node(hcmd, "box", "black");
19✔
199
                            *output += "<b>horizon</b>";
19✔
200
                            list_completed_reductions(hcmd.completed_reductions);
19✔
201
                            end_node();
19✔
202
                    },
19✔
203
                    [&](const execution_command_record& ecmd) {
370✔
204
                            begin_node(ecmd, "box", "darkorange2");
66✔
205
                            fmt::format_to(back, "<b>execution</b> {}", ecmd.execution_range);
66✔
206
                            auto reduction_init_mode = ecmd.is_reduction_initializer ? access_mode::read_write : access_mode::discard_write;
66!
207
                            format_requirements(*output, ecmd.reductions, ecmd.accesses, ecmd.side_effects, reduction_init_mode);
66✔
208
                            end_node();
66✔
209
                    },
66✔
210
                    [&](const fence_command_record& fcmd) {
370✔
211
                            begin_node(fcmd, "box", "darkorange");
4✔
212
                            *output += "<b>fence</b>";
4✔
213
                            format_requirements(*output, reduction_list{}, fcmd.accesses, fcmd.side_effects, access_mode::discard_write);
4✔
214
                            end_node();
4✔
215
                    });
4✔
216
        };
217

218
        // Sort and deduplicate edges
219
        struct dependency_edge {
220
                command_id predecessor;
221
                command_id successor;
222
        };
223
        struct dependency_edge_order {
224
                bool operator()(const dependency_edge& lhs, const dependency_edge& rhs) const {
1,323✔
225
                        if(lhs.predecessor < rhs.predecessor) return true;
1,323✔
226
                        if(lhs.predecessor > rhs.predecessor) return false;
535✔
227
                        return lhs.successor < rhs.successor;
312✔
228
                }
229
        };
230
        struct dependency_kind_order {
231
                bool operator()(const std::pair<dependency_kind, dependency_origin>& lhs, const std::pair<dependency_kind, dependency_origin>& rhs) const {
181✔
232
                        return (lhs.first == dependency_kind::true_dep && rhs.first != dependency_kind::true_dep);
181✔
233
                }
234
        };
235
        std::map<dependency_edge, std::set<std::pair<dependency_kind, dependency_origin>, dependency_kind_order>, dependency_edge_order>
236
            dependencies_by_edge; // ordered and unique
22✔
237
        for(const auto& dep : recorder.get_dependencies()) {
311✔
238
                dependencies_by_edge[{dep.predecessor, dep.successor}].insert(std::pair{dep.kind, dep.origin});
289✔
239
        }
240
        for(const auto& [edge, meta] : dependencies_by_edge) {
237✔
241
                // If there's at most two edges, take the first one (likely a true dependency followed by an anti-dependency). If there's more, bail (don't style).
242
                const auto style = meta.size() <= 2 ? dependency_style(meta.begin()->first, meta.begin()->second) : std::string{};
645✔
243
                fmt::format_to(std::back_inserter(main_dot), "{}->{}[{}];", local_to_global_id(edge.predecessor), local_to_global_id(edge.successor), style);
430✔
244
        }
215✔
245

246
        std::string result_dot = make_graph_preamble(title);
22✔
247
        for(auto& [_, sg_dot] : task_subgraph_dot) {
179✔
248
                result_dot += sg_dot;
157✔
249
                result_dot += "}";
157✔
250
        }
251
        result_dot += main_dot;
22✔
252
        result_dot += "}";
22✔
253
        return result_dot;
44✔
254
} // namespace celerity::detail
22✔
255

256
std::string combine_command_graphs(const std::vector<std::string>& graphs, const std::string& title) {
9✔
257
        const auto preamble = make_graph_preamble(title);
9✔
258
        std::string result_dot = make_graph_preamble(title);
9✔
259
        for(const auto& g : graphs) {
27✔
260
                result_dot += g.substr(preamble.size(), g.size() - preamble.size() - 1);
18✔
261
        }
262
        result_dot += "}";
9✔
263
        return result_dot;
18✔
264
}
9✔
265

266
std::string print_buffer_label(const buffer_id bid, const std::string& buffer_name = {}) {
155✔
267
        return utils::escape_for_dot_label(utils::make_buffer_debug_label(bid, buffer_name));
310✔
268
}
269

270
std::string instruction_dependency_style(const instruction_dependency_origin origin) {
259✔
271
        switch(origin) {
259!
272
        case instruction_dependency_origin::allocation_lifetime: return "color=cyan3";
252✔
273
        case instruction_dependency_origin::write_to_allocation: return "color=limegreen";
36✔
274
        case instruction_dependency_origin::read_from_allocation: return {};
40✔
275
        case instruction_dependency_origin::side_effect: return {};
3✔
276
        case instruction_dependency_origin::collective_group_order: return "color=blue";
6✔
277
        case instruction_dependency_origin::last_epoch: return "color=orchid";
111✔
278
        case instruction_dependency_origin::execution_front: return "color=orange";
237✔
279
        case instruction_dependency_origin::split_receive: return "color=gray";
6✔
280
        default: utils::unreachable(); // LCOV_EXCL_LINE
281
        }
282
}
283

284
std::string print_instruction_graph(const instruction_recorder& irec, const command_recorder& crec, const task_recorder& trec, const std::string& title) {
9✔
285
        std::string dot = make_graph_preamble(title);
9✔
286
        const auto back = std::back_inserter(dot);
9✔
287

288
        const auto begin_node = [&](const instruction_record& instr, const std::string_view& shape, const std::string_view& color) {
9✔
289
                fmt::format_to(back, "I{}[color={},shape={},label=<", instr.id, color, shape);
198✔
290
        };
207✔
291

292
        const auto end_node = [&] { fmt::format_to(back, ">];"); };
207✔
293

294
        const auto print_instruction_graph_garbage = [&](const instruction_garbage& garbage) {
9✔
295
                for(const auto rid : garbage.reductions) {
46✔
296
                        fmt::format_to(back, "<br/>collect R{}", rid);
3✔
297
                }
298
                for(const auto aid : garbage.user_allocations) {
49✔
299
                        fmt::format_to(back, "<br/>collect {}", aid);
6✔
300
                }
301
        };
43✔
302

303
        std::unordered_map<message_id, instruction_id> send_instructions_by_message_id; // for connecting pilot messages to send instructions
9✔
304
        for(const auto& instr : irec.get_graph_nodes()) {
207✔
305
                matchbox::match(
396✔
306
                    *instr,
198✔
307
                    [&](const clone_collective_group_instruction_record& ccginstr) {
396✔
308
                            begin_node(ccginstr, "ellipse", "darkred");
1✔
309
                            fmt::format_to(back, "I{}<br/><b>clone collective group</b><br/>CG{} -&gt; CG{}", ccginstr.id, ccginstr.original_collective_group_id,
1✔
310
                                ccginstr.new_collective_group_id);
1✔
311
                            end_node();
1✔
312
                    },
1✔
313
                    [&](const alloc_instruction_record& ainstr) {
396✔
314
                            begin_node(ainstr, "ellipse", "cyan3");
37✔
315
                            fmt::format_to(back, "I{}<br/>", ainstr.id);
37✔
316
                            switch(ainstr.origin) {
37!
317
                            case alloc_instruction_record::alloc_origin::buffer: dot += "buffer "; break;
31✔
318
                            case alloc_instruction_record::alloc_origin::gather: dot += "gather "; break;
5✔
319
                            case alloc_instruction_record::alloc_origin::staging: dot += "staging "; break;
1✔
320
                            }
321
                            fmt::format_to(back, "<b>alloc</b> {}", ainstr.allocation_id);
37✔
322
                            if(ainstr.buffer_allocation.has_value()) {
37✔
323
                                    fmt::format_to(back, "<br/>for {} {}", print_buffer_label(ainstr.buffer_allocation->buffer_id, ainstr.buffer_allocation->buffer_name),
72✔
324
                                        ainstr.buffer_allocation->box);
36✔
325
                                    if(ainstr.num_chunks.has_value()) { fmt::format_to(back, " x{}", *ainstr.num_chunks); }
36✔
326
                            }
327
                            fmt::format_to(back, "<br/>{} % {} bytes", fmt::group_digits(ainstr.size_bytes), ainstr.alignment_bytes);
74✔
328
                            end_node();
37✔
329
                    },
37✔
330
                    [&](const free_instruction_record& finstr) {
396✔
331
                            begin_node(finstr, "ellipse", "cyan3");
37✔
332
                            fmt::format_to(back, "I{}<br/>", finstr.id);
37✔
333
                            fmt::format_to(back, "<b>free</b> {}", finstr.allocation_id);
37✔
334
                            if(finstr.buffer_allocation.has_value()) {
37✔
335
                                    fmt::format_to(back, "<br/>{} {}", print_buffer_label(finstr.buffer_allocation->buffer_id, finstr.buffer_allocation->buffer_name),
62✔
336
                                        finstr.buffer_allocation->box);
31✔
337
                            }
338
                            fmt::format_to(back, " <br/>{} bytes", fmt::group_digits(finstr.size));
74✔
339
                            end_node();
37✔
340
                    },
37✔
341
                    [&](const copy_instruction_record& cinstr) {
396✔
342
                            begin_node(cinstr, "ellipse,margin=0", "green3");
20✔
343
                            fmt::format_to(back, "I{}<br/>", cinstr.id);
20✔
344
                            switch(cinstr.origin) {
20!
345
                            case copy_instruction_record::copy_origin::resize: dot += "resize "; break;
×
346
                            case copy_instruction_record::copy_origin::coherence: dot += "coherence "; break;
10✔
347
                            case copy_instruction_record::copy_origin::gather: dot += "gather "; break;
7✔
348
                            case copy_instruction_record::copy_origin::fence: dot += "fence "; break;
1✔
349
                            case copy_instruction_record::copy_origin::staging: dot += "staging "; break;
2✔
350
                            case copy_instruction_record::copy_origin::linearizing: dot += "linearizing "; break;
×
351
                            case copy_instruction_record::copy_origin::delinearizing: dot += "delinearizing "; break;
×
352
                            }
353
                            fmt::format_to(back, "<b>copy</b><br/>from {} {}<br/>to {} {}<br/>{} {} x{} bytes<br/>{} bytes total", cinstr.source_allocation_id,
20✔
354
                                cinstr.source_layout, cinstr.dest_allocation_id, cinstr.dest_layout, print_buffer_label(cinstr.buffer_id, cinstr.buffer_name),
40✔
355
                                cinstr.copy_region, cinstr.element_size, fmt::group_digits(cinstr.copy_region.get_area() * cinstr.element_size));
40✔
356
                            end_node();
20✔
357
                    },
20✔
358
                    [&](const device_kernel_instruction_record& dkinstr) {
396✔
359
                            begin_node(dkinstr, "box,margin=0.2,style=rounded", "darkorange2");
30✔
360
                            fmt::format_to(back, "I{}", dkinstr.id);
30✔
361
                            fmt::format_to(
30✔
362
                                back, " (device-compute T{}, execution C{})<br/><b>device kernel</b>", dkinstr.command_group_task_id, dkinstr.execution_command_id);
30✔
363
                            if(!dkinstr.debug_name.empty()) { fmt::format_to(back, " {}", utils::escape_for_dot_label(dkinstr.debug_name)); }
48✔
364
                            fmt::format_to(back, "<br/>on D{} {}", dkinstr.device_id, dkinstr.execution_range);
30✔
365

366
                            for(const auto& access : dkinstr.access_map) {
64✔
367
                                    const auto accessed_box_in_allocation = box( //
34✔
368
                                        access.accessed_box_in_buffer.get_min() - access.allocated_box_in_buffer.get_min(),
68✔
369
                                        access.accessed_box_in_buffer.get_max() - access.allocated_box_in_buffer.get_min());
102✔
370
                                    fmt::format_to(back, "<br/>+ access {} {}", print_buffer_label(access.buffer_id, access.buffer_name), access.accessed_box_in_buffer);
68✔
371
                                    fmt::format_to(back, "<br/>via {} {}", access.allocation_id, accessed_box_in_allocation);
34✔
372
                            }
373
                            for(const auto& access : dkinstr.reduction_map) {
35✔
374
                                    const auto accessed_box_in_allocation = box( //
5✔
375
                                        access.accessed_box_in_buffer.get_min() - access.allocated_box_in_buffer.get_min(),
10✔
376
                                        access.accessed_box_in_buffer.get_max() - access.allocated_box_in_buffer.get_min());
15✔
377
                                    fmt::format_to(back, "<br/>+ (R{}) reduce into {} {}", access.reduction_id, print_buffer_label(access.buffer_id, access.buffer_name),
10✔
378
                                        access.accessed_box_in_buffer);
5✔
379
                                    fmt::format_to(back, "<br/>via {} {}", access.allocation_id, accessed_box_in_allocation);
5✔
380
                            }
381
                            end_node();
30✔
382
                    },
30✔
383
                    [&](const host_task_instruction_record& htinstr) {
396✔
384
                            begin_node(htinstr, "box,margin=0.2,style=rounded", "darkorange2");
9✔
385
                            fmt::format_to(back, "I{}", htinstr.id);
9✔
386
                            // TODO does not correctly label master-node host tasks
387
                            fmt::format_to(back, " ({} T{}, execution C{})<br/><b>host task</b>",
9✔
388
                                htinstr.collective_group_id != non_collective_group_id ? fmt::format("CG{} collective-host", htinstr.collective_group_id) : "host-compute",
34✔
389
                                htinstr.command_group_task_id, htinstr.execution_command_id);
9✔
390
                            if(!htinstr.debug_name.empty()) { fmt::format_to(back, " {}", utils::escape_for_dot_label(htinstr.debug_name)); }
9!
391
                            fmt::format_to(back, "<br/>on host {}", htinstr.execution_range);
9✔
392

393
                            for(const auto& access : htinstr.access_map) {
19✔
394
                                    const auto accessed_box_in_allocation = box( //
10✔
395
                                        access.accessed_box_in_buffer.get_min() - access.allocated_box_in_buffer.get_min(),
20✔
396
                                        access.accessed_box_in_buffer.get_max() - access.allocated_box_in_buffer.get_min());
30✔
397
                                    fmt::format_to(back, "<br/>+ access {} {}", print_buffer_label(access.buffer_id, access.buffer_name), access.accessed_box_in_buffer);
20✔
398
                                    fmt::format_to(back, "<br/>via {} {}", access.allocation_id, accessed_box_in_allocation);
10✔
399
                            }
400
                            end_node();
9✔
401
                    },
9✔
402
                    [&](const send_instruction_record& sinstr) {
396✔
403
                            begin_node(sinstr, "box,margin=0.2,style=rounded", "deeppink2");
3✔
404
                            fmt::format_to(back, "I{} (push C{})", sinstr.id, sinstr.push_cid);
3✔
405
                            fmt::format_to(back, "<br/><b>send</b> {}", sinstr.transfer_id);
3✔
406
                            fmt::format_to(back, "<br/>to N{} MSG{}", sinstr.dest_node_id, sinstr.message_id);
3✔
407
                            fmt::format_to(back, "<br/>{} {}", print_buffer_label(sinstr.transfer_id.bid, sinstr.buffer_name),
9✔
408
                                box(subrange(sinstr.offset_in_buffer, sinstr.send_range)));
6✔
409
                            fmt::format_to(back, "<br/>via {} {}", sinstr.source_allocation_id, box(subrange(sinstr.offset_in_source_allocation, sinstr.send_range)));
6✔
410
                            fmt::format_to(back, "<br/>{}x{} bytes", sinstr.send_range, sinstr.element_size);
3✔
411
                            fmt::format_to(back, "<br/>{} bytes total", fmt::group_digits(sinstr.send_range.size() * sinstr.element_size));
6✔
412
                            send_instructions_by_message_id.emplace(sinstr.message_id, sinstr.id);
3✔
413
                            end_node();
3✔
414
                    },
3✔
415
                    [&](const receive_instruction_record& rinstr) {
396✔
416
                            begin_node(rinstr, "box,margin=0.2,style=rounded", "deeppink2");
1✔
417
                            fmt::format_to(back, "I{} (await-push C{})", rinstr.id, irec.get_await_push_command_id(rinstr.transfer_id));
2✔
418
                            fmt::format_to(back, "<br/><b>receive</b> {}", rinstr.transfer_id);
1✔
419
                            fmt::format_to(back, "<br/>{} {}", print_buffer_label(rinstr.transfer_id.bid, rinstr.buffer_name), rinstr.requested_region);
2✔
420
                            fmt::format_to(back, "<br/>into {} (B{} {})", rinstr.dest_allocation_id, rinstr.transfer_id.bid, rinstr.allocated_box);
1✔
421
                            fmt::format_to(back, "<br/>x{} bytes", rinstr.element_size);
1✔
422
                            fmt::format_to(back, "<br/>{} bytes total", fmt::group_digits(rinstr.requested_region.get_area() * rinstr.element_size));
2✔
423
                            end_node();
1✔
424
                    },
1✔
425
                    [&](const split_receive_instruction_record& srinstr) {
396✔
426
                            begin_node(srinstr, "box,margin=0.2,style=rounded", "deeppink2");
1✔
427
                            fmt::format_to(back, "I{} (await-push C{})", srinstr.id, irec.get_await_push_command_id(srinstr.transfer_id));
2✔
428
                            fmt::format_to(back, "<br/><b>split receive</b> {}", srinstr.transfer_id);
1✔
429
                            fmt::format_to(back, "<br/>{} {}", print_buffer_label(srinstr.transfer_id.bid, srinstr.buffer_name), srinstr.requested_region);
2✔
430
                            fmt::format_to(back, "<br/>into {} (B{} {})", srinstr.dest_allocation_id, srinstr.transfer_id.bid, srinstr.allocated_box);
1✔
431
                            fmt::format_to(back, "<br/>x{} bytes", srinstr.element_size);
1✔
432
                            fmt::format_to(back, "<br/>{} bytes total", fmt::group_digits(srinstr.requested_region.get_area() * srinstr.element_size));
2✔
433
                            end_node();
1✔
434
                    },
1✔
435
                    [&](const await_receive_instruction_record& arinstr) {
396✔
436
                            begin_node(arinstr, "box,margin=0.2,style=rounded", "deeppink2");
2✔
437
                            fmt::format_to(back, "I{} (await-push C{})", arinstr.id, irec.get_await_push_command_id(arinstr.transfer_id));
4✔
438
                            fmt::format_to(back, "<br/><b>await receive</b> {}", arinstr.transfer_id);
2✔
439
                            fmt::format_to(back, "<br/>{} {}", print_buffer_label(arinstr.transfer_id.bid, arinstr.buffer_name), arinstr.received_region);
4✔
440
                            end_node();
2✔
441
                    },
2✔
442
                    [&](const gather_receive_instruction_record& grinstr) {
396✔
443
                            begin_node(grinstr, "box,margin=0.2,style=rounded", "deeppink2");
3✔
444
                            fmt::format_to(back, "I{} (await-push C{})", grinstr.id, irec.get_await_push_command_id(grinstr.transfer_id));
6✔
445
                            fmt::format_to(back, "<br/><b>gather receive</b> {}", grinstr.transfer_id);
3✔
446
                            fmt::format_to(back, "<br/>{} {} x{}", print_buffer_label(grinstr.transfer_id.bid, grinstr.buffer_name), grinstr.gather_box, grinstr.num_nodes);
6✔
447
                            fmt::format_to(back, "<br/>into {}", grinstr.allocation_id);
3✔
448
                            end_node();
3✔
449
                    },
3✔
450
                    [&](const fill_identity_instruction_record& fiinstr) {
396✔
451
                            begin_node(fiinstr, "ellipse", "blue");
3✔
452
                            fmt::format_to(back, "I{}", fiinstr.id);
3✔
453
                            fmt::format_to(back, "<br/><b>fill identity</b> for R{}", fiinstr.reduction_id);
3✔
454
                            fmt::format_to(back, "<br/>{} x{}", fiinstr.allocation_id, fiinstr.num_values);
3✔
455
                            end_node();
3✔
456
                    },
3✔
457
                    [&](const reduce_instruction_record& rinstr) {
396✔
458
                            begin_node(rinstr, rinstr.reduction_command_id.has_value() ? "box,margin=0.2,style=rounded" : "ellipse", "blue");
5✔
459
                            fmt::format_to(back, "I{}", rinstr.id);
5✔
460
                            if(rinstr.reduction_command_id.has_value()) { fmt::format_to(back, " (reduction C{})", *rinstr.reduction_command_id); }
5✔
461
                            fmt::format_to(back, "<br/>{} <b>reduce</b> B{}.R{}", rinstr.scope == reduce_instruction_record::reduction_scope::global ? "global" : "local",
10✔
462
                                rinstr.buffer_id, rinstr.reduction_id);
5✔
463
                            fmt::format_to(back, "<br/>{} {}", print_buffer_label(rinstr.buffer_id, rinstr.buffer_name), rinstr.box);
10✔
464
                            fmt::format_to(back, "<br/>from {} x{}", rinstr.source_allocation_id, rinstr.num_source_values);
5✔
465
                            fmt::format_to(back, "<br/>to {} x1", rinstr.dest_allocation_id);
5✔
466
                            end_node();
5✔
467
                    },
5✔
468
                    [&](const fence_instruction_record& finstr) {
396✔
469
                            begin_node(finstr, "box,margin=0.2,style=rounded", "darkorange");
2✔
470
                            fmt::format_to(back, "I{} (T{}, C{})<br/><b>fence</b><br/>", finstr.id, finstr.tid, finstr.cid);
2✔
471
                            matchbox::match(
2✔
472
                                finstr.variant, //
2✔
473
                                [&](const fence_instruction_record::buffer_variant& buffer) {
2✔
474
                                        fmt::format_to(back, "{} {}", print_buffer_label(buffer.bid, buffer.name), buffer.box);
2✔
475
                                },
1✔
476
                                [&](const fence_instruction_record::host_object_variant& obj) { fmt::format_to(back, "H{}", obj.hoid); });
5✔
477
                            end_node();
2✔
478
                    },
2✔
479
                    [&](const destroy_host_object_instruction_record& dhoinstr) {
396✔
480
                            begin_node(dhoinstr, "ellipse", "black");
1✔
481
                            fmt::format_to(back, "I{}<br/><b>destroy</b> H{}", dhoinstr.id, dhoinstr.host_object_id);
1✔
482
                            end_node();
1✔
483
                    },
1✔
484
                    [&](const horizon_instruction_record& hinstr) {
396✔
485
                            begin_node(hinstr, "box,margin=0.2,style=rounded", "black");
9✔
486
                            fmt::format_to(back, "I{} (T{}, C{})<br/><b>horizon</b>", hinstr.id, hinstr.horizon_task_id, hinstr.horizon_command_id);
9✔
487
                            print_instruction_graph_garbage(hinstr.garbage);
9✔
488
                            end_node();
9✔
489
                    },
9✔
490
                    [&](const epoch_instruction_record& einstr) {
396✔
491
                            begin_node(einstr, "box,margin=0.2,style=rounded", "black");
34✔
492
                            fmt::format_to(back, "I{} (T{}, C{})<br/>{}", einstr.id, einstr.epoch_task_id, einstr.epoch_command_id, print_epoch_label(einstr.epoch_action));
68✔
493
                            print_instruction_graph_garbage(einstr.garbage);
34✔
494
                            end_node();
34✔
495
                    });
34✔
496
        }
497

498
        // Identifies one directed edge of the printed graph by the ids of its two endpoint instructions.
        // Serves as the key type of the `dependencies_by_edge` map declared further down in this function.
        struct dependency_edge {
499
                // id of the instruction at the tail of the edge (printed on the left of "->")
                instruction_id predecessor;
500
                // id of the instruction at the head of the edge (printed on the right of "->")
                instruction_id successor;
501
        };
502
        // Comparator for dependency_edge: orders edges lexicographically, first by predecessor, then by successor.
        // Passed as the ordering of the `dependencies_by_edge` map, so two edges with equal predecessor and
        // successor are treated as the same key and edges are iterated in ascending (predecessor, successor) order.
        struct dependency_edge_order {
503
                // Returns true iff `lhs` sorts strictly before `rhs`.
                bool operator()(const dependency_edge& lhs, const dependency_edge& rhs) const {
2,242✔
504
                        // the predecessor decides the order whenever the two predecessors differ
                        if(lhs.predecessor < rhs.predecessor) return true;
2,242✔
505
                        if(lhs.predecessor > rhs.predecessor) return false;
678✔
506
                        // predecessors compare equal: the successor breaks the tie
                        return lhs.successor < rhs.successor;
334✔
507
                }
508
        };
509
        std::map<dependency_edge, std::set<instruction_dependency_origin>, dependency_edge_order> dependencies_by_edge; // ordered and unique
9✔
510
        for(const auto& dep : irec.get_dependencies()) {
306✔
511
                dependencies_by_edge[{dep.predecessor, dep.successor}].insert(dep.origin);
297✔
512
        }
513
        for(const auto& [edge, origins] : dependencies_by_edge) {
287✔
514
                const auto style = origins.size() == 1 ? instruction_dependency_style(*origins.begin()) : std::string{};
278!
515
                fmt::format_to(back, "I{}->I{}[{}];", edge.predecessor, edge.successor, style);
278✔
516
        }
278✔
517

518
        for(const auto& pilot : irec.get_outbound_pilots()) {
12✔
519
                fmt::format_to(back,
6✔
520
                    "P{}[margin=0.25,shape=cds,color=\"#606060\",label=<<font color=\"#606060\"><b>pilot</b> to N{} MSG{}<br/>{}<br/>for {} {}</font>>];",
521
                    pilot.message.id, pilot.to, pilot.message.id, pilot.message.transfer_id, print_buffer_label(pilot.message.transfer_id.bid), pilot.message.box);
6✔
522
                if(auto it = send_instructions_by_message_id.find(pilot.message.id); it != send_instructions_by_message_id.end()) {
3!
523
                        fmt::format_to(back, "P{}->I{}[dir=none,style=dashed,color=\"#606060\"];", pilot.message.id, it->second);
3✔
524
                }
525
        }
526

527
        dot += "}";
9✔
528
        return dot;
18✔
529
}
9✔
530

531

532
} // namespace celerity::detail
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc