• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

STEllAR-GROUP / hpx / #882

31 Aug 2023 07:44PM UTC coverage: 41.798% (-44.7%) from 86.546%
#882

push

19442 of 46514 relevant lines covered (41.8%)

126375.38 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/examples/transpose/transpose_smp_block.cpp
1
//  Copyright (c) 2014 Thomas Heller
2
//
3
//  SPDX-License-Identifier: BSL-1.0
4
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
5
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6

7
#include <hpx/algorithm.hpp>
8
#include <hpx/init.hpp>
9
#include <hpx/modules/iterator_support.hpp>
10
#include <hpx/numeric.hpp>
11

12
#include <algorithm>
13
#include <cstdint>
14
#include <exception>
15
#include <iostream>
16
#include <vector>
17

18
// Element (row, col) of the input matrix is initialised to
// COL_SHIFT * col + ROW_SHIFT * row, so every element encodes its own
// coordinates and the transposed result can be checked analytically.
// Typed constants are used instead of object-like macros so the values are
// scoped and type-checked.
constexpr double COL_SHIFT = 1000.00;    // Constant to shift column index
constexpr double ROW_SHIFT = 0.001;      // Constant to shift row index

// Enables additional diagnostic output; assigned in hpx_main from the
// "verbose" command line flag and read by test_results.
bool verbose = false;

// Storage of one block of the matrix.
using block = std::vector<double>;
// Non-owning pointer to the first element of a square sub-block inside a
// block.
using sub_block = double*;

// Writes the transpose of the block_order x block_order sub-block A into B,
// optionally working in tile_size x tile_size tiles.
void transpose(sub_block A, sub_block B, std::uint64_t block_order,
    std::uint64_t tile_size);

// Returns the summed squared difference between trans and the analytically
// expected transposed matrix.
double test_results(std::uint64_t order, std::uint64_t block_order,
    std::vector<block> const& trans);
30

31
///////////////////////////////////////////////////////////////////////////////
32
// Entry point executed on the HPX runtime.
//
// Builds a matrix A stored as num_blocks blocks of order * block_order
// elements each, repeatedly computes B = A^T one sub-block at a time in
// parallel, times every iteration, validates B against the analytically
// known result and prints a throughput summary.
//
// vm: parsed command line. Reads "matrix_size", "iterations" and
//     "num_blocks", plus the optional "tile_size" and "verbose" entries.
//
// Returns the result of hpx::local::finalize() after a validated run, or 1
// (after shutting the runtime down) when the arguments cannot describe a
// valid blocking. Calls std::terminate() if the transposed matrix does not
// validate.
int hpx_main(hpx::program_options::variables_map& vm)
{
    std::uint64_t const order = vm["matrix_size"].as<std::uint64_t>();
    std::uint64_t const iterations = vm["iterations"].as<std::uint64_t>();
    std::uint64_t const num_blocks = vm["num_blocks"].as<std::uint64_t>();

    // Without an explicit tile size the tile is as large as the whole matrix,
    // which makes transpose() take its untiled path.
    std::uint64_t tile_size = order;
    if (vm.count("tile_size"))
        tile_size = vm["tile_size"].as<std::uint64_t>();

    verbose = vm.count("verbose") ? true : false;

    // Reject arguments the blocking scheme cannot handle. num_blocks == 0
    // would divide by zero; an order that is zero or smaller than num_blocks
    // yields empty blocks (which would be indexed below and then "validate"
    // without any work being done); an order that is not a multiple of
    // num_blocks leaves the trailing rows untransposed.
    if (num_blocks == 0 || order == 0 || order % num_blocks != 0)
    {
        std::cerr << "ERROR: matrix_size (" << order
                  << ") must be a positive multiple of num_blocks ("
                  << num_blocks << ")\n";
        hpx::local::finalize();
        return 1;
    }

    // A zero tile size would never advance the tiled loops.
    if (tile_size == 0)
    {
        std::cerr << "ERROR: tile_size must be greater than zero\n";
        hpx::local::finalize();
        return 1;
    }

    // Every element is read once from A and written once to B.
    std::uint64_t const bytes =
        static_cast<std::uint64_t>(2 * sizeof(double) * order * order);

    std::uint64_t const block_order = order / num_blocks;
    std::uint64_t const col_block_size = order * block_order;
    // Number of elements in one square block_order x block_order sub-block.
    std::uint64_t const block_size = block_order * block_order;

    std::vector<block> A(num_blocks, block(col_block_size));
    std::vector<block> B(num_blocks, block(col_block_size));

    std::cout << "Serial Matrix transpose: B = A^T\n"
              << "Matrix order          = " << order << "\n";
    if (tile_size < order)
        std::cout << "Tile size             = " << tile_size << "\n";
    else
        std::cout << "Untiled\n";
    std::cout << "Number of iterations  = " << iterations << "\n";

    using hpx::execution::par;
    using hpx::execution::task;
    using hpx::ranges::for_each;

    std::uint64_t const start = 0;

    // Block indices [0, num_blocks); shared by initialisation and by every
    // timed iteration.
    auto range = hpx::util::counting_shape(start, num_blocks);

    // Fill the original matrix, set transpose to known garbage value.
    for_each(par, range, [&](std::uint64_t b) {
        for (std::uint64_t i = 0; i < order; ++i)
        {
            for (std::uint64_t j = 0; j < block_order; ++j)
            {
                double col_val =
                    COL_SHIFT * static_cast<double>(b * block_order + j);

                A[b][i * block_order + j] =
                    col_val + ROW_SHIFT * static_cast<double>(i);
                B[b][i * block_order + j] = -1.0;
            }
        }
    });

    double errsq = 0.0;
    double avgtime = 0.0;
    double maxtime = 0.0;
    double mintime =
        366.0 * 24.0 * 3600.0;    // set the minimum time to a large value;
                                  // one leap year should be enough
    for (std::uint64_t iter = 0; iter < iterations; ++iter)
    {
        hpx::chrono::high_resolution_timer t;

        std::vector<hpx::shared_future<void>> transpose_futures;
        transpose_futures.resize(num_blocks);

        // For every destination block b, asynchronously launch one task set
        // that transposes each of its sub-blocks ("phases"). Each outer
        // invocation writes only its own slot of transpose_futures.
        for_each(par, range, [&](std::uint64_t b) {
            transpose_futures[b] =
                for_each(par(task), range, [&, b](std::uint64_t phase) {
                    // Sub-block `phase` of destination block b is the
                    // transpose of sub-block b of source block `phase`.
                    std::uint64_t const from_block = phase;
                    std::uint64_t const from_phase = b;
                    std::uint64_t const A_offset = from_phase * block_size;
                    std::uint64_t const B_offset = phase * block_size;

                    transpose(&A[from_block][A_offset], &B[b][B_offset],
                        block_order, tile_size);
                }).share();
        });

        // Everything captured by reference above stays alive until here.
        hpx::wait_all(transpose_futures);

        double elapsed = t.elapsed();

        if (iter > 0 || iterations == 1)    // Skip the first iteration
        {
            avgtime = avgtime + elapsed;
            maxtime = (std::max) (maxtime, elapsed);
            mintime = (std::min) (mintime, elapsed);
        }

        errsq += test_results(order, block_order, B);
    }    // end of iter loop

    // Analyze and output results

    double epsilon = 1.e-8;
    if (errsq < epsilon)
    {
        std::cout << "Solution validates\n";
        // The first iteration is excluded from the timings unless it was the
        // only one, hence the divisor of max(iterations - 1, 1).
        avgtime = avgtime /
            static_cast<double>(
                (std::max) (iterations - 1, static_cast<std::uint64_t>(1)));
        std::cout << "Rate (MB/s): "
                  << 1.e-6 * static_cast<double>(bytes) / mintime << ", "
                  << "Avg time (s): " << avgtime << ", "
                  << "Min time (s): " << mintime << ", "
                  << "Max time (s): " << maxtime << "\n";

        if (verbose)
            std::cout << "Squared errors: " << errsq << "\n";
    }
    else
    {
        std::cout << "ERROR: Aggregate squared error " << errsq
                  << " exceeds threshold " << epsilon << "\n";
        std::terminate();
    }

    return hpx::local::finalize();
}
×
154

155
int main(int argc, char* argv[])
156
{
157
    using namespace hpx::program_options;
×
158

159
    options_description desc_commandline;
×
160
    // clang-format off
×
161
    desc_commandline.add_options()
162
        ("matrix_size", value<std::uint64_t>()->default_value(1024),
×
163
         "Matrix Size")
164
        ("iterations", value<std::uint64_t>()->default_value(10),
×
165
         "# iterations")
166
        ("tile_size", value<std::uint64_t>(),
167
         "Number of tiles to divide the individual matrix blocks for improved "
×
168
         "cache and TLB performance")
169
        ("num_blocks", value<std::uint64_t>()->default_value(256),
170
         "Number of blocks to divide the individual matrix blocks for improved "
×
171
         "cache and TLB performance")
172
        ( "verbose", "Verbose output")
173
    ;
174
    // clang-format on
×
175

×
176
    hpx::local::init_params init_args;
177
    init_args.desc_cmdline = desc_commandline;
×
178

×
179
    return hpx::local::init(hpx_main, argc, argv, init_args);
180
}
×
181

182
void transpose(sub_block A, sub_block B, std::uint64_t block_order,
183
    std::uint64_t tile_size)
×
184
{
185
    if (tile_size < block_order)
×
186
    {
187
        for (std::uint64_t i = 0; i < block_order; i += tile_size)
×
188
        {
189
            for (std::uint64_t j = 0; j < block_order; j += tile_size)
×
190
            {
×
191
                std::uint64_t i_max = (std::min) (block_order, i + tile_size);
192
                std::uint64_t j_max = (std::min) (block_order, j + tile_size);
×
193

194
                for (std::uint64_t it = i; it < i_max; ++it)
×
195
                {
196
                    for (std::uint64_t jt = j; jt < j_max; ++jt)
×
197
                    {
198
                        B[it + block_order * jt] = A[jt + block_order * it];
199
                    }
200
                }
201
            }
202
        }
203
    }
204
    else
×
205
    {
206
        for (std::uint64_t i = 0; i < block_order; ++i)
×
207
        {
208
            for (std::uint64_t j = 0; j < block_order; ++j)
×
209
            {
210
                B[i + block_order * j] = A[j + block_order * i];
211
            }
212
        }
×
213
    }
214
}
×
215

216
double test_results(std::uint64_t order, std::uint64_t block_order,
217
    std::vector<block> const& trans)
218
{
219
    using hpx::transform_reduce;
220
    using hpx::execution::par;
221

222
    std::uint64_t const start = 0;
223
    std::uint64_t const end = trans.size();
224

225
    // Fill the original matrix, set transpose to known garbage value.
226
    auto range = hpx::util::counting_shape(start, end);
227
    double errsq = transform_reduce(
×
228
        par, std::begin(range), std::end(range), 0.0,
×
229
        [](double lhs, double rhs) { return lhs + rhs; },
230
        [&](std::uint64_t b) -> double {
×
231
            double errsq = 0.0;
232
            for (std::uint64_t i = 0; i < order; ++i)
×
233
            {
×
234
                double col_val = COL_SHIFT * static_cast<double>(i);
235
                for (std::uint64_t j = 0; j < block_order; ++j)
×
236
                {
×
237
                    double diff = trans[b][i * block_order + j] -
×
238
                        (col_val +
×
239
                            ROW_SHIFT *
×
240
                                static_cast<double>(b * block_order + j));
241
                    errsq += diff * diff;
242
                }
×
243
            }
244
            return errsq;
245
        });
×
246

×
247
    if (verbose)
248
        std::cout << " Squared sum of differences: " << errsq << "\n";
×
249

250
    return errsq;
251
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc