• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

STEllAR-GROUP / hpx / #882

31 Aug 2023 07:44PM UTC coverage: 41.798% (-44.7%) from 86.546%
#882

push

19442 of 46514 relevant lines covered (41.8%)

126375.38 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/examples/transpose/transpose_serial_block.cpp
1
//  Copyright (c) 2014 Thomas Heller
2
//
3
//  SPDX-License-Identifier: BSL-1.0
4
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
5
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6

7
#include <hpx/chrono.hpp>
8
#include <hpx/init.hpp>
9

10
#include <algorithm>
11
#include <cstdint>
12
#include <exception>
13
#include <iostream>
14
#include <string>
15
#include <vector>
16

17
// Scale factors used to build the synthetic matrix entries: hpx_main fills
// the input with COL_SHIFT * <column index> + ROW_SHIFT * <row index>, and
// test_results recomputes the same formula to check the transposed output.
// Typed constants are used instead of macros so the names obey normal C++
// scoping and type checking.
constexpr double COL_SHIFT = 1000.00;    // Constant to shift column index
constexpr double ROW_SHIFT = 0.001;      // Constant to shift row index

// Enables additional diagnostic output; set by hpx_main from the --verbose
// command line option and read by test_results.
bool verbose = false;

// Contiguous storage holding the elements of one block of the matrix.
using block = std::vector<double>;
// Pointer to the first element of a sub-block stored inside a block.
using sub_block = double*;

// Writes the transpose of the block_order x block_order sub-block A into B,
// optionally walking the data in tile_size x tile_size tiles.
void transpose(sub_block A, sub_block B, std::uint64_t block_order,
    std::uint64_t tile_size);

// Returns the sum of squared differences between trans and the values the
// transposed matrix is expected to contain.
double test_results(std::uint64_t order, std::uint64_t block_order,
    std::vector<block> const& trans);
29

30
///////////////////////////////////////////////////////////////////////////////
31
// Runs the serial blocked matrix transpose benchmark.
//
// Reads matrix_size, iterations, num_blocks and the optional tile_size and
// verbose options from vm, fills the input matrix with values derived from
// each element's indices, transposes it `iterations` times while timing each
// pass, validates every result with test_results and prints the timings.
//
// Returns the result of hpx::local::finalize() on success, or 1 if the
// command line parameters cannot describe a valid blocked matrix. Calls
// std::terminate() if the accumulated squared error exceeds the threshold.
int hpx_main(hpx::program_options::variables_map& vm)
{
    std::uint64_t const order = vm["matrix_size"].as<std::uint64_t>();
    std::uint64_t const iterations = vm["iterations"].as<std::uint64_t>();
    std::uint64_t const num_blocks = vm["num_blocks"].as<std::uint64_t>();

    // Without an explicit tile size one tile spans the whole matrix.
    std::uint64_t tile_size = order;
    if (vm.count("tile_size"))
        tile_size = vm["tile_size"].as<std::uint64_t>();

    verbose = vm.count("verbose") != 0;

    // Reject parameters the blocked layout cannot represent: a zero
    // num_blocks would divide by zero below, a zero matrix_size would index
    // into empty blocks, and a matrix_size that is not a multiple of
    // num_blocks would leave trailing rows untransposed so that validation
    // fails.
    if (order == 0 || num_blocks == 0 || order % num_blocks != 0)
    {
        std::cerr << "ERROR: num_blocks (" << num_blocks
                  << ") must be non-zero and evenly divide a non-zero "
                  << "matrix_size (" << order << ")\n";
        // Ask the runtime to shut down, as the regular exit path does.
        static_cast<void>(hpx::local::finalize());
        return 1;
    }

    // The tiled loops in transpose advance by tile_size, so zero would never
    // make progress.
    if (tile_size == 0)
    {
        std::cerr << "ERROR: tile_size must be greater than zero\n";
        static_cast<void>(hpx::local::finalize());
        return 1;
    }

    // One read and one write of every element per transpose.
    std::uint64_t const bytes =
        static_cast<std::uint64_t>(2 * sizeof(double) * order * order);

    std::uint64_t const block_order = order / num_blocks;
    std::uint64_t const col_block_size = order * block_order;

    // Each of the num_blocks blocks stores order * block_order elements.
    std::vector<block> A(num_blocks, block(col_block_size));
    std::vector<block> B(num_blocks, block(col_block_size));

    std::cout << "Serial Matrix transpose: B = A^T\n"
              << "Matrix order          = " << order << "\n";
    if (tile_size < order)
        std::cout << "Tile size             = " << tile_size << "\n";
    else
        std::cout << "Untiled\n";
    std::cout << "Number of iterations  = " << iterations << "\n";

    // Fill the original matrix, set transpose to known garbage value.
    for (std::uint64_t b = 0; b < num_blocks; ++b)
    {
        for (std::uint64_t i = 0; i < order; ++i)
        {
            for (std::uint64_t j = 0; j < block_order; ++j)
            {
                double const col_val =
                    COL_SHIFT * static_cast<double>(b * block_order + j);

                A[b][i * block_order + j] =
                    col_val + ROW_SHIFT * static_cast<double>(i);
                B[b][i * block_order + j] = -1.0;
            }
        }
    }

    double errsq = 0.0;
    double avgtime = 0.0;
    double maxtime = 0.0;
    double mintime =
        366.0 * 24.0 * 3600.0;    // set the minimum time to a large value;
                                  // one leap year should be enough

    // Number of elements in one block_order x block_order sub-block.
    std::uint64_t const block_size = block_order * block_order;

    for (std::uint64_t iter = 0; iter < iterations; ++iter)
    {
        hpx::chrono::high_resolution_timer t;

        for (std::uint64_t b = 0; b < num_blocks; ++b)
        {
            for (std::uint64_t phase = 0; phase < num_blocks; ++phase)
            {
                // Sub-block `b` of source block `phase` becomes sub-block
                // `phase` of destination block `b`.
                std::uint64_t const from_block = phase;
                std::uint64_t const from_phase = b;
                std::uint64_t const A_offset = from_phase * block_size;
                std::uint64_t const B_offset = phase * block_size;
                transpose(&A[from_block][A_offset], &B[b][B_offset],
                    block_order, tile_size);
            }
        }

        double const elapsed = t.elapsed();

        if (iter > 0 || iterations == 1)    // Skip the first iteration
        {
            avgtime = avgtime + elapsed;
            maxtime = (std::max) (maxtime, elapsed);
            mintime = (std::min) (mintime, elapsed);
        }

        errsq += test_results(order, block_order, B);
    }    // end of iter loop

    // Analyze and output results

    double const epsilon = 1.e-8;
    if (errsq < epsilon)
    {
        std::cout << "Solution validates\n";
        // The first iteration is excluded from the timings unless it is the
        // only one, hence the divisor of max(iterations - 1, 1).
        avgtime = avgtime /
            static_cast<double>(
                (std::max) (iterations - 1, static_cast<std::uint64_t>(1)));
        std::cout << "Rate (MB/s): "
                  << 1.e-6 * static_cast<double>(bytes) / mintime << ", "
                  << "Avg time (s): " << avgtime << ", "
                  << "Min time (s): " << mintime << ", "
                  << "Max time (s): " << maxtime << "\n";

        if (verbose)
            std::cout << "Squared errors: " << errsq << "\n";
    }
    else
    {
        std::cout << "ERROR: Aggregate squared error " << errsq
                  << " exceeds threshold " << epsilon << "\n";
        std::terminate();
    }

    return hpx::local::finalize();
}
×
140

141
int main(int argc, char* argv[])
142
{
143
    using namespace hpx::program_options;
×
144

145
    options_description desc_commandline;
×
146
    // clang-format off
×
147
    desc_commandline.add_options()
148
        ("matrix_size", value<std::uint64_t>()->default_value(1024),
×
149
         "Matrix Size")
150
        ("iterations", value<std::uint64_t>()->default_value(10),
×
151
         "# iterations")
152
        ("tile_size", value<std::uint64_t>(),
153
        "Number of tiles to divide the individual matrix blocks for improved "
×
154
         "cache and TLB performance")
155
        ("num_blocks", value<std::uint64_t>()->default_value(256),
156
        "Number of blocks to divide the individual matrix blocks for improved "
×
157
         "cache and TLB performance")
158
        ("verbose", "Verbose output");
159
    // clang-format on
160

161
    // Initialize and run HPX, this example is serial and therefore only needs
×
162
    // one thread. We just use hpx::init to parse our command line arguments
163
    std::vector<std::string> const cfg = {"hpx.os_threads!=1"};
×
164

×
165
    hpx::local::init_params init_args;
×
166
    init_args.desc_cmdline = desc_commandline;
167
    init_args.cfg = cfg;
×
168

×
169
    return hpx::local::init(hpx_main, argc, argv, init_args);
170
}
×
171

172
// Writes the transpose of one square sub-block into another.
//
// A and B each point to block_order * block_order contiguous doubles. For
// every pair (i, j) below block_order, the element A[j + block_order * i] is
// stored to B[i + block_order * j].
//
// tile_size selects the traversal order only: when it is non-zero and smaller
// than block_order the sub-block is walked in tile_size x tile_size tiles
// (the last tile in each direction may be smaller), otherwise it is walked in
// a single pass. A tile_size of zero is treated as "untiled", because a zero
// step would never advance the tiled loops.
//
// The parameters are spelled double*, which is the same type as this file's
// sub_block alias, so the earlier forward declaration still matches.
void transpose(double* A, double* B, std::uint64_t block_order,
    std::uint64_t tile_size)
{
    if (tile_size == 0 || tile_size >= block_order)
    {
        // Untiled: one pass over the whole sub-block.
        for (std::uint64_t i = 0; i < block_order; ++i)
        {
            for (std::uint64_t j = 0; j < block_order; ++j)
            {
                B[i + block_order * j] = A[j + block_order * i];
            }
        }
        return;
    }

    for (std::uint64_t i = 0; i < block_order; i += tile_size)
    {
        // Clamp the tile so a partial tile at the edge stays in range.
        std::uint64_t const i_max = (std::min) (block_order, i + tile_size);

        for (std::uint64_t j = 0; j < block_order; j += tile_size)
        {
            std::uint64_t const j_max =
                (std::min) (block_order, j + tile_size);

            for (std::uint64_t it = i; it < i_max; ++it)
            {
                for (std::uint64_t jt = j; jt < j_max; ++jt)
                {
                    B[it + block_order * jt] = A[jt + block_order * it];
                }
            }
        }
    }
}
×
205

206
double test_results(std::uint64_t order, std::uint64_t block_order,
207
    std::vector<block> const& trans)
208
{
209
    double errsq = 0.0;
×
210

211
    for (std::uint64_t b = 0; b < trans.size(); ++b)
×
212
    {
213
        for (std::uint64_t i = 0; i < order; ++i)
×
214
        {
×
215
            double col_val = COL_SHIFT * static_cast<double>(i);
216
            for (std::uint64_t j = 0; j < block_order; ++j)
×
217
            {
×
218
                double diff = trans[b][i * block_order + j] -
×
219
                    (col_val +
×
220
                        ROW_SHIFT * static_cast<double>(b * block_order + j));
221
                errsq += diff * diff;
222
            }
223
        }
224
    }
×
225

×
226
    if (verbose)
227
        std::cout << " Squared sum of differences: " << errsq << "\n";
×
228

229
    return errsq;
230
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc