• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

STEllAR-GROUP / hpx / #882

31 Aug 2023 07:44PM UTC coverage: 41.798% (-44.7%) from 86.546%
#882

push

19442 of 46514 relevant lines covered (41.8%)

126375.38 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/examples/transpose/transpose_block.cpp
1
//  Copyright (c) 2014 Thomas Heller
2
//
3
//  SPDX-License-Identifier: BSL-1.0
4
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
5
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6

7
#include <hpx/config.hpp>
8
#if !defined(HPX_COMPUTE_DEVICE_CODE)
9
#include <hpx/hpx.hpp>
10
#include <hpx/hpx_init.hpp>
11

12
#include <hpx/algorithm.hpp>
13
#include <hpx/modules/iterator_support.hpp>
14
#include <hpx/numeric.hpp>
15

16
#include <algorithm>
17
#include <cstddef>
18
#include <cstdint>
19
#include <iostream>
20
#include <memory>
21
#include <string>
22
#include <utility>
23
#include <vector>
24

25
#define COL_SHIFT 1000.00    // Constant to shift column index
26
#define ROW_SHIFT 0.001      // Constant to shift row index
27

28
bool verbose = false;
29

30
char const* A_block_basename = "/transpose/block/A";
31
char const* B_block_basename = "/transpose/block/B";
32

33
struct sub_block
34
{
35
    enum mode
36
    {
37
        reference,
38
        owning
39
    };
40

41
    sub_block()
42
      : size_(0)
×
43
      , data_(nullptr)
×
44
      , mode_(reference)
×
45
    {
46
    }
47

48
    sub_block(double* data, std::uint64_t size)
49
      : size_(size)
×
50
      , data_(data)
×
51
      , mode_(reference)
×
52
    {
53
    }
54

55
    ~sub_block()
×
56
    {
57
        if (data_ && mode_ == owning)
×
58
        {
59
            delete[] data_;
×
60
        }
61
    }
×
62

63
    sub_block(sub_block&& other)
64
      : size_(other.size_)
×
65
      , data_(other.data_)
×
66
      , mode_(other.mode_)
×
67
    {
68
        if (mode_ == owning)
×
69
        {
70
            other.data_ = nullptr;
×
71
            other.size_ = 0;
×
72
        }
73
    }
74

75
    sub_block& operator=(sub_block&& other)
76
    {
77
        size_ = other.size_;
78
        data_ = other.data_;
79
        mode_ = other.mode_;
80
        if (mode_ == owning)
81
        {
82
            other.data_ = nullptr;
83
            other.size_ = 0;
84
        }
85

86
        return *this;
87
    }
88

89
    double operator[](std::size_t i) const
90
    {
91
        HPX_ASSERT(data_);
92
        return data_[i];
×
93
    }
94

95
    double& operator[](std::size_t i)
96
    {
97
        HPX_ASSERT(data_);
98
        HPX_ASSERT(mode_ == reference);
99
        return data_[i];
×
100
    }
101

102
    void load(hpx::serialization::input_archive& ar, unsigned)
×
103
    {
104
        // clang-format off
105
        ar & size_;
×
106
        // clang-format on
107
        if (size_ > 0)
×
108
        {
109
            data_ = new double[size_];
×
110
            hpx::serialization::array<double> arr(data_, size_);
×
111
            ar >> arr;
×
112
            mode_ = owning;
×
113
        }
114
    }
×
115

116
    void save(hpx::serialization::output_archive& ar, unsigned) const
×
117
    {
118
        // clang-format off
119
        ar & size_;
×
120
        // clang-format on
121
        if (size_ > 0)
×
122
        {
123
            hpx::serialization::array<double> arr(data_, size_);
×
124
            ar << arr;
×
125
        }
126
    }
×
127

128
    HPX_SERIALIZATION_SPLIT_MEMBER()
×
129

130
    std::uint64_t size_;
131
    double* data_;
132
    mode mode_;
133
};
134

135
struct block_component : hpx::components::component_base<block_component>
136
{
137
    block_component() {}
138

139
    block_component(std::uint64_t size)
×
140
      : data_(size)
×
141
    {
142
    }
×
143

144
    sub_block get_sub_block(std::uint64_t offset, std::uint64_t size)
×
145
    {
146
        HPX_ASSERT(!data_.empty());
147
        return sub_block(&data_[offset], size);
×
148
    }
149

150
    HPX_DEFINE_COMPONENT_DIRECT_ACTION(block_component, get_sub_block)
151

152
    std::vector<double> data_;
153
};
154

155
struct block : hpx::components::client_base<block, block_component>
×
156
{
157
    typedef hpx::components::client_base<block, block_component> base_type;
158
    block() {}
×
159

160
    block(hpx::future<hpx::id_type>&& id)
161
      : base_type(std::move(id))
×
162
    {
163
    }
164

165
    block(
×
166
        std::uint64_t /* id */, std::uint64_t size, char const* /* base_name */)
167
      : base_type(hpx::new_<block_component>(hpx::find_here(), size))
×
168
    {
169
    }
×
170

171
    hpx::future<sub_block> get_sub_block(
×
172
        std::uint64_t offset, std::uint64_t size)
173
    {
174
        block_component::get_sub_block_action act;
175
        return hpx::async(act, get_id(), offset, size);
×
176
    }
177
};
178

179
// The macros below are necessary to generate the code required for exposing
180
// our partition type remotely.
181
//
182
// HPX_REGISTER_COMPONENT() exposes the component creation
183
// through hpx::new_<>().
184
typedef hpx::components::component<block_component> block_component_type;
185
HPX_REGISTER_COMPONENT(block_component_type, block_component)
×
186

187
// HPX_REGISTER_ACTION() exposes the component member function for remote
188
// invocation.
189
typedef block_component::get_sub_block_action get_sub_block_action;
190
HPX_REGISTER_ACTION(get_sub_block_action)
×
191

192
void transpose(hpx::future<sub_block> A, hpx::future<sub_block> B,
193
    std::uint64_t block_order, std::uint64_t tile_size);
194
double test_results(std::uint64_t order, std::uint64_t block_order,
195
    std::vector<block>& trans, std::uint64_t blocks_start,
196
    std::uint64_t blocks_end);
197

198
///////////////////////////////////////////////////////////////////////////////
199
int hpx_main(hpx::program_options::variables_map& vm)
×
200
{
201
    {
202
        hpx::id_type here = hpx::find_here();
×
203
        bool root = here == hpx::find_root_locality();
×
204

205
        std::uint64_t num_localities = hpx::get_num_localities().get();
×
206

207
        std::uint64_t order = vm["matrix_size"].as<std::uint64_t>();
×
208
        std::uint64_t iterations = vm["iterations"].as<std::uint64_t>();
×
209
        std::uint64_t num_local_blocks = vm["num_blocks"].as<std::uint64_t>();
×
210
        std::uint64_t tile_size = order;
×
211

212
        if (vm.count("tile_size"))
×
213
            tile_size = vm["tile_size"].as<std::uint64_t>();
×
214

215
        verbose = vm.count("verbose") ? true : false;
×
216

217
        std::uint64_t bytes =
×
218
            static_cast<std::uint64_t>(2 * sizeof(double) * order * order);
×
219

220
        std::uint64_t num_blocks = num_localities * num_local_blocks;
×
221

222
        std::uint64_t block_order = order / num_blocks;
×
223
        std::uint64_t col_block_size = order * block_order;
×
224

225
        std::uint64_t id = hpx::get_locality_id();
×
226

227
        std::vector<block> A(num_blocks);
×
228
        std::vector<block> B(num_blocks);
×
229

230
        std::uint64_t blocks_start = id * num_local_blocks;
×
231
        std::uint64_t blocks_end = (id + 1) * num_local_blocks;
×
232

233
        // First allocate and create our local blocks
234
        for (std::uint64_t b = 0; b != num_local_blocks; ++b)
×
235
        {
236
            std::uint64_t block_idx = b + blocks_start;
×
237
            A[block_idx] = block(block_idx, col_block_size, A_block_basename);
×
238
            B[block_idx] = block(block_idx, col_block_size, B_block_basename);
×
239
        }
240

241
        // Find all blocks ...
242
        std::cout << "Finding blocks ...\n";
×
243
        std::vector<hpx::future<hpx::id_type>> A_ids =
244
            hpx::find_all_from_basename(A_block_basename, num_blocks);
×
245
        std::vector<hpx::future<hpx::id_type>> B_ids =
246
            hpx::find_all_from_basename(B_block_basename, num_blocks);
×
247

248
        if (root)
×
249
        {
250
            std::cout << "Serial Matrix transpose: B = A^T\n"
251
                      << "Matrix order          = " << order << "\n"
252
                      << "Matrix local columns  = " << block_order << "\n"
×
253
                      << "Number of blocks      = " << num_blocks << "\n"
×
254
                      << "Number of localities  = " << num_localities << "\n";
×
255
            if (tile_size < order)
×
256
                std::cout << "Tile size             = " << tile_size << "\n";
×
257
            else
258
                std::cout << "Untiled\n";
×
259
            std::cout << "Number of iterations  = " << iterations << "\n";
×
260
        }
261
        using hpx::execution::par;
262
        using hpx::ranges::for_each;
263

264
        // Fill the original matrix, set transpose to known garbage value.
265
        auto range = hpx::util::counting_shape(blocks_start, blocks_end);
×
266
        for_each(par, range, [&](std::uint64_t b) {
×
267
            std::shared_ptr<block_component> A_ptr =
268
                hpx::get_ptr<block_component>(A[b].get_id()).get();
×
269
            std::shared_ptr<block_component> B_ptr =
270
                hpx::get_ptr<block_component>(B[b].get_id()).get();
×
271

272
            for (std::uint64_t i = 0; i != order; ++i)
×
273
            {
274
                for (std::uint64_t j = 0; j != block_order; ++j)
×
275
                {
276
                    double col_val =
×
277
                        COL_SHIFT * static_cast<double>(b * block_order + j);
×
278
                    A_ptr->data_[i * block_order + j] =
×
279
                        col_val + ROW_SHIFT * static_cast<double>(i);
×
280
                    B_ptr->data_[i * block_order + j] = -1.0;
281
                }
282
            }
×
283
            hpx::register_with_basename(A_block_basename, A[b].get_id(), b);
×
284
            hpx::register_with_basename(B_block_basename, B[b].get_id(), b);
×
285
        });
286

287
        hpx::wait_all(A_ids);
×
288
        std::cout << "Finding blocks A ... done\n";
289
        hpx::wait_all(B_ids);
×
290
        std::cout << "Finding blocks B ... done\n";
×
291
        for (std::uint64_t b = 0; b != num_blocks; ++b)
292
        {
293
            // Convert the gids to clients
×
294
            if (b < blocks_start || b >= blocks_end)
295
            {
×
296
                A[b] = block(std::move(A_ids[b]));
×
297
                B[b] = block(std::move(B_ids[b]));
298
            }
299
        }
300

301
        double errsq = 0.0;
302
        double avgtime = 0.0;
×
303
        double maxtime = 0.0;
×
304
        double mintime =
305
            366.0 * 24.0 * 3600.0;    // set the minimum time to a large value;
306
                                      // one leap year should be enough
×
307
        for (std::uint64_t iter = 0; iter < iterations; ++iter)
308
        {
309
            hpx::chrono::high_resolution_timer t;
310

×
311
            auto range = hpx::util::counting_shape(blocks_start, blocks_end);
312

×
313
            std::vector<hpx::future<void>> block_futures;
×
314
            block_futures.resize(num_local_blocks);
315

×
316
            for_each(par, range, [&](std::uint64_t b) {
×
317
                std::vector<hpx::future<void>> phase_futures;
×
318
                phase_futures.reserve(num_blocks);
319

320
                auto phase_range = hpx::util::counting_shape(
×
321
                    static_cast<std::uint64_t>(0), num_blocks);
×
322
                for (std::uint64_t phase : phase_range)
323
                {
×
324
                    std::uint64_t const block_size = block_order * block_order;
325
                    std::uint64_t const from_block = phase;
326
                    std::uint64_t const from_phase = b;
×
327
                    std::uint64_t const A_offset = from_phase * block_size;
×
328
                    std::uint64_t const B_offset = phase * block_size;
329

×
330
                    phase_futures.push_back(hpx::dataflow(&transpose,
×
331
                        A[from_block].get_sub_block(A_offset, block_size),
×
332
                        B[b].get_sub_block(B_offset, block_size), block_order,
×
333
                        tile_size));
334
                }
335

×
336
                block_futures[b - blocks_start] = hpx::when_all(phase_futures);
×
337
            });
338

339
            hpx::wait_all(block_futures);
340

×
341
            double elapsed = t.elapsed();
342

×
343
            if (iter > 0 || iterations == 1)    // Skip the first iteration
344
            {
×
345
                avgtime = avgtime + elapsed;
×
346
                maxtime = (std::max) (maxtime, elapsed);
×
347
                mintime = (std::min) (mintime, elapsed);
348
            }
349

×
350
            if (root)
×
351
                errsq += test_results(
352
                    order, block_order, B, blocks_start, blocks_end);
×
353
        }    // end of iter loop
354

355
        // Analyze and output results
356

357
        double epsilon = 1.e-8;
×
358
        if (root)
359
        {
×
360
            if (errsq < epsilon)
361
            {
×
362
                std::cout << "Solution validates\n";
×
363
                avgtime = avgtime /
×
364
                    static_cast<double>((std::max) (iterations - 1,
×
365
                        static_cast<std::uint64_t>(1)));
×
366
                std::cout << "Rate (MB/s): "
367
                          << 1.e-6 * static_cast<double>(bytes) / mintime
368
                          << ", "
×
369
                          << "Avg time (s): " << avgtime << ", "
370
                          << "Min time (s): " << mintime << ", "
×
371
                          << "Max time (s): " << maxtime << "\n";
×
372

373
                if (verbose)
374
                    std::cout << "Squared errors: " << errsq << "\n";
375
            }
376
            else
×
377
            {
×
378
                std::cout << "ERROR: Aggregate squared error " << errsq
379
                          << " exceeds threshold " << epsilon << "\n";
380
                hpx::terminate();
381
            }
382
        }
×
383
        {
×
384
            // unregister names...
385
            auto range = hpx::util::counting_shape(blocks_start, blocks_end);
×
386
            for (auto b : range)
×
387
            {
388
                hpx::unregister_with_basename(A_block_basename, b);
389
                hpx::unregister_with_basename(B_block_basename, b);
×
390
            }
391
        }
×
392
    }
393

394
    return hpx::finalize();
×
395
}
396

397
int main(int argc, char* argv[])
398
{
×
399
    using namespace hpx::program_options;
400

×
401
    options_description desc_commandline;
×
402
    // clang-format off
403
    desc_commandline.add_options()
×
404
        ("matrix_size", value<std::uint64_t>()->default_value(1024),
405
         "Matrix Size")
×
406
        ("iterations", value<std::uint64_t>()->default_value(10),
407
         "# iterations")
408
        ("tile_size", value<std::uint64_t>(),
×
409
         "Number of tiles to divide the individual matrix blocks for improved "
410
         "cache and TLB performance")
411
        ("num_blocks", value<std::uint64_t>()->default_value(1),
×
412
         "Number of blocks to divide the individual matrix blocks for "
413
         "improved cache and TLB performance")
414
        ( "verbose", "Verbose output")
415
    ;
416
    // clang-format on
417

×
418
    // Initialize and run HPX, this example requires to run hpx_main on all
419
    // localities
×
420
    std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1"};
×
421

×
422
    hpx::init_params init_args;
423
    init_args.desc_cmdline = desc_commandline;
×
424
    init_args.cfg = cfg;
×
425

426
    return hpx::init(argc, argv, init_args);
×
427
}
428

429
void transpose(hpx::future<sub_block> Af, hpx::future<sub_block> Bf,
×
430
    std::uint64_t block_order, std::uint64_t tile_size)
×
431
{
432
    sub_block const A(Af.get());
×
433
    sub_block B(Bf.get());
434

×
435
    if (tile_size < block_order)
436
    {
×
437
        for (std::uint64_t i = 0; i < block_order; i += tile_size)
438
        {
×
439
            for (std::uint64_t j = 0; j < block_order; j += tile_size)
×
440
            {
441
                std::uint64_t max_i = (std::min) (block_order, i + tile_size);
×
442
                std::uint64_t max_j = (std::min) (block_order, j + tile_size);
443

×
444
                for (std::uint64_t it = i; it != max_i; ++it)
445
                {
×
446
                    for (std::uint64_t jt = j; jt != max_j; ++jt)
447
                    {
448
                        B[it + block_order * jt] = A[jt + block_order * it];
449
                    }
450
                }
451
            }
452
        }
453
    }
×
454
    else
455
    {
×
456
        for (std::uint64_t i = 0; i != block_order; ++i)
457
        {
×
458
            for (std::uint64_t j = 0; j != block_order; ++j)
459
            {
460
                B[i + block_order * j] = A[j + block_order * i];
461
            }
×
462
        }
463
    }
×
464
}
465

466
double test_results(std::uint64_t order, std::uint64_t block_order,
467
    std::vector<block>& trans, std::uint64_t blocks_start,
468
    std::uint64_t blocks_end)
469
{
470
    using hpx::transform_reduce;
471
    using hpx::execution::par;
472

473
    // Fill the original matrix, set transpose to known garbage value.
474
    auto range = hpx::util::counting_shape(blocks_start, blocks_end);
×
475
    double errsq = transform_reduce(
×
476
        par, std::begin(range), std::end(range), 0.0,
477
        [](double lhs, double rhs) { return lhs + rhs; },
×
478
        [&](std::uint64_t b) -> double {
479
            sub_block trans_block =
×
480
                trans[b].get_sub_block(0, order * block_order).get();
481
            double errsq = 0.0;
×
482
            for (std::uint64_t i = 0; i < order; ++i)
×
483
            {
484
                double col_val = COL_SHIFT * static_cast<double>(i);
×
485
                for (std::uint64_t j = 0; j < block_order; ++j)
×
486
                {
×
487
                    double diff = trans_block[i * block_order + j] -
×
488
                        (col_val +
×
489
                            ROW_SHIFT *
490
                                static_cast<double>(b * block_order + j));
491
                    errsq += diff * diff;
×
492
                }
493
            }
494
            return errsq;
×
495
        });
×
496

497
    if (verbose)
×
498
        std::cout << " Squared sum of differences: " << errsq << "\n";
499

500
    return errsq;
501
}
502
#endif
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc