• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ska-sa / spead2 / 7005465978

27 Nov 2023 01:09PM UTC coverage: 69.961% (+0.3%) from 69.614%
7005465978

push

github

bmerry
Make async_send_heap and async_send_heaps work with completion tokens

When using a completion token rather than a callback function, the
boolean return is no longer available. Unfortunately this requires
having two overloads.

Using this with boost::asio::use_future will greatly simplify the common
case of sending a heap then at a later point blocking until it is sent
or throwing if there was a problem.

85 of 86 new or added lines in 2 files covered. (98.84%)

4 existing lines in 2 files now uncovered.

4835 of 6911 relevant lines covered (69.96%)

92414.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

50.0
/src/common_memcpy.cpp
1
/* Copyright 2016, 2020, 2023 National Research Foundation (SARAO)
2
 *
3
 * This program is free software: you can redistribute it and/or modify it under
4
 * the terms of the GNU Lesser General Public License as published by the Free
5
 * Software Foundation, either version 3 of the License, or (at your option) any
6
 * later version.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
10
 * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
11
 * details.
12
 *
13
 * You should have received a copy of the GNU Lesser General Public License
14
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
 */
16

17
#include <cstddef>
18
#include <cstdint>
19
#include <cstring>
20
#include <utility>
21
#include <spead2/common_defines.h>
22
#include <spead2/common_features.h>
23
#include <spead2/common_memcpy.h>
24

25
/* SSE2 configuration for the shared copy implementation: sets the
 * SPEAD2_MEMCPY_* parameters (function name memcpy_nontemporal_sse2, target
 * "sse2", __m128i vectors, _mm_loadu_si128 loads, _mm_stream_si128 stores)
 * and then includes common_memcpy_impl.h.
 * NOTE(review): that header is not visible here; presumably it expands these
 * macros into the named function and #undefs them afterwards - confirm there
 * what SPEAD2_MEMCPY_UNROLL and SPEAD2_MEMCPY_VZEROUPPER control.
 */
#if SPEAD2_USE_SSE2_STREAM
26
# include <emmintrin.h>
27
# define SPEAD2_MEMCPY_NAME memcpy_nontemporal_sse2
28
# define SPEAD2_MEMCPY_TARGET "sse2"
29
# define SPEAD2_MEMCPY_TYPE __m128i
30
# define SPEAD2_MEMCPY_LOAD _mm_loadu_si128
31
# define SPEAD2_MEMCPY_STORE _mm_stream_si128
32
# define SPEAD2_MEMCPY_UNROLL 16
33
# define SPEAD2_MEMCPY_VZEROUPPER 0
34
# include "common_memcpy_impl.h"
35
#endif
36

37
/* AVX configuration for the shared copy implementation: sets the
 * SPEAD2_MEMCPY_* parameters (function name memcpy_nontemporal_avx, target
 * "avx", __m256i vectors, _mm256_loadu_si256 loads, _mm256_stream_si256
 * stores) and then includes common_memcpy_impl.h.
 * NOTE(review): that header is not visible here; presumably it expands these
 * macros into the named function and #undefs them afterwards - confirm there
 * what SPEAD2_MEMCPY_UNROLL and SPEAD2_MEMCPY_VZEROUPPER control.
 */
#if SPEAD2_USE_AVX_STREAM
38
# include <immintrin.h>
39
# define SPEAD2_MEMCPY_NAME memcpy_nontemporal_avx
40
# define SPEAD2_MEMCPY_TARGET "avx"
41
# define SPEAD2_MEMCPY_TYPE __m256i
42
# define SPEAD2_MEMCPY_LOAD _mm256_loadu_si256
43
# define SPEAD2_MEMCPY_STORE _mm256_stream_si256
44
# define SPEAD2_MEMCPY_UNROLL 8
45
# define SPEAD2_MEMCPY_VZEROUPPER 1
46
# include "common_memcpy_impl.h"
47
#endif
48

49
/* AVX-512 configuration for the shared copy implementation: sets the
 * SPEAD2_MEMCPY_* parameters (function name memcpy_nontemporal_avx512, target
 * "avx512f", __m512i vectors, _mm512_loadu_si512 loads, _mm512_stream_si512
 * stores) and then includes common_memcpy_impl.h.
 * NOTE(review): that header is not visible here; presumably it expands these
 * macros into the named function and #undefs them afterwards - confirm there
 * what SPEAD2_MEMCPY_UNROLL and SPEAD2_MEMCPY_VZEROUPPER control.
 */
#if SPEAD2_USE_AVX512_STREAM
50
# include <immintrin.h>
51
# define SPEAD2_MEMCPY_NAME memcpy_nontemporal_avx512
52
# define SPEAD2_MEMCPY_TARGET "avx512f"
53
# define SPEAD2_MEMCPY_TYPE __m512i
54
# define SPEAD2_MEMCPY_LOAD _mm512_loadu_si512
55
# define SPEAD2_MEMCPY_STORE _mm512_stream_si512
56
# define SPEAD2_MEMCPY_UNROLL 8
57
# define SPEAD2_MEMCPY_VZEROUPPER 1
58
# include "common_memcpy_impl.h"
59
#endif
60

61
namespace spead2
62
{
63

64
/**
 * Pick the memcpy implementation to use on the CPU this code is running on.
 *
 * The compiled-in candidates are tried in the order AVX-512, AVX, SSE2; the
 * first one whose CPU feature test passes is returned. If none is compiled
 * in, or none passes, std::memcpy is returned instead.
 *
 * @return A pointer to a function with the signature of memcpy.
 */
void *(*resolve_memcpy_nontemporal())(void *, const void *, std::size_t) noexcept
{
    using memcpy_function = void *(*)(void *, const void *, std::size_t) noexcept;

#if SPEAD2_USE_AVX512_STREAM || SPEAD2_USE_AVX_STREAM || SPEAD2_USE_SSE2_STREAM
    // Only needed when at least one __builtin_cpu_supports query follows.
    __builtin_cpu_init();
#endif

#if SPEAD2_USE_AVX512_STREAM
    /* AVX-512 lowers clock speeds on Skylake server parts. Follow Glibc's
     * rule for deciding when AVX-512 is worthwhile: AVX512F must be present
     * together with either AVX512ER or AVX512-VNNI. Glibc applies this rule
     * to Intel CPUs only, but AMD's first AVX-512 generation (Zen 4) also
     * has AVX512-VNNI and performs well, so no vendor check is made here.
     */
    if (__builtin_cpu_supports("avx512f"))
    {
        if (__builtin_cpu_supports("avx512er") || __builtin_cpu_supports("avx512vnni"))
        {
            return memcpy_nontemporal_avx512;
        }
    }
#endif

#if SPEAD2_USE_AVX_STREAM
    if (__builtin_cpu_supports("avx"))
    {
        return memcpy_nontemporal_avx;
    }
#endif

#if SPEAD2_USE_SSE2_STREAM
    if (__builtin_cpu_supports("sse2"))
    {
        return memcpy_nontemporal_sse2;
    }
#endif

    /* Whether std::memcpy is declared noexcept depends on the C library, so
     * cast explicitly to the noexcept pointer type in case it is not.
     */
    return (memcpy_function) std::memcpy;
}
93

94
#if SPEAD2_USE_FMV
95

96
/* Declaration only: there is no body here. The GNU ifunc attribute names the
 * resolver by its mangled symbol, which is the Itanium-ABI mangling of
 * spead2::resolve_memcpy_nontemporal(); the string must be kept in sync if
 * that function is ever renamed or moved to another namespace.
 */
[[gnu::ifunc("_ZN6spead226resolve_memcpy_nontemporalEv")]]
void *memcpy_nontemporal(
    void * __restrict__ dest,
    const void * __restrict__ src,
    std::size_t n) noexcept;
98

99
#else
100

101
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
102
{
103
    static void *(*memcpy_nontemporal_ptr)(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept = resolve_memcpy_nontemporal();
104
    return memcpy_nontemporal_ptr(dest, src, n);
105
}
106

107
#endif
108

109
} // namespace spead2
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc