• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

ska-sa / spead2 / 5782497308

pending completion
5782497308

Pull #234

github

web-flow
Merge 10bac1955 into 5f1dead30
Pull Request #234: Add detection of function multi-versioning in the compiler

1 of 1 new or added line in 1 file covered. (100.0%)

5420 of 7232 relevant lines covered (74.94%)

52619.09 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.1
/src/common_memcpy.cpp
1
/* Copyright 2016, 2020 National Research Foundation (SARAO)
2
 *
3
 * This program is free software: you can redistribute it and/or modify it under
4
 * the terms of the GNU Lesser General Public License as published by the Free
5
 * Software Foundation, either version 3 of the License, or (at your option) any
6
 * later version.
7
 *
8
 * This program is distributed in the hope that it will be useful, but WITHOUT
9
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
10
 * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
11
 * details.
12
 *
13
 * You should have received a copy of the GNU Lesser General Public License
14
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
 */
16

17
#include <cstddef>
18
#include <cstdint>
19
#include <cstring>
20
#include <spead2/common_defines.h>
21
#include <spead2/common_features.h>
22
#include <spead2/common_memcpy.h>
23
#if SPEAD2_USE_MOVNTDQ
24
# include <emmintrin.h>
25
#endif
26

27
namespace spead2
28
{
29

30
#if SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ
31
/* Generic version of memcpy_nontemporal: forwards directly to std::memcpy.
 *
 * It is only compiled when SPEAD2_USE_FMV is enabled or SPEAD2_USE_MOVNTDQ
 * is disabled (see the surrounding preprocessor guard).
 *
 * Returns the value returned by std::memcpy, i.e. dest.
 */
SPEAD2_FMV_TARGET("default")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
{
    void * const result = std::memcpy(dest, src, n);
    return result;
}
36
#endif // SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ
37

38
#if SPEAD2_USE_MOVNTDQ
39
/* SSE2 version of memcpy_nontemporal.
 *
 * Copies n bytes from src to dest and returns dest. The copy is done in
 * three phases:
 *  1. a std::memcpy of the leading bytes, up to the point where the
 *     destination reaches a cache-line boundary;
 *  2. 64-byte blocks, each read with four unaligned 128-bit loads and
 *     written with four streaming stores (_mm_stream_si128);
 *  3. a std::memcpy of whatever is left over.
 * An _mm_sfence() is issued before every return.
 */
SPEAD2_FMV_TARGET("sse2")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
{
    constexpr std::size_t block_bytes = 64;
    constexpr std::uintptr_t line_mask = detail::cache_line_size - 1;

    char * __restrict__ out = static_cast<char *>(dest);
    const char * __restrict__ in = static_cast<const char *>(src);

    // Distance from the destination to the next cache-line boundary
    // (zero if it is already on one).
    const std::uintptr_t out_addr = reinterpret_cast<std::uintptr_t>(out);
    const std::uintptr_t next_line = (out_addr + line_mask) & ~line_mask;
    const std::size_t prefix = next_line - out_addr;

    if (prefix != 0)
    {
        if (n <= prefix)
        {
            // The whole copy ends at or before the boundary.
            std::memcpy(out, in, n);
            /* Not normally required, but if the destination is
             * write-combining memory then this will flush the combining
             * buffers. That may be necessary if the memory is actually on
             * a GPU or other accelerator.
             */
            _mm_sfence();
            return dest;
        }
        std::memcpy(out, in, prefix);
        out += prefix;
        in += prefix;
        n -= prefix;
    }

    // Largest multiple of the block size that fits in the remaining length.
    const std::size_t bulk = n - n % block_bytes;
    for (std::size_t pos = 0; pos < bulk; pos += block_bytes)
    {
        const __m128i *from = reinterpret_cast<const __m128i *>(in + pos);
        __m128i *to = reinterpret_cast<__m128i *>(out + pos);
        // Issue all four loads before the four stores.
        const __m128i part0 = _mm_loadu_si128(from + 0);
        const __m128i part1 = _mm_loadu_si128(from + 1);
        const __m128i part2 = _mm_loadu_si128(from + 2);
        const __m128i part3 = _mm_loadu_si128(from + 3);
        _mm_stream_si128(to + 0, part0);
        _mm_stream_si128(to + 1, part1);
        _mm_stream_si128(to + 2, part2);
        _mm_stream_si128(to + 3, part3);
    }

    // Remaining bytes (fewer than one block).
    std::memcpy(out + bulk, in + bulk, n - bulk);
    _mm_sfence();
    return dest;
}
84
#endif // SPEAD2_USE_MOVNTDQ
85

86
} // namespace spead2
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc