• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OSGeo / gdal / 12706066811

10 Jan 2025 08:38AM UTC coverage: 70.084% (-2.5%) from 72.549%
12706066811

Pull #11629

github

web-flow
Merge 9418dc48f into 0df468c56
Pull Request #11629: add uv documentation for python package

563296 of 803749 relevant lines covered (70.08%)

223434.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.57
/gcore/gdal_priv_templates.hpp
1
/******************************************************************************
2
 *
3
 * Project:  GDAL Core
4
 * Purpose:  Inline C++ templates
5
 * Author:   Phil Vachon, <philippe at cowpig.ca>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2009, Phil Vachon, <philippe at cowpig.ca>
9
 * Copyright (c) 2025, Even Rouault, <even.rouault at spatialys.com>
10
 *
11
 * SPDX-License-Identifier: MIT
12
 ****************************************************************************/
13

14
#ifndef GDAL_PRIV_TEMPLATES_HPP_INCLUDED
15
#define GDAL_PRIV_TEMPLATES_HPP_INCLUDED
16

17
#include "cpl_port.h"
18

19
#include <algorithm>
20
#include <cmath>
21
#include <cstdint>
22
#include <limits>
23
#include <type_traits>
24

25
/************************************************************************/
26
/*                        GDALGetDataLimits()                           */
27
/************************************************************************/
28
/**
 * Compute the limits of values that can be placed in Tout in terms of
 * Tin. Usually used for output clamping, when the output data type's
 * limits are stable relative to the input type (i.e. no roundoff error).
 *
 * The limits start out as the full range of Tin and are only narrowed when
 * Tout is an integer type whose range is smaller than that of Tin.
 *
 * @param tMaxValue the returned maximum value
 * @param tMinValue the returned minimum value
 */

template <class Tin, class Tout>
inline void GDALGetDataLimits(Tin &tMaxValue, Tin &tMinValue)
{
    tMaxValue = std::numeric_limits<Tin>::max();
    // lowest() rather than min(): for a floating-point Tin, min() is the
    // smallest positive normalized value, not the most negative one.
    tMinValue = std::numeric_limits<Tin>::lowest();

    // Compute the actual minimum value of Tout in terms of Tin.
    if constexpr (std::numeric_limits<Tout>::is_signed &&
                  std::numeric_limits<Tout>::is_integer)
    {
        // the minimum value is less than zero
        if constexpr (std::numeric_limits<Tout>::digits <
                          std::numeric_limits<Tin>::digits ||
                      !std::numeric_limits<Tin>::is_integer)
        {
            // Tout is smaller than Tin, so we need to clamp values in input
            // to the range of Tout's min/max values
            if constexpr (std::numeric_limits<Tin>::is_signed)
            {
                tMinValue = static_cast<Tin>(std::numeric_limits<Tout>::min());
            }
            tMaxValue = static_cast<Tin>(std::numeric_limits<Tout>::max());
        }
    }
    else if constexpr (std::numeric_limits<Tout>::is_integer)
    {
        // the output is unsigned, so we just need to determine the max
        /* coverity[same_on_both_sides] */
        if constexpr (std::numeric_limits<Tout>::digits <=
                      std::numeric_limits<Tin>::digits)
        {
            // Tout is smaller than Tin, so we need to clamp the input values
            // to the range of Tout's max
            tMaxValue = static_cast<Tin>(std::numeric_limits<Tout>::max());
        }
        tMinValue = 0;
    }
}
371,822,824✔
75

76
/************************************************************************/
77
/*                          GDALClampValue()                            */
78
/************************************************************************/
79
/**
 * Clamp values of type T to a specified range.
 *
 * Note the argument order: the maximum comes before the minimum. The upper
 * bound is tested first. A value that compares neither greater than tMax nor
 * less than tMin (e.g. a NaN) is returned unchanged.
 *
 * Usable in constant expressions when T is a literal type.
 *
 * @param tValue the value
 * @param tMax the max value
 * @param tMin the min value
 * @return tMax if tValue > tMax, else tMin if tValue < tMin, else tValue.
 */
template <class T>
inline constexpr T GDALClampValue(const T tValue, const T tMax, const T tMin)
{
    return tValue > tMax ? tMax : tValue < tMin ? tMin : tValue;
}
91

92
/************************************************************************/
93
/*                        GDALClampDoubleValue()                        */
94
/************************************************************************/
95
/**
 * Clamp a double in place to the range [tMin, tMax].
 *
 * The argument order is the same as std::clamp (value, min, max). Both
 * bounds are converted to double before comparing. The upper bound is tested
 * first. A value that compares neither above tMax nor below tMin (such as
 * NaN) is left untouched.
 *
 * @param tValue the value, modified in place when it is out of range
 * @param tMin the min value
 * @param tMax the max value
 * @return true if the value was clamped, false if it was left as is.
 */
template <class T2, class T3>
inline bool GDALClampDoubleValue(double &tValue, const T2 tMin, const T3 tMax)
{
    const double dfLower = static_cast<double>(tMin);
    const double dfUpper = static_cast<double>(tMax);

    if (tValue > dfUpper)
    {
        tValue = dfUpper;
        return true;
    }
    if (tValue < dfLower)
    {
        tValue = dfLower;
        return true;
    }
    return false;
}
119

120
/************************************************************************/
121
/*                         GDALIsValueInRange()                         */
122
/************************************************************************/
123
/**
 * Returns whether a value is in the type range.
 * NaN is considered not to be in type range.
 *
 * For integer types with more value bits than the mantissa of a double
 * (64-bit integers), std::numeric_limits<T>::max() converted to double
 * rounds up to one past the largest value of T. The upper bound is therefore
 * strict for those types, so that a value reported as in range can always be
 * converted to T.
 *
 * @param dfValue the value
 * @return whether the value is in the type range.
 */
template <class T> inline bool GDALIsValueInRange(double dfValue)
{
    using Limits = std::numeric_limits<T>;
    const double dfLowest = static_cast<double>(Limits::lowest());
    const double dfMax = static_cast<double>(Limits::max());

    if constexpr (Limits::is_integer &&
                  Limits::digits > std::numeric_limits<double>::digits)
    {
        // dfMax is 2^digits here, i.e. max() + 1: exclude it.
        return dfValue >= dfLowest && dfValue < dfMax;
    }
    else
    {
        return dfValue >= dfLowest && dfValue <= dfMax;
    }
}
135

136
// Specialization for double: every value except NaN is in range.
template <> inline bool GDALIsValueInRange<double>(double dfValue)
{
    const bool bIsNaN = std::isnan(dfValue);
    return !bIsNaN;
}
140

141
// Specialization for float: infinities are accepted, as are values within
// [-FLT_MAX, FLT_MAX]. NaN fails every test and is reported out of range.
template <> inline bool GDALIsValueInRange<float>(double dfValue)
{
    if (std::isinf(dfValue))
    {
        return true;
    }
    const double dfFloatMax = std::numeric_limits<float>::max();
    return dfValue >= -dfFloatMax && dfValue <= dfFloatMax;
}
147

148
// Specialization for int64_t.
template <> inline bool GDALIsValueInRange<int64_t>(double dfValue)
{
    const double dfLowerBound =
        static_cast<double>(std::numeric_limits<int64_t>::min());
    // Values in the range [INT64_MAX - 1023, INT64_MAX - 1]
    // get converted to a double that once cast to int64_t is
    // INT64_MAX + 1, hence the upper bound is tested with a strict <.
    const double dfUpperBound =
        static_cast<double>(std::numeric_limits<int64_t>::max());

    const bool bNotTooSmall = dfValue >= dfLowerBound;
    const bool bNotTooLarge = dfValue < dfUpperBound;
    return bNotTooSmall && bNotTooLarge;
}
157

158
// Specialization for uint64_t.
template <> inline bool GDALIsValueInRange<uint64_t>(double dfValue)
{
    // Values in the range [UINT64_MAX - 2047, UINT64_MAX - 1]
    // get converted to a double that once cast to uint64_t is
    // UINT64_MAX + 1, hence the upper bound is tested with a strict <.
    const double dfUpperBound =
        static_cast<double>(std::numeric_limits<uint64_t>::max());

    const bool bNonNegative = dfValue >= 0;
    const bool bNotTooLarge = dfValue < dfUpperBound;
    return bNonNegative && bNotTooLarge;
}
166

167
/************************************************************************/
168
/*                         GDALIsValueExactAs()                         */
169
/************************************************************************/
170
/**
171
 * Returns whether a value can be exactly represented on type T.
172
 *
173
 * That is static_cast\<double\>(static_cast\<T\>(dfValue)) is legal and is
174
 * equal to dfValue.
175
 *
176
 * Note: for T=float or double, a NaN input leads to true
177
 *
178
 * @param dfValue the value
179
 * @return whether the value can be exactly represented on type T.
180
 */
181
template <class T> inline bool GDALIsValueExactAs(double dfValue)
578✔
182
{
183
    return GDALIsValueInRange<T>(dfValue) &&
1,109✔
184
           static_cast<double>(static_cast<T>(dfValue)) == dfValue;
1,109✔
185
}
186

187
// Specialization for float: NaN is reported as exactly representable;
// otherwise the value must be in float range and survive a round trip
// through float unchanged.
template <> inline bool GDALIsValueExactAs<float>(double dfValue)
{
    if (std::isnan(dfValue))
    {
        return true;
    }
    if (!GDALIsValueInRange<float>(dfValue))
    {
        return false;
    }
    const float fNarrowed = static_cast<float>(dfValue);
    return static_cast<double>(fNarrowed) == dfValue;
}
193

194
// Specialization for double: the input already is a double, so the answer
// is always true (NaN included).
template <> inline bool GDALIsValueExactAs<double>(double /* dfValue */)
{
    return true;
}
198

199
/************************************************************************/
200
/*                          GDALCopyWord()                              */
201
/************************************************************************/
202

203
template <class Tin, class Tout> struct sGDALCopyWord
204
{
205
    static inline void f(const Tin tValueIn, Tout &tValueOut)
207,157,514✔
206
    {
207
        Tin tMaxVal, tMinVal;
208
        GDALGetDataLimits<Tin, Tout>(tMaxVal, tMinVal);
207,157,514✔
209
        tValueOut =
207,596,214✔
210
            static_cast<Tout>(GDALClampValue(tValueIn, tMaxVal, tMinVal));
207,217,314✔
211
    }
207,596,214✔
212
};
213

214
// Any input type to float: a plain cast, with no clamping or rounding.
template <class Tin> struct sGDALCopyWord<Tin, float>
{
    static inline void f(const Tin tValueIn, float &fValueOut)
    {
        const float fConverted = static_cast<float>(tValueIn);
        fValueOut = fConverted;
    }
};
221

222
// Any input type to double: a plain cast, with no clamping or rounding.
template <class Tin> struct sGDALCopyWord<Tin, double>
{
    static inline void f(const Tin tValueIn, double &dfValueOut)
    {
        const double dfConverted = static_cast<double>(tValueIn);
        dfValueOut = dfConverted;
    }
};
229

230
template <> struct sGDALCopyWord<float, double>
231
{
232
    static inline void f(const float fValueIn, double &dfValueOut)
46,469,000✔
233
    {
234
        dfValueOut = fValueIn;
46,469,000✔
235
    }
46,469,000✔
236
};
237

238
template <> struct sGDALCopyWord<double, float>
239
{
240
    static inline void f(const double dfValueIn, float &fValueOut)
2,303,880✔
241
    {
242
        if (dfValueIn > std::numeric_limits<float>::max())
2,303,880✔
243
        {
244
            fValueOut = std::numeric_limits<float>::infinity();
48✔
245
            return;
48✔
246
        }
247
        if (dfValueIn < -std::numeric_limits<float>::max())
2,303,830✔
248
        {
249
            fValueOut = -std::numeric_limits<float>::infinity();
52✔
250
            return;
52✔
251
        }
252

253
        fValueOut = static_cast<float>(dfValueIn);
2,303,780✔
254
    }
255
};
256

257
template <class Tout> struct sGDALCopyWord<float, Tout>
258
{
259
    static inline void f(const float fValueIn, Tout &tValueOut)
3,978,000✔
260
    {
261
        if (std::isnan(fValueIn))
3,978,000✔
262
        {
263
            tValueOut = 0;
×
264
            return;
×
265
        }
266
        float fMaxVal, fMinVal;
267
        GDALGetDataLimits<float, Tout>(fMaxVal, fMinVal);
3,978,120✔
268
        tValueOut = static_cast<Tout>(
3,978,050✔
269
            GDALClampValue(fValueIn + 0.5f, fMaxVal, fMinVal));
3,978,020✔
270
    }
271
};
272

273
template <> struct sGDALCopyWord<float, short>
274
{
275
    static inline void f(const float fValueIn, short &nValueOut)
2,928,940✔
276
    {
277
        if (std::isnan(fValueIn))
2,928,940✔
278
        {
279
            nValueOut = 0;
×
280
            return;
×
281
        }
282
        float fMaxVal, fMinVal;
283
        GDALGetDataLimits<float, short>(fMaxVal, fMinVal);
2,928,940✔
284
        float fValue = fValueIn >= 0.0f ? fValueIn + 0.5f : fValueIn - 0.5f;
2,928,940✔
285
        nValueOut =
2,928,940✔
286
            static_cast<short>(GDALClampValue(fValue, fMaxVal, fMinVal));
2,928,940✔
287
    }
288
};
289

290
template <> struct sGDALCopyWord<float, signed char>
291
{
292
    static inline void f(const float fValueIn, signed char &nValueOut)
297✔
293
    {
294
        if (std::isnan(fValueIn))
297✔
295
        {
296
            nValueOut = 0;
×
297
            return;
×
298
        }
299
        float fMaxVal, fMinVal;
300
        GDALGetDataLimits<float, signed char>(fMaxVal, fMinVal);
297✔
301
        float fValue = fValueIn >= 0.0f ? fValueIn + 0.5f : fValueIn - 0.5f;
297✔
302
        nValueOut =
297✔
303
            static_cast<signed char>(GDALClampValue(fValue, fMaxVal, fMinVal));
297✔
304
    }
305
};
306

307
template <class Tout> struct sGDALCopyWord<double, Tout>
308
{
309
    static inline void f(const double dfValueIn, Tout &tValueOut)
83,870,120✔
310
    {
311
        if (std::isnan(dfValueIn))
83,870,120✔
312
        {
313
            tValueOut = 0;
×
314
            return;
×
315
        }
316
        double dfMaxVal, dfMinVal;
317
        GDALGetDataLimits<double, Tout>(dfMaxVal, dfMinVal);
83,629,820✔
318
        tValueOut = static_cast<Tout>(
82,142,920✔
319
            GDALClampValue(dfValueIn + 0.5, dfMaxVal, dfMinVal));
81,975,820✔
320
    }
321
};
322

323
template <> struct sGDALCopyWord<double, int>
324
{
325
    static inline void f(const double dfValueIn, int &nValueOut)
70,358,300✔
326
    {
327
        if (std::isnan(dfValueIn))
70,358,300✔
328
        {
329
            nValueOut = 0;
×
330
            return;
×
331
        }
332
        double dfMaxVal, dfMinVal;
333
        GDALGetDataLimits<double, int>(dfMaxVal, dfMinVal);
70,358,300✔
334
        double dfValue = dfValueIn >= 0.0 ? dfValueIn + 0.5 : dfValueIn - 0.5;
70,358,300✔
335
        nValueOut =
70,358,300✔
336
            static_cast<int>(GDALClampValue(dfValue, dfMaxVal, dfMinVal));
70,358,300✔
337
    }
338
};
339

340
template <> struct sGDALCopyWord<double, std::int64_t>
341
{
342
    static inline void f(const double dfValueIn, std::int64_t &nValueOut)
688✔
343
    {
344
        if (std::isnan(dfValueIn))
688✔
345
        {
346
            nValueOut = 0;
1✔
347
        }
348
        else if (dfValueIn >=
687✔
349
                 static_cast<double>(std::numeric_limits<std::int64_t>::max()))
687✔
350
        {
351
            nValueOut = std::numeric_limits<std::int64_t>::max();
6✔
352
        }
353
        else if (dfValueIn <=
681✔
354
                 static_cast<double>(std::numeric_limits<std::int64_t>::min()))
681✔
355
        {
356
            nValueOut = std::numeric_limits<std::int64_t>::min();
4✔
357
        }
358
        else
359
        {
360
            nValueOut = static_cast<std::int64_t>(
1,354✔
361
                dfValueIn > 0.0f ? dfValueIn + 0.5f : dfValueIn - 0.5f);
677✔
362
        }
363
    }
688✔
364
};
365

366
template <> struct sGDALCopyWord<double, std::uint64_t>
367
{
368
    static inline void f(const double dfValueIn, std::uint64_t &nValueOut)
604✔
369
    {
370
        if (!(dfValueIn > 0))
604✔
371
        {
372
            nValueOut = 0;
164✔
373
        }
374
        else if (dfValueIn >
440✔
375
                 static_cast<double>(std::numeric_limits<uint64_t>::max()))
440✔
376
        {
377
            nValueOut = std::numeric_limits<uint64_t>::max();
4✔
378
        }
379
        else
380
        {
381
            nValueOut = static_cast<std::uint64_t>(dfValueIn + 0.5);
436✔
382
        }
383
    }
604✔
384
};
385

386
template <> struct sGDALCopyWord<double, short>
387
{
388
    static inline void f(const double dfValueIn, short &nValueOut)
5,101,770✔
389
    {
390
        if (std::isnan(dfValueIn))
5,101,770✔
391
        {
392
            nValueOut = 0;
×
393
            return;
×
394
        }
395
        double dfMaxVal, dfMinVal;
396
        GDALGetDataLimits<double, short>(dfMaxVal, dfMinVal);
5,101,770✔
397
        double dfValue = dfValueIn > 0.0 ? dfValueIn + 0.5 : dfValueIn - 0.5;
5,101,770✔
398
        nValueOut =
5,101,770✔
399
            static_cast<short>(GDALClampValue(dfValue, dfMaxVal, dfMinVal));
5,101,770✔
400
    }
401
};
402

403
template <> struct sGDALCopyWord<double, signed char>
404
{
405
    static inline void f(const double dfValueIn, signed char &nValueOut)
463✔
406
    {
407
        if (std::isnan(dfValueIn))
463✔
408
        {
409
            nValueOut = 0;
×
410
            return;
×
411
        }
412
        double dfMaxVal, dfMinVal;
413
        GDALGetDataLimits<double, signed char>(dfMaxVal, dfMinVal);
463✔
414
        double dfValue = dfValueIn > 0.0 ? dfValueIn + 0.5 : dfValueIn - 0.5;
463✔
415
        nValueOut = static_cast<signed char>(
463✔
416
            GDALClampValue(dfValue, dfMaxVal, dfMinVal));
463✔
417
    }
418
};
419

420
// Roundoff occurs for Float32 -> int32 for max/min. Overload GDALCopyWord
421
// specifically for this case.
422
template <> struct sGDALCopyWord<float, int>
423
{
424
    static inline void f(const float fValueIn, int &nValueOut)
139,382,000✔
425
    {
426
        if (std::isnan(fValueIn))
139,382,000✔
427
        {
428
            nValueOut = 0;
×
429
        }
430
        else if (fValueIn >=
140,701,000✔
431
                 static_cast<float>(std::numeric_limits<int>::max()))
141,468,000✔
432
        {
433
            nValueOut = std::numeric_limits<int>::max();
160✔
434
        }
435
        else if (fValueIn <=
138,142,000✔
436
                 static_cast<float>(std::numeric_limits<int>::min()))
140,701,000✔
437
        {
438
            nValueOut = std::numeric_limits<int>::min();
×
439
        }
440
        else
441
        {
442
            nValueOut = static_cast<int>(fValueIn > 0.0f ? fValueIn + 0.5f
139,628,000✔
443
                                                         : fValueIn - 0.5f);
120,973✔
444
        }
445
    }
139,507,000✔
446
};
447

448
// Roundoff occurs for Float32 -> uint32 for max. Overload GDALCopyWord
449
// specifically for this case.
450
template <> struct sGDALCopyWord<float, unsigned int>
451
{
452
    static inline void f(const float fValueIn, unsigned int &nValueOut)
203✔
453
    {
454
        if (!(fValueIn > 0))
203✔
455
        {
456
            nValueOut = 0;
20✔
457
        }
458
        else if (fValueIn >=
183✔
459
                 static_cast<float>(std::numeric_limits<unsigned int>::max()))
183✔
460
        {
461
            nValueOut = std::numeric_limits<unsigned int>::max();
20✔
462
        }
463
        else
464
        {
465
            nValueOut = static_cast<unsigned int>(fValueIn + 0.5f);
163✔
466
        }
467
    }
203✔
468
};
469

470
// Roundoff occurs for Float32 -> std::int64_t for max/min. Overload
471
// GDALCopyWord specifically for this case.
472
template <> struct sGDALCopyWord<float, std::int64_t>
473
{
474
    static inline void f(const float fValueIn, std::int64_t &nValueOut)
238✔
475
    {
476
        if (std::isnan(fValueIn))
238✔
477
        {
478
            nValueOut = 0;
1✔
479
        }
480
        else if (fValueIn >=
237✔
481
                 static_cast<float>(std::numeric_limits<std::int64_t>::max()))
237✔
482
        {
483
            nValueOut = std::numeric_limits<std::int64_t>::max();
2✔
484
        }
485
        else if (fValueIn <=
235✔
486
                 static_cast<float>(std::numeric_limits<std::int64_t>::min()))
235✔
487
        {
488
            nValueOut = std::numeric_limits<std::int64_t>::min();
2✔
489
        }
490
        else
491
        {
492
            nValueOut = static_cast<std::int64_t>(
466✔
493
                fValueIn > 0.0f ? fValueIn + 0.5f : fValueIn - 0.5f);
233✔
494
        }
495
    }
238✔
496
};
497

498
// Roundoff occurs for Float32 -> std::uint64_t for max. Overload GDALCopyWord
499
// specifically for this case.
500
template <> struct sGDALCopyWord<float, std::uint64_t>
501
{
502
    static inline void f(const float fValueIn, std::uint64_t &nValueOut)
168✔
503
    {
504
        if (!(fValueIn > 0))
168✔
505
        {
506
            nValueOut = 0;
3✔
507
        }
508
        else if (fValueIn >=
165✔
509
                 static_cast<float>(std::numeric_limits<std::uint64_t>::max()))
165✔
510
        {
511
            nValueOut = std::numeric_limits<std::uint64_t>::max();
2✔
512
        }
513
        else
514
        {
515
            nValueOut = static_cast<std::uint64_t>(fValueIn + 0.5f);
163✔
516
        }
517
    }
168✔
518
};
519

520
/**
521
 * Copy a single word, optionally rounding if appropriate (i.e. going
522
 * from the float to the integer case). Note that this is the function
523
 * you should specialize if you're adding a new data type.
524
 *
525
 * @param tValueIn value of type Tin; the input value to be converted
526
 * @param tValueOut value of type Tout; the output value
527
 */
528

529
template <class Tin, class Tout>
530
inline void GDALCopyWord(const Tin tValueIn, Tout &tValueOut)
680,701,272✔
531
{
532
    if constexpr (std::is_same<Tin, Tout>::value)
533
        tValueOut = tValueIn;
32,391,043✔
534
    else
535
        sGDALCopyWord<Tin, Tout>::f(tValueIn, tValueOut);
648,310,229✔
536
}
680,368,962✔
537

538
/************************************************************************/
539
/*                         GDALCopy4Words()                             */
540
/************************************************************************/
541
/**
 * Copy 4 packed words to 4 packed words, optionally rounding if appropriate
 * (i.e. going from the float to the integer case).
 *
 * This generic version converts the words one at a time, in order, with
 * GDALCopyWord().
 *
 * @param pValueIn pointer to 4 input values of type Tin.
 * @param pValueOut pointer to 4 output values of type Tout.
 */

template <class Tin, class Tout>
inline void GDALCopy4Words(const Tin *pValueIn, Tout *const pValueOut)
{
    for (int iWord = 0; iWord < 4; ++iWord)
    {
        GDALCopyWord(pValueIn[iWord], pValueOut[iWord]);
    }
}
16✔
557

558
/************************************************************************/
559
/*                         GDALCopy8Words()                             */
560
/************************************************************************/
561
/**
 * Copy 8 packed words to 8 packed words, optionally rounding if appropriate
 * (i.e. going from the float to the integer case).
 *
 * This generic version processes the words as two consecutive groups of
 * four, each handled by GDALCopy4Words().
 *
 * @param pValueIn pointer to 8 input values of type Tin.
 * @param pValueOut pointer to 8 output values of type Tout.
 */

template <class Tin, class Tout>
inline void GDALCopy8Words(const Tin *pValueIn, Tout *const pValueOut)
{
    for (int iFirst = 0; iFirst < 8; iFirst += 4)
    {
        GDALCopy4Words(pValueIn + iFirst, pValueOut + iFirst);
    }
}
14,783,679✔
575

576
// Needs SSE2
577
#if defined(__x86_64) || defined(_M_X64) || defined(USE_SSE2) ||               \
578
    defined(USE_NEON_OPTIMIZATIONS)
579

580
#ifdef USE_NEON_OPTIMIZATIONS
581
#include "include_sse2neon.h"
582
#else
583
#include <emmintrin.h>
584
#endif
585

586
// Write the lowest 32 bits of xmm to pDest. The bytes are copied with
// memcpy() from a local int.
static inline void GDALCopyXMMToInt32(const __m128i xmm, void *pDest)
{
    const int nLow32 = _mm_cvtsi128_si32(xmm);  // Extract lower 32 bit word
    memcpy(pDest, &nLow32, sizeof(nLow32));
}
32,641,985✔
591

592
// Write the lowest 64 bits of xmm to pDest.
static inline void GDALCopyXMMToInt64(const __m128i xmm, void *pDest)
{
    __m128i *const pDest128 = reinterpret_cast<__m128i *>(pDest);
    _mm_storel_epi64(pDest128, xmm);
}
77,468,273✔
596

597
#if __SSSE3__
598
#include <tmmintrin.h>
599
#endif
600

601
#if defined(__SSE4_1__) || defined(__AVX__)
602
#include <smmintrin.h>
603
#endif
604

605
template <>
606
inline void GDALCopy4Words(const float *pValueIn, GByte *const pValueOut)
26,203,102✔
607
{
608
    __m128 xmm = _mm_loadu_ps(pValueIn);
26,203,102✔
609

610
    // The following clamping would be useless due to the final saturating
611
    // packing if we could guarantee the input range in [INT_MIN,INT_MAX]
612
    const __m128 p0d5 = _mm_set1_ps(0.5f);
26,203,102✔
613
    const __m128 xmm_max = _mm_set1_ps(255);
26,203,102✔
614
    xmm = _mm_add_ps(xmm, p0d5);
26,203,102✔
615
    xmm = _mm_min_ps(_mm_max_ps(xmm, p0d5), xmm_max);
52,401,604✔
616

617
    __m128i xmm_i = _mm_cvttps_epi32(xmm);
26,205,802✔
618

619
#if defined(__SSSE3__) || defined(USE_NEON_OPTIMIZATIONS)
620
    xmm_i = _mm_shuffle_epi8(
621
        xmm_i, _mm_cvtsi32_si128(0 | (4 << 8) | (8 << 16) | (12 << 24)));
622
#else
623
    xmm_i = _mm_packs_epi32(xmm_i, xmm_i);   // Pack int32 to int16
26,201,002✔
624
    xmm_i = _mm_packus_epi16(xmm_i, xmm_i);  // Pack int16 to uint8
26,204,302✔
625
#endif
626
    GDALCopyXMMToInt32(xmm_i, pValueOut);
26,204,302✔
627
}
26,207,102✔
628

629
template <>
630
inline void GDALCopy4Words(const float *pValueIn, GInt16 *const pValueOut)
3,355,730✔
631
{
632
    __m128 xmm = _mm_loadu_ps(pValueIn);
3,355,730✔
633

634
    const __m128 xmm_min = _mm_set1_ps(-32768);
3,355,730✔
635
    const __m128 xmm_max = _mm_set1_ps(32767);
3,355,730✔
636
    xmm = _mm_min_ps(_mm_max_ps(xmm, xmm_min), xmm_max);
6,711,460✔
637

638
    const __m128 p0d5 = _mm_set1_ps(0.5f);
3,355,730✔
639
    const __m128 m0d5 = _mm_set1_ps(-0.5f);
3,355,730✔
640
    const __m128 mask = _mm_cmpge_ps(xmm, p0d5);
3,355,730✔
641
    // f >= 0.5f ? f + 0.5f : f - 0.5f
642
    xmm = _mm_add_ps(
13,422,900✔
643
        xmm, _mm_or_ps(_mm_and_ps(mask, p0d5), _mm_andnot_ps(mask, m0d5)));
644

645
    __m128i xmm_i = _mm_cvttps_epi32(xmm);
3,355,730✔
646

647
    xmm_i = _mm_packs_epi32(xmm_i, xmm_i);  // Pack int32 to int16
3,355,730✔
648
    GDALCopyXMMToInt64(xmm_i, pValueOut);
3,355,730✔
649
}
3,355,730✔
650

651
template <>
652
inline void GDALCopy4Words(const float *pValueIn, GUInt16 *const pValueOut)
1✔
653
{
654
    __m128 xmm = _mm_loadu_ps(pValueIn);
1✔
655

656
    const __m128 p0d5 = _mm_set1_ps(0.5f);
1✔
657
    const __m128 xmm_max = _mm_set1_ps(65535);
1✔
658
    xmm = _mm_add_ps(xmm, p0d5);
1✔
659
    xmm = _mm_min_ps(_mm_max_ps(xmm, p0d5), xmm_max);
2✔
660

661
    __m128i xmm_i = _mm_cvttps_epi32(xmm);
1✔
662

663
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
664
    xmm_i = _mm_packus_epi32(xmm_i, xmm_i);  // Pack int32 to uint16
665
#else
666
    // Translate to int16 range because _mm_packus_epi32 is SSE4.1 only
667
    xmm_i = _mm_add_epi32(xmm_i, _mm_set1_epi32(-32768));
2✔
668
    xmm_i = _mm_packs_epi32(xmm_i, xmm_i);  // Pack int32 to int16
1✔
669
    // Translate back to uint16 range (actually -32768==32768 in int16)
670
    xmm_i = _mm_add_epi16(xmm_i, _mm_set1_epi16(-32768));
1✔
671
#endif
672
    GDALCopyXMMToInt64(xmm_i, pValueOut);
1✔
673
}
1✔
674

675
#ifdef __AVX2__
676

677
#include <immintrin.h>
678

679
template <>
680
inline void GDALCopy8Words(const float *pValueIn, GByte *const pValueOut)
681
{
682
    __m256 ymm = _mm256_loadu_ps(pValueIn);
683

684
    const __m256 p0d5 = _mm256_set1_ps(0.5f);
685
    const __m256 ymm_max = _mm256_set1_ps(255);
686
    ymm = _mm256_add_ps(ymm, p0d5);
687
    ymm = _mm256_min_ps(_mm256_max_ps(ymm, p0d5), ymm_max);
688

689
    __m256i ymm_i = _mm256_cvttps_epi32(ymm);
690

691
    ymm_i = _mm256_packus_epi32(ymm_i, ymm_i);  // Pack int32 to uint16
692
    ymm_i = _mm256_permute4x64_epi64(ymm_i, 0 | (2 << 2));  // AVX2
693

694
    __m128i xmm_i = _mm256_castsi256_si128(ymm_i);
695
    xmm_i = _mm_packus_epi16(xmm_i, xmm_i);
696
    GDALCopyXMMToInt64(xmm_i, pValueOut);
697
}
698

699
template <>
700
inline void GDALCopy8Words(const float *pValueIn, GUInt16 *const pValueOut)
701
{
702
    __m256 ymm = _mm256_loadu_ps(pValueIn);
703

704
    const __m256 p0d5 = _mm256_set1_ps(0.5f);
705
    const __m256 ymm_max = _mm256_set1_ps(65535);
706
    ymm = _mm256_add_ps(ymm, p0d5);
707
    ymm = _mm256_min_ps(_mm256_max_ps(ymm, p0d5), ymm_max);
708

709
    __m256i ymm_i = _mm256_cvttps_epi32(ymm);
710

711
    ymm_i = _mm256_packus_epi32(ymm_i, ymm_i);  // Pack int32 to uint16
712
    ymm_i = _mm256_permute4x64_epi64(ymm_i, 0 | (2 << 2));  // AVX2
713

714
    _mm_storeu_si128(reinterpret_cast<__m128i *>(pValueOut),
715
                     _mm256_castsi256_si128(ymm_i));
716
}
717
#else
718
template <>
719
inline void GDALCopy8Words(const float *pValueIn, GUInt16 *const pValueOut)
7,754,381✔
720
{
721
    __m128 xmm = _mm_loadu_ps(pValueIn);
7,754,381✔
722
    __m128 xmm1 = _mm_loadu_ps(pValueIn + 4);
15,508,802✔
723

724
    const __m128 p0d5 = _mm_set1_ps(0.5f);
7,754,381✔
725
    const __m128 xmm_max = _mm_set1_ps(65535);
7,754,381✔
726
    xmm = _mm_add_ps(xmm, p0d5);
7,754,381✔
727
    xmm1 = _mm_add_ps(xmm1, p0d5);
7,754,381✔
728
    xmm = _mm_min_ps(_mm_max_ps(xmm, p0d5), xmm_max);
15,511,002✔
729
    xmm1 = _mm_min_ps(_mm_max_ps(xmm1, p0d5), xmm_max);
15,528,002✔
730

731
    __m128i xmm_i = _mm_cvttps_epi32(xmm);
7,762,051✔
732
    __m128i xmm1_i = _mm_cvttps_epi32(xmm1);
7,752,331✔
733

734
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
735
    xmm_i = _mm_packus_epi32(xmm_i, xmm1_i);  // Pack int32 to uint16
736
#else
737
    // Translate to int16 range because _mm_packus_epi32 is SSE4.1 only
738
    xmm_i = _mm_add_epi32(xmm_i, _mm_set1_epi32(-32768));
15,504,702✔
739
    xmm1_i = _mm_add_epi32(xmm1_i, _mm_set1_epi32(-32768));
15,504,702✔
740
    xmm_i = _mm_packs_epi32(xmm_i, xmm1_i);  // Pack int32 to int16
7,770,931✔
741
    // Translate back to uint16 range (actually -32768==32768 in int16)
742
    xmm_i = _mm_add_epi16(xmm_i, _mm_set1_epi16(-32768));
15,541,902✔
743
#endif
744
    _mm_storeu_si128(reinterpret_cast<__m128i *>(pValueOut), xmm_i);
745
}
7,770,931✔
746
#endif
747

748
#ifdef notdef_because_slightly_slower_than_default_implementation
749
template <>
750
inline void GDALCopy4Words(const double *pValueIn, float *const pValueOut)
751
{
752
    __m128d float_posmax = _mm_set1_pd(std::numeric_limits<float>::max());
753
    __m128d float_negmax = _mm_set1_pd(-std::numeric_limits<float>::max());
754
    __m128d float_posinf = _mm_set1_pd(std::numeric_limits<float>::infinity());
755
    __m128d float_neginf = _mm_set1_pd(-std::numeric_limits<float>::infinity());
756
    __m128d val01 = _mm_loadu_pd(pValueIn);
757
    __m128d val23 = _mm_loadu_pd(pValueIn + 2);
758
    __m128d mask_max = _mm_cmpge_pd(val01, float_posmax);
759
    __m128d mask_max23 = _mm_cmpge_pd(val23, float_posmax);
760
    val01 = _mm_or_pd(_mm_and_pd(mask_max, float_posinf),
761
                      _mm_andnot_pd(mask_max, val01));
762
    val23 = _mm_or_pd(_mm_and_pd(mask_max23, float_posinf),
763
                      _mm_andnot_pd(mask_max23, val23));
764
    __m128d mask_min = _mm_cmple_pd(val01, float_negmax);
765
    __m128d mask_min23 = _mm_cmple_pd(val23, float_negmax);
766
    val01 = _mm_or_pd(_mm_and_pd(mask_min, float_neginf),
767
                      _mm_andnot_pd(mask_min, val01));
768
    val23 = _mm_or_pd(_mm_and_pd(mask_min23, float_neginf),
769
                      _mm_andnot_pd(mask_min23, val23));
770
    __m128 val01_s = _mm_cvtpd_ps(val01);
771
    __m128 val23_s = _mm_cvtpd_ps(val23);
772
    __m128i val01_i = _mm_castps_si128(val01_s);
773
    __m128i val23_i = _mm_castps_si128(val23_s);
774
    GDALCopyXMMToInt64(val01_i, pValueOut);
775
    GDALCopyXMMToInt64(val23_i, pValueOut + 2);
776
}
777
#endif
778

779
#endif  // defined(__x86_64) || defined(_M_X64) || defined(USE_SSE2) || defined(USE_NEON_OPTIMIZATIONS)
780

781
#endif  // GDAL_PRIV_TEMPLATES_HPP_INCLUDED
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc