• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OSGeo / gdal / 12706066811

10 Jan 2025 08:38AM UTC coverage: 70.084% (-2.5%) from 72.549%
12706066811

Pull #11629

github

web-flow
Merge 9418dc48f into 0df468c56
Pull Request #11629: add uv documentation for python package

563296 of 803749 relevant lines covered (70.08%)

223434.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.59
/gcore/rasterio.cpp
1
/******************************************************************************
2
 *
3
 * Project:  GDAL Core
4
 * Purpose:  Contains default implementation of GDALRasterBand::IRasterIO()
5
 *           and supporting functions of broader utility.
6
 * Author:   Frank Warmerdam, warmerdam@pobox.com
7
 *
8
 ******************************************************************************
9
 * Copyright (c) 1998, Frank Warmerdam
10
 * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11
 *
12
 * SPDX-License-Identifier: MIT
13
 ****************************************************************************/
14

15
#include "cpl_port.h"
16
#include "gdal.h"
17
#include "gdal_priv.h"
18

19
#include <cassert>
20
#include <climits>
21
#include <cmath>
22
#include <cstddef>
23
#include <cstdio>
24
#include <cstdlib>
25
#include <cstring>
26

27
#include <algorithm>
28
#include <limits>
29
#include <stdexcept>
30
#include <type_traits>
31

32
#include "cpl_conv.h"
33
#include "cpl_cpu_features.h"
34
#include "cpl_error.h"
35
#include "cpl_progress.h"
36
#include "cpl_string.h"
37
#include "cpl_vsi.h"
38
#include "gdal_priv_templates.hpp"
39
#include "gdal_vrt.h"
40
#include "gdalwarper.h"
41
#include "memdataset.h"
42
#include "vrtdataset.h"
43

44
#if defined(__x86_64) || defined(_M_X64)
45
#include <emmintrin.h>
46
#define HAVE_SSE2
47
#elif defined(USE_NEON_OPTIMIZATIONS)
48
#include "include_sse2neon.h"
49
#define HAVE_SSE2
50
#endif
51

52
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
53
#include "rasterio_ssse3.h"
54
#ifdef __SSSE3__
55
#include <tmmintrin.h>
56
#endif
57
#endif
58

59
// Forward declaration: DownsamplingIntegerXFactor() below calls this in its
// same-data-type, DATA_TYPE_SIZE == 1 branch, passing byte strides for source
// and destination and an element count.
// NOTE(review): the definition is not visible in this part of the file;
// presumably a strided byte-to-byte copy -- confirm against the definition.
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
60
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
61
                             int nDstPixelStride, GPtrDiff_t nWordCount);
62

63
/************************************************************************/
64
/*                    DownsamplingIntegerXFactor()                      */
65
/************************************************************************/
66

67
// Copies one output line of nBufXSize pixels into pabyDstData, taking one
// source pixel every nSrcXInc pixels starting at iSrcX, and (re)loading
// source blocks of poBand along block row nLBlockY as iSrcX crosses block
// boundaries.
//
// Template parameters:
//   bSameDataType  - when true, pixels are copied with memcpy /
//                    GDALFastCopyByte and DATA_TYPE_SIZE is used as the pixel
//                    size; when false, GDALCopyWords64 converts from
//                    eDataType to eBufType and the size comes from
//                    GDALGetDataTypeSizeBytes(eDataType).
//   DATA_TYPE_SIZE - pixel size in bytes, only used when bSameDataType.
//
// Parameters:
//   iSrcOffsetCst  - constant pixel offset added to (iSrcX - nStartBlockX)
//                    when locating a pixel inside the block buffer.
//                    NOTE(review): presumably the row offset within the
//                    block -- confirm against the caller.
//   nPixelSpace    - byte stride between consecutive destination pixels.
//   nStartBlockX   - in/out: X of the first pixel of the block currently in
//                    poBlock; rewritten each time a block is loaded.
//   poBlock        - in/out: currently locked block. A previously held block
//                    is unlocked (DropLock) before another one is fetched.
//                    On a true return the last block used is left in poBlock
//                    without this function dropping its lock; on a false
//                    return poBlock is nullptr.
//
// Returns false if GetLockedBlockRef() returns nullptr, true otherwise.
//
// Control flow note: execution enters the while loop body through a goto
// (to reload_block or no_reload_block), and re-enters it once more the same
// way after the loop to handle the last output pixel.
template <bool bSameDataType, int DATA_TYPE_SIZE>
68
static bool DownsamplingIntegerXFactor(
413,236✔
69
    GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
70
    GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
71
    GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
72
    int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
73
{
74
    const int nBandDataSize =
413,236✔
75
        bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
76
    // Counts the output pixels still to be produced after the current one.
    int nOuterLoopIters = nBufXSize - 1;
413,236✔
77
    // Byte distance between two consecutively sampled source pixels.
    const int nIncSrcOffset = nSrcXInc * nBandDataSize;
413,236✔
78
    const GByte *CPL_RESTRICT pabySrcData;
79
    int nEndBlockX = nBlockXSize + nStartBlockX;
413,236✔
80

81
    // First pixel: jump straight into the loop body, skipping the pointer
    // increments at its top. Reuse the caller's block if iSrcX lies before
    // its end, otherwise load the block containing iSrcX.
    if (iSrcX < nEndBlockX)
413,236✔
82
    {
83
        CPLAssert(poBlock);
226,134✔
84
        goto no_reload_block;
226,134✔
85
    }
86
    goto reload_block;
187,102✔
87

88
    // Don't do the last iteration in the loop, as iSrcX might go beyond
89
    // nRasterXSize - 1
90
    while (--nOuterLoopIters >= 1)
932,852✔
91
    {
92
        iSrcX += nSrcXInc;
189,034✔
93
        pabySrcData += nIncSrcOffset;
189,034✔
94
        pabyDstData += nPixelSpace;
189,034✔
95

96
        /* --------------------------------------------------------------------
97
         */
98
        /*      Ensure we have the appropriate block loaded. */
99
        /* --------------------------------------------------------------------
100
         */
101
        if (iSrcX >= nEndBlockX)
189,034✔
102
        {
103
        reload_block:
189,034✔
104
        {
105
            const int nLBlockX = iSrcX / nBlockXSize;
388,726✔
106
            nStartBlockX = nLBlockX * nBlockXSize;
388,726✔
107
            nEndBlockX = nStartBlockX + nBlockXSize;
388,726✔
108

109
            if (poBlock != nullptr)
388,726✔
110
                poBlock->DropLock();
316,739✔
111

112
            poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
388,726✔
113
            if (poBlock == nullptr)
388,726✔
114
            {
115
                return false;
1✔
116
            }
117
        }
118

119
        // Also the goto target when the current block is kept: the source
        // pointer is recomputed from the block buffer and iSrcX rather than
        // carried over from the increment at the top of the loop.
        no_reload_block:
388,725✔
120
            const GByte *pabySrcBlock =
121
                static_cast<const GByte *>(poBlock->GetDataRef());
932,852✔
122
            GPtrDiff_t iSrcOffset =
932,852✔
123
                (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
932,852✔
124
            pabySrcData = pabySrcBlock + iSrcOffset;
932,852✔
125
        }
126

127
        /* --------------------------------------------------------------------
128
         */
129
        /*      Copy the maximum run of pixels. */
130
        /* --------------------------------------------------------------------
131
         */
132

133
        // Number of sampled pixels whose source X lies before nEndBlockX
        // (ceiling division by nSrcXInc), capped by nOuterLoopIters. The cap
        // makes this 0 when nOuterLoopIters is 0 (final pixel, or
        // nBufXSize == 1); one pixel is still copied in that case.
        const int nIters = std::min(
932,852✔
134
            (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
932,852✔
135
        if (bSameDataType)
136
        {
137
            memcpy(pabyDstData, pabySrcData, nBandDataSize);
932,447✔
138
            if (nIters > 1)
932,447✔
139
            {
140
                if (DATA_TYPE_SIZE == 1)
141
                {
142
                    pabySrcData += nIncSrcOffset;
276,287✔
143
                    pabyDstData += nPixelSpace;
276,287✔
144
                    GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
276,287✔
145
                                     nPixelSpace, nIters - 1);
276,287✔
146
                    // One stride was applied before the call; adding
                    // nIters - 2 more gives a total advance of nIters - 1
                    // strides, the same as the loop in the else branch.
                    pabySrcData +=
276,287✔
147
                        static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
276,287✔
148
                    pabyDstData +=
276,287✔
149
                        static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
276,287✔
150
                }
151
                else
152
                {
153
                    for (int i = 0; i < nIters - 1; i++)
4,443,828✔
154
                    {
155
                        pabySrcData += nIncSrcOffset;
4,245,254✔
156
                        pabyDstData += nPixelSpace;
4,245,254✔
157
                        memcpy(pabyDstData, pabySrcData, nBandDataSize);
4,245,254✔
158
                    }
159
                }
160
                // Account for the nIters - 1 extra pixels copied in this pass.
                iSrcX += nSrcXInc * (nIters - 1);
474,861✔
161
                nOuterLoopIters -= nIters - 1;
474,861✔
162
            }
163
        }
164
        else
165
        {
166
            // Type to type conversion ...
167
            // std::max(1, nIters): nIters can be 0 (see above) but the
            // current pixel must still be converted.
            GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
405✔
168
                            eBufType, nPixelSpace, std::max(1, nIters));
405✔
169
            if (nIters > 1)
405✔
170
            {
171
                pabySrcData +=
198✔
172
                    static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
198✔
173
                pabyDstData +=
198✔
174
                    static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
198✔
175
                iSrcX += nSrcXInc * (nIters - 1);
198✔
176
                nOuterLoopIters -= nIters - 1;
198✔
177
            }
178
        }
179
    }
180

181
    // Deal with the last iteration separately, clamping iSrcX so that it does
    // not go beyond nRasterXSize - 1. After this extra pass through the loop
    // body, the loop condition decrements nOuterLoopIters to -1, so the loop
    // exits, this test fails, and the function returns true.
182
    if (nOuterLoopIters == 0)
743,818✔
183
    {
184
        const int nRasterXSize = poBand->GetXSize();
330,583✔
185
        iSrcX =
330,583✔
186
            static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
661,166✔
187
                                      static_cast<GInt64>(nRasterXSize - 1)));
330,583✔
188
        pabyDstData += nPixelSpace;
330,583✔
189
        if (iSrcX < nEndBlockX)
330,583✔
190
        {
191
            goto no_reload_block;
317,993✔
192
        }
193
        goto reload_block;
12,590✔
194
    }
195
    return true;
413,235✔
196
}
197

198
/************************************************************************/
199
/*                             IRasterIO()                              */
200
/*                                                                      */
201
/*      Default internal implementation of RasterIO() ... utilizes      */
202
/*      the Block access methods to satisfy the request.  This would    */
203
/*      normally only be overridden by formats with overviews.          */
204
/************************************************************************/
205

206
CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
5,703,260✔
207
                                 int nXSize, int nYSize, void *pData,
208
                                 int nBufXSize, int nBufYSize,
209
                                 GDALDataType eBufType, GSpacing nPixelSpace,
210
                                 GSpacing nLineSpace,
211
                                 GDALRasterIOExtraArg *psExtraArg)
212

213
{
214
    if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
5,703,260✔
215
    {
216
        CPLError(eFlushBlockErr, CPLE_AppDefined,
×
217
                 "An error occurred while writing a dirty block "
218
                 "from GDALRasterBand::IRasterIO");
219
        CPLErr eErr = eFlushBlockErr;
×
220
        eFlushBlockErr = CE_None;
×
221
        return eErr;
×
222
    }
223
    if (nBlockXSize <= 0 || nBlockYSize <= 0)
5,703,260✔
224
    {
225
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
2,159✔
226
        return CE_Failure;
×
227
    }
228

229
    const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
5,701,100✔
230
    const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
5,700,600✔
231
    GByte dummyBlock[2] = {0, 0};
5,700,630✔
232
    GByte *pabySrcBlock =
5,700,630✔
233
        dummyBlock; /* to avoid Coverity warning about nullptr dereference */
234
    GDALRasterBlock *poBlock = nullptr;
5,700,630✔
235
    const bool bUseIntegerRequestCoords =
5,700,630✔
236
        (!psExtraArg->bFloatingPointWindowValidity ||
5,739,960✔
237
         (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
39,331✔
238
          nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
15,971✔
239

240
    /* ==================================================================== */
241
    /*      A common case is the data requested with the destination        */
242
    /*      is packed, and the block width is the raster width.             */
243
    /* ==================================================================== */
244
    if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
5,618,850✔
245
        nBlockXSize == GetXSize() && nBufXSize == nXSize &&
2,938,290✔
246
        nBufYSize == nYSize && bUseIntegerRequestCoords)
11,320,200✔
247
    {
248
        CPLErr eErr = CE_None;
2,806,630✔
249
        int nLBlockY = -1;
2,806,630✔
250

251
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
8,193,880✔
252
        {
253
            const int iSrcY = iBufYOff + nYOff;
5,383,890✔
254

255
            if (iSrcY < nLBlockY * nBlockYSize ||
5,383,890✔
256
                iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
5,383,970✔
257
            {
258
                nLBlockY = iSrcY / nBlockYSize;
3,050,460✔
259
                bool bJustInitialize =
3,050,460✔
260
                    eRWFlag == GF_Write && nXOff == 0 &&
95,910✔
261
                    nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
3,197,810✔
262
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
51,443✔
263

264
                // Is this a partial tile at right and/or bottom edges of
265
                // the raster, and that is going to be completely written?
266
                // If so, do not load it from storage, but zero it so that
267
                // the content outside of the validity area is initialized.
268
                bool bMemZeroBuffer = false;
3,050,460✔
269
                if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
95,910✔
270
                    nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
21,151✔
271
                    nYOff + nYSize == GetYSize() &&
3,146,460✔
272
                    nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
89✔
273
                {
274
                    bJustInitialize = true;
89✔
275
                    bMemZeroBuffer = true;
89✔
276
                }
277

278
                if (poBlock)
3,050,460✔
279
                    poBlock->DropLock();
241,901✔
280

281
                const GUInt32 nErrorCounter = CPLGetErrorCounter();
3,050,460✔
282
                poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
3,047,430✔
283
                if (poBlock == nullptr)
3,052,260✔
284
                {
285
                    if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
1,067✔
286
                        nullptr)
287
                    {
288
                        CPLError(CE_Failure, CPLE_AppDefined,
×
289
                                 "GetBlockRef failed at X block offset %d, "
290
                                 "Y block offset %d%s",
291
                                 0, nLBlockY,
292
                                 (nErrorCounter != CPLGetErrorCounter())
×
293
                                     ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
×
294
                                     : "");
295
                    }
296
                    eErr = CE_Failure;
1,067✔
297
                    break;
1,067✔
298
                }
299

300
                if (eRWFlag == GF_Write)
3,051,190✔
301
                    poBlock->MarkDirty();
95,910✔
302

303
                pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
3,051,190✔
304
                if (bMemZeroBuffer)
3,051,180✔
305
                {
306
                    memset(pabySrcBlock, 0,
89✔
307
                           static_cast<GPtrDiff_t>(nBandDataSize) *
89✔
308
                               nBlockXSize * nBlockYSize);
89✔
309
                }
310
            }
311

312
            const auto nSrcByteOffset =
5,384,610✔
313
                (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
5,384,610✔
314
                     nBlockXSize +
5,384,610✔
315
                 nXOff) *
5,384,610✔
316
                nBandDataSize;
5,384,610✔
317

318
            if (eDataType == eBufType)
5,384,610✔
319
            {
320
                if (eRWFlag == GF_Read)
1,740,800✔
321
                    memcpy(static_cast<GByte *>(pData) +
1,498,800✔
322
                               static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
1,498,800✔
323
                           pabySrcBlock + nSrcByteOffset,
1,498,800✔
324
                           static_cast<size_t>(nLineSpace));
325
                else
326
                    memcpy(pabySrcBlock + nSrcByteOffset,
242,003✔
327
                           static_cast<GByte *>(pData) +
242,003✔
328
                               static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
242,003✔
329
                           static_cast<size_t>(nLineSpace));
330
            }
331
            else
332
            {
333
                // Type to type conversion.
334

335
                if (eRWFlag == GF_Read)
3,643,810✔
336
                    GDALCopyWords64(
3,627,870✔
337
                        pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
3,627,870✔
338
                        static_cast<GByte *>(pData) +
339
                            static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
3,627,870✔
340
                        eBufType, static_cast<int>(nPixelSpace), nBufXSize);
341
                else
342
                    GDALCopyWords64(static_cast<GByte *>(pData) +
15,944✔
343
                                        static_cast<GPtrDiff_t>(iBufYOff) *
15,944✔
344
                                            nLineSpace,
345
                                    eBufType, static_cast<int>(nPixelSpace),
346
                                    pabySrcBlock + nSrcByteOffset, eDataType,
15,944✔
347
                                    nBandDataSize, nBufXSize);
348
            }
349

350
            if (psExtraArg->pfnProgress != nullptr &&
5,447,040✔
351
                !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
59,779✔
352
                                         psExtraArg->pProgressData))
353
            {
354
                eErr = CE_Failure;
5✔
355
                break;
5✔
356
            }
357
        }
358

359
        if (poBlock)
2,811,070✔
360
            poBlock->DropLock();
2,809,150✔
361

362
        return eErr;
2,810,140✔
363
    }
364

365
    /* ==================================================================== */
366
    /*      Do we have overviews that would be appropriate to satisfy       */
367
    /*      this request?                                                   */
368
    /* ==================================================================== */
369
    if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
2,894,760✔
370
        eRWFlag == GF_Read)
371
    {
372
        GDALRasterIOExtraArg sExtraArg;
373
        GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
2,832✔
374

375
        const int nOverview =
376
            GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
2,832✔
377
                                          nBufXSize, nBufYSize, &sExtraArg);
378
        if (nOverview >= 0)
2,832✔
379
        {
380
            GDALRasterBand *poOverviewBand = GetOverview(nOverview);
2,812✔
381
            if (poOverviewBand == nullptr)
2,812✔
382
                return CE_Failure;
2,812✔
383

384
            return poOverviewBand->RasterIO(
2,812✔
385
                eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
386
                nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
2,812✔
387
        }
388
    }
389

390
    if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
702,462✔
391
        nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
×
392
        nLineSpace == nPixelSpace * nBufXSize &&
3,593,550✔
393
        CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
×
394
    {
395
        memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
×
396
        return CE_None;
×
397
    }
398

399
    /* ==================================================================== */
400
    /*      The second case when we don't need subsample data but likely    */
401
    /*      need data type conversion.                                      */
402
    /* ==================================================================== */
403
    if (  // nPixelSpace == nBufDataSize &&
2,891,090✔
404
        nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
2,891,090✔
405
    {
406
#if DEBUG_VERBOSE
407
        printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
408
               nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
409
#endif
410

411
        /* --------------------------------------------------------------------
412
         */
413
        /*      Loop over buffer computing source locations. */
414
        /* --------------------------------------------------------------------
415
         */
416
        // Calculate starting values out of loop
417
        const int nLBlockXStart = nXOff / nBlockXSize;
2,528,220✔
418
        const int nXSpanEnd = nBufXSize + nXOff;
2,528,220✔
419

420
        int nYInc = 0;
2,528,220✔
421
        for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
5,090,990✔
422
             iBufYOff += nYInc, iSrcY += nYInc)
2,562,770✔
423
        {
424
            GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
2,562,780✔
425
                                    static_cast<GPtrDiff_t>(nLineSpace);
426
            int nLBlockY = iSrcY / nBlockYSize;
2,562,780✔
427
            int nLBlockX = nLBlockXStart;
2,562,780✔
428
            int iSrcX = nXOff;
2,562,780✔
429
            while (iSrcX < nXSpanEnd)
5,336,930✔
430
            {
431
                int nXSpan = nLBlockX * nBlockXSize;
2,774,150✔
432
                if (nXSpan < INT_MAX - nBlockXSize)
2,774,150✔
433
                    nXSpan += nBlockXSize;
2,774,130✔
434
                else
435
                    nXSpan = INT_MAX;
18✔
436
                const int nXRight = nXSpan;
2,774,150✔
437
                nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
2,774,150✔
438
                const size_t nXSpanSize =
2,774,150✔
439
                    nXSpan * static_cast<size_t>(nPixelSpace);
2,774,150✔
440

441
                bool bJustInitialize =
2,774,150✔
442
                    eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
2,042,060✔
443
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
37,172✔
444
                    nXOff <= nLBlockX * nBlockXSize &&
4,841,780✔
445
                    nXOff + nXSize >= nXRight;
25,575✔
446

447
                // Is this a partial tile at right and/or bottom edges of
448
                // the raster, and that is going to be completely written?
449
                // If so, do not load it from storage, but zero it so that
450
                // the content outside of the validity area is initialized.
451
                bool bMemZeroBuffer = false;
2,774,150✔
452
                if (eRWFlag == GF_Write && !bJustInitialize &&
2,042,060✔
453
                    nXOff <= nLBlockX * nBlockXSize &&
2,017,740✔
454
                    nYOff <= nLBlockY * nBlockYSize &&
2,016,120✔
455
                    (nXOff + nXSize >= nXRight ||
12,094✔
456
                     // cppcheck-suppress knownConditionTrueFalse
457
                     (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
4,818,890✔
458
                    (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
11,916✔
459
                     (nYOff + nYSize == GetYSize() &&
10,677✔
460
                      nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
1,890✔
461
                {
462
                    bJustInitialize = true;
3,129✔
463
                    bMemZeroBuffer = true;
3,129✔
464
                }
465

466
                /* --------------------------------------------------------------------
467
                 */
468
                /*      Ensure we have the appropriate block loaded. */
469
                /* --------------------------------------------------------------------
470
                 */
471
                const GUInt32 nErrorCounter = CPLGetErrorCounter();
2,774,150✔
472
                poBlock =
2,774,220✔
473
                    GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
2,774,090✔
474
                if (!poBlock)
2,774,220✔
475
                {
476
                    if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
74✔
477
                        nullptr)
478
                    {
479
                        CPLError(CE_Failure, CPLE_AppDefined,
×
480
                                 "GetBlockRef failed at X block offset %d, "
481
                                 "Y block offset %d%s",
482
                                 nLBlockX, nLBlockY,
483
                                 (nErrorCounter != CPLGetErrorCounter())
×
484
                                     ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
×
485
                                     : "");
486
                    }
487
                    return (CE_Failure);
74✔
488
                }
489

490
                if (eRWFlag == GF_Write)
2,774,150✔
491
                    poBlock->MarkDirty();
2,042,060✔
492

493
                pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
2,774,150✔
494
                if (bMemZeroBuffer)
2,774,140✔
495
                {
496
                    memset(pabySrcBlock, 0,
3,129✔
497
                           static_cast<GPtrDiff_t>(nBandDataSize) *
3,129✔
498
                               nBlockXSize * nBlockYSize);
3,129✔
499
                }
500
                /* --------------------------------------------------------------------
501
                 */
502
                /*      Copy over this chunk of data. */
503
                /* --------------------------------------------------------------------
504
                 */
505
                GPtrDiff_t iSrcOffset =
2,774,140✔
506
                    (static_cast<GPtrDiff_t>(iSrcX) -
2,774,140✔
507
                     static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
2,774,140✔
508
                     (static_cast<GPtrDiff_t>(iSrcY) -
2,774,140✔
509
                      static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
2,774,140✔
510
                         nBlockXSize) *
2,774,140✔
511
                    nBandDataSize;
2,774,140✔
512
                // Fill up as many rows as possible for the loaded block.
513
                const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
5,548,280✔
514
                                          nBufYSize - iBufYOff);
2,774,140✔
515
                for (int k = 0; k < kmax; k++)
58,551,700✔
516
                {
517
                    if (eDataType == eBufType && nPixelSpace == nBufDataSize)
55,777,600✔
518
                    {
519
                        if (eRWFlag == GF_Read)
51,793,400✔
520
                            memcpy(static_cast<GByte *>(pData) + iBufOffset +
47,382,800✔
521
                                       static_cast<GPtrDiff_t>(k) * nLineSpace,
47,382,800✔
522
                                   pabySrcBlock + iSrcOffset, nXSpanSize);
47,382,800✔
523
                        else
524
                            memcpy(pabySrcBlock + iSrcOffset,
4,410,590✔
525
                                   static_cast<GByte *>(pData) + iBufOffset +
4,410,590✔
526
                                       static_cast<GPtrDiff_t>(k) * nLineSpace,
4,410,590✔
527
                                   nXSpanSize);
528
                    }
529
                    else
530
                    {
531
                        /* type to type conversion */
532
                        if (eRWFlag == GF_Read)
3,984,220✔
533
                            GDALCopyWords64(
3,908,500✔
534
                                pabySrcBlock + iSrcOffset, eDataType,
3,908,500✔
535
                                nBandDataSize,
536
                                static_cast<GByte *>(pData) + iBufOffset +
3,908,500✔
537
                                    static_cast<GPtrDiff_t>(k) * nLineSpace,
3,908,500✔
538
                                eBufType, static_cast<int>(nPixelSpace),
539
                                nXSpan);
540
                        else
541
                            GDALCopyWords64(
75,720✔
542
                                static_cast<GByte *>(pData) + iBufOffset +
75,720✔
543
                                    static_cast<GPtrDiff_t>(k) * nLineSpace,
75,720✔
544
                                eBufType, static_cast<int>(nPixelSpace),
545
                                pabySrcBlock + iSrcOffset, eDataType,
75,720✔
546
                                nBandDataSize, nXSpan);
547
                    }
548

549
                    iSrcOffset +=
55,777,600✔
550
                        static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
55,777,600✔
551
                }
552

553
                iBufOffset =
554
                    CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
2,774,100✔
555
                nLBlockX++;
2,774,110✔
556
                iSrcX += nXSpan;
2,774,110✔
557

558
                poBlock->DropLock();
2,774,110✔
559
                poBlock = nullptr;
2,774,150✔
560
            }
561

562
            /* Compute the increment to go on a block boundary */
563
            nYInc = nBlockYSize - (iSrcY % nBlockYSize);
2,562,780✔
564

565
            if (psExtraArg->pfnProgress != nullptr &&
2,564,560✔
566
                !psExtraArg->pfnProgress(
1,783✔
567
                    1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
2,564,560✔
568
                    psExtraArg->pProgressData))
569
            {
570
                return CE_Failure;
2✔
571
            }
572
        }
573

574
        return CE_None;
2,528,220✔
575
    }
576

577
    /* ==================================================================== */
578
    /*      Loop reading required source blocks to satisfy output           */
579
    /*      request.  This is the most general implementation.              */
580
    /* ==================================================================== */
581

582
    double dfXOff = nXOff;
362,870✔
583
    double dfYOff = nYOff;
362,870✔
584
    double dfXSize = nXSize;
362,870✔
585
    double dfYSize = nYSize;
362,870✔
586
    if (psExtraArg->bFloatingPointWindowValidity)
362,870✔
587
    {
588
        dfXOff = psExtraArg->dfXOff;
28,159✔
589
        dfYOff = psExtraArg->dfYOff;
28,159✔
590
        dfXSize = psExtraArg->dfXSize;
28,159✔
591
        dfYSize = psExtraArg->dfYSize;
28,159✔
592
    }
593

594
    /* -------------------------------------------------------------------- */
595
    /*      Compute stepping increment.                                     */
596
    /* -------------------------------------------------------------------- */
597
    const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
362,870✔
598
    const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
362,870✔
599
    CPLErr eErr = CE_None;
362,870✔
600

601
    if (eRWFlag == GF_Write)
362,870✔
602
    {
603
        /* --------------------------------------------------------------------
604
         */
605
        /*    Write case */
606
        /*    Loop over raster window computing source locations in the buffer.
607
         */
608
        /* --------------------------------------------------------------------
609
         */
610
        GByte *pabyDstBlock = nullptr;
166,650✔
611
        int nLBlockX = -1;
166,650✔
612
        int nLBlockY = -1;
166,650✔
613

614
        for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
1,259,590✔
615
        {
616
            const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
1,092,940✔
617

618
            for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
12,063,600✔
619
            {
620
                const int iBufXOff =
10,970,600✔
621
                    static_cast<int>((iDstX - nXOff) / dfSrcXInc);
10,970,600✔
622
                GPtrDiff_t iBufOffset =
10,970,600✔
623
                    static_cast<GPtrDiff_t>(iBufYOff) *
10,970,600✔
624
                        static_cast<GPtrDiff_t>(nLineSpace) +
625
                    iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
10,970,600✔
626

627
                // FIXME: this code likely doesn't work if the dirty block gets
628
                // flushed to disk before being completely written.
629
                // In the meantime, bJustInitialize should probably be set to
630
                // FALSE even if it is not ideal performance wise, and for
631
                // lossy compression.
632

633
                /* --------------------------------------------------------------------
634
                 */
635
                /*      Ensure we have the appropriate block loaded. */
636
                /* --------------------------------------------------------------------
637
                 */
638
                if (iDstX < nLBlockX * nBlockXSize ||
10,970,600✔
639
                    iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
10,721,300✔
640
                    iDstY < nLBlockY * nBlockYSize ||
10,264,600✔
641
                    iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
10,264,600✔
642
                {
643
                    nLBlockX = iDstX / nBlockXSize;
738,642✔
644
                    nLBlockY = iDstY / nBlockYSize;
738,642✔
645

646
                    const bool bJustInitialize =
738,642✔
647
                        nYOff <= nLBlockY * nBlockYSize &&
1,065,870✔
648
                        nYOff + nYSize - nBlockYSize >=
327,231✔
649
                            nLBlockY * nBlockYSize &&
327,231✔
650
                        nXOff <= nLBlockX * nBlockXSize &&
1,116,140✔
651
                        nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
50,265✔
652
                    /*bool bMemZeroBuffer = FALSE;
653
                    if( !bJustInitialize &&
654
                        nXOff <= nLBlockX * nBlockXSize &&
655
                        nYOff <= nLBlockY * nBlockYSize &&
656
                        (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
657
                         (nXOff + nXSize == GetXSize() &&
658
                         (nLBlockX+1) * nBlockXSize > GetXSize())) &&
659
                        (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
660
                         (nYOff + nYSize == GetYSize() &&
661
                         (nLBlockY+1) * nBlockYSize > GetYSize())) )
662
                    {
663
                        bJustInitialize = TRUE;
664
                        bMemZeroBuffer = TRUE;
665
                    }*/
666
                    if (poBlock != nullptr)
738,642✔
667
                        poBlock->DropLock();
571,992✔
668

669
                    poBlock =
738,642✔
670
                        GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
738,642✔
671
                    if (poBlock == nullptr)
738,642✔
672
                    {
673
                        return (CE_Failure);
×
674
                    }
675

676
                    poBlock->MarkDirty();
738,642✔
677

678
                    pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
738,642✔
679
                    /*if( bMemZeroBuffer )
680
                    {
681
                        memset(pabyDstBlock, 0,
682
                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
683
                    * nBlockYSize);
684
                    }*/
685
                }
686

687
                // To make Coverity happy. Should not happen by design.
688
                if (pabyDstBlock == nullptr)
10,970,600✔
689
                {
690
                    CPLAssert(false);
×
691
                    eErr = CE_Failure;
692
                    break;
693
                }
694

695
                /* --------------------------------------------------------------------
696
                 */
697
                /*      Copy over this pixel of data. */
698
                /* --------------------------------------------------------------------
699
                 */
700
                GPtrDiff_t iDstOffset =
10,970,600✔
701
                    (static_cast<GPtrDiff_t>(iDstX) -
10,970,600✔
702
                     static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
10,970,600✔
703
                     (static_cast<GPtrDiff_t>(iDstY) -
10,970,600✔
704
                      static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
10,970,600✔
705
                         nBlockXSize) *
10,970,600✔
706
                    nBandDataSize;
10,970,600✔
707

708
                if (eDataType == eBufType)
10,970,600✔
709
                {
710
                    memcpy(pabyDstBlock + iDstOffset,
10,967,500✔
711
                           static_cast<GByte *>(pData) + iBufOffset,
10,967,500✔
712
                           nBandDataSize);
713
                }
714
                else
715
                {
716
                    /* type to type conversion ... ouch, this is expensive way
717
                    of handling single words */
718

719
                    GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset,
3,096✔
720
                                    eBufType, 0, pabyDstBlock + iDstOffset,
3,096✔
721
                                    eDataType, 0, 1);
722
                }
723
            }
724

725
            if (psExtraArg->pfnProgress != nullptr &&
1,092,940✔
726
                !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
×
727
                                         psExtraArg->pProgressData))
728
            {
729
                eErr = CE_Failure;
×
730
                break;
×
731
            }
732
        }
733
    }
734
    else
735
    {
736
        if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
196,220✔
737
        {
738
            if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
7,638✔
739
                 psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
2,496✔
740
                 psExtraArg->eResampleAlg == GRIORA_Bilinear ||
2,494✔
741
                 psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
5,147✔
742
                GetColorTable() != nullptr)
2,465✔
743
            {
744
                CPLError(CE_Warning, CPLE_NotSupported,
×
745
                         "Resampling method not supported on paletted band. "
746
                         "Falling back to nearest neighbour");
747
            }
748
            else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
2,574✔
749
                     GDALDataTypeIsComplex(eDataType))
3✔
750
            {
751
                CPLError(CE_Warning, CPLE_NotSupported,
×
752
                         "Resampling method not supported on complex data type "
753
                         "band. Falling back to nearest neighbour");
754
            }
755
            else
756
            {
757
                return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
2,571✔
758
                                         pData, nBufXSize, nBufYSize, eBufType,
759
                                         nPixelSpace, nLineSpace, psExtraArg);
2,571✔
760
            }
761
        }
762

763
        int nLimitBlockY = 0;
193,625✔
764
        const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
193,625✔
765
        int nStartBlockX = -nBlockXSize;
193,625✔
766
        const double EPS = 1e-10;
193,625✔
767
        int nLBlockY = -1;
193,625✔
768
        const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
193,625✔
769
        const bool bIntegerXFactor =
193,625✔
770
            bUseIntegerRequestCoords &&
170,978✔
771
            static_cast<int>(dfSrcXInc) == dfSrcXInc &&
265,612✔
772
            static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
71,987✔
773

774
        /* --------------------------------------------------------------------
775
         */
776
        /*      Read case */
777
        /*      Loop over buffer computing source locations. */
778
        /* --------------------------------------------------------------------
779
         */
780
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
1,945,910✔
781
        {
782
            // Add small epsilon to avoid some numeric precision issues.
783
            const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
1,752,300✔
784
            const int iSrcY = static_cast<int>(std::min(
1,752,300✔
785
                std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
1,752,300✔
786

787
            GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
1,752,300✔
788
                                    static_cast<GPtrDiff_t>(nLineSpace);
789

790
            if (iSrcY >= nLimitBlockY)
1,752,300✔
791
            {
792
                nLBlockY = iSrcY / nBlockYSize;
234,842✔
793
                nLimitBlockY = nLBlockY * nBlockYSize;
234,842✔
794
                if (nLimitBlockY < INT_MAX - nBlockYSize)
234,842✔
795
                    nLimitBlockY += nBlockYSize;
234,842✔
796
                else
797
                    nLimitBlockY = INT_MAX;
×
798
                // Make sure a new block is loaded.
799
                nStartBlockX = -nBlockXSize;
234,842✔
800
            }
801
            else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
1,517,450✔
802
            {
803
                // Make sure a new block is loaded.
804
                nStartBlockX = -nBlockXSize;
429,795✔
805
            }
806

807
            GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
1,752,300✔
808
                                       static_cast<GPtrDiff_t>(nBlockXSize);
1,752,300✔
809

810
            if (bIntegerXFactor)
1,752,300✔
811
            {
812
                int iSrcX = static_cast<int>(dfSrcXStart);
413,236✔
813
                const int nSrcXInc = static_cast<int>(dfSrcXInc);
413,236✔
814
                GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
413,236✔
815
                bool bRet = false;
413,236✔
816
                if (bByteCopy)
413,236✔
817
                {
818
                    bRet = DownsamplingIntegerXFactor<true, 1>(
302,849✔
819
                        this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
820
                        static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte,
821
                        GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
822
                }
823
                else if (eDataType == eBufType)
110,387✔
824
                {
825
                    switch (nBandDataSize)
110,182✔
826
                    {
827
                        case 2:
110,102✔
828
                            bRet = DownsamplingIntegerXFactor<true, 2>(
110,102✔
829
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
830
                                pabyDstData, static_cast<int>(nPixelSpace),
831
                                nBufXSize, eDataType, eDataType, nStartBlockX,
832
                                nBlockXSize, poBlock, nLBlockY);
833
                            break;
110,102✔
834
                        case 4:
22✔
835
                            bRet = DownsamplingIntegerXFactor<true, 4>(
22✔
836
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
837
                                pabyDstData, static_cast<int>(nPixelSpace),
838
                                nBufXSize, eDataType, eDataType, nStartBlockX,
839
                                nBlockXSize, poBlock, nLBlockY);
840
                            break;
22✔
841
                        case 8:
56✔
842
                            bRet = DownsamplingIntegerXFactor<true, 8>(
56✔
843
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
844
                                pabyDstData, static_cast<int>(nPixelSpace),
845
                                nBufXSize, eDataType, eDataType, nStartBlockX,
846
                                nBlockXSize, poBlock, nLBlockY);
847
                            break;
56✔
848
                        case 16:
2✔
849
                            bRet = DownsamplingIntegerXFactor<true, 16>(
2✔
850
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
851
                                pabyDstData, static_cast<int>(nPixelSpace),
852
                                nBufXSize, eDataType, eDataType, nStartBlockX,
853
                                nBlockXSize, poBlock, nLBlockY);
854
                            break;
2✔
855
                        default:
×
856
                            CPLAssert(false);
×
857
                            break;
858
                    }
859
                }
860
                else
861
                {
862
                    bRet = DownsamplingIntegerXFactor<false, 0>(
205✔
863
                        this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
864
                        static_cast<int>(nPixelSpace), nBufXSize, eDataType,
865
                        eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
866
                }
867
                if (!bRet)
413,236✔
868
                    eErr = CE_Failure;
1✔
869
            }
870
            else
871
            {
872
                double dfSrcX = dfSrcXStart;
1,339,060✔
873
                for (int iBufXOff = 0; iBufXOff < nBufXSize;
560,923,000✔
874
                     iBufXOff++, dfSrcX += dfSrcXInc)
559,584,000✔
875
                {
876
                    // TODO?: try to avoid the clamping for most iterations
877
                    const int iSrcX = static_cast<int>(
878
                        std::min(std::max(0.0, dfSrcX),
1,119,170,000✔
879
                                 static_cast<double>(nRasterXSize - 1)));
559,584,000✔
880

881
                    /* --------------------------------------------------------------------
882
                     */
883
                    /*      Ensure we have the appropriate block loaded. */
884
                    /* --------------------------------------------------------------------
885
                     */
886
                    if (iSrcX >= nBlockXSize + nStartBlockX)
559,584,000✔
887
                    {
888
                        const int nLBlockX = iSrcX / nBlockXSize;
1,705,400✔
889
                        nStartBlockX = nLBlockX * nBlockXSize;
1,705,400✔
890

891
                        if (poBlock != nullptr)
1,705,400✔
892
                            poBlock->DropLock();
1,583,760✔
893

894
                        poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
1,705,400✔
895
                        if (poBlock == nullptr)
1,705,400✔
896
                        {
897
                            eErr = CE_Failure;
9✔
898
                            break;
9✔
899
                        }
900

901
                        pabySrcBlock =
902
                            static_cast<GByte *>(poBlock->GetDataRef());
1,705,390✔
903
                    }
904
                    const GPtrDiff_t nDiffX =
559,584,000✔
905
                        static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
559,584,000✔
906

907
                    /* --------------------------------------------------------------------
908
                     */
909
                    /*      Copy over this pixel of data. */
910
                    /* --------------------------------------------------------------------
911
                     */
912

913
                    if (bByteCopy)
559,584,000✔
914
                    {
915
                        GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
506,145,000✔
916
                        static_cast<GByte *>(pData)[iBufOffset] =
506,145,000✔
917
                            pabySrcBlock[iSrcOffset];
506,145,000✔
918
                    }
919
                    else if (eDataType == eBufType)
53,439,100✔
920
                    {
921
                        GPtrDiff_t iSrcOffset =
48,225,500✔
922
                            (nDiffX + iSrcOffsetCst) * nBandDataSize;
48,225,500✔
923
                        memcpy(static_cast<GByte *>(pData) + iBufOffset,
48,225,500✔
924
                               pabySrcBlock + iSrcOffset, nBandDataSize);
48,225,500✔
925
                    }
926
                    else
927
                    {
928
                        // Type to type conversion ...
929
                        GPtrDiff_t iSrcOffset =
5,213,610✔
930
                            (nDiffX + iSrcOffsetCst) * nBandDataSize;
5,213,610✔
931
                        GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
5,213,610✔
932
                                        static_cast<GByte *>(pData) +
933
                                            iBufOffset,
5,213,610✔
934
                                        eBufType, 0, 1);
935
                    }
936

937
                    iBufOffset += static_cast<int>(nPixelSpace);
559,584,000✔
938
                }
939
            }
940
            if (eErr == CE_Failure)
1,752,300✔
941
                break;
11✔
942

943
            if (psExtraArg->pfnProgress != nullptr &&
1,963,450✔
944
                !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
211,166✔
945
                                         psExtraArg->pProgressData))
946
            {
947
                eErr = CE_Failure;
1✔
948
                break;
1✔
949
            }
950
        }
951
    }
952

953
    if (poBlock != nullptr)
360,275✔
954
        poBlock->DropLock();
360,265✔
955

956
    return eErr;
360,275✔
957
}
958

959
/************************************************************************/
960
/*                         GDALRasterIOTransformer()                    */
961
/************************************************************************/
962

963
/** Parameters of the per-axis linear mapping applied by
 * GDALRasterIOTransformer().
 *
 * Along each axis a destination coordinate d is mapped to the source
 * coordinate d * ratio + offset, and the reverse direction computes
 * (s - offset) / ratio.
 *
 * A default-constructed instance describes the identity mapping (offsets of
 * 0 and ratios of 1), so no member is ever left indeterminate and the
 * reverse direction never divides by zero unless a caller explicitly stores
 * a zero ratio.
 */
struct GDALRasterIOTransformerStruct
{
    //! Offset added along X after scaling a destination X coordinate.
    double dfXOff = 0.0;
    //! Offset added along Y after scaling a destination Y coordinate.
    double dfYOff = 0.0;
    //! Scale factor applied to destination X coordinates (must be non-zero
    //! for the reverse mapping to be defined).
    double dfXRatioDstToSrc = 1.0;
    //! Scale factor applied to destination Y coordinates (must be non-zero
    //! for the reverse mapping to be defined).
    double dfYRatioDstToSrc = 1.0;
};
970

971
static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
6,748✔
972
                                   int nPointCount, double *x, double *y,
973
                                   double * /* z */, int *panSuccess)
974
{
975
    GDALRasterIOTransformerStruct *psParams =
6,748✔
976
        static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
977
    if (bDstToSrc)
6,748✔
978
    {
979
        for (int i = 0; i < nPointCount; i++)
252,996✔
980
        {
981
            x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
246,836✔
982
            y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
246,836✔
983
            panSuccess[i] = TRUE;
246,836✔
984
        }
985
    }
986
    else
987
    {
988
        for (int i = 0; i < nPointCount; i++)
1,176✔
989
        {
990
            x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
588✔
991
            y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
588✔
992
            panSuccess[i] = TRUE;
588✔
993
        }
994
    }
995
    return TRUE;
6,748✔
996
}
997

998
/************************************************************************/
999
/*                          RasterIOResampled()                         */
1000
/************************************************************************/
1001

1002
//! @cond Doxygen_Suppress
1003
CPLErr GDALRasterBand::RasterIOResampled(
2,571✔
1004
    GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1005
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1006
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1007
{
1008
    // Determine if we use warping resampling or overview resampling
1009
    const bool bUseWarp =
1010
        (GDALDataTypeIsComplex(eDataType) &&
2,571✔
1011
         psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
2,728✔
1012
         psExtraArg->eResampleAlg != GRIORA_Mode);
157✔
1013

1014
    double dfXOff = nXOff;
2,571✔
1015
    double dfYOff = nYOff;
2,571✔
1016
    double dfXSize = nXSize;
2,571✔
1017
    double dfYSize = nYSize;
2,571✔
1018
    if (psExtraArg->bFloatingPointWindowValidity)
2,571✔
1019
    {
1020
        dfXOff = psExtraArg->dfXOff;
2,114✔
1021
        dfYOff = psExtraArg->dfYOff;
2,114✔
1022
        dfXSize = psExtraArg->dfXSize;
2,114✔
1023
        dfYSize = psExtraArg->dfYSize;
2,114✔
1024
    }
1025

1026
    const double dfXRatioDstToSrc = dfXSize / nBufXSize;
2,571✔
1027
    const double dfYRatioDstToSrc = dfYSize / nBufYSize;
2,571✔
1028

1029
    // Determine the coordinates in the "virtual" output raster to see
1030
    // if there are not integers, in which case we will use them as a shift
1031
    // so that subwindow extracts give the exact same results as entire raster
1032
    // scaling.
1033
    double dfDestXOff = dfXOff / dfXRatioDstToSrc;
2,571✔
1034
    bool bHasXOffVirtual = false;
2,571✔
1035
    int nDestXOffVirtual = 0;
2,571✔
1036
    if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
2,571✔
1037
    {
1038
        bHasXOffVirtual = true;
2,245✔
1039
        dfXOff = nXOff;
2,245✔
1040
        nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
2,245✔
1041
    }
1042

1043
    double dfDestYOff = dfYOff / dfYRatioDstToSrc;
2,571✔
1044
    bool bHasYOffVirtual = false;
2,571✔
1045
    int nDestYOffVirtual = 0;
2,571✔
1046
    if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
2,571✔
1047
    {
1048
        bHasYOffVirtual = true;
2,239✔
1049
        dfYOff = nYOff;
2,239✔
1050
        nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
2,239✔
1051
    }
1052

1053
    // Create a MEM dataset that wraps the output buffer.
1054
    GDALDataset *poMEMDS;
1055
    void *pTempBuffer = nullptr;
2,571✔
1056
    GSpacing nPSMem = nPixelSpace;
2,571✔
1057
    GSpacing nLSMem = nLineSpace;
2,571✔
1058
    void *pDataMem = pData;
2,571✔
1059
    GDALDataType eDTMem = eBufType;
2,571✔
1060
    if (eBufType != eDataType)
2,571✔
1061
    {
1062
        nPSMem = GDALGetDataTypeSizeBytes(eDataType);
40✔
1063
        nLSMem = nPSMem * nBufXSize;
40✔
1064
        pTempBuffer =
1065
            VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
40✔
1066
        if (pTempBuffer == nullptr)
40✔
1067
            return CE_Failure;
×
1068
        pDataMem = pTempBuffer;
40✔
1069
        eDTMem = eDataType;
40✔
1070
    }
1071

1072
    poMEMDS =
1073
        MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
2,571✔
1074
                           nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1075
    GByte *pabyData = static_cast<GByte *>(pDataMem) -
2,571✔
1076
                      nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
2,571✔
1077
    GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
2,571✔
1078
        poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1079
    poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
2,571✔
1080

1081
    const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
2,571✔
1082
    const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0;
2,571✔
1083
    if (pszNBITS)
2,571✔
1084
        GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem(
6✔
1085
            "NBITS", pszNBITS, "IMAGE_STRUCTURE");
6✔
1086

1087
    CPLErr eErr = CE_None;
2,571✔
1088

1089
    // Do the resampling.
1090
    if (bUseWarp)
2,571✔
1091
    {
1092
        int bHasNoData = FALSE;
149✔
1093
        double dfNoDataValue = GetNoDataValue(&bHasNoData);
149✔
1094

1095
        VRTDatasetH hVRTDS = nullptr;
149✔
1096
        GDALRasterBandH hVRTBand = nullptr;
149✔
1097
        if (GetDataset() == nullptr)
149✔
1098
        {
1099
            /* Create VRT dataset that wraps the whole dataset */
1100
            hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
×
1101
            VRTAddBand(hVRTDS, eDataType, nullptr);
×
1102
            hVRTBand = GDALGetRasterBand(hVRTDS, 1);
×
1103
            VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
×
1104
                               0, 0, nRasterXSize, nRasterYSize, nullptr,
1105
                               VRT_NODATA_UNSET);
1106

1107
            /* Add a mask band if needed */
1108
            if (GetMaskFlags() != GMF_ALL_VALID)
×
1109
            {
1110
                GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
×
1111
                VRTSourcedRasterBand *poVRTMaskBand =
1112
                    reinterpret_cast<VRTSourcedRasterBand *>(
1113
                        reinterpret_cast<GDALRasterBand *>(hVRTBand)
1114
                            ->GetMaskBand());
×
1115
                poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
×
1116
                                                 nRasterYSize, 0, 0,
×
1117
                                                 nRasterXSize, nRasterYSize);
×
1118
            }
1119
        }
1120

1121
        GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
149✔
1122
        switch (psExtraArg->eResampleAlg)
149✔
1123
        {
1124
            case GRIORA_NearestNeighbour:
×
1125
                psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
×
1126
                break;
×
1127
            case GRIORA_Bilinear:
147✔
1128
                psWarpOptions->eResampleAlg = GRA_Bilinear;
147✔
1129
                break;
147✔
1130
            case GRIORA_Cubic:
×
1131
                psWarpOptions->eResampleAlg = GRA_Cubic;
×
1132
                break;
×
1133
            case GRIORA_CubicSpline:
×
1134
                psWarpOptions->eResampleAlg = GRA_CubicSpline;
×
1135
                break;
×
1136
            case GRIORA_Lanczos:
×
1137
                psWarpOptions->eResampleAlg = GRA_Lanczos;
×
1138
                break;
×
1139
            case GRIORA_Average:
×
1140
                psWarpOptions->eResampleAlg = GRA_Average;
×
1141
                break;
×
1142
            case GRIORA_RMS:
2✔
1143
                psWarpOptions->eResampleAlg = GRA_RMS;
2✔
1144
                break;
2✔
1145
            case GRIORA_Mode:
×
1146
                psWarpOptions->eResampleAlg = GRA_Mode;
×
1147
                break;
×
1148
            default:
×
1149
                CPLAssert(false);
×
1150
                psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1151
                break;
1152
        }
1153
        psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
149✔
1154
        psWarpOptions->hDstDS = poMEMDS;
149✔
1155
        psWarpOptions->nBandCount = 1;
149✔
1156
        int nSrcBandNumber = hVRTDS ? 1 : nBand;
149✔
1157
        int nDstBandNumber = 1;
149✔
1158
        psWarpOptions->panSrcBands = &nSrcBandNumber;
149✔
1159
        psWarpOptions->panDstBands = &nDstBandNumber;
149✔
1160
        psWarpOptions->pfnProgress = psExtraArg->pfnProgress
298✔
1161
                                         ? psExtraArg->pfnProgress
149✔
1162
                                         : GDALDummyProgress;
1163
        psWarpOptions->pProgressArg = psExtraArg->pProgressData;
149✔
1164
        psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
149✔
1165
        if (bHasNoData)
149✔
1166
        {
1167
            psWarpOptions->papszWarpOptions = CSLSetNameValue(
×
1168
                psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1169
            if (psWarpOptions->padfSrcNoDataReal == nullptr)
×
1170
            {
1171
                psWarpOptions->padfSrcNoDataReal =
×
1172
                    static_cast<double *>(CPLMalloc(sizeof(double)));
×
1173
                psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
×
1174
            }
1175

1176
            if (psWarpOptions->padfDstNoDataReal == nullptr)
×
1177
            {
1178
                psWarpOptions->padfDstNoDataReal =
×
1179
                    static_cast<double *>(CPLMalloc(sizeof(double)));
×
1180
                psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
×
1181
            }
1182
        }
1183

1184
        GDALRasterIOTransformerStruct sTransformer;
1185
        sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
149✔
1186
        sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
149✔
1187
        sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
149✔
1188
        sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
149✔
1189
        psWarpOptions->pTransformerArg = &sTransformer;
149✔
1190

1191
        GDALWarpOperationH hWarpOperation =
1192
            GDALCreateWarpOperation(psWarpOptions);
149✔
1193
        eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
149✔
1194
                                     nDestYOffVirtual, nBufXSize, nBufYSize);
1195
        GDALDestroyWarpOperation(hWarpOperation);
149✔
1196

1197
        psWarpOptions->panSrcBands = nullptr;
149✔
1198
        psWarpOptions->panDstBands = nullptr;
149✔
1199
        GDALDestroyWarpOptions(psWarpOptions);
149✔
1200

1201
        if (hVRTDS)
149✔
1202
            GDALClose(hVRTDS);
×
1203
    }
1204
    else
1205
    {
1206
        const char *pszResampling =
2,422✔
1207
            (psExtraArg->eResampleAlg == GRIORA_Bilinear)      ? "BILINEAR"
2,608✔
1208
            : (psExtraArg->eResampleAlg == GRIORA_Cubic)       ? "CUBIC"
297✔
1209
            : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
220✔
1210
            : (psExtraArg->eResampleAlg == GRIORA_Lanczos)     ? "LANCZOS"
213✔
1211
            : (psExtraArg->eResampleAlg == GRIORA_Average)     ? "AVERAGE"
159✔
1212
            : (psExtraArg->eResampleAlg == GRIORA_RMS)         ? "RMS"
95✔
1213
            : (psExtraArg->eResampleAlg == GRIORA_Mode)        ? "MODE"
43✔
1214
            : (psExtraArg->eResampleAlg == GRIORA_Gauss)       ? "GAUSS"
3✔
1215
                                                               : "UNKNOWN";
1216

1217
        int nKernelRadius = 0;
2,422✔
1218
        GDALResampleFunction pfnResampleFunc =
1219
            GDALGetResampleFunction(pszResampling, &nKernelRadius);
2,422✔
1220
        CPLAssert(pfnResampleFunc);
2,422✔
1221
        GDALDataType eWrkDataType =
1222
            GDALGetOvrWorkDataType(pszResampling, eDataType);
2,422✔
1223
        int nHasNoData = 0;
2,422✔
1224
        double dfNoDataValue = GetNoDataValue(&nHasNoData);
2,422✔
1225
        const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
2,422✔
1226
        if (!bHasNoData)
2,422✔
1227
            dfNoDataValue = 0.0;
2,358✔
1228

1229
        int nDstBlockXSize = nBufXSize;
2,422✔
1230
        int nDstBlockYSize = nBufYSize;
2,422✔
1231
        int nFullResXChunk = 0;
2,422✔
1232
        int nFullResYChunk = 0;
2,422✔
1233
        while (true)
1234
        {
1235
            nFullResXChunk =
2,422✔
1236
                3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
2,422✔
1237
            nFullResYChunk =
2,422✔
1238
                3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
2,422✔
1239
            if (nFullResXChunk > nRasterXSize)
2,422✔
1240
                nFullResXChunk = nRasterXSize;
2,233✔
1241
            if (nFullResYChunk > nRasterYSize)
2,422✔
1242
                nFullResYChunk = nRasterYSize;
216✔
1243
            if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
2,422✔
1244
                (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
2,376✔
1245
                 1024 * 1024))
1246
                break;
1247
            // When operating on the full width of a raster whose block width is
1248
            // the raster width, prefer doing chunks in height.
1249
            if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
×
1250
                nDstBlockYSize > 1)
1251
                nDstBlockYSize /= 2;
×
1252
            /* Otherwise cut the maximal dimension */
1253
            else if (nDstBlockXSize > 1 &&
×
1254
                     (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
×
1255
                nDstBlockXSize /= 2;
×
1256
            else
1257
                nDstBlockYSize /= 2;
×
1258
        }
1259

1260
        int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
2,422✔
1261
        int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
2,422✔
1262
        if (nOvrXFactor == 0)
2,422✔
1263
            nOvrXFactor = 1;
2,024✔
1264
        if (nOvrYFactor == 0)
2,422✔
1265
            nOvrYFactor = 1;
2,023✔
1266
        int nFullResXSizeQueried =
2,422✔
1267
            nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
2,422✔
1268
        int nFullResYSizeQueried =
2,422✔
1269
            nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
2,422✔
1270

1271
        if (nFullResXSizeQueried > nRasterXSize)
2,422✔
1272
            nFullResXSizeQueried = nRasterXSize;
2,135✔
1273
        if (nFullResYSizeQueried > nRasterYSize)
2,422✔
1274
            nFullResYSizeQueried = nRasterYSize;
129✔
1275

1276
        void *pChunk =
1277
            VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
2,422✔
1278
                                nFullResXSizeQueried, nFullResYSizeQueried);
1279
        GByte *pabyChunkNoDataMask = nullptr;
2,422✔
1280

1281
        GDALRasterBand *poMaskBand = GetMaskBand();
2,422✔
1282
        int l_nMaskFlags = GetMaskFlags();
2,422✔
1283

1284
        bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
2,422✔
1285
        if (bUseNoDataMask)
2,422✔
1286
        {
1287
            pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
126✔
1288
                nFullResXSizeQueried, nFullResYSizeQueried));
1289
        }
1290
        if (pChunk == nullptr ||
2,422✔
1291
            (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
126✔
1292
        {
1293
            GDALClose(poMEMDS);
×
1294
            CPLFree(pChunk);
×
1295
            CPLFree(pabyChunkNoDataMask);
×
1296
            VSIFree(pTempBuffer);
×
1297
            return CE_Failure;
×
1298
        }
1299

1300
        int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
2,422✔
1301
                           ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
2,422✔
1302
        int nBlocksDone = 0;
2,422✔
1303

1304
        int nDstYOff;
1305
        for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
4,844✔
1306
             nDstYOff += nDstBlockYSize)
2,422✔
1307
        {
1308
            int nDstYCount;
1309
            if (nDstYOff + nDstBlockYSize <= nBufYSize)
2,422✔
1310
                nDstYCount = nDstBlockYSize;
2,422✔
1311
            else
1312
                nDstYCount = nBufYSize - nDstYOff;
×
1313

1314
            int nChunkYOff =
2,422✔
1315
                nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
2,422✔
1316
            int nChunkYOff2 = nYOff + 1 +
2,422✔
1317
                              static_cast<int>(ceil((nDstYOff + nDstYCount) *
2,422✔
1318
                                                    dfYRatioDstToSrc));
1319
            if (nChunkYOff2 > nRasterYSize)
2,422✔
1320
                nChunkYOff2 = nRasterYSize;
323✔
1321
            int nYCount = nChunkYOff2 - nChunkYOff;
2,422✔
1322
            CPLAssert(nYCount <= nFullResYChunk);
2,422✔
1323

1324
            int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
2,422✔
1325
            int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
2,422✔
1326
            if (nChunkYOffQueried < 0)
2,422✔
1327
            {
1328
                nChunkYSizeQueried += nChunkYOffQueried;
231✔
1329
                nChunkYOffQueried = 0;
231✔
1330
            }
1331
            if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
2,422✔
1332
                nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
331✔
1333
            CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
2,422✔
1334

1335
            int nDstXOff = 0;
2,422✔
1336
            for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
4,844✔
1337
                 nDstXOff += nDstBlockXSize)
2,422✔
1338
            {
1339
                int nDstXCount = 0;
2,422✔
1340
                if (nDstXOff + nDstBlockXSize <= nBufXSize)
2,422✔
1341
                    nDstXCount = nDstBlockXSize;
2,422✔
1342
                else
1343
                    nDstXCount = nBufXSize - nDstXOff;
×
1344

1345
                int nChunkXOff =
2,422✔
1346
                    nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
2,422✔
1347
                int nChunkXOff2 =
2,422✔
1348
                    nXOff + 1 +
2,422✔
1349
                    static_cast<int>(
2,422✔
1350
                        ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
2,422✔
1351
                if (nChunkXOff2 > nRasterXSize)
2,422✔
1352
                    nChunkXOff2 = nRasterXSize;
2,234✔
1353
                int nXCount = nChunkXOff2 - nChunkXOff;
2,422✔
1354
                CPLAssert(nXCount <= nFullResXChunk);
2,422✔
1355

1356
                int nChunkXOffQueried =
2,422✔
1357
                    nChunkXOff - nKernelRadius * nOvrXFactor;
2,422✔
1358
                int nChunkXSizeQueried =
2,422✔
1359
                    nXCount + 2 * nKernelRadius * nOvrXFactor;
2,422✔
1360
                if (nChunkXOffQueried < 0)
2,422✔
1361
                {
1362
                    nChunkXSizeQueried += nChunkXOffQueried;
2,148✔
1363
                    nChunkXOffQueried = 0;
2,148✔
1364
                }
1365
                if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
2,422✔
1366
                    nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
2,134✔
1367
                CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
2,422✔
1368

1369
                // Read the source buffers.
1370
                eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
2,422✔
1371
                                nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1372
                                nChunkXSizeQueried, nChunkYSizeQueried,
1373
                                eWrkDataType, 0, 0, nullptr);
1374

1375
                bool bSkipResample = false;
2,422✔
1376
                bool bNoDataMaskFullyOpaque = false;
2,422✔
1377
                if (eErr == CE_None && bUseNoDataMask)
2,422✔
1378
                {
1379
                    eErr = poMaskBand->RasterIO(
126✔
1380
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1381
                        nChunkXSizeQueried, nChunkYSizeQueried,
1382
                        pabyChunkNoDataMask, nChunkXSizeQueried,
1383
                        nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1384

1385
                    /* Optimizations if mask is fully opaque or transparent */
1386
                    int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
126✔
1387
                    GByte bVal = pabyChunkNoDataMask[0];
126✔
1388
                    int i = 1;
126✔
1389
                    for (; i < nPixels; i++)
241,310✔
1390
                    {
1391
                        if (pabyChunkNoDataMask[i] != bVal)
241,261✔
1392
                            break;
77✔
1393
                    }
1394
                    if (i == nPixels)
126✔
1395
                    {
1396
                        if (bVal == 0)
49✔
1397
                        {
1398
                            for (int j = 0; j < nDstYCount; j++)
712✔
1399
                            {
1400
                                GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0,
686✔
1401
                                                static_cast<GByte *>(pDataMem) +
1402
                                                    nLSMem * (j + nDstYOff) +
686✔
1403
                                                    nDstXOff * nPSMem,
686✔
1404
                                                eDTMem,
1405
                                                static_cast<int>(nPSMem),
1406
                                                nDstXCount);
1407
                            }
1408
                            bSkipResample = true;
26✔
1409
                        }
1410
                        else
1411
                        {
1412
                            bNoDataMaskFullyOpaque = true;
23✔
1413
                        }
1414
                    }
1415
                }
1416

1417
                if (!bSkipResample && eErr == CE_None)
2,422✔
1418
                {
1419
                    const bool bPropagateNoData = false;
2,394✔
1420
                    void *pDstBuffer = nullptr;
2,394✔
1421
                    GDALDataType eDstBufferDataType = GDT_Unknown;
2,394✔
1422
                    GDALRasterBand *poMEMBand =
1423
                        GDALRasterBand::FromHandle(hMEMBand);
2,394✔
1424
                    GDALOverviewResampleArgs args;
2,394✔
1425
                    args.eSrcDataType = eDataType;
2,394✔
1426
                    args.eOvrDataType = poMEMBand->GetRasterDataType();
2,394✔
1427
                    args.nOvrXSize = poMEMBand->GetXSize();
2,394✔
1428
                    args.nOvrYSize = poMEMBand->GetYSize();
2,394✔
1429
                    args.nOvrNBITS = nNBITS;
2,394✔
1430
                    args.dfXRatioDstToSrc = dfXRatioDstToSrc;
2,394✔
1431
                    args.dfYRatioDstToSrc = dfYRatioDstToSrc;
2,394✔
1432
                    args.dfSrcXDelta =
2,394✔
1433
                        dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
2,394✔
1434
                    args.dfSrcYDelta =
2,394✔
1435
                        dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
2,394✔
1436
                    args.eWrkDataType = eWrkDataType;
2,394✔
1437
                    args.pabyChunkNodataMask =
2,394✔
1438
                        bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask;
2,394✔
1439
                    args.nChunkXOff =
2,394✔
1440
                        nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
2,394✔
1441
                    args.nChunkXSize = nChunkXSizeQueried;
2,394✔
1442
                    args.nChunkYOff =
2,394✔
1443
                        nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
2,394✔
1444
                    args.nChunkYSize = nChunkYSizeQueried;
2,394✔
1445
                    args.nDstXOff = nDstXOff + nDestXOffVirtual;
2,394✔
1446
                    args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount;
2,394✔
1447
                    args.nDstYOff = nDstYOff + nDestYOffVirtual;
2,394✔
1448
                    args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount;
2,394✔
1449
                    args.pszResampling = pszResampling;
2,394✔
1450
                    args.bHasNoData = bHasNoData;
2,394✔
1451
                    args.dfNoDataValue = dfNoDataValue;
2,394✔
1452
                    args.poColorTable = GetColorTable();
2,394✔
1453
                    args.bPropagateNoData = bPropagateNoData;
2,394✔
1454
                    eErr = pfnResampleFunc(args, pChunk, &pDstBuffer,
2,394✔
1455
                                           &eDstBufferDataType);
1456
                    if (eErr == CE_None)
2,394✔
1457
                    {
1458
                        eErr = poMEMBand->RasterIO(
2,394✔
1459
                            GF_Write, nDstXOff + nDestXOffVirtual,
1460
                            nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1461
                            pDstBuffer, nDstXCount, nDstYCount,
1462
                            eDstBufferDataType, 0, 0, nullptr);
1463
                    }
1464
                    CPLFree(pDstBuffer);
2,394✔
1465
                }
1466

1467
                nBlocksDone++;
2,422✔
1468
                if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
2,451✔
1469
                    !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
29✔
1470
                                             "", psExtraArg->pProgressData))
1471
                {
1472
                    eErr = CE_Failure;
1✔
1473
                }
1474
            }
1475
        }
1476

1477
        CPLFree(pChunk);
2,422✔
1478
        CPLFree(pabyChunkNoDataMask);
2,422✔
1479
    }
1480

1481
    if (eBufType != eDataType)
2,571✔
1482
    {
1483
        CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
40✔
1484
            GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1485
            pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1486
            nullptr));
1487
    }
1488
    GDALClose(poMEMDS);
2,571✔
1489
    VSIFree(pTempBuffer);
2,571✔
1490

1491
    return eErr;
2,571✔
1492
}
1493

1494
/************************************************************************/
1495
/*                          RasterIOResampled()                         */
1496
/************************************************************************/
1497

1498
CPLErr GDALDataset::RasterIOResampled(
284✔
1499
    GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1500
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1501
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
1502
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
1503

1504
{
1505
#if 0
1506
    // Determine if we use warping resampling or overview resampling
1507
    bool bUseWarp = false;
1508
    if( GDALDataTypeIsComplex( eDataType ) )
1509
        bUseWarp = true;
1510
#endif
1511

1512
    double dfXOff = nXOff;
284✔
1513
    double dfYOff = nYOff;
284✔
1514
    double dfXSize = nXSize;
284✔
1515
    double dfYSize = nYSize;
284✔
1516
    if (psExtraArg->bFloatingPointWindowValidity)
284✔
1517
    {
1518
        dfXOff = psExtraArg->dfXOff;
163✔
1519
        dfYOff = psExtraArg->dfYOff;
163✔
1520
        dfXSize = psExtraArg->dfXSize;
163✔
1521
        dfYSize = psExtraArg->dfYSize;
163✔
1522
    }
1523

1524
    const double dfXRatioDstToSrc = dfXSize / nBufXSize;
284✔
1525
    const double dfYRatioDstToSrc = dfYSize / nBufYSize;
284✔
1526

1527
    // Determine the coordinates in the "virtual" output raster to see
1528
    // if they are integers, in which case we will use them as a shift
1529
    // so that subwindow extracts give the exact same results as entire raster
1530
    // scaling.
1531
    double dfDestXOff = dfXOff / dfXRatioDstToSrc;
284✔
1532
    bool bHasXOffVirtual = false;
284✔
1533
    int nDestXOffVirtual = 0;
284✔
1534
    if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
284✔
1535
    {
1536
        bHasXOffVirtual = true;
164✔
1537
        dfXOff = nXOff;
164✔
1538
        nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
164✔
1539
    }
1540

1541
    double dfDestYOff = dfYOff / dfYRatioDstToSrc;
284✔
1542
    bool bHasYOffVirtual = false;
284✔
1543
    int nDestYOffVirtual = 0;
284✔
1544
    if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
284✔
1545
    {
1546
        bHasYOffVirtual = true;
124✔
1547
        dfYOff = nYOff;
124✔
1548
        nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
124✔
1549
    }
1550

1551
    // Create a MEM dataset that wraps the output buffer.
1552
    GDALDataset *poMEMDS =
1553
        MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
284✔
1554
                           nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr);
1555
    GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>(
1556
        CPLMalloc(nBandCount * sizeof(GDALRasterBand *)));
283✔
1557
    int nNBITS = 0;
275✔
1558
    for (int i = 0; i < nBandCount; i++)
1,240✔
1559
    {
1560
        char szBuffer[32] = {'\0'};
961✔
1561
        int nRet = CPLPrintPointer(
1,930✔
1562
            szBuffer,
1563
            static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual -
961✔
1564
                nLineSpace * nDestYOffVirtual + nBandSpace * i,
961✔
1565
            sizeof(szBuffer));
1566
        szBuffer[nRet] = 0;
969✔
1567

1568
        char szBuffer0[64] = {'\0'};
969✔
1569
        snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
969✔
1570

1571
        char szBuffer1[64] = {'\0'};
969✔
1572
        snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB,
969✔
1573
                 static_cast<GIntBig>(nPixelSpace));
1574

1575
        char szBuffer2[64] = {'\0'};
969✔
1576
        snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB,
969✔
1577
                 static_cast<GIntBig>(nLineSpace));
1578

1579
        char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr};
969✔
1580

1581
        poMEMDS->AddBand(eBufType, apszOptions);
969✔
1582

1583
        GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
970✔
1584
        papoDstBands[i] = poMEMDS->GetRasterBand(i + 1);
964✔
1585
        const char *pszNBITS =
1586
            poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
966✔
1587
        if (pszNBITS)
962✔
1588
        {
1589
            nNBITS = atoi(pszNBITS);
×
1590
            poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
×
1591
                                                           "IMAGE_STRUCTURE");
×
1592
        }
1593
    }
1594

1595
    CPLErr eErr = CE_None;
279✔
1596

1597
    // TODO(schwehr): Why disabled?  Why not just delete?
1598
    // Looks like this code was initially added as disabled by copying
1599
    // from RasterIO here:
1600
    // https://trac.osgeo.org/gdal/changeset/29572
1601
#if 0
1602
    // Do the resampling.
1603
    if( bUseWarp )
1604
    {
1605
        VRTDatasetH hVRTDS = nullptr;
1606
        GDALRasterBandH hVRTBand = nullptr;
1607
        if( GetDataset() == nullptr )
1608
        {
1609
            /* Create VRT dataset that wraps the whole dataset */
1610
            hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1611
            VRTAddBand( hVRTDS, eDataType, nullptr );
1612
            hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1613
            VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1614
                                (GDALRasterBandH)this,
1615
                                0, 0,
1616
                                nRasterXSize, nRasterYSize,
1617
                                0, 0,
1618
                                nRasterXSize, nRasterYSize,
1619
                                nullptr, VRT_NODATA_UNSET );
1620

1621
            /* Add a mask band if needed */
1622
            if( GetMaskFlags() != GMF_ALL_VALID )
1623
            {
1624
                ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1625
                VRTSourcedRasterBand* poVRTMaskBand =
1626
                    (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1627
                poVRTMaskBand->
1628
                    AddMaskBandSource( this,
1629
                                    0, 0,
1630
                                    nRasterXSize, nRasterYSize,
1631
                                    0, 0,
1632
                                    nRasterXSize, nRasterYSize);
1633
            }
1634
        }
1635

1636
        GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1637
        psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1638
        psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1639
        psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1640
        psWarpOptions->nBandCount = 1;
1641
        int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1642
        int nDstBandNumber = 1;
1643
        psWarpOptions->panSrcBands = &nSrcBandNumber;
1644
        psWarpOptions->panDstBands = &nDstBandNumber;
1645
        psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1646
                    psExtraArg->pfnProgress : GDALDummyProgress;
1647
        psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1648
        psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1649
        GDALRasterIOTransformerStruct sTransformer;
1650
        sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1651
        sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1652
        sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1653
        sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1654
        psWarpOptions->pTransformerArg = &sTransformer;
1655

1656
        GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1657
        eErr = GDALChunkAndWarpImage( hWarpOperation,
1658
                                      nDestXOffVirtual, nDestYOffVirtual,
1659
                                      nBufXSize, nBufYSize );
1660
        GDALDestroyWarpOperation( hWarpOperation );
1661

1662
        psWarpOptions->panSrcBands = nullptr;
1663
        psWarpOptions->panDstBands = nullptr;
1664
        GDALDestroyWarpOptions( psWarpOptions );
1665

1666
        if( hVRTDS )
1667
            GDALClose(hVRTDS);
1668
    }
1669
    else
1670
#endif
1671
    {
1672
        const char *pszResampling =
279✔
1673
            (psExtraArg->eResampleAlg == GRIORA_Bilinear)      ? "BILINEAR"
440✔
1674
            : (psExtraArg->eResampleAlg == GRIORA_Cubic)       ? "CUBIC"
161✔
1675
            : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
×
1676
            : (psExtraArg->eResampleAlg == GRIORA_Lanczos)     ? "LANCZOS"
×
1677
            : (psExtraArg->eResampleAlg == GRIORA_Average)     ? "AVERAGE"
×
1678
            : (psExtraArg->eResampleAlg == GRIORA_RMS)         ? "RMS"
×
1679
            : (psExtraArg->eResampleAlg == GRIORA_Mode)        ? "MODE"
×
1680
            : (psExtraArg->eResampleAlg == GRIORA_Gauss)       ? "GAUSS"
×
1681
                                                               : "UNKNOWN";
1682

1683
        GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
279✔
1684
        GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
279✔
1685
        int nBlockXSize, nBlockYSize;
1686
        poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
282✔
1687

1688
        int nKernelRadius;
1689
        GDALResampleFunction pfnResampleFunc =
1690
            GDALGetResampleFunction(pszResampling, &nKernelRadius);
276✔
1691
        CPLAssert(pfnResampleFunc);
274✔
1692
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1693
        GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1694
            GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1695
#endif
1696
        GDALDataType eWrkDataType =
1697
            GDALGetOvrWorkDataType(pszResampling, eDataType);
274✔
1698

1699
        int nDstBlockXSize = nBufXSize;
274✔
1700
        int nDstBlockYSize = nBufYSize;
274✔
1701
        int nFullResXChunk, nFullResYChunk;
1702
        while (true)
1703
        {
1704
            nFullResXChunk =
274✔
1705
                3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
274✔
1706
            nFullResYChunk =
274✔
1707
                3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
274✔
1708
            if (nFullResXChunk > nRasterXSize)
274✔
1709
                nFullResXChunk = nRasterXSize;
149✔
1710
            if (nFullResYChunk > nRasterYSize)
274✔
1711
                nFullResYChunk = nRasterYSize;
33✔
1712
            if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
274✔
1713
                (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
272✔
1714
                 1024 * 1024))
1715
                break;
1716
            // When operating on the full width of a raster whose block width is
1717
            // the raster width, prefer doing chunks in height.
1718
            if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
×
1719
                nDstBlockYSize > 1)
1720
                nDstBlockYSize /= 2;
×
1721
            /* Otherwise cut the maximal dimension */
1722
            else if (nDstBlockXSize > 1 &&
×
1723
                     (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
×
1724
                nDstBlockXSize /= 2;
×
1725
            else
1726
                nDstBlockYSize /= 2;
×
1727
        }
1728

1729
        int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
554✔
1730
                                  static_cast<int>(0.5 + dfYRatioDstToSrc));
274✔
1731
        if (nOvrFactor == 0)
280✔
1732
            nOvrFactor = 1;
99✔
1733
        int nFullResXSizeQueried =
280✔
1734
            nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
280✔
1735
        int nFullResYSizeQueried =
280✔
1736
            nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
280✔
1737

1738
        if (nFullResXSizeQueried > nRasterXSize)
280✔
1739
            nFullResXSizeQueried = nRasterXSize;
164✔
1740
        if (nFullResYSizeQueried > nRasterYSize)
280✔
1741
            nFullResYSizeQueried = nRasterYSize;
36✔
1742

1743
        void *pChunk = VSI_MALLOC3_VERBOSE(
280✔
1744
            cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1745
                              nBandCount),
1746
            nFullResXSizeQueried, nFullResYSizeQueried);
1747
        GByte *pabyChunkNoDataMask = nullptr;
284✔
1748

1749
        GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
284✔
1750
        int nMaskFlags = poFirstSrcBand->GetMaskFlags();
283✔
1751

1752
        bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
284✔
1753
        if (bUseNoDataMask)
284✔
1754
        {
1755
            pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
55✔
1756
                nFullResXSizeQueried, nFullResYSizeQueried));
1757
        }
1758
        if (pChunk == nullptr ||
284✔
1759
            (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
55✔
1760
        {
1761
            GDALClose(poMEMDS);
6✔
1762
            CPLFree(pChunk);
×
1763
            CPLFree(pabyChunkNoDataMask);
×
1764
            CPLFree(papoDstBands);
×
1765
            return CE_Failure;
×
1766
        }
1767

1768
        int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
278✔
1769
                           ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
278✔
1770
        int nBlocksDone = 0;
278✔
1771

1772
        int nDstYOff;
1773
        for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
565✔
1774
             nDstYOff += nDstBlockYSize)
287✔
1775
        {
1776
            int nDstYCount;
1777
            if (nDstYOff + nDstBlockYSize <= nBufYSize)
278✔
1778
                nDstYCount = nDstBlockYSize;
276✔
1779
            else
1780
                nDstYCount = nBufYSize - nDstYOff;
2✔
1781

1782
            int nChunkYOff =
278✔
1783
                nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
278✔
1784
            int nChunkYOff2 = nYOff + 1 +
278✔
1785
                              static_cast<int>(ceil((nDstYOff + nDstYCount) *
278✔
1786
                                                    dfYRatioDstToSrc));
1787
            if (nChunkYOff2 > nRasterYSize)
278✔
1788
                nChunkYOff2 = nRasterYSize;
56✔
1789
            int nYCount = nChunkYOff2 - nChunkYOff;
278✔
1790
            CPLAssert(nYCount <= nFullResYChunk);
278✔
1791

1792
            int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
278✔
1793
            int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
278✔
1794
            if (nChunkYOffQueried < 0)
278✔
1795
            {
1796
                nChunkYSizeQueried += nChunkYOffQueried;
56✔
1797
                nChunkYOffQueried = 0;
56✔
1798
            }
1799
            if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
278✔
1800
                nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
66✔
1801
            CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
278✔
1802

1803
            int nDstXOff;
1804
            for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
562✔
1805
                 nDstXOff += nDstBlockXSize)
284✔
1806
            {
1807
                int nDstXCount;
1808
                if (nDstXOff + nDstBlockXSize <= nBufXSize)
275✔
1809
                    nDstXCount = nDstBlockXSize;
275✔
1810
                else
1811
                    nDstXCount = nBufXSize - nDstXOff;
×
1812

1813
                int nChunkXOff =
275✔
1814
                    nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
275✔
1815
                int nChunkXOff2 =
275✔
1816
                    nXOff + 1 +
275✔
1817
                    static_cast<int>(
275✔
1818
                        ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
275✔
1819
                if (nChunkXOff2 > nRasterXSize)
275✔
1820
                    nChunkXOff2 = nRasterXSize;
151✔
1821
                int nXCount = nChunkXOff2 - nChunkXOff;
275✔
1822
                CPLAssert(nXCount <= nFullResXChunk);
275✔
1823

1824
                int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
275✔
1825
                int nChunkXSizeQueried =
275✔
1826
                    nXCount + 2 * nKernelRadius * nOvrFactor;
275✔
1827
                if (nChunkXOffQueried < 0)
275✔
1828
                {
1829
                    nChunkXSizeQueried += nChunkXOffQueried;
149✔
1830
                    nChunkXOffQueried = 0;
149✔
1831
                }
1832
                if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
275✔
1833
                    nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
150✔
1834
                CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
275✔
1835

1836
                bool bSkipResample = false;
275✔
1837
                bool bNoDataMaskFullyOpaque = false;
275✔
1838
                if (eErr == CE_None && bUseNoDataMask)
275✔
1839
                {
1840
                    eErr = poMaskBand->RasterIO(
55✔
1841
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1842
                        nChunkXSizeQueried, nChunkYSizeQueried,
1843
                        pabyChunkNoDataMask, nChunkXSizeQueried,
1844
                        nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1845

1846
                    /* Optimizations if mask is fully opaque or transparent */
1847
                    const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
55✔
1848
                    const GByte bVal = pabyChunkNoDataMask[0];
55✔
1849
                    int i = 1;  // Used after for.
55✔
1850
                    for (; i < nPixels; i++)
123,794✔
1851
                    {
1852
                        if (pabyChunkNoDataMask[i] != bVal)
123,777✔
1853
                            break;
38✔
1854
                    }
1855
                    if (i == nPixels)
55✔
1856
                    {
1857
                        if (bVal == 0)
17✔
1858
                        {
1859
                            GByte abyZero[16] = {0};
16✔
1860
                            for (int iBand = 0; iBand < nBandCount; iBand++)
64✔
1861
                            {
1862
                                for (int j = 0; j < nDstYCount; j++)
2,016✔
1863
                                {
1864
                                    GDALCopyWords64(
1,968✔
1865
                                        abyZero, GDT_Byte, 0,
1866
                                        static_cast<GByte *>(pData) +
1867
                                            iBand * nBandSpace +
1,968✔
1868
                                            nLineSpace * (j + nDstYOff) +
1,968✔
1869
                                            nDstXOff * nPixelSpace,
1,968✔
1870
                                        eBufType, static_cast<int>(nPixelSpace),
1871
                                        nDstXCount);
1872
                                }
1873
                            }
1874
                            bSkipResample = true;
16✔
1875
                        }
1876
                        else
1877
                        {
1878
                            bNoDataMaskFullyOpaque = true;
1✔
1879
                        }
1880
                    }
1881
                }
1882

1883
                if (!bSkipResample && eErr == CE_None)
275✔
1884
                {
1885
                    /* Read the source buffers */
1886
                    eErr = RasterIO(
258✔
1887
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1888
                        nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1889
                        nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1890
                        nBandCount, panBandMap, 0, 0, 0, nullptr);
1891
                }
1892

1893
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1894
                if (pfnResampleFuncMultiBands && !bSkipResample &&
1895
                    eErr == CE_None)
1896
                {
1897
                    eErr = pfnResampleFuncMultiBands(
1898
                        dfXRatioDstToSrc, dfYRatioDstToSrc,
1899
                        dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1900
                        dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1901
                        eWrkDataType, (GByte *)pChunk, nBandCount,
1902
                        bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1903
                        nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1904
                        nChunkXSizeQueried,
1905
                        nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1906
                        nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1907
                        nDstXOff + nDestXOffVirtual + nDstXCount,
1908
                        nDstYOff + nDestYOffVirtual,
1909
                        nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands,
1910
                        pszResampling, FALSE /*bHasNoData*/,
1911
                        0.0 /* dfNoDataValue */, nullptr /* color table*/,
1912
                        eDataType);
1913
                }
1914
                else
1915
#endif
1916
                {
1917
                    size_t nChunkBandOffset =
1918
                        static_cast<size_t>(nChunkXSizeQueried) *
283✔
1919
                        nChunkYSizeQueried *
283✔
1920
                        GDALGetDataTypeSizeBytes(eWrkDataType);
283✔
1921
                    for (int i = 0;
1,206✔
1922
                         i < nBandCount && !bSkipResample && eErr == CE_None;
1,206✔
1923
                         i++)
1924
                    {
1925
                        const bool bPropagateNoData = false;
922✔
1926
                        void *pDstBuffer = nullptr;
922✔
1927
                        GDALDataType eDstBufferDataType = GDT_Unknown;
922✔
1928
                        GDALRasterBand *poMEMBand =
1929
                            poMEMDS->GetRasterBand(i + 1);
922✔
1930
                        GDALOverviewResampleArgs args;
921✔
1931
                        args.eSrcDataType = eDataType;
921✔
1932
                        args.eOvrDataType = poMEMBand->GetRasterDataType();
921✔
1933
                        args.nOvrXSize = poMEMBand->GetXSize();
921✔
1934
                        args.nOvrYSize = poMEMBand->GetYSize();
921✔
1935
                        args.nOvrNBITS = nNBITS;
921✔
1936
                        args.dfXRatioDstToSrc = dfXRatioDstToSrc;
921✔
1937
                        args.dfYRatioDstToSrc = dfYRatioDstToSrc;
921✔
1938
                        args.dfSrcXDelta =
921✔
1939
                            dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
921✔
1940
                        args.dfSrcYDelta =
921✔
1941
                            dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
921✔
1942
                        args.eWrkDataType = eWrkDataType;
921✔
1943
                        args.pabyChunkNodataMask = bNoDataMaskFullyOpaque
921✔
1944
                                                       ? nullptr
921✔
1945
                                                       : pabyChunkNoDataMask;
1946
                        args.nChunkXOff =
921✔
1947
                            nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
921✔
1948
                        args.nChunkXSize = nChunkXSizeQueried;
921✔
1949
                        args.nChunkYOff =
921✔
1950
                            nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
921✔
1951
                        args.nChunkYSize = nChunkYSizeQueried;
921✔
1952
                        args.nDstXOff = nDstXOff + nDestXOffVirtual;
921✔
1953
                        args.nDstXOff2 =
921✔
1954
                            nDstXOff + nDestXOffVirtual + nDstXCount;
921✔
1955
                        args.nDstYOff = nDstYOff + nDestYOffVirtual;
921✔
1956
                        args.nDstYOff2 =
921✔
1957
                            nDstYOff + nDestYOffVirtual + nDstYCount;
921✔
1958
                        args.pszResampling = pszResampling;
921✔
1959
                        args.bHasNoData = false;
921✔
1960
                        args.dfNoDataValue = 0.0;
921✔
1961
                        args.poColorTable = nullptr;
921✔
1962
                        args.bPropagateNoData = bPropagateNoData;
921✔
1963

1964
                        eErr =
1965
                            pfnResampleFunc(args,
1,843✔
1966
                                            reinterpret_cast<GByte *>(pChunk) +
921✔
1967
                                                i * nChunkBandOffset,
921✔
1968
                                            &pDstBuffer, &eDstBufferDataType);
1969
                        if (eErr == CE_None)
922✔
1970
                        {
1971
                            eErr = poMEMBand->RasterIO(
922✔
1972
                                GF_Write, nDstXOff + nDestXOffVirtual,
1973
                                nDstYOff + nDestYOffVirtual, nDstXCount,
1974
                                nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1975
                                eDstBufferDataType, 0, 0, nullptr);
1976
                        }
1977
                        CPLFree(pDstBuffer);
922✔
1978
                    }
1979
                }
1980

1981
                nBlocksDone++;
284✔
1982
                if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
286✔
1983
                    !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
2✔
1984
                                             "", psExtraArg->pProgressData))
1985
                {
1986
                    eErr = CE_Failure;
×
1987
                }
1988
            }
1989
        }
1990

1991
        CPLFree(pChunk);
287✔
1992
        CPLFree(pabyChunkNoDataMask);
284✔
1993
    }
1994

1995
    CPLFree(papoDstBands);
284✔
1996
    GDALClose(poMEMDS);
284✔
1997

1998
    return eErr;
284✔
1999
}
2000

2001
//! @endcond
2002

2003
/************************************************************************/
2004
/*                           GDALSwapWords()                            */
2005
/************************************************************************/
2006

2007
/**
2008
 * Byte swap words in-place.
2009
 *
2010
 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2011
 * a memory array.  No assumption is made that the words being swapped are
2012
 * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2013
 * to determine if the current platform is big endian or little endian.  Use
2014
 * The macros like CPL_SWAP32() to byte swap single values without the overhead
2015
 * of a function call.
2016
 *
2017
 * @param pData pointer to start of data buffer.
2018
 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2019
 * @param nWordCount the number of words to be swapped in this call.
2020
 * @param nWordSkip the byte offset from the start of one word to the start of
2021
 * the next. For packed buffers this is the same as nWordSize.
2022
 */
2023

2024
void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
438,669✔
2025
                               int nWordSkip)
2026

2027
{
2028
    if (nWordCount > 0)
438,669✔
2029
        VALIDATE_POINTER0(pData, "GDALSwapWords");
438,669✔
2030

2031
    GByte *pabyData = static_cast<GByte *>(pData);
438,669✔
2032

2033
    switch (nWordSize)
438,669✔
2034
    {
2035
        case 1:
7,234✔
2036
            break;
7,234✔
2037

2038
        case 2:
418,175✔
2039
            CPLAssert(nWordSkip >= 2 || nWordCount == 1);
418,175✔
2040
            for (int i = 0; i < nWordCount; i++)
289,160,000✔
2041
            {
2042
                CPL_SWAP16PTR(pabyData);
288,742,000✔
2043
                pabyData += nWordSkip;
288,742,000✔
2044
            }
2045
            break;
418,175✔
2046

2047
        case 4:
10,689✔
2048
            CPLAssert(nWordSkip >= 4 || nWordCount == 1);
10,689✔
2049
            if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0)
10,689✔
2050
            {
2051
                for (int i = 0; i < nWordCount; i++)
29,148,800✔
2052
                {
2053
                    *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32(
29,138,100✔
2054
                        *reinterpret_cast<const GUInt32 *>(pabyData));
2055
                    pabyData += nWordSkip;
29,138,100✔
2056
                }
10,686✔
2057
            }
2058
            else
2059
            {
2060
                for (int i = 0; i < nWordCount; i++)
9✔
2061
                {
2062
                    CPL_SWAP32PTR(pabyData);
6✔
2063
                    pabyData += nWordSkip;
6✔
2064
                }
2065
            }
2066
            break;
10,689✔
2067

2068
        case 8:
2,571✔
2069
            CPLAssert(nWordSkip >= 8 || nWordCount == 1);
2,571✔
2070
#ifdef CPL_HAS_GINT64
2071
            if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0)
2,571✔
2072
            {
2073
                for (int i = 0; i < nWordCount; i++)
3,359,870✔
2074
                {
2075
                    *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64(
3,357,300✔
2076
                        *reinterpret_cast<const GUInt64 *>(pabyData));
2077
                    pabyData += nWordSkip;
3,357,300✔
2078
                }
2,570✔
2079
            }
2080
            else
2081
#endif
2082
            {
2083
                for (int i = 0; i < nWordCount; i++)
3✔
2084
                {
2085
                    CPL_SWAP64PTR(pabyData);
2✔
2086
                    pabyData += nWordSkip;
2✔
2087
                }
2088
            }
2089
            break;
2,571✔
2090

2091
        default:
×
2092
            CPLAssert(false);
×
2093
    }
2094
}
2095

2096
/************************************************************************/
2097
/*                           GDALSwapWordsEx()                          */
2098
/************************************************************************/
2099

2100
/**
2101
 * Byte swap words in-place.
2102
 *
2103
 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2104
 * a memory array.  No assumption is made that the words being swapped are
2105
 * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2106
 * to determine if the current platform is big endian or little endian.  Use
2107
 * The macros like CPL_SWAP32() to byte swap single values without the overhead
2108
 * of a function call.
2109
 *
2110
 * @param pData pointer to start of data buffer.
2111
 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2112
 * @param nWordCount the number of words to be swapped in this call.
2113
 * @param nWordSkip the byte offset from the start of one word to the start of
2114
 * the next. For packed buffers this is the same as nWordSize.
2115
 * @since GDAL 2.1
2116
 */
2117
void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
6,378✔
2118
                                 int nWordSkip)
2119
{
2120
    GByte *pabyData = static_cast<GByte *>(pData);
6,378✔
2121
    while (nWordCount)
12,756✔
2122
    {
2123
        // Pick-up a multiple of 8 as max chunk size.
2124
        const int nWordCountSmall =
6,378✔
2125
            (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
6,378✔
2126
        GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
6,378✔
2127
        pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
6,378✔
2128
        nWordCount -= nWordCountSmall;
6,378✔
2129
    }
2130
}
6,378✔
2131

2132
// Place the new GDALCopyWords helpers in an anonymous namespace
2133
namespace
2134
{
2135

2136
/************************************************************************/
2137
/*                           GDALCopyWordsT()                           */
2138
/************************************************************************/
2139
/**
2140
 * Template function, used to copy data from pSrcData into buffer
2141
 * pDstData, with stride nSrcPixelStride in the source data and
2142
 * stride nDstPixelStride in the destination data. This template can
2143
 * deal with the case where the input data type is real or complex and
2144
 * the output is real.
2145
 *
2146
 * @param pSrcData the source data buffer
2147
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2148
 *                      of interest.
2149
 * @param pDstData the destination buffer.
2150
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2151
 *                      interest.
2152
 * @param nWordCount the total number of pixel words to copy
2153
 *
2154
 * @code
2155
 * // Assume an input buffer of type GUInt16 named pBufferIn
2156
 * GByte *pBufferOut = new GByte[numBytesOut];
2157
 * GDALCopyWordsT<GUInt16, GByte>(pSrcData, 2, pDstData, 1, numBytesOut);
2158
 * @endcode
2159
 * @note
2160
 * This is a private function, and should not be exposed outside of
2161
 * rasterio.cpp. External users should call the GDALCopyWords driver function.
2162
 */
2163

2164
template <class Tin, class Tout>
2165
static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
46,718,488✔
2166
                                         int nSrcPixelStride,
2167
                                         Tout *const CPL_RESTRICT pDstData,
2168
                                         int nDstPixelStride,
2169
                                         GPtrDiff_t nWordCount)
2170
{
2171
    decltype(nWordCount) nDstOffset = 0;
46,718,488✔
2172

2173
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
46,718,488✔
2174
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
46,718,488✔
2175
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
584,320,221✔
2176
    {
2177
        const Tin tValue =
537,602,028✔
2178
            *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
537,602,028✔
2179
        Tout *const pOutPixel =
537,602,028✔
2180
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
537,602,028✔
2181

2182
        GDALCopyWord(tValue, *pOutPixel);
537,602,028✔
2183

2184
        nDstOffset += nDstPixelStride;
537,601,628✔
2185
    }
2186
}
46,718,167✔
2187

2188
template <class Tin, class Tout>
2189
static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
38,277,918✔
2190
                                  int nSrcPixelStride,
2191
                                  Tout *const CPL_RESTRICT pDstData,
2192
                                  int nDstPixelStride, GPtrDiff_t nWordCount)
2193
{
2194
    GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
38,277,918✔
2195
                          nWordCount);
2196
}
38,278,105✔
2197

2198
template <class Tin, class Tout>
2199
static void inline GDALCopyWordsT_8atatime(
194,348✔
2200
    const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2201
    Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2202
    GPtrDiff_t nWordCount)
2203
{
2204
    decltype(nWordCount) nDstOffset = 0;
194,348✔
2205

2206
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
194,348✔
2207
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
194,348✔
2208
    decltype(nWordCount) n = 0;
194,348✔
2209
    if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
194,348✔
2210
        nDstPixelStride == static_cast<int>(sizeof(Tout)))
2211
    {
2212
        for (; n < nWordCount - 7; n += 8)
22,741,130✔
2213
        {
2214
            const Tin *pInValues = reinterpret_cast<const Tin *>(
22,550,098✔
2215
                pSrcDataPtr + (n * nSrcPixelStride));
22,550,098✔
2216
            Tout *const pOutPixels =
22,550,098✔
2217
                reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
22,550,098✔
2218

2219
            GDALCopy8Words(pInValues, pOutPixels);
22,550,098✔
2220

2221
            nDstOffset += 8 * nDstPixelStride;
22,547,668✔
2222
        }
2223
    }
2224
    for (; n < nWordCount; n++)
687,692✔
2225
    {
2226
        const Tin tValue =
493,346✔
2227
            *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
493,346✔
2228
        Tout *const pOutPixel =
493,346✔
2229
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
493,346✔
2230

2231
        GDALCopyWord(tValue, *pOutPixel);
493,346✔
2232

2233
        nDstOffset += nDstPixelStride;
495,780✔
2234
    }
2235
}
194,346✔
2236

2237
#ifdef HAVE_SSE2
2238

2239
template <class Tout>
2240
void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
39,381✔
2241
                              int nSrcPixelStride,
2242
                              Tout *const CPL_RESTRICT pDstData,
2243
                              int nDstPixelStride, GPtrDiff_t nWordCount)
2244
{
2245
    static_assert(std::is_integral<Tout>::value &&
2246
                      sizeof(Tout) == sizeof(uint16_t),
2247
                  "Bad Tout");
2248
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
39,381✔
2249
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2250
    {
2251
        decltype(nWordCount) n = 0;
33,330✔
2252
        const __m128i xmm_zero = _mm_setzero_si128();
33,330✔
2253
        GByte *CPL_RESTRICT pabyDstDataPtr =
33,330✔
2254
            reinterpret_cast<GByte *>(pDstData);
2255
        for (; n < nWordCount - 15; n += 16)
1,501,757✔
2256
        {
2257
            __m128i xmm = _mm_loadu_si128(
1,468,427✔
2258
                reinterpret_cast<const __m128i *>(pSrcData + n));
1,468,427✔
2259
            __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
1,468,427✔
2260
            __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
1,468,427✔
2261
            _mm_storeu_si128(
2262
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
1,468,427✔
2263
            _mm_storeu_si128(
2264
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
1,468,427✔
2265
        }
2266
        for (; n < nWordCount; n++)
108,789✔
2267
        {
2268
            pDstData[n] = pSrcData[n];
75,459✔
2269
        }
33,330✔
2270
    }
2271
    else
2272
    {
2273
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
6,051✔
2274
                              nDstPixelStride, nWordCount);
2275
    }
2276
}
39,381✔
2277

2278
template <>
2279
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
25,764✔
2280
                    int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2281
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2282
{
2283
    GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
25,764✔
2284
                             nDstPixelStride, nWordCount);
2285
}
25,764✔
2286

2287
template <>
2288
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
13,617✔
2289
                    int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2290
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2291
{
2292
    GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
13,617✔
2293
                             nDstPixelStride, nWordCount);
2294
}
13,617✔
2295

2296
template <class Tout>
2297
void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
12,273,038✔
2298
                              int nSrcPixelStride,
2299
                              Tout *const CPL_RESTRICT pDstData,
2300
                              int nDstPixelStride, GPtrDiff_t nWordCount)
2301
{
2302
    static_assert(std::is_integral<Tout>::value &&
2303
                      sizeof(Tout) == sizeof(uint32_t),
2304
                  "Bad Tout");
2305
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
12,273,038✔
2306
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2307
    {
2308
        decltype(nWordCount) n = 0;
6,219,818✔
2309
        const __m128i xmm_zero = _mm_setzero_si128();
6,219,818✔
2310
        GByte *CPL_RESTRICT pabyDstDataPtr =
6,219,818✔
2311
            reinterpret_cast<GByte *>(pDstData);
2312
        for (; n < nWordCount - 15; n += 16)
68,826,752✔
2313
        {
2314
            __m128i xmm = _mm_loadu_si128(
62,738,624✔
2315
                reinterpret_cast<const __m128i *>(pSrcData + n));
62,738,624✔
2316
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
62,706,524✔
2317
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
62,705,824✔
2318
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
62,569,724✔
2319
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
62,551,624✔
2320
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
62,544,324✔
2321
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
62,606,924✔
2322
            _mm_storeu_si128(
2323
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
62,606,924✔
2324
            _mm_storeu_si128(
2325
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
62,606,924✔
2326
            _mm_storeu_si128(
2327
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
62,606,924✔
2328
            _mm_storeu_si128(
2329
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
62,606,924✔
2330
        }
2331
        for (; n < nWordCount; n++)
14,312,539✔
2332
        {
2333
            pDstData[n] = pSrcData[n];
8,224,451✔
2334
        }
6,088,128✔
2335
    }
2336
    else
2337
    {
2338
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
6,053,190✔
2339
                              nDstPixelStride, nWordCount);
2340
    }
2341
}
12,138,738✔
2342

2343
template <>
2344
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
438✔
2345
                    int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData,
2346
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2347
{
2348
    GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
438✔
2349
                             nDstPixelStride, nWordCount);
2350
}
438✔
2351

2352
template <>
2353
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
12,273,100✔
2354
                    int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData,
2355
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2356
{
2357
    GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
12,273,100✔
2358
                             nDstPixelStride, nWordCount);
2359
}
12,279,200✔
2360

2361
template <>
2362
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2,470,670✔
2363
                    int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2364
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2365
{
2366
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2,470,670✔
2367
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2368
    {
2369
        decltype(nWordCount) n = 0;
111,225✔
2370
        const __m128i xmm_zero = _mm_setzero_si128();
111,225✔
2371
        GByte *CPL_RESTRICT pabyDstDataPtr =
111,225✔
2372
            reinterpret_cast<GByte *>(pDstData);
2373
        for (; n < nWordCount - 15; n += 16)
3,273,060✔
2374
        {
2375
            __m128i xmm = _mm_loadu_si128(
3,161,840✔
2376
                reinterpret_cast<const __m128i *>(pSrcData + n));
3,161,840✔
2377
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
3,161,840✔
2378
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
3,161,840✔
2379
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
3,161,840✔
2380
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
3,161,840✔
2381
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
3,161,840✔
2382
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
3,161,840✔
2383
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
3,161,840✔
2384
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
3,161,840✔
2385
            __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
3,161,840✔
2386
            __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
3,161,840✔
2387
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
3,161,840✔
2388
                          xmm0_f);
2389
            _mm_storeu_ps(
2390
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
3,161,840✔
2391
            _mm_storeu_ps(
2392
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
3,161,840✔
2393
            _mm_storeu_ps(
2394
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
3,161,840✔
2395
        }
2396
        for (; n < nWordCount; n++)
472,813✔
2397
        {
2398
            pDstData[n] = pSrcData[n];
361,588✔
2399
        }
111,225✔
2400
    }
2401
    else
2402
    {
2403
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2,359,440✔
2404
                              nDstPixelStride, nWordCount);
2405
    }
2406
}
2,470,670✔
2407

2408
template <>
2409
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
145,505✔
2410
                    int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2411
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2412
{
2413
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
145,505✔
2414
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2415
    {
2416
        decltype(nWordCount) n = 0;
122,580✔
2417
        const __m128i xmm_zero = _mm_setzero_si128();
122,580✔
2418
        GByte *CPL_RESTRICT pabyDstDataPtr =
122,580✔
2419
            reinterpret_cast<GByte *>(pDstData);
2420
        for (; n < nWordCount - 15; n += 16)
1,409,500✔
2421
        {
2422
            __m128i xmm = _mm_loadu_si128(
1,286,920✔
2423
                reinterpret_cast<const __m128i *>(pSrcData + n));
1,286,920✔
2424
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
1,286,920✔
2425
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
1,286,920✔
2426
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
1,286,920✔
2427
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
1,286,920✔
2428
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
1,286,920✔
2429
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
1,286,920✔
2430

2431
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
1,286,920✔
2432
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
1,286,920✔
2433
            __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
1,286,920✔
2434
            __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
1,286,920✔
2435
            xmm0 = _mm_srli_si128(xmm0, 8);
1,286,920✔
2436
            xmm1 = _mm_srli_si128(xmm1, 8);
1,286,920✔
2437
            xmm2 = _mm_srli_si128(xmm2, 8);
1,286,920✔
2438
            xmm3 = _mm_srli_si128(xmm3, 8);
1,286,920✔
2439
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
1,286,920✔
2440
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
1,286,920✔
2441
            __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
1,286,920✔
2442
            __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
1,286,920✔
2443

2444
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
1,286,920✔
2445
                          xmm0_low_d);
2446
            _mm_storeu_pd(
2447
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
1,286,920✔
2448
                xmm0_high_d);
2449
            _mm_storeu_pd(
2450
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
1,286,920✔
2451
                xmm1_low_d);
2452
            _mm_storeu_pd(
2453
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
1,286,920✔
2454
                xmm1_high_d);
2455
            _mm_storeu_pd(
2456
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
1,286,920✔
2457
                xmm2_low_d);
2458
            _mm_storeu_pd(
2459
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
1,286,920✔
2460
                xmm2_high_d);
2461
            _mm_storeu_pd(
2462
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
1,286,920✔
2463
                xmm3_low_d);
2464
            _mm_storeu_pd(
2465
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
1,286,920✔
2466
                xmm3_high_d);
2467
        }
2468
        for (; n < nWordCount; n++)
233,550✔
2469
        {
2470
            pDstData[n] = pSrcData[n];
110,970✔
2471
        }
122,580✔
2472
    }
2473
    else
2474
    {
2475
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
22,925✔
2476
                              nDstPixelStride, nWordCount);
2477
    }
2478
}
145,505✔
2479

2480
template <>
2481
void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
6,006✔
2482
                    int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2483
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2484
{
2485
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
6,006✔
2486
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2487
    {
2488
        decltype(nWordCount) n = 0;
5,031✔
2489
        // In SSE2, min_epu16 does not exist, so shift from
2490
        // UInt16 to SInt16 to be able to use min_epi16
2491
        const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
5,031✔
2492
        const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
5,031✔
2493
        for (; n < nWordCount - 7; n += 8)
138,471✔
2494
        {
2495
            __m128i xmm = _mm_loadu_si128(
133,440✔
2496
                reinterpret_cast<const __m128i *>(pSrcData + n));
133,440✔
2497
            xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
133,440✔
2498
            xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
133,440✔
2499
            xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
133,440✔
2500
            xmm = _mm_packus_epi16(xmm, xmm);
133,440✔
2501
            GDALCopyXMMToInt64(xmm,
133,440✔
2502
                               reinterpret_cast<GPtrDiff_t *>(pDstData + n));
133,440✔
2503
        }
2504
        for (; n < nWordCount; n++)
16,005✔
2505
        {
2506
            pDstData[n] =
10,974✔
2507
                pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
10,974✔
2508
        }
5,031✔
2509
    }
2510
    else
2511
    {
2512
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
975✔
2513
                              nDstPixelStride, nWordCount);
2514
    }
2515
}
6,006✔
2516

2517
template <>
2518
void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
21✔
2519
                    int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2520
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2521
{
2522
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
21✔
2523
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2524
    {
2525
        decltype(nWordCount) n = 0;
15✔
2526
        // In SSE2, min_epu16 does not exist, so shift from
2527
        // UInt16 to SInt16 to be able to use min_epi16
2528
        const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
15✔
2529
        const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
15✔
2530
        for (; n < nWordCount - 7; n += 8)
31✔
2531
        {
2532
            __m128i xmm = _mm_loadu_si128(
16✔
2533
                reinterpret_cast<const __m128i *>(pSrcData + n));
16✔
2534
            xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
16✔
2535
            xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
16✔
2536
            xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
16✔
2537
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm);
16✔
2538
        }
2539
        for (; n < nWordCount; n++)
55✔
2540
        {
2541
            pDstData[n] =
40✔
2542
                pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
40✔
2543
        }
15✔
2544
    }
2545
    else
2546
    {
2547
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
6✔
2548
                              nDstPixelStride, nWordCount);
2549
    }
2550
}
21✔
2551

2552
template <>
2553
void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
412✔
2554
                    int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2555
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2556
{
2557
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
412✔
2558
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2559
    {
2560
        decltype(nWordCount) n = 0;
406✔
2561
        const __m128i xmm_zero = _mm_setzero_si128();
406✔
2562
        GByte *CPL_RESTRICT pabyDstDataPtr =
406✔
2563
            reinterpret_cast<GByte *>(pDstData);
2564
        for (; n < nWordCount - 7; n += 8)
1,500✔
2565
        {
2566
            __m128i xmm = _mm_loadu_si128(
1,094✔
2567
                reinterpret_cast<const __m128i *>(pSrcData + n));
1,094✔
2568
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
1,094✔
2569
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
1,094✔
2570
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
1,094✔
2571
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
1,094✔
2572
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
1,094✔
2573
                          xmm0_f);
2574
            _mm_storeu_ps(
2575
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
1,094✔
2576
        }
2577
        for (; n < nWordCount; n++)
1,483✔
2578
        {
2579
            pDstData[n] = pSrcData[n];
1,077✔
2580
        }
406✔
2581
    }
2582
    else
2583
    {
2584
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
6✔
2585
                              nDstPixelStride, nWordCount);
2586
    }
2587
}
412✔
2588

2589
template <>
2590
void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
279✔
2591
                    int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2592
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2593
{
2594
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
279✔
2595
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2596
    {
2597
        decltype(nWordCount) n = 0;
171✔
2598
        const __m128i xmm_zero = _mm_setzero_si128();
171✔
2599
        GByte *CPL_RESTRICT pabyDstDataPtr =
171✔
2600
            reinterpret_cast<GByte *>(pDstData);
2601
        for (; n < nWordCount - 7; n += 8)
219✔
2602
        {
2603
            __m128i xmm = _mm_loadu_si128(
48✔
2604
                reinterpret_cast<const __m128i *>(pSrcData + n));
48✔
2605
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
48✔
2606
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
48✔
2607

2608
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
48✔
2609
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
48✔
2610
            xmm0 = _mm_srli_si128(xmm0, 8);
48✔
2611
            xmm1 = _mm_srli_si128(xmm1, 8);
48✔
2612
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
48✔
2613
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
48✔
2614

2615
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
48✔
2616
                          xmm0_low_d);
2617
            _mm_storeu_pd(
2618
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
48✔
2619
                xmm0_high_d);
2620
            _mm_storeu_pd(
2621
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
48✔
2622
                xmm1_low_d);
2623
            _mm_storeu_pd(
2624
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
48✔
2625
                xmm1_high_d);
2626
        }
2627
        for (; n < nWordCount; n++)
429✔
2628
        {
2629
            pDstData[n] = pSrcData[n];
258✔
2630
        }
171✔
2631
    }
2632
    else
2633
    {
2634
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
108✔
2635
                              nDstPixelStride, nWordCount);
2636
    }
2637
}
279✔
2638

2639
template <>
2640
void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
790✔
2641
                    int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2642
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2643
{
2644
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
790✔
2645
                            nDstPixelStride, nWordCount);
2646
}
790✔
2647

2648
#endif  // HAVE_SSE2
2649

2650
template <>
2651
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
116,766✔
2652
                    int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2653
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2654
{
2655
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
116,766✔
2656
                            nDstPixelStride, nWordCount);
2657
}
116,766✔
2658

2659
template <>
2660
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
15,146✔
2661
                    int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2662
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2663
{
2664
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
15,146✔
2665
                            nDstPixelStride, nWordCount);
2666
}
15,146✔
2667

2668
template <>
2669
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
61,648✔
2670
                    int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2671
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2672
{
2673
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
61,648✔
2674
                            nDstPixelStride, nWordCount);
2675
}
61,646✔
2676

2677
/************************************************************************/
2678
/*                   GDALCopyWordsComplexT()                            */
2679
/************************************************************************/
2680
/**
2681
 * Template function, used to copy data from pSrcData into buffer
2682
 * pDstData, with stride nSrcPixelStride in the source data and
2683
 * stride nDstPixelStride in the destination data. Deals with the
2684
 * complex case, where input is complex and output is complex.
2685
 *
2686
 * @param pSrcData the source data buffer
2687
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2688
 *                      of interest.
2689
 * @param pDstData the destination buffer.
2690
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2691
 *                      interest.
2692
 * @param nWordCount the total number of pixel words to copy
2693
 *
2694
 */
2695
template <class Tin, class Tout>
2696
inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
125,172✔
2697
                                  int nSrcPixelStride,
2698
                                  Tout *const CPL_RESTRICT pDstData,
2699
                                  int nDstPixelStride, GPtrDiff_t nWordCount)
2700
{
2701
    decltype(nWordCount) nDstOffset = 0;
125,172✔
2702
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
125,172✔
2703
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
125,172✔
2704

2705
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
7,337,873✔
2706
    {
2707
        const Tin *const pPixelIn =
7,212,696✔
2708
            reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
7,212,696✔
2709
        Tout *const pPixelOut =
7,212,696✔
2710
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
7,212,696✔
2711

2712
        GDALCopyWord(pPixelIn[0], pPixelOut[0]);
7,212,696✔
2713
        GDALCopyWord(pPixelIn[1], pPixelOut[1]);
7,212,696✔
2714

2715
        nDstOffset += nDstPixelStride;
7,212,696✔
2716
    }
2717
}
125,172✔
2718

2719
/************************************************************************/
2720
/*                   GDALCopyWordsComplexOutT()                         */
2721
/************************************************************************/
2722
/**
2723
 * Template function, used to copy data from pSrcData into buffer
2724
 * pDstData, with stride nSrcPixelStride in the source data and
2725
 * stride nDstPixelStride in the destination data. Deals with the
2726
 * case where the value is real coming in, but complex going out.
2727
 *
2728
 * @param pSrcData the source data buffer
2729
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2730
 *                      of interest, in bytes.
2731
 * @param pDstData the destination buffer.
2732
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2733
 *                      interest, in bytes.
2734
 * @param nWordCount the total number of pixel words to copy
2735
 *
2736
 */
2737
template <class Tin, class Tout>
2738
inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
3,168✔
2739
                                     int nSrcPixelStride,
2740
                                     Tout *const CPL_RESTRICT pDstData,
2741
                                     int nDstPixelStride, GPtrDiff_t nWordCount)
2742
{
2743
    decltype(nWordCount) nDstOffset = 0;
3,168✔
2744

2745
    const Tout tOutZero = static_cast<Tout>(0);
3,168✔
2746

2747
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
3,168✔
2748
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
3,168✔
2749

2750
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
1,112,479✔
2751
    {
2752
        const Tin tValue =
1,109,311✔
2753
            *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
1,109,311✔
2754
        Tout *const pPixelOut =
1,109,311✔
2755
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
1,109,311✔
2756
        GDALCopyWord(tValue, *pPixelOut);
1,109,311✔
2757

2758
        pPixelOut[1] = tOutZero;
1,109,311✔
2759

2760
        nDstOffset += nDstPixelStride;
1,109,311✔
2761
    }
2762
}
3,168✔
2763

2764
/************************************************************************/
2765
/*                           GDALCopyWordsFromT()                       */
2766
/************************************************************************/
2767
/**
2768
 * Template driver function. Given the input type T, call the appropriate
2769
 * GDALCopyWordsT function template for the desired output type. You should
2770
 * never call this function directly (call GDALCopyWords instead).
2771
 *
2772
 * @param pSrcData source data buffer
2773
 * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2774
 * @param bInComplex input is complex
2775
 * @param pDstData destination data buffer
2776
 * @param eDstType destination data type
2777
 * @param nDstPixelStride pixel stride in output buffer, in pixel words
2778
 * @param nWordCount number of pixel words to be copied
2779
 */
2780
template <class T>
2781
inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
53,539,580✔
2782
                               int nSrcPixelStride, bool bInComplex,
2783
                               void *CPL_RESTRICT pDstData,
2784
                               GDALDataType eDstType, int nDstPixelStride,
2785
                               GPtrDiff_t nWordCount)
2786
{
2787
    switch (eDstType)
53,539,580✔
2788
    {
2789
        case GDT_Byte:
4,556,846✔
2790
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
4,556,846✔
2791
                           static_cast<unsigned char *>(pDstData),
2792
                           nDstPixelStride, nWordCount);
2793
            break;
4,556,930✔
2794
        case GDT_Int8:
458✔
2795
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
458✔
2796
                           static_cast<signed char *>(pDstData),
2797
                           nDstPixelStride, nWordCount);
2798
            break;
458✔
2799
        case GDT_UInt16:
101,125✔
2800
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
101,125✔
2801
                           static_cast<unsigned short *>(pDstData),
2802
                           nDstPixelStride, nWordCount);
2803
            break;
101,124✔
2804
        case GDT_Int16:
4,126,329✔
2805
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
4,126,329✔
2806
                           static_cast<short *>(pDstData), nDstPixelStride,
2807
                           nWordCount);
2808
            break;
4,126,329✔
2809
        case GDT_UInt32:
4,180✔
2810
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
4,180✔
2811
                           static_cast<unsigned int *>(pDstData),
2812
                           nDstPixelStride, nWordCount);
2813
            break;
4,180✔
2814
        case GDT_Int32:
25,496,991✔
2815
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
25,496,991✔
2816
                           static_cast<int *>(pDstData), nDstPixelStride,
2817
                           nWordCount);
2818
            break;
25,506,493✔
2819
        case GDT_UInt64:
593✔
2820
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
593✔
2821
                           static_cast<std::uint64_t *>(pDstData),
2822
                           nDstPixelStride, nWordCount);
2823
            break;
593✔
2824
        case GDT_Int64:
4,158✔
2825
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
4,158✔
2826
                           static_cast<std::int64_t *>(pDstData),
2827
                           nDstPixelStride, nWordCount);
2828
            break;
4,158✔
2829
        case GDT_Float32:
3,869,395✔
2830
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3,869,395✔
2831
                           static_cast<float *>(pDstData), nDstPixelStride,
2832
                           nWordCount);
2833
            break;
3,869,395✔
2834
        case GDT_Float64:
15,243,554✔
2835
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
15,243,554✔
2836
                           static_cast<double *>(pDstData), nDstPixelStride,
2837
                           nWordCount);
2838
            break;
15,243,574✔
2839
        case GDT_CInt16:
122,401✔
2840
            if (bInComplex)
122,401✔
2841
            {
2842
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
121,390✔
2843
                                      static_cast<short *>(pDstData),
2844
                                      nDstPixelStride, nWordCount);
2845
            }
2846
            else  // input is not complex, so we need to promote to a complex
2847
                  // buffer
2848
            {
2849
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
1,011✔
2850
                                         static_cast<short *>(pDstData),
2851
                                         nDstPixelStride, nWordCount);
2852
            }
2853
            break;
122,401✔
2854
        case GDT_CInt32:
800✔
2855
            if (bInComplex)
800✔
2856
            {
2857
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
411✔
2858
                                      static_cast<int *>(pDstData),
2859
                                      nDstPixelStride, nWordCount);
2860
            }
2861
            else  // input is not complex, so we need to promote to a complex
2862
                  // buffer
2863
            {
2864
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
389✔
2865
                                         static_cast<int *>(pDstData),
2866
                                         nDstPixelStride, nWordCount);
2867
            }
2868
            break;
800✔
2869
        case GDT_CFloat32:
3,171✔
2870
            if (bInComplex)
3,171✔
2871
            {
2872
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2,589✔
2873
                                      static_cast<float *>(pDstData),
2874
                                      nDstPixelStride, nWordCount);
2875
            }
2876
            else  // input is not complex, so we need to promote to a complex
2877
                  // buffer
2878
            {
2879
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
582✔
2880
                                         static_cast<float *>(pDstData),
2881
                                         nDstPixelStride, nWordCount);
2882
            }
2883
            break;
3,171✔
2884
        case GDT_CFloat64:
1,968✔
2885
            if (bInComplex)
1,968✔
2886
            {
2887
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
782✔
2888
                                      static_cast<double *>(pDstData),
2889
                                      nDstPixelStride, nWordCount);
2890
            }
2891
            else  // input is not complex, so we need to promote to a complex
2892
                  // buffer
2893
            {
2894
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
1,186✔
2895
                                         static_cast<double *>(pDstData),
2896
                                         nDstPixelStride, nWordCount);
2897
            }
2898
            break;
1,968✔
2899
        case GDT_Unknown:
×
2900
        case GDT_TypeCount:
2901
            CPLAssert(false);
×
2902
    }
2903
}
53,549,201✔
2904

2905
}  // end anonymous namespace
2906

2907
/************************************************************************/
2908
/*                          GDALReplicateWord()                         */
2909
/************************************************************************/
2910

2911
template <class T>
2912
inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
521,815✔
2913
                               GPtrDiff_t nWordCount)
2914
{
2915
    const T valSet = *static_cast<const T *>(pDstData);
521,815✔
2916
    if (nDstPixelStride == static_cast<int>(sizeof(T)))
521,815✔
2917
    {
2918
        T *pDstPtr = static_cast<T *>(pDstData) + 1;
492,874✔
2919
        while (nWordCount >= 4)
19,136,724✔
2920
        {
2921
            nWordCount -= 4;
18,643,852✔
2922
            pDstPtr[0] = valSet;
18,643,852✔
2923
            pDstPtr[1] = valSet;
18,643,852✔
2924
            pDstPtr[2] = valSet;
18,643,852✔
2925
            pDstPtr[3] = valSet;
18,643,852✔
2926
            pDstPtr += 4;
18,643,852✔
2927
        }
2928
        while (nWordCount > 0)
1,254,048✔
2929
        {
2930
            --nWordCount;
761,174✔
2931
            *pDstPtr = valSet;
761,174✔
2932
            pDstPtr++;
761,174✔
2933
        }
2934
    }
2935
    else
2936
    {
2937
        GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
28,941✔
2938
        while (nWordCount > 0)
954,820✔
2939
        {
2940
            --nWordCount;
925,879✔
2941
            *reinterpret_cast<T *>(pabyDstPtr) = valSet;
925,879✔
2942
            pabyDstPtr += nDstPixelStride;
925,879✔
2943
        }
2944
    }
2945
}
521,815✔
2946

2947
static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
901,984✔
2948
                              GDALDataType eSrcType,
2949
                              void *CPL_RESTRICT pDstData,
2950
                              GDALDataType eDstType, int nDstPixelStride,
2951
                              GPtrDiff_t nWordCount)
2952
{
2953
    /* -----------------------------------------------------------------------
2954
     */
2955
    /* Special case when the source data is always the same value */
2956
    /* (for VRTSourcedRasterBand::IRasterIO and
2957
     * VRTDerivedRasterBand::IRasterIO*/
2958
    /*  for example) */
2959
    /* -----------------------------------------------------------------------
2960
     */
2961
    // Let the general translation case do the necessary conversions
2962
    // on the first destination element.
2963
    GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
901,984✔
2964

2965
    // Now copy the first element to the nWordCount - 1 following destination
2966
    // elements.
2967
    nWordCount--;
900,301✔
2968
    GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
900,301✔
2969

2970
    switch (eDstType)
900,301✔
2971
    {
2972
        case GDT_Byte:
377,951✔
2973
        case GDT_Int8:
2974
        {
2975
            if (nDstPixelStride == 1)
377,951✔
2976
            {
2977
                if (nWordCount > 0)
344,199✔
2978
                    memset(pabyDstWord,
344,199✔
2979
                           *reinterpret_cast<const GByte *>(pDstData),
344,199✔
2980
                           nWordCount);
2981
            }
2982
            else
2983
            {
2984
                GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
33,752✔
2985
                while (nWordCount > 0)
5,438,530✔
2986
                {
2987
                    --nWordCount;
5,404,780✔
2988
                    *pabyDstWord = valSet;
5,404,780✔
2989
                    pabyDstWord += nDstPixelStride;
5,404,780✔
2990
                }
2991
            }
2992
            break;
377,951✔
2993
        }
2994

2995
#define CASE_DUPLICATE_SIMPLE(enum_type, c_type)                               \
2996
    case enum_type:                                                            \
2997
    {                                                                          \
2998
        GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount);     \
2999
        break;                                                                 \
3000
    }
3001

3002
            CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
333✔
3003
            CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
169,647✔
3004
            CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
56✔
3005
            CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
295,564✔
3006
            CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
21✔
3007
            CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
662✔
3008
            CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
52,210✔
3009
            CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
5,011✔
3010

3011
#define CASE_DUPLICATE_COMPLEX(enum_type, c_type)                              \
3012
    case enum_type:                                                            \
3013
    {                                                                          \
3014
        c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0];        \
3015
        c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1];        \
3016
        while (nWordCount > 0)                                                 \
3017
        {                                                                      \
3018
            --nWordCount;                                                      \
3019
            reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1;              \
3020
            reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2;              \
3021
            pabyDstWord += nDstPixelStride;                                    \
3022
        }                                                                      \
3023
        break;                                                                 \
3024
    }
3025

3026
            CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
784✔
3027
            CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
784✔
3028
            CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
784✔
3029
            CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
784✔
3030

3031
        case GDT_Unknown:
×
3032
        case GDT_TypeCount:
3033
            CPLAssert(false);
×
3034
    }
3035
}
901,784✔
3036

3037
/************************************************************************/
3038
/*                        GDALUnrolledCopy()                            */
3039
/************************************************************************/
3040

3041
template <class T, int srcStride, int dstStride>
3042
static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
5,329,330✔
3043
                                           const T *CPL_RESTRICT pSrc,
3044
                                           GPtrDiff_t nIters)
3045
{
3046
    if (nIters >= 16)
5,329,330✔
3047
    {
3048
        for (GPtrDiff_t i = nIters / 16; i != 0; i--)
138,283,768✔
3049
        {
3050
            pDest[0 * dstStride] = pSrc[0 * srcStride];
133,084,184✔
3051
            pDest[1 * dstStride] = pSrc[1 * srcStride];
133,084,184✔
3052
            pDest[2 * dstStride] = pSrc[2 * srcStride];
133,084,184✔
3053
            pDest[3 * dstStride] = pSrc[3 * srcStride];
133,084,184✔
3054
            pDest[4 * dstStride] = pSrc[4 * srcStride];
133,084,184✔
3055
            pDest[5 * dstStride] = pSrc[5 * srcStride];
133,084,184✔
3056
            pDest[6 * dstStride] = pSrc[6 * srcStride];
133,084,184✔
3057
            pDest[7 * dstStride] = pSrc[7 * srcStride];
133,084,184✔
3058
            pDest[8 * dstStride] = pSrc[8 * srcStride];
133,084,184✔
3059
            pDest[9 * dstStride] = pSrc[9 * srcStride];
133,084,184✔
3060
            pDest[10 * dstStride] = pSrc[10 * srcStride];
133,084,184✔
3061
            pDest[11 * dstStride] = pSrc[11 * srcStride];
133,084,184✔
3062
            pDest[12 * dstStride] = pSrc[12 * srcStride];
133,084,184✔
3063
            pDest[13 * dstStride] = pSrc[13 * srcStride];
133,084,184✔
3064
            pDest[14 * dstStride] = pSrc[14 * srcStride];
133,084,184✔
3065
            pDest[15 * dstStride] = pSrc[15 * srcStride];
133,084,184✔
3066
            pDest += 16 * dstStride;
133,084,184✔
3067
            pSrc += 16 * srcStride;
133,084,184✔
3068
        }
3069
        nIters = nIters % 16;
5,199,652✔
3070
    }
3071
    for (GPtrDiff_t i = 0; i < nIters; i++)
7,589,974✔
3072
    {
3073
        pDest[i * dstStride] = *pSrc;
2,260,649✔
3074
        pSrc += srcStride;
2,260,649✔
3075
    }
3076
}
5,329,330✔
3077

3078
template <class T, int srcStride, int dstStride>
3079
static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
5,324,133✔
3080
                                    const T *CPL_RESTRICT pSrc,
3081
                                    GPtrDiff_t nIters)
3082
{
3083
    GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
5,324,133✔
3084
}
5,324,143✔
3085

3086
#ifdef HAVE_SSE2
3087

3088
template <>
3089
void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
303,985✔
3090
                                   const GByte *CPL_RESTRICT pSrc,
3091
                                   GPtrDiff_t nIters)
3092
{
3093
    decltype(nIters) i = 0;
303,985✔
3094
    if (nIters > 16)
303,985✔
3095
    {
3096
        const __m128i xmm_mask = _mm_set1_epi16(0xff);
145,815✔
3097
        // If we were sure that there would always be 1 trailing byte, we could
3098
        // check against nIters - 15
3099
        for (; i < nIters - 16; i += 16)
2,544,120✔
3100
        {
3101
            __m128i xmm0 =
3102
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
2,398,300✔
3103
            __m128i xmm1 =
3104
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
4,796,610✔
3105
            // Set higher 8bit of each int16 packed word to 0
3106
            xmm0 = _mm_and_si128(xmm0, xmm_mask);
2,398,300✔
3107
            xmm1 = _mm_and_si128(xmm1, xmm_mask);
2,398,300✔
3108
            // Pack int16 to uint8 and merge back both vector
3109
            xmm0 = _mm_packus_epi16(xmm0, xmm1);
2,398,300✔
3110

3111
            // Store result
3112
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
2,398,300✔
3113

3114
            pSrc += 2 * 16;
2,398,300✔
3115
        }
3116
    }
3117
    for (; i < nIters; i++)
3,875,160✔
3118
    {
3119
        pDest[i] = *pSrc;
3,571,180✔
3120
        pSrc += 2;
3,571,180✔
3121
    }
3122
}
303,985✔
3123

3124
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
3125

3126
template <>
3127
void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
184,627✔
3128
                                   const GByte *CPL_RESTRICT pSrc,
3129
                                   GPtrDiff_t nIters)
3130
{
3131
    if (nIters > 16 && CPLHaveRuntimeSSSE3())
184,627✔
3132
    {
3133
        GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
179,430✔
3134
    }
3135
    else
3136
    {
3137
        GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters);
5,197✔
3138
    }
3139
}
184,627✔
3140

3141
#endif
3142

3143
template <>
3144
void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
104,815✔
3145
                                   const GByte *CPL_RESTRICT pSrc,
3146
                                   GPtrDiff_t nIters)
3147
{
3148
    decltype(nIters) i = 0;
104,815✔
3149
    if (nIters > 16)
104,815✔
3150
    {
3151
        const __m128i xmm_mask = _mm_set1_epi32(0xff);
99,519✔
3152
        // If we were sure that there would always be 3 trailing bytes, we could
3153
        // check against nIters - 15
3154
        for (; i < nIters - 16; i += 16)
8,779,400✔
3155
        {
3156
            __m128i xmm0 =
3157
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
8,680,200✔
3158
            __m128i xmm1 =
3159
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
8,680,200✔
3160
            __m128i xmm2 =
3161
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
8,680,200✔
3162
            __m128i xmm3 =
3163
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
17,360,400✔
3164
            // Set higher 24bit of each int32 packed word to 0
3165
            xmm0 = _mm_and_si128(xmm0, xmm_mask);
8,680,200✔
3166
            xmm1 = _mm_and_si128(xmm1, xmm_mask);
8,680,200✔
3167
            xmm2 = _mm_and_si128(xmm2, xmm_mask);
8,680,200✔
3168
            xmm3 = _mm_and_si128(xmm3, xmm_mask);
8,680,200✔
3169
            // Pack int32 to int16
3170
            xmm0 = _mm_packs_epi32(xmm0, xmm1);
8,680,530✔
3171
            xmm2 = _mm_packs_epi32(xmm2, xmm3);
8,679,190✔
3172
            // Pack int16 to uint8
3173
            xmm0 = _mm_packus_epi16(xmm0, xmm2);
8,679,880✔
3174

3175
            // Store result
3176
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
8,679,880✔
3177

3178
            pSrc += 4 * 16;
8,679,880✔
3179
        }
3180
    }
3181
    for (; i < nIters; i++)
1,114,790✔
3182
    {
3183
        pDest[i] = *pSrc;
1,010,290✔
3184
        pSrc += 4;
1,010,290✔
3185
    }
3186
}
104,500✔
3187
#endif  // HAVE_SSE2
3188

3189
/************************************************************************/
3190
/*                         GDALFastCopy()                               */
3191
/************************************************************************/
3192

3193
template <class T>
3194
static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
39,792,300✔
3195
                                const T *CPL_RESTRICT pSrc, int nSrcStride,
3196
                                GPtrDiff_t nIters)
3197
{
3198
    constexpr int sizeofT = static_cast<int>(sizeof(T));
39,792,300✔
3199
    if (nIters == 1)
39,792,300✔
3200
    {
3201
        *pDest = *pSrc;
22,302,090✔
3202
    }
3203
    else if (nDestStride == sizeofT)
17,490,181✔
3204
    {
3205
        if (nSrcStride == sizeofT)
12,241,862✔
3206
        {
3207
            memcpy(pDest, pSrc, nIters * sizeof(T));
11,496,047✔
3208
        }
3209
        else if (nSrcStride == 2 * sizeofT)
745,837✔
3210
        {
3211
            GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
306,926✔
3212
        }
3213
        else if (nSrcStride == 3 * sizeofT)
438,911✔
3214
        {
3215
            GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
289,935✔
3216
        }
3217
        else if (nSrcStride == 4 * sizeofT)
148,976✔
3218
        {
3219
            GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
133,685✔
3220
        }
3221
        else
3222
        {
3223
            while (nIters-- > 0)
13,026,830✔
3224
            {
3225
                *pDest = *pSrc;
13,011,530✔
3226
                pSrc += nSrcStride / sizeofT;
13,011,530✔
3227
                pDest++;
13,011,530✔
3228
            }
3229
        }
3230
    }
3231
    else if (nSrcStride == sizeofT)
5,248,249✔
3232
    {
3233
        if (nDestStride == 2 * sizeofT)
5,246,459✔
3234
        {
3235
            GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
129,119✔
3236
        }
3237
        else if (nDestStride == 3 * sizeofT)
5,117,340✔
3238
        {
3239
            GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
4,410,191✔
3240
        }
3241
        else if (nDestStride == 4 * sizeofT)
707,150✔
3242
        {
3243
            GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
647,716✔
3244
        }
3245
        else
3246
        {
3247
            while (nIters-- > 0)
12,650,500✔
3248
            {
3249
                *pDest = *pSrc;
12,591,080✔
3250
                pSrc++;
12,591,080✔
3251
                pDest += nDestStride / sizeofT;
12,591,080✔
3252
            }
3253
        }
3254
    }
3255
    else
3256
    {
3257
        while (nIters-- > 0)
1,100,684✔
3258
        {
3259
            *pDest = *pSrc;
1,098,888✔
3260
            pSrc += nSrcStride / sizeofT;
1,098,888✔
3261
            pDest += nDestStride / sizeofT;
1,098,888✔
3262
        }
3263
    }
3264
}
39,792,300✔
3265

3266
/************************************************************************/
3267
/*                         GDALFastCopyByte()                           */
3268
/************************************************************************/
3269

3270
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
276,287✔
3271
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
3272
                             int nDstPixelStride, GPtrDiff_t nWordCount)
3273
{
3274
    GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
276,287✔
3275
                 nWordCount);
3276
}
276,287✔
3277

3278
/************************************************************************/
3279
/*                           GDALCopyWords()                            */
3280
/************************************************************************/
3281

3282
/**
3283
 * Copy pixel words from buffer to buffer.
3284
 *
3285
 * @see GDALCopyWords64()
3286
 */
3287
void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
87,096,200✔
3288
                               GDALDataType eSrcType, int nSrcPixelStride,
3289
                               void *CPL_RESTRICT pDstData,
3290
                               GDALDataType eDstType, int nDstPixelStride,
3291
                               int nWordCount)
3292
{
3293
    GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
87,096,200✔
3294
                    nDstPixelStride, nWordCount);
3295
}
87,094,500✔
3296

3297
/************************************************************************/
3298
/*                          GDALCopyWords64()                           */
3299
/************************************************************************/
3300

3301
/**
3302
 * Copy pixel words from buffer to buffer.
3303
 *
3304
 * This function is used to copy pixel word values from one memory buffer
3305
 * to another, with support for conversion between data types, and differing
3306
 * step factors. The data type conversion is done using the following
3307
 * rules:
3308
 * <ul>
3309
 * <li>Values assigned to a lower range integer type are clipped. For
3310
 * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3311
 * less the 0 to be set to 0, and values larger than 255 to be set to 255.
3312
 * </li>
3313
 * <li>
3314
 * Assignment from floating point to integer rounds to closest integer.
3315
 * +Infinity is mapped to the largest integer. -Infinity is mapped to the
3316
 * smallest integer. NaN is mapped to 0.
3317
 * </li>
3318
 * <li>
3319
 * Assignment from non-complex to complex will result in the imaginary part
3320
 * being set to zero on output.
3321
 * </li>
3322
 * <li> Assignment from complex to
3323
 * non-complex will result in the complex portion being lost and the real
3324
 * component being preserved (<i>not magnitude!</i>).
3325
 * </li>
3326
 * </ul>
3327
 *
3328
 * No assumptions are made about the source or destination words occurring
3329
 * on word boundaries.  It is assumed that all values are in native machine
3330
 * byte order.
3331
 *
3332
 * @param pSrcData Pointer to source data to be converted.
3333
 * @param eSrcType the source data type (see GDALDataType enum)
3334
 * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3335
 * in bytes
3336
 * @param pDstData Pointer to buffer where destination data should go
3337
 * @param eDstType the destination data type (see GDALDataType enum)
3338
 * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3339
 * words), in bytes
3340
 * @param nWordCount number of words to be copied
3341
 *
3342
 * @note
3343
 * When adding a new data type to GDAL, you must do the following to
3344
 * support it properly within the GDALCopyWords function:
3345
 * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3346
 *    This should invoke the appropriate GDALCopyWordsFromT wrapper.
3347
 * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3348
 *    This should call the appropriate GDALCopyWordsT template.
3349
 * 3. If appropriate, overload the appropriate CopyWord template in the
3350
 *    above namespace. This will ensure that any conversion issues are
3351
 *    handled (cases like the float -> int32 case, where the min/max)
3352
 *    values are subject to roundoff error.
3353
 */
3354

3355
void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
108,286,000✔
3356
                                 GDALDataType eSrcType, int nSrcPixelStride,
3357
                                 void *CPL_RESTRICT pDstData,
3358
                                 GDALDataType eDstType, int nDstPixelStride,
3359
                                 GPtrDiff_t nWordCount)
3360

3361
{
3362
    // On platforms where alignment matters, be careful
3363
    const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
108,286,000✔
3364
    const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
108,264,000✔
3365
    if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0))
108,270,000✔
3366
    {
3367
        CPLError(CE_Failure, CPLE_NotSupported,
2✔
3368
                 "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount "
3369
                 "argument");
3370
        return;
2✔
3371
    }
3372
    if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
108,270,000✔
3373
        ((reinterpret_cast<uintptr_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
60,226,600✔
3374
         (reinterpret_cast<uintptr_t>(pDstData) % nDstDataTypeSize) != 0 ||
60,234,300✔
3375
         (nSrcPixelStride % nSrcDataTypeSize) != 0 ||
60,227,500✔
3376
         (nDstPixelStride % nDstDataTypeSize) != 0))
60,223,100✔
3377
    {
3378
        if (eSrcType == eDstType)
905✔
3379
        {
3380
            for (decltype(nWordCount) i = 0; i < nWordCount; i++)
34,800✔
3381
            {
3382
                memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
34,000✔
3383
                       static_cast<const GByte *>(pSrcData) +
3384
                           nSrcPixelStride * i,
34,000✔
3385
                       nDstDataTypeSize);
3386
            }
3387
        }
3388
        else
3389
        {
3390
            const auto getAlignedPtr = [](GByte *ptr, int align)
210✔
3391
            {
3392
                return ptr +
3393
                       ((align - (reinterpret_cast<uintptr_t>(ptr) % align)) %
210✔
3394
                        align);
210✔
3395
            };
3396

3397
            // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
3398
            // be sure to get correctly aligned pointer.
3399
            constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double);
105✔
3400
            GByte abySrcBuffer[2 * SIZEOF_CFLOAT64];
3401
            GByte abyDstBuffer[2 * SIZEOF_CFLOAT64];
3402
            GByte *pabySrcBuffer =
3403
                getAlignedPtr(abySrcBuffer, nSrcDataTypeSize);
105✔
3404
            GByte *pabyDstBuffer =
3405
                getAlignedPtr(abyDstBuffer, nDstDataTypeSize);
105✔
3406
            for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3,360✔
3407
            {
3408
                memcpy(pabySrcBuffer,
3,255✔
3409
                       static_cast<const GByte *>(pSrcData) +
3410
                           nSrcPixelStride * i,
3,255✔
3411
                       nSrcDataTypeSize);
3412
                GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer,
3,255✔
3413
                                eDstType, 0, 1);
3414
                memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3,255✔
3415
                       pabyDstBuffer, nDstDataTypeSize);
3416
            }
3417
        }
3418
        return;
905✔
3419
    }
3420

3421
    // Deal with the case where we're replicating a single word into the
3422
    // provided buffer
3423
    if (nSrcPixelStride == 0 && nWordCount > 1)
108,270,000✔
3424
    {
3425
        GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
901,102✔
3426
                          nDstPixelStride, nWordCount);
3427
        return;
902,902✔
3428
    }
3429

3430
    if (eSrcType == eDstType)
107,368,000✔
3431
    {
3432
        if (eSrcType == GDT_Byte || eSrcType == GDT_Int8)
53,998,300✔
3433
        {
3434
            GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride,
18,562,600✔
3435
                         static_cast<const GByte *>(pSrcData), nSrcPixelStride,
3436
                         nWordCount);
3437
            return;
18,561,200✔
3438
        }
3439

3440
        if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 &&
35,435,700✔
3441
            (nDstPixelStride % 2) == 0)
20,966,300✔
3442
        {
3443
            GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
20,966,300✔
3444
                         static_cast<const short *>(pSrcData), nSrcPixelStride,
3445
                         nWordCount);
3446
            return;
20,966,000✔
3447
        }
3448

3449
        if (nWordCount == 1)
14,469,400✔
3450
        {
3451
#if defined(CSA_BUILD) || defined(__COVERITY__)
3452
            // Avoid false positives...
3453
            memcpy(pDstData, pSrcData, nSrcDataTypeSize);
3454
#else
3455
            if (nSrcDataTypeSize == 2)
14,056,500✔
3456
                memcpy(pDstData, pSrcData, 2);
×
3457
            else if (nSrcDataTypeSize == 4)
14,056,500✔
3458
                memcpy(pDstData, pSrcData, 4);
14,014,000✔
3459
            else if (nSrcDataTypeSize == 8)
42,569✔
3460
                memcpy(pDstData, pSrcData, 8);
26,052✔
3461
            else /* if( eSrcType == GDT_CFloat64 ) */
3462
                memcpy(pDstData, pSrcData, 16);
16,517✔
3463
#endif
3464
            return;
14,056,500✔
3465
        }
3466

3467
        // Let memcpy() handle the case where we're copying a packed buffer
3468
        // of pixels.
3469
        if (nSrcPixelStride == nDstPixelStride)
412,885✔
3470
        {
3471
            if (nSrcPixelStride == nSrcDataTypeSize)
259,243✔
3472
            {
3473
                memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
257,073✔
3474
                return;
257,073✔
3475
            }
3476
        }
3477
    }
3478

3479
    // Handle the more general case -- deals with conversion of data types
3480
    // directly.
3481
    switch (eSrcType)
53,525,900✔
3482
    {
3483
        case GDT_Byte:
14,934,900✔
3484
            GDALCopyWordsFromT<unsigned char>(
14,934,900✔
3485
                static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
3486
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
3487
            break;
14,936,900✔
3488
        case GDT_Int8:
976✔
3489
            GDALCopyWordsFromT<signed char>(
976✔
3490
                static_cast<const signed char *>(pSrcData), nSrcPixelStride,
3491
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
3492
            break;
976✔
3493
        case GDT_UInt16:
53,125✔
3494
            GDALCopyWordsFromT<unsigned short>(
53,125✔
3495
                static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
3496
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
3497
            break;
53,125✔
3498
        case GDT_Int16:
4,542,320✔
3499
            GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
4,542,320✔
3500
                                      nSrcPixelStride, false, pDstData,
3501
                                      eDstType, nDstPixelStride, nWordCount);
3502
            break;
4,542,310✔
3503
        case GDT_UInt32:
6,747✔
3504
            GDALCopyWordsFromT<unsigned int>(
6,747✔
3505
                static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
3506
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
3507
            break;
6,747✔
3508
        case GDT_Int32:
12,254,600✔
3509
            GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
12,254,600✔
3510
                                    nSrcPixelStride, false, pDstData, eDstType,
3511
                                    nDstPixelStride, nWordCount);
3512
            break;
12,254,600✔
3513
        case GDT_UInt64:
1,430✔
3514
            GDALCopyWordsFromT<std::uint64_t>(
1,430✔
3515
                static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
3516
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
3517
            break;
1,430✔
3518
        case GDT_Int64:
7,175✔
3519
            GDALCopyWordsFromT<std::int64_t>(
7,175✔
3520
                static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
3521
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
3522
            break;
7,175✔
3523
        case GDT_Float32:
318,702✔
3524
            GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
318,702✔
3525
                                      nSrcPixelStride, false, pDstData,
3526
                                      eDstType, nDstPixelStride, nWordCount);
3527
            break;
318,698✔
3528
        case GDT_Float64:
20,677,900✔
3529
            GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
20,677,900✔
3530
                                       nSrcPixelStride, false, pDstData,
3531
                                       eDstType, nDstPixelStride, nWordCount);
3532
            break;
20,678,000✔
3533
        case GDT_CInt16:
566,961✔
3534
            GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
566,961✔
3535
                                      nSrcPixelStride, true, pDstData, eDstType,
3536
                                      nDstPixelStride, nWordCount);
3537
            break;
566,961✔
3538
        case GDT_CInt32:
397✔
3539
            GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
397✔
3540
                                    nSrcPixelStride, true, pDstData, eDstType,
3541
                                    nDstPixelStride, nWordCount);
3542
            break;
397✔
3543
        case GDT_CFloat32:
1,357✔
3544
            GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
1,357✔
3545
                                      nSrcPixelStride, true, pDstData, eDstType,
3546
                                      nDstPixelStride, nWordCount);
3547
            break;
1,357✔
3548
        case GDT_CFloat64:
172,487✔
3549
            GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
172,487✔
3550
                                       nSrcPixelStride, true, pDstData,
3551
                                       eDstType, nDstPixelStride, nWordCount);
3552
            break;
172,487✔
3553
        case GDT_Unknown:
×
3554
        case GDT_TypeCount:
3555
            CPLAssert(false);
×
3556
    }
3557
}
3558

3559
/************************************************************************/
3560
/*                            GDALCopyBits()                            */
3561
/************************************************************************/
3562

3563
/**
3564
 * Bitwise word copying.
3565
 *
3566
 * A function for moving sets of partial bytes around.  Loosely
3567
 * speaking this is a bitwise analog to GDALCopyWords().
3568
 *
3569
 * It copies nStepCount "words" where each word is nBitCount bits long.
3570
 * The nSrcStep and nDstStep are the number of bits from the start of one
3571
 * word to the next (same as nBitCount if they are packed).  The nSrcOffset
3572
 * and nDstOffset are the offset into the source and destination buffers
3573
 * to start at, also measured in bits.
3574
 *
3575
 * All bit offsets are assumed to start from the high order bit in a byte
3576
 * (i.e. most significant bit first).  Currently this function is not very
3577
 * optimized, but it may be improved for some common cases in the future
3578
 * as needed.
3579
 *
3580
 * @param pabySrcData the source data buffer.
3581
 * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3582
 * first word to copy.
3583
 * @param nSrcStep the offset in bits from the start one source word to the
3584
 * start of the next.
3585
 * @param pabyDstData the destination data buffer.
3586
 * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3587
 * first word to copy over.
3588
 * @param nDstStep the offset in bits from the start one word to the
3589
 * start of the next.
3590
 * @param nBitCount the number of bits in a word to be copied.
3591
 * @param nStepCount the number of words to copy.
3592
 */
3593

3594
void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
×
3595
                  GByte *pabyDstData, int nDstOffset, int nDstStep,
3596
                  int nBitCount, int nStepCount)
3597

3598
{
3599
    VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
×
3600

3601
    for (int iStep = 0; iStep < nStepCount; iStep++)
×
3602
    {
3603
        for (int iBit = 0; iBit < nBitCount; iBit++)
×
3604
        {
3605
            if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
×
3606
                pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
×
3607
            else
3608
                pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
×
3609

3610
            nSrcOffset++;
×
3611
            nDstOffset++;
×
3612
        }
3613

3614
        nSrcOffset += (nSrcStep - nBitCount);
×
3615
        nDstOffset += (nDstStep - nBitCount);
×
3616
    }
3617
}
3618

3619
/************************************************************************/
3620
/*                    GDALBandGetBestOverviewLevel()                    */
3621
/*                                                                      */
3622
/* Returns the best overview level to satisfy the query or -1 if none   */
3623
/* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when        */
3624
/* returning a valid overview level                                     */
3625
/************************************************************************/
3626

3627
// Variant of GDALBandGetBestOverviewLevel2() for callers that have no
// GDALRasterIOExtraArg: forwards every argument and passes a null
// extra-argument pointer.
int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
                                 int &nXSize, int &nYSize, int nBufXSize,
                                 int nBufYSize)
{
    GDALRasterIOExtraArg *const psNoExtraArg = nullptr;
    return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize,
                                         nBufXSize, nBufYSize, psNoExtraArg);
}
3634

3635
int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
322,828✔
3636
                                  int &nYOff, int &nXSize, int &nYSize,
3637
                                  int nBufXSize, int nBufYSize,
3638
                                  GDALRasterIOExtraArg *psExtraArg)
3639
{
3640
    /* -------------------------------------------------------------------- */
3641
    /*      Compute the desired downsampling factor.  It is                 */
3642
    /*      based on the least reduced axis, and represents the number      */
3643
    /*      of source pixels to one destination pixel.                      */
3644
    /* -------------------------------------------------------------------- */
3645
    const double dfDesiredDownsamplingFactor =
322,828✔
3646
        ((nXSize / static_cast<double>(nBufXSize)) <
322,828✔
3647
             (nYSize / static_cast<double>(nBufYSize)) ||
160,491✔
3648
         nBufYSize == 1)
3649
            ? nXSize / static_cast<double>(nBufXSize)
354,204✔
3650
            : nYSize / static_cast<double>(nBufYSize);
129,115✔
3651

3652
    /* -------------------------------------------------------------------- */
3653
    /*      Find the overview level that largest downsampling factor (most  */
3654
    /*      downsampled) that is still less than (or only a little more)    */
3655
    /*      downsampled than the request.                                   */
3656
    /* -------------------------------------------------------------------- */
3657
    const int nOverviewCount = poBand->GetOverviewCount();
322,828✔
3658
    GDALRasterBand *poBestOverview = nullptr;
322,828✔
3659
    double dfBestDownsamplingFactor = 0;
322,828✔
3660
    int nBestOverviewLevel = -1;
322,828✔
3661

3662
    const char *pszOversampligThreshold =
3663
        CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
322,828✔
3664

3665
    // Note: keep this logic for overview selection in sync between
3666
    // gdalwarp_lib.cpp and rasterio.cpp
3667
    // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
3668
    const double dfOversamplingThreshold =
3669
        pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
645,647✔
3670
        : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
322,819✔
3671
            ? 1.0
645,638✔
3672
            : 1.2;
322,828✔
3673
    for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
325,519✔
3674
    {
3675
        GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
5,529✔
3676
        if (poOverview == nullptr ||
11,058✔
3677
            poOverview->GetXSize() > poBand->GetXSize() ||
11,057✔
3678
            poOverview->GetYSize() > poBand->GetYSize())
5,528✔
3679
        {
3680
            continue;
1✔
3681
        }
3682

3683
        // Compute downsampling factor of this overview
3684
        const double dfDownsamplingFactor = std::min(
3685
            poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
5,528✔
3686
            poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
11,056✔
3687

3688
        // Is it nearly the requested factor and better (lower) than
3689
        // the current best factor?
3690
        // Use an epsilon because of numerical instability.
3691
        constexpr double EPSILON = 1e-1;
5,528✔
3692
        if (dfDownsamplingFactor >=
5,636✔
3693
                dfDesiredDownsamplingFactor * dfOversamplingThreshold +
5,528✔
3694
                    EPSILON ||
5,420✔
3695
            dfDownsamplingFactor <= dfBestDownsamplingFactor)
3696
        {
3697
            continue;
108✔
3698
        }
3699

3700
        // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3701
        const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
5,420✔
3702

3703
        if (pszResampling != nullptr &&
5,420✔
3704
            STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
71✔
3705
            continue;
16✔
3706

3707
        // OK, this is our new best overview.
3708
        poBestOverview = poOverview;
5,404✔
3709
        nBestOverviewLevel = iOverview;
5,404✔
3710
        dfBestDownsamplingFactor = dfDownsamplingFactor;
5,404✔
3711

3712
        if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) <
5,404✔
3713
            EPSILON)
3714
        {
3715
            break;
2,838✔
3716
        }
3717
    }
3718

3719
    /* -------------------------------------------------------------------- */
3720
    /*      If we didn't find an overview that helps us, just return        */
3721
    /*      indicating failure and the full resolution image will be used.  */
3722
    /* -------------------------------------------------------------------- */
3723
    if (nBestOverviewLevel < 0)
322,828✔
3724
        return -1;
319,922✔
3725

3726
    /* -------------------------------------------------------------------- */
3727
    /*      Recompute the source window in terms of the selected            */
3728
    /*      overview.                                                       */
3729
    /* -------------------------------------------------------------------- */
3730
    const double dfXFactor =
3731
        poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
2,906✔
3732
    const double dfYFactor =
3733
        poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
2,906✔
3734
    CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
2,906✔
3735
             poBestOverview->GetYSize());
3736

3737
    const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
8,718✔
3738
                                static_cast<int>(nXOff / dfXFactor + 0.5));
2,906✔
3739
    const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
8,718✔
3740
                                static_cast<int>(nYOff / dfYFactor + 0.5));
2,906✔
3741
    int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
2,906✔
3742
    int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
2,906✔
3743
    if (nOXOff + nOXSize > poBestOverview->GetXSize())
2,906✔
3744
        nOXSize = poBestOverview->GetXSize() - nOXOff;
×
3745
    if (nOYOff + nOYSize > poBestOverview->GetYSize())
2,906✔
3746
        nOYSize = poBestOverview->GetYSize() - nOYOff;
2✔
3747

3748
    if (psExtraArg)
2,906✔
3749
    {
3750
        if (psExtraArg->bFloatingPointWindowValidity)
2,906✔
3751
        {
3752
            psExtraArg->dfXOff /= dfXFactor;
45✔
3753
            psExtraArg->dfXSize /= dfXFactor;
45✔
3754
            psExtraArg->dfYOff /= dfYFactor;
45✔
3755
            psExtraArg->dfYSize /= dfYFactor;
45✔
3756
        }
3757
        else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
2,861✔
3758
        {
3759
            psExtraArg->bFloatingPointWindowValidity = true;
16✔
3760
            psExtraArg->dfXOff = nXOff / dfXFactor;
16✔
3761
            psExtraArg->dfXSize = nXSize / dfXFactor;
16✔
3762
            psExtraArg->dfYOff = nYOff / dfYFactor;
16✔
3763
            psExtraArg->dfYSize = nYSize / dfYFactor;
16✔
3764
        }
3765
    }
3766

3767
    nXOff = nOXOff;
2,906✔
3768
    nYOff = nOYOff;
2,906✔
3769
    nXSize = nOXSize;
2,906✔
3770
    nYSize = nOYSize;
2,906✔
3771

3772
    return nBestOverviewLevel;
2,906✔
3773
}
3774

3775
/************************************************************************/
3776
/*                          OverviewRasterIO()                          */
3777
/*                                                                      */
3778
/*      Special work function to utilize available overviews to         */
3779
/*      more efficiently satisfy downsampled requests.  It will         */
3780
/*      return CE_Failure if there are no appropriate overviews         */
3781
/*      available but it doesn't emit any error messages.               */
3782
/************************************************************************/
3783

3784
//! @cond Doxygen_Suppress
3785
// Serve a RasterIO request from the best matching overview of this band.
// Returns CE_Failure, without emitting an error, when no overview is
// selected or when the selected overview cannot be fetched; otherwise
// returns the result of RasterIO() on the overview band.
CPLErr GDALRasterBand::OverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)

{
    // Work on a private copy: overview selection may rewrite the
    // floating-point window it carries.
    GDALRasterIOExtraArg sOvrExtraArg;
    GDALCopyRasterIOExtraArg(&sOvrExtraArg, psExtraArg);

    // On success the window arguments are rewritten in overview pixels.
    const int iBestLevel =
        GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
                                      nBufXSize, nBufYSize, &sOvrExtraArg);

    GDALRasterBand *poOvrBand =
        iBestLevel >= 0 ? GetOverview(iBestLevel) : nullptr;
    if (poOvrBand == nullptr)
        return CE_Failure;

    /* -------------------------------------------------------------------- */
    /*      Recast the call in terms of the new raster layer.               */
    /* -------------------------------------------------------------------- */
    return poOvrBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
                               nBufXSize, nBufYSize, eBufType, nPixelSpace,
                               nLineSpace, &sOvrExtraArg);
}
3810

3811
/************************************************************************/
3812
/*                      TryOverviewRasterIO()                           */
3813
/************************************************************************/
3814

3815
// Attempt to satisfy a RasterIO request from an overview of this band.
// *pbTried is set to TRUE when an overview band was used, in which case the
// result of its RasterIO() is returned. Otherwise *pbTried is set to FALSE
// and CE_None is returned.
CPLErr GDALRasterBand::TryOverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
    int *pbTried)
{
    // Overview selection rewrites the window and the extra arguments, so
    // hand it private copies.
    int nOvrXOff = nXOff;
    int nOvrYOff = nYOff;
    int nOvrXSize = nXSize;
    int nOvrYSize = nYSize;
    GDALRasterIOExtraArg sOvrExtraArg;
    GDALCopyRasterIOExtraArg(&sOvrExtraArg, psExtraArg);

    const int iOvrLevel = GDALBandGetBestOverviewLevel2(
        this, nOvrXOff, nOvrYOff, nOvrXSize, nOvrYSize, nBufXSize, nBufYSize,
        &sOvrExtraArg);

    GDALRasterBand *poOverviewBand =
        iOvrLevel >= 0 ? GetOverview(iOvrLevel) : nullptr;
    if (poOverviewBand == nullptr)
    {
        // Nothing usable: let the caller read at full resolution.
        *pbTried = FALSE;
        return CE_None;
    }

    *pbTried = TRUE;
    return poOverviewBand->RasterIO(eRWFlag, nOvrXOff, nOvrYOff, nOvrXSize,
                                    nOvrYSize, pData, nBufXSize, nBufYSize,
                                    eBufType, nPixelSpace, nLineSpace,
                                    &sOvrExtraArg);
}
3849

3850
/************************************************************************/
3851
/*                      TryOverviewRasterIO()                           */
3852
/************************************************************************/
3853

3854
// Attempt to satisfy a dataset-level RasterIO request from overviews.
// The overview level is chosen from the first band of the dataset
// (papoBands[0]); the request is then forwarded to the dataset owning that
// overview band. *pbTried is set to TRUE when the request was forwarded, in
// which case the result of that RasterIO() is returned. Otherwise *pbTried
// is set to FALSE and CE_None is returned.
CPLErr GDALDataset::TryOverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg,
    int *pbTried)
{
    // Overview selection rewrites the window and the extra arguments, so
    // hand it private copies.
    int nXOffMod = nXOff;
    int nYOffMod = nYOff;
    int nXSizeMod = nXSize;
    int nYSizeMod = nYSize;
    GDALRasterIOExtraArg sExtraArg;
    GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);

    const int iOvrLevel = GDALBandGetBestOverviewLevel2(
        papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
        nBufYSize, &sExtraArg);

    if (iOvrLevel >= 0)
    {
        // Fetch the overview band and its dataset once, so that the
        // pointers being null-checked are the ones actually dereferenced.
        GDALRasterBand *poOverviewBand = papoBands[0]->GetOverview(iOvrLevel);
        GDALDataset *poOverviewDS =
            poOverviewBand != nullptr ? poOverviewBand->GetDataset() : nullptr;
        if (poOverviewDS != nullptr)
        {
            *pbTried = TRUE;
            return poOverviewDS->RasterIO(
                eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData,
                nBufXSize, nBufYSize, eBufType, nBandCount, panBandMap,
                nPixelSpace, nLineSpace, nBandSpace, &sExtraArg);
        }
    }

    *pbTried = FALSE;
    return CE_None;
}
3887

3888
/************************************************************************/
3889
/*                  GDALDatasetGetBestOverviewLevel()                   */
3890
/*                                                                      */
3891
/* Returns the best overview level to satisfy the query or -1 if none   */
3892
/* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid     */
3893
/* overview level                                                       */
3894
/************************************************************************/
3895

3896
static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
4✔
3897
                                           int &nYOff, int &nXSize, int &nYSize,
3898
                                           int nBufXSize, int nBufYSize,
3899
                                           int nBandCount,
3900
                                           const int *panBandMap,
3901
                                           GDALRasterIOExtraArg *psExtraArg)
3902
{
3903
    int nOverviewCount = 0;
4✔
3904
    GDALRasterBand *poFirstBand = nullptr;
4✔
3905

3906
    /* -------------------------------------------------------------------- */
3907
    /* Check that all bands have the same number of overviews and           */
3908
    /* that they have all the same size and block dimensions                */
3909
    /* -------------------------------------------------------------------- */
3910
    for (int iBand = 0; iBand < nBandCount; iBand++)
12✔
3911
    {
3912
        GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
8✔
3913
        if (poBand == nullptr)
8✔
3914
            return -1;
×
3915
        if (iBand == 0)
8✔
3916
        {
3917
            poFirstBand = poBand;
4✔
3918
            nOverviewCount = poBand->GetOverviewCount();
4✔
3919
        }
3920
        else if (nOverviewCount != poBand->GetOverviewCount())
4✔
3921
        {
3922
            CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
×
3923
                             "mismatched overview count, use std method.");
3924
            return -1;
×
3925
        }
3926
        else
3927
        {
3928
            for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
4✔
3929
            {
3930
                GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
×
3931
                GDALRasterBand *poOvrFirstBand =
3932
                    poFirstBand->GetOverview(iOverview);
×
3933
                if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
×
3934
                    continue;
×
3935

3936
                if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
×
3937
                    poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
×
3938
                {
3939
                    CPLDebug("GDAL",
×
3940
                             "GDALDataset::GetBestOverviewLevel() ... "
3941
                             "mismatched overview sizes, use std method.");
3942
                    return -1;
×
3943
                }
3944
                int nBlockXSizeFirst = 0;
×
3945
                int nBlockYSizeFirst = 0;
×
3946
                poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
×
3947
                                             &nBlockYSizeFirst);
3948

3949
                int nBlockXSizeCurrent = 0;
×
3950
                int nBlockYSizeCurrent = 0;
×
3951
                poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
×
3952
                                        &nBlockYSizeCurrent);
3953

3954
                if (nBlockXSizeFirst != nBlockXSizeCurrent ||
×
3955
                    nBlockYSizeFirst != nBlockYSizeCurrent)
×
3956
                {
3957
                    CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
×
3958
                                     "mismatched block sizes, use std method.");
3959
                    return -1;
×
3960
                }
3961
            }
3962
        }
3963
    }
3964
    if (poFirstBand == nullptr)
4✔
3965
        return -1;
×
3966

3967
    return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
4✔
3968
                                         nYSize, nBufXSize, nBufYSize,
3969
                                         psExtraArg);
4✔
3970
}
3971

3972
/************************************************************************/
3973
/*                         BlockBasedRasterIO()                         */
3974
/*                                                                      */
3975
/*      This convenience function implements a dataset level            */
3976
/*      RasterIO() interface based on calling down to fetch blocks,     */
3977
/*      much like the GDALRasterBand::IRasterIO(), but it handles       */
3978
/*      all bands at once, so that a format driver that handles a       */
3979
/*      request for different bands of the same block efficiently       */
3980
/*      (i.e. without re-reading interleaved data) is used efficiently. */
3981
/*                                                                      */
3982
/*      This method is intended to be called by an overridden           */
3983
/*      IRasterIO() method in the driver specific GDALDataset           */
3984
/*      derived class.                                                  */
3985
/*                                                                      */
3986
/*      Default internal implementation of RasterIO() ... utilizes      */
3987
/*      the Block access methods to satisfy the request.  This would    */
3988
/*      normally only be overridden by formats with overviews.          */
3989
/*                                                                      */
3990
/*      To keep things relatively simple, this method does not          */
3991
/*      currently take advantage of some special cases addressed in     */
3992
/*      GDALRasterBand::IRasterIO(), so it is likely best to only       */
3993
/*      call it when you know it will help.  That is in cases where     */
3994
/*      data is at 1:1 to the buffer, and you know the driver is        */
3995
/*      implementing interleaved IO efficiently on a block by block     */
3996
/*      basis. Overviews will be used when possible.                    */
3997
/************************************************************************/
3998

3999
CPLErr GDALDataset::BlockBasedRasterIO(
63,603✔
4000
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
4001
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
4002
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
4003
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
4004

4005
{
4006
    CPLAssert(nullptr != pData);
63,603✔
4007

4008
    GByte **papabySrcBlock = nullptr;
63,603✔
4009
    GDALRasterBlock *poBlock = nullptr;
63,603✔
4010
    GDALRasterBlock **papoBlocks = nullptr;
63,603✔
4011
    int nLBlockX = -1;
63,603✔
4012
    int nLBlockY = -1;
63,603✔
4013
    int iBufYOff;
4014
    int iBufXOff;
4015
    int nBlockXSize = 1;
63,603✔
4016
    int nBlockYSize = 1;
63,603✔
4017
    CPLErr eErr = CE_None;
63,603✔
4018
    GDALDataType eDataType = GDT_Byte;
63,603✔
4019

4020
    const bool bUseIntegerRequestCoords =
63,603✔
4021
        (!psExtraArg->bFloatingPointWindowValidity ||
64,035✔
4022
         (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
432✔
4023
          nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
427✔
4024

4025
    /* -------------------------------------------------------------------- */
4026
    /*      Ensure that all bands share a common block size and data type.  */
4027
    /* -------------------------------------------------------------------- */
4028
    for (int iBand = 0; iBand < nBandCount; iBand++)
301,086✔
4029
    {
4030
        GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
237,483✔
4031

4032
        if (iBand == 0)
237,485✔
4033
        {
4034
            poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
63,600✔
4035
            eDataType = poBand->GetRasterDataType();
63,599✔
4036
        }
4037
        else
4038
        {
4039
            int nThisBlockXSize = 0;
173,885✔
4040
            int nThisBlockYSize = 0;
173,885✔
4041
            poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
173,885✔
4042
            if (nThisBlockXSize != nBlockXSize ||
173,883✔
4043
                nThisBlockYSize != nBlockYSize)
173,883✔
4044
            {
4045
                CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
1✔
4046
                                 "mismatched block sizes, use std method.");
4047
                return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
×
4048
                                         pData, nBufXSize, nBufYSize, eBufType,
4049
                                         nBandCount, panBandMap, nPixelSpace,
4050
                                         nLineSpace, nBandSpace, psExtraArg);
×
4051
            }
4052

4053
            if (eDataType != poBand->GetRasterDataType() &&
173,882✔
4054
                (nXSize != nBufXSize || nYSize != nBufYSize))
×
4055
            {
4056
                CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
×
4057
                                 "mismatched band data types, use std method.");
4058
                return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
×
4059
                                         pData, nBufXSize, nBufYSize, eBufType,
4060
                                         nBandCount, panBandMap, nPixelSpace,
4061
                                         nLineSpace, nBandSpace, psExtraArg);
×
4062
            }
4063
        }
4064
    }
4065

4066
    /* ==================================================================== */
4067
    /*      In this special case at full resolution we step through in      */
4068
    /*      blocks, turning the request over to the per-band                */
4069
    /*      IRasterIO(), but ensuring that all bands of one block are       */
4070
    /*      called before proceeding to the next.                           */
4071
    /* ==================================================================== */
4072

4073
    if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
63,603✔
4074
    {
4075
        GDALRasterIOExtraArg sDummyExtraArg;
4076
        INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
63,597✔
4077

4078
        int nChunkYSize = 0;
63,597✔
4079
        int nChunkXSize = 0;
63,597✔
4080

4081
        for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
220,069✔
4082
        {
4083
            const int nChunkYOff = iBufYOff + nYOff;
157,500✔
4084
            nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
157,500✔
4085
            if (nChunkYOff + nChunkYSize > nYOff + nYSize)
157,500✔
4086
                nChunkYSize = (nYOff + nYSize) - nChunkYOff;
58,873✔
4087

4088
            for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize)
838,834✔
4089
            {
4090
                const int nChunkXOff = iBufXOff + nXOff;
682,355✔
4091
                nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
682,355✔
4092
                if (nChunkXOff + nChunkXSize > nXOff + nXSize)
682,355✔
4093
                    nChunkXSize = (nXOff + nXSize) - nChunkXOff;
74,878✔
4094

4095
                GByte *pabyChunkData =
682,355✔
4096
                    static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
682,355✔
4097
                    static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
682,355✔
4098

4099
                for (int iBand = 0; iBand < nBandCount; iBand++)
3,315,780✔
4100
                {
4101
                    GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
2,634,440✔
4102

4103
                    eErr = poBand->IRasterIO(
5,268,860✔
4104
                        eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
4105
                        nChunkYSize,
4106
                        pabyChunkData +
2,634,420✔
4107
                            static_cast<GPtrDiff_t>(iBand) * nBandSpace,
2,634,420✔
4108
                        nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
4109
                        nLineSpace, &sDummyExtraArg);
2,634,420✔
4110
                    if (eErr != CE_None)
2,634,440✔
4111
                        return eErr;
1,024✔
4112
                }
4113
            }
4114

4115
            if (psExtraArg->pfnProgress != nullptr &&
176,724✔
4116
                !psExtraArg->pfnProgress(
20,245✔
4117
                    1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
176,724✔
4118
                        nBufYSize,
4119
                    "", psExtraArg->pProgressData))
4120
            {
4121
                return CE_Failure;
13✔
4122
            }
4123
        }
4124

4125
        return CE_None;
62,569✔
4126
    }
4127

4128
    /* Below code is not compatible with that case. It would need a complete */
4129
    /* separate code like done in GDALRasterBand::IRasterIO. */
4130
    if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
6✔
4131
    {
4132
        return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
×
4133
                                 nBufXSize, nBufYSize, eBufType, nBandCount,
4134
                                 panBandMap, nPixelSpace, nLineSpace,
4135
                                 nBandSpace, psExtraArg);
×
4136
    }
4137

4138
    /* We could have a smarter implementation, but that will do for now */
4139
    if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
6✔
4140
        (nBufXSize != nXSize || nBufYSize != nYSize))
×
4141
    {
4142
        return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
×
4143
                                 nBufXSize, nBufYSize, eBufType, nBandCount,
4144
                                 panBandMap, nPixelSpace, nLineSpace,
4145
                                 nBandSpace, psExtraArg);
×
4146
    }
4147

4148
    /* ==================================================================== */
4149
    /*      Loop reading required source blocks to satisfy output           */
4150
    /*      request.  This is the most general implementation.              */
4151
    /* ==================================================================== */
4152

4153
    const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
6✔
4154

4155
    papabySrcBlock =
4156
        static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
4✔
4157
    papoBlocks =
4158
        static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));
4✔
4159

4160
    /* -------------------------------------------------------------------- */
4161
    /*      Select an overview level if appropriate.                        */
4162
    /* -------------------------------------------------------------------- */
4163

4164
    GDALRasterIOExtraArg sExtraArg;
4165
    GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4✔
4166
    const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
4✔
4167
        this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
4168
        panBandMap, &sExtraArg);
4169
    if (nOverviewLevel >= 0)
4✔
4170
    {
4171
        GetRasterBand(panBandMap[0])
2✔
4172
            ->GetOverview(nOverviewLevel)
2✔
4173
            ->GetBlockSize(&nBlockXSize, &nBlockYSize);
2✔
4174
    }
4175

4176
    double dfXOff = nXOff;
4✔
4177
    double dfYOff = nYOff;
4✔
4178
    double dfXSize = nXSize;
4✔
4179
    double dfYSize = nYSize;
4✔
4180
    if (sExtraArg.bFloatingPointWindowValidity)
4✔
4181
    {
4182
        dfXOff = sExtraArg.dfXOff;
2✔
4183
        dfYOff = sExtraArg.dfYOff;
2✔
4184
        dfXSize = sExtraArg.dfXSize;
2✔
4185
        dfYSize = sExtraArg.dfYSize;
2✔
4186
    }
4187

4188
    /* -------------------------------------------------------------------- */
4189
    /*      Compute stepping increment.                                     */
4190
    /* -------------------------------------------------------------------- */
4191
    const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
4✔
4192
    const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
4✔
4193

4194
    constexpr double EPS = 1e-10;
4✔
4195
    /* -------------------------------------------------------------------- */
4196
    /*      Loop over buffer computing source locations.                    */
4197
    /* -------------------------------------------------------------------- */
4198
    for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
36✔
4199
    {
4200
        GPtrDiff_t iSrcOffset;
4201

4202
        // Add small epsilon to avoid some numeric precision issues.
4203
        const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
32✔
4204
        const int iSrcY = static_cast<int>(std::min(
32✔
4205
            std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
32✔
4206

4207
        GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
32✔
4208
                                static_cast<GPtrDiff_t>(nLineSpace);
4209

4210
        for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
302✔
4211
        {
4212
            const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
270✔
4213
            const int iSrcX = static_cast<int>(std::min(
270✔
4214
                std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1)));
270✔
4215

4216
            // FIXME: this code likely doesn't work if the dirty block gets
4217
            // flushed to disk before being completely written. In the meantime,
4218
            // bJustInitialize should probably be set to FALSE even if it is not
4219
            // ideal performance wise, and for lossy compression
4220

4221
            /* --------------------------------------------------------------------
4222
             */
4223
            /*      Ensure we have the appropriate block loaded. */
4224
            /* --------------------------------------------------------------------
4225
             */
4226
            if (iSrcX < nLBlockX * nBlockXSize ||
270✔
4227
                iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
270✔
4228
                iSrcY < nLBlockY * nBlockYSize ||
266✔
4229
                iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
266✔
4230
            {
4231
                nLBlockX = iSrcX / nBlockXSize;
4✔
4232
                nLBlockY = iSrcY / nBlockYSize;
4✔
4233

4234
                const bool bJustInitialize =
4✔
4235
                    eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
×
4236
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
×
4237
                    nXOff <= nLBlockX * nBlockXSize &&
4✔
4238
                    nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
×
4239
                /*bool bMemZeroBuffer = FALSE;
4240
                if( eRWFlag == GF_Write && !bJustInitialize &&
4241
                    nXOff <= nLBlockX * nBlockXSize &&
4242
                    nYOff <= nLBlockY * nBlockYSize &&
4243
                    (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4244
                     (nXOff + nXSize == GetRasterXSize() &&
4245
                     (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4246
                    (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4247
                     (nYOff + nYSize == GetRasterYSize() &&
4248
                     (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4249
                {
4250
                    bJustInitialize = TRUE;
4251
                    bMemZeroBuffer = TRUE;
4252
                }*/
4253
                for (int iBand = 0; iBand < nBandCount; iBand++)
12✔
4254
                {
4255
                    GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
8✔
4256
                    if (nOverviewLevel >= 0)
8✔
4257
                        poBand = poBand->GetOverview(nOverviewLevel);
2✔
4258
                    poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY,
16✔
4259
                                                        bJustInitialize);
8✔
4260
                    if (poBlock == nullptr)
8✔
4261
                    {
4262
                        eErr = CE_Failure;
×
4263
                        goto CleanupAndReturn;
×
4264
                    }
4265

4266
                    if (eRWFlag == GF_Write)
8✔
4267
                        poBlock->MarkDirty();
×
4268

4269
                    if (papoBlocks[iBand] != nullptr)
8✔
4270
                        papoBlocks[iBand]->DropLock();
×
4271

4272
                    papoBlocks[iBand] = poBlock;
8✔
4273

4274
                    papabySrcBlock[iBand] =
8✔
4275
                        static_cast<GByte *>(poBlock->GetDataRef());
8✔
4276
                    /*if( bMemZeroBuffer )
4277
                    {
4278
                        memset(papabySrcBlock[iBand], 0,
4279
                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
4280
                    * nBlockYSize);
4281
                    }*/
4282
                }
4283
            }
4284

4285
            /* --------------------------------------------------------------------
4286
             */
4287
            /*      Copy over this pixel of data. */
4288
            /* --------------------------------------------------------------------
4289
             */
4290
            iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) -
270✔
4291
                          static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
270✔
4292
                          (static_cast<GPtrDiff_t>(iSrcY) -
270✔
4293
                           static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
270✔
4294
                              nBlockXSize) *
270✔
4295
                         nBandDataSize;
270✔
4296

4297
            for (int iBand = 0; iBand < nBandCount; iBand++)
980✔
4298
            {
4299
                GByte *pabySrcBlock = papabySrcBlock[iBand];
710✔
4300
                GPtrDiff_t iBandBufOffset =
710✔
4301
                    iBufOffset + static_cast<GPtrDiff_t>(iBand) *
710✔
4302
                                     static_cast<GPtrDiff_t>(nBandSpace);
4303

4304
                if (eDataType == eBufType)
710✔
4305
                {
4306
                    if (eRWFlag == GF_Read)
710✔
4307
                        memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
710✔
4308
                               pabySrcBlock + iSrcOffset, nBandDataSize);
710✔
4309
                    else
4310
                        memcpy(pabySrcBlock + iSrcOffset,
×
4311
                               static_cast<const GByte *>(pData) +
4312
                                   iBandBufOffset,
×
4313
                               nBandDataSize);
4314
                }
4315
                else
4316
                {
4317
                    /* type to type conversion ... ouch, this is expensive way
4318
                       of handling single words */
4319

4320
                    if (eRWFlag == GF_Read)
×
4321
                        GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
×
4322
                                        static_cast<GByte *>(pData) +
4323
                                            iBandBufOffset,
×
4324
                                        eBufType, 0, 1);
4325
                    else
4326
                        GDALCopyWords64(static_cast<const GByte *>(pData) +
×
4327
                                            iBandBufOffset,
×
4328
                                        eBufType, 0, pabySrcBlock + iSrcOffset,
×
4329
                                        eDataType, 0, 1);
4330
                }
4331
            }
4332

4333
            iBufOffset += static_cast<int>(nPixelSpace);
270✔
4334
        }
4335
    }
4336

4337
    /* -------------------------------------------------------------------- */
4338
    /*      CleanupAndReturn.                                               */
4339
    /* -------------------------------------------------------------------- */
4340
CleanupAndReturn:
4✔
4341
    CPLFree(papabySrcBlock);
4✔
4342
    if (papoBlocks != nullptr)
4✔
4343
    {
4344
        for (int iBand = 0; iBand < nBandCount; iBand++)
12✔
4345
        {
4346
            if (papoBlocks[iBand] != nullptr)
8✔
4347
                papoBlocks[iBand]->DropLock();
8✔
4348
        }
4349
        CPLFree(papoBlocks);
4✔
4350
    }
4351

4352
    return eErr;
4✔
4353
}
4354

4355
//! @endcond
4356

4357
/************************************************************************/
4358
/*                  GDALCopyWholeRasterGetSwathSize()                   */
4359
/************************************************************************/
4360

4361
static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
2,868✔
4362
                                            GDALRasterBand *poDstPrototypeBand,
4363
                                            int nBandCount,
4364
                                            int bDstIsCompressed,
4365
                                            int bInterleave, int *pnSwathCols,
4366
                                            int *pnSwathLines)
4367
{
4368
    GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
2,868✔
4369
    int nSrcBlockXSize = 0;
2,868✔
4370
    int nSrcBlockYSize = 0;
2,868✔
4371
    int nBlockXSize = 0;
2,868✔
4372
    int nBlockYSize = 0;
2,868✔
4373

4374
    int nXSize = poSrcPrototypeBand->GetXSize();
2,868✔
4375
    int nYSize = poSrcPrototypeBand->GetYSize();
2,868✔
4376

4377
    poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
2,868✔
4378
    poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
2,868✔
4379

4380
    const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
2,868✔
4381
    const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
2,868✔
4382

4383
    int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
2,868✔
4384
    if (bInterleave)
2,868✔
4385
        nPixelSize *= nBandCount;
1,322✔
4386

4387
    // aim for one row of blocks.  Do not settle for less.
4388
    int nSwathCols = nXSize;
2,868✔
4389
    int nSwathLines = nMaxBlockYSize;
2,868✔
4390

4391
    const char *pszSrcCompression =
4392
        poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
2,868✔
4393
    if (pszSrcCompression == nullptr)
2,868✔
4394
    {
4395
        auto poSrcDS = poSrcPrototypeBand->GetDataset();
2,842✔
4396
        if (poSrcDS)
2,842✔
4397
            pszSrcCompression =
4398
                poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
2,836✔
4399
    }
4400

4401
    /* -------------------------------------------------------------------- */
4402
    /*      What will our swath size be?                                    */
4403
    /* -------------------------------------------------------------------- */
4404
    // When writing interleaved data in a compressed format, we want to be sure
4405
    // that each block will only be written once, so the swath size must not be
4406
    // greater than the block cache.
4407
    const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
2,868✔
4408
    int nTargetSwathSize;
4409
    if (pszSwathSize != nullptr)
2,868✔
4410
        nTargetSwathSize = static_cast<int>(
×
4411
            std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
×
4412
    else
4413
    {
4414
        // As a default, take one 1/4 of the cache size.
4415
        nTargetSwathSize = static_cast<int>(
2,868✔
4416
            std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
2,868✔
4417

4418
        // but if the minimum ideal swath buf size is less, then go for it to
4419
        // avoid unnecessarily abusing RAM usage.
4420
        // but try to use 10 MB at least.
4421
        GIntBig nIdealSwathBufSize =
2,868✔
4422
            static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
2,868✔
4423
        int nMinTargetSwathSize = 10 * 1000 * 1000;
2,868✔
4424

4425
        if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
2,868✔
4426
             GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
2,868✔
4427
        {
4428
            nMinTargetSwathSize = nTargetSwathSize;
2✔
4429
        }
4430

4431
        if (nIdealSwathBufSize < nTargetSwathSize &&
2,868✔
4432
            nIdealSwathBufSize < nMinTargetSwathSize)
2,858✔
4433
        {
4434
            nIdealSwathBufSize = nMinTargetSwathSize;
2,855✔
4435
        }
4436

4437
        if (pszSrcCompression != nullptr &&
2,868✔
4438
            EQUAL(pszSrcCompression, "JPEG2000") &&
156✔
4439
            (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
×
4440
                                   (nSrcBlockYSize % nBlockYSize) == 0)))
×
4441
        {
4442
            nIdealSwathBufSize =
2✔
4443
                std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
4✔
4444
                                                 nSrcBlockYSize * nPixelSize);
2✔
4445
        }
4446
        if (nTargetSwathSize > nIdealSwathBufSize)
2,868✔
4447
            nTargetSwathSize = static_cast<int>(
2,854✔
4448
                std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
2,854✔
4449
    }
4450

4451
    if (nTargetSwathSize < 1000000)
2,868✔
4452
        nTargetSwathSize = 1000000;
8✔
4453

4454
    /* But let's check that  */
4455
    if (bDstIsCompressed && bInterleave &&
3,076✔
4456
        nTargetSwathSize > GDALGetCacheMax64())
208✔
4457
    {
4458
        CPLError(CE_Warning, CPLE_AppDefined,
×
4459
                 "When translating into a compressed interleave format, "
4460
                 "the block cache size (" CPL_FRMT_GIB ") "
4461
                 "should be at least the size of the swath (%d) "
4462
                 "(GDAL_SWATH_SIZE config. option)",
4463
                 GDALGetCacheMax64(), nTargetSwathSize);
4464
    }
4465

4466
#define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
4467
#define ROUND_TO(x, y) (((x) / (y)) * (y))
4468

4469
    // if both input and output datasets are tiled, that the tile dimensions
4470
    // are "compatible", try to stick  to a swath dimension that is a multiple
4471
    // of input and output block dimensions.
4472
    if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
2,868✔
4473
        IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
34✔
4474
        IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
34✔
4475
        IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
34✔
4476
        IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
34✔
4477
    {
4478
        if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
34✔
4479
                nPixelSize <=
34✔
4480
            static_cast<GIntBig>(nTargetSwathSize))
34✔
4481
        {
4482
            nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
34✔
4483
            nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
34✔
4484
            if (nSwathCols == 0)
34✔
4485
                nSwathCols = nMaxBlockXSize;
×
4486
            if (nSwathCols > nXSize)
34✔
4487
                nSwathCols = nXSize;
32✔
4488
            nSwathLines = nMaxBlockYSize;
34✔
4489

4490
            if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
34✔
4491
                static_cast<GIntBig>(nTargetSwathSize))
34✔
4492
            {
4493
                nSwathCols = nXSize;
×
4494
                nSwathLines = nBlockYSize;
×
4495
            }
4496
        }
4497
    }
4498

4499
    const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
2,868✔
4500
    const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
2,868✔
4501
    if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
2,868✔
4502
    {
4503
        nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
1✔
4504
        if (nSwathLines == 0)
1✔
4505
            nSwathLines = 1;
1✔
4506

4507
        CPLDebug(
1✔
4508
            "GDAL",
4509
            "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4510
            "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4511
            "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4512
            nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
1✔
4513
    }
4514
    // If we are processing single scans, try to handle several at once.
4515
    // If we are handling swaths already, only grow the swath if a row
4516
    // of blocks is substantially less than our target buffer size.
4517
    else if (nSwathLines == 1 ||
2,867✔
4518
             nMemoryPerCol * nSwathLines <
2,366✔
4519
                 static_cast<GIntBig>(nTargetSwathSize) / 10)
2,366✔
4520
    {
4521
        nSwathLines = std::min(
2,840✔
4522
            nYSize,
4523
            std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
2,840✔
4524

4525
        /* If possible try to align to source and target block height */
4526
        if ((nSwathLines % nMaxBlockYSize) != 0 &&
2,840✔
4527
            nSwathLines > nMaxBlockYSize &&
961✔
4528
            IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
961✔
4529
            IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
933✔
4530
            nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
151✔
4531
    }
4532

4533
    if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
2,868✔
4534
        (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
×
4535
                               IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
×
4536
    {
4537
        // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
4538
        if (nSwathLines < nSrcBlockYSize)
2✔
4539
        {
4540
            nSwathLines = nSrcBlockYSize;
×
4541

4542
            // Number of pixels that can be read/write simultaneously.
4543
            nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
×
4544
            nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
×
4545
            if (nSwathCols == 0)
×
4546
                nSwathCols = nSrcBlockXSize;
×
4547
            if (nSwathCols > nXSize)
×
4548
                nSwathCols = nXSize;
×
4549

4550
            CPLDebug(
×
4551
                "GDAL",
4552
                "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4553
                "too high block, "
4554
                "use partial width at one time");
4555
        }
4556
        else if ((nSwathLines % nSrcBlockYSize) != 0)
2✔
4557
        {
4558
            /* Round on a multiple of nSrcBlockYSize */
4559
            nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
×
4560
            CPLDebug(
×
4561
                "GDAL",
4562
                "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4563
                "round nSwathLines to block height : %d",
4564
                nSwathLines);
4565
        }
4566
    }
4567
    else if (bDstIsCompressed)
2,866✔
4568
    {
4569
        if (nSwathLines < nBlockYSize)
366✔
4570
        {
4571
            nSwathLines = nBlockYSize;
142✔
4572

4573
            // Number of pixels that can be read/write simultaneously.
4574
            nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
142✔
4575
            nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
142✔
4576
            if (nSwathCols == 0)
142✔
4577
                nSwathCols = nBlockXSize;
×
4578
            if (nSwathCols > nXSize)
142✔
4579
                nSwathCols = nXSize;
142✔
4580

4581
            CPLDebug(
142✔
4582
                "GDAL",
4583
                "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4584
                "too high block, "
4585
                "use partial width at one time");
4586
        }
4587
        else if ((nSwathLines % nBlockYSize) != 0)
224✔
4588
        {
4589
            // Round on a multiple of nBlockYSize.
4590
            nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
9✔
4591
            CPLDebug(
9✔
4592
                "GDAL",
4593
                "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4594
                "round nSwathLines to block height : %d",
4595
                nSwathLines);
4596
        }
4597
    }
4598

4599
    *pnSwathCols = nSwathCols;
2,868✔
4600
    *pnSwathLines = nSwathLines;
2,868✔
4601
}
2,868✔
4602

4603
/************************************************************************/
4604
/*                     GDALDatasetCopyWholeRaster()                     */
4605
/************************************************************************/
4606

4607
/**
4608
 * \brief Copy all dataset raster data.
4609
 *
4610
 * This function copies the complete raster contents of one dataset to
4611
 * another similarly configured dataset.  The source and destination
4612
 * dataset must have the same number of bands, and the same width
4613
 * and height.  The bands do not have to have the same data type.
4614
 *
4615
 * This function is primarily intended to support implementation of
4616
 * driver specific CreateCopy() functions.  It implements efficient copying,
4617
 * in particular "chunking" the copy in substantial blocks and, if appropriate,
4618
 * performing the transfer in a pixel interleaved fashion.
4619
 *
4620
 * Currently the only papszOptions value supported are :
4621
 * <ul>
4622
 * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
4623
 * write access pattern (this does not modify the layout of the destination
4624
 * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block
4625
 * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks
4626
 * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
4627
 * (GDAL &gt;= 2.2)</li>
4628
 * </ul>
4629
 * More options may be supported in the future.
4630
 *
4631
 * @param hSrcDS the source dataset
4632
 * @param hDstDS the destination dataset
4633
 * @param papszOptions transfer hints in "StringList" Name=Value format.
4634
 * @param pfnProgress progress reporting function.
4635
 * @param pProgressData callback data for progress function.
4636
 *
4637
 * @return CE_None on success, or CE_Failure on failure.
4638
 */
4639

4640
CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
2,840✔
4641
                                              GDALDatasetH hDstDS,
4642
                                              CSLConstList papszOptions,
4643
                                              GDALProgressFunc pfnProgress,
4644
                                              void *pProgressData)
4645

4646
{
4647
    VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
2,840✔
4648
    VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
2,840✔
4649

4650
    GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
2,840✔
4651
    GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
2,840✔
4652

4653
    if (pfnProgress == nullptr)
2,840✔
4654
        pfnProgress = GDALDummyProgress;
3✔
4655

4656
    /* -------------------------------------------------------------------- */
4657
    /*      Confirm the datasets match in size and band counts.             */
4658
    /* -------------------------------------------------------------------- */
4659
    const int nXSize = poDstDS->GetRasterXSize();
2,840✔
4660
    const int nYSize = poDstDS->GetRasterYSize();
2,840✔
4661
    const int nBandCount = poDstDS->GetRasterCount();
2,840✔
4662

4663
    if (poSrcDS->GetRasterXSize() != nXSize ||
2,840✔
4664
        poSrcDS->GetRasterYSize() != nYSize ||
5,680✔
4665
        poSrcDS->GetRasterCount() != nBandCount)
2,840✔
4666
    {
4667
        CPLError(CE_Failure, CPLE_AppDefined,
×
4668
                 "Input and output dataset sizes or band counts do not\n"
4669
                 "match in GDALDatasetCopyWholeRaster()");
4670
        return CE_Failure;
×
4671
    }
4672

4673
    /* -------------------------------------------------------------------- */
4674
    /*      Report preliminary (0) progress.                                */
4675
    /* -------------------------------------------------------------------- */
4676
    if (!pfnProgress(0.0, nullptr, pProgressData))
2,840✔
4677
    {
4678
        CPLError(CE_Failure, CPLE_UserInterrupt,
1✔
4679
                 "User terminated CreateCopy()");
4680
        return CE_Failure;
1✔
4681
    }
4682

4683
    /* -------------------------------------------------------------------- */
4684
    /*      Get our prototype band, and assume the others are similarly     */
4685
    /*      configured.                                                     */
4686
    /* -------------------------------------------------------------------- */
4687
    if (nBandCount == 0)
2,839✔
4688
        return CE_None;
×
4689

4690
    GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
2,839✔
4691
    GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
2,839✔
4692
    GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
2,839✔
4693

4694
    /* -------------------------------------------------------------------- */
4695
    /*      Do we want to try and do the operation in a pixel               */
4696
    /*      interleaved fashion?                                            */
4697
    /* -------------------------------------------------------------------- */
4698
    bool bInterleave = false;
2,839✔
4699
    const char *pszInterleave =
4700
        poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
2,839✔
4701
    if (pszInterleave != nullptr &&
2,839✔
4702
        (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
1,102✔
4703
        bInterleave = true;
152✔
4704

4705
    pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
2,839✔
4706
    if (pszInterleave != nullptr &&
2,839✔
4707
        (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
2,056✔
4708
        bInterleave = true;
1,293✔
4709

4710
    pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
2,839✔
4711
    if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
2,839✔
4712
        bInterleave = true;
5✔
4713
    else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
2,834✔
4714
        bInterleave = false;
7✔
4715
    // attributes is specific to the TileDB driver
4716
    else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
2,827✔
4717
        bInterleave = true;
4✔
4718
    else if (pszInterleave != nullptr)
2,823✔
4719
    {
4720
        CPLError(CE_Warning, CPLE_NotSupported,
×
4721
                 "Unsupported value for option INTERLEAVE");
4722
    }
4723

4724
    // If the destination is compressed, we must try to write blocks just once,
4725
    // to save disk space (GTiff case for example), and to avoid data loss
4726
    // (JPEG compression for example).
4727
    bool bDstIsCompressed = false;
2,839✔
4728
    const char *pszDstCompressed =
4729
        CSLFetchNameValue(papszOptions, "COMPRESSED");
2,839✔
4730
    if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
2,839✔
4731
        bDstIsCompressed = true;
349✔
4732

4733
    /* -------------------------------------------------------------------- */
4734
    /*      What will our swath size be?                                    */
4735
    /* -------------------------------------------------------------------- */
4736

4737
    int nSwathCols = 0;
2,839✔
4738
    int nSwathLines = 0;
2,839✔
4739
    GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
2,839✔
4740
                                    nBandCount, bDstIsCompressed, bInterleave,
4741
                                    &nSwathCols, &nSwathLines);
4742

4743
    int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
2,839✔
4744
    if (bInterleave)
2,839✔
4745
        nPixelSize *= nBandCount;
1,322✔
4746

4747
    void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
2,839✔
4748
    if (pSwathBuf == nullptr)
2,839✔
4749
    {
4750
        return CE_Failure;
×
4751
    }
4752

4753
    CPLDebug("GDAL",
2,839✔
4754
             "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4755
             nSwathCols, nSwathLines, static_cast<int>(bInterleave));
4756

4757
    // Advise the source raster that we are going to read it completely
4758
    // Note: this might already have been done by GDALCreateCopy() in the
4759
    // likely case this function is indirectly called by it
4760
    poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
2,839✔
4761
                        nullptr, nullptr);
2,839✔
4762

4763
    /* ==================================================================== */
4764
    /*      Band oriented (uninterleaved) case.                             */
4765
    /* ==================================================================== */
4766
    CPLErr eErr = CE_None;
2,839✔
4767
    const bool bCheckHoles =
4768
        CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
2,839✔
4769

4770
    if (!bInterleave)
2,839✔
4771
    {
4772
        GDALRasterIOExtraArg sExtraArg;
4773
        INIT_RASTERIO_EXTRA_ARG(sExtraArg);
1,517✔
4774
        CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress);  // to make cppcheck happy
1,517✔
4775

4776
        const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
4,551✔
4777
                                     DIV_ROUND_UP(nYSize, nSwathLines) *
1,517✔
4778
                                     DIV_ROUND_UP(nXSize, nSwathCols);
1,517✔
4779
        GIntBig nBlocksDone = 0;
1,517✔
4780

4781
        for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
3,738✔
4782
        {
4783
            int nBand = iBand + 1;
2,221✔
4784

4785
            for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4,600✔
4786
            {
4787
                int nThisLines = nSwathLines;
2,379✔
4788

4789
                if (iY + nThisLines > nYSize)
2,379✔
4790
                    nThisLines = nYSize - iY;
274✔
4791

4792
                for (int iX = 0; iX < nXSize && eErr == CE_None;
4,758✔
4793
                     iX += nSwathCols)
2,379✔
4794
                {
4795
                    int nThisCols = nSwathCols;
2,379✔
4796

4797
                    if (iX + nThisCols > nXSize)
2,379✔
4798
                        nThisCols = nXSize - iX;
×
4799

4800
                    int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
2,379✔
4801
                    if (bCheckHoles)
2,379✔
4802
                    {
4803
                        nStatus = poSrcDS->GetRasterBand(nBand)
4804
                                      ->GetDataCoverageStatus(
954✔
4805
                                          iX, iY, nThisCols, nThisLines,
4806
                                          GDAL_DATA_COVERAGE_STATUS_DATA);
4807
                    }
4808
                    if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
2,379✔
4809
                    {
4810
                        sExtraArg.pfnProgress = GDALScaledProgress;
2,375✔
4811
                        sExtraArg.pProgressData = GDALCreateScaledProgress(
4,750✔
4812
                            nBlocksDone / static_cast<double>(nTotalBlocks),
2,375✔
4813
                            (nBlocksDone + 0.5) /
2,375✔
4814
                                static_cast<double>(nTotalBlocks),
2,375✔
4815
                            pfnProgress, pProgressData);
4816
                        if (sExtraArg.pProgressData == nullptr)
2,375✔
4817
                            sExtraArg.pfnProgress = nullptr;
1,410✔
4818

4819
                        eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
2,375✔
4820
                                                 nThisLines, pSwathBuf,
4821
                                                 nThisCols, nThisLines, eDT, 1,
4822
                                                 &nBand, 0, 0, 0, &sExtraArg);
4823

4824
                        GDALDestroyScaledProgress(sExtraArg.pProgressData);
2,375✔
4825

4826
                        if (eErr == CE_None)
2,375✔
4827
                            eErr = poDstDS->RasterIO(
2,371✔
4828
                                GF_Write, iX, iY, nThisCols, nThisLines,
4829
                                pSwathBuf, nThisCols, nThisLines, eDT, 1,
4830
                                &nBand, 0, 0, 0, nullptr);
4831
                    }
4832

4833
                    nBlocksDone++;
2,379✔
4834
                    if (eErr == CE_None &&
4,719✔
4835
                        !pfnProgress(nBlocksDone /
2,340✔
4836
                                         static_cast<double>(nTotalBlocks),
2,340✔
4837
                                     nullptr, pProgressData))
4838
                    {
4839
                        eErr = CE_Failure;
3✔
4840
                        CPLError(CE_Failure, CPLE_UserInterrupt,
3✔
4841
                                 "User terminated CreateCopy()");
4842
                    }
4843
                }
4844
            }
4845
        }
4846
    }
4847

4848
    /* ==================================================================== */
4849
    /*      Pixel interleaved case.                                         */
4850
    /* ==================================================================== */
4851
    else /* if( bInterleave ) */
4852
    {
4853
        GDALRasterIOExtraArg sExtraArg;
4854
        INIT_RASTERIO_EXTRA_ARG(sExtraArg);
1,322✔
4855
        CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress);  // to make cppcheck happy
1,322✔
4856

4857
        const GIntBig nTotalBlocks =
1,322✔
4858
            static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
1,322✔
4859
            DIV_ROUND_UP(nXSize, nSwathCols);
1,322✔
4860
        GIntBig nBlocksDone = 0;
1,322✔
4861

4862
        for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
2,852✔
4863
        {
4864
            int nThisLines = nSwathLines;
1,530✔
4865

4866
            if (iY + nThisLines > nYSize)
1,530✔
4867
                nThisLines = nYSize - iY;
186✔
4868

4869
            for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
3,065✔
4870
            {
4871
                int nThisCols = nSwathCols;
1,535✔
4872

4873
                if (iX + nThisCols > nXSize)
1,535✔
4874
                    nThisCols = nXSize - iX;
3✔
4875

4876
                int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
1,535✔
4877
                if (bCheckHoles)
1,535✔
4878
                {
4879
                    nStatus = 0;
1,344✔
4880
                    for (int iBand = 0; iBand < nBandCount; iBand++)
1,397✔
4881
                    {
4882
                        nStatus |= poSrcDS->GetRasterBand(iBand + 1)
1,378✔
4883
                                       ->GetDataCoverageStatus(
1,378✔
4884
                                           iX, iY, nThisCols, nThisLines,
4885
                                           GDAL_DATA_COVERAGE_STATUS_DATA);
4886
                        if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
1,378✔
4887
                            break;
1,325✔
4888
                    }
4889
                }
4890
                if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
1,535✔
4891
                {
4892
                    sExtraArg.pfnProgress = GDALScaledProgress;
1,516✔
4893
                    sExtraArg.pProgressData = GDALCreateScaledProgress(
3,032✔
4894
                        nBlocksDone / static_cast<double>(nTotalBlocks),
1,516✔
4895
                        (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
1,516✔
4896
                        pfnProgress, pProgressData);
4897
                    if (sExtraArg.pProgressData == nullptr)
1,516✔
4898
                        sExtraArg.pfnProgress = nullptr;
295✔
4899

4900
                    eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
1,516✔
4901
                                             nThisLines, pSwathBuf, nThisCols,
4902
                                             nThisLines, eDT, nBandCount,
4903
                                             nullptr, 0, 0, 0, &sExtraArg);
4904

4905
                    GDALDestroyScaledProgress(sExtraArg.pProgressData);
1,516✔
4906

4907
                    if (eErr == CE_None)
1,516✔
4908
                        eErr = poDstDS->RasterIO(
1,515✔
4909
                            GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
4910
                            nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
4911
                            0, 0, nullptr);
4912
                }
4913

4914
                nBlocksDone++;
1,535✔
4915
                if (eErr == CE_None &&
3,066✔
4916
                    !pfnProgress(nBlocksDone /
1,531✔
4917
                                     static_cast<double>(nTotalBlocks),
1,531✔
4918
                                 nullptr, pProgressData))
4919
                {
4920
                    eErr = CE_Failure;
1✔
4921
                    CPLError(CE_Failure, CPLE_UserInterrupt,
1✔
4922
                             "User terminated CreateCopy()");
4923
                }
4924
            }
4925
        }
4926
    }
4927

4928
    /* -------------------------------------------------------------------- */
4929
    /*      Cleanup                                                         */
4930
    /* -------------------------------------------------------------------- */
4931
    CPLFree(pSwathBuf);
2,839✔
4932

4933
    return eErr;
2,839✔
4934
}
4935

4936
/************************************************************************/
4937
/*                     GDALRasterBandCopyWholeRaster()                  */
4938
/************************************************************************/
4939

4940
/**
4941
 * \brief Copy a whole raster band
4942
 *
4943
 * This function copies the complete raster contents of one band to
4944
 * another similarly configured band.  The source and destination
4945
 * bands must have the same width and height.  The bands do not have
4946
 * to have the same data type.
4947
 *
4948
 * It implements efficient copying, in particular "chunking" the copy in
4949
 * substantial blocks.
4950
 *
4951
 * Currently the only papszOptions value supported are :
4952
 * <ul>
4953
 * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4954
 * achieve best compression.</li>
4955
 * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4956
 * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL &gt;= 2.2)</li>
4957
 * </ul>
4958
 *
4959
 * @param hSrcBand the source band
4960
 * @param hDstBand the destination band
4961
 * @param papszOptions transfer hints in "StringList" Name=Value format.
4962
 * @param pfnProgress progress reporting function.
4963
 * @param pProgressData callback data for progress function.
4964
 *
4965
 * @return CE_None on success, or CE_Failure on failure.
4966
 */
4967

4968
CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
29✔
4969
    GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
4970
    const char *const *const papszOptions, GDALProgressFunc pfnProgress,
4971
    void *pProgressData)
4972

4973
{
4974
    VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
29✔
4975
    VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
29✔
4976

4977
    GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
29✔
4978
    GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
29✔
4979
    CPLErr eErr = CE_None;
29✔
4980

4981
    if (pfnProgress == nullptr)
29✔
4982
        pfnProgress = GDALDummyProgress;
11✔
4983

4984
    /* -------------------------------------------------------------------- */
4985
    /*      Confirm the datasets match in size and band counts.             */
4986
    /* -------------------------------------------------------------------- */
4987
    int nXSize = poSrcBand->GetXSize();
29✔
4988
    int nYSize = poSrcBand->GetYSize();
29✔
4989

4990
    if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
29✔
4991
    {
4992
        CPLError(CE_Failure, CPLE_AppDefined,
×
4993
                 "Input and output band sizes do not\n"
4994
                 "match in GDALRasterBandCopyWholeRaster()");
4995
        return CE_Failure;
×
4996
    }
4997

4998
    /* -------------------------------------------------------------------- */
4999
    /*      Report preliminary (0) progress.                                */
5000
    /* -------------------------------------------------------------------- */
5001
    if (!pfnProgress(0.0, nullptr, pProgressData))
29✔
5002
    {
5003
        CPLError(CE_Failure, CPLE_UserInterrupt,
×
5004
                 "User terminated CreateCopy()");
5005
        return CE_Failure;
×
5006
    }
5007

5008
    GDALDataType eDT = poDstBand->GetRasterDataType();
29✔
5009

5010
    // If the destination is compressed, we must try to write blocks just once,
5011
    // to save disk space (GTiff case for example), and to avoid data loss
5012
    // (JPEG compression for example).
5013
    bool bDstIsCompressed = false;
29✔
5014
    const char *pszDstCompressed =
5015
        CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
29✔
5016
    if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
29✔
5017
        bDstIsCompressed = true;
17✔
5018

5019
    /* -------------------------------------------------------------------- */
5020
    /*      What will our swath size be?                                    */
5021
    /* -------------------------------------------------------------------- */
5022

5023
    int nSwathCols = 0;
29✔
5024
    int nSwathLines = 0;
29✔
5025
    GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
29✔
5026
                                    FALSE, &nSwathCols, &nSwathLines);
5027

5028
    const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
29✔
5029

5030
    void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
29✔
5031
    if (pSwathBuf == nullptr)
29✔
5032
    {
5033
        return CE_Failure;
×
5034
    }
5035

5036
    CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
29✔
5037
             nSwathCols, nSwathLines);
5038

5039
    const bool bCheckHoles =
5040
        CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
29✔
5041

5042
    // Advise the source raster that we are going to read it completely
5043
    poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
29✔
5044

5045
    /* ==================================================================== */
5046
    /*      Band oriented (uninterleaved) case.                             */
5047
    /* ==================================================================== */
5048

5049
    for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
67✔
5050
    {
5051
        int nThisLines = nSwathLines;
38✔
5052

5053
        if (iY + nThisLines > nYSize)
38✔
5054
            nThisLines = nYSize - iY;
3✔
5055

5056
        for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
76✔
5057
        {
5058
            int nThisCols = nSwathCols;
38✔
5059

5060
            if (iX + nThisCols > nXSize)
38✔
5061
                nThisCols = nXSize - iX;
×
5062

5063
            int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
38✔
5064
            if (bCheckHoles)
38✔
5065
            {
5066
                nStatus = poSrcBand->GetDataCoverageStatus(
×
5067
                    iX, iY, nThisCols, nThisLines,
5068
                    GDAL_DATA_COVERAGE_STATUS_DATA);
5069
            }
5070
            if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
38✔
5071
            {
5072
                eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
38✔
5073
                                           nThisLines, pSwathBuf, nThisCols,
5074
                                           nThisLines, eDT, 0, 0, nullptr);
5075

5076
                if (eErr == CE_None)
38✔
5077
                    eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
38✔
5078
                                               nThisLines, pSwathBuf, nThisCols,
5079
                                               nThisLines, eDT, 0, 0, nullptr);
5080
            }
5081

5082
            if (eErr == CE_None &&
76✔
5083
                !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize),
38✔
5084
                             nullptr, pProgressData))
5085
            {
5086
                eErr = CE_Failure;
×
5087
                CPLError(CE_Failure, CPLE_UserInterrupt,
×
5088
                         "User terminated CreateCopy()");
5089
            }
5090
        }
5091
    }
5092

5093
    /* -------------------------------------------------------------------- */
5094
    /*      Cleanup                                                         */
5095
    /* -------------------------------------------------------------------- */
5096
    CPLFree(pSwathBuf);
29✔
5097

5098
    return eErr;
29✔
5099
}
5100

5101
/************************************************************************/
5102
/*                      GDALCopyRasterIOExtraArg ()                     */
5103
/************************************************************************/
5104

5105
void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
323,319✔
5106
                              GDALRasterIOExtraArg *psSrcArg)
5107
{
5108
    INIT_RASTERIO_EXTRA_ARG(*psDestArg);
323,319✔
5109
    if (psSrcArg)
323,319✔
5110
    {
5111
        psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
323,319✔
5112
        psDestArg->pfnProgress = psSrcArg->pfnProgress;
323,319✔
5113
        psDestArg->pProgressData = psSrcArg->pProgressData;
323,319✔
5114
        psDestArg->bFloatingPointWindowValidity =
323,319✔
5115
            psSrcArg->bFloatingPointWindowValidity;
323,319✔
5116
        if (psSrcArg->bFloatingPointWindowValidity)
323,319✔
5117
        {
5118
            psDestArg->dfXOff = psSrcArg->dfXOff;
3,116✔
5119
            psDestArg->dfYOff = psSrcArg->dfYOff;
3,116✔
5120
            psDestArg->dfXSize = psSrcArg->dfXSize;
3,116✔
5121
            psDestArg->dfYSize = psSrcArg->dfYSize;
3,116✔
5122
        }
5123
    }
5124
}
323,319✔
5125

5126
/************************************************************************/
5127
/*                         HasOnlyNoData()                              */
5128
/************************************************************************/
5129

5130
/** Return whether a sample equals the nodata value (plain equality). */
template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
{
    return value == noDataValue;
}

/** float specialization: a NaN nodata value matches any NaN sample. */
template <> bool IsEqualToNoData<float>(float value, float noDataValue)
{
    return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
}

/** double specialization: a NaN nodata value matches any NaN sample. */
template <> bool IsEqualToNoData<double>(double value, double noDataValue)
{
    return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
}

/** Check whether every sample of a 2D window equals the nodata value.
 *
 * Sample (iX, iY, iBand) is read at
 * pBuffer[(iY * nLineStride + iX) * nComponents + iBand].
 *
 * @param pBuffer      First sample of the window. Not dereferenced when the
 *                     window is empty.
 * @param noDataValue  Value every sample is compared against.
 * @param nWidth       Window width, in pixels.
 * @param nHeight      Window height, in lines.
 * @param nLineStride  Distance between the start of two consecutive lines,
 *                     in pixels (each pixel holding nComponents samples).
 * @param nComponents  Number of interleaved samples per pixel.
 * @return true if all samples match noDataValue. An empty window
 *         (nWidth == 0 or nHeight == 0) trivially returns true.
 */
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // An empty window has no sample that could differ from nodata. Returning
    // here also protects the fast test below, whose "nWidth - 1" and
    // "nHeight - 1" would wrap around and read out of bounds.
    if (nWidth == 0 || nHeight == 0)
        return true;

    const size_t nLastCol = nWidth - 1;
    const size_t nLastRow = nHeight - 1;

    // Fast test: check the 4 corners and the middle pixel, which rejects
    // most buffers holding actual data without a full scan.
    const size_t anPixelOffsets[] = {
        0,                                                  // top-left
        nLastCol,                                           // top-right
        nLastRow / 2 * nLineStride + nLastCol / 2,          // middle
        nLastRow * nLineStride,                             // bottom-left
        nLastRow * nLineStride + nLastCol                   // bottom-right
    };
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        for (const size_t nPixelOffset : anPixelOffsets)
        {
            if (!IsEqualToNoData(pBuffer[nPixelOffset * nComponents + iBand],
                                 noDataValue))
            {
                return false;
            }
        }
    }

    // Test all pixels, line by line (samples between nWidth and nLineStride
    // are padding and are not inspected).
    const size_t nSamplesPerLine = nWidth * nComponents;
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
        for (size_t iX = 0; iX < nSamplesPerLine; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
            {
                return false;
            }
        }
    }
    return true;
}
5188

5189
/************************************************************************/
5190
/*                    GDALBufferHasOnlyNoData()                         */
5191
/************************************************************************/
5192

5193
bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
35,370✔
5194
                             size_t nWidth, size_t nHeight, size_t nLineStride,
5195
                             size_t nComponents, int nBitsPerSample,
5196
                             GDALBufferSampleFormat nSampleFormat)
5197
{
5198
    // In the case where the nodata is 0, we can compare several bytes at
5199
    // once. Select the largest natural integer type for the architecture.
5200
#if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
5201
    // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
5202
    typedef std::uint64_t WordType;
5203
#else
5204
    typedef std::uint32_t WordType;
5205
#endif
5206
    if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
35,370✔
5207
        // Do not use this optimized code path for floating point numbers,
5208
        // as it can't detect negative zero.
5209
        nSampleFormat != GSF_FLOATING_POINT)
5210
    {
5211
        const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
23,280✔
5212
        const size_t nSize =
23,280✔
5213
            (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8;
23,280✔
5214
        size_t i = 0;
23,280✔
5215
        const size_t nInitialIters =
5216
            std::min(sizeof(WordType) -
46,560✔
5217
                         static_cast<size_t>(
23,280✔
5218
                             reinterpret_cast<std::uintptr_t>(pabyBuffer) %
5219
                             sizeof(WordType)),
5220
                     nSize);
23,280✔
5221
        for (; i < nInitialIters; i++)
194,957✔
5222
        {
5223
            if (pabyBuffer[i])
174,973✔
5224
                return false;
3,296✔
5225
        }
5226
        for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
16,097,000✔
5227
        {
5228
            if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
16,082,400✔
5229
                return false;
5,449✔
5230
        }
5231
        for (; i < nSize; i++)
51,884✔
5232
        {
5233
            if (pabyBuffer[i])
37,354✔
5234
                return false;
5✔
5235
        }
5236
        return true;
14,530✔
5237
    }
5238

5239
    if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
12,090✔
5240
    {
5241
        return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
22,234✔
5242
               HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
11,117✔
5243
                              static_cast<uint8_t>(dfNoDataValue), nWidth,
11,117✔
5244
                              nHeight, nLineStride, nComponents);
11,117✔
5245
    }
5246
    if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
973✔
5247
    {
5248
        // Use unsigned implementation by converting the nodatavalue to
5249
        // unsigned
5250
        return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
63✔
5251
               HasOnlyNoDataT(
31✔
5252
                   static_cast<const uint8_t *>(pBuffer),
5253
                   static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
31✔
5254
                   nWidth, nHeight, nLineStride, nComponents);
32✔
5255
    }
5256
    if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
941✔
5257
    {
5258
        return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
21✔
5259
               HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
10✔
5260
                              static_cast<uint16_t>(dfNoDataValue), nWidth,
10✔
5261
                              nHeight, nLineStride, nComponents);
11✔
5262
    }
5263
    if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
930✔
5264
    {
5265
        // Use unsigned implementation by converting the nodatavalue to
5266
        // unsigned
5267
        return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
109✔
5268
               HasOnlyNoDataT(
54✔
5269
                   static_cast<const uint16_t *>(pBuffer),
5270
                   static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
54✔
5271
                   nWidth, nHeight, nLineStride, nComponents);
55✔
5272
    }
5273
    if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
875✔
5274
    {
5275
        return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
73✔
5276
               HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
36✔
5277
                              static_cast<uint32_t>(dfNoDataValue), nWidth,
5278
                              nHeight, nLineStride, nComponents);
37✔
5279
    }
5280
    if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
838✔
5281
    {
5282
        // Use unsigned implementation by converting the nodatavalue to
5283
        // unsigned
5284
        return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
19✔
5285
               HasOnlyNoDataT(
9✔
5286
                   static_cast<const uint32_t *>(pBuffer),
5287
                   static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
9✔
5288
                   nWidth, nHeight, nLineStride, nComponents);
10✔
5289
    }
5290
    if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
828✔
5291
    {
5292
        return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
56✔
5293
               HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
28✔
5294
                              static_cast<uint64_t>(dfNoDataValue), nWidth,
5295
                              nHeight, nLineStride, nComponents);
28✔
5296
    }
5297
    if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
800✔
5298
    {
5299
        // Use unsigned implementation by converting the nodatavalue to
5300
        // unsigned
5301
        return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
×
5302
               HasOnlyNoDataT(
×
5303
                   static_cast<const uint64_t *>(pBuffer),
5304
                   static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
×
5305
                   nWidth, nHeight, nLineStride, nComponents);
×
5306
    }
5307
    if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
800✔
5308
    {
5309
        return (std::isnan(dfNoDataValue) ||
1,329✔
5310
                GDALIsValueInRange<float>(dfNoDataValue)) &&
1,358✔
5311
               HasOnlyNoDataT(static_cast<const float *>(pBuffer),
678✔
5312
                              static_cast<float>(dfNoDataValue), nWidth,
5313
                              nHeight, nLineStride, nComponents);
679✔
5314
    }
5315
    if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
121✔
5316
    {
5317
        return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
121✔
5318
                              dfNoDataValue, nWidth, nHeight, nLineStride,
5319
                              nComponents);
121✔
5320
    }
5321
    return false;
×
5322
}
5323

5324
#ifdef HAVE_SSE2
5325

5326
/************************************************************************/
5327
/*                    GDALDeinterleave3Byte()                           */
5328
/************************************************************************/
5329

5330
#if defined(__GNUC__) && !defined(__clang__)
5331
__attribute__((optimize("no-tree-vectorize")))
5332
#endif
5333
static void
5334
GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
69,797✔
5335
                      GByte *CPL_RESTRICT pabyDest0,
5336
                      GByte *CPL_RESTRICT pabyDest1,
5337
                      GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5338
#ifdef USE_NEON_OPTIMIZATIONS
5339
{
5340
    return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5341
                                       nIters);
5342
}
5343
#else
5344
{
5345
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
5346
    if (CPLHaveRuntimeSSSE3())
69,797✔
5347
    {
5348
        return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
69,801✔
5349
                                           pabyDest2, nIters);
69,798✔
5350
    }
5351
#endif
5352

5353
    size_t i = 0;
2✔
5354
    if (((reinterpret_cast<uintptr_t>(pabySrc) |
2✔
5355
          reinterpret_cast<uintptr_t>(pabyDest0) |
2✔
5356
          reinterpret_cast<uintptr_t>(pabyDest1) |
2✔
5357
          reinterpret_cast<uintptr_t>(pabyDest2)) %
2✔
5358
         sizeof(unsigned int)) == 0)
5359
    {
5360
        // Slightly better than GCC autovectorizer
5361
        for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
17✔
5362
        {
5363
            unsigned int word0 =
15✔
5364
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
15✔
5365
            unsigned int word1 =
15✔
5366
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
15✔
5367
            unsigned int word2 =
15✔
5368
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
15✔
5369
            reinterpret_cast<unsigned int *>(pabyDest0)[j] =
15✔
5370
                (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
15✔
5371
                ((word2 >> 8) << 24);
15✔
5372
            reinterpret_cast<unsigned int *>(pabyDest1)[j] =
15✔
5373
                ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
15✔
5374
                (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
15✔
5375
            pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
15✔
5376
            pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
15✔
5377
            pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
15✔
5378
            pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
15✔
5379
        }
5380
    }
5381
#if defined(__clang__)
5382
#pragma clang loop vectorize(disable)
5383
#endif
5384
    for (; i < nIters; ++i)
3✔
5385
    {
5386
        pabyDest0[i] = pabySrc[3 * i + 0];
1✔
5387
        pabyDest1[i] = pabySrc[3 * i + 1];
1✔
5388
        pabyDest2[i] = pabySrc[3 * i + 2];
1✔
5389
    }
5390
}
5391
#endif
5392

5393
/************************************************************************/
5394
/*                    GDALDeinterleave4Byte()                           */
5395
/************************************************************************/
5396

5397
#if !defined(__GNUC__) || defined(__clang__)
5398

5399
/************************************************************************/
5400
/*                         deinterleave()                               */
5401
/************************************************************************/
5402

5403
template <bool SHIFT, bool MASK>
5404
inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
5405
                            __m128i &xmm2_ori, __m128i &xmm3_ori)
5406
{
5407
    // Set higher 24bit of each int32 packed word to 0
5408
    if (SHIFT)
5409
    {
5410
        xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
5411
        xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
5412
        xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
5413
        xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
5414
    }
5415
    __m128i xmm0;
5416
    __m128i xmm1;
5417
    __m128i xmm2;
5418
    __m128i xmm3;
5419
    if (MASK)
5420
    {
5421
        const __m128i xmm_mask = _mm_set1_epi32(0xff);
5422
        xmm0 = _mm_and_si128(xmm0_ori, xmm_mask);
5423
        xmm1 = _mm_and_si128(xmm1_ori, xmm_mask);
5424
        xmm2 = _mm_and_si128(xmm2_ori, xmm_mask);
5425
        xmm3 = _mm_and_si128(xmm3_ori, xmm_mask);
5426
    }
5427
    else
5428
    {
5429
        xmm0 = xmm0_ori;
5430
        xmm1 = xmm1_ori;
5431
        xmm2 = xmm2_ori;
5432
        xmm3 = xmm3_ori;
5433
    }
5434
    // Pack int32 to int16
5435
    xmm0 = _mm_packs_epi32(xmm0, xmm1);
5436
    xmm2 = _mm_packs_epi32(xmm2, xmm3);
5437
    // Pack int16 to uint8
5438
    xmm0 = _mm_packus_epi16(xmm0, xmm2);
5439
    return xmm0;
5440
}
5441

5442
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5443
                                  GByte *CPL_RESTRICT pabyDest0,
5444
                                  GByte *CPL_RESTRICT pabyDest1,
5445
                                  GByte *CPL_RESTRICT pabyDest2,
5446
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5447
#ifdef USE_NEON_OPTIMIZATIONS
5448
{
5449
    return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5450
                                       pabyDest3, nIters);
5451
}
5452
#else
5453
{
5454
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
5455
    if (CPLHaveRuntimeSSSE3())
5456
    {
5457
        return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5458
                                           pabyDest2, pabyDest3, nIters);
5459
    }
5460
#endif
5461

5462
    // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
5463
    // do something slightly better.
5464
    size_t i = 0;
5465
    for (; i + 15 < nIters; i += 16)
5466
    {
5467
        __m128i xmm0_ori = _mm_loadu_si128(
5468
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
5469
        __m128i xmm1_ori = _mm_loadu_si128(
5470
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
5471
        __m128i xmm2_ori = _mm_loadu_si128(
5472
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
5473
        __m128i xmm3_ori = _mm_loadu_si128(
5474
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));
5475

5476
        _mm_storeu_si128(
5477
            reinterpret_cast<__m128i *>(pabyDest0 + i),
5478
            deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5479
        _mm_storeu_si128(
5480
            reinterpret_cast<__m128i *>(pabyDest1 + i),
5481
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5482
        _mm_storeu_si128(
5483
            reinterpret_cast<__m128i *>(pabyDest2 + i),
5484
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5485
        _mm_storeu_si128(
5486
            reinterpret_cast<__m128i *>(pabyDest3 + i),
5487
            deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5488
    }
5489

5490
#if defined(__clang__)
5491
#pragma clang loop vectorize(disable)
5492
#endif
5493
    for (; i < nIters; ++i)
5494
    {
5495
        pabyDest0[i] = pabySrc[4 * i + 0];
5496
        pabyDest1[i] = pabySrc[4 * i + 1];
5497
        pabyDest2[i] = pabySrc[4 * i + 2];
5498
        pabyDest3[i] = pabySrc[4 * i + 3];
5499
    }
5500
}
5501
#endif
5502
#else
5503
// GCC autovectorizer does an excellent job
5504
__attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
52,997✔
5505
    const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
5506
    GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
5507
    GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5508
{
5509
    for (size_t i = 0; i < nIters; ++i)
525,000,000✔
5510
    {
5511
        pabyDest0[i] = pabySrc[4 * i + 0];
524,947,000✔
5512
        pabyDest1[i] = pabySrc[4 * i + 1];
524,947,000✔
5513
        pabyDest2[i] = pabySrc[4 * i + 2];
524,947,000✔
5514
        pabyDest3[i] = pabySrc[4 * i + 3];
524,947,000✔
5515
    }
5516
}
52,997✔
5517
#endif
5518

5519
#else
5520

5521
/************************************************************************/
5522
/*                    GDALDeinterleave3Byte()                           */
5523
/************************************************************************/
5524

5525
// TODO: Enabling below could help on non-Intel architectures where GCC knows
5526
// how to auto-vectorize
5527
// #if defined(__GNUC__)
5528
//__attribute__((optimize("tree-vectorize")))
5529
// #endif
5530
static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5531
                                  GByte *CPL_RESTRICT pabyDest0,
5532
                                  GByte *CPL_RESTRICT pabyDest1,
5533
                                  GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5534
{
5535
    for (size_t i = 0; i < nIters; ++i)
5536
    {
5537
        pabyDest0[i] = pabySrc[3 * i + 0];
5538
        pabyDest1[i] = pabySrc[3 * i + 1];
5539
        pabyDest2[i] = pabySrc[3 * i + 2];
5540
    }
5541
}
5542

5543
/************************************************************************/
5544
/*                    GDALDeinterleave4Byte()                           */
5545
/************************************************************************/
5546

5547
// TODO: Enabling below could help on non-Intel architectures where gcc knows
5548
// how to auto-vectorize
5549
// #if defined(__GNUC__)
5550
//__attribute__((optimize("tree-vectorize")))
5551
// #endif
5552
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5553
                                  GByte *CPL_RESTRICT pabyDest0,
5554
                                  GByte *CPL_RESTRICT pabyDest1,
5555
                                  GByte *CPL_RESTRICT pabyDest2,
5556
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5557
{
5558
    for (size_t i = 0; i < nIters; ++i)
5559
    {
5560
        pabyDest0[i] = pabySrc[4 * i + 0];
5561
        pabyDest1[i] = pabySrc[4 * i + 1];
5562
        pabyDest2[i] = pabySrc[4 * i + 2];
5563
        pabyDest3[i] = pabySrc[4 * i + 3];
5564
    }
5565
}
5566

5567
#endif
5568

5569
/************************************************************************/
5570
/*                      GDALDeinterleave()                              */
5571
/************************************************************************/
5572

5573
/*! Copy values from a pixel-interleave buffer to multiple per-component
5574
    buffers.
5575

5576
    In pseudo-code
5577
    \verbatim
5578
    for(size_t i = 0; i < nIters; ++i)
5579
        for(int iComp = 0; iComp < nComponents; iComp++ )
5580
            ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
5581
    \endverbatim
5582

5583
    The implementation is optimized for a few cases, like de-interleaving
5584
    of 3 or 4-components Byte buffers.
5585

5586
    \since GDAL 3.6
5587
 */
5588
void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
123,546✔
5589
                      int nComponents, void **ppDestBuffer,
5590
                      GDALDataType eDestDT, size_t nIters)
5591
{
5592
    if (eSourceDT == eDestDT)
123,546✔
5593
    {
5594
        if (eSourceDT == GDT_Byte || eSourceDT == GDT_Int8)
123,526✔
5595
        {
5596
            if (nComponents == 3)
122,793✔
5597
            {
5598
                const GByte *CPL_RESTRICT pabySrc =
69,796✔
5599
                    static_cast<const GByte *>(pSourceBuffer);
5600
                GByte *CPL_RESTRICT pabyDest0 =
69,796✔
5601
                    static_cast<GByte *>(ppDestBuffer[0]);
5602
                GByte *CPL_RESTRICT pabyDest1 =
69,796✔
5603
                    static_cast<GByte *>(ppDestBuffer[1]);
5604
                GByte *CPL_RESTRICT pabyDest2 =
69,796✔
5605
                    static_cast<GByte *>(ppDestBuffer[2]);
5606
                GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
69,796✔
5607
                                      nIters);
5608
                return;
69,799✔
5609
            }
5610
            else if (nComponents == 4)
52,997✔
5611
            {
5612
                const GByte *CPL_RESTRICT pabySrc =
52,997✔
5613
                    static_cast<const GByte *>(pSourceBuffer);
5614
                GByte *CPL_RESTRICT pabyDest0 =
52,997✔
5615
                    static_cast<GByte *>(ppDestBuffer[0]);
5616
                GByte *CPL_RESTRICT pabyDest1 =
52,997✔
5617
                    static_cast<GByte *>(ppDestBuffer[1]);
5618
                GByte *CPL_RESTRICT pabyDest2 =
52,997✔
5619
                    static_cast<GByte *>(ppDestBuffer[2]);
5620
                GByte *CPL_RESTRICT pabyDest3 =
52,997✔
5621
                    static_cast<GByte *>(ppDestBuffer[3]);
5622
                GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
52,997✔
5623
                                      pabyDest3, nIters);
5624
                return;
52,997✔
5625
            }
×
5626
        }
5627
#if ((defined(__GNUC__) && !defined(__clang__)) ||                             \
5628
     defined(__INTEL_CLANG_COMPILER)) &&                                       \
5629
    defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME)
5630
        else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
1,466✔
5631
                 CPLHaveRuntimeSSSE3())
733✔
5632
        {
5633
            if (nComponents == 3)
733✔
5634
            {
5635
                const GUInt16 *CPL_RESTRICT panSrc =
239✔
5636
                    static_cast<const GUInt16 *>(pSourceBuffer);
5637
                GUInt16 *CPL_RESTRICT panDest0 =
239✔
5638
                    static_cast<GUInt16 *>(ppDestBuffer[0]);
5639
                GUInt16 *CPL_RESTRICT panDest1 =
239✔
5640
                    static_cast<GUInt16 *>(ppDestBuffer[1]);
5641
                GUInt16 *CPL_RESTRICT panDest2 =
239✔
5642
                    static_cast<GUInt16 *>(ppDestBuffer[2]);
5643
                GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
239✔
5644
                                              panDest2, nIters);
5645
                return;
239✔
5646
            }
5647
#if !defined(__INTEL_CLANG_COMPILER)
5648
            // ICC autovectorizer doesn't do a good job, at least with icx
5649
            // 2022.1.0.20220316
5650
            else if (nComponents == 4)
494✔
5651
            {
5652
                const GUInt16 *CPL_RESTRICT panSrc =
494✔
5653
                    static_cast<const GUInt16 *>(pSourceBuffer);
5654
                GUInt16 *CPL_RESTRICT panDest0 =
494✔
5655
                    static_cast<GUInt16 *>(ppDestBuffer[0]);
5656
                GUInt16 *CPL_RESTRICT panDest1 =
494✔
5657
                    static_cast<GUInt16 *>(ppDestBuffer[1]);
5658
                GUInt16 *CPL_RESTRICT panDest2 =
494✔
5659
                    static_cast<GUInt16 *>(ppDestBuffer[2]);
5660
                GUInt16 *CPL_RESTRICT panDest3 =
494✔
5661
                    static_cast<GUInt16 *>(ppDestBuffer[3]);
5662
                GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
494✔
5663
                                              panDest2, panDest3, nIters);
5664
                return;
494✔
5665
            }
5666
#endif
5667
        }
5668
#endif
5669
    }
5670

5671
    const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
20✔
5672
    const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
22✔
5673
    for (int iComp = 0; iComp < nComponents; iComp++)
87✔
5674
    {
5675
        GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
65✔
5676
                            iComp * nSourceDTSize,
65✔
5677
                        eSourceDT, nComponents * nSourceDTSize,
5678
                        ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
65✔
5679
    }
5680
}
5681

5682
/************************************************************************/
5683
/*                    GDALTranspose2DSingleToSingle()                   */
5684
/************************************************************************/
5685
/**
5686
 * Transpose a 2D array of non-complex values, in a efficient (cache-oblivious) way.
5687
 *
5688
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5689
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5690
 * @param nSrcWidth Width of pSrc array.
5691
 * @param nSrcHeight Height of pSrc array.
5692
 */
5693

5694
template <class DST, class SRC>
5695
void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc,
124✔
5696
                                   DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5697
                                   size_t nSrcHeight)
5698
{
5699
    constexpr size_t blocksize = 32;
124✔
5700
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
273✔
5701
    {
5702
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
149✔
5703
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
348✔
5704
        {
5705
            // transpose the block beginning at [i,j]
5706
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
199✔
5707
            for (size_t k = i; k < max_k; ++k)
2,446✔
5708
            {
5709
                for (size_t l = j; l < max_l; ++l)
40,849✔
5710
                {
5711
                    GDALCopyWord(pSrc[l + k * nSrcWidth],
38,602✔
5712
                                 pDst[k + l * nSrcHeight]);
38,602✔
5713
                }
5714
            }
5715
        }
5716
    }
5717
}
124✔
5718

5719
/************************************************************************/
5720
/*                   GDALTranspose2DComplexToComplex()                  */
5721
/************************************************************************/
5722
/**
5723
 * Transpose a 2D array of complex values into an array of complex values,
5724
 * in a efficient (cache-oblivious) way.
5725
 *
5726
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5727
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5728
 * @param nSrcWidth Width of pSrc array.
5729
 * @param nSrcHeight Height of pSrc array.
5730
 */
5731
template <class DST, class SRC>
5732
void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc,
16✔
5733
                                     DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5734
                                     size_t nSrcHeight)
5735
{
5736
    constexpr size_t blocksize = 32;
16✔
5737
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
32✔
5738
    {
5739
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
16✔
5740
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
32✔
5741
        {
5742
            // transpose the block beginning at [i,j]
5743
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
16✔
5744
            for (size_t k = i; k < max_k; ++k)
48✔
5745
            {
5746
                for (size_t l = j; l < max_l; ++l)
128✔
5747
                {
5748
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
96✔
5749
                                 pDst[2 * (k + l * nSrcHeight) + 0]);
96✔
5750
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1],
96✔
5751
                                 pDst[2 * (k + l * nSrcHeight) + 1]);
96✔
5752
                }
5753
            }
5754
        }
5755
    }
5756
}
16✔
5757

5758
/************************************************************************/
5759
/*                   GDALTranspose2DComplexToSingle()                  */
5760
/************************************************************************/
5761
/**
5762
 * Transpose a 2D array of complex values into an array of non-complex values,
5763
 * in a efficient (cache-oblivious) way.
5764
 *
5765
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5766
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5767
 * @param nSrcWidth Width of pSrc array.
5768
 * @param nSrcHeight Height of pSrc array.
5769
 */
5770
template <class DST, class SRC>
5771
void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc,
40✔
5772
                                    DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5773
                                    size_t nSrcHeight)
5774
{
5775
    constexpr size_t blocksize = 32;
40✔
5776
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
80✔
5777
    {
5778
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
40✔
5779
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
80✔
5780
        {
5781
            // transpose the block beginning at [i,j]
5782
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
40✔
5783
            for (size_t k = i; k < max_k; ++k)
120✔
5784
            {
5785
                for (size_t l = j; l < max_l; ++l)
320✔
5786
                {
5787
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
240✔
5788
                                 pDst[k + l * nSrcHeight]);
240✔
5789
                }
5790
            }
5791
        }
5792
    }
5793
}
40✔
5794

5795
/************************************************************************/
5796
/*                   GDALTranspose2DSingleToComplex()                  */
5797
/************************************************************************/
5798
/**
5799
 * Transpose a 2D array of non-complex values into an array of complex values,
5800
 * in a efficient (cache-oblivious) way.
5801
 *
5802
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5803
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5804
 * @param nSrcWidth Width of pSrc array.
5805
 * @param nSrcHeight Height of pSrc array.
5806
 */
5807
template <class DST, class SRC>
5808
void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc,
40✔
5809
                                    DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5810
                                    size_t nSrcHeight)
5811
{
5812
    constexpr size_t blocksize = 32;
40✔
5813
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
80✔
5814
    {
5815
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
40✔
5816
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
80✔
5817
        {
5818
            // transpose the block beginning at [i,j]
5819
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
40✔
5820
            for (size_t k = i; k < max_k; ++k)
120✔
5821
            {
5822
                for (size_t l = j; l < max_l; ++l)
320✔
5823
                {
5824
                    GDALCopyWord(pSrc[l + k * nSrcWidth],
240✔
5825
                                 pDst[2 * (k + l * nSrcHeight) + 0]);
240✔
5826
                    pDst[2 * (k + l * nSrcHeight) + 1] = 0;
240✔
5827
                }
5828
            }
5829
        }
5830
    }
5831
}
40✔
5832

5833
/************************************************************************/
5834
/*                        GDALTranspose2D()                             */
5835
/************************************************************************/
5836

5837
template <class DST, bool DST_IS_COMPLEX>
5838
static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst,
220✔
5839
                            size_t nSrcWidth, size_t nSrcHeight)
5840
{
5841
#define CALL_GDALTranspose2D_internal(SRC_TYPE)                                \
5842
    do                                                                         \
5843
    {                                                                          \
5844
        if constexpr (DST_IS_COMPLEX)                                          \
5845
        {                                                                      \
5846
            GDALTranspose2DSingleToComplex(                                    \
5847
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
5848
                nSrcHeight);                                                   \
5849
        }                                                                      \
5850
        else                                                                   \
5851
        {                                                                      \
5852
            GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \
5853
                                          pDst, nSrcWidth, nSrcHeight);        \
5854
        }                                                                      \
5855
    } while (0)
5856

5857
#define CALL_GDALTranspose2DComplex_internal(SRC_TYPE)                         \
5858
    do                                                                         \
5859
    {                                                                          \
5860
        if constexpr (DST_IS_COMPLEX)                                          \
5861
        {                                                                      \
5862
            GDALTranspose2DComplexToComplex(                                   \
5863
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
5864
                nSrcHeight);                                                   \
5865
        }                                                                      \
5866
        else                                                                   \
5867
        {                                                                      \
5868
            GDALTranspose2DComplexToSingle(                                    \
5869
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
5870
                nSrcHeight);                                                   \
5871
        }                                                                      \
5872
    } while (0)
5873

5874
    // clang-format off
5875
    switch (eSrcType)
220✔
5876
    {
5877
        case GDT_Byte:     CALL_GDALTranspose2D_internal(uint8_t); break;
14✔
5878
        case GDT_Int8:     CALL_GDALTranspose2D_internal(int8_t); break;
13✔
5879
        case GDT_UInt16:   CALL_GDALTranspose2D_internal(uint16_t); break;
22✔
5880
        case GDT_Int16:    CALL_GDALTranspose2D_internal(int16_t); break;
14✔
5881
        case GDT_UInt32:   CALL_GDALTranspose2D_internal(uint32_t); break;
22✔
5882
        case GDT_Int32:    CALL_GDALTranspose2D_internal(int32_t); break;
14✔
5883
        case GDT_UInt64:   CALL_GDALTranspose2D_internal(uint64_t); break;
14✔
5884
        case GDT_Int64:    CALL_GDALTranspose2D_internal(int64_t); break;
14✔
5885
        case GDT_Float32:  CALL_GDALTranspose2D_internal(float); break;
15✔
5886
        case GDT_Float64:  CALL_GDALTranspose2D_internal(double); break;
22✔
5887
        case GDT_CInt16:   CALL_GDALTranspose2DComplex_internal(int16_t); break;
14✔
5888
        case GDT_CInt32:   CALL_GDALTranspose2DComplex_internal(int32_t); break;
14✔
5889
        case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break;
14✔
5890
        case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break;
14✔
5891
        case GDT_Unknown:
×
5892
        case GDT_TypeCount:
5893
            break;
×
5894
    }
5895
        // clang-format on
5896

5897
#undef CALL_GDALTranspose2D_internal
5898
#undef CALL_GDALTranspose2DComplex_internal
5899
}
220✔
5900

5901
/************************************************************************/
5902
/*                      GDALInterleave2Byte()                           */
5903
/************************************************************************/
5904

5905
#if defined(HAVE_SSE2) &&                                                      \
5906
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
5907

5908
// ICC autovectorizer doesn't do a good job at generating good SSE code,
5909
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
5910
#if defined(__GNUC__)
5911
__attribute__((noinline))
5912
#endif
5913
static void
5914
GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
5915
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
5916
{
5917
    size_t i = 0;
5918
    constexpr size_t VALS_PER_ITER = 16;
5919
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
5920
    {
5921
        __m128i xmm0 =
5922
            _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i));
5923
        __m128i xmm1 = _mm_loadu_si128(
5924
            reinterpret_cast<__m128i const *>(pSrc + i + nIters));
5925
        _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i),
5926
                         _mm_unpacklo_epi8(xmm0, xmm1));
5927
        _mm_storeu_si128(
5928
            reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER),
5929
            _mm_unpackhi_epi8(xmm0, xmm1));
5930
    }
5931
#if defined(__clang__)
5932
#pragma clang loop vectorize(disable)
5933
#endif
5934
    for (; i < nIters; ++i)
5935
    {
5936
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
5937
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
5938
    }
5939
}
5940

5941
#else
5942

5943
#if defined(__GNUC__) && !defined(__clang__)
5944
__attribute__((optimize("tree-vectorize")))
5945
#endif
5946
#if defined(__GNUC__)
5947
__attribute__((noinline))
5948
#endif
5949
static void
5950
GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
4✔
5951
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
5952
{
5953
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
5954
#pragma clang loop vectorize(enable)
5955
#endif
5956
    for (size_t i = 0; i < nIters; ++i)
44✔
5957
    {
5958
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
40✔
5959
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
40✔
5960
    }
5961
}
4✔
5962

5963
#endif
5964

5965
/************************************************************************/
5966
/*                      GDALInterleave4Byte()                           */
5967
/************************************************************************/
5968

5969
#if defined(HAVE_SSE2) &&                                                      \
5970
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
5971

5972
// ICC autovectorizer doesn't do a good job at generating good SSE code,
5973
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
5974
#if defined(__GNUC__)
5975
__attribute__((noinline))
5976
#endif
5977
static void
5978
GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
5979
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
5980
{
5981
    size_t i = 0;
5982
    constexpr size_t VALS_PER_ITER = 16;
5983
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
5984
    {
5985
        __m128i xmm0 = _mm_loadu_si128(
5986
            reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters));
5987
        __m128i xmm1 = _mm_loadu_si128(
5988
            reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters));
5989
        __m128i xmm2 = _mm_loadu_si128(
5990
            reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters));
5991
        __m128i xmm3 = _mm_loadu_si128(
5992
            reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters));
5993
        auto tmp0 = _mm_unpacklo_epi8(
5994
            xmm0,
5995
            xmm1);  // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...)
5996
        auto tmp1 = _mm_unpackhi_epi8(
5997
            xmm0,
5998
            xmm1);  // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...)
5999
        auto tmp2 = _mm_unpacklo_epi8(
6000
            xmm2,
6001
            xmm3);  // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...)
6002
        auto tmp3 = _mm_unpackhi_epi8(
6003
            xmm2,
6004
            xmm3);  // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...)
6005
        auto tmp2_0 = _mm_unpacklo_epi16(
6006
            tmp0,
6007
            tmp2);  // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...)
6008
        auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2);
6009
        auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3);
6010
        auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3);
6011
        _mm_storeu_si128(
6012
            reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER),
6013
            tmp2_0);
6014
        _mm_storeu_si128(
6015
            reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER),
6016
            tmp2_1);
6017
        _mm_storeu_si128(
6018
            reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER),
6019
            tmp2_2);
6020
        _mm_storeu_si128(
6021
            reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER),
6022
            tmp2_3);
6023
    }
6024
#if defined(__clang__)
6025
#pragma clang loop vectorize(disable)
6026
#endif
6027
    for (; i < nIters; ++i)
6028
    {
6029
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6030
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6031
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6032
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6033
    }
6034
}
6035

6036
#else
6037

6038
#if defined(__GNUC__) && !defined(__clang__)
6039
__attribute__((optimize("tree-vectorize")))
6040
#endif
6041
#if defined(__GNUC__)
6042
__attribute__((noinline))
6043
#endif
6044
static void
6045
GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
2✔
6046
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
6047
{
6048
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6049
#pragma clang loop vectorize(enable)
6050
#endif
6051
    for (size_t i = 0; i < nIters; ++i)
36✔
6052
    {
6053
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
34✔
6054
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
34✔
6055
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
34✔
6056
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
34✔
6057
    }
6058
}
2✔
6059

6060
#endif
6061

6062
/************************************************************************/
6063
/*                        GDALTranspose2D()                             */
6064
/************************************************************************/
6065

6066
/**
6067
 * Transpose a 2D array in a efficient (cache-oblivious) way.
6068
 *
6069
 * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight.
6070
 * @param eSrcType Data type of pSrc.
6071
 * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth.
6072
 * @param eDstType Data type of pDst.
6073
 * @param nSrcWidth Width of pSrc array.
6074
 * @param nSrcHeight Height of pSrc array.
6075
 * @since GDAL 3.11
6076
 */
6077

6078
void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst,
245✔
6079
                     GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight)
6080
{
6081
    if (eSrcType == eDstType && (eSrcType == GDT_Byte || eSrcType == GDT_Int8))
245✔
6082
    {
6083
        if (nSrcHeight == 2)
25✔
6084
        {
6085
            GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc),
4✔
6086
                                static_cast<uint8_t *>(pDst), nSrcWidth);
6087
            return;
4✔
6088
        }
6089
        if (nSrcHeight == 4)
21✔
6090
        {
6091
            GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc),
2✔
6092
                                static_cast<uint8_t *>(pDst), nSrcWidth);
6093
            return;
2✔
6094
        }
6095
#if (defined(HAVE_SSSE3_AT_COMPILE_TIME) &&                                    \
6096
     (defined(__x86_64) || defined(_M_X64)))
6097
        if (CPLHaveRuntimeSSSE3())
19✔
6098
        {
6099
            GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
19✔
6100
                                       static_cast<uint8_t *>(pDst), nSrcWidth,
6101
                                       nSrcHeight);
6102
            return;
19✔
6103
        }
6104
#elif defined(USE_NEON_OPTIMIZATIONS)
6105
        {
6106
            GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6107
                                       static_cast<uint8_t *>(pDst), nSrcWidth,
6108
                                       nSrcHeight);
6109
            return;
6110
        }
6111
#endif
6112
    }
6113

6114
#define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX)                \
6115
    GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>(                                 \
6116
        pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight)
6117

6118
    // clang-format off
6119
    switch (eDstType)
220✔
6120
    {
6121
        case GDT_Byte:     CALL_GDALTranspose2D_internal(uint8_t, false); break;
13✔
6122
        case GDT_Int8:     CALL_GDALTranspose2D_internal(int8_t, false); break;
13✔
6123
        case GDT_UInt16:   CALL_GDALTranspose2D_internal(uint16_t, false); break;
22✔
6124
        case GDT_Int16:    CALL_GDALTranspose2D_internal(int16_t, false); break;
14✔
6125
        case GDT_UInt32:   CALL_GDALTranspose2D_internal(uint32_t, false); break;
22✔
6126
        case GDT_Int32:    CALL_GDALTranspose2D_internal(int32_t, false); break;
14✔
6127
        case GDT_UInt64:   CALL_GDALTranspose2D_internal(uint64_t, false); break;
14✔
6128
        case GDT_Int64:    CALL_GDALTranspose2D_internal(int64_t, false); break;
14✔
6129
        case GDT_Float32:  CALL_GDALTranspose2D_internal(float, false); break;
15✔
6130
        case GDT_Float64:  CALL_GDALTranspose2D_internal(double, false); break;
23✔
6131
        case GDT_CInt16:   CALL_GDALTranspose2D_internal(int16_t, true); break;
14✔
6132
        case GDT_CInt32:   CALL_GDALTranspose2D_internal(int32_t, true); break;
14✔
6133
        case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break;
14✔
6134
        case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break;
14✔
6135
        case GDT_Unknown:
×
6136
        case GDT_TypeCount:
6137
            break;
×
6138
    }
6139
        // clang-format on
6140

6141
#undef CALL_GDALTranspose2D_internal
6142
}
6143

6144
/************************************************************************/
6145
/*                     ExtractBitAndConvertTo255()                      */
6146
/************************************************************************/
6147

6148
#if defined(__GNUC__) || defined(_MSC_VER)
6149
// Signedness of char implementation dependent, so be explicit.
6150
// Assumes 2-complement integer types and sign extension of right shifting
6151
// GCC guarantees such:
6152
// https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
6153
static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
95,050✔
6154
{
6155
    return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >>
95,050✔
6156
                              7);
95,050✔
6157
}
6158
#else
6159
// Portable way
6160
static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6161
{
6162
    return (byVal & (1 << nBit)) ? 255 : 0;
6163
}
6164
#endif
6165

6166
/************************************************************************/
6167
/*                   ExpandEightPackedBitsToByteAt255()                 */
6168
/************************************************************************/
6169

6170
// Expand the 8 bits of byVal, starting with the most significant one, into
// abyOutput[0..7]: a bit at 1 gives a byte at 255, a bit at 0 a byte at 0.
static inline void ExpandEightPackedBitsToByteAt255(GByte byVal,
                                                    GByte abyOutput[8])
{
    for (int iOut = 0; iOut < 8; ++iOut)
    {
        // Output index 0 maps to bit 7, output index 7 to bit 0.
        abyOutput[iOut] = ExtractBitAndConvertTo255(byVal, 7 - iOut);
    }
}
11,697✔
6182

6183
/************************************************************************/
6184
/*                GDALExpandPackedBitsToByteAt0Or255()                  */
6185
/************************************************************************/
6186

6187
/** Expand packed-bits (ordered from most-significant bit to least one)
6188
  into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6189
  at 1 to a byte at 255.
6190

6191
 The function does (in a possibly more optimized way) the following:
6192
 \code{.cpp}
6193
 for (size_t i = 0; i < nInputBits; ++i )
6194
 {
6195
     pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0;
6196
 }
6197
 \endcode
6198

6199
 @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6200
 @param pabyOutput Output array of nInputBits bytes.
6201
 @param nInputBits Number of valid bits in pabyInput.
6202

6203
 @since 3.11
6204
*/
6205

6206
void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput,
30,937✔
6207
                                        GByte *CPL_RESTRICT pabyOutput,
6208
                                        size_t nInputBits)
6209
{
6210
    const size_t nInputWholeBytes = nInputBits / 8;
30,937✔
6211
    size_t iByte = 0;
30,937✔
6212

6213
#ifdef HAVE_SSE2
6214
    // Mask to isolate each bit
6215
    const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4,
30,937✔
6216
                                          8, 16, 32, 64, -128);
6217
    const __m128i zero = _mm_setzero_si128();
30,937✔
6218
    const __m128i all_ones = _mm_set1_epi8(-1);
30,937✔
6219
#ifdef __SSSE3__
6220
    const __m128i dispatch_two_bytes =
6221
        _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
6222
#endif
6223
    constexpr size_t SSE_REG_SIZE = sizeof(bit_mask);
30,937✔
6224
    for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE)
79,750✔
6225
    {
6226
        __m128i reg_ori = _mm_loadu_si128(
48,813✔
6227
            reinterpret_cast<const __m128i *>(pabyInput + iByte));
48,813✔
6228

6229
        constexpr int NUM_PROCESSED_BYTES_PER_REG = 2;
48,813✔
6230
        for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k)
439,317✔
6231
        {
6232
            // Given reg_ori = (A, B, ... 14 other bytes ...),
6233
            // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B)
6234
#ifdef __SSSE3__
6235
            __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes);
6236
#else
6237
            __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori);
390,504✔
6238
            reg = _mm_unpacklo_epi16(reg, reg);
390,504✔
6239
            reg = _mm_unpacklo_epi32(reg, reg);
390,504✔
6240
#endif
6241

6242
            // Test if bits of interest are set
6243
            reg = _mm_and_si128(reg, bit_mask);
390,504✔
6244

6245
            // Now test if those bits are set, by comparing to zero. So the
6246
            // result will be that bytes where bits are set will be at 0, and
6247
            // ones where they are cleared will be at 0xFF. So the inverse of
6248
            // the end result we want!
6249
            reg = _mm_cmpeq_epi8(reg, zero);
390,504✔
6250

6251
            // Invert the result
6252
            reg = _mm_andnot_si128(reg, all_ones);
390,504✔
6253

6254
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg);
6255

6256
            pabyOutput += SSE_REG_SIZE;
390,504✔
6257

6258
            // Right-shift of 2 bytes
6259
            reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG);
390,504✔
6260
        }
6261
    }
6262

6263
#endif  // HAVE_SSE2
6264

6265
    for (; iByte < nInputWholeBytes; ++iByte)
42,634✔
6266
    {
6267
        ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput);
11,697✔
6268
        pabyOutput += 8;
11,697✔
6269
    }
6270
    for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
32,411✔
6271
    {
6272
        *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit);
1,474✔
6273
        ++pabyOutput;
1,474✔
6274
    }
6275
}
30,937✔
6276

6277
/************************************************************************/
6278
/*                   ExpandEightPackedBitsToByteAt1()                   */
6279
/************************************************************************/
6280

6281
// Expand the 8 bits of byVal, starting with the most significant one, into
// abyOutput[0..7]: a bit at 1 gives a byte at 1, a bit at 0 a byte at 0.
static inline void ExpandEightPackedBitsToByteAt1(GByte byVal,
                                                  GByte abyOutput[8])
{
    for (int iOut = 0; iOut < 8; ++iOut)
    {
        // Output index 0 maps to bit 7, output index 7 to bit 0.
        abyOutput[iOut] = (byVal >> (7 - iOut)) & 0x1;
    }
}
136,113✔
6293

6294
/************************************************************************/
6295
/*                GDALExpandPackedBitsToByteAt0Or1()                    */
6296
/************************************************************************/
6297

6298
/** Expand packed-bits (ordered from most-significant bit to least one)
6299
  into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6300
  at 1 to a byte at 1.
6301

6302
 The function does (in a possibly more optimized way) the following:
6303
 \code{.cpp}
6304
 for (size_t i = 0; i < nInputBits; ++i )
6305
 {
6306
     pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
6307
 }
6308
 \endcode
6309

6310
 @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6311
 @param pabyOutput Output array of nInputBits bytes.
6312
 @param nInputBits Number of valid bits in pabyInput.
6313

6314
 @since 3.11
6315
*/
6316

6317
void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput,
7,041✔
6318
                                      GByte *CPL_RESTRICT pabyOutput,
6319
                                      size_t nInputBits)
6320
{
6321
    const size_t nInputWholeBytes = nInputBits / 8;
7,041✔
6322
    size_t iByte = 0;
7,041✔
6323
    for (; iByte < nInputWholeBytes; ++iByte)
143,154✔
6324
    {
6325
        ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput);
136,113✔
6326
        pabyOutput += 8;
136,113✔
6327
    }
6328
    for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
18,902✔
6329
    {
6330
        *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1;
11,861✔
6331
        ++pabyOutput;
11,861✔
6332
    }
6333
}
7,041✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc