• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OSGeo / gdal / 15985453442

30 Jun 2025 11:00PM UTC coverage: 71.093% (+0.004%) from 71.089%
15985453442

Pull #12680

github

web-flow
Merge 346c552a0 into 3996c9be8
Pull Request #12680: Arrow/Parquet: handle list of binary and binary as JSON content (struct/list_of_list/array_of binary)

68 of 89 new or added lines in 1 file covered. (76.4%)

37 existing lines in 20 files now uncovered.

574098 of 807529 relevant lines covered (71.09%)

250439.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.73
/ogr/ogrsf_frmts/generic/ograrrowarrayhelper.cpp
1
/******************************************************************************
2
 *
3
 * Project:  OpenGIS Simple Features Reference Implementation
4
 * Purpose:  Helper to fill ArrowArray
5
 * Author:   Even Rouault <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12

13
#include "ograrrowarrayhelper.h"
14
#include "ogrlayerarrow.h"
15
#include "ogr_p.h"
16

17
#include <limits>
18

19
//! @cond Doxygen_Suppress
20

21
/************************************************************************/
22
/*                           GetMemLimit()                              */
23
/************************************************************************/
24

25
/*static*/ uint32_t OGRArrowArrayHelper::GetMemLimit()
1,706✔
26
{
27
    uint32_t nMemLimit =
1,706✔
28
        static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
29
    // Just for tests
30
    const char *pszOGR_ARROW_MEM_LIMIT =
31
        CPLGetConfigOption("OGR_ARROW_MEM_LIMIT", nullptr);
1,706✔
32
    if (pszOGR_ARROW_MEM_LIMIT)
1,706✔
33
        nMemLimit = atoi(pszOGR_ARROW_MEM_LIMIT);
131✔
34
    else
35
    {
36
        const auto nUsableRAM = CPLGetUsablePhysicalRAM();
1,575✔
37
        if (nUsableRAM > 0 && static_cast<uint64_t>(nUsableRAM / 4) < nMemLimit)
1,575✔
38
            nMemLimit = static_cast<uint32_t>(nUsableRAM / 4);
×
39
    }
40
    return nMemLimit;
1,706✔
41
}
42

43
/************************************************************************/
44
/*                       GetMaxFeaturesInBatch()                        */
45
/************************************************************************/
46

47
/* static */
48
int OGRArrowArrayHelper::GetMaxFeaturesInBatch(
600✔
49
    const CPLStringList &aosArrowArrayStreamOptions)
50
{
51
    int l_nMaxBatchSize = atoi(aosArrowArrayStreamOptions.FetchNameValueDef(
600✔
52
        "MAX_FEATURES_IN_BATCH", "65536"));
53
    if (l_nMaxBatchSize <= 0)
600✔
54
        l_nMaxBatchSize = 1;
×
55
    if (l_nMaxBatchSize > INT_MAX - 1)
600✔
56
        l_nMaxBatchSize = INT_MAX - 1;
×
57

58
    return l_nMaxBatchSize;
600✔
59
}
60

61
/************************************************************************/
62
/*                       OGRArrowArrayHelper()                          */
63
/************************************************************************/
64

65
OGRArrowArrayHelper::OGRArrowArrayHelper(
414✔
66
    GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
67
    const CPLStringList &aosArrowArrayStreamOptions,
68
    struct ArrowArray *out_array)
414✔
69
    : m_bIncludeFID(CPLTestBool(
414✔
70
          aosArrowArrayStreamOptions.FetchNameValueDef("INCLUDE_FID", "YES"))),
71
      m_nMaxBatchSize(GetMaxFeaturesInBatch(aosArrowArrayStreamOptions)),
828✔
72
      m_nFieldCount(poFeatureDefn->GetFieldCount()),
828✔
73
      m_nGeomFieldCount(poFeatureDefn->GetGeomFieldCount()),
828✔
74
      m_out_array(out_array)
414✔
75
{
76
    memset(out_array, 0, sizeof(*out_array));
414✔
77

78
    m_mapOGRFieldToArrowField.resize(m_nFieldCount, -1);
414✔
79
    m_mapOGRGeomFieldToArrowField.resize(m_nGeomFieldCount, -1);
414✔
80
    m_abNullableFields.resize(m_nFieldCount);
414✔
81
    m_anTZFlags.resize(m_nFieldCount);
414✔
82
    int nTZFlagOverride = -1;
413✔
83
    const char *pszTZOverride =
84
        aosArrowArrayStreamOptions.FetchNameValue("TIMEZONE");
413✔
85
    if (pszTZOverride)
414✔
86
    {
87
        if (EQUAL(pszTZOverride, "unknown") || EQUAL(pszTZOverride, ""))
363✔
88
        {
UNCOV
89
            nTZFlagOverride = OGR_TZFLAG_UNKNOWN;
×
90
        }
91
        else
92
        {
93
            // we don't really care about the actual timezone, since we
94
            // will convert OGRField::Date to UTC in all cases
95
            nTZFlagOverride = OGR_TZFLAG_UTC;
363✔
96
        }
97
    }
98
    const bool bDateTimeAsString =
99
        aosArrowArrayStreamOptions.FetchBool(GAS_OPT_DATETIME_AS_STRING, false);
414✔
100

101
    if (m_bIncludeFID)
413✔
102
    {
103
        m_nChildren++;
401✔
104
    }
105
    // cppcheck-suppress knownConditionTrueFalse
106
    for (int i = 0; i < m_nFieldCount; i++)
3,119✔
107
    {
108
        const auto poFieldDefn = poFeatureDefn->GetFieldDefn(i);
2,706✔
109
        m_abNullableFields[i] = CPL_TO_BOOL(poFieldDefn->IsNullable());
2,706✔
110
        m_anTZFlags[i] =
2,706✔
111
            nTZFlagOverride >= 0 ? nTZFlagOverride : poFieldDefn->GetTZFlag();
2,706✔
112
        if (!poFieldDefn->IsIgnored())
2,706✔
113
        {
114
            m_mapOGRFieldToArrowField[i] = m_nChildren;
2,671✔
115
            m_nChildren++;
2,671✔
116
        }
117
    }
118
    // cppcheck-suppress knownConditionTrueFalse
119
    for (int i = 0; i < m_nGeomFieldCount; i++)
822✔
120
    {
121
        if (!poFeatureDefn->GetGeomFieldDefn(i)->IsIgnored())
409✔
122
        {
123
            m_mapOGRGeomFieldToArrowField[i] = m_nChildren;
396✔
124
            m_nChildren++;
396✔
125
        }
126
    }
127

128
    m_anArrowFieldMaxAlloc.resize(m_nChildren);
413✔
129

130
    out_array->release = OGRLayer::ReleaseArray;
412✔
131

132
    out_array->length = m_nMaxBatchSize;
412✔
133
    out_array->null_count = 0;
412✔
134

135
    out_array->n_children = m_nChildren;
412✔
136
    out_array->children = static_cast<struct ArrowArray **>(
414✔
137
        CPLCalloc(m_nChildren, sizeof(struct ArrowArray *)));
412✔
138
    out_array->release = OGRLayer::ReleaseArray;
414✔
139
    out_array->n_buffers = 1;
414✔
140
    out_array->buffers =
413✔
141
        static_cast<const void **>(CPLCalloc(1, sizeof(void *)));
414✔
142

143
    // Allocate buffers
144

145
    if (m_bIncludeFID)
413✔
146
    {
147
        out_array->children[0] = static_cast<struct ArrowArray *>(
803✔
148
            CPLCalloc(1, sizeof(struct ArrowArray)));
401✔
149
        auto psChild = out_array->children[0];
402✔
150
        psChild->release = OGRLayer::ReleaseArray;
402✔
151
        psChild->length = m_nMaxBatchSize;
402✔
152
        psChild->n_buffers = 2;
402✔
153
        psChild->buffers =
401✔
154
            static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
402✔
155
        m_panFIDValues = static_cast<int64_t *>(
402✔
156
            VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(int64_t) * m_nMaxBatchSize));
401✔
157
        if (m_panFIDValues == nullptr)
402✔
158
            goto error;
×
159
        psChild->buffers[1] = m_panFIDValues;
402✔
160
    }
161

162
    // cppcheck-suppress knownConditionTrueFalse
163
    for (int i = 0; i < m_nFieldCount; i++)
3,120✔
164
    {
165
        const int iArrowField = m_mapOGRFieldToArrowField[i];
2,706✔
166
        if (iArrowField >= 0)
2,706✔
167
        {
168
            const auto poFieldDefn = poFeatureDefn->GetFieldDefn(i);
2,671✔
169
            out_array->children[iArrowField] = static_cast<struct ArrowArray *>(
5,342✔
170
                CPLCalloc(1, sizeof(struct ArrowArray)));
2,671✔
171
            auto psChild = out_array->children[iArrowField];
2,671✔
172

173
            psChild->release = OGRLayer::ReleaseArray;
2,671✔
174
            psChild->length = m_nMaxBatchSize;
2,671✔
175
            const auto eSubType = poFieldDefn->GetSubType();
2,671✔
176
            size_t nEltSize = 0;
2,671✔
177
            switch (poFieldDefn->GetType())
2,671✔
178
            {
179
                case OFTInteger:
1,713✔
180
                {
181
                    if (eSubType == OFSTBoolean)
1,713✔
182
                    {
183
                        nEltSize = sizeof(uint8_t);
65✔
184
                    }
185
                    else if (eSubType == OFSTInt16)
1,648✔
186
                    {
187
                        nEltSize = sizeof(int16_t);
61✔
188
                    }
189
                    else
190
                    {
191
                        nEltSize = sizeof(int32_t);
1,587✔
192
                    }
193

194
                    const auto &osDomainName = poFieldDefn->GetDomainName();
1,713✔
195
                    if (!osDomainName.empty() && poDS != nullptr)
1,713✔
196
                    {
197
                        const auto poFieldDomain =
198
                            poDS->GetFieldDomain(osDomainName);
24✔
199
                        if (poFieldDomain &&
48✔
200
                            poFieldDomain->GetDomainType() == OFDT_CODED)
24✔
201
                        {
202
                            const OGRCodedFieldDomain *poCodedDomain =
24✔
203
                                static_cast<const OGRCodedFieldDomain *>(
204
                                    poFieldDomain);
205
                            FillDict(psChild, poCodedDomain);
24✔
206
                        }
207
                    }
208

209
                    break;
1,713✔
210
                }
211
                case OFTInteger64:
78✔
212
                {
213
                    nEltSize = sizeof(int64_t);
78✔
214
                    break;
78✔
215
                }
216
                case OFTReal:
142✔
217
                {
218
                    if (eSubType == OFSTFloat32)
142✔
219
                    {
220
                        nEltSize = sizeof(float);
64✔
221
                    }
222
                    else
223
                    {
224
                        nEltSize = sizeof(double);
78✔
225
                    }
226
                    break;
142✔
227
                }
228

229
                case OFTDateTime:
72✔
230
                {
231
                    if (!bDateTimeAsString)
72✔
232
                    {
233
                        nEltSize = sizeof(int64_t);
66✔
234
                        break;
66✔
235
                    }
236
                    else
237
                    {
238
                        [[fallthrough]];
239
                    }
240
                }
241

242
                case OFTString:
243
                case OFTBinary:
244
                {
245
                    psChild->n_buffers = 3;
636✔
246
                    psChild->buffers = static_cast<const void **>(
636✔
247
                        CPLCalloc(3, sizeof(void *)));
636✔
248
                    psChild->buffers[1] = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
636✔
249
                        sizeof(uint32_t) * (1 + m_nMaxBatchSize));
250
                    if (psChild->buffers[1] == nullptr)
636✔
251
                        goto error;
×
252
                    memset(const_cast<void *>(psChild->buffers[1]), 0,
636✔
253
                           sizeof(uint32_t) * (1 + m_nMaxBatchSize));
636✔
254
                    constexpr size_t DEFAULT_STRING_SIZE = 10;
636✔
255
                    m_anArrowFieldMaxAlloc[iArrowField] =
1,272✔
256
                        DEFAULT_STRING_SIZE * m_nMaxBatchSize;
636✔
257
                    psChild->buffers[2] = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
636✔
258
                        m_anArrowFieldMaxAlloc[iArrowField]);
259
                    if (psChild->buffers[2] == nullptr)
636✔
260
                        goto error;
×
261
                    break;
636✔
262
                }
263

264
                case OFTDate:
36✔
265
                {
266
                    nEltSize = sizeof(int32_t);
36✔
267
                    break;
36✔
268
                }
269

270
                case OFTTime:
×
271
                {
272
                    nEltSize = sizeof(int32_t);
×
273
                    break;
×
274
                }
275

276
                default:
×
277
                    break;
×
278
            }
279

280
            if (nEltSize != 0)
2,671✔
281
            {
282
                psChild->n_buffers = 2;
2,035✔
283
                psChild->buffers =
2,035✔
284
                    static_cast<const void **>(CPLCalloc(2, sizeof(void *)));
2,035✔
285
                psChild->buffers[1] =
4,070✔
286
                    VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nEltSize * m_nMaxBatchSize);
2,035✔
287
                if (psChild->buffers[1] == nullptr)
2,035✔
288
                    goto error;
×
289
                memset(const_cast<void *>(psChild->buffers[1]), 0,
2,035✔
290
                       nEltSize * m_nMaxBatchSize);
2,035✔
291
            }
292
        }
293
    }
294

295
    // cppcheck-suppress knownConditionTrueFalse
296
    for (int i = 0; i < m_nGeomFieldCount; i++)
823✔
297
    {
298
        const int iArrowField = m_mapOGRGeomFieldToArrowField[i];
409✔
299
        if (iArrowField >= 0)
409✔
300
        {
301
            out_array->children[iArrowField] = static_cast<struct ArrowArray *>(
792✔
302
                CPLCalloc(1, sizeof(struct ArrowArray)));
396✔
303
            auto psChild = out_array->children[iArrowField];
396✔
304

305
            psChild->release = OGRLayer::ReleaseArray;
396✔
306
            psChild->length = m_nMaxBatchSize;
396✔
307

308
            psChild->n_buffers = 3;
396✔
309
            psChild->buffers =
396✔
310
                static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
396✔
311
            psChild->buffers[1] = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
396✔
312
                sizeof(uint32_t) * (1 + m_nMaxBatchSize));
313
            if (psChild->buffers[1] == nullptr)
396✔
314
                goto error;
×
315
            memset(const_cast<void *>(psChild->buffers[1]), 0,
396✔
316
                   sizeof(uint32_t) * (1 + m_nMaxBatchSize));
396✔
317
            constexpr size_t DEFAULT_WKB_SIZE = 100;
396✔
318
            m_anArrowFieldMaxAlloc[iArrowField] =
792✔
319
                DEFAULT_WKB_SIZE * m_nMaxBatchSize;
396✔
320
            psChild->buffers[2] = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(
396✔
321
                m_anArrowFieldMaxAlloc[iArrowField]);
322
            if (psChild->buffers[2] == nullptr)
396✔
323
                goto error;
×
324
        }
325
    }
326

327
    return;
414✔
328

329
error:
×
330
    out_array->release(out_array);
×
331
    memset(out_array, 0, sizeof(*out_array));
×
332
}
333

334
/************************************************************************/
335
/*                             FillDict()                               */
336
/************************************************************************/
337

338
/* static */
339
bool OGRArrowArrayHelper::FillDict(struct ArrowArray *psChild,
37✔
340
                                   const OGRCodedFieldDomain *poCodedDomain)
341
{
342
    int nLastCode = -1;
37✔
343
    uint32_t nCountChars = 0;
37✔
344
    int nCountNull = 0;
37✔
345
    for (const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
37✔
346
         psIter->pszCode; ++psIter)
121✔
347
    {
348
        if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
84✔
349
        {
350
            return false;
×
351
        }
352
        int nCode = atoi(psIter->pszCode);
84✔
353
        if (nCode <= nLastCode || nCode - nLastCode > 100)
84✔
354
        {
355
            return false;
×
356
        }
357
        for (int i = nLastCode + 1; i < nCode; ++i)
115✔
358
        {
359
            nCountNull++;
31✔
360
        }
361
        if (psIter->pszValue)
84✔
362
        {
363
            const size_t nLen = strlen(psIter->pszValue);
53✔
364
            if (nLen > std::numeric_limits<uint32_t>::max() - nCountChars)
53✔
365
                return false;
×
366
            nCountChars += static_cast<uint32_t>(nLen);
53✔
367
        }
368
        else
369
        {
370
            nCountNull++;
31✔
371
        }
372
        nLastCode = nCode;
84✔
373
    }
374
    const int nLength = 1 + nLastCode;
37✔
375

376
    auto psDict = static_cast<struct ArrowArray *>(
377
        CPLCalloc(1, sizeof(struct ArrowArray)));
37✔
378
    psChild->dictionary = psDict;
37✔
379

380
    psDict->release = OGRLayer::ReleaseArray;
37✔
381
    psDict->length = nLength;
37✔
382
    psDict->n_buffers = 3;
37✔
383
    psDict->buffers = static_cast<const void **>(CPLCalloc(3, sizeof(void *)));
37✔
384
    psDict->null_count = nCountNull;
37✔
385
    uint8_t *pabyNull = nullptr;
37✔
386
    if (nCountNull)
37✔
387
    {
388
        pabyNull = static_cast<uint8_t *>(
389
            VSI_MALLOC_ALIGNED_AUTO_VERBOSE((nLength + 7) / 8));
31✔
390
        if (pabyNull == nullptr)
31✔
391
        {
392
            psDict->release(psDict);
×
393
            CPLFree(psDict);
×
394
            psChild->dictionary = nullptr;
×
395
            return false;
×
396
        }
397
        memset(pabyNull, 0xFF, (nLength + 7) / 8);
31✔
398
        psDict->buffers[0] = pabyNull;
31✔
399
    }
400

401
    uint32_t *panOffsets = static_cast<uint32_t *>(
402
        VSI_MALLOC_ALIGNED_AUTO_VERBOSE(sizeof(uint32_t) * (1 + nLength)));
37✔
403
    if (panOffsets == nullptr)
37✔
404
    {
405
        psDict->release(psDict);
×
406
        CPLFree(psDict);
×
407
        psChild->dictionary = nullptr;
×
408
        return false;
×
409
    }
410
    psDict->buffers[1] = panOffsets;
37✔
411

412
    char *pachValues =
413
        static_cast<char *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nCountChars));
37✔
414
    if (pachValues == nullptr)
37✔
415
    {
416
        psDict->release(psDict);
×
417
        CPLFree(psDict);
×
418
        psChild->dictionary = nullptr;
×
419
        return false;
×
420
    }
421
    psDict->buffers[2] = pachValues;
37✔
422

423
    nLastCode = -1;
37✔
424
    uint32_t nOffset = 0;
37✔
425
    for (const OGRCodedValue *psIter = poCodedDomain->GetEnumeration();
37✔
426
         psIter->pszCode; ++psIter)
121✔
427
    {
428
        if (CPLGetValueType(psIter->pszCode) != CPL_VALUE_INTEGER)
84✔
429
        {
430
            psDict->release(psDict);
×
431
            CPLFree(psDict);
×
432
            psChild->dictionary = nullptr;
×
433
            return false;
×
434
        }
435
        int nCode = atoi(psIter->pszCode);
84✔
436
        if (nCode <= nLastCode || nCode - nLastCode > 100)
84✔
437
        {
438
            psDict->release(psDict);
×
439
            CPLFree(psDict);
×
440
            psChild->dictionary = nullptr;
×
441
            return false;
×
442
        }
443
        for (int i = nLastCode + 1; i < nCode; ++i)
115✔
444
        {
445
            panOffsets[i] = nOffset;
31✔
446
            if (pabyNull)
31✔
447
                pabyNull[i / 8] &= static_cast<uint8_t>(~(1 << (i % 8)));
31✔
448
        }
449
        panOffsets[nCode] = nOffset;
84✔
450
        if (psIter->pszValue)
84✔
451
        {
452
            const size_t nLen = strlen(psIter->pszValue);
53✔
453
            memcpy(pachValues + nOffset, psIter->pszValue, nLen);
53✔
454
            nOffset += static_cast<uint32_t>(nLen);
53✔
455
        }
456
        else if (pabyNull)
31✔
457
        {
458
            pabyNull[nCode / 8] &= static_cast<uint8_t>(~(1 << (nCode % 8)));
31✔
459
        }
460
        nLastCode = nCode;
84✔
461
    }
462
    panOffsets[nLength] = nOffset;
37✔
463

464
    return true;
37✔
465
}
466

467
//! @endcond
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc