• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 25131031480

29 Apr 2026 08:04PM UTC coverage: 59.401% (+4.5%) from 54.936%
25131031480

push

github

web-flow
feat(shapefile) Fast SHP GeoArrow output (#3409)

11620 of 21432 branches covered (54.22%)

Branch coverage included in aggregate %.

681 of 1180 new or added lines in 19 files covered. (57.71%)

6 existing lines in 3 files now uncovered.

24065 of 38643 relevant lines covered (62.28%)

15918.03 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.72
/modules/shapefile/src/shapefile-arrow-loader-with-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderContext} from '@loaders.gl/loader-utils';
6
import {
7
  parseFromContext,
8
  parseInBatchesFromContext,
9
  toArrayBufferIterator
10
} from '@loaders.gl/loader-utils';
11
import * as arrow from 'apache-arrow';
12
import type {ArrowTable, ArrowTableBatch, Field, Schema as TableSchema} from '@loaders.gl/schema';
13
import {ArrowTableBuilder, convertSchemaToArrow} from '@loaders.gl/schema-utils';
14
import {
15
  type GeoParquetGeometryType,
16
  makeWKBGeometryField,
17
  setWKBGeometryColumnMetadata
18
} from '@loaders.gl/gis';
19
import {Proj4Projection} from '@math.gl/proj4';
20
import {SHPLoaderWithParser} from './shp-loader-with-parser';
21
import {DBFLoaderWithParser} from './dbf-loader-with-parser';
22
import type {ShapefileLoaderOptions} from './shapefile-loader';
23
import type {SHPResult} from './lib/parsers/parse-shp';
24
import type {SHPHeader} from './lib/parsers/parse-shp-header';
25
import {loadShapefileSidecarFiles, replaceExtension} from './lib/parsers/parse-shapefile';
26
import {
27
  type SHPWKBGeometry,
28
  makeWKBGeometryArrowTable
29
} from './lib/parsers/build-wkb-geometry-arrow';
30
import {makeSHPGeoArrowGeometryTable} from './lib/parsers/build-geoarrow-geometry-arrow';
31
/** Column name used for the WKB geometry field and for its geometry column metadata. */
const GEOMETRY_COLUMN_NAME = 'geometry';
4✔
32

33
/**
 * Parses a complete shapefile (non-streaming) into one Arrow table.
 *
 * The geometry column is produced either by the typed GeoArrow builder (when the
 * `geoarrowEncoding` option is `'geoarrow'`) or by parsing the SHP data to WKB.
 * When `context.url` is set, the sibling `.dbf` file is fetched and its columns are
 * placed in front of the geometry column; a rejected fetch or a non-ok response is
 * treated as "no attributes" and the geometry-only table is returned.
 *
 * @param arrayBuffer - Contents of the `.shp` file.
 * @param options - Shapefile loader options (`shapefile`, `shp`, `dbf`, `gis`).
 * @param context - Loader context used to fetch sidecar files and run sub-loaders.
 * @returns The combined Arrow table, or the geometry-only table when no DBF was loaded.
 */
export async function parseShapefileToArrow(
  arrayBuffer: ArrayBuffer,
  options?: ShapefileLoaderOptions,
  context?: LoaderContext
): Promise<ArrowTable> {
  const {cpg: codePage, prj: sourceCrs} = await loadShapefileSidecarFiles(options, context);
  const transform = getReprojectionTransform(sourceCrs, options);
  const useTypedGeoArrow = getTypedGeoArrowEncoding(options);

  // The SHP header is only available on the WKB path.
  let header: SHPHeader | undefined;
  let geometryTable: ArrowTable;
  if (!useTypedGeoArrow) {
    const shpOptions = {
      ...options,
      shp: {
        ...options?.shp,
        shape: 'wkb' as const
      }
    };
    const parsedShp = (await parseFromContext(
      arrayBuffer,
      SHPLoaderWithParser,
      shpOptions,
      context!
    )) as SHPResult;
    header = parsedShp.header;
    geometryTable = makeGeometryArrowTable(
      parsedShp.geometries as (SHPWKBGeometry | null)[],
      header,
      transform
    );
  } else {
    geometryTable = makeSHPGeoArrowGeometryTable(arrayBuffer, options, {transform});
  }

  // Attribute columns come from the sibling .dbf file, when one can be fetched.
  let dbfResponse: Awaited<ReturnType<LoaderContext['fetch']>> | null = null;
  if (context?.url) {
    dbfResponse = await context.fetch(replaceExtension(context.url, 'dbf')).catch(() => null);
  }

  let propertyTable: ArrowTable | null = null;
  let propertySchema: TableSchema | null = null;
  if (dbfResponse?.ok) {
    const dbfOptions = {
      ...options,
      dbf: {
        ...options?.dbf,
        shape: 'arrow-table' as const,
        // Fall back to latin1 when no .cpg sidecar declared an encoding.
        encoding: codePage || 'latin1'
      }
    };
    propertyTable = (await parseFromContext(
      dbfResponse as any,
      DBFLoaderWithParser,
      dbfOptions,
      context!
    )) as ArrowTable;
    propertySchema = propertyTable.schema || null;
  }

  const schema = buildOutputSchema(propertySchema, header, geometryTable.schema);
  if (!propertyTable) {
    return geometryTable;
  }
  return appendGeometryColumnToArrowTable(propertyTable, geometryTable, schema);
}
96

97
/**
 * Parses a shapefile into Arrow batches while keeping the DBF-derived schema stable.
 *
 * SHP geometries are parsed to WKB in batches. When `context.url` is set and the sibling
 * `.dbf` file can be fetched, DBF batches are parsed alongside and each yielded batch
 * contains the DBF columns followed by the geometry column. Geometry and property batches
 * are paired row-for-row: each yielded batch holds as many rows as the smaller of the two
 * pending tables, and the remainder of the larger one is kept for the next round.
 * Streaming stops as soon as either side is exhausted. If no data batch was produced,
 * one empty batch carrying the output schema is yielded.
 *
 * @param asyncIterator - Sync or async iterable of `.shp` file chunks.
 * @param options - Shapefile loader options; `batchSize` is read from `shapefile`, `shp`
 *   or `dbf` (in that order) and defaults to 10000.
 * @param context - Loader context used to fetch sidecar files and run sub-loaders.
 * @throws Error when typed GeoArrow output is requested (only supported for non-streaming parse).
 */
export async function* parseShapefileToArrowInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: ShapefileLoaderOptions,
  context?: LoaderContext
): AsyncIterable<ArrowTableBatch> {
  if (getTypedGeoArrowEncoding(options)) {
    throw new Error('Typed GeoArrow shapefile output is only supported for non-streaming parse.');
  }

  const {cpg, prj} = await loadShapefileSidecarFiles(options, context);
  // The reprojection does not depend on the batch, so build it once instead of per batch.
  const transform = getReprojectionTransform(prj, options);
  const batchSize =
    options?.shapefile?.batchSize || options?.shp?.batchSize || options?.dbf?.batchSize || 10000;

  const shapeIterable = await parseInBatchesFromContext(
    toArrayBufferIterator(asyncIterator),
    SHPLoaderWithParser,
    {
      ...options,
      shp: {
        ...options?.shp,
        shape: 'wkb',
        batchSize
      }
    },
    context!
  );
  const shapeIterator = getAsyncIterator(shapeIterable);

  // The first non-metadata value produced by the SHP batch parser is used as the header.
  const shapeHeader = await getNextNonMetadataValue(shapeIterator);
  const header = shapeHeader as SHPHeader;

  const dbfResponse = context?.url
    ? await context.fetch(replaceExtension(context.url, 'dbf')).catch(() => null)
    : null;
  if (dbfResponse?.ok) {
    const dbfOptions = {
      ...options,
      dbf: {
        ...options?.dbf,
        shape: 'arrow-table' as const,
        batchSize,
        encoding: cpg || 'latin1'
      }
    };
    const propertyIterable = await parseInBatchesFromContext(
      dbfResponse,
      DBFLoaderWithParser,
      dbfOptions,
      context!
    );
    const propertyIterator = getAsyncIterator(propertyIterable);

    // The schema of the first DBF batch fixes the output schema for every yielded batch.
    const firstPropertyBatch = await getNextArrowBatch(propertyIterator);
    const propertySchema: TableSchema | null = firstPropertyBatch?.schema || null;
    const outputSchema = buildOutputSchema(propertySchema, header);
    const propertyQueue: arrow.Table[] = [];
    const geometryQueue: arrow.Table[] = [];
    let yieldedDataBatch = false;

    if (firstPropertyBatch && firstPropertyBatch.length > 0) {
      propertyQueue.push(firstPropertyBatch.data);
    }

    // Zero-row tables at the head of a queue can never be paired; drop them so that
    // "queue is non-empty" always means "queue[0] has rows".
    const dropEmptyLeadingTables = (queue: arrow.Table[]): void => {
      while (queue.length > 0 && queue[0].numRows === 0) {
        queue.shift();
      }
    };

    let shapeDone = false;
    let propertyDone = !firstPropertyBatch;
    while (true) {
      dropEmptyLeadingTables(geometryQueue);
      dropEmptyLeadingTables(propertyQueue);

      if (!shapeDone && geometryQueue.length === 0) {
        const shapeBatch = await shapeIterator.next();
        if (shapeBatch.done) {
          shapeDone = true;
        } else if (shapeBatch.value?.batchType !== 'metadata') {
          const geometryRows = makeGeometryArrowTable(
            shapeBatch.value as (SHPWKBGeometry | null)[],
            header,
            transform
          ).data;
          // Mirror the property side: never queue a table without rows.
          if (geometryRows.numRows > 0) {
            geometryQueue.push(geometryRows);
          }
        }
      }

      // Only pull more properties when there are geometries waiting and nothing pending,
      // so each queue holds at most one table and rows are always taken from queue[0].
      if (!propertyDone && propertyQueue.length === 0 && geometryQueue.length > 0) {
        const propertyBatch = await getNextArrowBatch(propertyIterator);
        if (!propertyBatch) {
          propertyDone = true;
        } else if (propertyBatch.length > 0) {
          propertyQueue.push(propertyBatch.data);
        }
      }

      dropEmptyLeadingTables(propertyQueue);
      if (geometryQueue.length === 0 || propertyQueue.length === 0) {
        const shapesExhausted = shapeDone && geometryQueue.length === 0;
        const propertiesExhausted = propertyDone && propertyQueue.length === 0;
        if (shapesExhausted || propertiesExhausted) {
          break;
        }
        continue;
      }

      // Pair rows of the two head tables only, so the count never exceeds either table.
      const rowCount = Math.min(geometryQueue[0].numRows, propertyQueue[0].numRows);
      const propertyTable = takeRowsFromQueue(propertyQueue, rowCount);
      const geometryTable = takeRowsFromQueue(geometryQueue, rowCount);
      const batch = appendGeometryColumnToArrowTable(
        {shape: 'arrow-table', schema: propertySchema || undefined, data: propertyTable},
        {shape: 'arrow-table', data: geometryTable},
        outputSchema
      );
      yieldedDataBatch = true;
      yield {
        shape: 'arrow-table',
        batchType: 'data',
        length: batch.data.numRows,
        schema: batch.schema,
        data: batch.data
      };
    }
    if (!yieldedDataBatch) {
      yield makeEmptyArrowBatch(outputSchema);
    }
    return;
  }

  // No DBF available: stream geometry-only batches.
  const outputSchema = buildOutputSchema(null, header);
  let yieldedDataBatch = false;

  while (true) {
    const shapeBatch = await shapeIterator.next();
    if (shapeBatch.done) {
      break;
    }
    if (shapeBatch.value?.batchType === 'metadata') {
      continue;
    }
    const arrowTable = makeGeometryArrowTable(
      shapeBatch.value as (SHPWKBGeometry | null)[],
      header,
      transform
    );
    yieldedDataBatch = true;
    yield {
      shape: 'arrow-table',
      batchType: 'data',
      length: arrowTable.data.numRows,
      schema: outputSchema,
      data: arrowTable.data
    };
  }
  if (!yieldedDataBatch) {
    yield makeEmptyArrowBatch(outputSchema);
  }
}
259

260
/**
 * Builds the output table schema: the DBF property fields followed by one geometry field.
 *
 * When `geometrySchema` is supplied, its first field and its metadata are reused as-is.
 * Otherwise a WKB geometry field named `GEOMETRY_COLUMN_NAME` is created and the WKB
 * geometry column metadata (including geometry types inferred from `header`) is written
 * into the schema metadata.
 *
 * @param propertySchema - Schema of the DBF property columns, or `null` when there are none.
 * @param header - SHP header used to infer geometry types on the WKB path.
 * @param geometrySchema - Schema of an already built geometry table, if any.
 * @returns A new schema object; the inputs are not modified.
 */
function buildOutputSchema(
  propertySchema: TableSchema | null,
  header?: SHPHeader,
  geometrySchema?: TableSchema
): TableSchema {
  const geometryField: Field =
    geometrySchema?.fields[0] || makeWKBGeometryField(GEOMETRY_COLUMN_NAME);

  // Geometry metadata is spread last so it wins over property metadata on key clashes.
  const metadata = {
    ...(propertySchema?.metadata || {}),
    ...(geometrySchema?.metadata || {})
  };

  if (!geometrySchema) {
    setWKBGeometryColumnMetadata(metadata, {
      geometryColumnName: GEOMETRY_COLUMN_NAME,
      geometryTypes: inferGeometryTypes(header)
    });
  }

  const propertyFields = propertySchema?.fields || [];
  return {
    fields: [...propertyFields, geometryField],
    metadata
  };
}
285

286
/**
 * Converts a list of parsed WKB geometries into a geometry-only Arrow table.
 *
 * The table schema is the property-less output schema derived from the SHP header.
 *
 * @param geometries - Parsed geometries; entries may be `null` or `undefined`.
 * @param header - SHP header used to derive geometry type metadata.
 * @param transform - Optional per-coordinate reprojection function.
 */
function makeGeometryArrowTable(
  geometries: (SHPWKBGeometry | null | undefined)[],
  header?: SHPHeader,
  transform?: (coordinate: number[]) => number[]
) {
  return makeWKBGeometryArrowTable(geometries, buildOutputSchema(null, header), transform);
}
294

295
/**
 * Builds a single-batch Arrow table holding the property columns followed by the
 * geometry column.
 *
 * Only the first record batch of each input table is read, and only the first child
 * column of the geometry batch is appended.
 * NOTE(review): the struct length is the larger of the two row counts; callers are
 * presumably expected to pass tables with equal row counts — confirm.
 *
 * @param propertyTable - Table providing the leading (DBF) columns.
 * @param geometryTable - Table whose first column is the geometry column.
 * @param schema - Output schema describing property fields plus the geometry field.
 */
function appendGeometryColumnToArrowTable(
  propertyTable: ArrowTable,
  geometryTable: ArrowTable,
  schema: TableSchema
): ArrowTable {
  const firstPropertyBatch = propertyTable.data.batches[0];
  const firstGeometryBatch = geometryTable.data.batches[0];

  const arrowSchema = convertSchemaToArrow(schema);
  const structType = new arrow.Struct(arrowSchema.fields);

  // Reuse the existing column data; nothing is copied.
  const geometryColumn = firstGeometryBatch.data.children[0];
  const columns = [...firstPropertyBatch.data.children, geometryColumn];
  const length = Math.max(propertyTable.data.numRows, geometryTable.data.numRows);

  const combinedBatch = new arrow.RecordBatch(
    arrowSchema,
    new arrow.Data(structType, 0, length, 0, undefined, columns)
  );
  const data = new arrow.Table(arrowSchema, [combinedBatch]);

  return {shape: 'arrow-table', schema, data};
}
315

316
/**
 * Creates the coordinate reprojection function requested through `options.gis`.
 *
 * @param sourceCrs - CRS definition of the source data (from the `.prj` sidecar);
 *   `'WGS84'` is used when it is missing or empty.
 * @param options - Loader options; `gis.reproject` enables reprojection and
 *   `gis._targetCrs` selects the target CRS (default `'WGS84'`).
 * @returns A function projecting one coordinate, or `undefined` when reprojection is off.
 */
function getReprojectionTransform(
  sourceCrs: string | undefined,
  options?: ShapefileLoaderOptions
): ((coordinate: number[]) => number[]) | undefined {
  if (!options?.gis?.reproject) {
    return undefined;
  }

  const from = sourceCrs || 'WGS84';
  const to = options?.gis?._targetCrs || 'WGS84';
  const projection = new Proj4Projection({from, to});
  return (coordinate: number[]): number[] => projection.project(coordinate);
}
327

328
/**
 * Reports whether typed GeoArrow geometry output was requested.
 *
 * `shapefile.geoarrowEncoding` takes precedence; `shp.geoarrowEncoding` is consulted
 * only when the former is unset or empty.
 *
 * @returns `true` only when the effective encoding is exactly `'geoarrow'`.
 */
function getTypedGeoArrowEncoding(options?: ShapefileLoaderOptions): boolean {
  const shapefileEncoding = options?.shapefile?.geoarrowEncoding;
  const requestedEncoding = shapefileEncoding
    ? shapefileEncoding
    : options?.shp?.geoarrowEncoding;
  return requestedEncoding === 'geoarrow';
}
332

333
/**
 * Derives the GeoParquet geometry type list from the SHP header.
 *
 * @param header - SHP header; only its `type` code is read.
 * @returns A one-element list with the mapped type, or an empty list when the header
 *   is missing or its type code is not recognized.
 */
function inferGeometryTypes(header?: SHPHeader): GeoParquetGeometryType[] {
  const headerType = getGeometryTypeFromHeader(header?.type);
  if (!headerType) {
    return [];
  }
  return [headerType];
}
338

339
/**
 * Lookup from SHP header shape type code to GeoParquet geometry type string.
 * Codes 11/13/15/18 map to the ` Z` variants; codes 2x map to the same string as the
 * corresponding base code.
 */
const SHP_TYPE_CODE_TO_GEOMETRY_TYPE = new Map<number, GeoParquetGeometryType>([
  [1, 'Point'],
  [11, 'Point Z'],
  [21, 'Point'],
  [3, 'LineString'],
  [13, 'LineString Z'],
  [23, 'LineString'],
  [5, 'Polygon'],
  [15, 'Polygon Z'],
  [25, 'Polygon'],
  [8, 'MultiPoint'],
  [18, 'MultiPoint Z'],
  [28, 'MultiPoint']
]);

/**
 * Maps a SHP header geometry type code to a GeoParquet geometry type string.
 *
 * @param type - Shape type code from the SHP header.
 * @returns The geometry type string, or `null` for a missing or unmapped code.
 */
function getGeometryTypeFromHeader(type?: number): GeoParquetGeometryType | null {
  if (type === undefined) {
    return null;
  }
  const geometryType = SHP_TYPE_CODE_TO_GEOMETRY_TYPE.get(type);
  return geometryType === undefined ? null : geometryType;
}
362

363
/**
 * Obtains an iterator from a sync or async iterable, preferring the async protocol.
 *
 * The sync iterator is returned unchanged (typed as an `AsyncIterator`), so its
 * `next()` results are plain objects that callers are expected to `await`.
 */
function getAsyncIterator(iterable: AsyncIterable<any> | Iterable<any>): AsyncIterator<any> {
  const asyncIterator = iterable[Symbol.asyncIterator]?.();
  if (asyncIterator) {
    return asyncIterator as AsyncIterator<any>;
  }
  const syncIterator = iterable[Symbol.iterator]?.();
  return syncIterator as AsyncIterator<any>;
}
368

369
/**
 * Advances a parser iterator past any values whose `batchType` is `'metadata'`.
 *
 * @returns The first value that is not a metadata batch, or `null` when the iterator
 *   finishes first.
 */
async function getNextNonMetadataValue(iterator: AsyncIterator<any>): Promise<any> {
  let step = await iterator.next();
  while (!step.done && step.value?.batchType === 'metadata') {
    step = await iterator.next();
  }
  return step.done ? null : step.value;
}
381

382
/**
 * Advances a parser iterator until it produces a value whose `shape` is `'arrow-table'`.
 *
 * @returns That value, or `null` when the iterator finishes without producing one.
 */
async function getNextArrowBatch(iterator: AsyncIterator<any>): Promise<ArrowTableBatch | null> {
  for (let step = await iterator.next(); !step.done; step = await iterator.next()) {
    if (step.value?.shape === 'arrow-table') {
      return step.value;
    }
  }
  return null;
}
393

394
/** Sums the row counts of all tables currently waiting in a queue. */
function getQueuedRowCount(queue: arrow.Table[]): number {
  let totalRows = 0;
  for (const table of queue) {
    totalRows += table.numRows;
  }
  return totalRows;
}
397

398
/**
 * Removes up to `rowCount` rows from the first table in `queue` and returns them.
 *
 * When the first table holds `rowCount` rows or fewer, it is removed from the queue and
 * returned whole. Otherwise its first `rowCount` rows are returned and the head of the
 * queue is replaced by the remaining rows. Rows are never gathered from more than one
 * queued table, so the result can be shorter than `rowCount`.
 *
 * @param queue - Pending tables; mutated in place.
 * @param rowCount - Number of rows requested from the head of the queue.
 * @returns A table with the leading rows of the first queued table.
 * @throws Error when the queue is empty.
 */
function takeRowsFromQueue(queue: arrow.Table[], rowCount: number): arrow.Table {
  const table = queue[0];
  if (!table) {
    throw new Error('Cannot take rows from an empty table queue.');
  }
  if (rowCount >= table.numRows) {
    queue.shift();
    return table;
  }
  const result = table.slice(0, rowCount);
  // `slice` takes (begin, end) row indices, not (begin, length): keep everything
  // from `rowCount` to the end of the table.
  queue[0] = table.slice(rowCount, table.numRows);
  return result;
}
408

409
/**
 * Builds a zero-length Arrow data batch for the given schema, so that shapefiles
 * without rows still expose their schema in batch mode.
 *
 * @param schema - Schema the empty batch should carry.
 */
function makeEmptyArrowBatch(schema: TableSchema): ArrowTableBatch {
  const builder = new ArrowTableBuilder(schema);
  const {data} = builder.finishTable();
  const emptyBatch: ArrowTableBatch = {
    shape: 'arrow-table',
    batchType: 'data',
    length: 0,
    schema,
    data
  };
  return emptyBatch;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc