• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 24907303489

24 Apr 2026 07:12PM UTC coverage: 59.423% (+0.09%) from 59.334%
24907303489

push

github

web-flow
feat: Dynamic import loaders (#3405)

11252 of 20783 branches covered (54.14%)

Branch coverage included in aggregate %.

1164 of 1518 new or added lines in 244 files covered. (76.68%)

41 existing lines in 18 files now uncovered.

23432 of 37585 relevant lines covered (62.34%)

16317.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.77
/modules/shapefile/src/shapefile-arrow-loader-with-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderContext, LoaderWithParser} from '@loaders.gl/loader-utils';
6
import {
7
  parseFromContext,
8
  parseInBatchesFromContext,
9
  toArrayBufferIterator
10
} from '@loaders.gl/loader-utils';
11
import type {
12
  ArrowTable,
13
  ArrowTableBatch,
14
  BinaryGeometry,
15
  Field,
16
  Geometry,
17
  Schema as TableSchema,
18
  Feature
19
} from '@loaders.gl/schema';
20
import {ArrowTableBuilder} from '@loaders.gl/schema-utils';
21
import {
22
  convertBinaryGeometryToGeometry,
23
  convertGeometryToWKB,
24
  type GeoParquetGeometryType,
25
  makeWKBGeometryField,
26
  setWKBGeometryColumnMetadata,
27
  transformGeoJsonCoords
28
} from '@loaders.gl/gis';
29
import {Proj4Projection} from '@math.gl/proj4';
30
import {SHPLoaderWithParser} from './shp-loader-with-parser';
31
import {DBFArrowLoaderWithParser} from './dbf-arrow-loader-with-parser';
32
import {DBFLoaderWithParser} from './dbf-loader-with-parser';
33
import type {ShapefileLoaderOptions} from './shapefile-loader';
34
import type {SHPHeader} from './lib/parsers/parse-shp-header';
35
import {loadShapefileSidecarFiles, replaceExtension} from './lib/parsers/parse-shapefile';
36
import {ShapefileArrowLoader as ShapefileArrowLoaderMetadata} from './shapefile-arrow-loader';
37

38
// Drop `preload` from the shared loader metadata before spreading it into the
// loader object below; the underscore-named binding exists only to exclude the
// key from the rest-spread.
const {preload: _ShapefileArrowLoaderPreload, ...ShapefileArrowLoaderMetadataWithoutPreload} =
  ShapefileArrowLoaderMetadata;

// Name of the WKB geometry column appended to the output Arrow table.
const GEOMETRY_COLUMN_NAME = 'geometry';

/** Options for `ShapefileArrowLoaderWithParser`. */
export type ShapefileArrowLoaderOptions = ShapefileLoaderOptions;

/**
 * Shapefile loader that returns properties and geometry as an Arrow table.
 *
 * The loader preserves DBF attributes as Arrow columns and appends a WKB
 * `geometry` column annotated with geospatial schema metadata.
 */
export const ShapefileArrowLoaderWithParser = {
  ...ShapefileArrowLoaderMetadataWithoutPreload,
  parse: parseShapefileToArrow,
  parseInBatches: parseShapefileToArrowInBatches
} as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, ShapefileArrowLoaderOptions>;
57

58
/** Parses a shapefile and returns an Arrow table with a WKB geometry column. */
59
export async function parseShapefileToArrow(
60
  arrayBuffer: ArrayBuffer,
61
  options?: ShapefileArrowLoaderOptions,
62
  context?: LoaderContext
63
): Promise<ArrowTable> {
64
  const {header, geometries} = await parseFromContext(
26✔
65
    arrayBuffer,
66
    SHPLoaderWithParser,
67
    options,
68
    context!
69
  );
70
  const {cpg, prj} = await loadShapefileSidecarFiles(options, context);
26✔
71

72
  const geometryObjects = parseGeometries(geometries);
26✔
73
  const features = maybeReprojectFeatures(
26✔
74
    geometryObjects.map(geometry => ({type: 'Feature', geometry, properties: {}})),
90✔
75
    prj,
76
    options
77
  );
78

79
  let propertySchema: TableSchema | null = null;
26✔
80
  let propertyRows: Record<string, unknown>[] = [];
26✔
81

82
  const dbfResponse = await context?.fetch(replaceExtension(context?.url || '', 'dbf'));
26!
83
  if (dbfResponse?.ok) {
26!
84
    const table = await parseFromContext(
26✔
85
      dbfResponse as any,
86
      DBFArrowLoaderWithParser,
87
      {
88
        ...options,
89
        dbf: {
90
          ...options?.dbf,
91
          encoding: cpg || 'latin1'
48✔
92
        }
93
      },
94
      context!
95
    );
96
    propertySchema = table.schema || null;
26!
97
    propertyRows = getRowsFromArrowTable(table);
26✔
98
  }
99

100
  const schema = buildOutputSchema(
26✔
101
    propertySchema,
102
    features.map(feature => feature.geometry),
90✔
103
    header
104
  );
105
  const tableBuilder = new ArrowTableBuilder(schema);
26✔
106

107
  const rowCount = Math.max(features.length, propertyRows.length);
26✔
108
  for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
26✔
109
    tableBuilder.addObjectRow(
90✔
110
      makeArrowRow(propertyRows[rowIndex], features[rowIndex]?.geometry, header)
111
    );
112
  }
113

114
  return tableBuilder.finishTable();
26✔
115
}
116

117
/**
 * Parses a shapefile into Arrow batches while keeping DBF-derived schema stable.
 *
 * The SHP stream is parsed in batches. When a `.dbf` sidecar is available, the
 * full DBF is parsed once up-front (from a clone of the response where
 * supported) purely to fix the attribute schema, then its rows are streamed
 * and joined positionally with the geometry batches.
 *
 * @param asyncIterator Batched `.shp` content.
 * @param options Shapefile loader options.
 * @param context Loader context used to fetch sidecar files next to `context.url`.
 */
export async function* parseShapefileToArrowInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: ShapefileArrowLoaderOptions,
  context?: LoaderContext
): AsyncIterable<ArrowTableBatch> {
  // CPG provides the DBF text encoding; PRJ provides the source CRS.
  const {cpg, prj} = await loadShapefileSidecarFiles(options, context);

  const shapeIterable = await parseInBatchesFromContext(
    toArrayBufferIterator(asyncIterator),
    SHPLoaderWithParser,
    options,
    context!
  );
  const shapeIterator = getAsyncIterator(shapeIterable);

  // The SHP batch stream emits its header before any geometry batches.
  const shapeHeader = await getNextNonMetadataValue(shapeIterator);
  const header = shapeHeader as SHPHeader;

  let propertyIterator: AsyncIterator<any> | null = null;
  let propertySchema: TableSchema | null = null;

  // NOTE(review): this fetch is unguarded, so a rejected fetch (as opposed to
  // a `!ok` response) fails the whole load even though .dbf is optional —
  // confirm context.fetch only signals absence via `ok === false`.
  const dbfResponse = await context?.fetch(replaceExtension(context?.url || '', 'dbf'));
  if (dbfResponse?.ok) {
    const dbfOptions = {
      ...options,
      dbf: {
        ...options?.dbf,
        shape: 'object-row-table' as const,
        encoding: cpg || 'latin1'
      }
    };
    // Parse the full DBF once up-front, from a clone when the response
    // supports it, solely to obtain the attribute schema before batching.
    const schemaResponse =
      'clone' in dbfResponse
        ? dbfResponse.clone()
        : await context?.fetch(replaceExtension(context?.url || '', 'dbf'));
    const propertyTable = await parseFromContext(
      schemaResponse as any,
      DBFLoaderWithParser,
      dbfOptions,
      context!
    );
    propertySchema = propertyTable?.schema || null;

    // Second pass over the original response streams the DBF rows in batches.
    const propertyIterable = await parseInBatchesFromContext(
      dbfResponse,
      DBFLoaderWithParser,
      dbfOptions,
      context!
    );
    propertyIterator = getAsyncIterator(propertyIterable);

    // One schema for every batch: DBF fields plus the WKB geometry column.
    const outputSchema = buildOutputSchema(propertySchema, [], header);
    const propertyQueue: Record<string, unknown>[] = [];
    const geometryQueue: Geometry[] = [];
    let yieldedDataBatch = false;

    // Prime the property queue so the first join iteration has rows available.
    const firstPropertyBatch = await getNextPropertyBatch(propertyIterator);
    if (firstPropertyBatch) {
      propertyQueue.push(...firstPropertyBatch);
    }

    let shapeDone = false;
    let propertyDone = false;
    // Refill whichever queue is behind, then emit the joined rows both sides have.
    while (!shapeDone || !propertyDone || geometryQueue.length > 0 || propertyQueue.length > 0) {
      if (!shapeDone && geometryQueue.length === 0) {
        const shapeBatch = await shapeIterator.next();
        if (shapeBatch.done) {
          shapeDone = true;
        } else if (shapeBatch.value?.batchType !== 'metadata') {
          geometryQueue.push(...parseGeometries(shapeBatch.value as BinaryGeometry[]));
        }
      }

      if (!propertyDone && propertyQueue.length < geometryQueue.length) {
        const propertyBatch = await propertyIterator.next();
        if (propertyBatch.done) {
          propertyDone = true;
        } else if (Array.isArray(propertyBatch.value)) {
          propertyQueue.push(...propertyBatch.value);
        }
      }

      // NOTE(review): batch mode joins with Math.min and breaks as soon as one
      // stream is exhausted, so surplus geometries or DBF rows are dropped;
      // the atomic parseShapefileToArrow pads with Math.max instead — confirm
      // this asymmetry between the two modes is intended.
      const rowCount = Math.min(geometryQueue.length, propertyQueue.length);
      if (rowCount === 0) {
        if (
          (shapeDone && geometryQueue.length === 0) ||
          (propertyDone && propertyQueue.length === 0)
        ) {
          break;
        }
        continue;
      }

      const features = maybeReprojectFeatures(
        geometryQueue
          .splice(0, rowCount)
          .map(geometry => ({type: 'Feature', geometry, properties: {}})),
        prj,
        options
      );
      const propertyRows = propertyQueue.splice(0, rowCount);
      const batchBuilder = new ArrowTableBuilder(outputSchema);
      for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        batchBuilder.addObjectRow(
          makeArrowRow(propertyRows[rowIndex], features[rowIndex]?.geometry, header)
        );
      }
      const batch = batchBuilder.finishBatch();
      if (batch) {
        yieldedDataBatch = true;
        yield batch;
      }
    }
    // Zero-row inputs still yield one empty batch so consumers see the schema.
    if (!yieldedDataBatch) {
      yield makeEmptyArrowBatch(outputSchema);
    }
    return;
  }

  // No readable .dbf: emit geometry-only batches with an attribute-free schema.
  const outputSchema = buildOutputSchema(null, [], header);
  let yieldedDataBatch = false;

  while (true) {
    const shapeBatch = await shapeIterator.next();
    if (shapeBatch.done) {
      break;
    }
    if (shapeBatch.value?.batchType === 'metadata') {
      continue;
    }
    const features = maybeReprojectFeatures(
      parseGeometries(shapeBatch.value as BinaryGeometry[]).map(geometry => ({
        type: 'Feature',
        geometry,
        properties: {}
      })),
      prj,
      options
    );
    const batchBuilder = new ArrowTableBuilder(outputSchema);
    for (const feature of features) {
      batchBuilder.addObjectRow(makeArrowRow(undefined, feature.geometry, header));
    }
    const batch = batchBuilder.finishBatch();
    if (batch) {
      yieldedDataBatch = true;
      yield batch;
    }
  }
  // Zero-row inputs still yield one empty batch so consumers see the schema.
  if (!yieldedDataBatch) {
    yield makeEmptyArrowBatch(outputSchema);
  }
}
273

274
/** Creates the output Arrow schema by appending the WKB geometry column to DBF fields. */
275
function buildOutputSchema(
276
  propertySchema: TableSchema | null,
277
  geometries: Geometry[],
278
  header?: SHPHeader
279
): TableSchema {
280
  const geometryField: Field = makeWKBGeometryField(GEOMETRY_COLUMN_NAME);
40✔
281
  const schema: TableSchema = {
40✔
282
    fields: [...(propertySchema?.fields || []), geometryField],
40!
283
    metadata: {
284
      ...(propertySchema?.metadata || {})
40!
285
    }
286
  };
287

288
  setWKBGeometryColumnMetadata(schema.metadata!, {
40✔
289
    geometryColumnName: GEOMETRY_COLUMN_NAME,
290
    geometryTypes: inferGeometryTypes(geometries, header)
291
  });
292

293
  return schema;
40✔
294
}
295

296
/** Combines one property row and one geometry into an Arrow-builder friendly object row. */
297
function makeArrowRow(
298
  propertyRow: Record<string, unknown> | undefined,
299
  geometry: Geometry | undefined,
300
  header?: SHPHeader
301
): Record<string, unknown> {
302
  return {
144✔
303
    ...(propertyRow || {}),
144!
304
    [GEOMETRY_COLUMN_NAME]: geometry
144!
305
      ? new Uint8Array(convertGeometryToWKB(geometry, getWKBOptions(geometry, header)))
306
      : null
307
  };
308
}
309

310
/** Materializes Arrow rows as plain objects for row-wise joining with SHP geometry output. */
311
function getRowsFromArrowTable(table: ArrowTable | ArrowTableBatch): Record<string, unknown>[] {
312
  const rows: Record<string, unknown>[] = [];
26✔
313
  for (let rowIndex = 0; rowIndex < table.data.numRows; rowIndex++) {
26✔
314
    rows.push(table.data.get(rowIndex)?.toJSON() || {});
90!
315
  }
316
  return rows;
26✔
317
}
318

319
/** Converts binary SHP geometries to GeoJSON geometries. */
320
function parseGeometries(geometries: BinaryGeometry[]): Geometry[] {
321
  return geometries.map(geometry => convertBinaryGeometryToGeometry(geometry));
144✔
322
}
323

324
/** Reprojects features when requested through standard shapefile GIS options. */
325
function maybeReprojectFeatures(
326
  features: Feature[],
327
  sourceCrs: string | undefined,
328
  options?: ShapefileArrowLoaderOptions
329
): Feature[] {
330
  const {reproject = false, _targetCrs = 'WGS84'} = options?.gis || {};
38✔
331
  if (!reproject) {
38✔
332
    return features;
36✔
333
  }
334
  const projection = new Proj4Projection({from: sourceCrs || 'WGS84', to: _targetCrs || 'WGS84'});
2!
335
  return transformGeoJsonCoords(features, coord => projection.project(coord));
38✔
336
}
337

338
/** Selects WKB dimensional flags from the shapefile header and parsed coordinate dimensionality. */
339
function getWKBOptions(geometry: Geometry, header?: SHPHeader): {hasZ?: boolean; hasM?: boolean} {
340
  const dimensions = getCoordinateDimensions(getGeometrySampleCoordinates(geometry));
144✔
341
  switch (header?.type) {
144!
342
    case 11:
343
    case 13:
344
    case 15:
345
    case 18:
NEW
346
      return {hasZ: dimensions > 2, hasM: dimensions > 3};
×
347
    case 21:
348
    case 23:
349
    case 25:
350
    case 28:
NEW
351
      return {hasM: dimensions > 2};
×
352
    default:
353
      return {hasZ: dimensions > 2, hasM: dimensions > 3};
144✔
354
  }
355
}
356

357
/** Returns the coordinate dimensionality of the first coordinate tuple in a geometry. */
358
function getCoordinateDimensions(coordinates: unknown): number {
359
  if (!Array.isArray(coordinates)) {
334!
NEW
360
    return 2;
×
361
  }
362
  if (typeof coordinates[0] === 'number') {
334✔
363
    return coordinates.length;
234✔
364
  }
365
  if (coordinates.length === 0) {
100!
NEW
366
    return 2;
×
367
  }
368
  return getCoordinateDimensions(coordinates[0]);
100✔
369
}
370

371
/** Infers GeoParquet geometry type metadata from parsed geometries or the SHP header. */
372
function inferGeometryTypes(geometries: Geometry[], header?: SHPHeader): GeoParquetGeometryType[] {
373
  const geometryTypes = new Set<GeoParquetGeometryType>();
40✔
374
  for (const geometry of geometries) {
40✔
375
    const dimensions = getCoordinateDimensions(getGeometrySampleCoordinates(geometry));
90✔
376
    geometryTypes.add(
90✔
377
      (dimensions > 2 ? `${geometry.type} Z` : geometry.type) as GeoParquetGeometryType
90!
378
    );
379
  }
380
  if (geometryTypes.size > 0) {
40✔
381
    return [...geometryTypes];
22✔
382
  }
383

384
  const fallbackType = getGeometryTypeFromHeader(header?.type);
18✔
385
  return fallbackType ? [fallbackType] : [];
40!
386
}
387

388
/** Maps SHP header geometry type codes to GeoParquet geometry type strings. */
389
function getGeometryTypeFromHeader(type?: number): GeoParquetGeometryType | null {
390
  switch (type) {
18!
391
    case 1:
392
    case 11:
393
    case 21:
394
      return type === 11 ? 'Point Z' : 'Point';
8!
395
    case 3:
396
    case 13:
397
    case 23:
398
      return type === 13 ? 'LineString Z' : 'LineString';
8!
399
    case 5:
400
    case 15:
401
    case 25:
402
      return type === 15 ? 'Polygon Z' : 'Polygon';
2!
403
    case 8:
404
    case 18:
405
    case 28:
NEW
406
      return type === 18 ? 'MultiPoint Z' : 'MultiPoint';
×
407
    default:
NEW
408
      return null;
×
409
  }
410
}
411

412
/** Extracts a representative coordinate array from any GeoJSON geometry. */
413
function getGeometrySampleCoordinates(geometry: Geometry): unknown {
414
  if ('coordinates' in geometry) {
234!
415
    return geometry.coordinates;
234✔
416
  }
NEW
417
  if ('geometries' in geometry && geometry.geometries.length > 0) {
×
NEW
418
    return getGeometrySampleCoordinates(geometry.geometries[0]);
×
419
  }
NEW
420
  return undefined;
×
421
}
422

423
/** Normalizes sync or async iterables to a single async iterator interface. */
424
function getAsyncIterator(iterable: AsyncIterable<any> | Iterable<any>): AsyncIterator<any> {
425
  const iterator = iterable[Symbol.asyncIterator]?.() || iterable[Symbol.iterator]?.();
28!
426
  return iterator as AsyncIterator<any>;
28✔
427
}
428

429
/** Reads the next non-metadata value from a parser iterator. */
430
async function getNextNonMetadataValue(iterator: AsyncIterator<any>): Promise<any> {
431
  while (true) {
14✔
432
    const result = await iterator.next();
28✔
433
    if (result.done) {
28!
NEW
434
      return null;
×
435
    }
436
    if (result.value?.batchType !== 'metadata') {
28✔
437
      return result.value;
14✔
438
    }
439
  }
440
}
441

442
/** Reads the next DBF row batch, skipping header objects. */
443
async function getNextPropertyBatch(
444
  iterator: AsyncIterator<any>
445
): Promise<Record<string, unknown>[] | null> {
446
  while (true) {
14✔
447
    const result = await iterator.next();
42✔
448
    if (result.done) {
42✔
449
      return null;
2✔
450
    }
451
    if (Array.isArray(result.value)) {
40✔
452
      return result.value;
12✔
453
    }
454
  }
455
}
456

457
/** Creates an explicit empty Arrow batch so zero-row shapefiles still expose schema in batch mode. */
458
function makeEmptyArrowBatch(schema: TableSchema): ArrowTableBatch {
459
  const table = new ArrowTableBuilder(schema).finishTable();
2✔
460
  return {
2✔
461
    shape: 'arrow-table',
462
    batchType: 'data',
463
    length: 0,
464
    schema,
465
    data: table.data
466
  };
467
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc