• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 24907303489

24 Apr 2026 07:12PM UTC coverage: 59.423% (+0.09%) from 59.334%
24907303489

push

github

web-flow
feat: Dynamic import loaders (#3405)

11252 of 20783 branches covered (54.14%)

Branch coverage included in aggregate %.

1164 of 1518 new or added lines in 244 files covered. (76.68%)

41 existing lines in 18 files now uncovered.

23432 of 37585 relevant lines covered (62.34%)

16317.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.9
/modules/csv/src/csv-arrow-loader-with-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
6
import type {
7
  ArrayRowTable,
8
  ArrowTable,
9
  ArrowTableBatch,
10
  ObjectRowTable,
11
  Schema,
12
  TableBatch
13
} from '@loaders.gl/schema';
14
import {ArrowTableBuilder} from '@loaders.gl/schema-utils';
15
import * as arrow from 'apache-arrow';
16

17
import {
18
  CSVArrowLoader as CSVArrowLoaderMetadata,
19
  type CSVArrowLoaderOptions
20
} from './csv-arrow-loader';
21
import type {CSVArrowOptions} from './csv-arrow-loader-options';
22
import {CSV_ARROW_DEFAULT_OPTIONS} from './csv-arrow-loader-options';
23
import {CSVLoaderWithParser} from './csv-loader-with-parser';
24
import {
25
  parseRawArrowCSVInBatches,
26
  parseRawArrowCSVTable,
27
  parseRawArrowCSVText
28
} from './lib/parsers/parse-csv-to-arrow';
29
import type {CSVRawArrowParseOptions} from './lib/parsers/parse-csv-to-arrow';
30

31
// Separate the `preload` entry from the rest of the loader metadata so that the loader
// object defined below can spread every metadata field except `preload`.
// `_CSVArrowLoaderPreload` is bound only to remove it from the rest object.
const {
  preload: _CSVArrowLoaderPreload,
  ...CSVArrowLoaderMetadataWithoutPreload
} = CSVArrowLoaderMetadata;
14✔
33

34
export type {CSVArrowLoaderOptions} from './csv-arrow-loader';
35

36
export type CSVArrowParseOptions = CSVArrowLoaderOptions;
37

38
/** One CSV cell after Papa-style dynamic typing; `null` stands for a missing or empty cell. */
type DynamicColumnValue = null | string | number | boolean | Date;

/** Names of the Arrow column types that the typed Arrow conversion pass can select. */
type TypedColumnDataType = 'utf8' | 'float64' | 'bool' | 'date-millisecond';

/** Outcome of rebuilding a raw Utf8 Arrow table with typed columns. */
type TypedArrowConversionResult = {
  /** Data type chosen for each column, in column order. */
  typedColumnDataTypes: TypedColumnDataType[];
  /** The rebuilt Arrow table. */
  typedArrowTable: ArrowTable;
};
49

50
/**
 * Matches a complete decimal number: optional surrounding whitespace, an optional
 * leading minus sign, digits with an optional decimal point, and an optional
 * exponent (case-insensitive `e`).
 */
const FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;

/**
 * Matches a complete ISO 8601 date-time that carries a `Z` or `±hh:mm` offset, in one
 * of three forms: with fractional seconds, with whole seconds, or with minutes only.
 *
 * The pattern is anchored at both ends so that text which merely contains a timestamp
 * (for example a log message) is not classified as a date; handing such text to
 * `new Date()` would produce an invalid date.
 */
const ISO_DATE =
  /^(?:(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z)))$/;
14✔
53

54
/**
 * Parser-bundled CSV loader that produces Apache Arrow tables.
 *
 * With the default `csv.dynamicTyping: false`, columns are emitted as Arrow Utf8 and
 * the byte-oriented parser is used when the supplied options are supported. Pass
 * `csv.dynamicTyping: true` to opt into typed Arrow columns.
 *
 * Every entry point normalizes the caller's options before delegating to the
 * corresponding Arrow CSV parsing helper.
 */
export const CSVArrowLoaderWithParser = {
  ...CSVArrowLoaderMetadataWithoutPreload,

  parse: async (arrayBuffer: ArrayBuffer, options?: CSVArrowLoaderOptions) => {
    const normalizedOptions = normalizeCSVArrowOptions(options);
    return parseCSVArrayBufferAsArrow(arrayBuffer, normalizedOptions);
  },

  parseText: (text: string, options?: CSVArrowLoaderOptions) => {
    const normalizedOptions = normalizeCSVArrowOptions(options);
    return parseCSVTextAsArrow(text, normalizedOptions);
  },

  parseInBatches: (
    asyncIterator:
      | AsyncIterable<ArrayBufferLike | ArrayBufferView>
      | Iterable<ArrayBufferLike | ArrayBufferView>,
    options?: CSVArrowLoaderOptions
  ) => {
    const normalizedOptions = normalizeCSVArrowOptions(options);
    return parseCSVInArrowBatches(asyncIterator, normalizedOptions);
  }
} as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, CSVArrowLoaderOptions>;
74

75
/**
 * Layers the caller's `csv` options over the Arrow CSV defaults and records whether
 * `skipEmptyLines` was explicitly requested.
 *
 * If the incoming `csv` options already carry `skipEmptyLinesIsExplicit` as an own
 * property (as the object returned by this function does), that value is coerced to a
 * boolean and kept. Otherwise the flag is true only when `csv.skipEmptyLines === true`.
 *
 * @param options Loader options supplied by the caller, if any.
 * @returns A new options object; the input is not modified.
 */
function normalizeCSVArrowOptions(options?: CSVArrowParseOptions): CSVArrowParseOptions {
  const callerCsvOptions = options?.csv;

  let skipEmptyLinesIsExplicit: boolean;
  if (
    callerCsvOptions &&
    Object.prototype.hasOwnProperty.call(callerCsvOptions, 'skipEmptyLinesIsExplicit')
  ) {
    skipEmptyLinesIsExplicit = Boolean(callerCsvOptions.skipEmptyLinesIsExplicit);
  } else {
    skipEmptyLinesIsExplicit = Boolean(
      callerCsvOptions && callerCsvOptions.skipEmptyLines === true
    );
  }

  const csv = {
    ...CSV_ARROW_DEFAULT_OPTIONS,
    ...callerCsvOptions,
    skipEmptyLinesIsExplicit
  };

  return {...options, csv};
}
91

92
/**
 * Parses CSV bytes into an Arrow table.
 *
 * Without `csv.detectGeometryColumns`, the bytes go through the raw Arrow CSV parser
 * and the result is returned as-is, or rebuilt with typed columns when dynamic typing
 * applies. With `csv.detectGeometryColumns`, parsing is delegated to
 * `CSVLoaderWithParser` as an object-row table, which is then rebuilt as Arrow.
 *
 * @param arrayBuffer CSV content.
 * @param options Loader options; defaults are applied internally.
 * @returns The parsed Arrow table.
 */
export async function parseCSVArrayBufferAsArrow(
  arrayBuffer: ArrayBuffer,
  options?: CSVArrowParseOptions
): Promise<ArrowTable> {
  const normalizedOptions = normalizeCSVArrowOptions(options);
  const csvOptions = createCSVArrowOptions(normalizedOptions);

  if (!csvOptions.detectGeometryColumns) {
    const rawArrowTable = await parseRawArrowCSVTable(
      arrayBuffer,
      createRawArrowCSVOptions(normalizedOptions)
    );

    return shouldApplyDynamicTyping(csvOptions)
      ? convertRawArrowTableToTypedArrowTable(rawArrowTable).typedArrowTable
      : rawArrowTable;
  }

  // Geometry detection path: parse to object rows first, then convert those rows to Arrow.
  const rowTable = await CSVLoaderWithParser.parse(arrayBuffer, {
    ...normalizedOptions,
    csv: {
      ...normalizedOptions.csv,
      shape: 'object-row-table',
      dynamicTyping: csvOptions.dynamicTyping
    }
  });
  return convertCSVRowTableToArrowTable(rowTable as ObjectRowTable);
}
120

121
/**
 * Parses CSV text into an Arrow table.
 *
 * Without `csv.detectGeometryColumns`, the text goes through the raw Arrow CSV parser
 * and the result is returned as-is, or rebuilt with typed columns when dynamic typing
 * applies. With `csv.detectGeometryColumns`, parsing is delegated to
 * `CSVLoaderWithParser` as an object-row table, which is then rebuilt as Arrow.
 *
 * @param csvText CSV content.
 * @param options Loader options; defaults are applied internally.
 * @returns The parsed Arrow table.
 */
export async function parseCSVTextAsArrow(
  csvText: string,
  options?: CSVArrowParseOptions
): Promise<ArrowTable> {
  const normalizedOptions = normalizeCSVArrowOptions(options);
  const csvOptions = createCSVArrowOptions(normalizedOptions);

  if (!csvOptions.detectGeometryColumns) {
    const rawArrowTable = await parseRawArrowCSVText(
      csvText,
      createRawArrowCSVOptions(normalizedOptions)
    );

    return shouldApplyDynamicTyping(csvOptions)
      ? convertRawArrowTableToTypedArrowTable(rawArrowTable).typedArrowTable
      : rawArrowTable;
  }

  // Geometry detection path: parse to object rows first, then convert those rows to Arrow.
  const rowTable = await CSVLoaderWithParser.parseText(csvText, {
    ...normalizedOptions,
    csv: {
      ...normalizedOptions.csv,
      shape: 'object-row-table',
      dynamicTyping: csvOptions.dynamicTyping
    }
  });
  return convertCSVRowTableToArrowTable(rowTable as ObjectRowTable);
}
149

150
/**
 * Parses a stream of CSV chunks into Arrow table batches.
 *
 * Without `csv.detectGeometryColumns`, chunks go through the raw Arrow batch parser
 * and each batch is passed on through the typed-batch iterator. With
 * `csv.detectGeometryColumns`, chunks are parsed by `CSVLoaderWithParser` as
 * object-row batches, which are then converted to Arrow batches.
 *
 * @param asyncIterator Sync or async iterable of CSV byte chunks.
 * @param options Loader options; defaults are applied internally.
 * @returns An async iterable of Arrow table batches.
 */
export function parseCSVInArrowBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: CSVArrowParseOptions
): AsyncIterable<ArrowTableBatch> {
  const normalizedOptions = normalizeCSVArrowOptions(options);
  const csvOptions = createCSVArrowOptions(normalizedOptions);

  if (!csvOptions.detectGeometryColumns) {
    const rawArrowBatches = parseRawArrowCSVInBatches(
      asyncIterator,
      createRawArrowCSVOptions(normalizedOptions)
    );
    return makeTypedArrowBatchIterator(rawArrowBatches, csvOptions);
  }

  // Geometry detection path: parse to object-row batches, then convert each to Arrow.
  const rowBatches = CSVLoaderWithParser.parseInBatches(asyncIterator, {
    ...normalizedOptions,
    csv: {
      ...normalizedOptions.csv,
      shape: 'object-row-table',
      dynamicTyping: csvOptions.dynamicTyping
    }
  });
  return convertCSVRowBatchesToArrowBatches(rowBatches);
}
177

178
/**
 * Rebuilds a CSV row table as an Arrow table, using the schema attached to the table.
 *
 * Rows are appended as object rows when the table shape is `'object-row-table'`, and
 * as array rows otherwise.
 *
 * @param table Row table whose `schema` is expected to be set (asserted, not checked).
 * @returns The Arrow table produced by the builder.
 */
function convertCSVRowTableToArrowTable(table: ObjectRowTable | ArrayRowTable): ArrowTable {
  const builder = new ArrowTableBuilder(table.schema!);

  if (table.shape === 'object-row-table') {
    for (const objectRow of table.data) {
      builder.addObjectRow(objectRow as {[columnName: string]: unknown});
    }
  } else {
    for (const arrayRow of table.data) {
      builder.addArrayRow(arrayRow as unknown[]);
    }
  }

  return builder.finishTable();
}
190

191
/**
 * Converts CSV row batches to Arrow batches, keeping each batch's CSV-derived schema.
 *
 * Batches that are neither array-row nor object-row shaped, or that carry no schema,
 * are skipped and produce no output.
 *
 * @param rowBatchIterator Row batches produced by the CSV row loader.
 * @returns Arrow batches carrying the other fields of the source batch, with `shape`,
 *   `schema`, `data` and `length` replaced.
 */
async function* convertCSVRowBatchesToArrowBatches(
  rowBatchIterator: AsyncIterable<TableBatch>
): AsyncIterable<ArrowTableBatch> {
  for await (const rowBatch of rowBatchIterator) {
    if (
      (rowBatch.shape === 'array-row-table' || rowBatch.shape === 'object-row-table') &&
      rowBatch.schema
    ) {
      const builder = new ArrowTableBuilder(rowBatch.schema);

      if (rowBatch.shape === 'object-row-table') {
        for (const objectRow of rowBatch.data) {
          builder.addObjectRow(objectRow as {[columnName: string]: unknown});
        }
      } else {
        for (const arrayRow of rowBatch.data) {
          builder.addArrayRow(arrayRow as unknown[]);
        }
      }

      const {data} = builder.finishTable();
      yield {
        ...rowBatch,
        shape: 'arrow-table',
        schema: rowBatch.schema,
        data,
        length: data.numRows
      };
    }
  }
}
221

222
/**
 * Passes raw Utf8 Arrow batches through, converting them to typed Arrow batches when
 * dynamic typing applies.
 *
 * The column types from the first conversion that reports at least one column are
 * remembered and supplied to every later conversion, so later batches reuse those
 * types instead of inferring their own.
 *
 * @param rawArrowBatchIterator Batches from the raw Arrow CSV parser.
 * @param csvOptions Merged CSV options; `dynamicTyping` selects pass-through or typed output.
 * @returns The raw batches unchanged, or batches with typed `schema`, `data` and `length`.
 */
async function* makeTypedArrowBatchIterator(
  rawArrowBatchIterator: AsyncIterable<ArrowTableBatch>,
  csvOptions: CSVArrowOptions
): AsyncIterable<ArrowTableBatch> {
  let frozenColumnDataTypes: TypedColumnDataType[] | null = null;

  for await (const rawArrowBatch of rawArrowBatchIterator) {
    if (shouldApplyDynamicTyping(csvOptions)) {
      const {typedArrowTable, typedColumnDataTypes} = convertRawArrowTableToTypedArrowTable(
        {
          shape: 'arrow-table',
          schema: rawArrowBatch.schema,
          data: rawArrowBatch.data
        },
        {frozenColumnDataTypes}
      );

      if (frozenColumnDataTypes === null && typedColumnDataTypes.length > 0) {
        frozenColumnDataTypes = typedColumnDataTypes;
      }

      yield {
        ...rawArrowBatch,
        schema: typedArrowTable.schema,
        data: typedArrowTable.data,
        length: typedArrowTable.data.numRows
      };
    } else {
      yield rawArrowBatch;
    }
  }
}
257

258
/**
 * Builds the effective `csv` options: Arrow CSV defaults overlaid with whatever the
 * caller supplied under `options.csv`.
 *
 * @param options Loader options supplied by the caller, if any.
 * @returns A new merged `csv` options object.
 */
function createCSVArrowOptions(options?: CSVArrowParseOptions): CSVArrowOptions {
  const callerCsvOptions = options?.csv;
  return {...CSV_ARROW_DEFAULT_OPTIONS, ...callerCsvOptions};
}
265

266
/**
 * Builds the options object handed to the raw Arrow CSV parser: the caller's options
 * with `csv` replaced by the merged Arrow CSV options.
 *
 * NOTE(review): the earlier comment described this as stripping the typed conversion
 * flag, but `dynamicTyping` is split out and then written back (as the last key of
 * `csv`), so it is still forwarded to the raw parser. Confirm which is intended.
 *
 * @param options Loader options supplied by the caller, if any.
 * @returns A new options object for the raw Arrow CSV parser.
 */
function createRawArrowCSVOptions(options?: CSVArrowParseOptions): CSVRawArrowParseOptions {
  const {dynamicTyping, ...remainingCsvOptions} = createCSVArrowOptions(options);
  const csv = {...remainingCsvOptions, dynamicTyping};

  return {...options, csv};
}
279

280
/**
 * Tells whether raw Utf8 Arrow output should be converted to typed Arrow columns.
 *
 * Only an explicit `dynamicTyping: false` disables the conversion; any other value,
 * including `undefined`, enables it.
 */
function shouldApplyDynamicTyping(csvOptions: CSVArrowOptions): boolean {
  const {dynamicTyping} = csvOptions;
  return !(dynamicTyping === false);
}
284

285
/**
 * Rebuilds an Arrow table of raw Utf8 columns as a table with inferred typed columns.
 *
 * For each column the cell values are read back as nullable strings, dynamically
 * typed one by one, and then coerced to a single column type. That type is taken from
 * `options.frozenColumnDataTypes` when an entry exists for the column, and is deduced
 * from the column's values otherwise. Arrow list columns are not re-typed: their
 * values are copied as arrays and their column type is recorded as `'utf8'`.
 *
 * A table without fields is returned with its original data and an empty field list.
 *
 * @param rawArrowTable Table produced by the raw Arrow CSV parser.
 * @param options Optional per-column types to reuse instead of inferring them.
 * @returns The rebuilt table and the column types used, in column order.
 */
function convertRawArrowTableToTypedArrowTable(
  rawArrowTable: ArrowTable,
  options?: {frozenColumnDataTypes?: TypedColumnDataType[] | null}
): TypedArrowConversionResult {
  const rawFields = rawArrowTable.data.schema.fields;
  const rowCount = rawArrowTable.data.numRows;

  if (rawFields.length === 0) {
    const emptySchema: Schema = {
      fields: [],
      metadata: {
        ...rawArrowTable.schema?.metadata,
        'loaders.gl#format': 'csv',
        'loaders.gl#loader': 'CSVLoader'
      }
    };
    return {
      typedArrowTable: {
        shape: 'arrow-table',
        schema: emptySchema,
        data: rawArrowTable.data
      },
      typedColumnDataTypes: []
    };
  }

  const typedSchemaFields: Schema['fields'] = [];
  const typedColumnValues: unknown[][] = [];
  const typedColumnDataTypes: TypedColumnDataType[] = [];

  for (const [columnIndex, rawField] of rawFields.entries()) {
    const rawColumn = rawArrowTable.data.getChildAt(columnIndex);

    if (rawField.type instanceof arrow.List) {
      // List columns keep their existing schema field when one is available.
      typedSchemaFields.push(
        rawArrowTable.schema?.fields[columnIndex] || {
          name: rawField.name,
          type: 'utf8',
          nullable: true
        }
      );
      typedColumnDataTypes.push('utf8');
      typedColumnValues.push(
        rawColumn ? readRawArrowListValues(rawColumn, rowCount) : new Array(rowCount).fill(null)
      );
      continue;
    }

    // Read every cell as a nullable string and apply dynamic typing to it.
    const dynamicValues: DynamicColumnValue[] = [];
    for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
      const rawStringValue = readRawArrowStringValue(rawColumn?.get(rowIndex));
      dynamicValues.push(parseValueWithDynamicTyping(rawStringValue));
    }

    const typedColumnDataType =
      options?.frozenColumnDataTypes?.[columnIndex] ?? deduceTypedColumnDataType(dynamicValues);

    typedSchemaFields.push({
      name: rawField.name,
      type: typedColumnDataType,
      nullable: true
    });
    typedColumnDataTypes.push(typedColumnDataType);
    typedColumnValues.push(
      convertDynamicValuesToTypedColumnValues(dynamicValues, typedColumnDataType)
    );
  }

  const typedSchema: Schema = {
    fields: typedSchemaFields,
    metadata: {
      ...rawArrowTable.schema?.metadata,
      'loaders.gl#format': 'csv',
      'loaders.gl#loader': 'CSVLoader'
    }
  };

  // Values were collected per column; the builder consumes them one row at a time.
  const typedArrowTableBuilder = new ArrowTableBuilder(typedSchema);
  for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
    typedArrowTableBuilder.addArrayRow(typedColumnValues.map(columnValues => columnValues[rowIndex]));
  }

  return {
    typedArrowTable: typedArrowTableBuilder.finishTable(),
    typedColumnDataTypes
  };
}
381

382
/**
 * Copies the first `rowCount` cells of an Arrow list column into plain JS values.
 *
 * @param rawArrowColumn Column to read from.
 * @param rowCount Number of rows to read, starting at row 0.
 * @returns One entry per row: `null` for a null or undefined cell, otherwise an array
 *   holding the cell's elements.
 */
function readRawArrowListValues(rawArrowColumn: arrow.Vector, rowCount: number): unknown[] {
  const listValues: unknown[] = [];
  let rowIndex = 0;
  while (rowIndex < rowCount) {
    const cellValue = rawArrowColumn.get(rowIndex) ?? null;
    listValues.push(cellValue === null ? null : Array.from(cellValue));
    rowIndex++;
  }
  return listValues;
}
393

394
/**
 * Turns an Arrow cell value into a nullable string.
 *
 * @param rawArrowValue Cell value as read from the column.
 * @returns `null` for a null or undefined cell, otherwise `String(rawArrowValue)`.
 */
function readRawArrowStringValue(rawArrowValue: unknown): string | null {
  const presentValue = rawArrowValue ?? null;
  return presentValue === null ? null : String(presentValue);
}
402

403
/**
 * Applies Papa-style dynamic typing to one nullable CSV string value.
 *
 * Rules, in order:
 * - `null` and the empty string become `null`;
 * - `'true'`/`'TRUE'` and `'false'`/`'FALSE'` become booleans;
 * - text matching `FLOAT` becomes a number;
 * - text matching `ISO_DATE` becomes a `Date`, but only when it denotes a real
 *   instant. The pattern checks digit positions, not value ranges, so out-of-range
 *   fields can match it; such text is returned unchanged rather than as an invalid
 *   `Date`;
 * - anything else is returned unchanged.
 *
 * @param rawStringValue Cell text, or `null` for a missing cell.
 * @returns The typed value.
 */
function parseValueWithDynamicTyping(rawStringValue: string | null): DynamicColumnValue {
  // The empty string matches none of the patterns below, so it can be handled up front.
  if (rawStringValue === null || rawStringValue === '') {
    return null;
  }

  if (rawStringValue === 'true' || rawStringValue === 'TRUE') {
    return true;
  }

  if (rawStringValue === 'false' || rawStringValue === 'FALSE') {
    return false;
  }

  if (FLOAT.test(rawStringValue)) {
    return Number.parseFloat(rawStringValue);
  }

  if (ISO_DATE.test(rawStringValue)) {
    const parsedDate = new Date(rawStringValue);
    // An unparseable date has a NaN time value; keep the original text in that case.
    if (!Number.isNaN(parsedDate.getTime())) {
      return parsedDate;
    }
  }

  return rawStringValue;
}
431

432
/**
 * Picks the single Arrow type that fits every non-null value of a column.
 *
 * `null` values are ignored. The result is `'utf8'` as soon as a string value is seen
 * or two values disagree on their type, and also when the column has no non-null
 * values at all.
 *
 * @param dynamicValues Dynamically typed values of one column.
 * @returns The column type to use.
 */
function deduceTypedColumnDataType(dynamicValues: DynamicColumnValue[]): TypedColumnDataType {
  let columnDataType: TypedColumnDataType | undefined;

  for (const value of dynamicValues) {
    if (value !== null) {
      const valueDataType = getTypedColumnDataType(value);
      const conflictsWithColumn = columnDataType !== undefined && columnDataType !== valueDataType;

      if (valueDataType === 'utf8' || conflictsWithColumn) {
        return 'utf8';
      }
      columnDataType = valueDataType;
    }
  }

  return columnDataType ?? 'utf8';
}
459

460
/**
 * Maps one non-null dynamically typed value to its Arrow column type: booleans to
 * `'bool'`, numbers to `'float64'`, `Date` instances to `'date-millisecond'`, and
 * everything else to `'utf8'`.
 */
function getTypedColumnDataType(
  dynamicValue: Exclude<DynamicColumnValue, null>
): TypedColumnDataType {
  switch (typeof dynamicValue) {
    case 'boolean':
      return 'bool';
    case 'number':
      return 'float64';
    default:
      return dynamicValue instanceof Date ? 'date-millisecond' : 'utf8';
  }
}
478

479
/**
 * Coerces the dynamically typed values of one column to values that fit the selected
 * Arrow column type.
 *
 * For `'bool'`, `'float64'` and `'date-millisecond'`, values of any other JS type
 * become `null`. For `'utf8'` (and any unrecognized type) non-null values are turned
 * into strings; `Date` values are written as ISO 8601 UTC text so the output does not
 * depend on the timezone or locale of the machine doing the conversion.
 *
 * @param dynamicValues Dynamically typed values of one column.
 * @param typedColumnDataType Column type the values must conform to.
 * @returns A new array of the same length as `dynamicValues`.
 */
function convertDynamicValuesToTypedColumnValues(
  dynamicValues: DynamicColumnValue[],
  typedColumnDataType: TypedColumnDataType
): DynamicColumnValue[] {
  switch (typedColumnDataType) {
    case 'bool':
      return dynamicValues.map(dynamicValue =>
        typeof dynamicValue === 'boolean' ? dynamicValue : null
      );
    case 'float64':
      return dynamicValues.map(dynamicValue =>
        typeof dynamicValue === 'number' ? dynamicValue : null
      );
    case 'date-millisecond':
      return dynamicValues.map(dynamicValue =>
        dynamicValue instanceof Date ? dynamicValue : null
      );
    case 'utf8':
    default:
      return dynamicValues.map(dynamicValue => {
        if (dynamicValue === null) {
          return null;
        }
        // `toISOString()` throws on an invalid date, so only valid dates take that path.
        if (dynamicValue instanceof Date && !Number.isNaN(dynamicValue.getTime())) {
          return dynamicValue.toISOString();
        }
        return String(dynamicValue);
      });
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc