• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 25256585712

02 May 2026 04:35PM UTC coverage: 59.717% (-0.06%) from 59.776%
25256585712

push

github

web-flow
chore(loader-utils): Consolidate `parseWithWorker` with `processOnWorker` (#1564)

12514 of 23182 branches covered (53.98%)

Branch coverage included in aggregate %.

497 of 804 new or added lines in 22 files covered. (61.82%)

25 existing lines in 4 files now uncovered.

25948 of 41225 relevant lines covered (62.94%)

14803.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.33
/modules/csv/src/csv-arrow-table-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {
6
  ArrayRowTable,
7
  ArrowTable,
8
  ArrowTableBatch,
9
  ObjectRowTable,
10
  Schema,
11
  TableBatch
12
} from '@loaders.gl/schema';
13
import {ArrowTableBuilder} from '@loaders.gl/schema-utils';
14
import * as arrow from 'apache-arrow';
15

16
import type {CSVLoaderOptions} from './csv-loader-options';
17
import {CSV_LOADER_OPTIONS} from './csv-loader-options';
18
import {CSVLoaderWithParser} from './csv-loader-with-parser';
19
import {
20
  parseRawArrowCSVInBatches,
21
  parseRawArrowCSVTable,
22
  parseRawArrowCSVText
23
} from './lib/parsers/parse-csv-to-arrow';
24
import type {CSVRawArrowParseOptions} from './lib/parsers/parse-csv-to-arrow';
25

26
/**
 * `csv` options understood by the Arrow table parsers: the regular CSV loader `csv` options
 * minus `shape`, plus one internal bookkeeping flag.
 */
export type ArrowTableCSVOptions = Omit<NonNullable<CSVLoaderOptions['csv']>, 'shape'> & {
  /** @internal Whether the caller explicitly supplied `skipEmptyLines`. */
  skipEmptyLinesIsExplicit?: boolean;
};

/** Loader options accepted by the Arrow CSV parse entry points (alias of `CSVLoaderOptions`). */
export type ArrowTableCSVParseOptions = CSVLoaderOptions;
32

33
/**
 * Default `csv` options for the Arrow parsers.
 *
 * Every entry is copied from `CSV_LOADER_OPTIONS.csv` except `dynamicTyping` and
 * `skipEmptyLines`, which are pinned to `false` here.
 */
const ARROW_TABLE_CSV_DEFAULT_OPTIONS: ArrowTableCSVOptions = {
  optimizeMemoryUsage: CSV_LOADER_OPTIONS.csv.optimizeMemoryUsage,
  header: CSV_LOADER_OPTIONS.csv.header,
  columnPrefix: CSV_LOADER_OPTIONS.csv.columnPrefix,
  quoteChar: CSV_LOADER_OPTIONS.csv.quoteChar,
  escapeChar: CSV_LOADER_OPTIONS.csv.escapeChar,
  // Arrow-specific override: typed conversion is opt-in.
  dynamicTyping: false,
  comments: CSV_LOADER_OPTIONS.csv.comments,
  // Arrow-specific override: empty lines are kept unless the caller asks otherwise.
  skipEmptyLines: false,
  detectGeometryColumns: CSV_LOADER_OPTIONS.csv.detectGeometryColumns,
  delimitersToGuess: CSV_LOADER_OPTIONS.csv.delimitersToGuess
};
45

46
/** A single cell after Papa-style dynamic typing; `null` marks a missing value. */
type DynamicColumnValue = string | number | boolean | Date | null;

/** Column types the typed Arrow conversion pass can assign. */
type TypedColumnDataType = 'utf8' | 'float64' | 'bool' | 'date-millisecond';

/** Output of converting a raw Utf8 Arrow table to typed Arrow columns. */
type TypedArrowConversionResult = {
  /** The rebuilt table with typed columns. */
  typedArrowTable: ArrowTable;
  /** The type chosen for each column, in column order. */
  typedColumnDataTypes: TypedColumnDataType[];
};
57

58
/**
 * Matches a whole cell that is a decimal number: optional surrounding whitespace, optional
 * leading minus sign, at least one digit, optional exponent (case-insensitive).
 */
const FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;

/**
 * Matches an ISO-8601 style timestamp (`YYYY-MM-DDTHH:MM[:SS[.fff]]` followed by `Z` or a
 * `+hh:mm` / `-hh:mm` offset). The pattern is not anchored, so it also matches when the
 * timestamp is embedded in longer text.
 */
const ISO_DATE =
  /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
14✔
61

62
/**
 * Layers the caller's `csv` options over the Arrow CSV defaults and records whether
 * `skipEmptyLines` was requested explicitly.
 *
 * `skipEmptyLinesIsExplicit` is taken from the caller's own `csv.skipEmptyLinesIsExplicit`
 * property when that key is present; otherwise it is `true` only when `csv.skipEmptyLines`
 * is exactly `true`.
 *
 * @param options Caller-supplied loader options, if any.
 * @returns A new options object; the input is not modified.
 */
function normalizeArrowTableCSVOptions(
  options?: ArrowTableCSVParseOptions
): ArrowTableCSVParseOptions {
  const callerCSVOptions = options?.csv;

  let skipEmptyLinesIsExplicit: boolean;
  if (
    callerCSVOptions &&
    Object.prototype.hasOwnProperty.call(callerCSVOptions, 'skipEmptyLinesIsExplicit')
  ) {
    // A flag set by an earlier normalization pass (or by the caller) wins.
    skipEmptyLinesIsExplicit = Boolean(callerCSVOptions.skipEmptyLinesIsExplicit);
  } else {
    skipEmptyLinesIsExplicit = Boolean(callerCSVOptions && callerCSVOptions.skipEmptyLines === true);
  }

  const csv = {
    ...ARROW_TABLE_CSV_DEFAULT_OPTIONS,
    ...callerCSVOptions,
    skipEmptyLinesIsExplicit
  };

  return {...options, csv};
}
80

81
/**
 * Parses ArrayBuffer CSV input into an Arrow table.
 *
 * With `csv.detectGeometryColumns` enabled the input is parsed by `CSVLoaderWithParser` as an
 * object-row table and rebuilt as Arrow. Otherwise it is parsed straight to a raw Arrow table,
 * which is returned as-is unless dynamic typing is enabled.
 *
 * @param arrayBuffer CSV content.
 * @param options Loader options; Arrow CSV defaults are applied first.
 * @returns The parsed Arrow table.
 */
export async function parseCSVArrayBufferAsArrow(
  arrayBuffer: ArrayBuffer,
  options?: ArrowTableCSVParseOptions
): Promise<ArrowTable> {
  const normalizedOptions = normalizeArrowTableCSVOptions(options);
  const csvOptions = createArrowTableCSVOptions(normalizedOptions);

  if (!csvOptions.detectGeometryColumns) {
    const rawArrowTable = await parseRawArrowCSVTable(
      arrayBuffer,
      createRawArrowTableCSVOptions(normalizedOptions)
    );
    return shouldApplyDynamicTyping(csvOptions)
      ? convertRawArrowTableToTypedArrowTable(rawArrowTable).typedArrowTable
      : rawArrowTable;
  }

  // Geometry detection goes through the row-table parser, then the rows are rebuilt as Arrow.
  const rowTable = await CSVLoaderWithParser.parse(arrayBuffer, {
    ...normalizedOptions,
    csv: {
      ...normalizedOptions.csv,
      shape: 'object-row-table',
      dynamicTyping: csvOptions.dynamicTyping
    }
  });
  return convertCSVRowTableToArrowTable(rowTable as ObjectRowTable);
}
109

110
/**
 * Parses string CSV input into an Arrow table.
 *
 * With `csv.detectGeometryColumns` enabled the text is parsed by `CSVLoaderWithParser` as an
 * object-row table and rebuilt as Arrow. Otherwise it is parsed straight to a raw Arrow table,
 * which is returned as-is unless dynamic typing is enabled.
 *
 * @param csvText CSV content.
 * @param options Loader options; Arrow CSV defaults are applied first.
 * @returns The parsed Arrow table.
 */
export async function parseCSVTextAsArrow(
  csvText: string,
  options?: ArrowTableCSVParseOptions
): Promise<ArrowTable> {
  const normalizedOptions = normalizeArrowTableCSVOptions(options);
  const csvOptions = createArrowTableCSVOptions(normalizedOptions);

  if (!csvOptions.detectGeometryColumns) {
    const rawArrowTable = await parseRawArrowCSVText(
      csvText,
      createRawArrowTableCSVOptions(normalizedOptions)
    );
    return shouldApplyDynamicTyping(csvOptions)
      ? convertRawArrowTableToTypedArrowTable(rawArrowTable).typedArrowTable
      : rawArrowTable;
  }

  // Geometry detection goes through the row-table parser, then the rows are rebuilt as Arrow.
  const rowTable = await CSVLoaderWithParser.parseText(csvText, {
    ...normalizedOptions,
    csv: {
      ...normalizedOptions.csv,
      shape: 'object-row-table',
      dynamicTyping: csvOptions.dynamicTyping
    }
  });
  return convertCSVRowTableToArrowTable(rowTable as ObjectRowTable);
}
138

139
/**
 * Parses batch CSV input into Arrow table batches.
 *
 * With `csv.detectGeometryColumns` enabled the chunks are parsed by `CSVLoaderWithParser` as
 * object-row batches and each batch is rebuilt as Arrow. Otherwise raw Arrow batches are
 * produced and passed through the typed-batch iterator.
 *
 * @param asyncIterator Sync or async iterable of binary CSV chunks.
 * @param options Loader options; Arrow CSV defaults are applied first.
 * @returns An async iterable of Arrow table batches.
 */
export function parseCSVInArrowBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: ArrowTableCSVParseOptions
): AsyncIterable<ArrowTableBatch> {
  const normalizedOptions = normalizeArrowTableCSVOptions(options);
  const csvOptions = createArrowTableCSVOptions(normalizedOptions);

  if (!csvOptions.detectGeometryColumns) {
    const rawArrowBatches = parseRawArrowCSVInBatches(
      asyncIterator,
      createRawArrowTableCSVOptions(normalizedOptions)
    );
    return makeTypedArrowBatchIterator(rawArrowBatches, csvOptions);
  }

  // Geometry detection goes through the row-table parser, then each batch is rebuilt as Arrow.
  const rowBatches = CSVLoaderWithParser.parseInBatches(asyncIterator, {
    ...normalizedOptions,
    csv: {
      ...normalizedOptions.csv,
      shape: 'object-row-table',
      dynamicTyping: csvOptions.dynamicTyping
    }
  });
  return convertCSVRowBatchesToArrowBatches(rowBatches);
}
166

167
/**
 * Rebuilds a CSV row table as an Arrow table, using the schema attached to the row table.
 *
 * @param table Object-row or array-row table; its `schema` is asserted to be present.
 * @returns The Arrow table produced by `ArrowTableBuilder`.
 */
function convertCSVRowTableToArrowTable(table: ObjectRowTable | ArrayRowTable): ArrowTable {
  const builder = new ArrowTableBuilder(table.schema!);

  if (table.shape === 'object-row-table') {
    for (const objectRow of table.data) {
      builder.addObjectRow(objectRow as {[columnName: string]: unknown});
    }
  } else {
    for (const arrayRow of table.data) {
      builder.addArrayRow(arrayRow as unknown[]);
    }
  }

  return builder.finishTable();
}
179

180
/**
 * Converts CSV row batches to Arrow batches, keeping each batch's own schema.
 *
 * Batches that are not array-row or object-row shaped, or that carry no schema, are dropped.
 *
 * @param rowBatchIterator Row batches from the CSV row parser.
 * @returns Arrow batches; each keeps the source batch's other properties.
 */
async function* convertCSVRowBatchesToArrowBatches(
  rowBatchIterator: AsyncIterable<TableBatch>
): AsyncIterable<ArrowTableBatch> {
  for await (const rowBatch of rowBatchIterator) {
    if (
      (rowBatch.shape === 'array-row-table' || rowBatch.shape === 'object-row-table') &&
      rowBatch.schema
    ) {
      const builder = new ArrowTableBuilder(rowBatch.schema);

      if (rowBatch.shape === 'object-row-table') {
        for (const objectRow of rowBatch.data) {
          builder.addObjectRow(objectRow as {[columnName: string]: unknown});
        }
      } else {
        for (const arrayRow of rowBatch.data) {
          builder.addArrayRow(arrayRow as unknown[]);
        }
      }

      const {data} = builder.finishTable();
      yield {
        ...rowBatch,
        shape: 'arrow-table',
        schema: rowBatch.schema,
        data,
        length: data.numRows
      };
    }
  }
}
210

211
/**
 * Converts an async iterator of raw Utf8 Arrow batches to typed Arrow batches.
 *
 * When dynamic typing is disabled, batches pass through untouched. Otherwise each batch is
 * converted, and the column types from the first conversion that reports any columns are
 * reused for every later batch.
 *
 * @param rawArrowBatchIterator Raw Arrow batches from the CSV parser.
 * @param csvOptions Resolved CSV options; only `dynamicTyping` is consulted.
 * @returns Arrow batches with typed columns (or the raw batches when typing is off).
 */
async function* makeTypedArrowBatchIterator(
  rawArrowBatchIterator: AsyncIterable<ArrowTableBatch>,
  csvOptions: ArrowTableCSVOptions
): AsyncIterable<ArrowTableBatch> {
  let lockedColumnTypes: TypedColumnDataType[] | null = null;

  for await (const rawBatch of rawArrowBatchIterator) {
    if (shouldApplyDynamicTyping(csvOptions)) {
      const {typedArrowTable, typedColumnDataTypes} = convertRawArrowTableToTypedArrowTable(
        {shape: 'arrow-table', schema: rawBatch.schema, data: rawBatch.data},
        {frozenColumnDataTypes: lockedColumnTypes}
      );

      // Lock in the first non-empty set of column types.
      if (lockedColumnTypes === null && typedColumnDataTypes.length > 0) {
        lockedColumnTypes = typedColumnDataTypes;
      }

      yield {
        ...rawBatch,
        schema: typedArrowTable.schema,
        data: typedArrowTable.data,
        length: typedArrowTable.data.numRows
      };
    } else {
      yield rawBatch;
    }
  }
}
246

247
/**
 * Produces the effective `csv` options: Arrow CSV defaults overridden by the caller's `csv`.
 *
 * @param options Caller-supplied loader options, if any.
 * @returns A fresh options object.
 */
function createArrowTableCSVOptions(options?: ArrowTableCSVParseOptions): ArrowTableCSVOptions {
  const callerCSVOptions = options?.csv;
  return {...ARROW_TABLE_CSV_DEFAULT_OPTIONS, ...callerCSVOptions};
}
254

255
/**
 * Builds the options object handed to the raw Arrow CSV parsers.
 *
 * The effective `csv` options are computed, `dynamicTyping` is separated out and then
 * re-appended as the last `csv` key, so the flag is still present in the result.
 * NOTE(review): the previous comment described this as "stripping" the flag; confirm whether
 * the raw parsers are expected to receive `dynamicTyping`.
 *
 * @param options Caller-supplied loader options, if any.
 * @returns Loader options with a fully resolved `csv` section.
 */
function createRawArrowTableCSVOptions(
  options?: ArrowTableCSVParseOptions
): CSVRawArrowParseOptions {
  const {dynamicTyping, ...remainingCSVOptions} = createArrowTableCSVOptions(options);
  const csv = {...remainingCSVOptions, dynamicTyping};

  return {...options, csv};
}
270

271
/**
 * Tells whether the typed Arrow conversion pass should run.
 *
 * @returns `false` only when `dynamicTyping` is exactly `false`; any other value enables it.
 */
function shouldApplyDynamicTyping(csvOptions: ArrowTableCSVOptions): boolean {
  const {dynamicTyping} = csvOptions;
  return !(dynamicTyping === false);
}
275

276
/**
 * Converts an Arrow table of Utf8 columns to inferred typed Arrow columns.
 *
 * Each non-list column is read back as strings, run through dynamic typing, and assigned a
 * column type: the frozen type for that column index when one is supplied, otherwise the type
 * deduced from the values. List columns are copied through as arrays and reported as `utf8`.
 *
 * Columns that resolve to `utf8` keep the original cell text (cells that dynamic typing maps
 * to `null` stay `null`). Re-stringifying the dynamically typed values would rewrite cells in
 * mixed columns, e.g. `007` to `7`, `TRUE` to `true`, and ISO timestamps to the
 * local-timezone output of `Date.prototype.toString()`.
 *
 * @param rawArrowTable Raw Arrow table produced by the CSV parser.
 * @param options.frozenColumnDataTypes Column types to reuse instead of deducing them.
 * @returns The typed table and the type chosen for each column, in column order.
 */
function convertRawArrowTableToTypedArrowTable(
  rawArrowTable: ArrowTable,
  options?: {frozenColumnDataTypes?: TypedColumnDataType[] | null}
): TypedArrowConversionResult {
  const rawArrowSchemaFields = rawArrowTable.data.schema.fields;
  const rowCount = rawArrowTable.data.numRows;
  const typedSchemaMetadata = {
    ...rawArrowTable.schema?.metadata,
    'loaders.gl#format': 'csv',
    'loaders.gl#loader': 'CSVLoader'
  };

  // Nothing to type: return the input data under an empty, CSV-tagged schema.
  if (rawArrowSchemaFields.length === 0) {
    return {
      typedArrowTable: {
        shape: 'arrow-table',
        schema: {fields: [], metadata: typedSchemaMetadata},
        data: rawArrowTable.data
      },
      typedColumnDataTypes: []
    };
  }

  const typedSchemaFields: Schema['fields'] = [];
  const typedColumnValues: unknown[][] = [];
  const typedColumnDataTypes: TypedColumnDataType[] = [];

  for (let columnIndex = 0; columnIndex < rawArrowSchemaFields.length; columnIndex++) {
    const rawArrowSchemaField = rawArrowSchemaFields[columnIndex];
    const rawArrowColumn = rawArrowTable.data.getChildAt(columnIndex);

    // List columns are not dynamically typed; they are copied through unchanged.
    if (rawArrowSchemaField.type instanceof arrow.List) {
      typedSchemaFields.push(
        rawArrowTable.schema?.fields[columnIndex] || {
          name: rawArrowSchemaField.name,
          type: 'utf8',
          nullable: true
        }
      );
      typedColumnDataTypes.push('utf8');
      typedColumnValues.push(
        rawArrowColumn
          ? readRawArrowListValues(rawArrowColumn, rowCount)
          : new Array(rowCount).fill(null)
      );
      continue;
    }

    const rawStringValues: (string | null)[] = [];
    for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
      rawStringValues.push(readRawArrowStringValue(rawArrowColumn?.get(rowIndex)));
    }

    const dynamicValues = rawStringValues.map(rawStringValue =>
      parseValueWithDynamicTyping(rawStringValue)
    );

    const typedColumnDataType =
      options?.frozenColumnDataTypes?.[columnIndex] ?? deduceTypedColumnDataType(dynamicValues);

    typedSchemaFields.push({
      name: rawArrowSchemaField.name,
      type: typedColumnDataType,
      nullable: true
    });
    typedColumnDataTypes.push(typedColumnDataType);

    typedColumnValues.push(
      typedColumnDataType === 'utf8'
        ? // Keep the CSV text verbatim; only cells that typing maps to null become null.
          dynamicValues.map((dynamicValue, rowIndex) =>
            dynamicValue === null ? null : rawStringValues[rowIndex]
          )
        : convertDynamicValuesToTypedColumnValues(dynamicValues, typedColumnDataType)
    );
  }

  const typedSchema: Schema = {
    fields: typedSchemaFields,
    metadata: typedSchemaMetadata
  };

  // The builder consumes rows, so transpose the per-column arrays while adding.
  const typedArrowTableBuilder = new ArrowTableBuilder(typedSchema);
  for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
    typedArrowTableBuilder.addArrayRow(
      typedColumnValues.map(typedColumnValue => typedColumnValue[rowIndex])
    );
  }

  return {
    typedArrowTable: typedArrowTableBuilder.finishTable(),
    typedColumnDataTypes
  };
}
372

373
/**
 * Reads the first `rowCount` cells of an Arrow list column into plain JS values.
 *
 * @param rawArrowColumn Column to read from.
 * @param rowCount Number of rows to read.
 * @returns One entry per row: `null` for null/undefined cells, otherwise the cell copied into
 * a JS array with `Array.from`.
 */
function readRawArrowListValues(rawArrowColumn: arrow.Vector, rowCount: number): unknown[] {
  return Array.from({length: rowCount}, (_unused, rowIndex) => {
    const cell = rawArrowColumn.get(rowIndex);
    return cell == null ? null : Array.from(cell);
  });
}
384

385
/**
 * Normalizes one Arrow cell to a nullable string.
 *
 * @returns `null` for `null`/`undefined`, otherwise `String(value)`.
 */
function readRawArrowStringValue(rawArrowValue: unknown): string | null {
  return rawArrowValue == null ? null : String(rawArrowValue);
}
393

394
/**
 * Applies Papa-style dynamic typing to one nullable CSV string value.
 *
 * Rules, checked in order:
 * - `null` stays `null`;
 * - `true`/`TRUE` and `false`/`FALSE` become booleans;
 * - text matching `FLOAT` becomes a number;
 * - text matching `ISO_DATE` becomes a `Date`, but only when it parses to a valid date;
 * - the empty string becomes `null`;
 * - anything else is returned unchanged.
 *
 * `ISO_DATE` is not anchored and does not range-check its fields, so it can match text that
 * `Date` cannot parse (for example a month of `13`). Such cells are kept as text rather than
 * being turned into an Invalid Date.
 *
 * @param rawStringValue Cell text, or `null` for a missing cell.
 * @returns The typed value.
 */
function parseValueWithDynamicTyping(rawStringValue: string | null): DynamicColumnValue {
  if (rawStringValue === null) {
    return null;
  }

  if (rawStringValue === 'true' || rawStringValue === 'TRUE') {
    return true;
  }

  if (rawStringValue === 'false' || rawStringValue === 'FALSE') {
    return false;
  }

  if (FLOAT.test(rawStringValue)) {
    return Number.parseFloat(rawStringValue);
  }

  if (ISO_DATE.test(rawStringValue)) {
    const parsedDate = new Date(rawStringValue);
    if (!Number.isNaN(parsedDate.getTime())) {
      return parsedDate;
    }
    // Matched the pattern but is not a real date: fall through and keep the text.
  }

  if (rawStringValue === '') {
    return null;
  }

  return rawStringValue;
}
422

423
/**
 * Picks the column type for a list of dynamically typed values.
 *
 * Nulls are ignored. If every remaining value maps to the same non-`utf8` type, that type is
 * returned; a `utf8` value, a mix of types, or no non-null values at all yields `utf8`.
 *
 * @param dynamicValues Column values after dynamic typing.
 * @returns The deduced column type.
 */
function deduceTypedColumnDataType(dynamicValues: DynamicColumnValue[]): TypedColumnDataType {
  let columnDataType: TypedColumnDataType | undefined;

  for (const value of dynamicValues) {
    if (value === null) {
      continue;
    }

    const valueDataType = getTypedColumnDataType(value);
    const conflictsWithEarlierValues =
      columnDataType !== undefined && columnDataType !== valueDataType;

    // Text, or disagreement between values, settles the column as text immediately.
    if (valueDataType === 'utf8' || conflictsWithEarlierValues) {
      return 'utf8';
    }

    columnDataType = valueDataType;
  }

  return columnDataType ?? 'utf8';
}
450

451
/**
 * Maps one non-null dynamically typed value to its column type.
 *
 * @returns `bool` for booleans, `float64` for numbers, `date-millisecond` for `Date`
 * instances, and `utf8` for everything else.
 */
function getTypedColumnDataType(
  dynamicValue: Exclude<DynamicColumnValue, null>
): TypedColumnDataType {
  switch (typeof dynamicValue) {
    case 'boolean':
      return 'bool';
    case 'number':
      return 'float64';
    default:
      return dynamicValue instanceof Date ? 'date-millisecond' : 'utf8';
  }
}
469

470
/**
 * Coerces dynamically typed values so every entry fits the chosen column type.
 *
 * For `bool`, `float64` and `date-millisecond`, values of the matching JS type are kept and
 * everything else becomes `null`. For `utf8` (and any unrecognized type), non-null values are
 * converted with `String(...)`.
 *
 * @param dynamicValues Column values after dynamic typing.
 * @param typedColumnDataType Column type the values must conform to.
 * @returns A new array of the same length.
 */
function convertDynamicValuesToTypedColumnValues(
  dynamicValues: DynamicColumnValue[],
  typedColumnDataType: TypedColumnDataType
): DynamicColumnValue[] {
  let coerce: (value: DynamicColumnValue) => DynamicColumnValue;

  if (typedColumnDataType === 'bool') {
    coerce = value => (typeof value === 'boolean' ? value : null);
  } else if (typedColumnDataType === 'float64') {
    coerce = value => (typeof value === 'number' ? value : null);
  } else if (typedColumnDataType === 'date-millisecond') {
    coerce = value => (value instanceof Date ? value : null);
  } else {
    coerce = value => (value === null ? null : String(value));
  }

  return dynamicValues.map(value => coerce(value));
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc