• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 25798238260

13 May 2026 12:10PM UTC coverage: 60.607% (+0.3%) from 60.27%
25798238260

push

github

web-flow
feat(json) GeoJSON -> geoarrow, schema, logging  (#3399)

13466 of 24516 branches covered (54.93%)

Branch coverage included in aggregate %.

448 of 541 new or added lines in 12 files covered. (82.81%)

1264 existing lines in 117 files now uncovered.

27516 of 43103 relevant lines covered (63.84%)

15056.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.1
/modules/csv/src/csv-loader-with-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
6
import type {
7
  Schema,
8
  ArrayRowTable,
9
  ArrowTable,
10
  ArrowTableBatch,
11
  ColumnarTable,
12
  ColumnarTableBatch,
13
  ObjectRowTable,
14
  TableBatch
15
} from '@loaders.gl/schema';
16

17
import {toArrayBufferIterator} from '@loaders.gl/loader-utils';
18
import {
19
  AsyncQueue,
20
  TableBatchBuilder,
21
  convertToArrayRow,
22
  convertToObjectRow
23
} from '@loaders.gl/schema-utils';
24
import Papa from './papaparse/papaparse';
25
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
26
import {DEFAULT_CSV_SHAPE} from './csv-loader-options';
27
import {
28
  parseCSVArrayBufferAsArrow,
29
  parseCSVInArrowBatches,
30
  parseCSVTextAsArrow
31
} from './csv-arrow-table-parser';
32
import {
33
  deduceCSVSchemaFromRows,
34
  detectGeometryColumns,
35
  MAX_GEOMETRY_SNIFF_ROWS,
36
  normalizeGeometryArrayRow,
37
  normalizeGeometryObjectRow,
38
  shouldFinalizeGeometryDetection
39
} from './lib/csv-geometry';
40
import {CSVLoader as CSVLoaderMetadata, type CSVLoaderOptions} from './csv-loader';
41
import {deserializeCSVWorkerResult, serializeCSVWorkerResult} from './lib/csv-worker-transport';
42

43
// Split `preload` off the metadata-only CSVLoader; the remaining fields are spread into
// CSVLoaderWithParser, which therefore does not carry a `preload` entry.
const {preload: _CSVLoaderPreload, ...CSVLoaderMetadataWithoutPreload} = CSVLoaderMetadata;
14✔
44

45
export type {CSVLoaderOptions} from './csv-loader';
46

47
/**
 * Loader for CSV and other delimiter-separated tabular text formats.
 *
 * Combines the CSV loader metadata (minus `preload`) with the parser entry points.
 * `parse`, `parseText` and `parseInBatches` route to the Arrow parsers when
 * `options.csv.shape` is `'arrow-table'`; every other shape goes through the row-table
 * parsers in this file. `parseSync` and `parseTextSync` always use the row-table parser.
 */
export const CSVLoaderWithParser = {
  ...CSVLoaderMetadataWithoutPreload,
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) => {
    if (options?.csv?.shape === 'arrow-table') {
      return parseCSVArrayBufferAsArrow(arrayBuffer, options);
    }
    return parseCSVText(new TextDecoder().decode(arrayBuffer), options);
  },
  parseSync: (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) => {
    const csvText = new TextDecoder().decode(arrayBuffer);
    return parseCSVTextSync(csvText, options);
  },
  parseText: (text: string, options?: CSVLoaderOptions) => {
    if (options?.csv?.shape === 'arrow-table') {
      return parseCSVTextAsArrow(text, options);
    }
    return parseCSVText(text, options);
  },
  parseTextSync: (text: string, options?: CSVLoaderOptions) => {
    return parseCSVTextSync(text, options);
  },
  parseInBatches: (asyncIterator, options?: CSVLoaderOptions) => {
    if (options?.csv?.shape === 'arrow-table') {
      return parseCSVInArrowBatches(asyncIterator, options);
    }
    return parseCSVInBatches(asyncIterator, options);
  },
  serializeWorkerResult: serializeCSVWorkerResult,
  deserializeWorkerResult: deserializeCSVWorkerResult
} as const satisfies LoaderWithParser<
  ObjectRowTable | ArrayRowTable | ColumnarTable | ArrowTable,
  TableBatch | ColumnarTableBatch | ArrowTableBatch,
  CSVLoaderOptions
>;
72

73
/**
 * Promise-returning variant of `parseCSVTextSync`.
 * The parsing itself runs synchronously; anything it throws surfaces as a rejection.
 * @param csvText the CSV text to parse
 * @param options loader options, forwarded unchanged
 * @returns a promise for the parsed row table
 */
async function parseCSVText(
  csvText: string,
  options?: CSVLoaderOptions
): Promise<ObjectRowTable | ArrayRowTable> {
  const table = parseCSVTextSync(csvText, options);
  return table;
}
79

80
/**
 * Parses a complete CSV text into a row table in one synchronous pass.
 *
 * The `csv` options are merged over the loader defaults, the header is either forced by
 * `csv.header` or auto-detected from the first row, and the rows returned by papaparse are
 * converted to the requested row shape. When `csv.detectGeometryColumns` is set, detected
 * geometry columns are normalized before the schema is deduced.
 *
 * @param csvText the CSV text to parse
 * @param options loader options; only `options.csv` is read here
 * @returns an object-row or array-row table with `schema` populated
 * @throws Error if the requested shape is neither 'object-row-table' nor 'array-row-table'
 */
function parseCSVTextSync(
  csvText: string,
  options?: CSVLoaderOptions
): ObjectRowTable | ArrayRowTable {
  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};

  // The first row may be missing when the text contains no rows at all.
  const firstRow = readFirstRow(csvText);
  const header: boolean =
    csvOptions.header === 'auto'
      ? Boolean(firstRow && isHeaderRow(firstRow))
      : Boolean(csvOptions.header);

  const papaparseConfig = {
    // dynamicTyping: true,
    ...csvOptions,
    header,
    download: false, // We handle loading, no need for papaparse to do it for us
    // Papaparse keeps only the last of several same-named columns, so rename duplicates
    transformHeader: header ? duplicateColumnTransformer() : undefined,
    error: e => {
      throw new Error(e);
    }
  };

  const result = Papa.parse(csvText, papaparseConfig);
  const rows = result.data as any[];

  // Without parsed field names, generate them from the width of the first row
  // (zero columns when the input had no rows, instead of crashing on `undefined.length`).
  const headerRow =
    result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow?.length ?? 0);

  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
  let table: ArrayRowTable | ObjectRowTable;
  switch (shape) {
    case 'object-row-table':
      table = {
        shape: 'object-row-table',
        data: rows.map(row => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
      };
      break;
    case 'array-row-table':
      table = {
        shape: 'array-row-table',
        data: rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
      };
      break;
    default:
      throw new Error(`CSVLoader: unsupported table shape '${shape}'`);
  }

  // Geometry detection works on array rows; reuse the converted rows when we already have them.
  const detectedGeometryColumns = csvOptions.detectGeometryColumns
    ? detectGeometryColumns(
        headerRow,
        table.shape === 'array-row-table'
          ? table.data
          : rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow))),
        csvOptions.geometryEncoding
      )
    : [];

  if (detectedGeometryColumns.length > 0) {
    table =
      table.shape === 'array-row-table'
        ? {
            ...table,
            data: table.data.map(row => normalizeGeometryArrayRow(row, detectedGeometryColumns))
          }
        : {
            ...table,
            data: table.data.map(row => normalizeGeometryObjectRow(row, detectedGeometryColumns))
          };
  }

  table.schema = deduceCSVSchemaFromRows(table.data, headerRow, detectedGeometryColumns);
  return table;
}
151

152
// TODO - support batch size 0 = no batching/single batch?
/**
 * Parses CSV from an (async) iterable of binary chunks and emits table batches.
 *
 * Papaparse is driven through `AsyncIteratorStreamer`; its `step` callback receives one row
 * at a time and its `complete` callback fires once at the end. Batches, and any errors caught
 * while building them, are pushed onto an `AsyncQueue`, which is returned to the caller.
 *
 * Row handling in `step`:
 * - empty lines are filtered manually according to `csv.skipEmptyLines`
 * - the first accepted row is consumed as the header if `csv.header` forces it or
 *   auto-detection says so; otherwise column names are generated from `csv.columnPrefix`
 * - while geometry detection is pending, rows are buffered in `sniffedRows` until
 *   `shouldFinalizeGeometryDetection` reports true or the input ends
 *
 * @param asyncIterator source of binary chunks
 * @param options loader options; the object passed in is not modified
 * @returns an async iterable of table batches
 */
function parseCSVInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: CSVLoaderOptions
): AsyncIterable<TableBatch> {
  // Papaparse does not support standard batch size handling
  // TODO - investigate papaparse chunks mode
  options = {...options};
  if (options?.core?.batchSize === 'auto') {
    // The spread above is shallow, so `core` is still the caller's object:
    // replace it with a copy rather than assigning to `options.core.batchSize` in place.
    options.core = {...options.core, batchSize: 4000};
  }

  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};

  const asyncQueue = new AsyncQueue<TableBatch>();

  // Parser state shared between the `step` and `complete` callbacks
  let isFirstRow: boolean = true;
  let headerRow: string[] | null = null;
  let tableBatchBuilder: TableBatchBuilder | null = null;
  let schema: Schema | null = null;
  let sniffedRows: unknown[][] = [];
  let detectedGeometryColumns = [] as ReturnType<typeof detectGeometryColumns>;
  // Nothing to detect when the option is off, so detection starts out "finalized"
  let geometryDetectionFinalized = !csvOptions.detectGeometryColumns;

  const config = {
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
    ...csvOptions,
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
    download: false, // We handle loading, no need for papaparse to do it for us
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
    // See https://github.com/mholt/PapaParse/issues/465
    chunkSize: 1024 * 1024 * 5,
    // skipEmptyLines is always disabled in papaparse and handled manually in `step`, given
    // two bugs where the streaming parser gets stuck if both the skipEmptyLines and step
    // callback options are provided:
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
    skipEmptyLines: false,

    // step is called on every row
    // eslint-disable-next-line complexity, max-statements
    step(results) {
      let row = results.data;

      if (csvOptions.skipEmptyLines === 'greedy') {
        // Manually reject lines that are empty
        const collapsedRow = row.flat().join('').trim();
        if (collapsedRow === '') {
          return;
        }
      } else if (csvOptions.skipEmptyLines === true) {
        row = normalizePapaStreamingRow(row);
        // A row holding a single null cell is treated as an empty line
        if (row.length === 1 && row[0] === null) {
          return;
        }
      }
      const bytesUsed = results.meta.cursor;

      // Check if we need to save a header row
      if (isFirstRow && !headerRow) {
        // Auto detects or can be forced with csvOptions.header
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
        if (header) {
          headerRow = row.map(duplicateColumnTransformer());
          return;
        }
      }

      // If first data row, we can deduce the schema
      if (isFirstRow) {
        if (!headerRow) {
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
        }
      }

      if (csvOptions.optimizeMemoryUsage) {
        // A workaround to allocate new strings and don't retain pointers to original strings.
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
        row = JSON.parse(JSON.stringify(row));
      }

      const shape = getBatchShape();

      if (!geometryDetectionFinalized && headerRow) {
        // Buffer rows until enough have been seen to decide on geometry columns
        sniffedRows.push(row);
        geometryDetectionFinalized = shouldFinalizeGeometryDetection(
          headerRow,
          sniffedRows,
          MAX_GEOMETRY_SNIFF_ROWS
        );
        if (geometryDetectionFinalized) {
          detectedGeometryColumns = detectGeometryColumns(
            headerRow,
            sniffedRows,
            csvOptions.geometryEncoding
          );
          const normalizedSniffedRows = sniffedRows.map(sniffedRow =>
            normalizeGeometryArrayRow(sniffedRow, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          isFirstRow = false;
          // Flush every buffered row (including the current one) into the batch builder
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            addCSVBatchRow(normalizedSniffedRow, shape, bytesUsed);
          }
          sniffedRows = [];
        }
        return;
      }

      if (isFirstRow) {
        if (!headerRow) {
          return;
        }
        schema = deduceCSVSchemaFromRows(
          [normalizeGeometryArrayRow(row, detectedGeometryColumns)],
          headerRow,
          detectedGeometryColumns
        );
        isFirstRow = false;
      }

      const normalizedRow = normalizeGeometryArrayRow(row, detectedGeometryColumns);
      addCSVBatchRow(normalizedRow, shape, bytesUsed);
    },

    // complete is called when all rows have been read
    complete(results) {
      try {
        if (!geometryDetectionFinalized && headerRow) {
          // Input ended while rows were still being buffered: detect with what we have
          detectedGeometryColumns = detectGeometryColumns(
            headerRow,
            sniffedRows,
            csvOptions.geometryEncoding
          );
          const normalizedSniffedRows = sniffedRows.map(row =>
            normalizeGeometryArrayRow(row, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          const shape = getBatchShape();
          tableBatchBuilder =
            tableBatchBuilder ||
            new TableBatchBuilder(schema, {
              ...(options?.core || {}),
              shape
            });
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            const batchRow =
              shape === 'object-row-table' && normalizedSniffedRow.length > headerRow.length
                ? convertToPapaObjectRow(normalizedSniffedRow, headerRow)
                : normalizedSniffedRow;
            tableBatchBuilder.addRow(batchRow);
          }
        }
        const bytesUsed = results.meta.cursor;
        // Ensure any final (partial) batch gets emitted
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        // Hand the failure to the consumer through the queue
        asyncQueue.enqueue(error as Error);
      }

      asyncQueue.close();
    }
  };

  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);

  // TODO - Does it matter if we return asyncIterable or asyncIterator
  // return asyncQueue[Symbol.asyncIterator]();
  return asyncQueue;

  /**
   * Adds one row to the (lazily created) batch builder and enqueues a batch once one is full.
   * In object-row mode, rows wider than the header go through convertToPapaObjectRow first.
   */
  function addCSVBatchRow(rowToAdd: unknown[], shape: CSVBatchShape, bytesUsed: number): void {
    let batchRow: unknown[] | {[columnName: string]: unknown} = rowToAdd;
    if (shape === 'object-row-table' && headerRow && rowToAdd.length > headerRow.length) {
      batchRow = convertToPapaObjectRow(rowToAdd, headerRow);
    }

    tableBatchBuilder =
      tableBatchBuilder ||
      new TableBatchBuilder(schema!, {
        ...(options?.core || {}),
        shape
      });

    try {
      tableBatchBuilder.addRow(batchRow);
      const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});
      if (batch) {
        asyncQueue.enqueue(batch);
      }
    } catch (error) {
      asyncQueue.enqueue(error as Error);
    }
  }

  /**
   * Resolves the batch shape: a top-level `options.shape` (deprecated) wins over
   * `csv.shape`; anything other than array-row or columnar falls back to the default shape.
   */
  function getBatchShape(): CSVBatchShape {
    const deprecatedShape = (options as {shape?: CSVBatchShape} | undefined)?.shape;
    const shape = deprecatedShape || csvOptions.shape || DEFAULT_CSV_SHAPE;
    switch (shape) {
      case 'array-row-table':
      case 'columnar-table':
        return shape;
      default:
        return DEFAULT_CSV_SHAPE;
    }
  }
}
373

374
/** Table shapes used by the batched CSV parser in this file (the return type of getBatchShape). */
type CSVBatchShape = 'array-row-table' | 'object-row-table' | 'columnar-table';
375

376
/**
 * Checks if a certain row is a header row, i.e. every cell is a string.
 * A missing row (e.g. the first row of an empty input) is never a header.
 * Note that an empty array counts as a header, since `every` is vacuously true.
 * @param row the row to check
 * @returns true if the row looks like a header, false otherwise (always a real boolean)
 */
function isHeaderRow(row: readonly unknown[] | null | undefined): boolean {
  // Array.isArray keeps the result a strict boolean instead of leaking a falsy `row`
  return Array.isArray(row) && row.every(value => typeof value === 'string');
}
384

385
/**
 * Reads, parses, and returns the first row of a CSV text.
 * Parsing uses `dynamicTyping` and stops after one row (`preview: 1`).
 * @param csvText the csv text to parse
 * @returns the first row, or an empty array if the parser returned no rows
 */
function readFirstRow(csvText: string): any[] {
  const result = Papa.parse(csvText, {
    dynamicTyping: true,
    preview: 1
  });
  // `data` is empty when nothing could be parsed; never hand `undefined` to callers
  return result.data[0] ?? [];
}
397

398
/**
 * Builds a stateful renamer for duplicate column names. Papaparse does not handle duplicate
 * header columns itself and would keep only the latest occurrence.
 * See the header option in https://www.papaparse.com/docs#config
 *
 * The first occurrence of a name is returned unchanged; later occurrences get the suffix
 * `.1`, `.2`, ... using the first number that yields a name not handed out before.
 * @returns a transform function that returns sanitized names for duplicate fields
 */
function duplicateColumnTransformer(): (column: string) => string {
  const usedNames = new Set<string>();

  return column => {
    let candidate = column;
    // Probe suffixes until the candidate is a name we have not returned yet
    for (let suffix = 1; usedNames.has(candidate); suffix++) {
      candidate = `${column}.${suffix}`;
    }
    usedNames.add(candidate);
    return candidate;
  };
}
417

418
/**
 * Produces synthetic column names: the prefix followed by a 1-based column number.
 * @param columnPrefix the text placed in front of each column number
 * @param count how many column names to generate (defaults to 0)
 * @returns the generated column names, in order
 */
function generateHeader(columnPrefix: string, count: number = 0): string[] {
  const names: string[] = [];
  let columnNumber = 0;
  while (columnNumber < count) {
    columnNumber += 1;
    names.push(`${columnPrefix}${columnNumber}`);
  }
  return names;
}
431

432
/**
 * Returns a copy of the row in which every empty-array cell is replaced by `null`.
 * All other cells, including non-empty arrays, are passed through untouched.
 * @param row a row as delivered by the streaming parser
 * @returns a new array of the same length
 */
function normalizePapaStreamingRow(row: unknown[]): unknown[] {
  return row.map(cell => {
    if (!Array.isArray(cell)) {
      return cell;
    }
    return cell.length === 0 ? null : cell;
  });
}
435

436
/**
 * Converts an array row to an object row and, when the row has more cells than there are
 * header names, stores the surplus cells under the `__parsed_extra` key.
 * @param row the array row to convert
 * @param headerRow the column names
 * @returns the object row, with `__parsed_extra` set only if surplus cells exist
 */
function convertToPapaObjectRow(
  row: unknown[],
  headerRow: string[]
): {[columnName: string]: unknown} {
  const objectRow = convertToObjectRow(row, headerRow);
  if (row.length > headerRow.length) {
    objectRow.__parsed_extra = row.slice(headerRow.length);
  }
  return objectRow;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc