• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 25138678579

29 Apr 2026 11:12PM UTC coverage: 59.466% (+0.07%) from 59.401%
25138678579

push

github

web-flow
chore: Standardize and optimize geospatial loaders (#3410)

11651 of 21470 branches covered (54.27%)

Branch coverage included in aggregate %.

73 of 86 new or added lines in 9 files covered. (84.88%)

2 existing lines in 2 files now uncovered.

24115 of 38675 relevant lines covered (62.35%)

15903.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.1
/modules/csv/src/csv-loader-with-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
6
import type {
7
  Schema,
8
  ArrayRowTable,
9
  ArrowTable,
10
  ArrowTableBatch,
11
  ColumnarTable,
12
  ColumnarTableBatch,
13
  ObjectRowTable,
14
  TableBatch
15
} from '@loaders.gl/schema';
16

17
import {toArrayBufferIterator} from '@loaders.gl/loader-utils';
18
import {
19
  AsyncQueue,
20
  TableBatchBuilder,
21
  convertToArrayRow,
22
  convertToObjectRow
23
} from '@loaders.gl/schema-utils';
24
import Papa from './papaparse/papaparse';
25
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
26
import {DEFAULT_CSV_SHAPE} from './csv-loader-options';
27
import {
28
  parseCSVArrayBufferAsArrow,
29
  parseCSVInArrowBatches,
30
  parseCSVTextAsArrow
31
} from './csv-arrow-loader-with-parser';
32
import {
33
  deduceCSVSchemaFromRows,
34
  detectGeometryColumns,
35
  MAX_GEOMETRY_SNIFF_ROWS,
36
  normalizeGeometryArrayRow,
37
  normalizeGeometryObjectRow,
38
  shouldFinalizeGeometryDetection
39
} from './lib/csv-geometry';
40
import {CSVLoader as CSVLoaderMetadata, type CSVLoaderOptions} from './csv-loader';
41

42
// Discard the `preload` member before spreading the metadata into
// CSVLoaderWithParser below; the rest-pattern keeps everything else and
// `_CSVLoaderPreload` is intentionally unused.
const {preload: _CSVLoaderPreload, ...CSVLoaderMetadataWithoutPreload} = CSVLoaderMetadata;

export type {CSVLoaderOptions} from './csv-loader';
45

46
/** Loader for CSV and other delimiter-separated tabular text formats. */
47
export const CSVLoaderWithParser = {
14✔
48
  ...CSVLoaderMetadataWithoutPreload,
49
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
50
    options?.csv?.shape === 'arrow-table'
×
51
      ? parseCSVArrayBufferAsArrow(arrayBuffer, options)
52
      : parseCSVText(new TextDecoder().decode(arrayBuffer), options),
53
  parseSync: (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
54
    parseCSVTextSync(new TextDecoder().decode(arrayBuffer), options),
×
55
  parseText: (text: string, options?: CSVLoaderOptions) =>
56
    options?.csv?.shape === 'arrow-table'
60✔
57
      ? parseCSVTextAsArrow(text, options)
58
      : parseCSVText(text, options),
59
  parseTextSync: (text: string, options?: CSVLoaderOptions) => parseCSVTextSync(text, options),
4✔
60
  parseInBatches: (asyncIterator, options?: CSVLoaderOptions) =>
61
    options?.csv?.shape === 'arrow-table'
48✔
62
      ? parseCSVInArrowBatches(asyncIterator, options)
63
      : parseCSVInBatches(asyncIterator, options)
64
} as const satisfies LoaderWithParser<
65
  ObjectRowTable | ArrayRowTable | ColumnarTable | ArrowTable,
66
  TableBatch | ColumnarTableBatch | ArrowTableBatch,
67
  CSVLoaderOptions
68
>;
69

70
async function parseCSVText(
71
  csvText: string,
72
  options?: CSVLoaderOptions
73
): Promise<ObjectRowTable | ArrayRowTable> {
74
  return parseCSVTextSync(csvText, options);
56✔
75
}
76

77
function parseCSVTextSync(
78
  csvText: string,
79
  options?: CSVLoaderOptions
80
): ObjectRowTable | ArrayRowTable {
81
  // Apps can call the parse method directly, so we apply default options here
82
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};
60✔
83

84
  const firstRow = readFirstRow(csvText);
60✔
85
  const header: boolean =
86
    csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);
60✔
87

88
  const parseWithHeader = header;
60✔
89

90
  const papaparseConfig = {
60✔
91
    // dynamicTyping: true,
92
    ...csvOptions,
93
    header: parseWithHeader,
94
    download: false, // We handle loading, no need for papaparse to do it for us
95
    transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
60✔
96
    error: e => {
97
      throw new Error(e);
×
98
    }
99
  };
100

101
  const result = Papa.parse(csvText, papaparseConfig);
60✔
102
  const rows = result.data as any[];
60✔
103

104
  const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
60✔
105

106
  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
60!
107
  let table: ArrayRowTable | ObjectRowTable;
108
  switch (shape) {
60!
109
    case 'object-row-table':
110
      table = {
52✔
111
        shape: 'object-row-table',
112
        data: rows.map(row => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
174,922✔
113
      };
114
      break;
52✔
115
    case 'array-row-table':
116
      table = {
8✔
117
        shape: 'array-row-table',
118
        data: rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
22✔
119
      };
120
      break;
8✔
121
    default:
122
      throw new Error(shape);
×
123
  }
124
  const detectedGeometryColumns = csvOptions.detectGeometryColumns
60✔
125
    ? detectGeometryColumns(
126
        headerRow,
127
        rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow))),
24!
128
        csvOptions.geometryEncoding
129
      )
130
    : [];
131

132
  if (detectedGeometryColumns.length > 0) {
60✔
133
    table =
8✔
134
      table.shape === 'array-row-table'
8✔
135
        ? {
136
            ...table,
137
            data: table.data.map(row => normalizeGeometryArrayRow(row, detectedGeometryColumns))
6✔
138
          }
139
        : {
140
            ...table,
141
            data: table.data.map(row => normalizeGeometryObjectRow(row, detectedGeometryColumns))
18✔
142
          };
143
  }
144

145
  table.schema = deduceCSVSchemaFromRows(table.data, headerRow, detectedGeometryColumns);
60✔
146
  return table;
60✔
147
}
148

149
// TODO - support batch size 0 = no batching/single batch?
/**
 * Streams CSV content and emits the parsed table in batches.
 *
 * Rows are pushed into an AsyncQueue by papaparse's `step` callback and the
 * queue is returned immediately as the async iterable of batches. Header
 * detection, optional geometry-column sniffing and schema deduction all
 * happen lazily on the first row(s) seen.
 *
 * @param asyncIterator binary chunks of CSV text
 * @param options optional CSV loader options (merged over loader defaults)
 * @returns async iterable of table batches; errors are enqueued as Error values
 */
function parseCSVInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: CSVLoaderOptions
): AsyncIterable<TableBatch> {
  // Papaparse does not support standard batch size handling
  // TODO - investigate papaparse chunks mode
  options = {...options};
  if (options?.core?.batchSize === 'auto') {
    options.core.batchSize = 4000;
  }

  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};

  const asyncQueue = new AsyncQueue<TableBatch>();

  // Parser state shared between the papaparse callbacks and the helpers below
  let isFirstRow: boolean = true;
  let headerRow: string[] | null = null;
  let tableBatchBuilder: TableBatchBuilder | null = null;
  let schema: Schema | null = null;
  // Rows buffered while geometry-column detection is still sniffing
  let sniffedRows: unknown[][] = [];
  let detectedGeometryColumns = [] as ReturnType<typeof detectGeometryColumns>;
  // When geometry detection is disabled, detection is "finalized" from the start
  let geometryDetectionFinalized = !csvOptions.detectGeometryColumns;

  const config = {
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
    ...csvOptions,
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
    download: false, // We handle loading, no need for papaparse to do it for us
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
    // See https://github.com/mholt/PapaParse/issues/465
    chunkSize: 1024 * 1024 * 5,
    // skipEmptyLines is set to a boolean value if supplied. Greedy is set to true
    // skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if
    // both of the skipEmptyLines and step callback options are provided:
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
    skipEmptyLines: false,

    // step is called on every row
    // eslint-disable-next-line complexity, max-statements
    step(results) {
      let row = results.data;

      if (csvOptions.skipEmptyLines === 'greedy') {
        // Manually reject lines that are empty
        const collapsedRow = row.flat().join('').trim();
        if (collapsedRow === '') {
          return;
        }
      } else if (csvOptions.skipEmptyLines === true) {
        // Papaparse streams an empty line as a single empty-array cell; drop it
        row = normalizePapaStreamingRow(row);
        if (row.length === 1 && row[0] === null) {
          return;
        }
      }
      const bytesUsed = results.meta.cursor;

      // Check if we need to save a header row
      if (isFirstRow && !headerRow) {
        // Auto detects or can be forced with csvOptions.header
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
        if (header) {
          headerRow = row.map(duplicateColumnTransformer());
          return;
        }
      }

      // If first data row, we can deduce the schema
      if (isFirstRow) {
        if (!headerRow) {
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
        }
      }

      if (csvOptions.optimizeMemoryUsage) {
        // A workaround to allocate new strings and don't retain pointers to original strings.
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
        row = JSON.parse(JSON.stringify(row));
      }

      const shape = getBatchShape();

      // Buffer rows while geometry detection is still sniffing; no batches are
      // emitted until detection finalizes (or the stream completes)
      if (!geometryDetectionFinalized && headerRow) {
        sniffedRows.push(row);
        geometryDetectionFinalized = shouldFinalizeGeometryDetection(
          headerRow,
          sniffedRows,
          MAX_GEOMETRY_SNIFF_ROWS
        );
        if (geometryDetectionFinalized) {
          // Enough rows seen: detect geometry columns, deduce the schema,
          // then flush all buffered rows through the batch builder
          detectedGeometryColumns = detectGeometryColumns(
            headerRow,
            sniffedRows,
            csvOptions.geometryEncoding
          );
          const normalizedSniffedRows = sniffedRows.map(sniffedRow =>
            normalizeGeometryArrayRow(sniffedRow, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          isFirstRow = false;
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            addCSVBatchRow(normalizedSniffedRow, shape, bytesUsed);
          }
          sniffedRows = [];
        }
        return;
      }

      if (isFirstRow) {
        if (!headerRow) {
          return;
        }
        schema = deduceCSVSchemaFromRows(
          [normalizeGeometryArrayRow(row, detectedGeometryColumns)],
          headerRow,
          detectedGeometryColumns
        );
        isFirstRow = false;
      }

      const normalizedRow = normalizeGeometryArrayRow(row, detectedGeometryColumns);
      addCSVBatchRow(normalizedRow, shape, bytesUsed);
    },

    // complete is called when all rows have been read
    complete(results) {
      try {
        // Stream ended while still sniffing: finalize geometry detection with
        // whatever rows were buffered and emit them now
        if (!geometryDetectionFinalized && headerRow) {
          detectedGeometryColumns = detectGeometryColumns(
            headerRow,
            sniffedRows,
            csvOptions.geometryEncoding
          );
          const normalizedSniffedRows = sniffedRows.map(row =>
            normalizeGeometryArrayRow(row, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          const shape = getBatchShape();
          tableBatchBuilder =
            tableBatchBuilder ||
            new TableBatchBuilder(schema, {
              ...(options?.core || {}),
              shape
            });
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            const batchRow =
              shape === 'object-row-table' && normalizedSniffedRow.length > headerRow.length
                ? convertToPapaObjectRow(normalizedSniffedRow, headerRow)
                : normalizedSniffedRow;
            tableBatchBuilder.addRow(batchRow);
          }
        }
        const bytesUsed = results.meta.cursor;
        // Ensure any final (partial) batch gets emitted
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        asyncQueue.enqueue(error as Error);
      }

      asyncQueue.close();
    }
  };

  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);

  // TODO - Does it matter if we return asyncIterable or asyncIterator
  // return asyncQueue[Symbol.asyncIterator]();
  return asyncQueue;

  // Adds one row to the current batch builder (creating it lazily), enqueueing
  // a batch whenever the builder reports one is full
  function addCSVBatchRow(rowToAdd: unknown[], shape: CSVBatchShape, bytesUsed: number): void {
    let batchRow: unknown[] | {[columnName: string]: unknown} = rowToAdd;
    // Rows longer than the header get papaparse's `__parsed_extra` treatment
    if (shape === 'object-row-table' && headerRow && rowToAdd.length > headerRow.length) {
      batchRow = convertToPapaObjectRow(rowToAdd, headerRow);
    }

    tableBatchBuilder =
      tableBatchBuilder ||
      new TableBatchBuilder(schema!, {
        ...(options?.core || {}),
        shape
      });

    try {
      tableBatchBuilder.addRow(batchRow);
      const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});
      if (batch) {
        asyncQueue.enqueue(batch);
      }
    } catch (error) {
      asyncQueue.enqueue(error as Error);
    }
  }

  // Resolves the effective batch shape: a deprecated top-level `options.shape`
  // takes precedence, and unsupported values fall back to DEFAULT_CSV_SHAPE
  function getBatchShape(): CSVBatchShape {
    const deprecatedShape = (options as {shape?: CSVBatchShape} | undefined)?.shape;
    const shape = deprecatedShape || csvOptions.shape || DEFAULT_CSV_SHAPE;
    switch (shape) {
      case 'array-row-table':
      case 'columnar-table':
        return shape;
      default:
        return DEFAULT_CSV_SHAPE;
    }
  }
}
370

371
/** Table batch shapes that the streaming CSV parser can emit. */
type CSVBatchShape = 'array-row-table' | 'object-row-table' | 'columnar-table';
372

373
/**
374
 * Checks if a certain row is a header row
375
 * @param row the row to check
376
 * @returns true if the row looks like a header
377
 */
378
function isHeaderRow(row: string[]): boolean {
379
  return row && row.every(value => typeof value === 'string');
302✔
380
}
381

382
/**
383
 * Reads, parses, and returns the first row of a CSV text
384
 * @param csvText the csv text to parse
385
 * @returns the first row
386
 */
387
function readFirstRow(csvText: string): any[] {
388
  const result = Papa.parse(csvText, {
60✔
389
    dynamicTyping: true,
390
    preview: 1
391
  });
392
  return result.data[0];
60✔
393
}
394

395
/**
396
 * Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle
397
 * duplicate header columns and would use the latest occurrence by default.
398
 * See the header option in https://www.papaparse.com/docs#config
399
 * @returns a transform function that returns sanitized names for duplicate fields
400
 */
401
function duplicateColumnTransformer(): (column: string) => string {
402
  const observedColumns = new Set<string>();
74✔
403
  return col => {
74✔
404
    let colName = col;
356✔
405
    let counter = 1;
356✔
406
    while (observedColumns.has(colName)) {
356✔
407
      colName = `${col}.${counter}`;
50✔
408
      counter++;
50✔
409
    }
410
    observedColumns.add(colName);
356✔
411
    return colName;
356✔
412
  };
413
}
414

415
/**
416
 * Generates the header of a CSV given a prefix and a column count
417
 * @param columnPrefix the columnPrefix to use
418
 * @param count the count of column names to generate
419
 * @returns an array of column names
420
 */
421
function generateHeader(columnPrefix: string, count: number = 0): string[] {
32✔
422
  const headers: string[] = [];
32✔
423
  for (let i = 0; i < count; i++) {
32✔
424
    headers.push(`${columnPrefix}${i + 1}`);
102✔
425
  }
426
  return headers;
32✔
427
}
428

429
function normalizePapaStreamingRow(row: unknown[]): unknown[] {
430
  return row.map(value => (Array.isArray(value) && value.length === 0 ? null : value));
57,522✔
431
}
432

433
function convertToPapaObjectRow(
434
  row: unknown[],
435
  headerRow: string[]
436
): {[columnName: string]: unknown} {
437
  const objectRow = convertToObjectRow(row, headerRow);
4✔
438
  const parsedExtra = row.slice(headerRow.length);
4✔
439
  if (parsedExtra.length > 0) {
4!
440
    objectRow.__parsed_extra = parsedExtra;
4✔
441
  }
442
  return objectRow;
4✔
443
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc