• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 24907303489

24 Apr 2026 07:12PM UTC coverage: 59.423% (+0.09%) from 59.334%
24907303489

push

github

web-flow
feat: Dynamic import loaders (#3405)

11252 of 20783 branches covered (54.14%)

Branch coverage included in aggregate %.

1164 of 1518 new or added lines in 244 files covered. (76.68%)

41 existing lines in 18 files now uncovered.

23432 of 37585 relevant lines covered (62.34%)

16317.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.1
/modules/csv/src/csv-loader-with-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
6
import type {
7
  Schema,
8
  ArrayRowTable,
9
  ArrowTable,
10
  ArrowTableBatch,
11
  ColumnarTable,
12
  ColumnarTableBatch,
13
  ObjectRowTable,
14
  TableBatch
15
} from '@loaders.gl/schema';
16

17
import {toArrayBufferIterator} from '@loaders.gl/loader-utils';
18
import {
19
  AsyncQueue,
20
  TableBatchBuilder,
21
  convertToArrayRow,
22
  convertToObjectRow
23
} from '@loaders.gl/schema-utils';
24
import Papa from './papaparse/papaparse';
25
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
26
import {DEFAULT_CSV_SHAPE} from './csv-loader-options';
27
import {
28
  parseCSVArrayBufferAsArrow,
29
  parseCSVInArrowBatches,
30
  parseCSVTextAsArrow
31
} from './csv-arrow-loader-with-parser';
32
import {
33
  deduceCSVSchemaFromRows,
34
  detectGeometryColumns,
35
  MAX_GEOMETRY_SNIFF_ROWS,
36
  normalizeGeometryArrayRow,
37
  normalizeGeometryObjectRow,
38
  shouldFinalizeGeometryDetection
39
} from './lib/csv-geometry';
40
import {CSVLoader as CSVLoaderMetadata, type CSVLoaderOptions} from './csv-loader';
41

42
// Drop `preload` from the copied loader metadata: the rest-destructure keeps every
// other field and discards `preload` (bound to the unused `_CSVLoaderPreload`).
const {preload: _CSVLoaderPreload, ...CSVLoaderMetadataWithoutPreload} = CSVLoaderMetadata;
14✔
43

44
export type {CSVLoaderOptions} from './csv-loader';
45

46
/** Loader for CSV and other delimiter-separated tabular text formats. */
47
export const CSVLoaderWithParser = {
14✔
48
  ...CSVLoaderMetadataWithoutPreload,
49
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
NEW
50
    options?.csv?.shape === 'arrow-table'
×
51
      ? parseCSVArrayBufferAsArrow(arrayBuffer, options)
52
      : parseCSVText(new TextDecoder().decode(arrayBuffer), options),
53
  parseSync: (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
NEW
54
    parseCSVTextSync(new TextDecoder().decode(arrayBuffer), options),
×
55
  parseText: (text: string, options?: CSVLoaderOptions) =>
56
    options?.csv?.shape === 'arrow-table'
54✔
57
      ? parseCSVTextAsArrow(text, options)
58
      : parseCSVText(text, options),
59
  parseTextSync: (text: string, options?: CSVLoaderOptions) => parseCSVTextSync(text, options),
4✔
60
  parseInBatches: (asyncIterator, options?: CSVLoaderOptions) =>
61
    options?.csv?.shape === 'arrow-table'
48✔
62
      ? parseCSVInArrowBatches(asyncIterator, options)
63
      : parseCSVInBatches(asyncIterator, options)
64
} as const satisfies LoaderWithParser<
65
  ObjectRowTable | ArrayRowTable | ColumnarTable | ArrowTable,
66
  TableBatch | ColumnarTableBatch | ArrowTableBatch,
67
  CSVLoaderOptions
68
>;
69

70
async function parseCSVText(
71
  csvText: string,
72
  options?: CSVLoaderOptions
73
): Promise<ObjectRowTable | ArrayRowTable> {
74
  return parseCSVTextSync(csvText, options);
52✔
75
}
76

77
function parseCSVTextSync(
78
  csvText: string,
79
  options?: CSVLoaderOptions
80
): ObjectRowTable | ArrayRowTable {
81
  // Apps can call the parse method directly, so we apply default options here
82
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};
56✔
83

84
  const firstRow = readFirstRow(csvText);
56✔
85
  const header: boolean =
86
    csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);
56✔
87

88
  const parseWithHeader = header;
56✔
89

90
  const papaparseConfig = {
56✔
91
    // dynamicTyping: true,
92
    ...csvOptions,
93
    header: parseWithHeader,
94
    download: false, // We handle loading, no need for papaparse to do it for us
95
    transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
56✔
96
    error: e => {
NEW
97
      throw new Error(e);
×
98
    }
99
  };
100

101
  const result = Papa.parse(csvText, papaparseConfig);
56✔
102
  const rows = result.data as any[];
56✔
103

104
  const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
56✔
105

106
  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
56!
107
  let table: ArrayRowTable | ObjectRowTable;
108
  switch (shape) {
56!
109
    case 'object-row-table':
110
      table = {
48✔
111
        shape: 'object-row-table',
112
        data: rows.map(row => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
174,910✔
113
      };
114
      break;
48✔
115
    case 'array-row-table':
116
      table = {
8✔
117
        shape: 'array-row-table',
118
        data: rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
22✔
119
      };
120
      break;
8✔
121
    default:
NEW
122
      throw new Error(shape);
×
123
  }
124
  const detectedGeometryColumns = csvOptions.detectGeometryColumns
56✔
125
    ? detectGeometryColumns(
126
        headerRow,
127
        rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
12!
128
      )
129
    : [];
130

131
  if (detectedGeometryColumns.length > 0) {
56✔
132
    table =
4✔
133
      table.shape === 'array-row-table'
4✔
134
        ? {
135
            ...table,
136
            data: table.data.map(row => normalizeGeometryArrayRow(row, detectedGeometryColumns))
6✔
137
          }
138
        : {
139
            ...table,
140
            data: table.data.map(row => normalizeGeometryObjectRow(row, detectedGeometryColumns))
6✔
141
          };
142
  }
143

144
  table.schema = deduceCSVSchemaFromRows(table.data, headerRow, detectedGeometryColumns);
56✔
145
  return table;
56✔
146
}
147

148
// TODO - support batch size 0 = no batching/single batch?
149
function parseCSVInBatches(
150
  asyncIterator:
151
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
152
    | Iterable<ArrayBufferLike | ArrayBufferView>,
153
  options?: CSVLoaderOptions
154
): AsyncIterable<TableBatch> {
155
  // Papaparse does not support standard batch size handling
156
  // TODO - investigate papaparse chunks mode
157
  options = {...options};
46✔
158
  if (options?.core?.batchSize === 'auto') {
46✔
159
    options.core.batchSize = 4000;
40✔
160
  }
161

162
  // Apps can call the parse method directly, we so apply default options here
163
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};
46✔
164

165
  const asyncQueue = new AsyncQueue<TableBatch>();
46✔
166

167
  let isFirstRow: boolean = true;
46✔
168
  let headerRow: string[] | null = null;
46✔
169
  let tableBatchBuilder: TableBatchBuilder | null = null;
46✔
170
  let schema: Schema | null = null;
46✔
171
  let sniffedRows: unknown[][] = [];
46✔
172
  let detectedGeometryColumns = [] as ReturnType<typeof detectGeometryColumns>;
46✔
173
  let geometryDetectionFinalized = !csvOptions.detectGeometryColumns;
46✔
174

175
  const config = {
46✔
176
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
177
    ...csvOptions,
178
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
179
    download: false, // We handle loading, no need for papaparse to do it for us
180
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
181
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
182
    // See https://github.com/mholt/PapaParse/issues/465
183
    chunkSize: 1024 * 1024 * 5,
184
    // skipEmptyLines is set to a boolean value if supplied. Greedy is set to true
185
    // skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if
186
    // both of the skipEmptyLines and step callback options are provided:
187
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
188
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
189
    skipEmptyLines: false,
190

191
    // step is called on every row
192
    // eslint-disable-next-line complexity, max-statements
193
    step(results) {
194
      let row = results.data;
14,516✔
195

196
      if (csvOptions.skipEmptyLines === 'greedy') {
14,516✔
197
        // Manually reject lines that are empty
198
        const collapsedRow = row.flat().join('').trim();
16✔
199
        if (collapsedRow === '') {
16✔
200
          return;
10✔
201
        }
202
      } else if (csvOptions.skipEmptyLines === true) {
14,500!
203
        row = normalizePapaStreamingRow(row);
14,500✔
204
        if (row.length === 1 && row[0] === null) {
14,500✔
205
          return;
6✔
206
        }
207
      }
208
      const bytesUsed = results.meta.cursor;
14,500✔
209

210
      // Check if we need to save a header row
211
      if (isFirstRow && !headerRow) {
14,500✔
212
        // Auto detects or can be forced with csvOptions.header
213
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
46✔
214
        if (header) {
46✔
215
          headerRow = row.map(duplicateColumnTransformer());
28✔
216
          return;
28✔
217
        }
218
      }
219

220
      // If first data row, we can deduce the schema
221
      if (isFirstRow) {
14,472✔
222
        if (!headerRow) {
50✔
223
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
18✔
224
        }
225
      }
226

227
      if (csvOptions.optimizeMemoryUsage) {
14,472!
228
        // A workaround to allocate new strings and don't retain pointers to original strings.
229
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
NEW
230
        row = JSON.parse(JSON.stringify(row));
×
231
      }
232

233
      const shape = getBatchShape();
14,472✔
234

235
      if (!geometryDetectionFinalized && headerRow) {
14,472✔
236
        sniffedRows.push(row);
6✔
237
        geometryDetectionFinalized = shouldFinalizeGeometryDetection(
6✔
238
          headerRow,
239
          sniffedRows,
240
          MAX_GEOMETRY_SNIFF_ROWS
241
        );
242
        if (geometryDetectionFinalized) {
6!
NEW
243
          detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);
×
NEW
244
          const normalizedSniffedRows = sniffedRows.map(sniffedRow =>
×
245
            normalizeGeometryArrayRow(sniffedRow, detectedGeometryColumns)
246
          );
NEW
247
          schema = deduceCSVSchemaFromRows(
×
248
            normalizedSniffedRows,
249
            headerRow,
250
            detectedGeometryColumns
251
          );
NEW
252
          isFirstRow = false;
×
NEW
253
          for (const normalizedSniffedRow of normalizedSniffedRows) {
×
NEW
254
            addCSVBatchRow(normalizedSniffedRow, shape, bytesUsed);
×
255
          }
NEW
256
          sniffedRows = [];
×
257
        }
258
        return;
6✔
259
      }
260

261
      if (isFirstRow) {
14,466✔
262
        if (!headerRow) {
44!
NEW
263
          return;
×
264
        }
265
        schema = deduceCSVSchemaFromRows(
44✔
266
          [normalizeGeometryArrayRow(row, detectedGeometryColumns)],
267
          headerRow,
268
          detectedGeometryColumns
269
        );
270
        isFirstRow = false;
44✔
271
      }
272

273
      const normalizedRow = normalizeGeometryArrayRow(row, detectedGeometryColumns);
14,466✔
274
      addCSVBatchRow(normalizedRow, shape, bytesUsed);
14,466✔
275
    },
276

277
    // complete is called when all rows have been read
278
    complete(results) {
279
      try {
46✔
280
        if (!geometryDetectionFinalized && headerRow) {
46✔
281
          detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);
2✔
282
          const normalizedSniffedRows = sniffedRows.map(row =>
4✔
283
            normalizeGeometryArrayRow(row, detectedGeometryColumns)
3✔
284
          );
285
          schema = deduceCSVSchemaFromRows(
2✔
286
            normalizedSniffedRows,
287
            headerRow,
288
            detectedGeometryColumns
289
          );
290
          const shape = getBatchShape();
2✔
291
          tableBatchBuilder =
2✔
292
            tableBatchBuilder ||
4✔
293
            new TableBatchBuilder(schema, {
294
              ...(options?.core || {}),
2!
295
              shape
296
            });
297
          for (const normalizedSniffedRow of normalizedSniffedRows) {
2✔
298
            const batchRow =
299
              shape === 'object-row-table' && normalizedSniffedRow.length > headerRow.length
6!
300
                ? convertToPapaObjectRow(normalizedSniffedRow, headerRow)
301
                : normalizedSniffedRow;
302
            tableBatchBuilder.addRow(batchRow);
6✔
303
          }
304
        }
305
        const bytesUsed = results.meta.cursor;
46✔
306
        // Ensure any final (partial) batch gets emitted
307
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
46✔
308
        if (batch) {
46✔
309
          asyncQueue.enqueue(batch);
42✔
310
        }
311
      } catch (error) {
NEW
312
        asyncQueue.enqueue(error as Error);
×
313
      }
314

315
      asyncQueue.close();
46✔
316
    }
317
  };
318

319
  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);
46✔
320

321
  // TODO - Does it matter if we return asyncIterable or asyncIterator
322
  // return asyncQueue[Symbol.asyncIterator]();
323
  return asyncQueue;
46✔
324

325
  function addCSVBatchRow(rowToAdd: unknown[], shape: CSVBatchShape, bytesUsed: number): void {
326
    let batchRow: unknown[] | {[columnName: string]: unknown} = rowToAdd;
14,466✔
327
    if (shape === 'object-row-table' && headerRow && rowToAdd.length > headerRow.length) {
14,466✔
328
      batchRow = convertToPapaObjectRow(rowToAdd, headerRow);
4✔
329
    }
330

331
    tableBatchBuilder =
14,466✔
332
      tableBatchBuilder ||
14,510✔
333
      new TableBatchBuilder(schema!, {
334
        ...(options?.core || {}),
44!
335
        shape
336
      });
337

338
    try {
14,466✔
339
      tableBatchBuilder.addRow(batchRow);
14,466✔
340
      const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});
14,466✔
341
      if (batch) {
14,466✔
342
        asyncQueue.enqueue(batch);
168✔
343
      }
344
    } catch (error) {
NEW
345
      asyncQueue.enqueue(error as Error);
×
346
    }
347
  }
348

349
  function getBatchShape(): CSVBatchShape {
350
    const deprecatedShape = (options as {shape?: CSVBatchShape} | undefined)?.shape;
14,474✔
351
    const shape = deprecatedShape || csvOptions.shape || DEFAULT_CSV_SHAPE;
14,474!
352
    switch (shape) {
14,474✔
353
      case 'array-row-table':
354
      case 'columnar-table':
355
        return shape;
4,034✔
356
      default:
357
        return DEFAULT_CSV_SHAPE;
10,440✔
358
    }
359
  }
360
}
361

362
/** Table shapes the streaming batch parser can emit. */
type CSVBatchShape = 'array-row-table' | 'object-row-table' | 'columnar-table';
363

364
/**
365
 * Checks if a certain row is a header row
366
 * @param row the row to check
367
 * @returns true if the row looks like a header
368
 */
369
function isHeaderRow(row: string[]): boolean {
370
  return row && row.every(value => typeof value === 'string');
290✔
371
}
372

373
/**
374
 * Reads, parses, and returns the first row of a CSV text
375
 * @param csvText the csv text to parse
376
 * @returns the first row
377
 */
378
function readFirstRow(csvText: string): any[] {
379
  const result = Papa.parse(csvText, {
56✔
380
    dynamicTyping: true,
381
    preview: 1
382
  });
383
  return result.data[0];
56✔
384
}
385

386
/**
387
 * Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle
388
 * duplicate header columns and would use the latest occurrence by default.
389
 * See the header option in https://www.papaparse.com/docs#config
390
 * @returns a transform function that returns sanitized names for duplicate fields
391
 */
392
function duplicateColumnTransformer(): (column: string) => string {
393
  const observedColumns = new Set<string>();
70✔
394
  return col => {
70✔
395
    let colName = col;
344✔
396
    let counter = 1;
344✔
397
    while (observedColumns.has(colName)) {
344✔
398
      colName = `${col}.${counter}`;
50✔
399
      counter++;
50✔
400
    }
401
    observedColumns.add(colName);
344✔
402
    return colName;
344✔
403
  };
404
}
405

406
/**
407
 * Generates the header of a CSV given a prefix and a column count
408
 * @param columnPrefix the columnPrefix to use
409
 * @param count the count of column names to generate
410
 * @returns an array of column names
411
 */
412
function generateHeader(columnPrefix: string, count: number = 0): string[] {
32✔
413
  const headers: string[] = [];
32✔
414
  for (let i = 0; i < count; i++) {
32✔
415
    headers.push(`${columnPrefix}${i + 1}`);
102✔
416
  }
417
  return headers;
32✔
418
}
419

420
function normalizePapaStreamingRow(row: unknown[]): unknown[] {
421
  return row.map(value => (Array.isArray(value) && value.length === 0 ? null : value));
57,522✔
422
}
423

424
function convertToPapaObjectRow(
425
  row: unknown[],
426
  headerRow: string[]
427
): {[columnName: string]: unknown} {
428
  const objectRow = convertToObjectRow(row, headerRow);
4✔
429
  const parsedExtra = row.slice(headerRow.length);
4✔
430
  if (parsedExtra.length > 0) {
4!
431
    objectRow.__parsed_extra = parsedExtra;
4✔
432
  }
433
  return objectRow;
4✔
434
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc