• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 24607714991

18 Apr 2026 03:21PM UTC coverage: 57.099% (+0.3%) from 56.834%
24607714991

push

github

web-flow
feat: Organize sprawling converter function fleet into well defined exported Converter objects (#3396)

10822 of 20660 branches covered (52.38%)

Branch coverage included in aggregate %.

1386 of 1996 new or added lines in 59 files covered. (69.44%)

2 existing lines in 2 files now uncovered.

22093 of 36985 relevant lines covered (59.74%)

4859.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.0
/modules/csv/src/csv-loader.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
6
import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
7

8
import {toArrayBufferIterator} from '@loaders.gl/loader-utils';
9
import {
10
  AsyncQueue,
11
  TableBatchBuilder,
12
  convertToArrayRow,
13
  convertToObjectRow
14
} from '@loaders.gl/schema-utils';
15
import Papa from './papaparse/papaparse';
16
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
17
import {CSVFormat} from './csv-format';
18
import {
19
  deduceCSVSchemaFromRows,
20
  detectGeometryColumns,
21
  MAX_GEOMETRY_SNIFF_ROWS,
22
  normalizeGeometryArrayRow,
23
  normalizeGeometryObjectRow,
24
  shouldFinalizeGeometryDetection
25
} from './lib/csv-geometry';
26

27
// __VERSION__ is injected by babel-plugin-version-inline.
// When the identifier is not defined at runtime, fall back to 'latest'.
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';

/** Table shape used when neither the caller nor the loader defaults supply `csv.shape`. */
const DEFAULT_CSV_SHAPE = 'object-row-table';
32

33
/** Options understood by the CSV loader; CSV-specific settings live under the `csv` key. */
export type CSVLoaderOptions = LoaderOptions & {
  csv?: {
    // loaders.gl options
    /** Shape of the rows in the returned table / batches. */
    shape?: 'array-row-table' | 'object-row-table';
    /** optimizes memory usage but increases parsing time. */
    optimizeMemoryUsage?: boolean;
    /** Prefix used to generate column names (`<prefix>1`, `<prefix>2`, ...) when there is no header row. */
    columnPrefix?: string;
    /**
     * `'auto'` treats the first row as a header when all of its values are strings.
     * Any other value is coerced with `Boolean()` to force the decision.
     */
    header?: boolean | 'auto';

    // CSV options (papaparse)
    // delimiter: auto
    // newline: auto
    quoteChar?: string;
    escapeChar?: string;
    /** Convert numbers and boolean values in rows from strings */
    dynamicTyping?: boolean;
    comments?: boolean;
    /** `'greedy'` additionally skips rows whose values are all empty after trimming (batched parsing handles this manually). */
    skipEmptyLines?: boolean | 'greedy';
    // transform: null?
    delimitersToGuess?: string[];
    /** When true, rows are inspected for geometry columns and matching values are normalized. */
    detectGeometryColumns?: boolean;
    // fastMode: auto
  };
};
57

58
/**
 * Loader object for CSV data.
 * Extends `CSVFormat` with atomic (`parse`, `parseText`) and batched (`parseInBatches`)
 * parsers plus the default `csv` options that those parsers merge with caller options.
 */
export const CSVLoader = {
  ...CSVFormat,

  // Type-only placeholders: the values are null, only the asserted types matter
  dataType: null as unknown as ObjectRowTable | ArrayRowTable,
  batchType: null as unknown as TableBatch,
  version: VERSION,
  // Binary input is decoded with TextDecoder's default encoding before parsing
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
    parseCSV(new TextDecoder().decode(arrayBuffer), options),
  parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),
  parseInBatches: parseCSVInBatches,
  // @ts-ignore
  // testText: null,
  options: {
    csv: {
      shape: DEFAULT_CSV_SHAPE, // 'object-row-table'
      optimizeMemoryUsage: false,
      // CSV options
      header: 'auto',
      columnPrefix: 'column',
      // delimiter: auto
      // newline: auto
      quoteChar: '"',
      escapeChar: '"',
      dynamicTyping: true,
      comments: false,
      skipEmptyLines: true,
      // transform: null?
      detectGeometryColumns: false,
      delimitersToGuess: [',', '\t', '|', ';']
      // fastMode: auto
    }
  }
} as const satisfies LoaderWithParser<ObjectRowTable | ArrayRowTable, TableBatch, CSVLoaderOptions>;
91

92
/**
 * Parses a complete CSV text into a row table.
 *
 * The first row is read separately to decide whether it is a header (when `csv.header`
 * is `'auto'`) and to know how many column names to generate when there is none.
 *
 * @param csvText the CSV text to parse
 * @param options loader options; `options.csv` is merged over `CSVLoader.options.csv`
 * @returns an object-row or array-row table (per `csv.shape`) with a deduced `schema`
 * @throws Error with the shape as its message when `csv.shape` is not a supported shape
 */
async function parseCSV(
  csvText: string,
  options?: CSVLoaderOptions
): Promise<ObjectRowTable | ArrayRowTable> {
  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

  // `firstRow` is undefined when the text contains no rows at all
  const firstRow: any[] | undefined = readFirstRow(csvText);
  const header: boolean =
    csvOptions.header === 'auto'
      ? Boolean(firstRow && isHeaderRow(firstRow))
      : Boolean(csvOptions.header);

  const parseWithHeader = header;

  const papaparseConfig = {
    // dynamicTyping: true,
    ...csvOptions,
    header: parseWithHeader,
    download: false, // We handle loading, no need for papaparse to do it for us
    transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
    error: e => {
      throw new Error(e);
    }
  };

  const result = Papa.parse(csvText, papaparseConfig);
  const rows = result.data as any[];

  // Without a parsed header, generate column names. An input with no rows has no first
  // row to measure, so fall back to generateHeader's default count instead of throwing.
  const headerRow =
    result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow?.length);

  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
  let table: ArrayRowTable | ObjectRowTable;
  switch (shape) {
    case 'object-row-table':
      table = {
        shape: 'object-row-table',
        data: rows.map(row => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
      };
      break;
    case 'array-row-table':
      table = {
        shape: 'array-row-table',
        data: rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
      };
      break;
    default:
      throw new Error(shape);
  }

  // Geometry detection always inspects array rows, whatever the output shape is
  const detectedGeometryColumns = csvOptions.detectGeometryColumns
    ? detectGeometryColumns(
        headerRow,
        rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
      )
    : [];

  if (detectedGeometryColumns.length > 0) {
    table =
      table.shape === 'array-row-table'
        ? {
            ...table,
            data: table.data.map(row => normalizeGeometryArrayRow(row, detectedGeometryColumns))
          }
        : {
            ...table,
            data: table.data.map(row => normalizeGeometryObjectRow(row, detectedGeometryColumns))
          };
  }

  table.schema = deduceCSVSchemaFromRows(table.data, headerRow, detectedGeometryColumns);
  return table;
}
162

163
// TODO - support batch size 0 = no batching/single batch?
164
/**
 * Parses CSV data incrementally and emits table batches.
 *
 * Papaparse streams the input row by row into `step`; rows are accumulated in a
 * `TableBatchBuilder` and every full batch is pushed onto an `AsyncQueue`, which is
 * returned to the caller. Errors raised while adding rows or finishing are enqueued too.
 *
 * When `csv.detectGeometryColumns` is set, rows are buffered ("sniffed") until
 * `shouldFinalizeGeometryDetection` reports that detection can be finalized, or until
 * the input ends; the buffered rows are then normalized and added to the batch builder.
 *
 * @param asyncIterator sync or async iterable of binary chunks containing the CSV data
 * @param options loader options; `options.csv` is merged over `CSVLoader.options.csv`.
 *   The caller's object is not modified.
 * @returns an async iterable of table batches
 */
function parseCSVInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: CSVLoaderOptions
): AsyncIterable<TableBatch> {
  // Papaparse does not support standard batch size handling
  // TODO - investigate papaparse chunks mode
  options = {...options};
  if (options?.core?.batchSize === 'auto') {
    // The spread above is shallow: copy `core` before overriding the batch size so that
    // the caller's options object is left untouched.
    options.core = {...options.core, batchSize: 4000};
  }

  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

  const asyncQueue = new AsyncQueue<TableBatch>();

  let isFirstRow: boolean = true;
  let headerRow: string[] | null = null;
  let tableBatchBuilder: TableBatchBuilder | null = null;
  let schema: Schema | null = null;
  // Rows held back while geometry detection has not been finalized
  let sniffedRows: unknown[][] = [];
  let detectedGeometryColumns = [] as ReturnType<typeof detectGeometryColumns>;
  // Nothing to detect when the option is off, so detection starts out "finalized"
  let geometryDetectionFinalized = !csvOptions.detectGeometryColumns;

  const config = {
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
    ...csvOptions,
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
    download: false, // We handle loading, no need for papaparse to do it for us
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
    // See https://github.com/mholt/PapaParse/issues/465
    chunkSize: 1024 * 1024 * 5,
    // skipEmptyLines is always disabled for papaparse and handled manually in `step`, given
    // two bugs where the streaming parser gets stuck if both of the skipEmptyLines and step
    // callback options are provided:
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
    skipEmptyLines: false,

    // step is called on every row
    // eslint-disable-next-line complexity, max-statements
    step(results) {
      let row = results.data;

      if (csvOptions.skipEmptyLines === 'greedy') {
        // Manually reject lines that are empty
        const collapsedRow = row.flat().join('').trim();
        if (collapsedRow === '') {
          return;
        }
      } else if (csvOptions.skipEmptyLines === true) {
        row = normalizePapaStreamingRow(row);
        // A row consisting of a single null cell is treated as an empty line
        if (row.length === 1 && row[0] === null) {
          return;
        }
      }
      const bytesUsed = results.meta.cursor;

      // Check if we need to save a header row
      if (isFirstRow && !headerRow) {
        // Auto detects or can be forced with csvOptions.header
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
        if (header) {
          headerRow = row.map(duplicateColumnTransformer());
          return;
        }
      }

      // First data row without a header: generate column names from its width
      if (isFirstRow) {
        if (!headerRow) {
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
        }
      }

      if (csvOptions.optimizeMemoryUsage) {
        // A workaround to allocate new strings and don't retain pointers to original strings.
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
        row = JSON.parse(JSON.stringify(row));
      }

      const shape = getBatchShape();

      if (!geometryDetectionFinalized && headerRow) {
        // Still sniffing: buffer the row and check whether detection can be finalized
        sniffedRows.push(row);
        geometryDetectionFinalized = shouldFinalizeGeometryDetection(
          headerRow,
          sniffedRows,
          MAX_GEOMETRY_SNIFF_ROWS
        );
        if (geometryDetectionFinalized) {
          detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);
          const normalizedSniffedRows = sniffedRows.map(sniffedRow =>
            normalizeGeometryArrayRow(sniffedRow, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          isFirstRow = false;
          // Flush everything that was held back, including the current row
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            addCSVBatchRow(normalizedSniffedRow, shape, bytesUsed);
          }
          sniffedRows = [];
        }
        return;
      }

      // If first data row, we can deduce the schema
      if (isFirstRow) {
        if (!headerRow) {
          return;
        }
        schema = deduceCSVSchemaFromRows(
          [normalizeGeometryArrayRow(row, detectedGeometryColumns)],
          headerRow,
          detectedGeometryColumns
        );
        isFirstRow = false;
      }

      const normalizedRow = normalizeGeometryArrayRow(row, detectedGeometryColumns);
      addCSVBatchRow(normalizedRow, shape, bytesUsed);
    },

    // complete is called when all rows have been read
    complete(results) {
      try {
        if (!geometryDetectionFinalized && headerRow) {
          // Input ended while still sniffing: detect with the rows collected so far
          detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);
          const normalizedSniffedRows = sniffedRows.map(row =>
            normalizeGeometryArrayRow(row, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          const shape = getBatchShape();
          tableBatchBuilder =
            tableBatchBuilder ||
            new TableBatchBuilder(schema, {
              ...(options?.core || {}),
              shape
            });
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            const batchRow =
              shape === 'object-row-table' && normalizedSniffedRow.length > headerRow.length
                ? convertToPapaObjectRow(normalizedSniffedRow, headerRow)
                : normalizedSniffedRow;
            tableBatchBuilder.addRow(batchRow);
          }
        }
        const bytesUsed = results.meta.cursor;
        // Ensure any final (partial) batch gets emitted
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        asyncQueue.enqueue(error as Error);
      }

      asyncQueue.close();
    }
  };

  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);

  // TODO - Does it matter if we return asyncIterable or asyncIterator
  // return asyncQueue[Symbol.asyncIterator]();
  return asyncQueue;

  /**
   * Adds one row to the batch builder (created on first use) and enqueues a batch
   * whenever the builder reports a full one. Errors are enqueued instead of thrown.
   */
  function addCSVBatchRow(
    rowToAdd: unknown[],
    shape: 'array-row-table' | 'object-row-table',
    bytesUsed: number
  ): void {
    let batchRow: unknown[] | {[columnName: string]: unknown} = rowToAdd;
    // Rows wider than the header keep their surplus cells under `__parsed_extra`
    if (shape === 'object-row-table' && headerRow && rowToAdd.length > headerRow.length) {
      batchRow = convertToPapaObjectRow(rowToAdd, headerRow);
    }

    tableBatchBuilder =
      tableBatchBuilder ||
      new TableBatchBuilder(schema!, {
        ...(options?.core || {}),
        shape
      });

    try {
      tableBatchBuilder.addRow(batchRow);
      const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});
      if (batch) {
        asyncQueue.enqueue(batch);
      }
    } catch (error) {
      asyncQueue.enqueue(error as Error);
    }
  }

  /** Resolves the batch shape: top-level `options.shape` (deprecated), then `csv.shape`, then the default. */
  function getBatchShape(): 'array-row-table' | 'object-row-table' {
    const deprecatedShape = (
      options as {shape?: 'array-row-table' | 'object-row-table'} | undefined
    )?.shape;
    return deprecatedShape || csvOptions.shape || DEFAULT_CSV_SHAPE;
  }
}
375

376
/**
 * Checks if a certain row is a header row
 * @param row the row to check
 * @returns true if every value in the row is a string (an empty row qualifies);
 *   false otherwise, including when no row is supplied
 */
function isHeaderRow(row: string[]): boolean {
  // Array.isArray guarantees a real boolean result even when the row is missing
  return Array.isArray(row) && row.every(value => typeof value === 'string');
}
384

385
/**
 * Reads, parses, and returns the first row of a CSV text.
 * Values are dynamically typed so that callers can tell numeric cells from strings.
 * @param csvText the csv text to parse
 * @returns the first row; undefined when papaparse yields no rows for the text
 */
function readFirstRow(csvText: string): any[] {
  const result = Papa.parse(csvText, {
    dynamicTyping: true,
    // Only parse a single row
    preview: 1
  });
  return result.data[0];
}
397

398
/**
 * Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle
 * duplicate header columns and would use the latest occurrence by default.
 * See the header option in https://www.papaparse.com/docs#config
 *
 * Each returned transformer keeps its own set of names seen so far. A repeated name gets
 * the first free suffix: `name.1`, `name.2`, ...
 * @returns a transform function that returns sanitized names for duplicate fields
 */
function duplicateColumnTransformer(): (column: string) => string {
  const observedColumns = new Set<string>();
  return col => {
    let colName = col;
    // Try `col.1`, `col.2`, ... until a name that has not been handed out yet is found
    for (let counter = 1; observedColumns.has(colName); counter++) {
      colName = `${col}.${counter}`;
    }
    observedColumns.add(colName);
    return colName;
  };
}
417

418
/**
 * Generates the header of a CSV given a prefix and a column count.
 * Names are 1-based: `<prefix>1`, `<prefix>2`, ...
 * @param columnPrefix the columnPrefix to use
 * @param count the count of column names to generate (defaults to 0)
 * @returns an array of column names; empty when count is 0 or omitted
 */
function generateHeader(columnPrefix: string, count: number = 0): string[] {
  const headers: string[] = [];
  for (let columnNumber = 1; columnNumber <= count; columnNumber++) {
    headers.push(`${columnPrefix}${columnNumber}`);
  }
  return headers;
}
431

432
/**
 * Replaces every empty-array cell in a row with null and leaves all other cells as they are.
 * NOTE(review): presumably the streaming parser reports empty cells as `[]` - confirm
 * against the vendored papaparse.
 * @param row a row as delivered to the papaparse `step` callback
 * @returns a new row of the same length
 */
function normalizePapaStreamingRow(row: unknown[]): unknown[] {
  return row.map(value => {
    const isEmptyArray = Array.isArray(value) && value.length === 0;
    return isEmptyArray ? null : value;
  });
}
435

436
/**
 * Converts an array row into an object row, keeping cells beyond the header width.
 * Surplus cells (those past `headerRow.length`) are stored as an array under the
 * `__parsed_extra` key; the key is only set when there are surplus cells.
 * @param row the array row to convert
 * @param headerRow the column names
 * @returns the object row
 */
function convertToPapaObjectRow(
  row: unknown[],
  headerRow: string[]
): {[columnName: string]: unknown} {
  const objectRow = convertToObjectRow(row, headerRow);
  const parsedExtra = row.slice(headerRow.length);
  if (parsedExtra.length > 0) {
    objectRow.__parsed_extra = parsedExtra;
  }
  return objectRow;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc