• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 20352515932

18 Dec 2025 09:56PM UTC coverage: 35.115% (-28.4%) from 63.485%
20352515932

push

github

web-flow
feat(loader-utils): Export is-type helpers (#3258)

1188 of 1998 branches covered (59.46%)

Branch coverage included in aggregate %.

147 of 211 new or added lines in 13 files covered. (69.67%)

30011 existing lines in 424 files now uncovered.

37457 of 108056 relevant lines covered (34.66%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

36.36
/modules/csv/src/csv-loader.ts
1
// loaders.gl
1✔
2
// SPDX-License-Identifier: MIT
1✔
3
// Copyright (c) vis.gl contributors
1✔
4

1✔
5
import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
1✔
6
import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
1✔
7

1✔
8
import {log, toArrayBufferIterator} from '@loaders.gl/loader-utils';
1✔
9
import {
1✔
10
  AsyncQueue,
1✔
11
  deduceTableSchema,
1✔
12
  TableBatchBuilder,
1✔
13
  convertToArrayRow,
1✔
14
  convertToObjectRow
1✔
15
} from '@loaders.gl/schema-utils';
1✔
16
import Papa from './papaparse/papaparse';
1✔
17
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
1✔
18
import {CSVFormat} from './csv-format';
1✔
19

1✔
20
// __VERSION__ is injected by babel-plugin-version-inline; use 'latest' when it was not injected
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ === 'undefined' ? 'latest' : __VERSION__;

/** Table shape produced when the caller does not request one. */
const DEFAULT_CSV_SHAPE = 'object-row-table';
1✔
25

1✔
26
/**
 * Options accepted by the CSV loader. All CSV-specific settings live under the `csv` key.
 */
export type CSVLoaderOptions = LoaderOptions & {
  csv?: {
    // loaders.gl options
    /** Shape of the rows in the returned table / batches. */
    shape?: 'array-row-table' | 'object-row-table';
    /** optimizes memory usage but increases parsing time. */
    optimizeMemoryUsage?: boolean;
    /** Prefix of the generated column names used when the data has no header row. */
    columnPrefix?: string;
    /**
     * `'auto'` detects a header from the first row; a boolean forces the first row to be
     * treated (`true`) or not treated (`false`) as a header.
     */
    header?: boolean | 'auto';

    // CSV options (papaparse)
    // delimiter: auto
    // newline: auto
    quoteChar?: string;
    escapeChar?: string;
    // Convert numbers and boolean values in rows from strings
    dynamicTyping?: boolean;
    /** Forwarded to papaparse, which also accepts the comment prefix as a string (e.g. '#'). */
    comments?: boolean | string;
    skipEmptyLines?: boolean | 'greedy';
    // transform: null?
    delimitersToGuess?: string[];
    // fastMode: auto
  };
};
1✔
49

1✔
50
/**
 * Loader object for CSV data. Spreads in the fields of `CSVFormat` and adds the parser entry
 * points (binary, text and batched input) together with the default `csv` options.
 */
export const CSVLoader = {
  ...CSVFormat,

  // The values are null; only the asserted types of these two fields carry information
  dataType: null as unknown as ObjectRowTable | ArrayRowTable,
  batchType: null as unknown as TableBatch,
  version: VERSION,
  // Binary input is decoded with TextDecoder's default (UTF-8) decoding before parsing
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
    parseCSV(new TextDecoder().decode(arrayBuffer), options),
  parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),
  parseInBatches: parseCSVInBatches,
  // @ts-ignore
  // testText: null,
  options: {
    csv: {
      shape: DEFAULT_CSV_SHAPE, // 'object-row-table'
      optimizeMemoryUsage: false,
      // CSV options
      header: 'auto',
      columnPrefix: 'column',
      // delimiter: auto
      // newline: auto
      quoteChar: '"',
      escapeChar: '"',
      dynamicTyping: true,
      comments: false,
      skipEmptyLines: true,
      // transform: null?
      delimitersToGuess: [',', '\t', '|', ';']
      // fastMode: auto
    }
  }
} as const satisfies LoaderWithParser<ObjectRowTable | ArrayRowTable, TableBatch, CSVLoaderOptions>;
1✔
82

1✔
UNCOV
83
/**
 * Parses a complete CSV text into a row table.
 *
 * @param csvText the CSV text to parse
 * @param options loader options; `options.csv` is merged over `CSVLoader.options.csv`
 * @returns an object-row or array-row table (selected by `csv.shape`) with a deduced schema
 * @throws Error when `csv.shape` is not a supported shape, or when papaparse reports an error
 */
async function parseCSV(
  csvText: string,
  options?: CSVLoaderOptions
): Promise<ObjectRowTable | ArrayRowTable> {
  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

  // NOTE: firstRow is undefined at runtime when the text contains no rows at all
  const firstRow = readFirstRow(csvText);
  // 'auto' inspects the first row; any other value is coerced to a boolean
  const header = Boolean(csvOptions.header === 'auto' ? isHeaderRow(firstRow) : csvOptions.header);

  const papaparseConfig = {
    // dynamicTyping: true,
    ...csvOptions,
    header,
    download: false, // We handle loading, no need for papaparse to do it for us
    // Papaparse keeps only the last of several equally named columns, so rename duplicates
    transformHeader: header ? duplicateColumnTransformer() : undefined,
    error: (e: unknown) => {
      if (e instanceof Error) {
        throw e;
      }
      // Avoid an "[object Object]" message when papaparse hands us a plain error object
      const message = (e as {message?: unknown} | null | undefined)?.message;
      throw new Error(typeof message === 'string' ? message : String(e));
    }
  };

  const result = Papa.parse(csvText, papaparseConfig);
  const rows = result.data as any[];

  // Without a parsed header, generate column names; tolerate a missing first row (count -> 0)
  const headerRow =
    result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow?.length);

  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
  let table: ArrayRowTable | ObjectRowTable;
  switch (shape) {
    case 'object-row-table':
      table = {
        shape: 'object-row-table',
        // Rows are arrays when parsed without a header, objects otherwise
        data: rows.map((row) => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
      };
      break;
    case 'array-row-table':
      table = {
        shape: 'array-row-table',
        data: rows.map((row) => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
      };
      break;
    default:
      throw new Error(shape);
  }
  table.schema = deduceTableSchema(table!);
  return table;
}
×
133

1✔
134
// TODO - support batch size 0 = no batching/single batch?
/**
 * Parses CSV data that arrives as a sequence of binary chunks and emits table batches.
 *
 * Rows are read one at a time through papaparse's `step` callback, collected by a
 * `TableBatchBuilder`, and completed batches are pushed onto the returned queue.
 *
 * @param asyncIterator sync or async iterable yielding the binary chunks of the CSV data
 * @param options loader options; `options.csv` is merged over `CSVLoader.options.csv`.
 *   The caller's object is not modified.
 * @returns an async iterable of table batches; errors raised while batching are enqueued on it
 */
function parseCSVInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: CSVLoaderOptions
): AsyncIterable<TableBatch> {
  // Papaparse does not support standard batch size handling
  // TODO - investigate papaparse chunks mode
  // Copy `core` before resolving 'auto' so the caller's (possibly shared) options are not mutated
  const coreOptions = {...options?.core};
  if (coreOptions.batchSize === 'auto') {
    coreOptions.batchSize = 4000;
  }

  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

  // NOTE(review): a top-level `options.shape` takes precedence over `csv.shape` here -
  // presumably a legacy option location; confirm before removing the cast.
  const shape = (options as any)?.shape || csvOptions.shape || DEFAULT_CSV_SHAPE;

  const asyncQueue = new AsyncQueue<TableBatch>();

  let isFirstRow: boolean = true;
  let headerRow: string[] | null = null;
  let tableBatchBuilder: TableBatchBuilder | null = null;
  let schema: Schema | null = null;

  const config = {
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
    ...csvOptions,
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
    download: false, // We handle loading, no need for papaparse to do it for us
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
    // See https://github.com/mholt/PapaParse/issues/465
    chunkSize: 1024 * 1024 * 5,
    // skipEmptyLines is disabled in papaparse and handled manually in `step`, given two bugs
    // where the streaming parser gets stuck if both of the skipEmptyLines and step callback
    // options are provided:
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
    skipEmptyLines: false,

    // step is called on every row
    // eslint-disable-next-line complexity, max-statements
    step(results) {
      let row = results.data;

      if (csvOptions.skipEmptyLines) {
        // Manually reject lines that are empty (`true` and 'greedy' are treated alike here)
        const collapsedRow = row.flat().join('').trim();
        if (collapsedRow === '') {
          return;
        }
      }
      const bytesUsed = results.meta.cursor;

      // Check if we need to save a header row
      if (isFirstRow && !headerRow) {
        // Auto detects or can be forced with csvOptions.header
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
        if (header) {
          // The header row is consumed here and never added to a batch
          headerRow = row.map(duplicateColumnTransformer());
          return;
        }
      }

      // If first data row, we can deduce the schema
      if (isFirstRow) {
        isFirstRow = false;
        if (!headerRow) {
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
        }
        schema = deduceCSVSchema(row, headerRow);
      }

      if (csvOptions.optimizeMemoryUsage) {
        // A workaround to allocate new strings and don't retain pointers to original strings.
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
        row = JSON.parse(JSON.stringify(row));
      }

      // Add the row (the builder is created lazily, once the schema is known)
      tableBatchBuilder =
        tableBatchBuilder ||
        new TableBatchBuilder(
          // @ts-expect-error TODO this is not a proper schema
          schema,
          {
            shape,
            ...coreOptions
          }
        );

      try {
        tableBatchBuilder.addRow(row);
        // If a batch has been completed, emit it
        const batch = tableBatchBuilder.getFullBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        asyncQueue.enqueue(error as Error);
      }
    },

    // complete is called when all rows have been read
    complete(results) {
      try {
        const bytesUsed = results.meta.cursor;
        // Ensure any final (partial) batch gets emitted
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        asyncQueue.enqueue(error as Error);
      }

      asyncQueue.close();
    },

    // Forward failures reported by papaparse to the consumer and end the queue, rather than
    // dropping them. NOTE(review): assumes the streamer reports read/parse failures through
    // this callback and does not call `complete` afterwards - confirm against the streamer.
    error(error: unknown) {
      const message = (error as {message?: unknown} | null | undefined)?.message;
      asyncQueue.enqueue(
        error instanceof Error
          ? error
          : new Error(typeof message === 'string' ? message : String(error))
      );
      asyncQueue.close();
    }
  };

  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);

  // TODO - Does it matter if we return asyncIterable or asyncIterator
  // return asyncQueue[Symbol.asyncIterator]();
  return asyncQueue;
}
×
262

1✔
263
/**
 * Checks if a certain row is a header row
 * @param row the row to check; may be missing when the input contained no rows
 * @returns true if every value in the row is a string, false otherwise (always a boolean,
 *   including for a missing row)
 */
function isHeaderRow(row: readonly unknown[] | null | undefined): boolean {
  // `row && ...` would leak undefined/null to callers expecting a boolean
  return row != null && row.every((value) => typeof value === 'string');
}
×
271

1✔
272
/**
 * Parses just the first row of a CSV text, with dynamic typing enabled
 * @param csvText the csv text to parse
 * @returns the values of the first row
 */
function readFirstRow(csvText: string): any[] {
  // preview: 1 limits papaparse to a single row
  const {data} = Papa.parse(csvText, {preview: 1, dynamicTyping: true});
  return data[0];
}
×
284

1✔
285
/**
 * Builds a stateful renaming function for header columns. Papaparse doesn't handle duplicate
 * header columns and would use the latest occurrence by default, so repeated names get a
 * numeric suffix (`name.1`, `name.2`, ...).
 * See the header option in https://www.papaparse.com/docs#config
 * @returns a transform function that returns a name not yet handed out by this transformer
 */
function duplicateColumnTransformer(): (column: string) => string {
  const seen = new Set<string>();
  return (column) => {
    let candidate = column;
    // Try `column.1`, `column.2`, ... until the name is unused
    for (let suffix = 1; seen.has(candidate); suffix++) {
      candidate = `${column}.${suffix}`;
    }
    seen.add(candidate);
    return candidate;
  };
}
×
304

1✔
305
/**
 * Builds generated column names of the form `<prefix>1`, `<prefix>2`, ...
 * @param columnPrefix the prefix placed before each 1-based column number
 * @param count how many column names to generate (defaults to 0)
 * @returns an array of column names
 */
function generateHeader(columnPrefix: string, count: number = 0): string[] {
  const names: string[] = [];
  while (names.length < count) {
    // names.length is read before the push, so numbering starts at 1
    names.push(`${columnPrefix}${names.length + 1}`);
  }
  return names;
}
×
318

1✔
UNCOV
319
/**
 * Derives a schema from a single data row: one nullable field per value, typed by the
 * JavaScript type of that value.
 * @param row the data row whose values determine the field types
 * @param headerRow column names by index; an empty or missing name falls back to the index
 * @returns a schema with the deduced fields and loaders.gl format/loader metadata
 */
function deduceCSVSchema(row, headerRow): Schema {
  const fields: Schema['fields'] = [];
  for (let index = 0; index < row.length; index++) {
    const name = String((headerRow && headerRow[index]) || index);
    const valueType = typeof row[index];
    if (valueType === 'number') {
      fields.push({name, type: 'float64', nullable: true});
    } else if (valueType === 'boolean') {
      fields.push({name, type: 'bool', nullable: true});
    } else if (valueType === 'string') {
      fields.push({name, type: 'utf8', nullable: true});
    } else {
      // Any other value type is reported and then treated as text
      log.warn(`CSV: Unknown column type: ${valueType}`)();
      fields.push({name, type: 'utf8', nullable: true});
    }
  }

  const metadata = {
    'loaders.gl#format': 'csv',
    'loaders.gl#loader': 'CSVLoader'
  };
  return {fields, metadata};
}
×
347

1✔
348
// TODO - remove
1✔
349
// type ObjectField = {name: string; index: number; type: any};
1✔
350
// type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];
1✔
351

1✔
352
// function deduceObjectSchema(row, headerRow): ObjectSchema {
1✔
353
//   const schema: ObjectSchema = headerRow ? {} : [];
1✔
354
//   for (let i = 0; i < row.length; i++) {
1✔
355
//     const columnName = (headerRow && headerRow[i]) || i;
1✔
356
//     const value = row[i];
1✔
357
//     switch (typeof value) {
1✔
358
//       case 'number':
1✔
359
//       case 'boolean':
1✔
360
//         // TODO - booleans could be handled differently...
1✔
361
//         schema[columnName] = {name: String(columnName), index: i, type: Float32Array};
1✔
362
//         break;
1✔
363
//       case 'string':
1✔
364
//       default:
1✔
365
//         schema[columnName] = {name: String(columnName), index: i, type: Array};
1✔
366
//       // We currently only handle numeric rows
1✔
367
//       // TODO we could offer a function to map strings to numbers?
1✔
368
//     }
1✔
369
//   }
1✔
370
//   return schema;
1✔
371
// }
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc