24108422669

Committed 07 Apr 2026 10:56PM UTC coverage: 35.134% (-0.3%) from 35.411%

Build # 24108422669

Build Type

push

github

Committed by

web-flow

Commit Message

feat(csv) CSVArrowLoader (#3345)

Coverage Stats

1225 of 2058 branches covered (59.52%)

Branch coverage included in aggregate %.

568 of 2529 new or added lines in 12 files covered. (22.46%)

2 existing lines in 2 files now uncovered.

39940 of 115107 relevant lines covered (34.7%)

0.77 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

34.67

/modules/csv/src/csv-loader.ts

// loaders.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';

import {log, toArrayBufferIterator} from '@loaders.gl/loader-utils';
import {
  AsyncQueue,
  deduceTableSchema,
  TableBatchBuilder,
  convertToArrayRow,
  convertToObjectRow
} from '@loaders.gl/schema-utils';
import Papa from './papaparse/papaparse';
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
import {CSVFormat} from './csv-format';

// Loader version string, reported through `CSVLoader.version`.
// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';

/** Table shape used when no `csv.shape` option is supplied. */
const DEFAULT_CSV_SHAPE = 'object-row-table';

/**
 * Options accepted by `CSVLoader`. CSV specific settings live under the `csv` key; apart from the
 * loaders.gl specific ones they are forwarded to papaparse.
 */
export type CSVLoaderOptions = LoaderOptions & {
  csv?: {
    // loaders.gl options
    /** Shape of the rows in the returned table / batches. */
    shape?: 'array-row-table' | 'object-row-table';
    /** optimizes memory usage but increases parsing time. */
    optimizeMemoryUsage?: boolean;
    /** Prefix of generated column names (`<prefix>1`, `<prefix>2`, ...) when there is no header row. */
    columnPrefix?: string;
    /**
     * Whether the first row is a header row.
     * - `'auto'`: the first row is treated as a header when all of its values are strings.
     * - any other value is coerced to a boolean, forcing (or suppressing) header handling.
     */
    header?: boolean | 'auto';

    // CSV options (papaparse)
    // delimiter: auto
    // newline: auto
    quoteChar?: string;
    escapeChar?: string;
    // Convert numbers and boolean values in rows from strings
    dynamicTyping?: boolean;
    comments?: boolean;
    /** Drop empty lines; `'greedy'` also drops lines that contain only whitespace. */
    skipEmptyLines?: boolean | 'greedy';
    // transform: null?
    delimitersToGuess?: string[];
    // fastMode: auto
  };
};

/**
 * Loader object for CSV data.
 * Extends `CSVFormat` with parser entry points (`parse`, `parseText`, `parseInBatches`) and the
 * default `csv` options that both parsers merge user supplied options over.
 */
export const CSVLoader = {
  ...CSVFormat,

  // Type-only markers: the runtime values are null, only the asserted types matter
  dataType: null as unknown as ObjectRowTable | ArrayRowTable,
  batchType: null as unknown as TableBatch,
  version: VERSION,
  // Binary input is decoded with a default TextDecoder before being parsed as text
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
    parseCSV(new TextDecoder().decode(arrayBuffer), options),
  parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),
  parseInBatches: parseCSVInBatches,
  // @ts-ignore
  // testText: null,
  options: {
    csv: {
      shape: DEFAULT_CSV_SHAPE, // 'object-row-table'
      optimizeMemoryUsage: false,
      // CSV options
      header: 'auto',
      columnPrefix: 'column',
      // delimiter: auto
      // newline: auto
      quoteChar: '"',
      escapeChar: '"',
      dynamicTyping: true,
      comments: false,
      skipEmptyLines: true,
      // transform: null?
      delimitersToGuess: [',', '\t', '|', ';']
      // fastMode: auto
    }
  }
} as const satisfies LoaderWithParser<ObjectRowTable | ArrayRowTable, TableBatch, CSVLoaderOptions>;

/**
 * Parses a complete CSV text into a row table in one go (no batching).
 *
 * The first row is pre-parsed to decide whether it is a header row; the whole text is then
 * parsed with papaparse and the rows are converted to the requested table shape.
 *
 * @param csvText the CSV text to parse
 * @param options loader options; `options.csv` is merged over `CSVLoader.options.csv`
 * @returns an object-row or array-row table with a deduced schema
 * @throws Error if the requested shape is not supported
 */
async function parseCSV(
  csvText: string,
  options?: CSVLoaderOptions
): Promise<ObjectRowTable | ArrayRowTable> {
  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

  // NOTE: `firstRow` is missing when the text yields no rows (e.g. empty input), so every use
  // of it below is guarded.
  const firstRow = readFirstRow(csvText);
  const header: boolean =
    csvOptions.header === 'auto'
      ? Boolean(firstRow && isHeaderRow(firstRow))
      : Boolean(csvOptions.header);

  const parseWithHeader = header;

  const papaparseConfig = {
    // dynamicTyping: true,
    ...csvOptions,
    header: parseWithHeader,
    download: false, // We handle loading, no need for papaparse to do it for us
    // Papaparse overwrites duplicate column names, so make them unique up front
    transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
    error: (e) => {
      throw new Error(e);
    }
  };

  const result = Papa.parse(csvText, papaparseConfig);
  const rows = result.data as any[];

  // Without a header row, column names are generated from the width of the first row
  // (zero columns when there is no first row).
  const headerRow =
    result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow?.length ?? 0);

  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
  let table: ArrayRowTable | ObjectRowTable;
  switch (shape) {
    case 'object-row-table':
      table = {
        shape: 'object-row-table',
        data: rows.map((row) => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
      };
      break;
    case 'array-row-table':
      table = {
        shape: 'array-row-table',
        data: rows.map((row) => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
      };
      break;
    default:
      throw new Error(shape);
  }
  // NOTE(review): schema deduction still runs for a table without rows - confirm that
  // deduceTableSchema tolerates empty tables.
  table.schema = deduceTableSchema(table);
  return table;
}

// TODO - support batch size 0 = no batching/single batch?
/**
 * Parses CSV data supplied as a (sync or async) iterable of binary chunks and emits the rows as
 * table batches through an `AsyncQueue`.
 *
 * Rows are handled one at a time in papaparse's `step` callback: empty lines are filtered
 * manually, the header row is detected (or generated), a schema is deduced from the first data
 * row and every row is added to a `TableBatchBuilder`. Errors raised while adding rows or while
 * finalizing the last batch are enqueued on the queue instead of being thrown from the callback.
 *
 * @param asyncIterator sync or async iterable of binary chunks containing the CSV data
 * @param options loader options; `options.csv` is merged over `CSVLoader.options.csv`.
 *   The supplied object (including its nested `core` object) is not modified.
 * @returns an async iterable of table batches
 */
function parseCSVInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: CSVLoaderOptions
): AsyncIterable<TableBatch> {
  // Papaparse does not support standard batch size handling
  // TODO - investigate papaparse chunks mode
  options = {...options};
  if (options.core?.batchSize === 'auto') {
    // The spread above is shallow: replace `core` with a copy rather than writing into the
    // caller's object.
    options.core = {...options.core, batchSize: 4000};
  }

  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

  const asyncQueue = new AsyncQueue<TableBatch>();

  let isFirstRow: boolean = true;
  let headerRow: string[] | null = null;
  let tableBatchBuilder: TableBatchBuilder | null = null;
  let schema: Schema | null = null;

  const config = {
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
    ...csvOptions,
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
    download: false, // We handle loading, no need for papaparse to do it for us
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
    // See https://github.com/mholt/PapaParse/issues/465
    chunkSize: 1024 * 1024 * 5,
    // skipEmptyLines is always disabled in papaparse and handled manually in step() instead,
    // given two bugs where the streaming parser gets stuck if both of the skipEmptyLines and
    // step callback options are provided:
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
    skipEmptyLines: false,

    // step is called on every row
    // eslint-disable-next-line complexity, max-statements
    step(results) {
      let row = results.data;

      if (csvOptions.skipEmptyLines === 'greedy') {
        // Manually reject lines that are empty
        const collapsedRow = row.flat().join('').trim();
        if (collapsedRow === '') {
          return;
        }
      } else if (csvOptions.skipEmptyLines === true) {
        row = normalizePapaStreamingRow(row);
        // An empty line is a single empty cell: `null` once dynamic typing has been applied,
        // an empty string otherwise (presumably, when `dynamicTyping` is disabled).
        if (row.length === 1 && (row[0] === null || row[0] === '')) {
          return;
        }
      }
      const bytesUsed = results.meta.cursor;

      // Check if we need to save a header row
      if (isFirstRow && !headerRow) {
        // Auto detects or can be forced with csvOptions.header
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
        if (header) {
          headerRow = row.map(duplicateColumnTransformer());
          // The header row is not data: wait for the next row before deducing the schema
          return;
        }
      }

      // If first data row, we can deduce the schema
      if (isFirstRow) {
        isFirstRow = false;
        if (!headerRow) {
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
        }
        schema = deduceCSVSchema(row, headerRow);
      }

      if (csvOptions.optimizeMemoryUsage) {
        // A workaround to allocate new strings and don't retain pointers to original strings.
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
        row = JSON.parse(JSON.stringify(row));
      }

      const shape = (options as any)?.shape || csvOptions.shape || DEFAULT_CSV_SHAPE;
      if (shape === 'object-row-table' && headerRow && row.length > headerRow.length) {
        // Rows wider than the header keep their surplus values under `__parsed_extra`
        row = convertToPapaObjectRow(row, headerRow);
      }

      // Add the row
      tableBatchBuilder =
        tableBatchBuilder ||
        new TableBatchBuilder(
          // @ts-expect-error TODO this is not a proper schema
          schema,
          {
            shape,
            ...(options?.core || {})
          }
        );

      try {
        tableBatchBuilder.addRow(row);
        // If a batch has been completed, emit it
        const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        asyncQueue.enqueue(error as Error);
      }
    },

    // complete is called when all rows have been read
    complete(results) {
      try {
        const bytesUsed = results.meta.cursor;
        // Ensure any final (partial) batch gets emitted
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        asyncQueue.enqueue(error as Error);
      }

      asyncQueue.close();
    }
  };

  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);

  // TODO - Does it matter if we return asyncIterable or asyncIterator
  // return asyncQueue[Symbol.asyncIterator]();
  return asyncQueue;
}

/**
 * Checks if a certain row is a header row, i.e. whether every value in it is a string.
 * A missing row (`undefined` / `null`) is never a header row; an empty row trivially is.
 * @param row the row to check
 * @returns true if the row looks like a header, false otherwise (always a strict boolean)
 */
function isHeaderRow(row: string[]): boolean {
  // Array.isArray guards against a missing first row and keeps the result a real boolean
  return Array.isArray(row) && row.every((value) => typeof value === 'string');
}

/**
 * Parses only the first row of a CSV text, with dynamic typing enabled.
 * @param csvText the csv text to parse
 * @returns the first parsed row (the first entry of papaparse's `data` result)
 */
function readFirstRow(csvText: string): any[] {
  // `preview: 1` asks papaparse for a single row
  const {data} = Papa.parse(csvText, {preview: 1, dynamicTyping: true});
  return data[0];
}

/**
 * Builds a stateful column-name transformer that makes repeated names unique. Papaparse does not
 * handle duplicate header columns itself and would keep only the latest occurrence.
 * See the header option in https://www.papaparse.com/docs#config
 *
 * A name that was already handed out gets the first free `<name>.<n>` suffix (n = 1, 2, ...).
 * @returns a transform function that returns sanitized names for duplicate fields
 */
function duplicateColumnTransformer(): (column: string) => string {
  const seen = new Set<string>();
  return (column) => {
    let candidate = column;
    // Try `column.1`, `column.2`, ... until an unused name is found
    for (let suffix = 1; seen.has(candidate); suffix++) {
      candidate = `${column}.${suffix}`;
    }
    seen.add(candidate);
    return candidate;
  };
}

/**
 * Builds synthetic column names `<prefix>1`, `<prefix>2`, ... for CSV data without a header row
 * @param columnPrefix the text placed in front of each 1-based column number
 * @param count how many column names to produce (defaults to 0)
 * @returns the generated column names, in column order
 */
function generateHeader(columnPrefix: string, count: number = 0): string[] {
  const names: string[] = [];
  // The number of names produced so far doubles as the zero-based column index
  while (names.length < count) {
    names.push(`${columnPrefix}${names.length + 1}`);
  }
  return names;
}

/**
 * Returns a copy of a streamed row in which every empty-array cell is replaced by `null`.
 * All other cells are passed through unchanged.
 * @param row the row as delivered to the papaparse `step` callback
 * @returns a new row array with empty-array cells turned into `null`
 */
function normalizePapaStreamingRow(row: unknown[]): unknown[] {
  return row.map((cell) => {
    const isEmptyArray = Array.isArray(cell) && cell.length === 0;
    return isEmptyArray ? null : cell;
  });
}

/**
 * Converts an array row to an object row and stores any values beyond the last header column
 * under the `__parsed_extra` key.
 * @param row the array row to convert
 * @param headerRow the column names, in column order
 * @returns the object row, with `__parsed_extra` set only when the row is wider than the header
 */
function convertToPapaObjectRow(
  row: unknown[],
  headerRow: string[]
): {[columnName: string]: unknown} {
  const converted = convertToObjectRow(row, headerRow);
  const overflow = row.slice(headerRow.length);
  if (overflow.length === 0) {
    return converted;
  }
  converted.__parsed_extra = overflow;
  return converted;
}

/**
 * Deduces a schema from a single data row: numbers become `float64`, booleans `bool` and strings
 * `utf8`. Any other value type is reported with a warning and also treated as `utf8`.
 * All fields are nullable.
 * @param row the (dynamically typed) data row to inspect
 * @param headerRow the column names; the column index is used where a name is missing or empty
 * @returns a schema with one field per value in the row, plus CSV loader metadata
 */
function deduceCSVSchema(row, headerRow): Schema {
  const fields: Schema['fields'] = [];
  for (let columnIndex = 0; columnIndex < row.length; columnIndex++) {
    const name = String((headerRow && headerRow[columnIndex]) || columnIndex);
    const valueType = typeof row[columnIndex];
    if (valueType === 'number') {
      fields.push({name, type: 'float64', nullable: true});
    } else if (valueType === 'boolean') {
      fields.push({name, type: 'bool', nullable: true});
    } else {
      // Strings and unrecognized types share the utf8 fallback; only the latter are reported
      if (valueType !== 'string') {
        log.warn(`CSV: Unknown column type: ${valueType}`)();
      }
      fields.push({name, type: 'utf8', nullable: true});
    }
  }

  const metadata = {
    'loaders.gl#format': 'csv',
    'loaders.gl#loader': 'CSVLoader'
  };
  return {fields, metadata};
}

// TODO - remove
// type ObjectField = {name: string; index: number; type: any};
// type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];

// function deduceObjectSchema(row, headerRow): ObjectSchema {
//   const schema: ObjectSchema = headerRow ? {} : [];
//   for (let i = 0; i < row.length; i++) {
//     const columnName = (headerRow && headerRow[i]) || i;
//     const value = row[i];
//     switch (typeof value) {
//       case 'number':
//       case 'boolean':
//         // TODO - booleans could be handled differently...
//         schema[columnName] = {name: String(columnName), index: i, type: Float32Array};
//         break;
//       case 'string':
//       default:
//         schema[columnName] = {name: String(columnName), index: i, type: Array};
//       // We currently only handle numeric rows
//       // TODO we could offer a function to map strings to numbers?
//     }
//   }
//   return schema;
// }

1	// loaders.gl	1✔
2	// SPDX-License-Identifier: MIT	1✔
3	// Copyright (c) vis.gl contributors	1✔
4		1✔
5	import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';	1✔
6	import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';	1✔
7		1✔
8	import {log, toArrayBufferIterator} from '@loaders.gl/loader-utils';	1✔
9	import {	1✔
10	AsyncQueue,	1✔
11	deduceTableSchema,	1✔
12	TableBatchBuilder,	1✔
13	convertToArrayRow,	1✔
14	convertToObjectRow	1✔
15	} from '@loaders.gl/schema-utils';	1✔
16	import Papa from './papaparse/papaparse';	1✔
17	import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';	1✔
18	import {CSVFormat} from './csv-format';	1✔
19		1✔
20	// __VERSION__ is injected by babel-plugin-version-inline	1✔
21	// @ts-ignore TS2304: Cannot find name '__VERSION__'.	1✔
22	const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';	1!
23		1✔
24	const DEFAULT_CSV_SHAPE = 'object-row-table';	1✔
25		1✔
26	export type CSVLoaderOptions = LoaderOptions & {	1✔
27	csv?: {	1✔
28	// loaders.gl options	1✔
29	shape?: 'array-row-table' \| 'object-row-table';	1✔
30	/** optimizes memory usage but increases parsing time. */	1✔
31	optimizeMemoryUsage?: boolean;	1✔
32	columnPrefix?: string;	1✔
33	header?: 'auto';	1✔
34		1✔
35	// CSV options (papaparse)	1✔
36	// delimiter: auto	1✔
37	// newline: auto	1✔
38	quoteChar?: string;	1✔
39	escapeChar?: string;	1✔
40	// Convert numbers and boolean values in rows from strings	1✔
41	dynamicTyping?: boolean;	1✔
42	comments?: boolean;	1✔
43	skipEmptyLines?: boolean \| 'greedy';	1✔
44	// transform: null?	1✔
45	delimitersToGuess?: string[];	1✔
46	// fastMode: auto	1✔
47	};	1✔
48	};	1✔
49		1✔
50	export const CSVLoader = {	1✔
51	...CSVFormat,	1✔
52		1✔
53	dataType: null as unknown as ObjectRowTable \| ArrayRowTable,	1✔
54	batchType: null as unknown as TableBatch,	1✔
55	version: VERSION,	1✔
56	parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>	1✔
57	parseCSV(new TextDecoder().decode(arrayBuffer), options),	×
58	parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),	1✔
59	parseInBatches: parseCSVInBatches,	1✔
60	// @ts-ignore	1✔
61	// testText: null,	1✔
62	options: {	1✔
63	csv: {	1✔
64	shape: DEFAULT_CSV_SHAPE, // 'object-row-table'	1✔
65	optimizeMemoryUsage: false,	1✔
66	// CSV options	1✔
67	header: 'auto',	1✔
68	columnPrefix: 'column',	1✔
69	// delimiter: auto	1✔
70	// newline: auto	1✔
71	quoteChar: '"',	1✔
72	escapeChar: '"',	1✔
73	dynamicTyping: true,	1✔
74	comments: false,	1✔
75	skipEmptyLines: true,	1✔
76	// transform: null?	1✔
77	delimitersToGuess: [',', '\t', '\|', ';']	1✔
78	// fastMode: auto	1✔
79	}	1✔
80	}	1✔
81	} as const satisfies LoaderWithParser<ObjectRowTable \| ArrayRowTable, TableBatch, CSVLoaderOptions>;	1✔
82		1✔
83	async function parseCSV(	×
84	csvText: string,	×
85	options?: CSVLoaderOptions	×
86	): Promise<ObjectRowTable \| ArrayRowTable> {	×
87	// Apps can call the parse method directly, so we apply default options here	×
88	const csvOptions = {...CSVLoader.options.csv, ...options?.csv};	×
89		×
90	const firstRow = readFirstRow(csvText);	×
91	const header: boolean =	×
92	csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);	×
93		×
94	const parseWithHeader = header;	×
95		×
96	const papaparseConfig = {	×
97	// dynamicTyping: true,	×
98	...csvOptions,	×
99	header: parseWithHeader,	×
100	download: false, // We handle loading, no need for papaparse to do it for us	×
101	transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,	×
102	error: (e) => {	×
103	throw new Error(e);	×
104	}	×
105	};	×
106		×
107	const result = Papa.parse(csvText, papaparseConfig);	×
108	const rows = result.data as any[];	×
109		×
110	const headerRow = result.meta.fields \|\| generateHeader(csvOptions.columnPrefix, firstRow.length);	×
111		×
112	const shape = csvOptions.shape \|\| DEFAULT_CSV_SHAPE;	×
113	let table: ArrayRowTable \| ObjectRowTable;	×
114	switch (shape) {	×
115	case 'object-row-table':	×
116	table = {	×
117	shape: 'object-row-table',	×
118	data: rows.map((row) => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))	×
119	};	×
120	break;	×
121	case 'array-row-table':	×
122	table = {	×
123	shape: 'array-row-table',	×
124	data: rows.map((row) => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))	×
125	};	×
126	break;	×
127	default:	×
128	throw new Error(shape);	×
129	}	×
130	table.schema = deduceTableSchema(table!);	×
131	return table;	×
132	}	×
133		1✔
134	// TODO - support batch size 0 = no batching/single batch?	1✔
135	function parseCSVInBatches(	×
136	asyncIterator:	×
137	\| AsyncIterable<ArrayBufferLike \| ArrayBufferView>	×
138	\| Iterable<ArrayBufferLike \| ArrayBufferView>,	×
139	options?: CSVLoaderOptions	×
140	): AsyncIterable<TableBatch> {	×
141	// Papaparse does not support standard batch size handling	×
142	// TODO - investigate papaparse chunks mode	×
143	options = {...options};	×
144	if (options?.core?.batchSize === 'auto') {	×
145	options.core.batchSize = 4000;	×
146	}	×
147		×
148	// Apps can call the parse method directly, we so apply default options here	×
149	const csvOptions = {...CSVLoader.options.csv, ...options?.csv};	×
150		×
151	const asyncQueue = new AsyncQueue<TableBatch>();	×
152		×
153	let isFirstRow: boolean = true;	×
154	let headerRow: string[] \| null = null;	×
155	let tableBatchBuilder: TableBatchBuilder \| null = null;	×
156	let schema: Schema \| null = null;	×
157		×
158	const config = {	×
159	// dynamicTyping: true, // Convert numbers and boolean values in rows from strings,	×
160	...csvOptions,	×
161	header: false, // Unfortunately, header detection is not automatic and does not infer shapes	×
162	download: false, // We handle loading, no need for papaparse to do it for us	×
163	// chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the	×
164	// streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.	×
165	// See https://github.com/mholt/PapaParse/issues/465	×
166	chunkSize: 1024 * 1024 * 5,	×
167	// skipEmptyLines is set to a boolean value if supplied. Greedy is set to true	×
168	// skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if	×
169	// both of the skipEmptyLines and step callback options are provided:	×
170	// - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465	×
171	// - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825	×
172	skipEmptyLines: false,	×
173		×
174	// step is called on every row	×
175	// eslint-disable-next-line complexity, max-statements	×
176	step(results) {	×
177	let row = results.data;	×
178		×
NEW 179	if (csvOptions.skipEmptyLines === 'greedy') {	×
180	// Manually reject lines that are empty	×
181	const collapsedRow = row.flat().join('').trim();	×
182	if (collapsedRow === '') {	×
183	return;	×
184	}	×
NEW 185	} else if (csvOptions.skipEmptyLines === true) {	×
NEW 186	row = normalizePapaStreamingRow(row);	×
NEW 187	if (row.length === 1 && row[0] === null) {	×
NEW 188	return;	×
NEW 189	}	×
190	}	×
191	const bytesUsed = results.meta.cursor;	×
192		×
193	// Check if we need to save a header row	×
194	if (isFirstRow && !headerRow) {	×
195	// Auto detects or can be forced with csvOptions.header	×
196	const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);	×
197	if (header) {	×
198	headerRow = row.map(duplicateColumnTransformer());	×
199	return;	×
200	}	×
201	}	×
202		×
203	// If first data row, we can deduce the schema	×
204	if (isFirstRow) {	×
205	isFirstRow = false;	×
206	if (!headerRow) {	×
207	headerRow = generateHeader(csvOptions.columnPrefix, row.length);	×
208	}	×
209	schema = deduceCSVSchema(row, headerRow);	×
210	}	×
211		×
212	if (csvOptions.optimizeMemoryUsage) {	×
213	// A workaround to allocate new strings and don't retain pointers to original strings.	×
214	// https://bugs.chromium.org/p/v8/issues/detail?id=2869	×
215	row = JSON.parse(JSON.stringify(row));	×
216	}	×
217		×
218	const shape = (options as any)?.shape \|\| csvOptions.shape \|\| DEFAULT_CSV_SHAPE;	×
NEW 219	if (shape === 'object-row-table' && headerRow && row.length > headerRow.length) {	×
NEW 220	row = convertToPapaObjectRow(row, headerRow);	×
NEW 221	}	×
222		×
223	// Add the row	×
224	tableBatchBuilder =	×
225	tableBatchBuilder \|\|	×
226	new TableBatchBuilder(	×
227	// @ts-expect-error TODO this is not a proper schema	×
228	schema,	×
229	{	×
230	shape,	×
231	...(options?.core \|\| {})	×
232	}	×
233	);	×
234		×
235	try {	×
236	tableBatchBuilder.addRow(row);	×
237	// If a batch has been completed, emit it	×
238	const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});	×
239	if (batch) {	×
240	asyncQueue.enqueue(batch);	×
241	}	×
242	} catch (error) {	×
243	asyncQueue.enqueue(error as Error);	×
244	}	×
245	},	×
246		×
247	// complete is called when all rows have been read	×
248	complete(results) {	×
249	try {	×
250	const bytesUsed = results.meta.cursor;	×
251	// Ensure any final (partial) batch gets emitted	×
252	const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});	×
253	if (batch) {	×
254	asyncQueue.enqueue(batch);	×
255	}	×
256	} catch (error) {	×
257	asyncQueue.enqueue(error as Error);	×
258	}	×
259		×
260	asyncQueue.close();	×
261	}	×
262	};	×
263		×
264	Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);	×
265		×
266	// TODO - Does it matter if we return asyncIterable or asyncIterator	×
267	// return asyncQueue[Symbol.asyncIterator]();	×
268	return asyncQueue;	×
269	}	×
270		1✔
271	/**	1✔
272	* Checks if a certain row is a header row	1✔
273	* @param row the row to check	1✔
274	* @returns true if the row looks like a header	1✔
275	*/	1✔
276	function isHeaderRow(row: string[]): boolean {	×
277	return row && row.every((value) => typeof value === 'string');	×
278	}	×
279		1✔
280	/**	1✔
281	* Reads, parses, and returns the first row of a CSV text	1✔
282	* @param csvText the csv text to parse	1✔
283	* @returns the first row	1✔
284	*/	1✔
285	function readFirstRow(csvText: string): any[] {	×
286	const result = Papa.parse(csvText, {	×
287	dynamicTyping: true,	×
288	preview: 1	×
289	});	×
290	return result.data[0];	×
291	}	×
292		1✔
293	/**	1✔
294	* Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle	1✔
295	* duplicate header columns and would use the latest occurrence by default.	1✔
296	* See the header option in https://www.papaparse.com/docs#config	1✔
297	* @returns a transform function that returns sanitized names for duplicate fields	1✔
298	*/	1✔
299	function duplicateColumnTransformer(): (column: string) => string {	×
300	const observedColumns = new Set<string>();	×
301	return (col) => {	×
302	let colName = col;	×
303	let counter = 1;	×
304	while (observedColumns.has(colName)) {	×
305	colName = `${col}.${counter}`;	×
306	counter++;	×
307	}	×
308	observedColumns.add(colName);	×
309	return colName;	×
310	};	×
311	}	×
312		1✔
313	/**	1✔
314	* Generates the header of a CSV given a prefix and a column count	1✔
315	* @param columnPrefix the columnPrefix to use	1✔
316	* @param count the count of column names to generate	1✔
317	* @returns an array of column names	1✔
318	*/	1✔
319	function generateHeader(columnPrefix: string, count: number = 0): string[] {	×
320	const headers: string[] = [];	×
321	for (let i = 0; i < count; i++) {	×
322	headers.push(`${columnPrefix}${i + 1}`);	×
323	}	×
324	return headers;	×
325	}	×
326		1✔
NEW 327	function normalizePapaStreamingRow(row: unknown[]): unknown[] {	×
NEW 328	return row.map((value) => (Array.isArray(value) && value.length === 0 ? null : value));	×
NEW 329	}	×
330		1✔
NEW 331	function convertToPapaObjectRow(	×
NEW 332	row: unknown[],	×
NEW 333	headerRow: string[]	×
NEW 334	): {[columnName: string]: unknown} {	×
NEW 335	const objectRow = convertToObjectRow(row, headerRow);	×
NEW 336	const parsedExtra = row.slice(headerRow.length);	×
NEW 337	if (parsedExtra.length > 0) {	×
NEW 338	objectRow.__parsed_extra = parsedExtra;	×
NEW 339	}	×
NEW 340	return objectRow;	×
NEW 341	}	×
342		1✔
343	function deduceCSVSchema(row, headerRow): Schema {	×
344	const fields: Schema['fields'] = [];	×
345	for (let i = 0; i < row.length; i++) {	×
346	const columnName = (headerRow && headerRow[i]) \|\| i;	×
347	const value = row[i];	×
348	switch (typeof value) {	×
349	case 'number':	×
350	fields.push({name: String(columnName), type: 'float64', nullable: true});	×
351	break;	×
352	case 'boolean':	×
353	fields.push({name: String(columnName), type: 'bool', nullable: true});	×
354	break;	×
355	case 'string':	×
356	fields.push({name: String(columnName), type: 'utf8', nullable: true});	×
357	break;	×
358	default:	×
359	log.warn(`CSV: Unknown column type: ${typeof value}`)();	×
360	fields.push({name: String(columnName), type: 'utf8', nullable: true});	×
361	}	×
362	}	×
363	return {	×
364	fields,	×
365	metadata: {	×
366	'loaders.gl#format': 'csv',	×
367	'loaders.gl#loader': 'CSVLoader'	×
368	}	×
369	};	×
370	}	×
371		1✔
372	// TODO - remove	1✔
373	// type ObjectField = {name: string; index: number; type: any};	1✔
374	// type ObjectSchema = {[key: string]: ObjectField} \| ObjectField[];	1✔
375		1✔
376	// function deduceObjectSchema(row, headerRow): ObjectSchema {	1✔
377	// const schema: ObjectSchema = headerRow ? {} : [];	1✔
378	// for (let i = 0; i < row.length; i++) {	1✔
379	// const columnName = (headerRow && headerRow[i]) \|\| i;	1✔
380	// const value = row[i];	1✔
381	// switch (typeof value) {	1✔
382	// case 'number':	1✔
383	// case 'boolean':	1✔
384	// // TODO - booleans could be handled differently...	1✔
385	// schema[columnName] = {name: String(columnName), index: i, type: Float32Array};	1✔
386	// break;	1✔
387	// case 'string':	1✔
388	// default:	1✔
389	// schema[columnName] = {name: String(columnName), index: i, type: Array};	1✔
390	// // We currently only handle numeric rows	1✔
391	// // TODO we could offer a function to map strings to numbers?	1✔
392	// }	1✔
393	// }	1✔
394	// return schema;	1✔
395	// }	1✔

visgl / loaders.gl / 24108422669

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous