• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 24907303489

24 Apr 2026 07:12PM UTC coverage: 59.423% (+0.09%) from 59.334%
24907303489

push

github

web-flow
feat: Dynamic import loaders (#3405)

11252 of 20783 branches covered (54.14%)

Branch coverage included in aggregate %.

1164 of 1518 new or added lines in 244 files covered. (76.68%)

41 existing lines in 18 files now uncovered.

23432 of 37585 relevant lines covered (62.34%)

16317.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.1
/modules/csv/src/csv-loader-with-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
6
import type {
7
  Schema,
8
  ArrayRowTable,
9
  ArrowTable,
10
  ArrowTableBatch,
11
  ColumnarTable,
12
  ColumnarTableBatch,
13
  ObjectRowTable,
14
  TableBatch
15
} from '@loaders.gl/schema';
16

17
import {toArrayBufferIterator} from '@loaders.gl/loader-utils';
18
import {
19
  AsyncQueue,
20
  TableBatchBuilder,
21
  convertToArrayRow,
22
  convertToObjectRow
23
} from '@loaders.gl/schema-utils';
24
import Papa from './papaparse/papaparse';
25
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
26
import {DEFAULT_CSV_SHAPE} from './csv-loader-options';
27
import {
28
  parseCSVArrayBufferAsArrow,
29
  parseCSVInArrowBatches,
30
  parseCSVTextAsArrow
31
} from './csv-arrow-loader-with-parser';
32
import {
33
  deduceCSVSchemaFromRows,
34
  detectGeometryColumns,
35
  MAX_GEOMETRY_SNIFF_ROWS,
36
  normalizeGeometryArrayRow,
37
  normalizeGeometryObjectRow,
38
  shouldFinalizeGeometryDetection
39
} from './lib/csv-geometry';
40
import {CSVLoader as CSVLoaderMetadata, type CSVLoaderOptions} from './csv-loader';
41

42
// Drop `preload` from the copied loader metadata: the rest-destructure keeps every
// other field and discards `preload` (bound to the unused `_CSVLoaderPreload`).
const {preload: _CSVLoaderPreload, ...CSVLoaderMetadataWithoutPreload} = CSVLoaderMetadata;
14✔
43

44
export type {CSVLoaderOptions} from './csv-loader';
45

46
/** Loader for CSV and other delimiter-separated tabular text formats. */
47
export const CSVLoaderWithParser = {
14✔
48
  ...CSVLoaderMetadataWithoutPreload,
49
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
NEW
50
    options?.csv?.shape === 'arrow-table'
×
51
      ? parseCSVArrayBufferAsArrow(arrayBuffer, options)
52
      : parseCSVText(new TextDecoder().decode(arrayBuffer), options),
53
  parseSync: (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
NEW
54
    parseCSVTextSync(new TextDecoder().decode(arrayBuffer), options),
×
55
  parseText: (text: string, options?: CSVLoaderOptions) =>
56
    options?.csv?.shape === 'arrow-table'
54✔
57
      ? parseCSVTextAsArrow(text, options)
58
      : parseCSVText(text, options),
59
  parseTextSync: (text: string, options?: CSVLoaderOptions) => parseCSVTextSync(text, options),
4✔
60
  parseInBatches: (asyncIterator, options?: CSVLoaderOptions) =>
61
    options?.csv?.shape === 'arrow-table'
48✔
62
      ? parseCSVInArrowBatches(asyncIterator, options)
63
      : parseCSVInBatches(asyncIterator, options)
64
} as const satisfies LoaderWithParser<
65
  ObjectRowTable | ArrayRowTable | ColumnarTable | ArrowTable,
66
  TableBatch | ColumnarTableBatch | ArrowTableBatch,
67
  CSVLoaderOptions
68
>;
69

70
async function parseCSVText(
71
  csvText: string,
72
  options?: CSVLoaderOptions
73
): Promise<ObjectRowTable | ArrayRowTable> {
74
  return parseCSVTextSync(csvText, options);
52✔
75
}
76

77
function parseCSVTextSync(
78
  csvText: string,
79
  options?: CSVLoaderOptions
80
): ObjectRowTable | ArrayRowTable {
81
  // Apps can call the parse method directly, so we apply default options here
82
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};
56✔
83

84
  const firstRow = readFirstRow(csvText);
56✔
85
  const header: boolean =
86
    csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);
56✔
87

88
  const parseWithHeader = header;
56✔
89

90
  const papaparseConfig = {
56✔
91
    // dynamicTyping: true,
92
    ...csvOptions,
93
    header: parseWithHeader,
94
    download: false, // We handle loading, no need for papaparse to do it for us
95
    transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
56✔
96
    error: e => {
NEW
97
      throw new Error(e);
×
98
    }
99
  };
100

101
  const result = Papa.parse(csvText, papaparseConfig);
56✔
102
  const rows = result.data as any[];
56✔
103

104
  const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
56✔
105

106
  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
56!
107
  let table: ArrayRowTable | ObjectRowTable;
108
  switch (shape) {
56!
109
    case 'object-row-table':
110
      table = {
48✔
111
        shape: 'object-row-table',
112
        data: rows.map(row => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
174,910✔
113
      };
114
      break;
48✔
115
    case 'array-row-table':
116
      table = {
8✔
117
        shape: 'array-row-table',
118
        data: rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
22✔
119
      };
120
      break;
8✔
121
    default:
NEW
122
      throw new Error(shape);
×
123
  }
124
  const detectedGeometryColumns = csvOptions.detectGeometryColumns
56✔
125
    ? detectGeometryColumns(
126
        headerRow,
127
        rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
12!
128
      )
129
    : [];
130

131
  if (detectedGeometryColumns.length > 0) {
56✔
132
    table =
4✔
133
      table.shape === 'array-row-table'
4✔
134
        ? {
135
            ...table,
136
            data: table.data.map(row => normalizeGeometryArrayRow(row, detectedGeometryColumns))
6✔
137
          }
138
        : {
139
            ...table,
140
            data: table.data.map(row => normalizeGeometryObjectRow(row, detectedGeometryColumns))
6✔
141
          };
142
  }
143

144
  table.schema = deduceCSVSchemaFromRows(table.data, headerRow, detectedGeometryColumns);
56✔
145
  return table;
56✔
146
}
147

148
// TODO - support batch size 0 = no batching/single batch?
149
function parseCSVInBatches(
150
  asyncIterator:
151
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
152
    | Iterable<ArrayBufferLike | ArrayBufferView>,
153
  options?: CSVLoaderOptions
154
): AsyncIterable<TableBatch> {
155
  // Papaparse does not support standard batch size handling
156
  // TODO - investigate papaparse chunks mode
157
  options = {...options};
46✔
158
  if (options?.core?.batchSize === 'auto') {
46✔
159
    options.core.batchSize = 4000;
40✔
160
  }
161

162
  // Apps can call the parse method directly, we so apply default options here
163
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};
46✔
164

165
  const asyncQueue = new AsyncQueue<TableBatch>();
46✔
166

167
  let isFirstRow: boolean = true;
46✔
168
  let headerRow: string[] | null = null;
46✔
169
  let tableBatchBuilder: TableBatchBuilder | null = null;
46✔
170
  let schema: Schema | null = null;
46✔
171
  let sniffedRows: unknown[][] = [];
46✔
172
  let detectedGeometryColumns = [] as ReturnType<typeof detectGeometryColumns>;
46✔
173
  let geometryDetectionFinalized = !csvOptions.detectGeometryColumns;
46✔
174

175
  const config = {
46✔
176
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
177
    ...csvOptions,
178
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
179
    download: false, // We handle loading, no need for papaparse to do it for us
180
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
181
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
182
    // See https://github.com/mholt/PapaParse/issues/465
183
    chunkSize: 1024 * 1024 * 5,
184
    // skipEmptyLines is set to a boolean value if supplied. Greedy is set to true
185
    // skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if
186
    // both of the skipEmptyLines and step callback options are provided:
187
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
188
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
189
    skipEmptyLines: false,
190

191
    // step is called on every row
192
    // eslint-disable-next-line complexity, max-statements
193
    step(results) {
194
      let row = results.data;
14,516✔
195

196
      if (csvOptions.skipEmptyLines === 'greedy') {
14,516✔
197
        // Manually reject lines that are empty
198
        const collapsedRow = row.flat().join('').trim();
16✔
199
        if (collapsedRow === '') {
16✔
200
          return;
10✔
201
        }
202
      } else if (csvOptions.skipEmptyLines === true) {
14,500!
203
        row = normalizePapaStreamingRow(row);
14,500✔
204
        if (row.length === 1 && row[0] === null) {
14,500✔
205
          return;
6✔
206
        }
207
      }
208
      const bytesUsed = results.meta.cursor;
14,500✔
209

210
      // Check if we need to save a header row
211
      if (isFirstRow && !headerRow) {
14,500✔
212
        // Auto detects or can be forced with csvOptions.header
213
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
46✔
214
        if (header) {
46✔
215
          headerRow = row.map(duplicateColumnTransformer());
28✔
216
          return;
28✔
217
        }
218
      }
219

220
      // If first data row, we can deduce the schema
221
      if (isFirstRow) {
14,472✔
222
        if (!headerRow) {
50✔
223
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
18✔
224
        }
225
      }
226

227
      if (csvOptions.optimizeMemoryUsage) {
14,472!
228
        // A workaround to allocate new strings and don't retain pointers to original strings.
229
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
NEW
230
        row = JSON.parse(JSON.stringify(row));
×
231
      }
232

233
      const shape = getBatchShape();
14,472✔
234

235
      if (!geometryDetectionFinalized && headerRow) {
14,472✔
236
        sniffedRows.push(row);
6✔
237
        geometryDetectionFinalized = shouldFinalizeGeometryDetection(
6✔
238
          headerRow,
239
          sniffedRows,
240
          MAX_GEOMETRY_SNIFF_ROWS
241
        );
242
        if (geometryDetectionFinalized) {
6!
NEW
243
          detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);
×
NEW
244
          const normalizedSniffedRows = sniffedRows.map(sniffedRow =>
×
245
            normalizeGeometryArrayRow(sniffedRow, detectedGeometryColumns)
246
          );
NEW
247
          schema = deduceCSVSchemaFromRows(
×
248
            normalizedSniffedRows,
249
            headerRow,
250
            detectedGeometryColumns
251
          );
NEW
252
          isFirstRow = false;
×
NEW
253
          for (const normalizedSniffedRow of normalizedSniffedRows) {
×
NEW
254
            addCSVBatchRow(normalizedSniffedRow, shape, bytesUsed);
×
255
          }
NEW
256
          sniffedRows = [];
×
257
        }
258
        return;
6✔
259
      }
260

261
      if (isFirstRow) {
14,466✔
262
        if (!headerRow) {
44!
NEW
263
          return;
×
264
        }
265
        schema = deduceCSVSchemaFromRows(
44✔
266
          [normalizeGeometryArrayRow(row, detectedGeometryColumns)],
267
          headerRow,
268
          detectedGeometryColumns
269
        );
270
        isFirstRow = false;
44✔
271
      }
272

273
      const normalizedRow = normalizeGeometryArrayRow(row, detectedGeometryColumns);
14,466✔
274
      addCSVBatchRow(normalizedRow, shape, bytesUsed);
14,466✔
275
    },
276

277
    // complete is called when all rows have been read
278
    complete(results) {
279
      try {
46✔
280
        if (!geometryDetectionFinalized && headerRow) {
46✔
281
          detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);
2✔
282
          const normalizedSniffedRows = sniffedRows.map(row =>
4✔
283
            normalizeGeometryArrayRow(row, detectedGeometryColumns)
3✔
284
          );
285
          schema = deduceCSVSchemaFromRows(
2✔
286
            normalizedSniffedRows,
287
            headerRow,
288
            detectedGeometryColumns
289
          );
290
          const shape = getBatchShape();
2✔
291
          tableBatchBuilder =
2✔
292
            tableBatchBuilder ||
4✔
293
            new TableBatchBuilder(schema, {
294
              ...(options?.core || {}),
2!
295
              shape
296
            });
297
          for (const normalizedSniffedRow of normalizedSniffedRows) {
2✔
298
            const batchRow =
299
              shape === 'object-row-table' && normalizedSniffedRow.length > headerRow.length
6!
300
                ? convertToPapaObjectRow(normalizedSniffedRow, headerRow)
301
                : normalizedSniffedRow;
302
            tableBatchBuilder.addRow(batchRow);
6✔
303
          }
304
        }
305
        const bytesUsed = results.meta.cursor;
46✔
306
        // Ensure any final (partial) batch gets emitted
307
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
46✔
308
        if (batch) {
46✔
309
          asyncQueue.enqueue(batch);
42✔
310
        }
311
      } catch (error) {
NEW
312
        asyncQueue.enqueue(error as Error);
×
313
      }
314

315
      asyncQueue.close();
46✔
316
    }
317
  };
318

319
  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);
46✔
320

321
  // TODO - Does it matter if we return asyncIterable or asyncIterator
322
  // return asyncQueue[Symbol.asyncIterator]();
323
  return asyncQueue;
46✔
324

325
  function addCSVBatchRow(rowToAdd: unknown[], shape: CSVBatchShape, bytesUsed: number): void {
326
    let batchRow: unknown[] | {[columnName: string]: unknown} = rowToAdd;
14,466✔
327
    if (shape === 'object-row-table' && headerRow && rowToAdd.length > headerRow.length) {
14,466✔
328
      batchRow = convertToPapaObjectRow(rowToAdd, headerRow);
4✔
329
    }
330

331
    tableBatchBuilder =
14,466✔
332
      tableBatchBuilder ||
14,510✔
333
      new TableBatchBuilder(schema!, {
334
        ...(options?.core || {}),
44!
335
        shape
336
      });
337

338
    try {
14,466✔
339
      tableBatchBuilder.addRow(batchRow);
14,466✔
340
      const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});
14,466✔
341
      if (batch) {
14,466✔
342
        asyncQueue.enqueue(batch);
168✔
343
      }
344
    } catch (error) {
NEW
345
      asyncQueue.enqueue(error as Error);
×
346
    }
347
  }
348

349
  function getBatchShape(): CSVBatchShape {
350
    const deprecatedShape = (options as {shape?: CSVBatchShape} | undefined)?.shape;
14,474✔
351
    const shape = deprecatedShape || csvOptions.shape || DEFAULT_CSV_SHAPE;
14,474!
352
    switch (shape) {
14,474✔
353
      case 'array-row-table':
354
      case 'columnar-table':
355
        return shape;
4,034✔
356
      default:
357
        return DEFAULT_CSV_SHAPE;
10,440✔
358
    }
359
  }
360
}
361

362
/** Table shapes the streaming batch parser can emit. */
type CSVBatchShape = 'array-row-table' | 'object-row-table' | 'columnar-table';
363

364
/**
365
 * Checks if a certain row is a header row
366
 * @param row the row to check
367
 * @returns true if the row looks like a header
368
 */
369
function isHeaderRow(row: string[]): boolean {
370
  return row && row.every(value => typeof value === 'string');
290✔
371
}
372

373
/**
374
 * Reads, parses, and returns the first row of a CSV text
375
 * @param csvText the csv text to parse
376
 * @returns the first row
377
 */
378
function readFirstRow(csvText: string): any[] {
379
  const result = Papa.parse(csvText, {
56✔
380
    dynamicTyping: true,
381
    preview: 1
382
  });
383
  return result.data[0];
56✔
384
}
385

386
/**
387
 * Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle
388
 * duplicate header columns and would use the latest occurrence by default.
389
 * See the header option in https://www.papaparse.com/docs#config
390
 * @returns a transform function that returns sanitized names for duplicate fields
391
 */
392
function duplicateColumnTransformer(): (column: string) => string {
393
  const observedColumns = new Set<string>();
70✔
394
  return col => {
70✔
395
    let colName = col;
344✔
396
    let counter = 1;
344✔
397
    while (observedColumns.has(colName)) {
344✔
398
      colName = `${col}.${counter}`;
50✔
399
      counter++;
50✔
400
    }
401
    observedColumns.add(colName);
344✔
402
    return colName;
344✔
403
  };
404
}
405

406
/**
407
 * Generates the header of a CSV given a prefix and a column count
408
 * @param columnPrefix the columnPrefix to use
409
 * @param count the count of column names to generate
410
 * @returns an array of column names
411
 */
412
function generateHeader(columnPrefix: string, count: number = 0): string[] {
32✔
413
  const headers: string[] = [];
32✔
414
  for (let i = 0; i < count; i++) {
32✔
415
    headers.push(`${columnPrefix}${i + 1}`);
102✔
416
  }
417
  return headers;
32✔
418
}
419

420
function normalizePapaStreamingRow(row: unknown[]): unknown[] {
421
  return row.map(value => (Array.isArray(value) && value.length === 0 ? null : value));
57,522✔
422
}
423

424
function convertToPapaObjectRow(
425
  row: unknown[],
426
  headerRow: string[]
427
): {[columnName: string]: unknown} {
428
  const objectRow = convertToObjectRow(row, headerRow);
4✔
429
  const parsedExtra = row.slice(headerRow.length);
4✔
430
  if (parsedExtra.length > 0) {
4!
431
    objectRow.__parsed_extra = parsedExtra;
4✔
432
  }
433
  return objectRow;
4✔
434
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc