• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

visgl / loaders.gl / 25138678579

29 Apr 2026 11:12PM UTC coverage: 59.466% (+0.07%) from 59.401%
25138678579

push

github

web-flow
chore: Standardize and optimize geospatial loaders (#3410)

11651 of 21470 branches covered (54.27%)

Branch coverage included in aggregate %.

73 of 86 new or added lines in 9 files covered. (84.88%)

2 existing lines in 2 files now uncovered.

24115 of 38675 relevant lines covered (62.35%)

15903.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.1
/modules/csv/src/csv-loader-with-parser.ts
1
// loaders.gl
2
// SPDX-License-Identifier: MIT
3
// Copyright (c) vis.gl contributors
4

5
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
6
import type {
7
  Schema,
8
  ArrayRowTable,
9
  ArrowTable,
10
  ArrowTableBatch,
11
  ColumnarTable,
12
  ColumnarTableBatch,
13
  ObjectRowTable,
14
  TableBatch
15
} from '@loaders.gl/schema';
16

17
import {toArrayBufferIterator} from '@loaders.gl/loader-utils';
18
import {
19
  AsyncQueue,
20
  TableBatchBuilder,
21
  convertToArrayRow,
22
  convertToObjectRow
23
} from '@loaders.gl/schema-utils';
24
import Papa from './papaparse/papaparse';
25
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
26
import {DEFAULT_CSV_SHAPE} from './csv-loader-options';
27
import {
28
  parseCSVArrayBufferAsArrow,
29
  parseCSVInArrowBatches,
30
  parseCSVTextAsArrow
31
} from './csv-arrow-loader-with-parser';
32
import {
33
  deduceCSVSchemaFromRows,
34
  detectGeometryColumns,
35
  MAX_GEOMETRY_SNIFF_ROWS,
36
  normalizeGeometryArrayRow,
37
  normalizeGeometryObjectRow,
38
  shouldFinalizeGeometryDetection
39
} from './lib/csv-geometry';
40
import {CSVLoader as CSVLoaderMetadata, type CSVLoaderOptions} from './csv-loader';
41

42
// Discard the `preload` member before spreading the metadata into
// CSVLoaderWithParser below; the rest-pattern keeps everything else and
// `_CSVLoaderPreload` is intentionally unused.
const {preload: _CSVLoaderPreload, ...CSVLoaderMetadataWithoutPreload} = CSVLoaderMetadata;

export type {CSVLoaderOptions} from './csv-loader';
45

46
/** Loader for CSV and other delimiter-separated tabular text formats. */
47
export const CSVLoaderWithParser = {
14✔
48
  ...CSVLoaderMetadataWithoutPreload,
49
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
50
    options?.csv?.shape === 'arrow-table'
×
51
      ? parseCSVArrayBufferAsArrow(arrayBuffer, options)
52
      : parseCSVText(new TextDecoder().decode(arrayBuffer), options),
53
  parseSync: (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
54
    parseCSVTextSync(new TextDecoder().decode(arrayBuffer), options),
×
55
  parseText: (text: string, options?: CSVLoaderOptions) =>
56
    options?.csv?.shape === 'arrow-table'
60✔
57
      ? parseCSVTextAsArrow(text, options)
58
      : parseCSVText(text, options),
59
  parseTextSync: (text: string, options?: CSVLoaderOptions) => parseCSVTextSync(text, options),
4✔
60
  parseInBatches: (asyncIterator, options?: CSVLoaderOptions) =>
61
    options?.csv?.shape === 'arrow-table'
48✔
62
      ? parseCSVInArrowBatches(asyncIterator, options)
63
      : parseCSVInBatches(asyncIterator, options)
64
} as const satisfies LoaderWithParser<
65
  ObjectRowTable | ArrayRowTable | ColumnarTable | ArrowTable,
66
  TableBatch | ColumnarTableBatch | ArrowTableBatch,
67
  CSVLoaderOptions
68
>;
69

70
async function parseCSVText(
71
  csvText: string,
72
  options?: CSVLoaderOptions
73
): Promise<ObjectRowTable | ArrayRowTable> {
74
  return parseCSVTextSync(csvText, options);
56✔
75
}
76

77
function parseCSVTextSync(
78
  csvText: string,
79
  options?: CSVLoaderOptions
80
): ObjectRowTable | ArrayRowTable {
81
  // Apps can call the parse method directly, so we apply default options here
82
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};
60✔
83

84
  const firstRow = readFirstRow(csvText);
60✔
85
  const header: boolean =
86
    csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);
60✔
87

88
  const parseWithHeader = header;
60✔
89

90
  const papaparseConfig = {
60✔
91
    // dynamicTyping: true,
92
    ...csvOptions,
93
    header: parseWithHeader,
94
    download: false, // We handle loading, no need for papaparse to do it for us
95
    transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
60✔
96
    error: e => {
97
      throw new Error(e);
×
98
    }
99
  };
100

101
  const result = Papa.parse(csvText, papaparseConfig);
60✔
102
  const rows = result.data as any[];
60✔
103

104
  const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
60✔
105

106
  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
60!
107
  let table: ArrayRowTable | ObjectRowTable;
108
  switch (shape) {
60!
109
    case 'object-row-table':
110
      table = {
52✔
111
        shape: 'object-row-table',
112
        data: rows.map(row => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
174,922✔
113
      };
114
      break;
52✔
115
    case 'array-row-table':
116
      table = {
8✔
117
        shape: 'array-row-table',
118
        data: rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
22✔
119
      };
120
      break;
8✔
121
    default:
122
      throw new Error(shape);
×
123
  }
124
  const detectedGeometryColumns = csvOptions.detectGeometryColumns
60✔
125
    ? detectGeometryColumns(
126
        headerRow,
127
        rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow))),
24!
128
        csvOptions.geometryEncoding
129
      )
130
    : [];
131

132
  if (detectedGeometryColumns.length > 0) {
60✔
133
    table =
8✔
134
      table.shape === 'array-row-table'
8✔
135
        ? {
136
            ...table,
137
            data: table.data.map(row => normalizeGeometryArrayRow(row, detectedGeometryColumns))
6✔
138
          }
139
        : {
140
            ...table,
141
            data: table.data.map(row => normalizeGeometryObjectRow(row, detectedGeometryColumns))
18✔
142
          };
143
  }
144

145
  table.schema = deduceCSVSchemaFromRows(table.data, headerRow, detectedGeometryColumns);
60✔
146
  return table;
60✔
147
}
148

149
// TODO - support batch size 0 = no batching/single batch?
/**
 * Streams CSV content and emits the parsed table in batches.
 *
 * Rows are pushed into an AsyncQueue by papaparse's `step` callback and the
 * queue is returned immediately as the async iterable of batches. Header
 * detection, optional geometry-column sniffing and schema deduction all
 * happen lazily on the first row(s) seen.
 *
 * @param asyncIterator binary chunks of CSV text
 * @param options optional CSV loader options (merged over loader defaults)
 * @returns async iterable of table batches; errors are enqueued as Error values
 */
function parseCSVInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: CSVLoaderOptions
): AsyncIterable<TableBatch> {
  // Papaparse does not support standard batch size handling
  // TODO - investigate papaparse chunks mode
  options = {...options};
  if (options?.core?.batchSize === 'auto') {
    options.core.batchSize = 4000;
  }

  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoaderWithParser.options.csv, ...options?.csv};

  const asyncQueue = new AsyncQueue<TableBatch>();

  // Parser state shared between the papaparse callbacks and the helpers below
  let isFirstRow: boolean = true;
  let headerRow: string[] | null = null;
  let tableBatchBuilder: TableBatchBuilder | null = null;
  let schema: Schema | null = null;
  // Rows buffered while geometry-column detection is still sniffing
  let sniffedRows: unknown[][] = [];
  let detectedGeometryColumns = [] as ReturnType<typeof detectGeometryColumns>;
  // When geometry detection is disabled, detection is "finalized" from the start
  let geometryDetectionFinalized = !csvOptions.detectGeometryColumns;

  const config = {
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
    ...csvOptions,
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
    download: false, // We handle loading, no need for papaparse to do it for us
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
    // See https://github.com/mholt/PapaParse/issues/465
    chunkSize: 1024 * 1024 * 5,
    // skipEmptyLines is set to a boolean value if supplied. Greedy is set to true
    // skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if
    // both of the skipEmptyLines and step callback options are provided:
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
    skipEmptyLines: false,

    // step is called on every row
    // eslint-disable-next-line complexity, max-statements
    step(results) {
      let row = results.data;

      if (csvOptions.skipEmptyLines === 'greedy') {
        // Manually reject lines that are empty
        const collapsedRow = row.flat().join('').trim();
        if (collapsedRow === '') {
          return;
        }
      } else if (csvOptions.skipEmptyLines === true) {
        // Papaparse streams an empty line as a single empty-array cell; drop it
        row = normalizePapaStreamingRow(row);
        if (row.length === 1 && row[0] === null) {
          return;
        }
      }
      const bytesUsed = results.meta.cursor;

      // Check if we need to save a header row
      if (isFirstRow && !headerRow) {
        // Auto detects or can be forced with csvOptions.header
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
        if (header) {
          headerRow = row.map(duplicateColumnTransformer());
          return;
        }
      }

      // If first data row, we can deduce the schema
      if (isFirstRow) {
        if (!headerRow) {
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
        }
      }

      if (csvOptions.optimizeMemoryUsage) {
        // A workaround to allocate new strings and don't retain pointers to original strings.
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
        row = JSON.parse(JSON.stringify(row));
      }

      const shape = getBatchShape();

      // Buffer rows while geometry detection is still sniffing; no batches are
      // emitted until detection finalizes (or the stream completes)
      if (!geometryDetectionFinalized && headerRow) {
        sniffedRows.push(row);
        geometryDetectionFinalized = shouldFinalizeGeometryDetection(
          headerRow,
          sniffedRows,
          MAX_GEOMETRY_SNIFF_ROWS
        );
        if (geometryDetectionFinalized) {
          // Enough rows seen: detect geometry columns, deduce the schema,
          // then flush all buffered rows through the batch builder
          detectedGeometryColumns = detectGeometryColumns(
            headerRow,
            sniffedRows,
            csvOptions.geometryEncoding
          );
          const normalizedSniffedRows = sniffedRows.map(sniffedRow =>
            normalizeGeometryArrayRow(sniffedRow, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          isFirstRow = false;
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            addCSVBatchRow(normalizedSniffedRow, shape, bytesUsed);
          }
          sniffedRows = [];
        }
        return;
      }

      if (isFirstRow) {
        if (!headerRow) {
          return;
        }
        schema = deduceCSVSchemaFromRows(
          [normalizeGeometryArrayRow(row, detectedGeometryColumns)],
          headerRow,
          detectedGeometryColumns
        );
        isFirstRow = false;
      }

      const normalizedRow = normalizeGeometryArrayRow(row, detectedGeometryColumns);
      addCSVBatchRow(normalizedRow, shape, bytesUsed);
    },

    // complete is called when all rows have been read
    complete(results) {
      try {
        // Stream ended while still sniffing: finalize geometry detection with
        // whatever rows were buffered and emit them now
        if (!geometryDetectionFinalized && headerRow) {
          detectedGeometryColumns = detectGeometryColumns(
            headerRow,
            sniffedRows,
            csvOptions.geometryEncoding
          );
          const normalizedSniffedRows = sniffedRows.map(row =>
            normalizeGeometryArrayRow(row, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          const shape = getBatchShape();
          tableBatchBuilder =
            tableBatchBuilder ||
            new TableBatchBuilder(schema, {
              ...(options?.core || {}),
              shape
            });
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            const batchRow =
              shape === 'object-row-table' && normalizedSniffedRow.length > headerRow.length
                ? convertToPapaObjectRow(normalizedSniffedRow, headerRow)
                : normalizedSniffedRow;
            tableBatchBuilder.addRow(batchRow);
          }
        }
        const bytesUsed = results.meta.cursor;
        // Ensure any final (partial) batch gets emitted
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        asyncQueue.enqueue(error as Error);
      }

      asyncQueue.close();
    }
  };

  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);

  // TODO - Does it matter if we return asyncIterable or asyncIterator
  // return asyncQueue[Symbol.asyncIterator]();
  return asyncQueue;

  // Adds one row to the current batch builder (creating it lazily), enqueueing
  // a batch whenever the builder reports one is full
  function addCSVBatchRow(rowToAdd: unknown[], shape: CSVBatchShape, bytesUsed: number): void {
    let batchRow: unknown[] | {[columnName: string]: unknown} = rowToAdd;
    // Rows longer than the header get papaparse's `__parsed_extra` treatment
    if (shape === 'object-row-table' && headerRow && rowToAdd.length > headerRow.length) {
      batchRow = convertToPapaObjectRow(rowToAdd, headerRow);
    }

    tableBatchBuilder =
      tableBatchBuilder ||
      new TableBatchBuilder(schema!, {
        ...(options?.core || {}),
        shape
      });

    try {
      tableBatchBuilder.addRow(batchRow);
      const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});
      if (batch) {
        asyncQueue.enqueue(batch);
      }
    } catch (error) {
      asyncQueue.enqueue(error as Error);
    }
  }

  // Resolves the effective batch shape: a deprecated top-level `options.shape`
  // takes precedence, and unsupported values fall back to DEFAULT_CSV_SHAPE
  function getBatchShape(): CSVBatchShape {
    const deprecatedShape = (options as {shape?: CSVBatchShape} | undefined)?.shape;
    const shape = deprecatedShape || csvOptions.shape || DEFAULT_CSV_SHAPE;
    switch (shape) {
      case 'array-row-table':
      case 'columnar-table':
        return shape;
      default:
        return DEFAULT_CSV_SHAPE;
    }
  }
}
370

371
/** Table batch shapes that the streaming CSV parser can emit. */
type CSVBatchShape = 'array-row-table' | 'object-row-table' | 'columnar-table';
372

373
/**
374
 * Checks if a certain row is a header row
375
 * @param row the row to check
376
 * @returns true if the row looks like a header
377
 */
378
function isHeaderRow(row: string[]): boolean {
379
  return row && row.every(value => typeof value === 'string');
302✔
380
}
381

382
/**
383
 * Reads, parses, and returns the first row of a CSV text
384
 * @param csvText the csv text to parse
385
 * @returns the first row
386
 */
387
function readFirstRow(csvText: string): any[] {
388
  const result = Papa.parse(csvText, {
60✔
389
    dynamicTyping: true,
390
    preview: 1
391
  });
392
  return result.data[0];
60✔
393
}
394

395
/**
396
 * Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle
397
 * duplicate header columns and would use the latest occurrence by default.
398
 * See the header option in https://www.papaparse.com/docs#config
399
 * @returns a transform function that returns sanitized names for duplicate fields
400
 */
401
function duplicateColumnTransformer(): (column: string) => string {
402
  const observedColumns = new Set<string>();
74✔
403
  return col => {
74✔
404
    let colName = col;
356✔
405
    let counter = 1;
356✔
406
    while (observedColumns.has(colName)) {
356✔
407
      colName = `${col}.${counter}`;
50✔
408
      counter++;
50✔
409
    }
410
    observedColumns.add(colName);
356✔
411
    return colName;
356✔
412
  };
413
}
414

415
/**
416
 * Generates the header of a CSV given a prefix and a column count
417
 * @param columnPrefix the columnPrefix to use
418
 * @param count the count of column names to generate
419
 * @returns an array of column names
420
 */
421
function generateHeader(columnPrefix: string, count: number = 0): string[] {
32✔
422
  const headers: string[] = [];
32✔
423
  for (let i = 0; i < count; i++) {
32✔
424
    headers.push(`${columnPrefix}${i + 1}`);
102✔
425
  }
426
  return headers;
32✔
427
}
428

429
function normalizePapaStreamingRow(row: unknown[]): unknown[] {
430
  return row.map(value => (Array.isArray(value) && value.length === 0 ? null : value));
57,522✔
431
}
432

433
function convertToPapaObjectRow(
434
  row: unknown[],
435
  headerRow: string[]
436
): {[columnName: string]: unknown} {
437
  const objectRow = convertToObjectRow(row, headerRow);
4✔
438
  const parsedExtra = row.slice(headerRow.length);
4✔
439
  if (parsedExtra.length > 0) {
4!
440
    objectRow.__parsed_extra = parsedExtra;
4✔
441
  }
442
  return objectRow;
4✔
443
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc