24607714991

Committed 18 Apr 2026 03:21PM UTC coverage: 57.099% (+0.3%) from 56.834%

Build # 24607714991

Build Type

push

github

Committed by

web-flow

Commit Message

feat: Organize sprawling converter function fleet into well defined exported Converter objects (#3396)

Coverage Stats

10822 of 20660 branches covered (52.38%)

Branch coverage included in aggregate %.

1386 of 1996 new or added lines in 59 files covered. (69.44%)

2 existing lines in 2 files now uncovered.

22093 of 36985 relevant lines covered (59.74%)

4859.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.0

/modules/csv/src/csv-loader.ts

// loaders.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';

import {toArrayBufferIterator} from '@loaders.gl/loader-utils';
import {
  AsyncQueue,
  TableBatchBuilder,
  convertToArrayRow,
  convertToObjectRow
} from '@loaders.gl/schema-utils';
import Papa from './papaparse/papaparse';
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
import {CSVFormat} from './csv-format';
import {
  deduceCSVSchemaFromRows,
  detectGeometryColumns,
  MAX_GEOMETRY_SNIFF_ROWS,
  normalizeGeometryArrayRow,
  normalizeGeometryObjectRow,
  shouldFinalizeGeometryDetection
} from './lib/csv-geometry';

// Version string exposed as `CSVLoader.version`; falls back to 'latest' when `__VERSION__`
// is not defined at runtime.
// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';

/** Table shape used when no `shape` option is supplied. */
const DEFAULT_CSV_SHAPE = 'object-row-table';

/** Options accepted by the CSV loader; CSV-specific settings live under the `csv` key. */
export type CSVLoaderOptions = LoaderOptions & {
  csv?: {
    // loaders.gl options
    /** Shape of the rows in the returned table / batches. */
    shape?: 'array-row-table' | 'object-row-table';
    /** optimizes memory usage but increases parsing time. */
    optimizeMemoryUsage?: boolean;
    /** Prefix for generated column names (`<prefix>1`, `<prefix>2`, ...) when there is no header row. */
    columnPrefix?: string;
    /**
     * Whether the first row is a header row.
     * `'auto'` treats the first row as a header when all of its values are strings;
     * a boolean forces the decision (the parsers evaluate `Boolean(header)` for non-'auto' values).
     */
    header?: boolean | 'auto';

    // CSV options (papaparse)
    // delimiter: auto
    // newline: auto
    quoteChar?: string;
    escapeChar?: string;
    // Convert numbers and boolean values in rows from strings
    dynamicTyping?: boolean;
    comments?: boolean;
    /** `true` drops empty lines; `'greedy'` also drops lines whose cells are all whitespace. */
    skipEmptyLines?: boolean | 'greedy';
    // transform: null?
    delimitersToGuess?: string[];
    /** When true, geometry columns are detected and their values normalized (see ./lib/csv-geometry). */
    detectGeometryColumns?: boolean;
    // fastMode: auto
  };
};

/**
 * Loader object for CSV data.
 *
 * Extends `CSVFormat` with the parser entry points (`parse`, `parseText`, `parseInBatches`)
 * and the default `csv` options that both parsers merge user options over.
 */
export const CSVLoader = {
  ...CSVFormat,

  // `dataType` / `batchType` are null at runtime; the casts only carry type information.
  dataType: null as unknown as ObjectRowTable | ArrayRowTable,
  batchType: null as unknown as TableBatch,
  version: VERSION,
  // Binary input is decoded with a default TextDecoder and then parsed as text.
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
    parseCSV(new TextDecoder().decode(arrayBuffer), options),
  parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),
  parseInBatches: parseCSVInBatches,
  // @ts-ignore
  // testText: null,
  // Default options; `parseCSV` and `parseCSVInBatches` spread `options.csv` over these.
  options: {
    csv: {
      shape: DEFAULT_CSV_SHAPE, // 'object-row-table'
      optimizeMemoryUsage: false,
      // CSV options
      header: 'auto',
      columnPrefix: 'column',
      // delimiter: auto
      // newline: auto
      quoteChar: '"',
      escapeChar: '"',
      dynamicTyping: true,
      comments: false,
      skipEmptyLines: true,
      // transform: null?
      detectGeometryColumns: false,
      delimitersToGuess: [',', '\t', '|', ';']
      // fastMode: auto
    }
  }
} as const satisfies LoaderWithParser<ObjectRowTable | ArrayRowTable, TableBatch, CSVLoaderOptions>;

/**
 * Parses a complete CSV text into a row table.
 *
 * Steps: merge options over the loader defaults, decide whether the first row is a header,
 * run papaparse over the whole text, convert the rows to the requested shape, optionally
 * detect and normalize geometry columns, and finally attach a deduced schema.
 *
 * @param csvText the CSV text to parse
 * @param options loader options; `options.csv` is merged over `CSVLoader.options.csv`
 * @returns an object-row or array-row table (per `csv.shape`) with `schema` populated
 * @throws Error if `csv.shape` is not a supported shape, or if papaparse reports an error
 */
async function parseCSV(
  csvText: string,
  options?: CSVLoaderOptions
): Promise<ObjectRowTable | ArrayRowTable> {
  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

  // `firstRow` is undefined when papaparse returns no rows for the text (e.g. empty input),
  // so every use of it below must tolerate a missing row instead of dereferencing it.
  const firstRow = readFirstRow(csvText);
  const firstRowLength = firstRow?.length ?? 0;
  const header: boolean =
    csvOptions.header === 'auto'
      ? Boolean(firstRow) && Boolean(isHeaderRow(firstRow))
      : Boolean(csvOptions.header);

  const papaparseConfig = {
    // dynamicTyping: true,
    ...csvOptions,
    header,
    download: false, // We handle loading, no need for papaparse to do it for us
    // Duplicate header names are renamed so that no column overwrites another
    transformHeader: header ? duplicateColumnTransformer() : undefined,
    error: e => {
      throw new Error(e);
    }
  };

  const result = Papa.parse(csvText, papaparseConfig);
  const rows = result.data as any[];

  // Use the field names reported by papaparse when present, otherwise generate positional names
  const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRowLength);

  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
  let table: ArrayRowTable | ObjectRowTable;
  // Rows may come back as arrays or as objects; convert only those not already in the target shape
  switch (shape) {
    case 'object-row-table':
      table = {
        shape: 'object-row-table',
        data: rows.map(row => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
      };
      break;
    case 'array-row-table':
      table = {
        shape: 'array-row-table',
        data: rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
      };
      break;
    default:
      throw new Error(shape);
  }

  // Geometry detection always works on array rows, independent of the output shape
  const detectedGeometryColumns = csvOptions.detectGeometryColumns
    ? detectGeometryColumns(
        headerRow,
        rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
      )
    : [];

  if (detectedGeometryColumns.length > 0) {
    table =
      table.shape === 'array-row-table'
        ? {
            ...table,
            data: table.data.map(row => normalizeGeometryArrayRow(row, detectedGeometryColumns))
          }
        : {
            ...table,
            data: table.data.map(row => normalizeGeometryObjectRow(row, detectedGeometryColumns))
          };
  }

  table.schema = deduceCSVSchemaFromRows(table.data, headerRow, detectedGeometryColumns);
  return table;
}

// TODO - support batch size 0 = no batching/single batch?
/**
 * Parses CSV bytes incrementally and returns an async iterable of table batches.
 *
 * The input is handed to papaparse through `AsyncIteratorStreamer`. Each parsed row arrives in
 * the `step` callback and is added to a `TableBatchBuilder`; every batch the builder reports as
 * full is enqueued on an `AsyncQueue`, which is the object returned to the caller. Errors caught
 * while adding rows or while finalizing are enqueued on the same queue.
 *
 * @param asyncIterator sync or async iterable of binary chunks containing the CSV data
 * @param options loader options; `options.csv` is merged over `CSVLoader.options.csv`.
 *   The object passed in (including its `core` member) is not modified.
 * @returns the queue on which batches are emitted; it is closed from the `complete` callback
 */
function parseCSVInBatches(
  asyncIterator:
    | AsyncIterable<ArrayBufferLike | ArrayBufferView>
    | Iterable<ArrayBufferLike | ArrayBufferView>,
  options?: CSVLoaderOptions
): AsyncIterable<TableBatch> {
  // Papaparse does not support standard batch size handling
  // TODO - investigate papaparse chunks mode
  // Work on copies: `core` is cloned before overriding `batchSize`, because a shallow copy of
  // `options` alone would still share (and therefore mutate) the caller's `core` object.
  options = {...options};
  if (options.core?.batchSize === 'auto') {
    options.core = {...options.core, batchSize: 4000};
  }

  // Apps can call the parse method directly, so we apply default options here
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

  const asyncQueue = new AsyncQueue<TableBatch>();

  // Parser state shared between the `step` / `complete` callbacks and the helpers below
  let isFirstRow: boolean = true; // true until a schema has been deduced
  let headerRow: string[] | null = null;
  let tableBatchBuilder: TableBatchBuilder | null = null; // created lazily with the first row
  let schema: Schema | null = null;
  let sniffedRows: unknown[][] = []; // rows buffered while geometry detection is pending
  let detectedGeometryColumns = [] as ReturnType<typeof detectGeometryColumns>;
  // When geometry detection is disabled there is nothing to wait for
  let geometryDetectionFinalized = !csvOptions.detectGeometryColumns;

  const config = {
    // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
    ...csvOptions,
    header: false, // Unfortunately, header detection is not automatic and does not infer shapes
    download: false, // We handle loading, no need for papaparse to do it for us
    // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
    // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
    // See https://github.com/mholt/PapaParse/issues/465
    chunkSize: 1024 * 1024 * 5,
    // Papaparse's own skipEmptyLines is always disabled here; empty lines are rejected manually
    // in `step` given two bugs where the streaming parser gets stuck if both of the
    // skipEmptyLines and step callback options are provided:
    // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
    // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
    skipEmptyLines: false,

    // step is called on every row
    // eslint-disable-next-line complexity, max-statements
    step(results) {
      let row = results.data;

      if (csvOptions.skipEmptyLines === 'greedy') {
        // Manually reject lines that are empty
        const collapsedRow = row.flat().join('').trim();
        if (collapsedRow === '') {
          return;
        }
      } else if (csvOptions.skipEmptyLines === true) {
        // After normalization, a row consisting of a single null cell is treated as an empty line
        row = normalizePapaStreamingRow(row);
        if (row.length === 1 && row[0] === null) {
          return;
        }
      }
      const bytesUsed = results.meta.cursor;

      // Check if we need to save a header row
      if (isFirstRow && !headerRow) {
        // Auto detects or can be forced with csvOptions.header
        const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
        if (header) {
          // The header row is consumed here and never emitted as data
          headerRow = row.map(duplicateColumnTransformer());
          return;
        }
      }

      // First data row without a header row: generate positional column names
      if (isFirstRow) {
        if (!headerRow) {
          headerRow = generateHeader(csvOptions.columnPrefix, row.length);
        }
      }

      if (csvOptions.optimizeMemoryUsage) {
        // A workaround to allocate new strings and don't retain pointers to original strings.
        // https://bugs.chromium.org/p/v8/issues/detail?id=2869
        row = JSON.parse(JSON.stringify(row));
      }

      const shape = getBatchShape();

      if (!geometryDetectionFinalized && headerRow) {
        // Buffer rows until shouldFinalizeGeometryDetection reports that detection can be
        // finalized, then detect columns, deduce the schema and flush the buffered rows.
        sniffedRows.push(row);
        geometryDetectionFinalized = shouldFinalizeGeometryDetection(
          headerRow,
          sniffedRows,
          MAX_GEOMETRY_SNIFF_ROWS
        );
        if (geometryDetectionFinalized) {
          detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);
          const normalizedSniffedRows = sniffedRows.map(sniffedRow =>
            normalizeGeometryArrayRow(sniffedRow, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          isFirstRow = false;
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            addCSVBatchRow(normalizedSniffedRow, shape, bytesUsed);
          }
          sniffedRows = [];
        }
        // The current row was either buffered or flushed above
        return;
      }

      // If first data row, we can deduce the schema
      if (isFirstRow) {
        if (!headerRow) {
          return;
        }
        schema = deduceCSVSchemaFromRows(
          [normalizeGeometryArrayRow(row, detectedGeometryColumns)],
          headerRow,
          detectedGeometryColumns
        );
        isFirstRow = false;
      }

      const normalizedRow = normalizeGeometryArrayRow(row, detectedGeometryColumns);
      addCSVBatchRow(normalizedRow, shape, bytesUsed);
    },

    // complete is called when all rows have been read
    complete(results) {
      try {
        if (!geometryDetectionFinalized && headerRow) {
          // Input ended while rows were still being buffered for geometry detection:
          // run detection on what was collected and add the buffered rows now.
          detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);
          const normalizedSniffedRows = sniffedRows.map(row =>
            normalizeGeometryArrayRow(row, detectedGeometryColumns)
          );
          schema = deduceCSVSchemaFromRows(
            normalizedSniffedRows,
            headerRow,
            detectedGeometryColumns
          );
          const shape = getBatchShape();
          tableBatchBuilder =
            tableBatchBuilder ||
            new TableBatchBuilder(schema, {
              ...(options?.core || {}),
              shape
            });
          for (const normalizedSniffedRow of normalizedSniffedRows) {
            const batchRow =
              shape === 'object-row-table' && normalizedSniffedRow.length > headerRow.length
                ? convertToPapaObjectRow(normalizedSniffedRow, headerRow)
                : normalizedSniffedRow;
            tableBatchBuilder.addRow(batchRow);
          }
        }
        const bytesUsed = results.meta.cursor;
        // Ensure any final (partial) batch gets emitted
        const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});
        if (batch) {
          asyncQueue.enqueue(batch);
        }
      } catch (error) {
        asyncQueue.enqueue(error as Error);
      }

      asyncQueue.close();
    }
  };

  Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);

  // TODO - Does it matter if we return asyncIterable or asyncIterator
  // return asyncQueue[Symbol.asyncIterator]();
  return asyncQueue;

  /**
   * Adds one (already normalized) row to the batch builder and enqueues a batch if one is full.
   * Rows longer than the header are converted to object rows carrying `__parsed_extra` when the
   * output shape is 'object-row-table'.
   */
  function addCSVBatchRow(
    rowToAdd: unknown[],
    shape: 'array-row-table' | 'object-row-table',
    bytesUsed: number
  ): void {
    let batchRow: unknown[] | {[columnName: string]: unknown} = rowToAdd;
    if (shape === 'object-row-table' && headerRow && rowToAdd.length > headerRow.length) {
      batchRow = convertToPapaObjectRow(rowToAdd, headerRow);
    }

    // The builder is created on first use; callers assign `schema` before adding the first row
    tableBatchBuilder =
      tableBatchBuilder ||
      new TableBatchBuilder(schema!, {
        ...(options?.core || {}),
        shape
      });

    try {
      tableBatchBuilder.addRow(batchRow);
      const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});
      if (batch) {
        asyncQueue.enqueue(batch);
      }
    } catch (error) {
      asyncQueue.enqueue(error as Error);
    }
  }

  /**
   * Resolves the batch shape: a top-level `options.shape` (deprecated location) wins over
   * `csv.shape`, which wins over the default shape.
   */
  function getBatchShape(): 'array-row-table' | 'object-row-table' {
    const deprecatedShape = (
      options as {shape?: 'array-row-table' | 'object-row-table'} | undefined
    )?.shape;
    return deprecatedShape || csvOptions.shape || DEFAULT_CSV_SHAPE;
  }
}

/**
 * Checks if a certain row is a header row, i.e. every value in it is a string.
 * A missing row (`undefined` / `null`) is never a header. Note that an empty row counts as a
 * header, because `every` is vacuously true on an empty array.
 * @param row the row to check; may be missing when the input contained no rows
 * @returns true if the row looks like a header, false otherwise (always a real boolean)
 */
function isHeaderRow(row: unknown[] | null | undefined): boolean {
  return Array.isArray(row) && row.every(value => typeof value === 'string');
}

/**
 * Parses only the beginning of a CSV text and hands back its first row.
 * Values are dynamically typed, so numeric-looking cells come back as numbers.
 * @param csvText the csv text to inspect
 * @returns the first parsed row (undefined if papaparse produced no rows)
 */
function readFirstRow(csvText: string): any[] {
  const {data} = Papa.parse(csvText, {
    dynamicTyping: true,
    preview: 1
  });
  const [leadingRow] = [data[0]];
  return leadingRow;
}

/**
 * Builds a stateful renamer for header columns. Papaparse does not de-duplicate header names and
 * would keep only the latest occurrence, so repeated names get a numeric suffix instead:
 * the first free name among `name.1`, `name.2`, ... is used.
 * See the header option in https://www.papaparse.com/docs#config
 * @returns a function mapping each incoming column name to a name not returned before
 */
function duplicateColumnTransformer(): (column: string) => string {
  const seenNames = new Set<string>();

  return function renameIfDuplicate(column: string): string {
    let candidate = column;
    // Try successive suffixes until the candidate has not been handed out yet
    for (let suffix = 1; seenNames.has(candidate); suffix += 1) {
      candidate = `${column}.${suffix}`;
    }
    seenNames.add(candidate);
    return candidate;
  };
}

/**
 * Produces positional column names `<prefix>1`, `<prefix>2`, ... for CSV data without a header.
 * @param columnPrefix text placed in front of each 1-based column number
 * @param count how many column names to produce (defaults to 0)
 * @returns the generated column names, in column order
 */
function generateHeader(columnPrefix: string, count: number = 0): string[] {
  const names: string[] = [];
  // The number of names produced so far doubles as the zero-based column index
  while (names.length < count) {
    names.push(`${columnPrefix}${names.length + 1}`);
  }
  return names;
}

/**
 * Returns a copy of a streamed row in which every cell that is an empty array is replaced by
 * null; all other cells are passed through unchanged.
 * @param row the row as delivered to the streaming `step` callback
 * @returns a new array of the same length
 */
function normalizePapaStreamingRow(row: unknown[]): unknown[] {
  return row.map(cell => {
    const isEmptyArrayCell = Array.isArray(cell) && cell.length === 0;
    if (isEmptyArrayCell) {
      return null;
    }
    return cell;
  });
}

/**
 * Converts an array row to an object row keyed by `headerRow`. Cells beyond the last header
 * column are kept under the `__parsed_extra` key.
 * @param row the array row to convert
 * @param headerRow the column names
 * @returns the object row, with `__parsed_extra` set only when the row has surplus cells
 */
function convertToPapaObjectRow(
  row: unknown[],
  headerRow: string[]
): {[columnName: string]: unknown} {
  const objectRow = convertToObjectRow(row, headerRow);
  const columnCount = headerRow.length;
  if (row.length > columnCount) {
    objectRow.__parsed_extra = row.slice(columnCount);
  }
  return objectRow;
}

1	// loaders.gl
2	// SPDX-License-Identifier: MIT
3	// Copyright (c) vis.gl contributors
4
5	import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
6	import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
7
8	import {toArrayBufferIterator} from '@loaders.gl/loader-utils';
9	import {
10	AsyncQueue,
11	TableBatchBuilder,
12	convertToArrayRow,
13	convertToObjectRow
14	} from '@loaders.gl/schema-utils';
15	import Papa from './papaparse/papaparse';
16	import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
17	import {CSVFormat} from './csv-format';
18	import {
19	deduceCSVSchemaFromRows,
20	detectGeometryColumns,
21	MAX_GEOMETRY_SNIFF_ROWS,
22	normalizeGeometryArrayRow,
23	normalizeGeometryObjectRow,
24	shouldFinalizeGeometryDetection
25	} from './lib/csv-geometry';
26
27	// __VERSION__ is injected by babel-plugin-version-inline
28	// @ts-ignore TS2304: Cannot find name '__VERSION__'.
29	const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';	9!
30
31	const DEFAULT_CSV_SHAPE = 'object-row-table';	9✔
32
33	export type CSVLoaderOptions = LoaderOptions & {
34	csv?: {
35	// loaders.gl options
36	shape?: 'array-row-table' \| 'object-row-table';
37	/** optimizes memory usage but increases parsing time. */
38	optimizeMemoryUsage?: boolean;
39	columnPrefix?: string;
40	header?: 'auto';
41
42	// CSV options (papaparse)
43	// delimiter: auto
44	// newline: auto
45	quoteChar?: string;
46	escapeChar?: string;
47	// Convert numbers and boolean values in rows from strings
48	dynamicTyping?: boolean;
49	comments?: boolean;
50	skipEmptyLines?: boolean \| 'greedy';
51	// transform: null?
52	delimitersToGuess?: string[];
53	detectGeometryColumns?: boolean;
54	// fastMode: auto
55	};
56	};
57
58	export const CSVLoader = {	9✔
59	...CSVFormat,
60
61	dataType: null as unknown as ObjectRowTable \| ArrayRowTable,
62	batchType: null as unknown as TableBatch,
63	version: VERSION,
64	parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
65	parseCSV(new TextDecoder().decode(arrayBuffer), options),	1✔
66	parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),	26✔
67	parseInBatches: parseCSVInBatches,
68	// @ts-ignore
69	// testText: null,
70	options: {
71	csv: {
72	shape: DEFAULT_CSV_SHAPE, // 'object-row-table'
73	optimizeMemoryUsage: false,
74	// CSV options
75	header: 'auto',
76	columnPrefix: 'column',
77	// delimiter: auto
78	// newline: auto
79	quoteChar: '"',
80	escapeChar: '"',
81	dynamicTyping: true,
82	comments: false,
83	skipEmptyLines: true,
84	// transform: null?
85	detectGeometryColumns: false,
86	delimitersToGuess: [',', '\t', '\|', ';']
87	// fastMode: auto
88	}
89	}
90	} as const satisfies LoaderWithParser<ObjectRowTable \| ArrayRowTable, TableBatch, CSVLoaderOptions>;
91
92	async function parseCSV(
93	csvText: string,
94	options?: CSVLoaderOptions
95	): Promise<ObjectRowTable \| ArrayRowTable> {
96	// Apps can call the parse method directly, so we apply default options here
97	const csvOptions = {...CSVLoader.options.csv, ...options?.csv};	27✔
98
99	const firstRow = readFirstRow(csvText);	27✔
100	const header: boolean =
101	csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);	27✔
102
103	const parseWithHeader = header;	27✔
104
105	const papaparseConfig = {	27✔
106	// dynamicTyping: true,
107	...csvOptions,
108	header: parseWithHeader,
109	download: false, // We handle loading, no need for papaparse to do it for us
110	transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,	27✔
111	error: e => {
112	throw new Error(e);	×
113	}
114	};
115
116	const result = Papa.parse(csvText, papaparseConfig);	27✔
117	const rows = result.data as any[];	27✔
118
119	const headerRow = result.meta.fields \|\| generateHeader(csvOptions.columnPrefix, firstRow.length);	27✔
120
121	const shape = csvOptions.shape \|\| DEFAULT_CSV_SHAPE;	27!
122	let table: ArrayRowTable \| ObjectRowTable;
123	switch (shape) {	27!
124	case 'object-row-table':
125	table = {	23✔
126	shape: 'object-row-table',
127	data: rows.map(row => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))	87,461✔
128	};
129	break;	23✔
130	case 'array-row-table':
131	table = {	3✔
132	shape: 'array-row-table',
133	data: rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))	8!
134	};
135	break;	3✔
136	default:
137	throw new Error(shape);	×
138	}
139	const detectedGeometryColumns = csvOptions.detectGeometryColumns	26!
140	? detectGeometryColumns(
141	headerRow,
NEW 142	rows.map(row => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))	×
143	)
144	: [];
145
146	if (detectedGeometryColumns.length > 0) {	27!
NEW 147	table =	×
148	table.shape === 'array-row-table'	×
149	? {
150	...table,
NEW 151	data: table.data.map(row => normalizeGeometryArrayRow(row, detectedGeometryColumns))	×
152	}
153	: {
154	...table,
NEW 155	data: table.data.map(row => normalizeGeometryObjectRow(row, detectedGeometryColumns))	×
156	};
157	}
158
159	table.schema = deduceCSVSchemaFromRows(table.data, headerRow, detectedGeometryColumns);	26✔
160	return table;	26✔
161	}
162
163	// TODO - support batch size 0 = no batching/single batch?
164	function parseCSVInBatches(
165	asyncIterator:
166	\| AsyncIterable<ArrayBufferLike \| ArrayBufferView>
167	\| Iterable<ArrayBufferLike \| ArrayBufferView>,
168	options?: CSVLoaderOptions
169	): AsyncIterable<TableBatch> {
170	// Papaparse does not support standard batch size handling
171	// TODO - investigate papaparse chunks mode
172	options = {...options};	22✔
173	if (options?.core?.batchSize === 'auto') {	22✔
174	options.core.batchSize = 4000;	19✔
175	}
176
177	// Apps can call the parse method directly, we so apply default options here
178	const csvOptions = {...CSVLoader.options.csv, ...options?.csv};	22✔
179
180	const asyncQueue = new AsyncQueue<TableBatch>();	22✔
181
182	let isFirstRow: boolean = true;	22✔
183	let headerRow: string[] \| null = null;	22✔
184	let tableBatchBuilder: TableBatchBuilder \| null = null;	22✔
185	let schema: Schema \| null = null;	22✔
186	let sniffedRows: unknown[][] = [];	22✔
187	let detectedGeometryColumns = [] as ReturnType<typeof detectGeometryColumns>;	22✔
188	let geometryDetectionFinalized = !csvOptions.detectGeometryColumns;	22✔
189
190	const config = {	22✔
191	// dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
192	...csvOptions,
193	header: false, // Unfortunately, header detection is not automatic and does not infer shapes
194	download: false, // We handle loading, no need for papaparse to do it for us
195	// chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
196	// streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
197	// See https://github.com/mholt/PapaParse/issues/465
198	chunkSize: 1024 * 1024 * 5,
199	// skipEmptyLines is set to a boolean value if supplied. Greedy is set to true
200	// skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if
201	// both of the skipEmptyLines and step callback options are provided:
202	// - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
203	// - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
204	skipEmptyLines: false,
205
206	// step is called on every row
207	// eslint-disable-next-line complexity, max-statements
208	step(results) {
209	let row = results.data;	9,252✔
210
211	if (csvOptions.skipEmptyLines === 'greedy') {	9,252✔
212	// Manually reject lines that are empty
213	const collapsedRow = row.flat().join('').trim();	8✔
214	if (collapsedRow === '') {	8✔
215	return;	5✔
216	}
217	} else if (csvOptions.skipEmptyLines === true) {	9,244!
218	row = normalizePapaStreamingRow(row);	9,244✔
219	if (row.length === 1 && row[0] === null) {	9,244✔
220	return;	3✔
221	}
222	}
223	const bytesUsed = results.meta.cursor;	9,244✔
224
225	// Check if we need to save a header row
226	if (isFirstRow && !headerRow) {	9,244✔
227	// Auto detects or can be forced with csvOptions.header
228	const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);	22✔
229	if (header) {	22✔
230	headerRow = row.map(duplicateColumnTransformer());	13✔
231	return;	13✔
232	}
233	}
234
235	// If first data row, we can deduce the schema
236	if (isFirstRow) {	9,231✔
237	if (!headerRow) {	22✔
238	headerRow = generateHeader(csvOptions.columnPrefix, row.length);	9✔
239	}
240	}
241
242	if (csvOptions.optimizeMemoryUsage) {	9,231!
243	// A workaround to allocate new strings and don't retain pointers to original strings.
244	// https://bugs.chromium.org/p/v8/issues/detail?id=2869
245	row = JSON.parse(JSON.stringify(row));	×
246	}
247
248	const shape = getBatchShape();	9,231✔
249
250	if (!geometryDetectionFinalized && headerRow) {	9,231!
NEW 251	sniffedRows.push(row);	×
NEW 252	geometryDetectionFinalized = shouldFinalizeGeometryDetection(	×
253	headerRow,
254	sniffedRows,
255	MAX_GEOMETRY_SNIFF_ROWS
256	);
NEW 257	if (geometryDetectionFinalized) {	×
NEW 258	detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);	×
NEW 259	const normalizedSniffedRows = sniffedRows.map(sniffedRow =>	×
NEW 260	normalizeGeometryArrayRow(sniffedRow, detectedGeometryColumns)	×
261	);
NEW 262	schema = deduceCSVSchemaFromRows(	×
263	normalizedSniffedRows,
264	headerRow,
265	detectedGeometryColumns
266	);
NEW 267	isFirstRow = false;	×
NEW 268	for (const normalizedSniffedRow of normalizedSniffedRows) {	×
NEW 269	addCSVBatchRow(normalizedSniffedRow, shape, bytesUsed);	×
270	}
NEW 271	sniffedRows = [];	×
272	}
NEW 273	return;	×
274	}
275
276	if (isFirstRow) {	9,231✔
277	if (!headerRow) {	22!
NEW 278	return;	×
279	}
280	schema = deduceCSVSchemaFromRows(	22✔
281	[normalizeGeometryArrayRow(row, detectedGeometryColumns)],
282	headerRow,
283	detectedGeometryColumns
284	);
285	isFirstRow = false;	22✔
286	}
287
288	const normalizedRow = normalizeGeometryArrayRow(row, detectedGeometryColumns);	9,231✔
289	addCSVBatchRow(normalizedRow, shape, bytesUsed);	9,231✔
290	},
291
292	// complete is called when all rows have been read
293	complete(results) {
294	try {	22✔
295	if (!geometryDetectionFinalized && headerRow) {	22!
NEW 296	detectedGeometryColumns = detectGeometryColumns(headerRow, sniffedRows);	×
NEW 297	const normalizedSniffedRows = sniffedRows.map(row =>	×
NEW 298	normalizeGeometryArrayRow(row, detectedGeometryColumns)	×
299	);
NEW 300	schema = deduceCSVSchemaFromRows(	×
301	normalizedSniffedRows,
302	headerRow,
303	detectedGeometryColumns
304	);
NEW 305	const shape = getBatchShape();	×
NEW 306	tableBatchBuilder =	×
307	tableBatchBuilder \|\|	×
308	new TableBatchBuilder(schema, {
309	...(options?.core \|\| {}),	×
310	shape
311	});
NEW 312	for (const normalizedSniffedRow of normalizedSniffedRows) {	×
313	const batchRow =
NEW 314	shape === 'object-row-table' && normalizedSniffedRow.length > headerRow.length	×
315	? convertToPapaObjectRow(normalizedSniffedRow, headerRow)
316	: normalizedSniffedRow;
NEW 317	tableBatchBuilder.addRow(batchRow);	×
318	}
319	}
320	const bytesUsed = results.meta.cursor;	22✔
321	// Ensure any final (partial) batch gets emitted
322	const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});	22✔
323	if (batch) {	22✔
324	asyncQueue.enqueue(batch);	20✔
325	}
326	} catch (error) {
327	asyncQueue.enqueue(error as Error);	×
328	}
329
330	asyncQueue.close();	22✔
331	}
332	};
333
334	Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);	22✔
335
336	// TODO - Does it matter if we return asyncIterable or asyncIterator
337	// return asyncQueue[Symbol.asyncIterator]();
338	return asyncQueue;	22✔
339
340	function addCSVBatchRow(
341	rowToAdd: unknown[],
342	shape: 'array-row-table' \| 'object-row-table',
343	bytesUsed: number
344	): void {
345	let batchRow: unknown[] \| {[columnName: string]: unknown} = rowToAdd;	9,231✔
346	if (shape === 'object-row-table' && headerRow && rowToAdd.length > headerRow.length) {	9,231✔
347	batchRow = convertToPapaObjectRow(rowToAdd, headerRow);	2✔
348	}
349
350	tableBatchBuilder =	9,231✔
351	tableBatchBuilder \|\|	9,253✔
352	new TableBatchBuilder(schema!, {
353	...(options?.core \|\| {}),	23✔
354	shape
355	});
356
357	try {	9,231✔
358	tableBatchBuilder.addRow(batchRow);	9,231✔
359	const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});	9,231✔
360	if (batch) {	9,231✔
361	asyncQueue.enqueue(batch);	84✔
362	}
363	} catch (error) {
NEW 364	asyncQueue.enqueue(error as Error);	×
365	}
366	}
367
368	function getBatchShape(): 'array-row-table' \| 'object-row-table' {
369	const deprecatedShape = (
370	options as {shape?: 'array-row-table' \| 'object-row-table'} \| undefined	9,231✔
371	)?.shape;
372	return deprecatedShape \|\| csvOptions.shape \|\| DEFAULT_CSV_SHAPE;	9,231!
373	}
374	}
375
376	/**
377	* Checks if a certain row is a header row
378	* @param row the row to check
379	* @returns true if the row looks like a header
380	*/
381	function isHeaderRow(row: string[]): boolean {
382	return row && row.every(value => typeof value === 'string');	143✔
383	}
384
385	/**
386	* Reads, parses, and returns the first row of a CSV text
387	* @param csvText the csv text to parse
388	* @returns the first row
389	*/
390	function readFirstRow(csvText: string): any[] {
391	const result = Papa.parse(csvText, {	27✔
392	dynamicTyping: true,
393	preview: 1
394	});
395	return result.data[0];	27✔
396	}
397
398	/**
399	* Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle
400	* duplicate header columns and would use the latest occurrence by default.
401	* See the header option in https://www.papaparse.com/docs#config
402	* @returns a transform function that returns sanitized names for duplicate fields
403	*/
404	function duplicateColumnTransformer(): (column: string) => string {
405	const observedColumns = new Set<string>();	31✔
406	return col => {	31✔
407	let colName = col;	166✔
408	let counter = 1;	166✔
409	while (observedColumns.has(colName)) {	166✔
410	colName = `${col}.${counter}`;	25✔
411	counter++;	25✔
412	}
413	observedColumns.add(colName);	166✔
414	return colName;	166✔
415	};
416	}
417
418	/**
419	* Generates the header of a CSV given a prefix and a column count
420	* @param columnPrefix the columnPrefix to use
421	* @param count the count of column names to generate
422	* @returns an array of column names
423	*/
424	function generateHeader(columnPrefix: string, count: number = 0): string[] {	17✔
425	const headers: string[] = [];	17✔
426	for (let i = 0; i < count; i++) {	17✔
427	headers.push(`${columnPrefix}${i + 1}`);	54✔
428	}
429	return headers;	17✔
430	}
431
432	function normalizePapaStreamingRow(row: unknown[]): unknown[] {
433	return row.map(value => (Array.isArray(value) && value.length === 0 ? null : value));	34,742✔
434	}
435
436	function convertToPapaObjectRow(
437	row: unknown[],
438	headerRow: string[]
439	): {[columnName: string]: unknown} {
440	const objectRow = convertToObjectRow(row, headerRow);	2✔
441	const parsedExtra = row.slice(headerRow.length);	2✔
442	if (parsedExtra.length > 0) {	2!
443	objectRow.__parsed_extra = parsedExtra;	2✔
444	}
445	return objectRow;	2✔
446	}

visgl / loaders.gl / 24607714991

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous