keplergl / kepler.gl / 12247395692

10 Dec 2024 01:11AM UTC coverage: 69.332% (+0.01%) from 69.322%

Build 12247395692, triggered by a push via GitHub (web-flow).

[Feat] add h3 typed column (#2822)

Detect an H3 column as a new h3-type column, like the existing geocolumn.

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

5489 of 9166 branches covered (59.88%); branch coverage is included in the aggregate %.

15 of 21 new or added lines in 6 files covered (71.43%).

22 existing lines in 1 file are now uncovered.

11426 of 15231 relevant lines covered (75.02%).

96.1 hits per line.

Source File: /src/processors/src/data-processor.ts (74.43% covered)
// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

import * as arrow from 'apache-arrow';
import {csvParseRows} from 'd3-dsv';
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
import normalize from '@mapbox/geojson-normalize';
import {ArrowTable} from '@loaders.gl/schema';
import {ALL_FIELD_TYPES, DATASET_FORMATS, GUIDES_FILE_FORMAT_DOC} from '@kepler.gl/constants';
import {ProcessorResult, Field} from '@kepler.gl/types';
import {
  arrowDataTypeToAnalyzerDataType,
  arrowDataTypeToFieldType,
  hasOwnProperty,
  isPlainObject
} from '@kepler.gl/utils';
import {
  analyzerTypeToFieldType,
  getSampleForTypeAnalyze,
  getFieldsFromData,
  h3IsValid,
  notNullorUndefined,
  toArray
} from '@kepler.gl/common-utils';
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
import {Feature} from '@nebula.gl/edit-modes';

// if any of these values occurs in csv, parse it to null;
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
// also matches the empty string
export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;

function tryParseJsonString(str) {
  try {
    return JSON.parse(str);
  } catch (e) {
    return null;
  }
}

export const PARSE_FIELD_VALUE_FROM_STRING = {
  [ALL_FIELD_TYPES.boolean]: {
    valid: (d: unknown): boolean => typeof d === 'boolean',
    parse: (d: unknown): boolean => d === 'true' || d === 'True' || d === 'TRUE' || d === '1'
  },
  [ALL_FIELD_TYPES.integer]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseInt(d, 10) === d,
    // @ts-ignore
    parse: (d: unknown): number => parseInt(d, 10)
  },
  [ALL_FIELD_TYPES.timestamp]: {
    valid: (d: unknown, field: Field): boolean =>
      ['x', 'X'].includes(field.format) ? typeof d === 'number' : typeof d === 'string',
    parse: (d: any, field: Field) => (['x', 'X'].includes(field.format) ? Number(d) : d)
  },
  [ALL_FIELD_TYPES.real]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseFloat(d) === d,
    // Note this will result in NaN for some strings
    parse: parseFloat
  },
  [ALL_FIELD_TYPES.object]: {
    valid: isPlainObject,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.array]: {
    valid: Array.isArray,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.h3]: {
    valid: d => h3IsValid(d),
    parse: d => d
  }
};
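
// Example (illustrative only): each entry pairs a `valid` check with a `parse` step.
// For an integer field, the raw CSV string '1' fails `valid` and is then parsed:
//   PARSE_FIELD_VALUE_FROM_STRING[ALL_FIELD_TYPES.integer].valid('1'); // false
//   PARSE_FIELD_VALUE_FROM_STRING[ALL_FIELD_TYPES.integer].parse('1'); // 1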

/**
 * Process csv data, output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * @param rawData raw csv string
 * @returns data object `{fields: [], rows: []}` that can be passed to `addDataToMap`
 * @public
 * @example
 * import {processCsvData} from 'kepler.gl/processors';
 *
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
 *
 * const dataset = {
 *  info: {id: 'test_data', label: 'My Csv'},
 *  data: processCsvData(testData)
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: [dataset],
 *  options: {centerMap: true, readOnly: true}
 * }));
 */
export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult {
  let rows: unknown[][] | undefined;
  let headerRow: string[] | undefined;

  if (typeof rawData === 'string') {
    const parsedRows: string[][] = csvParseRows(rawData);

    if (!Array.isArray(parsedRows) || parsedRows.length < 2) {
      // looks like an empty file; throw an error to be caught
      throw new Error('process Csv Data Failed: CSV is empty');
    }
    headerRow = parsedRows[0];
    rows = parsedRows.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    rows = rawData;
    headerRow = header;

    if (!Array.isArray(headerRow)) {
      // if data is passed in as an array of rows and the header is missing,
      // assume the first row is the header
      // @ts-ignore
      headerRow = rawData[0];
      rows = rawData.slice(1);
    }
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // here we assume the first row of the uploaded csv file
  // contains the column names

  cleanUpFalsyCsvValue(rows);
  // No need to run type detection on every data point;
  // here we get a list of non-null values to run the analyzer on
  const sample = getSampleForTypeAnalyze({fields: headerRow, rows});
  const fields = getFieldsFromData(sample, headerRow);
  const parsedRows = parseRowsByFields(rows, fields);

  return {fields, rows: parsedRows};
}

/**
 * Parse csv rows by analyzed field types, so that `'1'` -> `1` and `'True'` -> `true`
 * @param rows
 * @param fields
 */
export function parseRowsByFields(rows: any[][], fields: Field[]) {
  // Edit rows in place
  const geojsonFieldIdx = fields.findIndex(f => f.name === '_geojson');
  fields.forEach(parseCsvRowsByFieldType.bind(null, rows, geojsonFieldIdx));

  return rows;
}
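
// Illustrative sketch (hypothetical data; `fields` would normally come from
// getFieldsFromData):
//   const rows = [['1', 'True'], ['2', 'False']];
//   parseRowsByFields(rows, fields);
//   // rows is mutated in place -> [[1, true], [2, false]]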

/**
 * Convert falsy values in csv, including `'', 'null', 'NULL', 'Null', 'NaN'`, to `null`,
 * so that type-analyzer won't detect them as strings
 *
 * @param rows
 */
function cleanUpFalsyCsvValue(rows: unknown[][]): void {
  const re = new RegExp(CSV_NULLS, 'g');
  for (let i = 0; i < rows.length; i++) {
    for (let j = 0; j < rows[i].length; j++) {
      // the analyzer will set a field to 'string' if it contains empty values,
      // which d3.csv parses as ''
      // here we parse empty data as null
      // TODO: create warning when detecting `CSV_NULLS` in the data
      if (typeof rows[i][j] === 'string' && (rows[i][j] as string).match(re)) {
        rows[i][j] = null;
      }
    }
  }
}
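
// Illustrative sketch of the cleanup (hypothetical rows):
//   const rows = [['NaN', '10'], ['', 'null']];
//   cleanUpFalsyCsvValue(rows);
//   // rows -> [[null, '10'], [null, null]]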

/**
 * Process uploaded csv file to parse values by field type
 *
 * @param rows
 * @param geoFieldIdx field index
 * @param field
 * @param i
 */
export function parseCsvRowsByFieldType(
  rows: unknown[][],
  geoFieldIdx: number,
  field: Field,
  i: number
): void {
  const parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (parser) {
    // check whether the first non-null value is already parsed
    const first = rows.find(r => notNullorUndefined(r[i]));
    if (!first || parser.valid(first[i], field)) {
      return;
    }
    rows.forEach(row => {
      // parse string value based on field type
      if (row[i] !== null) {
        row[i] = parser.parse(row[i], field);
        if (
          geoFieldIdx > -1 &&
          isPlainObject(row[geoFieldIdx]) &&
          // @ts-ignore
          hasOwnProperty(row[geoFieldIdx], 'properties')
        ) {
          // @ts-ignore
          row[geoFieldIdx].properties[field.name] = row[i];
        }
      }
    });
  }
}
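
// Illustrative sketch (hypothetical `integerField` with type ALL_FIELD_TYPES.integer;
// parses column 0 in place, skipping nulls):
//   const rows = [['1'], ['2'], [null]];
//   parseCsvRowsByFieldType(rows, -1, integerField, 0);
//   // rows -> [[1], [2], [null]]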

/* eslint-enable complexity */

/**
 * Process data where each row is an object; the output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 * @param rawData an array of row objects; each object should have the same number of keys
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processRowObject} from 'kepler.gl/processors';
 *
 * const data = [
 *  {lat: 31.27, lng: 127.56, value: 3},
 *  {lat: 31.22, lng: 126.26, value: 1}
 * ];
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {label: 'My Data', id: 'my_data'},
 *    data: processRowObject(data)
 *  }
 * }));
 */
export function processRowObject(rawData: unknown[]): ProcessorResult {
  if (!Array.isArray(rawData)) {
    return null;
  } else if (!rawData.length) {
    // data is empty
    return {
      fields: [],
      rows: []
    };
  }

  const keys = Object.keys(rawData[0]); // [lat, lng, value]
  const rows = rawData.map(d => keys.map(key => d[key])); // [[31.27, 127.56, 3]]

  // row objects can still contain values like `Null` or `N/A`
  cleanUpFalsyCsvValue(rows);

  return processCsvData(rows, keys);
}

/**
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
 * output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 *
 * @param rawData raw geojson feature collection
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processGeojson} from 'kepler.gl/processors';
 *
 * const geojson = {
 *         "type" : "FeatureCollection",
 *         "features" : [{
 *                 "type" : "Feature",
 *                 "properties" : {
 *                         "capacity" : "10",
 *                         "type" : "U-Rack"
 *                 },
 *                 "geometry" : {
 *                         "type" : "Point",
 *                         "coordinates" : [ -71.073283, 42.417500 ]
 *                 }
 *         }]
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {
 *      label: 'Sample Taxi Trips in New York City',
 *      id: 'test_trip_data'
 *    },
 *    data: processGeojson(geojson)
 *  }
 * }));
 */
export function processGeojson(rawData: unknown): ProcessorResult {
  const normalizedGeojson = normalize(rawData);

  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    // failed to normalize geojson
    const error = new Error(
      `Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](${GUIDES_FILE_FORMAT_DOC})`
    );
    throw error;
  }

  // getting all feature fields
  const allDataRows: Array<{_geojson: Feature} & keyof Feature> = [];
  for (let i = 0; i < normalizedGeojson.features.length; i++) {
    const f = normalizedGeojson.features[i];
    if (f.geometry) {
      allDataRows.push({
        // add feature to _geojson field
        _geojson: f,
        ...(f.properties || {})
      });
    }
  }
  // get all the fields
  const fields = allDataRows.reduce<string[]>((accu, curr) => {
    Object.keys(curr).forEach(key => {
      if (!accu.includes(key)) {
        accu.push(key);
      }
    });
    return accu;
  }, []);

  // make sure each feature has exactly the same fields
  allDataRows.forEach(d => {
    fields.forEach(f => {
      if (!(f in d)) {
        d[f] = null;
        if (d._geojson.properties) {
          d._geojson.properties[f] = null;
        }
      }
    });
  });

  return processRowObject(allDataRows);
}

/**
 * Process saved kepler.gl json to be passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData
 * @param schema
 * @returns datasets and config `{datasets: {}, config: {}}`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processKeplerglJSON} from 'kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
export function processKeplerglJSON(rawData: SavedMap, schema = KeplerGlSchema): LoadedMap | null {
  return rawData ? schema.load(rawData.datasets, rawData.config) : null;
}

/**
 * Parse a single dataset or an array of datasets saved using the kepler.gl schema
 * @param rawData
 * @param schema
 */
export function processKeplerglDataset(
  rawData: object | object[],
  schema = KeplerGlSchema
): ParsedDataset | ParsedDataset[] | null {
  if (!rawData) {
    return null;
  }

  const results = schema.parseSavedData(toArray(rawData));
  if (!results) {
    return null;
  }
  return Array.isArray(rawData) ? results : results[0];
}
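
// Illustrative sketch (hypothetical `savedDataset` in the kepler.gl saved-data format):
//   const one = processKeplerglDataset(savedDataset);    // ParsedDataset | null
//   const many = processKeplerglDataset([savedDataset]); // ParsedDataset[] | null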

/**
 * Parse arrow table and return a dataset
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowTable(arrowTable: ArrowTable): ProcessorResult | null {
  // @ts-ignore - Unknown data type causing build failures
  return processArrowBatches(arrowTable.data.batches);
}
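
// Illustrative sketch (assumes `arrowTable` is an ArrowTable produced by a
// loaders.gl Arrow loader):
//   const result = processArrowTable(arrowTable);
//   // result -> {fields, rows: [], cols, metadata}, or null for an empty table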

export function arrowSchemaToFields(schema: arrow.Schema): Field[] {
  return schema.fields.map((field: arrow.Field, index: number) => {
    const isGeoArrowColumn = field.metadata.get('ARROW:extension:name')?.startsWith('geoarrow');
    return {
      ...field,
      name: field.name,
      id: field.name,
      displayName: field.name,
      format: '',
      fieldIdx: index,
      type: isGeoArrowColumn ? ALL_FIELD_TYPES.geoarrow : arrowDataTypeToFieldType(field.type),
      analyzerType: isGeoArrowColumn
        ? AnalyzerDATA_TYPES.GEOMETRY
        : arrowDataTypeToAnalyzerDataType(field.type),
      valueAccessor: (dc: any) => d => {
        return dc.valueAt(d.index, index);
      },
      metadata: field.metadata
    };
  });
}
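
// Illustrative sketch: a column annotated with a `geoarrow` extension maps to the
// `geoarrow` field type; other columns go through arrowDataTypeToFieldType:
//   const fields = arrowSchemaToFields(arrowTable.schema);
//   // fields[i] -> {name, id, displayName, type, analyzerType, valueAccessor, ...}
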
/**
 * Parse arrow batches returned from parseInBatches()
 *
 * @param arrowBatches the arrow record batches to parse
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowBatches(arrowBatches: arrow.RecordBatch[]): ProcessorResult | null {
  if (arrowBatches.length === 0) {
    return null;
  }
  const arrowTable = new arrow.Table(arrowBatches);
  const fields = arrowSchemaToFields(arrowTable.schema);

  const cols = [...Array(arrowTable.numCols).keys()].map(i => arrowTable.getChildAt(i));

  // return empty rows and use the raw arrow table to construct a column-wise data container
  return {fields, rows: [], cols, metadata: arrowTable.schema.metadata};
}
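
// Illustrative sketch (assumes `batches` came from a loaders.gl parseInBatches call):
//   const dataset = processArrowBatches(batches);
//   // dataset -> {fields, rows: [], cols, metadata}; rows stay empty because the
//   // arrow columns back a column-wise data container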

export const DATASET_HANDLERS = {
  [DATASET_FORMATS.row]: processRowObject,
  [DATASET_FORMATS.geojson]: processGeojson,
  [DATASET_FORMATS.csv]: processCsvData,
  [DATASET_FORMATS.arrow]: processArrowTable,
  [DATASET_FORMATS.keplergl]: processKeplerglDataset
};
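
// Illustrative dispatch by dataset format (hypothetical `format` and `data` values):
//   const handler = DATASET_HANDLERS[format]; // e.g. DATASET_FORMATS.csv -> processCsvData
//   const dataset = handler(data);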

export const Processors: {
  processGeojson: typeof processGeojson;
  processCsvData: typeof processCsvData;
  processArrowTable: typeof processArrowTable;
  processArrowBatches: typeof processArrowBatches;
  processRowObject: typeof processRowObject;
  processKeplerglJSON: typeof processKeplerglJSON;
  processKeplerglDataset: typeof processKeplerglDataset;
  analyzerTypeToFieldType: typeof analyzerTypeToFieldType;
  getFieldsFromData: typeof getFieldsFromData;
  parseCsvRowsByFieldType: typeof parseCsvRowsByFieldType;
} = {
  processGeojson,
  processCsvData,
  processArrowTable,
  processArrowBatches,
  processRowObject,
  processKeplerglJSON,
  processKeplerglDataset,
  analyzerTypeToFieldType,
  getFieldsFromData,
  parseCsvRowsByFieldType
};