keplergl / kepler.gl / 12031095165

26 Nov 2024 12:57PM UTC coverage: 69.321% (+22.9%) from 46.466%
Build 12031095165 · push · github · web-flow
[feat] create new dataset action (#2778)

* [feat] create new dataset action

- createNewDataEntry now returns a react-palm task to create or update a dataset asynchronously.
- updateVisDataUpdater now returns tasks to create or update a dataset asynchronously, and once done triggers createNewDatasetSuccess action.
- refactoring of demo-app App and Container to functional components

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>
Co-authored-by: Shan He <heshan0131@gmail.com>

5436 of 9079 branches covered (59.87%)

Branch coverage included in aggregate %.

91 of 111 new or added lines in 13 files covered. (81.98%)

8 existing lines in 3 files now uncovered.

11368 of 15162 relevant lines covered (74.98%)

95.15 hits per line

Source File: /src/processors/src/data-processor.ts (74.71% covered)
// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

import * as arrow from 'apache-arrow';
import {csvParseRows} from 'd3-dsv';
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
import normalize from '@mapbox/geojson-normalize';
import {ArrowTable} from '@loaders.gl/schema';
import {ALL_FIELD_TYPES, DATASET_FORMATS, GUIDES_FILE_FORMAT_DOC} from '@kepler.gl/constants';
import {ProcessorResult, Field} from '@kepler.gl/types';
import {
  arrowDataTypeToAnalyzerDataType,
  arrowDataTypeToFieldType,
  hasOwnProperty,
  isPlainObject
} from '@kepler.gl/utils';
import {
  notNullorUndefined,
  toArray,
  getSampleForTypeAnalyze,
  getFieldsFromData,
  analyzerTypeToFieldType
} from '@kepler.gl/common-utils';
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
import {Feature} from '@nebula.gl/edit-modes';

// if any of these values occurs in a csv cell, parse it to null;
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
// the empty alternative at the end also matches the empty string
export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;

function tryParseJsonString(str) {
  try {
    return JSON.parse(str);
  } catch (e) {
    return null;
  }
}

export const PARSE_FIELD_VALUE_FROM_STRING = {
  [ALL_FIELD_TYPES.boolean]: {
    valid: (d: unknown): boolean => typeof d === 'boolean',
    parse: (d: unknown): boolean => d === 'true' || d === 'True' || d === 'TRUE' || d === '1'
  },
  [ALL_FIELD_TYPES.integer]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseInt(d, 10) === d,
    // @ts-ignore
    parse: (d: unknown): number => parseInt(d, 10)
  },
  [ALL_FIELD_TYPES.timestamp]: {
    valid: (d: unknown, field: Field): boolean =>
      ['x', 'X'].includes(field.format) ? typeof d === 'number' : typeof d === 'string',
    parse: (d: any, field: Field) => (['x', 'X'].includes(field.format) ? Number(d) : d)
  },
  [ALL_FIELD_TYPES.real]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseFloat(d) === d,
    // Note: this will result in NaN for some strings
    parse: parseFloat
  },
  [ALL_FIELD_TYPES.object]: {
    valid: isPlainObject,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.array]: {
    valid: Array.isArray,
    parse: tryParseJsonString
  }
};
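
/*
 * Usage sketch (illustrative, not part of the original file): each entry pairs
 * a `valid` predicate (is the value already of the target type?) with a `parse`
 * function that coerces a raw csv string, e.g. for an integer field:
 *
 *   const intParser = PARSE_FIELD_VALUE_FROM_STRING[ALL_FIELD_TYPES.integer];
 *   intParser.valid('42'); // false: '42' is still a string
 *   intParser.parse('42'); // 42
 */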

/**
 * Process csv data, output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * @param rawData raw csv string
 * @param header optional column names; if not provided and `rawData` is an array of rows, the first row is treated as the header
 * @returns data object `{fields: [], rows: []}` that can be passed to `addDataToMap`
 * @public
 * @example
 * import {processCsvData} from 'kepler.gl/processors';
 *
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
 *
 * const dataset = {
 *  info: {id: 'test_data', label: 'My Csv'},
 *  data: processCsvData(testData)
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: [dataset],
 *  options: {centerMap: true, readOnly: true}
 * }));
 */
export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult {
  let rows: unknown[][] | undefined;
  let headerRow: string[] | undefined;

  if (typeof rawData === 'string') {
    const parsedRows: string[][] = csvParseRows(rawData);

    if (!Array.isArray(parsedRows) || parsedRows.length < 2) {
      // looks like an empty file; throw an error to be caught
      throw new Error('process Csv Data Failed: CSV is empty');
    }
    headerRow = parsedRows[0];
    rows = parsedRows.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    rows = rawData;
    headerRow = header;

    if (!Array.isArray(headerRow)) {
      // if data is passed in as an array of rows without a header,
      // assume the first row is the header
      // @ts-ignore
      headerRow = rawData[0];
      rows = rawData.slice(1);
    }
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // here we assume the first row of an uploaded csv file contains the column names

  cleanUpFalsyCsvValue(rows);
  // no need to run type detection on every data point:
  // get a sample of non-null values to run the analyzer on
  const sample = getSampleForTypeAnalyze({fields: headerRow, rows});
  const fields = getFieldsFromData(sample, headerRow);
  const parsedRows = parseRowsByFields(rows, fields);

  return {fields, rows: parsedRows};
}

/**
 * Parse csv rows by analyzed field types, so that `'1'` -> `1` and `'True'` -> `true`
 * @param rows
 * @param fields
 */
export function parseRowsByFields(rows: any[][], fields: Field[]) {
  // edits rows in place
  const geojsonFieldIdx = fields.findIndex(f => f.name === '_geojson');
  fields.forEach(parseCsvRowsByFieldType.bind(null, rows, geojsonFieldIdx));

  return rows;
}
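
/*
 * Usage sketch (illustrative): `rows` is mutated and returned. Assuming
 * `fields` was produced by `getFieldsFromData` and detected an integer and a
 * boolean column:
 *
 *   const rows = [['1', 'True'], ['2', 'False']];
 *   parseRowsByFields(rows, fields); // -> [[1, true], [2, false]]
 */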

/**
 * Convert falsy values in csv, including `'', 'null', 'NULL', 'Null', 'NaN'`, to `null`,
 * so that type-analyzer won't detect them as strings
 *
 * @param rows
 */
function cleanUpFalsyCsvValue(rows: unknown[][]): void {
  const re = new RegExp(CSV_NULLS, 'g');
  for (let i = 0; i < rows.length; i++) {
    for (let j = 0; j < rows[i].length; j++) {
      // the analyzer will type a field as 'string' if it contains empty values,
      // which d3.csv parses as ''; parse empty data as null instead
      // TODO: create warning when `CSV_NULLS` are detected in the data
      if (typeof rows[i][j] === 'string' && (rows[i][j] as string).match(re)) {
        rows[i][j] = null;
      }
    }
  }
}
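
/*
 * Illustrative example (not part of the original file): cells matching
 * CSV_NULLS are nulled in place.
 *
 *   const rows = [['a', ''], ['NaN', 'b']];
 *   cleanUpFalsyCsvValue(rows); // rows -> [['a', null], [null, 'b']]
 */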

/**
 * Process uploaded csv file to parse value by field type
 *
 * @param rows
 * @param geoFieldIdx field index
 * @param field
 * @param i
 */
export function parseCsvRowsByFieldType(
  rows: unknown[][],
  geoFieldIdx: number,
  field: Field,
  i: number
): void {
  const parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (parser) {
    // check whether the first non-null value is already parsed
    const first = rows.find(r => notNullorUndefined(r[i]));
    if (!first || parser.valid(first[i], field)) {
      return;
    }
    rows.forEach(row => {
      // parse string value based on field type
      if (row[i] !== null) {
        row[i] = parser.parse(row[i], field);
        if (
          geoFieldIdx > -1 &&
          isPlainObject(row[geoFieldIdx]) &&
          // @ts-ignore
          hasOwnProperty(row[geoFieldIdx], 'properties')
        ) {
          // @ts-ignore
          row[geoFieldIdx].properties[field.name] = row[i];
        }
      }
    });
  }
}
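
/*
 * Usage sketch (illustrative): parse a single column in place; pass
 * geoFieldIdx = -1 when there is no _geojson column.
 *
 *   const rows: unknown[][] = [['10'], ['20'], [null]];
 *   parseCsvRowsByFieldType(rows, -1, integerField, 0); // rows -> [[10], [20], [null]]
 *
 * where `integerField` is a hypothetical Field whose `type` is ALL_FIELD_TYPES.integer.
 */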

/* eslint-enable complexity */

/**
 * Process data where each row is an object, output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 * @param rawData an array of row objects, each object should have the same set of keys
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processRowObject} from 'kepler.gl/processors';
 *
 * const data = [
 *  {lat: 31.27, lng: 127.56, value: 3},
 *  {lat: 31.22, lng: 126.26, value: 1}
 * ];
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {label: 'My Data', id: 'my_data'},
 *    data: processRowObject(data)
 *  }
 * }));
 */
export function processRowObject(rawData: unknown[]): ProcessorResult {
  if (!Array.isArray(rawData)) {
    return null;
  } else if (!rawData.length) {
    // data is empty
    return {
      fields: [],
      rows: []
    };
  }

  const keys = Object.keys(rawData[0]); // e.g. [lat, lng, value]
  const rows = rawData.map(d => keys.map(key => d[key])); // e.g. [[31.27, 127.56, 3]]

  // row objects can still contain values like `Null` or `N/A`
  cleanUpFalsyCsvValue(rows);

  return processCsvData(rows, keys);
}

/**
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
 * output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 *
 * @param rawData raw geojson feature collection
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processGeojson} from 'kepler.gl/processors';
 *
 * const geojson = {
 *         "type" : "FeatureCollection",
 *         "features" : [{
 *                 "type" : "Feature",
 *                 "properties" : {
 *                         "capacity" : "10",
 *                         "type" : "U-Rack"
 *                 },
 *                 "geometry" : {
 *                         "type" : "Point",
 *                         "coordinates" : [ -71.073283, 42.417500 ]
 *                 }
 *         }]
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {
 *      label: 'Sample Taxi Trips in New York City',
 *      id: 'test_trip_data'
 *    },
 *    data: processGeojson(geojson)
 *  }
 * }));
 */
export function processGeojson(rawData: unknown): ProcessorResult {
  const normalizedGeojson = normalize(rawData);

  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    // failed to normalize geojson
    const error = new Error(
      `Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](${GUIDES_FILE_FORMAT_DOC})`
    );
    throw error;
  }

  // collect all feature fields
  const allDataRows: Array<{_geojson: Feature} & keyof Feature> = [];
  for (let i = 0; i < normalizedGeojson.features.length; i++) {
    const f = normalizedGeojson.features[i];
    if (f.geometry) {
      allDataRows.push({
        // add the feature to the _geojson field
        _geojson: f,
        ...(f.properties || {})
      });
    }
  }
  // get all the fields
  const fields = allDataRows.reduce<string[]>((accu, curr) => {
    Object.keys(curr).forEach(key => {
      if (!accu.includes(key)) {
        accu.push(key);
      }
    });
    return accu;
  }, []);

  // make sure each feature has exactly the same fields
  allDataRows.forEach(d => {
    fields.forEach(f => {
      if (!(f in d)) {
        d[f] = null;
        if (d._geojson.properties) {
          d._geojson.properties[f] = null;
        }
      }
    });
  });

  return processRowObject(allDataRows);
}

/**
 * Process saved kepler.gl json to be passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData
 * @param schema
 * @returns datasets and config `{datasets: {}, config: {}}`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processKeplerglJSON} from 'kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
export function processKeplerglJSON(rawData: SavedMap, schema = KeplerGlSchema): LoadedMap | null {
  return rawData ? schema.load(rawData.datasets, rawData.config) : null;
}

/**
 * Parse a single dataset or an array of datasets saved using the kepler.gl schema
 * @param rawData
 * @param schema
 */
export function processKeplerglDataset(
  rawData: object | object[],
  schema = KeplerGlSchema
): ParsedDataset | ParsedDataset[] | null {
  if (!rawData) {
    return null;
  }

  const results = schema.parseSavedData(toArray(rawData));
  if (!results) {
    return null;
  }
  return Array.isArray(rawData) ? results : results[0];
}
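
/*
 * Usage sketch (illustrative, `savedDataset` et al. are hypothetical saved
 * datasets): the return shape mirrors the input shape.
 *
 *   const parsed = processKeplerglDataset(savedDataset);         // ParsedDataset | null
 *   const parsedList = processKeplerglDataset([savedA, savedB]); // ParsedDataset[] | null
 */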

/**
 * Parse an arrow table and return a dataset
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows`, or null
 */
export function processArrowTable(arrowTable: ArrowTable): ProcessorResult | null {
  // @ts-ignore - Unknown data type causing build failures
  return processArrowBatches(arrowTable.data.batches);
}

export function arrowSchemaToFields(schema: arrow.Schema): Field[] {
  return schema.fields.map((field: arrow.Field, index: number) => {
    const isGeoArrowColumn = field.metadata.get('ARROW:extension:name')?.startsWith('geoarrow');
    return {
      ...field,
      name: field.name,
      id: field.name,
      displayName: field.name,
      format: '',
      fieldIdx: index,
      type: isGeoArrowColumn ? ALL_FIELD_TYPES.geoarrow : arrowDataTypeToFieldType(field.type),
      analyzerType: isGeoArrowColumn
        ? AnalyzerDATA_TYPES.GEOMETRY
        : arrowDataTypeToAnalyzerDataType(field.type),
      valueAccessor: (dc: any) => d => {
        return dc.valueAt(d.index, index);
      },
      metadata: field.metadata
    };
  });
}

/**
 * Parse arrow batches returned from parseInBatches()
 *
 * @param arrowBatches the arrow record batches to parse
 * @returns dataset containing `fields` and `rows`, or null
 */
export function processArrowBatches(arrowBatches: arrow.RecordBatch[]): ProcessorResult | null {
  if (arrowBatches.length === 0) {
    return null;
  }
  const arrowTable = new arrow.Table(arrowBatches);
  const fields = arrowSchemaToFields(arrowTable.schema);

  const cols = [...Array(arrowTable.numCols).keys()].map(i => arrowTable.getChildAt(i));

  // return empty rows and use the raw arrow table to construct a column-wise data container
  return {fields, rows: [], cols, metadata: arrowTable.schema.metadata};
}
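
/*
 * Usage sketch (illustrative): build a dataset from an arrow table's record
 * batches, e.g. one created in memory with apache-arrow:
 *
 *   const table = arrow.tableFromJSON([{lat: 31.27, lng: 127.56}]);
 *   const result = processArrowBatches(table.batches);
 *   // result.fields describes the columns; rows stays [] and cols holds the column vectors
 */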

export const DATASET_HANDLERS = {
  [DATASET_FORMATS.row]: processRowObject,
  [DATASET_FORMATS.geojson]: processGeojson,
  [DATASET_FORMATS.csv]: processCsvData,
  [DATASET_FORMATS.arrow]: processArrowTable,
  [DATASET_FORMATS.keplergl]: processKeplerglDataset
};
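
/*
 * Usage sketch (illustrative, `rawCsvString` is a hypothetical input):
 * dispatch to the right processor by dataset format.
 *
 *   const handler = DATASET_HANDLERS[DATASET_FORMATS.csv];
 *   const data = handler(rawCsvString); // {fields, rows}
 */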

export const Processors: {
  processGeojson: typeof processGeojson;
  processCsvData: typeof processCsvData;
  processArrowTable: typeof processArrowTable;
  processArrowBatches: typeof processArrowBatches;
  processRowObject: typeof processRowObject;
  processKeplerglJSON: typeof processKeplerglJSON;
  processKeplerglDataset: typeof processKeplerglDataset;
  analyzerTypeToFieldType: typeof analyzerTypeToFieldType;
  getFieldsFromData: typeof getFieldsFromData;
  parseCsvRowsByFieldType: typeof parseCsvRowsByFieldType;
} = {
  processGeojson,
  processCsvData,
  processArrowTable,
  processArrowBatches,
  processRowObject,
  processKeplerglJSON,
  processKeplerglDataset,
  analyzerTypeToFieldType,
  getFieldsFromData,
  parseCsvRowsByFieldType
};