keplergl / kepler.gl, build 13015166758
28 Jan 2025 04:37PM UTC, coverage: 66.405% (down 0.003% from 66.408%)
Pull Request #2941: [docs] update docs for Kepler.gl release 3.1 (merge c6c84e0ff into da9988532, via github / web-flow)

5989 of 10516 branches covered (56.95%); branch coverage is included in the aggregate %.
12301 of 17027 relevant lines covered (72.24%), 88.95 hits per line.

Source file: /src/processors/src/data-processor.ts (74.29% covered)

// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

import * as arrow from 'apache-arrow';
import {csvParseRows} from 'd3-dsv';
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
import normalize from '@mapbox/geojson-normalize';
import {ArrowTable} from '@loaders.gl/schema';
import {ALL_FIELD_TYPES, DATASET_FORMATS, GUIDES_FILE_FORMAT_DOC} from '@kepler.gl/constants';
import {ProcessorResult, Field} from '@kepler.gl/types';
import {
  arrowDataTypeToAnalyzerDataType,
  arrowDataTypeToFieldType,
  hasOwnProperty,
  isPlainObject
} from '@kepler.gl/utils';
import {
  analyzerTypeToFieldType,
  getSampleForTypeAnalyze,
  getFieldsFromData,
  h3IsValid,
  notNullorUndefined,
  toArray
} from '@kepler.gl/common-utils';
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
import {Feature} from '@nebula.gl/edit-modes';

// if any of these values occurs in the csv, parse it to null;
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
// the trailing empty alternative also matches the empty string
export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;
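
// Illustrative sketch (not part of the original file): the empty alternative
// at the end of the regex makes empty cells match as well.
//   CSV_NULLS.test('NULL'); // true, cell becomes null
//   CSV_NULLS.test('');     // true
//   CSV_NULLS.test('0');    // false, value is kept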

function tryParseJsonString(str) {
  try {
    return JSON.parse(str);
  } catch (e) {
    return null;
  }
}

export const PARSE_FIELD_VALUE_FROM_STRING = {
  [ALL_FIELD_TYPES.boolean]: {
    valid: (d: unknown): boolean => typeof d === 'boolean',
    parse: (d: unknown): boolean => d === 'true' || d === 'True' || d === 'TRUE' || d === '1'
  },
  [ALL_FIELD_TYPES.integer]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseInt(d, 10) === d,
    // @ts-ignore
    parse: (d: unknown): number => parseInt(d, 10)
  },
  [ALL_FIELD_TYPES.timestamp]: {
    valid: (d: unknown, field: Field): boolean =>
      ['x', 'X'].includes(field.format) ? typeof d === 'number' : typeof d === 'string',
    parse: (d: any, field: Field) => (['x', 'X'].includes(field.format) ? Number(d) : d)
  },
  [ALL_FIELD_TYPES.real]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseFloat(d) === d,
    // Note: this will result in NaN for some strings
    parse: parseFloat
  },
  [ALL_FIELD_TYPES.object]: {
    valid: isPlainObject,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.array]: {
    valid: Array.isArray,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.h3]: {
    valid: d => h3IsValid(d),
    parse: d => d
  }
};
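
// Illustrative sketch (not part of the original file): each entry pairs a
// `valid` check (is the value already of the target type?) with a `parse`
// converter for raw csv strings, e.g.:
//   const intParser = PARSE_FIELD_VALUE_FROM_STRING[ALL_FIELD_TYPES.integer];
//   intParser.valid('12'); // false, still a string
//   intParser.parse('12'); // 12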

/**
 * Process csv data, output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * @param rawData raw csv string
 * @returns data object `{fields: [], rows: []}` that can be passed to `addDataToMap`
 * @public
 * @example
 * import {processCsvData} from 'kepler.gl/processors';
 *
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
 *
 * const dataset = {
 *  info: {id: 'test_data', label: 'My Csv'},
 *  data: processCsvData(testData)
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: [dataset],
 *  options: {centerMap: true, readOnly: true}
 * }));
 */
export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult {
  let rows: unknown[][] | undefined;
  let headerRow: string[] | undefined;

  if (typeof rawData === 'string') {
    const parsedRows: string[][] = csvParseRows(rawData);

    if (!Array.isArray(parsedRows) || parsedRows.length < 2) {
      // looks like an empty file; throw an error to be caught
      throw new Error('process Csv Data Failed: CSV is empty');
    }
    headerRow = parsedRows[0];
    rows = parsedRows.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    rows = rawData;
    headerRow = header;

    if (!Array.isArray(headerRow)) {
      // if data is passed in as an array of rows and the header is missing,
      // assume the first row is the header
      // @ts-ignore
      headerRow = rawData[0];
      rows = rawData.slice(1);
    }
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // here we assume the first row of the uploaded csv file contains
  // the column names

  cleanUpFalsyCsvValue(rows);
  // No need to run type detection on every data point;
  // here we get a list of non-null values to run the analyzer on
  const sample = getSampleForTypeAnalyze({fields: headerRow, rows});
  const fields = getFieldsFromData(sample, headerRow);
  const parsedRows = parseRowsByFields(rows, fields);

  return {fields, rows: parsedRows};
}

/**
 * Parse rows of csv by analyzed field types, so that `'1'` -> `1` and `'True'` -> `true`
 * @param rows
 * @param fields
 */
export function parseRowsByFields(rows: any[][], fields: Field[]) {
  // Edit rows in place
  const geojsonFieldIdx = fields.findIndex(f => f.name === '_geojson');
  fields.forEach(parseCsvRowsByFieldType.bind(null, rows, geojsonFieldIdx));

  return rows;
}
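
// Illustrative sketch (not part of the original file), assuming `fields` was
// produced by getFieldsFromData and detected as [integer, boolean]:
//   parseRowsByFields([['1', 'True'], ['2', 'False']], fields);
//   // rows become [[1, true], [2, false]]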

/**
 * Convert falsy values in csv, including `'', 'null', 'NULL', 'Null', 'NaN'`, to `null`,
 * so that type-analyzer won't detect them as strings
 *
 * @param rows
 */
function cleanUpFalsyCsvValue(rows: unknown[][]): void {
  const re = new RegExp(CSV_NULLS, 'g');
  for (let i = 0; i < rows.length; i++) {
    for (let j = 0; j < rows[i].length; j++) {
      // the analyzer will set a field to 'string' if there are empty values,
      // which d3.csv parses as '';
      // here we parse empty data as null
      // TODO: create a warning when we detect `CSV_NULLS` in the data
      if (typeof rows[i][j] === 'string' && (rows[i][j] as string).match(re)) {
        rows[i][j] = null;
      }
    }
  }
}

/**
 * Process uploaded csv file to parse value by field type
 *
 * @param rows
 * @param geoFieldIdx field index
 * @param field
 * @param i
 */
export function parseCsvRowsByFieldType(
  rows: unknown[][],
  geoFieldIdx: number,
  field: Field,
  i: number
): void {
  const parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (parser) {
    // check the first non-null value to see if it's already parsed
    const first = rows.find(r => notNullorUndefined(r[i]));
    if (!first || parser.valid(first[i], field)) {
      return;
    }
    rows.forEach(row => {
      // parse string value based on field type
      if (row[i] !== null) {
        row[i] = parser.parse(row[i], field);
        if (
          geoFieldIdx > -1 &&
          isPlainObject(row[geoFieldIdx]) &&
          // @ts-ignore
          hasOwnProperty(row[geoFieldIdx], 'properties')
        ) {
          // @ts-ignore
          row[geoFieldIdx].properties[field.name] = row[i];
        }
      }
    });
  }
}

/* eslint-enable complexity */

/**
 * Process data where each row is an object; output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 * @param rawData an array of row objects, each object should have the same number of keys
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processRowObject} from 'kepler.gl/processors';
 *
 * const data = [
 *  {lat: 31.27, lng: 127.56, value: 3},
 *  {lat: 31.22, lng: 126.26, value: 1}
 * ];
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {label: 'My Data', id: 'my_data'},
 *    data: processRowObject(data)
 *  }
 * }));
 */
export function processRowObject(rawData: unknown[]): ProcessorResult {
  if (!Array.isArray(rawData)) {
    return null;
  } else if (!rawData.length) {
    // data is empty
    return {
      fields: [],
      rows: []
    };
  }

  const keys = Object.keys(rawData[0]); // [lat, lng, value]
  const rows = rawData.map(d => keys.map(key => d[key])); // [[31.27, 127.56, 3]]

  // row objects can still contain values like `Null` or `N/A`
  cleanUpFalsyCsvValue(rows);

  return processCsvData(rows, keys);
}

/**
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
 * output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 *
 * @param rawData raw geojson feature collection
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processGeojson} from 'kepler.gl/processors';
 *
 * const geojson = {
 *   "type": "FeatureCollection",
 *   "features": [{
 *     "type": "Feature",
 *     "properties": {
 *       "capacity": "10",
 *       "type": "U-Rack"
 *     },
 *     "geometry": {
 *       "type": "Point",
 *       "coordinates": [-71.073283, 42.417500]
 *     }
 *   }]
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {
 *      label: 'Sample Taxi Trips in New York City',
 *      id: 'test_trip_data'
 *    },
 *    data: processGeojson(geojson)
 *  }
 * }));
 */
export function processGeojson(rawData: unknown): ProcessorResult {
  const normalizedGeojson = normalize(rawData);

  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    throw new Error(
      `Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](${GUIDES_FILE_FORMAT_DOC})`
    );
  }

  // getting all feature fields
  const allDataRows: Array<{_geojson: Feature} & keyof Feature> = [];
  for (let i = 0; i < normalizedGeojson.features.length; i++) {
    const f = normalizedGeojson.features[i];
    if (f.geometry) {
      allDataRows.push({
        // add feature to _geojson field
        _geojson: f,
        ...(f.properties || {})
      });
    }
  }
  // get all the fields
  const fields = allDataRows.reduce<string[]>((accu, curr) => {
    Object.keys(curr).forEach(key => {
      if (!accu.includes(key)) {
        accu.push(key);
      }
    });
    return accu;
  }, []);

  // make sure each feature has exactly the same fields
  allDataRows.forEach(d => {
    fields.forEach(f => {
      if (!(f in d)) {
        d[f] = null;
        if (d._geojson.properties) {
          d._geojson.properties[f] = null;
        }
      }
    });
  });

  return processRowObject(allDataRows);
}

/**
 * Process saved kepler.gl json to be passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData
 * @param schema
 * @returns datasets and config `{datasets: {}, config: {}}`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processKeplerglJSON} from 'kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
export function processKeplerglJSON(rawData: SavedMap, schema = KeplerGlSchema): LoadedMap | null {
  return rawData ? schema.load(rawData.datasets, rawData.config) : null;
}

/**
 * Parse a single dataset, or an array of datasets, saved using the kepler.gl schema
 * @param rawData
 * @param schema
 */
export function processKeplerglDataset(
  rawData: object | object[],
  schema = KeplerGlSchema
): ParsedDataset | ParsedDataset[] | null {
  if (!rawData) {
    return null;
  }

  const results = schema.parseSavedData(toArray(rawData));
  if (!results) {
    return null;
  }
  return Array.isArray(rawData) ? results : results[0];
}
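
// Illustrative sketch (not part of the original file): the return shape
// mirrors the input shape, assuming `savedDataset` is a dataset previously
// saved with the kepler.gl schema:
//   processKeplerglDataset(savedDataset);     // ParsedDataset | null
//   processKeplerglDataset([savedA, savedB]); // ParsedDataset[] | null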

/**
 * Parse an arrow table and return a dataset
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowTable(arrowTable: ArrowTable): ProcessorResult | null {
  // @ts-ignore - Unknown data type causing build failures
  return processArrowBatches(arrowTable.data.batches);
}

export function arrowSchemaToFields(schema: arrow.Schema): Field[] {
  return schema.fields.map((field: arrow.Field, index: number) => {
    const isGeoArrowColumn = field.metadata.get('ARROW:extension:name')?.startsWith('geoarrow');
    return {
      ...field,
      name: field.name,
      id: field.name,
      displayName: field.name,
      format: '',
      fieldIdx: index,
      type: isGeoArrowColumn ? ALL_FIELD_TYPES.geoarrow : arrowDataTypeToFieldType(field.type),
      analyzerType: isGeoArrowColumn
        ? AnalyzerDATA_TYPES.GEOMETRY
        : arrowDataTypeToAnalyzerDataType(field.type),
      valueAccessor: (dc: any) => d => {
        return dc.valueAt(d.index, index);
      },
      metadata: field.metadata
    };
  });
}
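
// Illustrative note (not part of the original file): a column is treated as
// geoarrow when its 'ARROW:extension:name' metadata starts with 'geoarrow'
// (e.g. 'geoarrow.point'); all other columns fall through to the plain
// arrow-to-kepler field type mapping.
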
/**
 * Parse arrow record batches returned from parseInBatches()
 *
 * @param arrowBatches the arrow record batches to parse
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowBatches(arrowBatches: arrow.RecordBatch[]): ProcessorResult | null {
  if (arrowBatches.length === 0) {
    return null;
  }
  const arrowTable = new arrow.Table(arrowBatches);
  const fields = arrowSchemaToFields(arrowTable.schema);

  const cols = [...Array(arrowTable.numCols).keys()].map(i => arrowTable.getChildAt(i));

  // return empty rows and use the raw arrow table to construct a column-wise data container
  return {fields, rows: [], cols, metadata: arrowTable.schema.metadata};
}
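
// Illustrative sketch (not part of the original file): feeding batches from
// an in-memory table built with apache-arrow's tableFromArrays:
//   const table = arrow.tableFromArrays({id: Int32Array.from([1, 2, 3])});
//   processArrowBatches(table.batches); // {fields, rows: [], cols, metadata}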

export const DATASET_HANDLERS = {
  [DATASET_FORMATS.row]: processRowObject,
  [DATASET_FORMATS.geojson]: processGeojson,
  [DATASET_FORMATS.csv]: processCsvData,
  [DATASET_FORMATS.arrow]: processArrowTable,
  [DATASET_FORMATS.keplergl]: processKeplerglDataset
};
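
// Illustrative sketch (not part of the original file): picking a processor
// by the dataset's format tag:
//   const handler = DATASET_HANDLERS[DATASET_FORMATS.csv]; // processCsvData
//   const data = handler('id,value\n1,2\n3,4');            // {fields, rows}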

export const Processors: {
  processGeojson: typeof processGeojson;
  processCsvData: typeof processCsvData;
  processArrowTable: typeof processArrowTable;
  processArrowBatches: typeof processArrowBatches;
  processRowObject: typeof processRowObject;
  processKeplerglJSON: typeof processKeplerglJSON;
  processKeplerglDataset: typeof processKeplerglDataset;
  analyzerTypeToFieldType: typeof analyzerTypeToFieldType;
  getFieldsFromData: typeof getFieldsFromData;
  parseCsvRowsByFieldType: typeof parseCsvRowsByFieldType;
} = {
  processGeojson,
  processCsvData,
  processArrowTable,
  processArrowBatches,
  processRowObject,
  processKeplerglJSON,
  processKeplerglDataset,
  analyzerTypeToFieldType,
  getFieldsFromData,
  parseCsvRowsByFieldType
};