keplergl / kepler.gl, build 13015166758
28 Jan 2025 04:37PM UTC, coverage: 66.405% (down 0.003% from 66.408%)
Pull Request #2941: [docs] update docs for Kepler.gl release 3.1 (merge c6c84e0ff into da9988532, via github / web-flow)

5989 of 10516 branches covered (56.95%); branch coverage is included in the aggregate %.
12301 of 17027 relevant lines covered (72.24%), 88.95 hits per line.

Source file: /src/processors/src/data-processor.ts (74.29% covered)

// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

import * as arrow from 'apache-arrow';
import {csvParseRows} from 'd3-dsv';
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
import normalize from '@mapbox/geojson-normalize';
import {ArrowTable} from '@loaders.gl/schema';
import {ALL_FIELD_TYPES, DATASET_FORMATS, GUIDES_FILE_FORMAT_DOC} from '@kepler.gl/constants';
import {ProcessorResult, Field} from '@kepler.gl/types';
import {
  arrowDataTypeToAnalyzerDataType,
  arrowDataTypeToFieldType,
  hasOwnProperty,
  isPlainObject
} from '@kepler.gl/utils';
import {
  analyzerTypeToFieldType,
  getSampleForTypeAnalyze,
  getFieldsFromData,
  h3IsValid,
  notNullorUndefined,
  toArray
} from '@kepler.gl/common-utils';
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
import {Feature} from '@nebula.gl/edit-modes';

// if any of these values occurs in the csv, parse it to null;
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
// the trailing empty alternative also matches the empty string
export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;
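
// Illustrative sketch (not part of the original file): the empty alternative
// at the end of the regex makes empty cells match as well.
//   CSV_NULLS.test('NULL'); // true, cell becomes null
//   CSV_NULLS.test('');     // true
//   CSV_NULLS.test('0');    // false, value is kept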

function tryParseJsonString(str) {
  try {
    return JSON.parse(str);
  } catch (e) {
    return null;
  }
}

export const PARSE_FIELD_VALUE_FROM_STRING = {
  [ALL_FIELD_TYPES.boolean]: {
    valid: (d: unknown): boolean => typeof d === 'boolean',
    parse: (d: unknown): boolean => d === 'true' || d === 'True' || d === 'TRUE' || d === '1'
  },
  [ALL_FIELD_TYPES.integer]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseInt(d, 10) === d,
    // @ts-ignore
    parse: (d: unknown): number => parseInt(d, 10)
  },
  [ALL_FIELD_TYPES.timestamp]: {
    valid: (d: unknown, field: Field): boolean =>
      ['x', 'X'].includes(field.format) ? typeof d === 'number' : typeof d === 'string',
    parse: (d: any, field: Field) => (['x', 'X'].includes(field.format) ? Number(d) : d)
  },
  [ALL_FIELD_TYPES.real]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseFloat(d) === d,
    // Note: this will result in NaN for some strings
    parse: parseFloat
  },
  [ALL_FIELD_TYPES.object]: {
    valid: isPlainObject,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.array]: {
    valid: Array.isArray,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.h3]: {
    valid: d => h3IsValid(d),
    parse: d => d
  }
};
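
// Illustrative sketch (not part of the original file): each entry pairs a
// `valid` check (is the value already of the target type?) with a `parse`
// converter for raw csv strings, e.g.:
//   const intParser = PARSE_FIELD_VALUE_FROM_STRING[ALL_FIELD_TYPES.integer];
//   intParser.valid('12'); // false, still a string
//   intParser.parse('12'); // 12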

/**
 * Process csv data, output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * @param rawData raw csv string
 * @returns data object `{fields: [], rows: []}` that can be passed to `addDataToMap`
 * @public
 * @example
 * import {processCsvData} from 'kepler.gl/processors';
 *
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
 *
 * const dataset = {
 *  info: {id: 'test_data', label: 'My Csv'},
 *  data: processCsvData(testData)
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: [dataset],
 *  options: {centerMap: true, readOnly: true}
 * }));
 */
export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult {
  let rows: unknown[][] | undefined;
  let headerRow: string[] | undefined;

  if (typeof rawData === 'string') {
    const parsedRows: string[][] = csvParseRows(rawData);

    if (!Array.isArray(parsedRows) || parsedRows.length < 2) {
      // looks like an empty file; throw an error to be caught
      throw new Error('process Csv Data Failed: CSV is empty');
    }
    headerRow = parsedRows[0];
    rows = parsedRows.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    rows = rawData;
    headerRow = header;

    if (!Array.isArray(headerRow)) {
      // if data is passed in as an array of rows and the header is missing,
      // assume the first row is the header
      // @ts-ignore
      headerRow = rawData[0];
      rows = rawData.slice(1);
    }
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // here we assume the first row of the uploaded csv file contains
  // the column names

  cleanUpFalsyCsvValue(rows);
  // No need to run type detection on every data point;
  // here we get a list of non-null values to run the analyzer on
  const sample = getSampleForTypeAnalyze({fields: headerRow, rows});
  const fields = getFieldsFromData(sample, headerRow);
  const parsedRows = parseRowsByFields(rows, fields);

  return {fields, rows: parsedRows};
}

/**
 * Parse rows of csv by analyzed field types, so that `'1'` -> `1` and `'True'` -> `true`
 * @param rows
 * @param fields
 */
export function parseRowsByFields(rows: any[][], fields: Field[]) {
  // Edit rows in place
  const geojsonFieldIdx = fields.findIndex(f => f.name === '_geojson');
  fields.forEach(parseCsvRowsByFieldType.bind(null, rows, geojsonFieldIdx));

  return rows;
}
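
// Illustrative sketch (not part of the original file), assuming `fields` was
// produced by getFieldsFromData and detected as [integer, boolean]:
//   parseRowsByFields([['1', 'True'], ['2', 'False']], fields);
//   // rows become [[1, true], [2, false]]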

/**
 * Convert falsy values in csv, including `'', 'null', 'NULL', 'Null', 'NaN'`, to `null`,
 * so that type-analyzer won't detect them as strings
 *
 * @param rows
 */
function cleanUpFalsyCsvValue(rows: unknown[][]): void {
  const re = new RegExp(CSV_NULLS, 'g');
  for (let i = 0; i < rows.length; i++) {
    for (let j = 0; j < rows[i].length; j++) {
      // the analyzer will set a field to 'string' if there are empty values,
      // which d3.csv parses as '';
      // here we parse empty data as null
      // TODO: create a warning when we detect `CSV_NULLS` in the data
      if (typeof rows[i][j] === 'string' && (rows[i][j] as string).match(re)) {
        rows[i][j] = null;
      }
    }
  }
}

/**
 * Process uploaded csv file to parse value by field type
 *
 * @param rows
 * @param geoFieldIdx field index
 * @param field
 * @param i
 */
export function parseCsvRowsByFieldType(
  rows: unknown[][],
  geoFieldIdx: number,
  field: Field,
  i: number
): void {
  const parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (parser) {
    // check the first non-null value to see if it's already parsed
    const first = rows.find(r => notNullorUndefined(r[i]));
    if (!first || parser.valid(first[i], field)) {
      return;
    }
    rows.forEach(row => {
      // parse string value based on field type
      if (row[i] !== null) {
        row[i] = parser.parse(row[i], field);
        if (
          geoFieldIdx > -1 &&
          isPlainObject(row[geoFieldIdx]) &&
          // @ts-ignore
          hasOwnProperty(row[geoFieldIdx], 'properties')
        ) {
          // @ts-ignore
          row[geoFieldIdx].properties[field.name] = row[i];
        }
      }
    });
  }
}

/* eslint-enable complexity */

/**
 * Process data where each row is an object; output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 * @param rawData an array of row objects, each object should have the same number of keys
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processRowObject} from 'kepler.gl/processors';
 *
 * const data = [
 *  {lat: 31.27, lng: 127.56, value: 3},
 *  {lat: 31.22, lng: 126.26, value: 1}
 * ];
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {label: 'My Data', id: 'my_data'},
 *    data: processRowObject(data)
 *  }
 * }));
 */
export function processRowObject(rawData: unknown[]): ProcessorResult {
  if (!Array.isArray(rawData)) {
    return null;
  } else if (!rawData.length) {
    // data is empty
    return {
      fields: [],
      rows: []
    };
  }

  const keys = Object.keys(rawData[0]); // [lat, lng, value]
  const rows = rawData.map(d => keys.map(key => d[key])); // [[31.27, 127.56, 3]]

  // row objects can still contain values like `Null` or `N/A`
  cleanUpFalsyCsvValue(rows);

  return processCsvData(rows, keys);
}

/**
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
 * output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 *
 * @param rawData raw geojson feature collection
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processGeojson} from 'kepler.gl/processors';
 *
 * const geojson = {
 *   "type": "FeatureCollection",
 *   "features": [{
 *     "type": "Feature",
 *     "properties": {
 *       "capacity": "10",
 *       "type": "U-Rack"
 *     },
 *     "geometry": {
 *       "type": "Point",
 *       "coordinates": [-71.073283, 42.417500]
 *     }
 *   }]
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {
 *      label: 'Sample Taxi Trips in New York City',
 *      id: 'test_trip_data'
 *    },
 *    data: processGeojson(geojson)
 *  }
 * }));
 */
export function processGeojson(rawData: unknown): ProcessorResult {
  const normalizedGeojson = normalize(rawData);

  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    throw new Error(
      `Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](${GUIDES_FILE_FORMAT_DOC})`
    );
  }

  // getting all feature fields
  const allDataRows: Array<{_geojson: Feature} & keyof Feature> = [];
  for (let i = 0; i < normalizedGeojson.features.length; i++) {
    const f = normalizedGeojson.features[i];
    if (f.geometry) {
      allDataRows.push({
        // add feature to _geojson field
        _geojson: f,
        ...(f.properties || {})
      });
    }
  }
  // get all the fields
  const fields = allDataRows.reduce<string[]>((accu, curr) => {
    Object.keys(curr).forEach(key => {
      if (!accu.includes(key)) {
        accu.push(key);
      }
    });
    return accu;
  }, []);

  // make sure each feature has exactly the same fields
  allDataRows.forEach(d => {
    fields.forEach(f => {
      if (!(f in d)) {
        d[f] = null;
        if (d._geojson.properties) {
          d._geojson.properties[f] = null;
        }
      }
    });
  });

  return processRowObject(allDataRows);
}

/**
 * Process saved kepler.gl json to be passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData
 * @param schema
 * @returns datasets and config `{datasets: {}, config: {}}`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processKeplerglJSON} from 'kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
export function processKeplerglJSON(rawData: SavedMap, schema = KeplerGlSchema): LoadedMap | null {
  return rawData ? schema.load(rawData.datasets, rawData.config) : null;
}

/**
 * Parse a single dataset, or an array of datasets, saved using the kepler.gl schema
 * @param rawData
 * @param schema
 */
export function processKeplerglDataset(
  rawData: object | object[],
  schema = KeplerGlSchema
): ParsedDataset | ParsedDataset[] | null {
  if (!rawData) {
    return null;
  }

  const results = schema.parseSavedData(toArray(rawData));
  if (!results) {
    return null;
  }
  return Array.isArray(rawData) ? results : results[0];
}
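
// Illustrative sketch (not part of the original file): the return shape
// mirrors the input shape, assuming `savedDataset` is a dataset previously
// saved with the kepler.gl schema:
//   processKeplerglDataset(savedDataset);     // ParsedDataset | null
//   processKeplerglDataset([savedA, savedB]); // ParsedDataset[] | null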

/**
 * Parse an arrow table and return a dataset
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowTable(arrowTable: ArrowTable): ProcessorResult | null {
  // @ts-ignore - Unknown data type causing build failures
  return processArrowBatches(arrowTable.data.batches);
}

export function arrowSchemaToFields(schema: arrow.Schema): Field[] {
  return schema.fields.map((field: arrow.Field, index: number) => {
    const isGeoArrowColumn = field.metadata.get('ARROW:extension:name')?.startsWith('geoarrow');
    return {
      ...field,
      name: field.name,
      id: field.name,
      displayName: field.name,
      format: '',
      fieldIdx: index,
      type: isGeoArrowColumn ? ALL_FIELD_TYPES.geoarrow : arrowDataTypeToFieldType(field.type),
      analyzerType: isGeoArrowColumn
        ? AnalyzerDATA_TYPES.GEOMETRY
        : arrowDataTypeToAnalyzerDataType(field.type),
      valueAccessor: (dc: any) => d => {
        return dc.valueAt(d.index, index);
      },
      metadata: field.metadata
    };
  });
}
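
// Illustrative note (not part of the original file): a column is treated as
// geoarrow when its 'ARROW:extension:name' metadata starts with 'geoarrow'
// (e.g. 'geoarrow.point'); all other columns fall through to the plain
// arrow-to-kepler field type mapping.
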
/**
 * Parse arrow record batches returned from parseInBatches()
 *
 * @param arrowBatches the arrow record batches to parse
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowBatches(arrowBatches: arrow.RecordBatch[]): ProcessorResult | null {
  if (arrowBatches.length === 0) {
    return null;
  }
  const arrowTable = new arrow.Table(arrowBatches);
  const fields = arrowSchemaToFields(arrowTable.schema);

  const cols = [...Array(arrowTable.numCols).keys()].map(i => arrowTable.getChildAt(i));

  // return empty rows and use the raw arrow table to construct a column-wise data container
  return {fields, rows: [], cols, metadata: arrowTable.schema.metadata};
}
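
// Illustrative sketch (not part of the original file): feeding batches from
// an in-memory table built with apache-arrow's tableFromArrays:
//   const table = arrow.tableFromArrays({id: Int32Array.from([1, 2, 3])});
//   processArrowBatches(table.batches); // {fields, rows: [], cols, metadata}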

export const DATASET_HANDLERS = {
  [DATASET_FORMATS.row]: processRowObject,
  [DATASET_FORMATS.geojson]: processGeojson,
  [DATASET_FORMATS.csv]: processCsvData,
  [DATASET_FORMATS.arrow]: processArrowTable,
  [DATASET_FORMATS.keplergl]: processKeplerglDataset
};
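
// Illustrative sketch (not part of the original file): picking a processor
// by the dataset's format tag:
//   const handler = DATASET_HANDLERS[DATASET_FORMATS.csv]; // processCsvData
//   const data = handler('id,value\n1,2\n3,4');            // {fields, rows}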

export const Processors: {
  processGeojson: typeof processGeojson;
  processCsvData: typeof processCsvData;
  processArrowTable: typeof processArrowTable;
  processArrowBatches: typeof processArrowBatches;
  processRowObject: typeof processRowObject;
  processKeplerglJSON: typeof processKeplerglJSON;
  processKeplerglDataset: typeof processKeplerglDataset;
  analyzerTypeToFieldType: typeof analyzerTypeToFieldType;
  getFieldsFromData: typeof getFieldsFromData;
  parseCsvRowsByFieldType: typeof parseCsvRowsByFieldType;
} = {
  processGeojson,
  processCsvData,
  processArrowTable,
  processArrowBatches,
  processRowObject,
  processKeplerglJSON,
  processKeplerglDataset,
  analyzerTypeToFieldType,
  getFieldsFromData,
  parseCsvRowsByFieldType
};