keplergl / kepler.gl / 12247395692

10 Dec 2024 01:11AM UTC coverage: 69.332% (+0.01%) from 69.322%

Build 12247395692, triggered by a push via GitHub (web-flow).

[Feat] add h3 typed column (#2822)

Detect an H3 column as a new h3-type column, like the existing geocolumn.

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

5489 of 9166 branches covered (59.88%); branch coverage is included in the aggregate %.

15 of 21 new or added lines in 6 files covered (71.43%).

22 existing lines in 1 file are now uncovered.

11426 of 15231 relevant lines covered (75.02%).

96.1 hits per line.

Source File: /src/processors/src/data-processor.ts (74.43% covered)
// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

import * as arrow from 'apache-arrow';
import {csvParseRows} from 'd3-dsv';
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
import normalize from '@mapbox/geojson-normalize';
import {ArrowTable} from '@loaders.gl/schema';
import {ALL_FIELD_TYPES, DATASET_FORMATS, GUIDES_FILE_FORMAT_DOC} from '@kepler.gl/constants';
import {ProcessorResult, Field} from '@kepler.gl/types';
import {
  arrowDataTypeToAnalyzerDataType,
  arrowDataTypeToFieldType,
  hasOwnProperty,
  isPlainObject
} from '@kepler.gl/utils';
import {
  analyzerTypeToFieldType,
  getSampleForTypeAnalyze,
  getFieldsFromData,
  h3IsValid,
  notNullorUndefined,
  toArray
} from '@kepler.gl/common-utils';
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
import {Feature} from '@nebula.gl/edit-modes';

// if any of these values occurs in csv, parse it to null;
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
// also matches the empty string
export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;

function tryParseJsonString(str) {
  try {
    return JSON.parse(str);
  } catch (e) {
    return null;
  }
}

export const PARSE_FIELD_VALUE_FROM_STRING = {
  [ALL_FIELD_TYPES.boolean]: {
    valid: (d: unknown): boolean => typeof d === 'boolean',
    parse: (d: unknown): boolean => d === 'true' || d === 'True' || d === 'TRUE' || d === '1'
  },
  [ALL_FIELD_TYPES.integer]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseInt(d, 10) === d,
    // @ts-ignore
    parse: (d: unknown): number => parseInt(d, 10)
  },
  [ALL_FIELD_TYPES.timestamp]: {
    valid: (d: unknown, field: Field): boolean =>
      ['x', 'X'].includes(field.format) ? typeof d === 'number' : typeof d === 'string',
    parse: (d: any, field: Field) => (['x', 'X'].includes(field.format) ? Number(d) : d)
  },
  [ALL_FIELD_TYPES.real]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseFloat(d) === d,
    // Note this will result in NaN for some strings
    parse: parseFloat
  },
  [ALL_FIELD_TYPES.object]: {
    valid: isPlainObject,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.array]: {
    valid: Array.isArray,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.h3]: {
    valid: d => h3IsValid(d),
    parse: d => d
  }
};
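
// Example (illustrative only): each entry pairs a `valid` check with a `parse` step.
// For an integer field, the raw CSV string '1' fails `valid` and is then parsed:
//   PARSE_FIELD_VALUE_FROM_STRING[ALL_FIELD_TYPES.integer].valid('1'); // false
//   PARSE_FIELD_VALUE_FROM_STRING[ALL_FIELD_TYPES.integer].parse('1'); // 1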

/**
 * Process csv data, output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * @param rawData raw csv string
 * @returns data object `{fields: [], rows: []}` that can be passed to `addDataToMap`
 * @public
 * @example
 * import {processCsvData} from 'kepler.gl/processors';
 *
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
 *
 * const dataset = {
 *  info: {id: 'test_data', label: 'My Csv'},
 *  data: processCsvData(testData)
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: [dataset],
 *  options: {centerMap: true, readOnly: true}
 * }));
 */
export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult {
  let rows: unknown[][] | undefined;
  let headerRow: string[] | undefined;

  if (typeof rawData === 'string') {
    const parsedRows: string[][] = csvParseRows(rawData);

    if (!Array.isArray(parsedRows) || parsedRows.length < 2) {
      // looks like an empty file; throw an error to be caught
      throw new Error('process Csv Data Failed: CSV is empty');
    }
    headerRow = parsedRows[0];
    rows = parsedRows.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    rows = rawData;
    headerRow = header;

    if (!Array.isArray(headerRow)) {
      // if data is passed in as an array of rows and the header is missing,
      // assume the first row is the header
      // @ts-ignore
      headerRow = rawData[0];
      rows = rawData.slice(1);
    }
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // here we assume the first row of the uploaded csv file
  // contains the column names

  cleanUpFalsyCsvValue(rows);
  // No need to run type detection on every data point;
  // here we get a list of non-null values to run the analyzer on
  const sample = getSampleForTypeAnalyze({fields: headerRow, rows});
  const fields = getFieldsFromData(sample, headerRow);
  const parsedRows = parseRowsByFields(rows, fields);

  return {fields, rows: parsedRows};
}

/**
 * Parse csv rows by analyzed field types, so that `'1'` -> `1` and `'True'` -> `true`
 * @param rows
 * @param fields
 */
export function parseRowsByFields(rows: any[][], fields: Field[]) {
  // Edit rows in place
  const geojsonFieldIdx = fields.findIndex(f => f.name === '_geojson');
  fields.forEach(parseCsvRowsByFieldType.bind(null, rows, geojsonFieldIdx));

  return rows;
}
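
// Illustrative sketch (hypothetical data; `fields` would normally come from
// getFieldsFromData):
//   const rows = [['1', 'True'], ['2', 'False']];
//   parseRowsByFields(rows, fields);
//   // rows is mutated in place -> [[1, true], [2, false]]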

/**
 * Convert falsy values in csv, including `'', 'null', 'NULL', 'Null', 'NaN'`, to `null`,
 * so that type-analyzer won't detect them as strings
 *
 * @param rows
 */
function cleanUpFalsyCsvValue(rows: unknown[][]): void {
  const re = new RegExp(CSV_NULLS, 'g');
  for (let i = 0; i < rows.length; i++) {
    for (let j = 0; j < rows[i].length; j++) {
      // the analyzer will set a field to 'string' if it contains empty values,
      // which d3.csv parses as ''
      // here we parse empty data as null
      // TODO: create warning when detecting `CSV_NULLS` in the data
      if (typeof rows[i][j] === 'string' && (rows[i][j] as string).match(re)) {
        rows[i][j] = null;
      }
    }
  }
}
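
// Illustrative sketch of the cleanup (hypothetical rows):
//   const rows = [['NaN', '10'], ['', 'null']];
//   cleanUpFalsyCsvValue(rows);
//   // rows -> [[null, '10'], [null, null]]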

/**
 * Process uploaded csv file to parse values by field type
 *
 * @param rows
 * @param geoFieldIdx field index
 * @param field
 * @param i
 */
export function parseCsvRowsByFieldType(
  rows: unknown[][],
  geoFieldIdx: number,
  field: Field,
  i: number
): void {
  const parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (parser) {
    // check whether the first non-null value is already parsed
    const first = rows.find(r => notNullorUndefined(r[i]));
    if (!first || parser.valid(first[i], field)) {
      return;
    }
    rows.forEach(row => {
      // parse string value based on field type
      if (row[i] !== null) {
        row[i] = parser.parse(row[i], field);
        if (
          geoFieldIdx > -1 &&
          isPlainObject(row[geoFieldIdx]) &&
          // @ts-ignore
          hasOwnProperty(row[geoFieldIdx], 'properties')
        ) {
          // @ts-ignore
          row[geoFieldIdx].properties[field.name] = row[i];
        }
      }
    });
  }
}
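
// Illustrative sketch (hypothetical `integerField` with type ALL_FIELD_TYPES.integer;
// parses column 0 in place, skipping nulls):
//   const rows = [['1'], ['2'], [null]];
//   parseCsvRowsByFieldType(rows, -1, integerField, 0);
//   // rows -> [[1], [2], [null]]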

/* eslint-enable complexity */

/**
 * Process data where each row is an object; the output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 * @param rawData an array of row objects; each object should have the same number of keys
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processRowObject} from 'kepler.gl/processors';
 *
 * const data = [
 *  {lat: 31.27, lng: 127.56, value: 3},
 *  {lat: 31.22, lng: 126.26, value: 1}
 * ];
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {label: 'My Data', id: 'my_data'},
 *    data: processRowObject(data)
 *  }
 * }));
 */
export function processRowObject(rawData: unknown[]): ProcessorResult {
  if (!Array.isArray(rawData)) {
    return null;
  } else if (!rawData.length) {
    // data is empty
    return {
      fields: [],
      rows: []
    };
  }

  const keys = Object.keys(rawData[0]); // [lat, lng, value]
  const rows = rawData.map(d => keys.map(key => d[key])); // [[31.27, 127.56, 3]]

  // row objects can still contain values like `Null` or `N/A`
  cleanUpFalsyCsvValue(rows);

  return processCsvData(rows, keys);
}

/**
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
 * output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 *
 * @param rawData raw geojson feature collection
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processGeojson} from 'kepler.gl/processors';
 *
 * const geojson = {
 *         "type" : "FeatureCollection",
 *         "features" : [{
 *                 "type" : "Feature",
 *                 "properties" : {
 *                         "capacity" : "10",
 *                         "type" : "U-Rack"
 *                 },
 *                 "geometry" : {
 *                         "type" : "Point",
 *                         "coordinates" : [ -71.073283, 42.417500 ]
 *                 }
 *         }]
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {
 *      label: 'Sample Taxi Trips in New York City',
 *      id: 'test_trip_data'
 *    },
 *    data: processGeojson(geojson)
 *  }
 * }));
 */
export function processGeojson(rawData: unknown): ProcessorResult {
  const normalizedGeojson = normalize(rawData);

  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    // failed to normalize geojson
    const error = new Error(
      `Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](${GUIDES_FILE_FORMAT_DOC})`
    );
    throw error;
  }

  // getting all feature fields
  const allDataRows: Array<{_geojson: Feature} & keyof Feature> = [];
  for (let i = 0; i < normalizedGeojson.features.length; i++) {
    const f = normalizedGeojson.features[i];
    if (f.geometry) {
      allDataRows.push({
        // add feature to _geojson field
        _geojson: f,
        ...(f.properties || {})
      });
    }
  }
  // get all the fields
  const fields = allDataRows.reduce<string[]>((accu, curr) => {
    Object.keys(curr).forEach(key => {
      if (!accu.includes(key)) {
        accu.push(key);
      }
    });
    return accu;
  }, []);

  // make sure each feature has exactly the same fields
  allDataRows.forEach(d => {
    fields.forEach(f => {
      if (!(f in d)) {
        d[f] = null;
        if (d._geojson.properties) {
          d._geojson.properties[f] = null;
        }
      }
    });
  });

  return processRowObject(allDataRows);
}

/**
 * Process saved kepler.gl json to be passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData
 * @param schema
 * @returns datasets and config `{datasets: {}, config: {}}`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processKeplerglJSON} from 'kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
export function processKeplerglJSON(rawData: SavedMap, schema = KeplerGlSchema): LoadedMap | null {
  return rawData ? schema.load(rawData.datasets, rawData.config) : null;
}

/**
 * Parse a single dataset or an array of datasets saved using the kepler.gl schema
 * @param rawData
 * @param schema
 */
export function processKeplerglDataset(
  rawData: object | object[],
  schema = KeplerGlSchema
): ParsedDataset | ParsedDataset[] | null {
  if (!rawData) {
    return null;
  }

  const results = schema.parseSavedData(toArray(rawData));
  if (!results) {
    return null;
  }
  return Array.isArray(rawData) ? results : results[0];
}
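
// Illustrative sketch (hypothetical `savedDataset` in the kepler.gl saved-data format):
//   const one = processKeplerglDataset(savedDataset);    // ParsedDataset | null
//   const many = processKeplerglDataset([savedDataset]); // ParsedDataset[] | null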

/**
 * Parse arrow table and return a dataset
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowTable(arrowTable: ArrowTable): ProcessorResult | null {
  // @ts-ignore - Unknown data type causing build failures
  return processArrowBatches(arrowTable.data.batches);
}
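
// Illustrative sketch (assumes `arrowTable` is an ArrowTable produced by a
// loaders.gl Arrow loader):
//   const result = processArrowTable(arrowTable);
//   // result -> {fields, rows: [], cols, metadata}, or null for an empty table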

export function arrowSchemaToFields(schema: arrow.Schema): Field[] {
  return schema.fields.map((field: arrow.Field, index: number) => {
    const isGeoArrowColumn = field.metadata.get('ARROW:extension:name')?.startsWith('geoarrow');
    return {
      ...field,
      name: field.name,
      id: field.name,
      displayName: field.name,
      format: '',
      fieldIdx: index,
      type: isGeoArrowColumn ? ALL_FIELD_TYPES.geoarrow : arrowDataTypeToFieldType(field.type),
      analyzerType: isGeoArrowColumn
        ? AnalyzerDATA_TYPES.GEOMETRY
        : arrowDataTypeToAnalyzerDataType(field.type),
      valueAccessor: (dc: any) => d => {
        return dc.valueAt(d.index, index);
      },
      metadata: field.metadata
    };
  });
}
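
// Illustrative sketch: a column annotated with a `geoarrow` extension maps to the
// `geoarrow` field type; other columns go through arrowDataTypeToFieldType:
//   const fields = arrowSchemaToFields(arrowTable.schema);
//   // fields[i] -> {name, id, displayName, type, analyzerType, valueAccessor, ...}
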
/**
 * Parse arrow batches returned from parseInBatches()
 *
 * @param arrowBatches the arrow record batches to parse
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowBatches(arrowBatches: arrow.RecordBatch[]): ProcessorResult | null {
  if (arrowBatches.length === 0) {
    return null;
  }
  const arrowTable = new arrow.Table(arrowBatches);
  const fields = arrowSchemaToFields(arrowTable.schema);

  const cols = [...Array(arrowTable.numCols).keys()].map(i => arrowTable.getChildAt(i));

  // return empty rows and use the raw arrow table to construct a column-wise data container
  return {fields, rows: [], cols, metadata: arrowTable.schema.metadata};
}
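
// Illustrative sketch (assumes `batches` came from a loaders.gl parseInBatches call):
//   const dataset = processArrowBatches(batches);
//   // dataset -> {fields, rows: [], cols, metadata}; rows stay empty because the
//   // arrow columns back a column-wise data container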

export const DATASET_HANDLERS = {
  [DATASET_FORMATS.row]: processRowObject,
  [DATASET_FORMATS.geojson]: processGeojson,
  [DATASET_FORMATS.csv]: processCsvData,
  [DATASET_FORMATS.arrow]: processArrowTable,
  [DATASET_FORMATS.keplergl]: processKeplerglDataset
};
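
// Illustrative dispatch by dataset format (hypothetical `format` and `data` values):
//   const handler = DATASET_HANDLERS[format]; // e.g. DATASET_FORMATS.csv -> processCsvData
//   const dataset = handler(data);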

export const Processors: {
  processGeojson: typeof processGeojson;
  processCsvData: typeof processCsvData;
  processArrowTable: typeof processArrowTable;
  processArrowBatches: typeof processArrowBatches;
  processRowObject: typeof processRowObject;
  processKeplerglJSON: typeof processKeplerglJSON;
  processKeplerglDataset: typeof processKeplerglDataset;
  analyzerTypeToFieldType: typeof analyzerTypeToFieldType;
  getFieldsFromData: typeof getFieldsFromData;
  parseCsvRowsByFieldType: typeof parseCsvRowsByFieldType;
} = {
  processGeojson,
  processCsvData,
  processArrowTable,
  processArrowBatches,
  processRowObject,
  processKeplerglJSON,
  processKeplerglDataset,
  analyzerTypeToFieldType,
  getFieldsFromData,
  parseCsvRowsByFieldType
};