keplergl / kepler.gl / 12031095165

26 Nov 2024 12:57PM UTC coverage: 69.321% (+22.9%) from 46.466%
Build 12031095165 · push · github · web-flow
[feat] create new dataset action (#2778)

* [feat] create new dataset action

- createNewDataEntry now returns a react-palm task to create or update a dataset asynchronously.
- updateVisDataUpdater now returns tasks to create or update a dataset asynchronously, and once done triggers createNewDatasetSuccess action.
- refactoring of demo-app App and Container to functional components

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>
Co-authored-by: Shan He <heshan0131@gmail.com>

5436 of 9079 branches covered (59.87%)

Branch coverage included in aggregate %.

91 of 111 new or added lines in 13 files covered. (81.98%)

8 existing lines in 3 files now uncovered.

11368 of 15162 relevant lines covered (74.98%)

95.15 hits per line

Source File: /src/processors/src/data-processor.ts (74.71% covered)
// SPDX-License-Identifier: MIT
// Copyright contributors to the kepler.gl project

import * as arrow from 'apache-arrow';
import {csvParseRows} from 'd3-dsv';
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
import normalize from '@mapbox/geojson-normalize';
import {ArrowTable} from '@loaders.gl/schema';
import {ALL_FIELD_TYPES, DATASET_FORMATS, GUIDES_FILE_FORMAT_DOC} from '@kepler.gl/constants';
import {ProcessorResult, Field} from '@kepler.gl/types';
import {
  arrowDataTypeToAnalyzerDataType,
  arrowDataTypeToFieldType,
  hasOwnProperty,
  isPlainObject
} from '@kepler.gl/utils';
import {
  notNullorUndefined,
  toArray,
  getSampleForTypeAnalyze,
  getFieldsFromData,
  analyzerTypeToFieldType
} from '@kepler.gl/common-utils';
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
import {Feature} from '@nebula.gl/edit-modes';

// if any of these values occurs in a csv cell, parse it to null;
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
// the empty alternative at the end also matches the empty string
export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;

function tryParseJsonString(str) {
  try {
    return JSON.parse(str);
  } catch (e) {
    return null;
  }
}

export const PARSE_FIELD_VALUE_FROM_STRING = {
  [ALL_FIELD_TYPES.boolean]: {
    valid: (d: unknown): boolean => typeof d === 'boolean',
    parse: (d: unknown): boolean => d === 'true' || d === 'True' || d === 'TRUE' || d === '1'
  },
  [ALL_FIELD_TYPES.integer]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseInt(d, 10) === d,
    // @ts-ignore
    parse: (d: unknown): number => parseInt(d, 10)
  },
  [ALL_FIELD_TYPES.timestamp]: {
    valid: (d: unknown, field: Field): boolean =>
      ['x', 'X'].includes(field.format) ? typeof d === 'number' : typeof d === 'string',
    parse: (d: any, field: Field) => (['x', 'X'].includes(field.format) ? Number(d) : d)
  },
  [ALL_FIELD_TYPES.real]: {
    // @ts-ignore
    valid: (d: unknown): boolean => parseFloat(d) === d,
    // Note: this will result in NaN for some strings
    parse: parseFloat
  },
  [ALL_FIELD_TYPES.object]: {
    valid: isPlainObject,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.array]: {
    valid: Array.isArray,
    parse: tryParseJsonString
  }
};
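
/*
 * Usage sketch (illustrative, not part of the original file): each entry pairs
 * a `valid` predicate (is the value already of the target type?) with a `parse`
 * function that coerces a raw csv string, e.g. for an integer field:
 *
 *   const intParser = PARSE_FIELD_VALUE_FROM_STRING[ALL_FIELD_TYPES.integer];
 *   intParser.valid('42'); // false: '42' is still a string
 *   intParser.parse('42'); // 42
 */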

/**
 * Process csv data, output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * @param rawData raw csv string
 * @param header optional column names; if not provided and `rawData` is an array of rows, the first row is treated as the header
 * @returns data object `{fields: [], rows: []}` that can be passed to `addDataToMap`
 * @public
 * @example
 * import {processCsvData} from 'kepler.gl/processors';
 *
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
 *
 * const dataset = {
 *  info: {id: 'test_data', label: 'My Csv'},
 *  data: processCsvData(testData)
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: [dataset],
 *  options: {centerMap: true, readOnly: true}
 * }));
 */
export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult {
  let rows: unknown[][] | undefined;
  let headerRow: string[] | undefined;

  if (typeof rawData === 'string') {
    const parsedRows: string[][] = csvParseRows(rawData);

    if (!Array.isArray(parsedRows) || parsedRows.length < 2) {
      // looks like an empty file; throw an error to be caught
      throw new Error('process Csv Data Failed: CSV is empty');
    }
    headerRow = parsedRows[0];
    rows = parsedRows.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    rows = rawData;
    headerRow = header;

    if (!Array.isArray(headerRow)) {
      // if data is passed in as an array of rows without a header,
      // assume the first row is the header
      // @ts-ignore
      headerRow = rawData[0];
      rows = rawData.slice(1);
    }
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // here we assume the first row of an uploaded csv file contains the column names

  cleanUpFalsyCsvValue(rows);
  // no need to run type detection on every data point:
  // get a sample of non-null values to run the analyzer on
  const sample = getSampleForTypeAnalyze({fields: headerRow, rows});
  const fields = getFieldsFromData(sample, headerRow);
  const parsedRows = parseRowsByFields(rows, fields);

  return {fields, rows: parsedRows};
}

/**
 * Parse csv rows by analyzed field types, so that `'1'` -> `1` and `'True'` -> `true`
 * @param rows
 * @param fields
 */
export function parseRowsByFields(rows: any[][], fields: Field[]) {
  // edits rows in place
  const geojsonFieldIdx = fields.findIndex(f => f.name === '_geojson');
  fields.forEach(parseCsvRowsByFieldType.bind(null, rows, geojsonFieldIdx));

  return rows;
}
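
/*
 * Usage sketch (illustrative): `rows` is mutated and returned. Assuming
 * `fields` was produced by `getFieldsFromData` and detected an integer and a
 * boolean column:
 *
 *   const rows = [['1', 'True'], ['2', 'False']];
 *   parseRowsByFields(rows, fields); // -> [[1, true], [2, false]]
 */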

/**
 * Convert falsy values in csv, including `'', 'null', 'NULL', 'Null', 'NaN'`, to `null`,
 * so that type-analyzer won't detect them as strings
 *
 * @param rows
 */
function cleanUpFalsyCsvValue(rows: unknown[][]): void {
  const re = new RegExp(CSV_NULLS, 'g');
  for (let i = 0; i < rows.length; i++) {
    for (let j = 0; j < rows[i].length; j++) {
      // the analyzer will type a field as 'string' if it contains empty values,
      // which d3.csv parses as ''; parse empty data as null instead
      // TODO: create warning when `CSV_NULLS` are detected in the data
      if (typeof rows[i][j] === 'string' && (rows[i][j] as string).match(re)) {
        rows[i][j] = null;
      }
    }
  }
}
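
/*
 * Illustrative example (not part of the original file): cells matching
 * CSV_NULLS are nulled in place.
 *
 *   const rows = [['a', ''], ['NaN', 'b']];
 *   cleanUpFalsyCsvValue(rows); // rows -> [['a', null], [null, 'b']]
 */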

/**
 * Process uploaded csv file to parse value by field type
 *
 * @param rows
 * @param geoFieldIdx field index
 * @param field
 * @param i
 */
export function parseCsvRowsByFieldType(
  rows: unknown[][],
  geoFieldIdx: number,
  field: Field,
  i: number
): void {
  const parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (parser) {
    // check whether the first non-null value is already parsed
    const first = rows.find(r => notNullorUndefined(r[i]));
    if (!first || parser.valid(first[i], field)) {
      return;
    }
    rows.forEach(row => {
      // parse string value based on field type
      if (row[i] !== null) {
        row[i] = parser.parse(row[i], field);
        if (
          geoFieldIdx > -1 &&
          isPlainObject(row[geoFieldIdx]) &&
          // @ts-ignore
          hasOwnProperty(row[geoFieldIdx], 'properties')
        ) {
          // @ts-ignore
          row[geoFieldIdx].properties[field.name] = row[i];
        }
      }
    });
  }
}
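
/*
 * Usage sketch (illustrative): parse a single column in place; pass
 * geoFieldIdx = -1 when there is no _geojson column.
 *
 *   const rows: unknown[][] = [['10'], ['20'], [null]];
 *   parseCsvRowsByFieldType(rows, -1, integerField, 0); // rows -> [[10], [20], [null]]
 *
 * where `integerField` is a hypothetical Field whose `type` is ALL_FIELD_TYPES.integer.
 */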

/* eslint-enable complexity */

/**
 * Process data where each row is an object, output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 * @param rawData an array of row objects, each object should have the same set of keys
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processRowObject} from 'kepler.gl/processors';
 *
 * const data = [
 *  {lat: 31.27, lng: 127.56, value: 3},
 *  {lat: 31.22, lng: 126.26, value: 1}
 * ];
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {label: 'My Data', id: 'my_data'},
 *    data: processRowObject(data)
 *  }
 * }));
 */
export function processRowObject(rawData: unknown[]): ProcessorResult {
  if (!Array.isArray(rawData)) {
    return null;
  } else if (!rawData.length) {
    // data is empty
    return {
      fields: [],
      rows: []
    };
  }

  const keys = Object.keys(rawData[0]); // e.g. [lat, lng, value]
  const rows = rawData.map(d => keys.map(key => d[key])); // e.g. [[31.27, 127.56, 3]]

  // row objects can still contain values like `Null` or `N/A`
  cleanUpFalsyCsvValue(rows);

  return processCsvData(rows, keys);
}

/**
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
 * output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * NOTE: This function may mutate input.
 *
 * @param rawData raw geojson feature collection
 * @returns dataset containing `fields` and `rows`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processGeojson} from 'kepler.gl/processors';
 *
 * const geojson = {
 *         "type" : "FeatureCollection",
 *         "features" : [{
 *                 "type" : "Feature",
 *                 "properties" : {
 *                         "capacity" : "10",
 *                         "type" : "U-Rack"
 *                 },
 *                 "geometry" : {
 *                         "type" : "Point",
 *                         "coordinates" : [ -71.073283, 42.417500 ]
 *                 }
 *         }]
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {
 *      label: 'Sample Taxi Trips in New York City',
 *      id: 'test_trip_data'
 *    },
 *    data: processGeojson(geojson)
 *  }
 * }));
 */
export function processGeojson(rawData: unknown): ProcessorResult {
  const normalizedGeojson = normalize(rawData);

  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    // failed to normalize geojson
    const error = new Error(
      `Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](${GUIDES_FILE_FORMAT_DOC})`
    );
    throw error;
  }

  // collect all feature fields
  const allDataRows: Array<{_geojson: Feature} & keyof Feature> = [];
  for (let i = 0; i < normalizedGeojson.features.length; i++) {
    const f = normalizedGeojson.features[i];
    if (f.geometry) {
      allDataRows.push({
        // add the feature to the _geojson field
        _geojson: f,
        ...(f.properties || {})
      });
    }
  }
  // get all the fields
  const fields = allDataRows.reduce<string[]>((accu, curr) => {
    Object.keys(curr).forEach(key => {
      if (!accu.includes(key)) {
        accu.push(key);
      }
    });
    return accu;
  }, []);

  // make sure each feature has exactly the same fields
  allDataRows.forEach(d => {
    fields.forEach(f => {
      if (!(f in d)) {
        d[f] = null;
        if (d._geojson.properties) {
          d._geojson.properties[f] = null;
        }
      }
    });
  });

  return processRowObject(allDataRows);
}

/**
 * Process saved kepler.gl json to be passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData
 * @param schema
 * @returns datasets and config `{datasets: {}, config: {}}`
 * @public
 * @example
 * import {addDataToMap} from 'kepler.gl/actions';
 * import {processKeplerglJSON} from 'kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
export function processKeplerglJSON(rawData: SavedMap, schema = KeplerGlSchema): LoadedMap | null {
  return rawData ? schema.load(rawData.datasets, rawData.config) : null;
}

/**
 * Parse a single dataset or an array of datasets saved using the kepler.gl schema
 * @param rawData
 * @param schema
 */
export function processKeplerglDataset(
  rawData: object | object[],
  schema = KeplerGlSchema
): ParsedDataset | ParsedDataset[] | null {
  if (!rawData) {
    return null;
  }

  const results = schema.parseSavedData(toArray(rawData));
  if (!results) {
    return null;
  }
  return Array.isArray(rawData) ? results : results[0];
}
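
/*
 * Usage sketch (illustrative, `savedDataset` et al. are hypothetical saved
 * datasets): the return shape mirrors the input shape.
 *
 *   const parsed = processKeplerglDataset(savedDataset);         // ParsedDataset | null
 *   const parsedList = processKeplerglDataset([savedA, savedB]); // ParsedDataset[] | null
 */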

/**
 * Parse an arrow table and return a dataset
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows`, or null
 */
export function processArrowTable(arrowTable: ArrowTable): ProcessorResult | null {
  // @ts-ignore - Unknown data type causing build failures
  return processArrowBatches(arrowTable.data.batches);
}

export function arrowSchemaToFields(schema: arrow.Schema): Field[] {
  return schema.fields.map((field: arrow.Field, index: number) => {
    const isGeoArrowColumn = field.metadata.get('ARROW:extension:name')?.startsWith('geoarrow');
    return {
      ...field,
      name: field.name,
      id: field.name,
      displayName: field.name,
      format: '',
      fieldIdx: index,
      type: isGeoArrowColumn ? ALL_FIELD_TYPES.geoarrow : arrowDataTypeToFieldType(field.type),
      analyzerType: isGeoArrowColumn
        ? AnalyzerDATA_TYPES.GEOMETRY
        : arrowDataTypeToAnalyzerDataType(field.type),
      valueAccessor: (dc: any) => d => {
        return dc.valueAt(d.index, index);
      },
      metadata: field.metadata
    };
  });
}

/**
 * Parse arrow batches returned from parseInBatches()
 *
 * @param arrowBatches the arrow record batches to parse
 * @returns dataset containing `fields` and `rows`, or null
 */
export function processArrowBatches(arrowBatches: arrow.RecordBatch[]): ProcessorResult | null {
  if (arrowBatches.length === 0) {
    return null;
  }
  const arrowTable = new arrow.Table(arrowBatches);
  const fields = arrowSchemaToFields(arrowTable.schema);

  const cols = [...Array(arrowTable.numCols).keys()].map(i => arrowTable.getChildAt(i));

  // return empty rows and use the raw arrow table to construct a column-wise data container
  return {fields, rows: [], cols, metadata: arrowTable.schema.metadata};
}
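
/*
 * Usage sketch (illustrative): build a dataset from an arrow table's record
 * batches, e.g. one created in memory with apache-arrow:
 *
 *   const table = arrow.tableFromJSON([{lat: 31.27, lng: 127.56}]);
 *   const result = processArrowBatches(table.batches);
 *   // result.fields describes the columns; rows stays [] and cols holds the column vectors
 */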

export const DATASET_HANDLERS = {
  [DATASET_FORMATS.row]: processRowObject,
  [DATASET_FORMATS.geojson]: processGeojson,
  [DATASET_FORMATS.csv]: processCsvData,
  [DATASET_FORMATS.arrow]: processArrowTable,
  [DATASET_FORMATS.keplergl]: processKeplerglDataset
};
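
/*
 * Usage sketch (illustrative, `rawCsvString` is a hypothetical input):
 * dispatch to the right processor by dataset format.
 *
 *   const handler = DATASET_HANDLERS[DATASET_FORMATS.csv];
 *   const data = handler(rawCsvString); // {fields, rows}
 */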

export const Processors: {
  processGeojson: typeof processGeojson;
  processCsvData: typeof processCsvData;
  processArrowTable: typeof processArrowTable;
  processArrowBatches: typeof processArrowBatches;
  processRowObject: typeof processRowObject;
  processKeplerglJSON: typeof processKeplerglJSON;
  processKeplerglDataset: typeof processKeplerglDataset;
  analyzerTypeToFieldType: typeof analyzerTypeToFieldType;
  getFieldsFromData: typeof getFieldsFromData;
  parseCsvRowsByFieldType: typeof parseCsvRowsByFieldType;
} = {
  processGeojson,
  processCsvData,
  processArrowTable,
  processArrowBatches,
  processRowObject,
  processKeplerglJSON,
  processKeplerglDataset,
  analyzerTypeToFieldType,
  getFieldsFromData,
  parseCsvRowsByFieldType
};