• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

keplergl / kepler.gl / 25884645943

14 May 2026 08:43PM UTC coverage: 57.684% (-1.0%) from 58.684%
25884645943

push

github

web-flow
feat: basic annotations (#3434)

* feat: basic annotations

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

* fixes and improvements

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* fix annotations lag

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* tests, lint, fixes

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* formatting/prettier

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* update icon from target to letters

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* fix tests

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* fixes

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

* fix dragging

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* fixes

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* fixes

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* fixes

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* follow up

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* fixes; follow ups

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

---------

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>
Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>
Co-authored-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

7158 of 14867 branches covered (48.15%)

Branch coverage included in aggregate %.

217 of 737 new or added lines in 25 files covered. (29.44%)

70 existing lines in 2 files now uncovered.

14556 of 22776 relevant lines covered (63.91%)

77.67 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

50.47
/src/processors/src/data-processor.ts
1
// SPDX-License-Identifier: MIT
2
// Copyright contributors to the kepler.gl project
3

4
import * as arrow from 'apache-arrow';
5
import {csvParseRows, tsvParseRows, dsvFormat} from 'd3-dsv';
6
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
7
import normalize from '@mapbox/geojson-normalize';
8
import {parseSync} from '@loaders.gl/core';
9
import {ArrowTable} from '@loaders.gl/schema';
10
import {WKBLoader} from '@loaders.gl/wkt';
11

12
import {
13
  ALL_FIELD_TYPES,
14
  DATASET_FORMATS,
15
  GEOARROW_EXTENSIONS,
16
  GEOARROW_METADATA_KEY,
17
  GUIDES_FILE_FORMAT_DOC
18
} from '@kepler.gl/constants';
19
import {ProcessorResult, Field} from '@kepler.gl/types';
20
import {
21
  arrowDataTypeToAnalyzerDataType,
22
  arrowDataTypeToFieldType,
23
  hasOwnProperty,
24
  isPlainObject
25
} from '@kepler.gl/utils';
26
import {
27
  analyzerTypeToFieldType,
28
  getSampleForTypeAnalyze,
29
  getSampleForTypeAnalyzeArrow,
30
  getFieldsFromData,
31
  h3IsValid,
32
  notNullorUndefined,
33
  toArray
34
} from '@kepler.gl/common-utils';
35
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
36
import {Feature} from '@deck.gl-community/editable-layers';
37

38
// if any of these values occurs in csv, parse it to null;
39
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
40
// matches empty string
41
// Anchored pattern: matches a cell whose entire text is null, NULL, Null, NaN, /N or the empty string.
export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;
13✔
42

43
// Candidate delimiters (comma, tab, semicolon, pipe) iterated by detectDelimiter in this order.
const SUPPORTED_DELIMITERS = [',', '\t', ';', '|'] as const;
13✔
44

45
/**
 * Pick the d3-dsv row parser for a delimiter.
 * Comma and tab use the prebuilt csv/tsv parsers; any other delimiter gets
 * a parser created on demand through `dsvFormat`.
 * @param delimiter the column separator
 * @returns a function turning raw text into an array of string rows
 */
function getRowParser(delimiter: string): (raw: string) => string[][] {
  switch (delimiter) {
    case ',':
      return csvParseRows;
    case '\t':
      return tsvParseRows;
    default:
      return dsvFormat(delimiter).parseRows;
  }
}
50

51
/**
 * Guess which delimiter a DSV string uses by looking at its first line only.
 * Every supported delimiter is tried; the one that splits the first line into
 * the most columns wins. A candidate must yield at least 2 columns to replace
 * the default, so comma is returned when nothing splits the line (or when the
 * first line is empty).
 * @param rawData raw delimited text
 * @returns the detected delimiter character
 */
export function detectDelimiter(rawData: string): string {
  const lineEnd = rawData.indexOf('\n');
  const headerLine = lineEnd < 0 ? rawData : rawData.slice(0, lineEnd);
  if (!headerLine) return ',';

  // start from comma with a single column: only a strictly larger column count takes over
  const winner = SUPPORTED_DELIMITERS.reduce<{delimiter: string; columns: number}>(
    (best, candidate) => {
      const [firstRow] = getRowParser(candidate)(headerLine);
      const columns = firstRow?.length || 0;
      return columns > best.columns ? {delimiter: candidate, columns} : best;
    },
    {delimiter: ',', columns: 1}
  );

  return winner.delimiter;
}
75

76
/**
 * Parse a JSON string without throwing.
 * @param str text to parse
 * @returns the parsed value, or `null` when `JSON.parse` rejects the input
 */
function tryParseJsonString(str) {
  let parsed = null;
  try {
    parsed = JSON.parse(str);
  } catch {
    // malformed JSON is reported as null rather than as an exception
  }
  return parsed;
}
83

84
/**
 * Per-field-type helpers used when converting raw cell values to typed values.
 * For each field type, `valid` reports whether a value is already in its
 * parsed form and `parse` converts a raw value into that form.
 */
export const PARSE_FIELD_VALUE_FROM_STRING = {
  [ALL_FIELD_TYPES.boolean]: {
    valid: (d: unknown): boolean => typeof d === 'boolean',
    // only 'true', 'yes' and '1' (case-insensitive) parse to true
    parse: (d: unknown): boolean => ['true', 'yes', '1'].includes(String(d).toLowerCase())
  },
  [ALL_FIELD_TYPES.integer]: {
    // @ts-ignore
    valid: (d: unknown): boolean => d === parseInt(d, 10),
    // @ts-ignore
    parse: (d: unknown): number => parseInt(d, 10)
  },
  [ALL_FIELD_TYPES.timestamp]: {
    // formats 'x' / 'X' are stored as numbers, every other format stays a string
    valid: (d: unknown, field: Field): boolean =>
      typeof d === (field.format === 'x' || field.format === 'X' ? 'number' : 'string'),
    parse: (d: any, field: Field) => {
      if (field.format === 'x' || field.format === 'X') {
        return Number(d);
      }
      return d;
    }
  },
  [ALL_FIELD_TYPES.real]: {
    // @ts-ignore
    valid: (d: unknown): boolean => d === parseFloat(d),
    // Note this will result in NaN for some string
    parse: parseFloat
  },
  [ALL_FIELD_TYPES.object]: {
    valid: isPlainObject,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.array]: {
    valid: Array.isArray,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.h3]: {
    valid: d => h3IsValid(d),
    // h3 values are kept as they are
    parse: d => d
  }
};
124

125
/**
126
 * Process csv data, output a data object with `{fields: [], rows: []}`.
127
 * The data object can be wrapped in a `dataset` and pass to [`addDataToMap`](../actions/actions.md#adddatatomap)
128
 * @param rawData raw csv string
129
 * @returns data object `{fields: [], rows: []}` can be passed to addDataToMaps
130
 * @public
131
 * @example
132
 * import {processCsvData} from '@kepler.gl/processors';
133
 *
134
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
135
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
136
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
137
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
138
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
139
 *
140
 * const dataset = {
141
 *  info: {id: 'test_data', label: 'My Csv'},
142
 *  data: processCsvData(testData)
143
 * };
144
 *
145
 * dispatch(addDataToMap({
146
 *  datasets: [dataset],
147
 *  options: {centerMap: true, readOnly: true}
148
 * }));
149
 */
150
export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult {
  let headerRow: string[] | undefined;
  let rows: unknown[][] | undefined;

  if (typeof rawData === 'string') {
    // raw text: detect the delimiter, then split into a header row plus data rows
    const parseText = getRowParser(detectDelimiter(rawData));
    const textRows: string[][] = parseText(rawData);

    if (!Array.isArray(textRows) || textRows.length < 2) {
      throw new Error('processCsvData Failed: delimited text is empty or has no data rows');
    }
    headerRow = textRows[0];
    rows = textRows.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    if (Array.isArray(header)) {
      headerRow = header;
      rows = rawData;
    } else {
      // rows were passed without a header: treat the first row as the header
      // @ts-ignore
      headerRow = rawData[0];
      rows = rawData.slice(1);
    }
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // the first row of an uploaded csv is assumed to hold the column names

  // turn null-like strings into null before type detection (edits rows in place)
  cleanUpFalsyCsvValue(rows);

  // type detection runs on a sample rather than on every data point
  const typeSample = getSampleForTypeAnalyze({fields: headerRow, rows});
  const fields = getFieldsFromData(typeSample, headerRow);

  return {fields, rows: parseRowsByFields(rows, fields)};
}
192

193
/**
 * Convert csv cell values according to the analyzed field types,
 * e.g. `'1'` -> `1`, `'True'` -> `true`. The rows are edited in place.
 * @param rows data rows, one array of cells per row
 * @param fields analyzed fields; the position in this array is the column index
 * @returns the same `rows` array that was passed in
 */
export function parseRowsByFields(rows: any[][], fields: Field[]) {
  // position of the `_geojson` column, or -1 when there is none
  const geojsonFieldIdx = fields.findIndex(f => f.name === '_geojson');

  fields.forEach((field, columnIdx) => {
    parseCsvRowsByFieldType(rows, geojsonFieldIdx, field, columnIdx);
  });

  return rows;
}
205

206
/**
 * Replace every string cell that matches `CSV_NULLS` with `null`, in place,
 * so that type-analyzer won't detect the column as string.
 *
 * @param rows data rows, one array of cells per row
 */
function cleanUpFalsyCsvValue(rows: unknown[][]): void {
  for (const row of rows) {
    for (let col = 0; col < row.length; col++) {
      const cell = row[col];
      // empty values come out of d3.csv as '' and would force the field
      // type to 'string' in the analyzer, so they are stored as null instead
      // TODO: create warning when detecting `CSV_NULLS` in the data
      if (typeof cell === 'string' && CSV_NULLS.test(cell)) {
        row[col] = null;
      }
    }
  }
}
226

227
/**
 * Parse one column of the rows, in place, according to the field type.
 *
 * Nothing happens when the field type has no entry in
 * `PARSE_FIELD_VALUE_FROM_STRING`, when the column holds no value at all, or
 * when the first non-empty value is already in parsed form.
 *
 * Cells that are `null` or `undefined` (e.g. missing cells of a ragged row)
 * are left untouched instead of being run through the parser. This matches
 * the null/undefined-aware probe used to find the first value.
 *
 * When the rows carry a `_geojson` object with a `properties` member, the
 * parsed value is mirrored into `properties[field.name]`.
 *
 * @param rows data rows, one array of cells per row
 * @param geoFieldIdx column index of the `_geojson` field, or -1 when absent
 * @param field field whose type selects the parser
 * @param i column index of `field`
 */
export function parseCsvRowsByFieldType(
  rows: unknown[][],
  geoFieldIdx: number,
  field: Field,
  i: number
): void {
  const parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (!parser) {
    return;
  }

  // probe the first non-empty value: if it is already parsed, skip the column
  const first = rows.find(r => notNullorUndefined(r[i]));
  if (!first || parser.valid(first[i], field)) {
    return;
  }

  for (const row of rows) {
    // skip empty cells, using the same emptiness rule as the probe above
    if (!notNullorUndefined(row[i])) {
      continue;
    }

    // parse string value based on field type
    row[i] = parser.parse(row[i], field);

    if (
      geoFieldIdx > -1 &&
      isPlainObject(row[geoFieldIdx]) &&
      // @ts-ignore
      hasOwnProperty(row[geoFieldIdx], 'properties')
    ) {
      // keep the feature's properties in sync with the parsed cell
      // @ts-ignore
      row[geoFieldIdx].properties[field.name] = row[i];
    }
  }
}
265

266
/* eslint-enable complexity */
267

268
/**
269
 * Process data where each row is an object, output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
270
 * NOTE: This function may mutate input.
271
 * @param rawData an array of row object, each object should have the same number of keys
272
 * @returns dataset containing `fields` and `rows`
273
 * @public
274
 * @example
275
 * import {addDataToMap} from '@kepler.gl/actions';
276
 * import {processRowObject} from '@kepler.gl/processors';
277
 *
278
 * const data = [
279
 *  {lat: 31.27, lng: 127.56, value: 3},
280
 *  {lat: 31.22, lng: 126.26, value: 1}
281
 * ];
282
 *
283
 * dispatch(addDataToMap({
284
 *  datasets: {
285
 *    info: {label: 'My Data', id: 'my_data'},
286
 *    data: processRowObject(data)
287
 *  }
288
 * }));
289
 */
290
export function processRowObject(rawData: unknown[]): ProcessorResult {
  // anything that is not an array cannot be processed
  if (!Array.isArray(rawData)) {
    return null;
  }

  // data is empty
  if (!rawData.length) {
    return {
      fields: [],
      rows: []
    };
  }

  // the keys of the first row define the columns, e.g. [lat, lng, value]
  const keys = Object.keys(rawData[0] as Record<string, unknown>);

  // flatten each row object into an array of cells, e.g. [[31.27, 127.56, 3]]
  const rows = rawData.map(d => keys.map(key => (d as Record<string, unknown>)[key]));

  // Row objects can still contain null-like strings such as `Null` or `NaN`.
  // processCsvData converts those to null on this same `rows` array before
  // detecting types, so no separate clean-up pass is needed here.
  return processCsvData(rows, keys);
}
310

311
/**
312
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
313
 * output a data object with `{fields: [], rows: []}`.
314
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
315
 * NOTE: This function may mutate input.
316
 *
317
 * @param rawData raw geojson feature collection
318
 * @returns dataset containing `fields` and `rows`
319
 * @public
320
 * @example
321
 * import {addDataToMap} from '@kepler.gl/actions';
322
 * import {processGeojson} from '@kepler.gl/processors';
323
 *
324
 * const geojson = {
325
 *         "type" : "FeatureCollection",
326
 *         "features" : [{
327
 *                 "type" : "Feature",
328
 *                 "properties" : {
329
 *                         "capacity" : "10",
330
 *                         "type" : "U-Rack"
331
 *                 },
332
 *                 "geometry" : {
333
 *                         "type" : "Point",
334
 *                         "coordinates" : [ -71.073283, 42.417500 ]
335
 *                 }
336
 *         }]
337
 * };
338
 *
339
 * dispatch(addDataToMap({
340
 *  datasets: {
341
 *    info: {
342
 *      label: 'Sample Taxi Trips in New York City',
343
 *      id: 'test_trip_data'
344
 *    },
345
 *    data: processGeojson(geojson)
346
 *  }
347
 * }));
348
 */
349
export function processGeojson(rawData: unknown): ProcessorResult {
  const normalizedGeojson = normalize(rawData);

  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    throw new Error(
      `Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](${GUIDES_FILE_FORMAT_DOC})`
    );
  }

  // one row per feature that has a geometry: the feature itself goes into
  // `_geojson`, followed by its properties
  const allDataRows: Array<{_geojson: Feature} & keyof Feature> = [];
  for (const feature of normalizedGeojson.features) {
    if (feature.geometry) {
      allDataRows.push({
        _geojson: feature,
        ...(feature.properties || {})
      });
    }
  }

  // collect every field name in first-seen order
  const seenNames = new Set<string>();
  for (const row of allDataRows) {
    for (const key of Object.keys(row)) {
      seenNames.add(key);
    }
  }
  const fieldNames = Array.from(seenNames);

  // make sure each row (and its feature's properties) has exactly the same fields
  for (const row of allDataRows) {
    for (const name of fieldNames) {
      if (name in row) {
        continue;
      }
      row[name] = null;
      if (row._geojson.properties) {
        row._geojson.properties[name] = null;
      }
    }
  }

  return processRowObject(allDataRows);
}
394

395
/**
 * Load a saved kepler.gl json so the result can be passed to
 * [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData saved map; a falsy value yields `null`
 * @param schema schema used to load the map, defaults to `KeplerGlSchema`
 * @returns the result of `schema.load(datasets, config)`, or `null` when `rawData` is falsy
 * @public
 * @example
 * import {addDataToMap} from '@kepler.gl/actions';
 * import {processKeplerglJSON} from '@kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
export function processKeplerglJSON(rawData: SavedMap, schema = KeplerGlSchema): LoadedMap | null {
  if (!rawData) {
    return null;
  }
  return schema.load(rawData.datasets, rawData.config);
}
411

412
/**
 * Parse a single dataset, or an array of datasets, saved using kepler.gl schema.
 * @param rawData one saved dataset or an array of them; a falsy value yields `null`
 * @param schema schema used for parsing, defaults to `KeplerGlSchema`
 * @returns an array of parsed datasets when an array was passed, a single
 * parsed dataset otherwise, or `null` when there is nothing to return
 */
export function processKeplerglDataset(
  rawData: object | object[],
  schema = KeplerGlSchema
): ParsedDataset | ParsedDataset[] | null {
  // the schema always works on an array, so a single dataset is wrapped first
  const parsed = rawData ? schema.parseSavedData(toArray(rawData)) : null;
  if (!parsed) {
    return null;
  }

  // hand back the same shape the caller passed in
  if (Array.isArray(rawData)) {
    return parsed;
  }
  return parsed[0];
}
431

432
/**
 * Parse an arrow table by delegating its record batches to `processArrowBatches`.
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowTable(arrowTable: ArrowTable): ProcessorResult | null {
  // @ts-ignore - Unknown data type causing build failures
  const {batches} = arrowTable.data;
  // @ts-ignore - Unknown data type causing build failures
  return processArrowBatches(batches);
}
442

443
/**
 * Read GeoArrow encodings from the `geo` entry of an Apache Arrow table schema.
 * For geoparquet files geoarrow metadata isn't present in fields, so the extra
 * info is taken from the schema metadata instead.
 * Only columns whose encoding is `WKB` are reported.
 * This function does not throw: any failure while reading or parsing the
 * metadata is logged with `console.error` and whatever was collected so far
 * is returned.
 * @param table The Apache Arrow table to extract metadata from.
 * @returns An object mapping column names to their GeoArrow encoding type.
 */
export function getGeoArrowMetadataFromSchema(table: arrow.Table): Record<string, string> {
  const encodingByColumn: Record<string, string> = {};

  try {
    const rawGeo = table.schema.metadata?.get('geo');
    const columns = rawGeo ? JSON.parse(rawGeo).columns : undefined;

    if (columns) {
      for (const [columnName, columnInfo] of Object.entries<any>(columns)) {
        if (columnInfo?.encoding === 'WKB') {
          encodingByColumn[columnName] = GEOARROW_EXTENSIONS.WKB;
        }
        // TODO potentially there are other types but no datasets to test
      }
    }
  } catch (error) {
    console.error('An error during arrow table schema metadata parsing');
  }

  return encodingByColumn;
}
471

472
/**
 * Build Kepler.gl field objects from the schema of an Apache Arrow table.
 *
 * Each field starts with the types derived from its arrow data type. These are
 * then overridden, first match wins, by: a type suggestion of `JSON` or
 * `GEOMETRY`, geoarrow metadata on the field, geo metadata on the schema, a
 * `BLOB` suggestion whose first value parses as WKB, a `VARCHAR` suggestion
 * combined with a detected geometry or h3 type, and finally a detected
 * timestamp type.
 *
 * Note: the metadata map of a field may be updated with the detected geoarrow
 * encoding as a side effect.
 *
 * @param table The Apache Arrow table whose schema needs to be converted.
 * @param fieldTypeSuggestions Optional mapping of field names to suggested field types.
 * @returns An array of field objects suitable for Kepler.gl.
 */
export function arrowSchemaToFields(
  table: arrow.Table,
  fieldTypeSuggestions: Record<string, string> = {}
): Field[] {
  const columnNames = table.schema.fields.map(f => f.name);
  // types as detected by the analyzer from a sample of the data
  const typeSample = getSampleForTypeAnalyzeArrow(table, columnNames);
  const detectedFields = getFieldsFromData(typeSample, columnNames);
  // encodings declared in the schema-level `geo` metadata
  const schemaGeoEncodings = getGeoArrowMetadataFromSchema(table);

  return table.schema.fields.map((field: arrow.Field, fieldIndex: number) => {
    const suggestion = fieldTypeSuggestions[field.name];
    const detected = detectedFields[fieldIndex];

    // defaults derived from the arrow data type
    let type = arrowDataTypeToFieldType(field.type);
    let analyzerType = arrowDataTypeToAnalyzerDataType(field.type);
    let format = '';

    if (suggestion === 'JSON') {
      // geometry fields produced by DuckDB's st_asgeojson()
      type = ALL_FIELD_TYPES.geojson;
      analyzerType = AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING;
    } else if (
      suggestion === 'GEOMETRY' ||
      field.metadata.get(GEOARROW_METADATA_KEY)?.startsWith('geoarrow')
    ) {
      type = ALL_FIELD_TYPES.geoarrow;
      analyzerType = AnalyzerDATA_TYPES.GEOMETRY;
    } else if (schemaGeoEncodings[field.name]) {
      // encoding known only from the schema: copy it onto the field metadata
      type = ALL_FIELD_TYPES.geoarrow;
      analyzerType = AnalyzerDATA_TYPES.GEOMETRY;
      field.metadata?.set(GEOARROW_METADATA_KEY, schemaGeoEncodings[field.name]);
    } else if (suggestion === 'BLOB') {
      // When arrow wkb column saved to DuckDB as BLOB without any metadata, then queried back
      try {
        const firstValue = table.getChildAt(fieldIndex)?.get(0);
        if (firstValue) {
          const binaryGeo = parseSync(firstValue, WKBLoader);
          if (binaryGeo) {
            type = ALL_FIELD_TYPES.geoarrow;
            analyzerType = AnalyzerDATA_TYPES.GEOMETRY;
            field.metadata?.set(GEOARROW_METADATA_KEY, GEOARROW_EXTENSIONS.WKB);
          }
        }
      } catch (error) {
        // ignore, not WKB
      }
    } else if (
      suggestion === 'VARCHAR' &&
      (detected.analyzerType === AnalyzerDATA_TYPES.GEOMETRY ||
        detected.analyzerType === AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING)
    ) {
      // When wkb/wkt was saved as varchar in DuckDB
      type = detected.type;
      analyzerType = detected.analyzerType;
      format = detected.format;
    } else if (suggestion === 'VARCHAR' && detected.type === ALL_FIELD_TYPES.h3) {
      // when kepler detected h3 column using getFieldsFromData(), set type to h3 and analyzerType to H3
      type = ALL_FIELD_TYPES.h3;
      analyzerType = detected.analyzerType;
    } else if (detected.type === ALL_FIELD_TYPES.timestamp) {
      // TODO should we use Kepler getFieldsFromData instead
      // of arrowDataTypeToFieldType for all fields?
      type = detected.type;
      analyzerType = detected.analyzerType;
      format = detected.format;
    }

    return {
      ...field,
      name: field.name,
      id: field.name,
      displayName: field.name,
      format,
      fieldIdx: fieldIndex,
      type,
      analyzerType,
      // reads this column's value for a row from the data container
      valueAccessor: (dc: any) => d => dc.valueAt(d.index, fieldIndex),
      metadata: field.metadata
    };
  });
}
563

564
// Switch for castBigIntColumnsToFloat64: while false, that function returns its input table unchanged.
const CAST_BIGINTS = false;
13✔
565

566
/**
 * Cast 64-bit integer Arrow columns (Int64, Uint64) to Float64 to avoid BigInt values
 * that are incompatible with d3 scales, sorting, and other numeric operations.
 * Mirrors the DuckDB approach of casting BIGINT/UBIGINT to DOUBLE.
 *
 * The input table is returned as is when `CAST_BIGINTS` is off or when the
 * table has no 64-bit integer column. Null values become `NaN` in the
 * converted columns.
 *
 * @param arrowTable table to convert
 * @returns the original table, or a new table with the converted columns
 */
function castBigIntColumnsToFloat64(arrowTable: arrow.Table): arrow.Table {
  if (!CAST_BIGINTS) {
    return arrowTable;
  }

  const isInt64 = (type: arrow.DataType): boolean =>
    arrow.DataType.isInt(type) && type.bitWidth === 64;

  // nothing to convert: keep the original table
  if (!arrowTable.schema.fields.some(f => isInt64(f.type))) {
    return arrowTable;
  }

  const columns: Record<string, arrow.Vector> = {};
  for (let colIdx = 0; colIdx < arrowTable.numCols; colIdx++) {
    const vector = arrowTable.getChildAt(colIdx);
    if (!vector) continue;

    const {name, type} = arrowTable.schema.fields[colIdx];
    if (!isInt64(type)) {
      columns[name] = vector;
      continue;
    }

    // copy value by value, converting BigInt to number and null to NaN
    const asFloats = Float64Array.from({length: vector.length}, (_, rowIdx) => {
      const value = vector.get(rowIdx);
      return value === null ? NaN : Number(value);
    });
    columns[name] = arrow.makeVector(asFloats);
  }

  return new arrow.Table(columns);
}
601

602
/**
 * Parse arrow batches returned from parseInBatches()
 *
 * The batches are combined into one arrow table. The result carries the
 * fields derived from the table schema, the table's column vectors, and the
 * schema with its metadata; `rows` is always empty.
 *
 * @param arrowBatches the record batches to parse
 * @returns dataset containing `fields` and `rows`, or null when no batch is given
 */
export function processArrowBatches(arrowBatches: arrow.RecordBatch[]): ProcessorResult | null {
  if (!arrowBatches.length) {
    return null;
  }

  const arrowTable = castBigIntColumnsToFloat64(new arrow.Table(arrowBatches));
  const fields = arrowSchemaToFields(arrowTable);
  const {schema} = arrowTable;

  // one vector per column, in schema order
  const cols = Array.from({length: arrowTable.numCols}, (_, colIdx) =>
    arrowTable.getChildAt(colIdx)
  );

  // return empty rows and use raw arrow table to construct column-wise data container
  return {
    fields,
    rows: [],
    cols,
    metadata: schema.metadata,
    // Save original arrow schema, for better ingestion into DuckDB.
    // TODO consider returning arrowTable in cols, not an array of Vectors from arrowTable.
    arrowSchema: schema
  };
}
628

629
// Lookup from dataset format (row, geojson, csv, arrow, keplergl) to the processor function of this module that handles it.
export const DATASET_HANDLERS = {
13✔
630
  [DATASET_FORMATS.row]: processRowObject,
631
  [DATASET_FORMATS.geojson]: processGeojson,
632
  [DATASET_FORMATS.csv]: processCsvData,
633
  [DATASET_FORMATS.arrow]: processArrowTable,
634
  [DATASET_FORMATS.keplergl]: processKeplerglDataset
635
};
636

637
// Single object bundling the processors of this module together with
// analyzerTypeToFieldType and getFieldsFromData (both imported from @kepler.gl/common-utils).
// The explicit type annotation pins every member to the type of the function it refers to.
export const Processors: {
638
  processGeojson: typeof processGeojson;
639
  processCsvData: typeof processCsvData;
640
  processArrowTable: typeof processArrowTable;
641
  processArrowBatches: typeof processArrowBatches;
642
  processRowObject: typeof processRowObject;
643
  processKeplerglJSON: typeof processKeplerglJSON;
644
  processKeplerglDataset: typeof processKeplerglDataset;
645
  analyzerTypeToFieldType: typeof analyzerTypeToFieldType;
646
  getFieldsFromData: typeof getFieldsFromData;
647
  parseCsvRowsByFieldType: typeof parseCsvRowsByFieldType;
648
} = {
13✔
649
  processGeojson,
650
  processCsvData,
651
  processArrowTable,
652
  processArrowBatches,
653
  processRowObject,
654
  processKeplerglJSON,
655
  processKeplerglDataset,
656
  analyzerTypeToFieldType,
657
  getFieldsFromData,
658
  parseCsvRowsByFieldType
659
};
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc