• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

keplergl / kepler.gl / 25062278298

28 Apr 2026 03:32PM UTC coverage: 59.381% (-0.05%) from 59.429%
25062278298

push

github

web-flow
chore: bump deck.gl to 9.3.1 (#3392)

* chore: bump deck.gl to 9.3.1

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* bump 2

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* bump 3

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* lint

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* revert

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* fix post loaders upgrade

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

* upgrade regressions

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

---------

Signed-off-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>
Co-authored-by: Ihor Dykhta <ihordykhta@Ihors-MacBook-Pro.local>

6844 of 13819 branches covered (49.53%)

Branch coverage included in aggregate %.

7 of 32 new or added lines in 9 files covered. (21.88%)

53 existing lines in 4 files now uncovered.

14105 of 21460 relevant lines covered (65.73%)

79.26 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

46.32
/src/processors/src/data-processor.ts
1
// SPDX-License-Identifier: MIT
2
// Copyright contributors to the kepler.gl project
3

4
import * as arrow from 'apache-arrow';
5
import {csvParseRows} from 'd3-dsv';
6
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
7
import normalize from '@mapbox/geojson-normalize';
8
import {parseSync} from '@loaders.gl/core';
9
import {ArrowTable} from '@loaders.gl/schema';
10
import {WKBLoader} from '@loaders.gl/wkt';
11

12
import {
13
  ALL_FIELD_TYPES,
14
  DATASET_FORMATS,
15
  GEOARROW_EXTENSIONS,
16
  GEOARROW_METADATA_KEY,
17
  GUIDES_FILE_FORMAT_DOC
18
} from '@kepler.gl/constants';
19
import {ProcessorResult, Field} from '@kepler.gl/types';
20
import {
21
  arrowDataTypeToAnalyzerDataType,
22
  arrowDataTypeToFieldType,
23
  hasOwnProperty,
24
  isPlainObject
25
} from '@kepler.gl/utils';
26
import {
27
  analyzerTypeToFieldType,
28
  getSampleForTypeAnalyze,
29
  getSampleForTypeAnalyzeArrow,
30
  getFieldsFromData,
31
  h3IsValid,
32
  notNullorUndefined,
33
  toArray
34
} from '@kepler.gl/common-utils';
35
import {KeplerGlSchema, ParsedDataset, SavedMap, LoadedMap} from '@kepler.gl/schemas';
36
import {Feature} from '@deck.gl-community/editable-layers';
37

38
// if any of these value occurs in csv, parse it to null;
39
// const CSV_NULLS = ['', 'null', 'NULL', 'Null', 'NaN', '/N'];
40
// matches empty string
41
/**
 * Anchored pattern for CSV cell values that should be treated as null.
 * A cell matches only when its entire content is one of `null`, `NULL`, `Null`, `NaN`, `/N`,
 * or the empty string (the pattern lists the empty alternative twice, which is redundant).
 */
export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;
13✔
42

43
/**
 * Parse a JSON string without throwing.
 * @param str text to hand to `JSON.parse`
 * @returns the parsed value, or `null` when `JSON.parse` throws
 */
function tryParseJsonString(str) {
  let parsed = null;
  try {
    parsed = JSON.parse(str);
  } catch (e) {
    // not valid JSON: keep the null fallback
  }
  return parsed;
}
50

51
/**
 * Per-field-type helpers used to turn raw (string) cell values into typed values.
 * Every entry provides:
 * - `valid`: returns true when a value is already in the parsed representation for that type
 * - `parse`: converts a raw value into that representation
 */
export const PARSE_FIELD_VALUE_FROM_STRING = {
  [ALL_FIELD_TYPES.boolean]: {
    valid: (value: unknown): boolean => typeof value === 'boolean',
    // 'true', 'yes' and '1' (case-insensitive) become true, anything else becomes false
    parse: (value: unknown): boolean => {
      const lowered = String(value).toLowerCase();
      return ['true', 'yes', '1'].includes(lowered);
    }
  },
  [ALL_FIELD_TYPES.integer]: {
    // @ts-ignore
    valid: (value: unknown): boolean => parseInt(value, 10) === value,
    // @ts-ignore
    parse: (value: unknown): number => parseInt(value, 10)
  },
  [ALL_FIELD_TYPES.timestamp]: {
    // fields with format 'x' or 'X' are expected to hold numbers, all other formats strings
    valid: (value: unknown, field: Field): boolean =>
      typeof value === (['x', 'X'].includes(field.format) ? 'number' : 'string'),
    parse: (value: any, field: Field) => {
      if (['x', 'X'].includes(field.format)) {
        return Number(value);
      }
      return value;
    }
  },
  [ALL_FIELD_TYPES.real]: {
    // @ts-ignore
    valid: (value: unknown): boolean => parseFloat(value) === value,
    // Note this will result in NaN for some string
    parse: parseFloat
  },
  [ALL_FIELD_TYPES.object]: {
    valid: isPlainObject,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.array]: {
    valid: Array.isArray,
    parse: tryParseJsonString
  },

  [ALL_FIELD_TYPES.h3]: {
    valid: value => h3IsValid(value),
    // h3 values are kept as they are
    parse: value => value
  }
};
91

92
/**
 * Process csv data, output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 *
 * Accepts either a raw csv string (first row is used as the header) or an array of rows.
 * When an array of rows is given without `header`, its first row is used as the header.
 * Rows are modified in place: null-like strings become `null` and values are parsed by detected field type.
 *
 * @param rawData raw csv string, or an array of rows
 * @param header column names, used when `rawData` is an array of rows
 * @returns data object `{fields: [], rows: []}` can be passed to addDataToMaps
 * @throws Error when a csv string has fewer than two rows, or when no rows/header can be derived from the input
 * @public
 * @example
 * import {processCsvData} from '@kepler.gl/processors';
 *
 * const testData = `gps_data.utc_timestamp,gps_data.lat,gps_data.lng,gps_data.types,epoch,has_result,id,time,begintrip_ts_utc,begintrip_ts_local,date
 * 2016-09-17 00:09:55,29.9900937,31.2590542,driver_analytics,1472688000000,False,1,2016-09-23T00:00:00.000Z,2016-10-01 09:41:39+00:00,2016-10-01 09:41:39+00:00,2016-09-23
 * 2016-09-17 00:10:56,29.9927699,31.2461142,driver_analytics,1472688000000,False,2,2016-09-23T00:00:00.000Z,2016-10-01 09:46:37+00:00,2016-10-01 16:46:37+00:00,2016-09-23
 * 2016-09-17 00:11:56,29.9907261,31.2312742,driver_analytics,1472688000000,False,3,2016-09-23T00:00:00.000Z,,,2016-09-23
 * 2016-09-17 00:12:58,29.9870074,31.2175827,driver_analytics,1472688000000,False,4,2016-09-23T00:00:00.000Z,,,2016-09-23`
 *
 * const dataset = {
 *  info: {id: 'test_data', label: 'My Csv'},
 *  data: processCsvData(testData)
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: [dataset],
 *  options: {centerMap: true, readOnly: true}
 * }));
 */
export function processCsvData(rawData: unknown[][] | string, header?: string[]): ProcessorResult {
  let rows: unknown[][] | undefined;
  let headerRow: string[] | undefined;

  if (typeof rawData === 'string') {
    const csvRows: string[][] = csvParseRows(rawData);

    // a header plus at least one data row is required
    if (!Array.isArray(csvRows) || csvRows.length < 2) {
      // looks like an empty file, throw an error to be caught by the caller
      throw new Error('process Csv Data Failed: CSV is empty');
    }
    headerRow = csvRows[0];
    rows = csvRows.slice(1);
  } else if (Array.isArray(rawData) && rawData.length) {
    // if data is passed in as an array of rows without a header,
    // assume the first row is the header
    const headerProvided = Array.isArray(header);
    // @ts-ignore
    headerRow = headerProvided ? header : rawData[0];
    rows = headerProvided ? rawData : rawData.slice(1);
  }

  if (!rows || !headerRow) {
    throw new Error('invalid input passed to processCsvData');
  }

  // here we assume the csv file that people uploaded will have the first row
  // as the names of the columns

  cleanUpFalsyCsvValue(rows);

  // No need to run type detection on every data point,
  // so take a sample of non-null values to analyze
  const sample = getSampleForTypeAnalyze({fields: headerRow, rows});
  const fields = getFieldsFromData(sample, headerRow);

  return {fields, rows: parseRowsByFields(rows, fields)};
}
159

160
/**
 * Parse rows of csv by analyzed field types. So that `'1'` -> `1`, `'True'` -> `true`.
 * Rows are edited in place.
 * @param rows rows to parse
 * @param fields analyzed fields, one per column
 * @returns the same `rows` array that was passed in
 */
export function parseRowsByFields(rows: any[][], fields: Field[]) {
  // position of the `_geojson` column, -1 when there is none
  const geojsonFieldIdx = fields.findIndex(field => field.name === '_geojson');

  fields.forEach((field, columnIdx) => {
    parseCsvRowsByFieldType(rows, geojsonFieldIdx, field, columnIdx);
  });

  return rows;
}
172

173
/**
 * Replace, in place, every string cell that matches `CSV_NULLS` with `null`,
 * so that type-analyzer won't detect the column as string.
 * The analyzer would otherwise set a field to 'string' when it contains empty values,
 * which d3.csv parses as ''.
 *
 * @param rows rows to clean up; non-string cells are left untouched
 */
function cleanUpFalsyCsvValue(rows: unknown[][]): void {
  for (const row of rows) {
    for (let col = 0; col < row.length; col++) {
      const cell = row[col];
      // TODO: create a warning when `CSV_NULLS` is detected in the data
      if (typeof cell === 'string' && CSV_NULLS.test(cell)) {
        row[col] = null;
      }
    }
  }
}
193

194
/**
 * Parse one column of the rows, in place, according to the field type.
 * Nothing happens when there is no parser for the field type, when the column has no
 * non-null value, or when the first non-null value is already in parsed form.
 *
 * @param rows rows to update
 * @param geoFieldIdx index of the column holding geojson features, -1 when absent
 * @param field field describing the column
 * @param i index of the column to parse
 */
export function parseCsvRowsByFieldType(
  rows: unknown[][],
  geoFieldIdx: number,
  field: Field,
  i: number
): void {
  const parser = PARSE_FIELD_VALUE_FROM_STRING[field.type];
  if (!parser) {
    return;
  }

  // check the first non-null value to see whether the column is already parsed
  const firstWithValue = rows.find(row => notNullorUndefined(row[i]));
  if (!firstWithValue || parser.valid(firstWithValue[i], field)) {
    return;
  }

  const hasGeoField = geoFieldIdx > -1;

  rows.forEach(row => {
    // NOTE(review): only `null` is skipped here, `undefined` cells are still passed to the parser - confirm intended
    if (row[i] === null) {
      return;
    }

    // parse string value based on field type
    row[i] = parser.parse(row[i], field);

    // keep the feature's `properties` in sync with the parsed value
    if (
      hasGeoField &&
      isPlainObject(row[geoFieldIdx]) &&
      // @ts-ignore
      hasOwnProperty(row[geoFieldIdx], 'properties')
    ) {
      // @ts-ignore
      row[geoFieldIdx].properties[field.name] = row[i];
    }
  });
}
232

233
/* eslint-enable complexity */
234

235
/**
 * Process data where each row is an object, output can be passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * Column names are taken from the keys of the first row only; every row is read using those keys.
 * NOTE: This function may mutate input (values nested in the rows can be updated by the parsing
 * done in `processCsvData`).
 * @param rawData an array of row object, each object should have the same number of keys
 * @returns dataset containing `fields` and `rows`; `null` when `rawData` is not an array,
 * empty `fields` and `rows` when it is an empty array
 * @public
 * @example
 * import {addDataToMap} from '@kepler.gl/actions';
 * import {processRowObject} from '@kepler.gl/processors';
 *
 * const data = [
 *  {lat: 31.27, lng: 127.56, value: 3},
 *  {lat: 31.22, lng: 126.26, value: 1}
 * ];
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {label: 'My Data', id: 'my_data'},
 *    data: processRowObject(data)
 *  }
 * }));
 */
export function processRowObject(rawData: unknown[]): ProcessorResult {
  if (!Array.isArray(rawData)) {
    return null;
  }
  if (!rawData.length) {
    // data is empty
    return {
      fields: [],
      rows: []
    };
  }

  const keys = Object.keys(rawData[0] as Record<string, unknown>); // [lat, lng, value]
  const rows = rawData.map(d => keys.map(key => (d as Record<string, unknown>)[key])); // [[31.27, 127.56, 3]]

  // Row objects can still contain values like `Null` or `NaN`. processCsvData() converts
  // them to null before analyzing the rows, so no separate clean-up pass is needed here.
  return processCsvData(rows, keys);
}
277

278
/**
 * Process GeoJSON [`FeatureCollection`](http://wiki.geojson.org/GeoJSON_draft_version_6#FeatureCollection),
 * output a data object with `{fields: [], rows: []}`.
 * The data object can be wrapped in a `dataset` and passed to [`addDataToMap`](../actions/actions.md#adddatatomap)
 * Features without a geometry are skipped. Each remaining feature becomes a row holding the
 * feature itself under `_geojson` plus its properties.
 * NOTE: This function may mutate input (properties missing on a feature are added as `null`).
 *
 * @param rawData raw geojson feature collection
 * @returns dataset containing `fields` and `rows`
 * @throws Error when the normalized input has no `features` array
 * @public
 * @example
 * import {addDataToMap} from '@kepler.gl/actions';
 * import {processGeojson} from '@kepler.gl/processors';
 *
 * const geojson = {
 *         "type" : "FeatureCollection",
 *         "features" : [{
 *                 "type" : "Feature",
 *                 "properties" : {
 *                         "capacity" : "10",
 *                         "type" : "U-Rack"
 *                 },
 *                 "geometry" : {
 *                         "type" : "Point",
 *                         "coordinates" : [ -71.073283, 42.417500 ]
 *                 }
 *         }]
 * };
 *
 * dispatch(addDataToMap({
 *  datasets: {
 *    info: {
 *      label: 'Sample Taxi Trips in New York City',
 *      id: 'test_trip_data'
 *    },
 *    data: processGeojson(geojson)
 *  }
 * }));
 */
export function processGeojson(rawData: unknown): ProcessorResult {
  const normalizedGeojson = normalize(rawData);

  if (!normalizedGeojson || !Array.isArray(normalizedGeojson.features)) {
    throw new Error(
      `Read File Failed: File is not a valid GeoJSON. Read more about [supported file format](${GUIDES_FILE_FORMAT_DOC})`
    );
  }

  // one row per feature that has a geometry
  const allDataRows: Array<{_geojson: Feature} & keyof Feature> = [];
  for (const feature of normalizedGeojson.features) {
    if (feature.geometry) {
      allDataRows.push({
        // add feature to _geojson field
        _geojson: feature,
        ...(feature.properties || {})
      });
    }
  }

  // collect every field name in first-seen order; a Set keeps insertion order
  // and avoids rescanning the list of known names for each key of each row
  const fieldNames = new Set<string>();
  allDataRows.forEach(row => {
    Object.keys(row).forEach(key => {
      fieldNames.add(key);
    });
  });
  const fields = Array.from(fieldNames);

  // make sure each feature has exact same fields
  allDataRows.forEach(row => {
    fields.forEach(name => {
      if (!(name in row)) {
        row[name] = null;
        if (row._geojson.properties) {
          row._geojson.properties[name] = null;
        }
      }
    });
  });

  return processRowObject(allDataRows);
}
361

362
/**
 * Process saved kepler.gl json to be passed to [`addDataToMap`](../actions/actions.md#adddatatomap).
 * The json object should contain `datasets` and `config`.
 * @param rawData saved map json
 * @param schema schema used to load the saved map, `KeplerGlSchema` by default
 * @returns datasets and config `{datasets: {}, config: {}}`, or `null` when `rawData` is falsy
 * @public
 * @example
 * import {addDataToMap} from '@kepler.gl/actions';
 * import {processKeplerglJSON} from '@kepler.gl/processors';
 *
 * dispatch(addDataToMap(processKeplerglJSON(keplerGlJson)));
 */
export function processKeplerglJSON(rawData: SavedMap, schema = KeplerGlSchema): LoadedMap | null {
  if (!rawData) {
    return null;
  }
  return schema.load(rawData.datasets, rawData.config);
}
378

379
/**
 * Parse a single or an array of datasets saved using kepler.gl schema
 * @param rawData one saved dataset or an array of saved datasets
 * @param schema schema used to parse the saved data, `KeplerGlSchema` by default
 * @returns an array of parsed datasets when `rawData` is an array, otherwise the first parsed
 * dataset; `null` when `rawData` is falsy or the schema returns a falsy result
 */
export function processKeplerglDataset(
  rawData: object | object[],
  schema = KeplerGlSchema
): ParsedDataset | ParsedDataset[] | null {
  const parsed = rawData ? schema.parseSavedData(toArray(rawData)) : null;
  if (!parsed) {
    return null;
  }

  // mirror the shape of the input: array in, array out
  if (Array.isArray(rawData)) {
    return parsed;
  }
  return parsed[0];
}
398

399
/**
 * Parse an arrow table and return a dataset.
 * Delegates to `processArrowBatches` with the record batches found at `arrowTable.data.batches`.
 *
 * @param arrowTable ArrowTable to parse, see loaders.gl/schema
 * @returns dataset containing `fields` and `rows` or null
 */
export function processArrowTable(arrowTable: ArrowTable): ProcessorResult | null {
  // @ts-ignore - Unknown data type causing build failures
  return processArrowBatches(arrowTable.data.batches);
}
409

410
/**
 * Extracts GeoArrow metadata from an Apache Arrow table schema.
 * For geoparquet files geoarrow metadata isn't present in fields, so extract extra info from schema.
 * The `geo` entry of the schema metadata is parsed as JSON, and every column listed there with
 * `encoding` equal to `'WKB'` is mapped to `GEOARROW_EXTENSIONS.WKB`.
 * This function does not throw: when reading or parsing the metadata fails, the error is logged
 * with `console.error` and the entries collected so far (possibly none) are returned.
 * @param table The Apache Arrow table to extract metadata from.
 * @returns An object mapping column names to their GeoArrow encoding type.
 */
export function getGeoArrowMetadataFromSchema(table: arrow.Table): Record<string, string> {
  const geoArrowMetadata: Record<string, string> = {};
  try {
    const geoString = table.schema.metadata?.get('geo');
    if (geoString) {
      const columns = JSON.parse(geoString).columns;
      if (columns) {
        Object.keys(columns).forEach(columnName => {
          if (columns[columnName]?.encoding === 'WKB') {
            geoArrowMetadata[columnName] = GEOARROW_EXTENSIONS.WKB;
          }
          // TODO potentially there are other types but no datasets to test
        });
      }
    }
  } catch (error) {
    // include the caught error so the cause of the failure is visible in the log
    console.error('An error during arrow table schema metadata parsing', error);
  }
  return geoArrowMetadata;
}
438

439
/**
 * Converts an Apache Arrow table schema into an array of Kepler.gl field objects.
 * The type of each field starts from the arrow data type and is then overridden, in order of
 * precedence, by the matching type suggestion, geoarrow metadata, or the field detected from
 * sampled data (see the branches below).
 * Note that the `metadata` of an arrow field is updated in place when a WKB column is recognized.
 * @param table The Apache Arrow table whose schema needs to be converted.
 * @param fieldTypeSuggestions Optional mapping of field names to suggested field types.
 * @returns An array of field objects suitable for Kepler.gl.
 */
export function arrowSchemaToFields(
  table: arrow.Table,
  fieldTypeSuggestions: Record<string, string> = {}
): Field[] {
  const columnNames = table.schema.fields.map(f => f.name);
  const sample = getSampleForTypeAnalyzeArrow(table, columnNames);
  const detectedFields = getFieldsFromData(sample, columnNames);
  const schemaGeoMetadata = getGeoArrowMetadataFromSchema(table);

  return table.schema.fields.map((field: arrow.Field, fieldIndex: number) => {
    let type = arrowDataTypeToFieldType(field.type);
    let analyzerType = arrowDataTypeToAnalyzerDataType(field.type);
    let format = '';

    const suggestion = fieldTypeSuggestions[field.name];
    const detected = detectedFields[fieldIndex];

    const markAsGeoArrow = () => {
      type = ALL_FIELD_TYPES.geoarrow;
      analyzerType = AnalyzerDATA_TYPES.GEOMETRY;
    };
    const adoptDetectedField = () => {
      type = detected.type;
      analyzerType = detected.analyzerType;
      format = detected.format;
    };

    if (suggestion === 'JSON') {
      // geometry fields produced by DuckDB's st_asgeojson()
      type = ALL_FIELD_TYPES.geojson;
      analyzerType = AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING;
    } else if (
      suggestion === 'GEOMETRY' ||
      field.metadata.get(GEOARROW_METADATA_KEY)?.startsWith('geoarrow')
    ) {
      markAsGeoArrow();
    } else if (schemaGeoMetadata[field.name]) {
      // encoding is only known from the schema-level metadata: copy it onto the field
      markAsGeoArrow();
      field.metadata?.set(GEOARROW_METADATA_KEY, schemaGeoMetadata[field.name]);
    } else if (suggestion === 'BLOB') {
      // When arrow wkb column saved to DuckDB as BLOB without any metadata, then queried back
      try {
        const firstValue = table.getChildAt(fieldIndex)?.get(0);
        if (firstValue) {
          const binaryGeo = parseSync(firstValue, WKBLoader);
          if (binaryGeo) {
            markAsGeoArrow();
            field.metadata?.set(GEOARROW_METADATA_KEY, GEOARROW_EXTENSIONS.WKB);
          }
        }
      } catch (error) {
        // ignore, not WKB
      }
    } else if (
      suggestion === 'VARCHAR' &&
      (detected.analyzerType === AnalyzerDATA_TYPES.GEOMETRY ||
        detected.analyzerType === AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING)
    ) {
      // When wkb/wkt was saved as varchar in DuckDB
      adoptDetectedField();
    } else if (suggestion === 'VARCHAR' && detected.type === ALL_FIELD_TYPES.h3) {
      // when kepler detected h3 column using getFieldsFromData(), set type to h3 and analyzerType to H3
      type = ALL_FIELD_TYPES.h3;
      analyzerType = detected.analyzerType;
    } else if (detected.type === ALL_FIELD_TYPES.timestamp) {
      // TODO should we use Kepler getFieldsFromData instead
      // of arrowDataTypeToFieldType for all fields?
      adoptDetectedField();
    }

    return {
      ...field,
      name: field.name,
      id: field.name,
      displayName: field.name,
      format,
      fieldIdx: fieldIndex,
      type,
      analyzerType,
      // reads the value of this column for the row referenced by `d.index`
      valueAccessor: (dc: any) => d => dc.valueAt(d.index, fieldIndex),
      metadata: field.metadata
    };
  });
}
530

531
// Switch for castBigIntColumnsToFloat64(): while false, that function returns its input table unchanged.
const CAST_BIGINTS = false;
13✔
532

533
/**
 * Cast 64-bit integer Arrow columns (Int64, Uint64) to Float64 to avoid BigInt values
 * that are incompatible with d3 scales, sorting, and other numeric operations.
 * Mirrors the DuckDB approach of casting BIGINT/UBIGINT to DOUBLE.
 *
 * The input table is returned as is when `CAST_BIGINTS` is off or when no column is a
 * 64-bit integer. Otherwise a new table is built from the columns, where `null` values of
 * the converted columns become `NaN`.
 * NOTE(review): the new table is created from column vectors only - presumably schema and
 * field metadata of the original table are not carried over; confirm before enabling.
 *
 * @param arrowTable table to convert
 * @returns the original table, or a new table with 64-bit integer columns converted
 */
function castBigIntColumnsToFloat64(arrowTable: arrow.Table): arrow.Table {
  if (!CAST_BIGINTS) {
    return arrowTable;
  }

  const hasBigIntColumn = arrowTable.schema.fields.some(
    f => arrow.DataType.isInt(f.type) && f.type.bitWidth === 64
  );
  if (!hasBigIntColumn) {
    return arrowTable;
  }

  const columnsByName: Record<string, arrow.Vector> = {};
  for (let colIdx = 0; colIdx < arrowTable.numCols; colIdx++) {
    const field = arrowTable.schema.fields[colIdx];
    const column = arrowTable.getChildAt(colIdx)!;

    if (!(arrow.DataType.isInt(field.type) && field.type.bitWidth === 64)) {
      // not a 64-bit integer column: reuse the vector
      columnsByName[field.name] = column;
      continue;
    }

    const converted = new Float64Array(column.length);
    for (let rowIdx = 0; rowIdx < column.length; rowIdx++) {
      const value = column.get(rowIdx);
      converted[rowIdx] = value === null ? NaN : Number(value);
    }
    columnsByName[field.name] = arrow.makeVector(converted);
  }

  return new arrow.Table(columnsByName);
}
567

568
/**
 * Parse arrow batches returned from parseInBatches()
 *
 * @param arrowBatches the arrow record batches to parse
 * @returns dataset containing `fields`, empty `rows`, the column vectors in `cols`, and the
 * schema metadata; null when there are no batches
 */
export function processArrowBatches(arrowBatches: arrow.RecordBatch[]): ProcessorResult | null {
  if (arrowBatches.length === 0) {
    return null;
  }

  const arrowTable = castBigIntColumnsToFloat64(new arrow.Table(arrowBatches));
  const fields = arrowSchemaToFields(arrowTable);
  const cols = Array.from({length: arrowTable.numCols}, (_, colIdx) =>
    arrowTable.getChildAt(colIdx)
  );
  const {schema} = arrowTable;

  // return empty rows and use raw arrow table to construct column-wise data container
  return {
    fields,
    rows: [],
    cols,
    metadata: schema.metadata,
    // Save original arrow schema, for better ingestion into DuckDB.
    // TODO consider returning arrowTable in cols, not an array of Vectors from arrowTable.
    arrowSchema: schema
  };
}
594

595
/**
 * Lookup from a dataset format (keys of `DATASET_FORMATS`) to the function in this module
 * that processes data of that format.
 */
export const DATASET_HANDLERS = {
  [DATASET_FORMATS.row]: processRowObject,
  [DATASET_FORMATS.geojson]: processGeojson,
  [DATASET_FORMATS.csv]: processCsvData,
  [DATASET_FORMATS.arrow]: processArrowTable,
  [DATASET_FORMATS.keplergl]: processKeplerglDataset
};
602

603
/**
 * The data processors and related helpers grouped in a single object.
 */
export const Processors: {
  processGeojson: typeof processGeojson;
  processCsvData: typeof processCsvData;
  processArrowTable: typeof processArrowTable;
  processArrowBatches: typeof processArrowBatches;
  processRowObject: typeof processRowObject;
  processKeplerglJSON: typeof processKeplerglJSON;
  processKeplerglDataset: typeof processKeplerglDataset;
  analyzerTypeToFieldType: typeof analyzerTypeToFieldType;
  getFieldsFromData: typeof getFieldsFromData;
  parseCsvRowsByFieldType: typeof parseCsvRowsByFieldType;
} = {
  processGeojson,
  processCsvData,
  processArrowTable,
  processArrowBatches,
  processRowObject,
  processKeplerglJSON,
  processKeplerglDataset,
  analyzerTypeToFieldType,
  getFieldsFromData,
  parseCsvRowsByFieldType
};
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc