• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

keplergl / kepler.gl / 19768106976

28 Nov 2025 03:32PM UTC coverage: 61.675% (-0.09%) from 61.76%
19768106976

push

github

web-flow
chore: patch release 3.2.3 (#3250)

* draft

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

* patch

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

* fix eslint during release

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

---------

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

6352 of 12229 branches covered (51.94%)

Branch coverage included in aggregate %.

13043 of 19218 relevant lines covered (67.87%)

81.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

1.83
/src/utils/src/arrow-data-container.ts
1
// SPDX-License-Identifier: MIT
2
// Copyright contributors to the kepler.gl project
3

4
import {ALL_FIELD_TYPES} from '@kepler.gl/constants';
5
import {ProtoDatasetField} from '@kepler.gl/types';
6
import * as arrow from 'apache-arrow';
7
import {console as globalConsole} from 'global/window';
8
import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
9

10
import {DataContainerInterface, RangeOptions} from './data-container-interface';
11
import {DataRow, SharedRowOptions} from './data-row';
12

13
/**
 * Constructor input for `ArrowDataContainer`.
 */
type ArrowDataContainerInput = {
  /** One Arrow vector per column; the container reads row and chunk counts from the first one. */
  cols: arrow.Vector[];
  /** Field descriptors; `fields[i]` is paired with `cols[i]` when the container rebuilds a table. */
  fields?: ProtoDatasetField[];
  /** Existing Arrow table to keep; when omitted, the container builds one from `cols` and `fields`. */
  arrowTable?: arrow.Table;
};
18

19
/**
 * Check if table is an ArrowTable object.
 *
 * We use duck-typing instead of `instanceof arrow.Table` because DuckDB loads its own
 * bundled version of Apache Arrow. When DuckDB creates Arrow tables, they are instances
 * of DuckDB's Arrow.Table class, not the Arrow.Table class from our application's
 * apache-arrow package. This causes `instanceof` checks to fail even though the objects
 * are functionally equivalent Arrow tables.
 *
 * A value qualifies when it is a non-null object that exposes a `schema` property,
 * a callable `getChildAt`, and an array-valued `batches`.
 *
 * @param data - object to check
 * @returns true if data is an ArrowTable object (type guarded)
 */
export function isArrowTable(data: any): data is arrow.Table {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  if (typeof data !== 'object' || data === null) {
    return false;
  }

  // The typeof / Array.isArray checks already fail for missing properties,
  // so only `schema` needs an explicit presence test.
  return (
    'schema' in data && typeof data.getChildAt === 'function' && Array.isArray(data.batches)
  );
}
42

43
/**
 * Check if data is an ArrowVector object.
 * Uses duck-typing instead of `instanceof` to handle DuckDB's bundled Arrow version.
 *
 * A value qualifies when it is a non-null object that exposes a `type` property,
 * a numeric `length`, a callable `get`, and an array-valued `data`.
 *
 * @param data - object to check
 * @returns true if data is an ArrowVector object (type guarded)
 */
export function isArrowVector(data: any): data is arrow.Vector {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  if (typeof data !== 'object' || data === null) {
    return false;
  }

  // The typeof / Array.isArray checks already fail for missing properties,
  // so only `type` needs an explicit presence test.
  return (
    'type' in data &&
    typeof data.length === 'number' &&
    typeof data.get === 'function' &&
    Array.isArray(data.data)
  );
}
63

64
/**
 * Check if data is an Arrow FixedSizeList DataType.
 * Uses duck-typing instead of `instanceof` to handle DuckDB's bundled Arrow version.
 *
 * A value qualifies when it is a non-null object that exposes a `typeId` property,
 * a numeric `listSize`, and an array-valued `children`.
 *
 * @param data - object to check
 * @returns true if data is an Arrow FixedSizeList DataType (type guarded)
 */
export function isArrowFixedSizeList(data: any): data is arrow.FixedSizeList {
  // `typeof null` is 'object', so null has to be excluded explicitly.
  if (typeof data !== 'object' || data === null) {
    return false;
  }

  // The typeof / Array.isArray checks already fail for missing properties,
  // so only `typeId` needs an explicit presence test.
  return 'typeId' in data && typeof data.listSize === 'number' && Array.isArray(data.children);
}
82

83
/**
 * Check if data is an Arrow Struct DataType.
 * Uses duck-typing instead of `instanceof` to handle DuckDB's bundled Arrow version.
 *
 * A value qualifies when it is a non-null object whose `typeId` is the Struct type id,
 * that has an array-valued `children`, and that has no `listSize` property.
 *
 * The type id is compared explicitly because List, Map and Union DataTypes also expose
 * `typeId` and `children` without a `listSize`, and must not be reported as structs.
 *
 * @param data - object to check
 * @returns true if data is an Arrow Struct DataType (type guarded)
 */
export function isArrowStruct(data: any): data is arrow.Struct {
  // Numeric value of `Type.Struct` in Apache Arrow's `Type` enum. A literal is used
  // rather than a class reference so the check also holds for DataTypes created by a
  // separately bundled copy of Arrow.
  const STRUCT_TYPE_ID = 13;

  // `typeof null` is 'object', so null has to be excluded explicitly.
  if (typeof data !== 'object' || data === null) {
    return false;
  }

  return data.typeId === STRUCT_TYPE_ID && Array.isArray(data.children) && !('listSize' in data);
}
100

101
/**
 * Lazily yield every row of a data container, in index order.
 *
 * The row count is read once, when iteration starts, so rows added to the container
 * while iterating are not visited.
 *
 * @param dataContainer - container to iterate; its `numRows()` and `row()` are used
 * @param sharedRow - forwarded unchanged to every `dataContainer.row()` call
 * @returns a generator yielding `dataContainer.row(i, sharedRow)` for each row index
 */
function* rowsIterator(dataContainer: DataContainerInterface, sharedRow: SharedRowOptions) {
  const rowCount = dataContainer.numRows();
  let rowIndex = 0;
  while (rowIndex < rowCount) {
    yield dataContainer.row(rowIndex, sharedRow);
    rowIndex += 1;
  }
}
111

112
/**
 * Lazily yield every value of one column of a data container, in row order.
 *
 * The row count is read once, when iteration starts, so rows added to the container
 * while iterating are not visited.
 *
 * @param dataContainer - container to iterate; its `numRows()` and `valueAt()` are used
 * @param columnIndex - index of the column whose values are yielded
 * @returns a generator yielding `dataContainer.valueAt(i, columnIndex)` for each row index
 */
function* columnIterator(dataContainer: DataContainerInterface, columnIndex: number) {
  const rowCount = dataContainer.numRows();
  let rowIndex = 0;
  while (rowIndex < rowCount) {
    yield dataContainer.valueAt(rowIndex, columnIndex);
    rowIndex += 1;
  }
}
122

123
/**
 * A data container where all data is stored in raw Arrow table
 *
 * Values are read straight from the column vectors (`vector.get(rowIndex)`); nothing is
 * copied or cached. Row, column and chunk counts are taken from the first column.
 */
export class ArrowDataContainer implements DataContainerInterface {
  _cols: arrow.Vector[];
  _numColumns: number;
  _numRows: number;
  _fields: ProtoDatasetField[];
  _numChunks: number;
  // cache column data to make valueAt() faster
  // _colData: any[][];

  /** An arrow table recreated from vectors */
  _arrowTable: arrow.Table;

  /**
   * @param data - column vectors plus optional field descriptors and Arrow table
   * @throws Error when `data.cols` is missing or is not an array
   */
  constructor(data: ArrowDataContainerInput) {
    if (!data.cols) {
      throw Error('ArrowDataContainer: no columns provided');
    }

    if (!Array.isArray(data.cols)) {
      throw Error("ArrowDataContainer: columns object isn't an array");
    }

    const firstCol = data.cols[0];

    this._cols = data.cols;
    this._numColumns = data.cols.length;
    // An empty column list is a valid (empty) container, mirroring the guards in update().
    this._numRows = firstCol?.length ?? 0;
    this._fields = data.fields || [];
    this._numChunks = firstCol?.data?.length ?? 0;
    // this._colData = data.cols.map(c => c.toArray());

    this._arrowTable = data.arrowTable || this._createTable();
  }

  /**
   * Restores internal Arrow table from vectors.
   * Each field name is mapped to the column at the same index; fields that have no
   * matching column are left out.
   * TODO: consider using original arrow table, as it could contain extra metadata, not passed to the fields.
   */
  private _createTable() {
    const columnsByName: Record<string, arrow.Vector> = {};
    this._fields.forEach((field, index) => {
      const col = this._cols[index];
      if (col) {
        columnsByName[field.name] = col;
      }
    });
    return new arrow.Table(columnsByName);
  }

  /** @returns the Arrow table currently held by the container */
  getTable() {
    return this._arrowTable;
  }

  /**
   * Replace the container's columns and recompute the cached counts.
   *
   * @param updateData - either an Arrow table, whose child vectors become the new columns
   *   and which is kept as the internal table, or an array of vectors, from which the
   *   internal table is rebuilt using the existing fields
   */
  update(updateData: arrow.Vector<any>[] | arrow.Table) {
    const isArrow = isArrowTable(updateData);
    if (isArrow) {
      // Pull one vector per table column and drop any falsy entries from getChildAt().
      this._cols = Array.from(
        {length: updateData.numCols},
        (_, i) => updateData.getChildAt(i) as arrow.Vector
      ).filter(col => col);
    } else {
      this._cols = updateData;
    }
    this._numColumns = this._cols?.length ?? 0;
    this._numRows = this._cols?.[0]?.length ?? 0;
    this._numChunks = this._cols?.[0]?.data?.length ?? 0;
    this._arrowTable = isArrow ? updateData : this._createTable();

    // cache column data to make valueAt() faster
    // this._colData = this._cols.map(c => c.toArray());
  }

  /** @returns length of the first column's `data` array (0 when there are no columns) */
  numChunks(): number {
    return this._numChunks;
  }

  /** @returns length of the first column (0 when there are no columns) */
  numRows(): number {
    return this._numRows;
  }

  /** @returns number of column vectors held by the container */
  numColumns(): number {
    return this._numColumns;
  }

  /**
   * @param rowIndex - row to read
   * @param columnIndex - column to read
   * @returns the value the column vector's `get(rowIndex)` returns
   */
  valueAt(rowIndex: number, columnIndex: number): any {
    // return this._colData[columnIndex][rowIndex];
    return this._cols[columnIndex].get(rowIndex);
  }

  /**
   * Get a `DataRow` pointing at the given row.
   *
   * When `DataRow.createSharedRow(sharedRow)` yields a row object, that object is
   * re-pointed at `rowIndex` and returned; otherwise a new `DataRow` is created.
   *
   * @param rowIndex - row the returned `DataRow` refers to
   * @param sharedRow - passed to `DataRow.createSharedRow`
   */
  row(rowIndex: number, sharedRow?: SharedRowOptions): DataRow {
    const tSharedRow = DataRow.createSharedRow(sharedRow);
    if (tSharedRow) {
      tSharedRow.setSource(this, rowIndex);
      return tSharedRow;
    }

    return new DataRow(this, rowIndex);
  }

  /**
   * @param rowIndex - row to read
   * @returns a new array holding the value of every column at `rowIndex`
   */
  rowAsArray(rowIndex: number): any[] {
    // return this._colData.map(col => col[rowIndex]);
    return this._cols.map(col => col.get(rowIndex));
  }

  /**
   * @param sharedRow - passed to `DataRow.createSharedRow`; the result is used for every row
   * @returns a generator over all rows of the container
   */
  rows(sharedRow: SharedRowOptions) {
    const tSharedRow = DataRow.createSharedRow(sharedRow);
    return rowsIterator(this, tSharedRow);
  }

  /**
   * @param columnIndex - column to iterate
   * @returns a generator over all values of the column
   */
  column(columnIndex: number) {
    return columnIterator(this, columnIndex);
  }

  /** @returns the raw Arrow vector stored at `columnIndex` */
  getColumn(columnIndex: number): arrow.Vector {
    return this._cols[columnIndex];
  }

  /** @returns the field descriptor stored at `columnIndex` */
  getField(columnIndex: number): ProtoDatasetField {
    return this._fields[columnIndex];
  }

  /** @returns every row materialized as an array of column values */
  flattenData(): any[][] {
    const data: any[][] = [];
    for (let i = 0; i < this._numRows; ++i) {
      data.push(this.rowAsArray(i));
    }
    return data;
  }

  /** @returns the row indices `[0, 1, ..., numRows - 1]` */
  getPlainIndex(): number[] {
    return Array.from({length: this._numRows}, (_, i) => i);
  }

  /**
   * Call `func` for each row in a range and collect the results.
   *
   * @param func - receives the row and its index
   * @param sharedRow - passed to `DataRow.createSharedRow`; the result is used for every row
   * @param options - optional `start` (default 0) and `end` (default `numRows()`, and
   *   never beyond it); `end` is exclusive
   * @returns the values returned by `func`, in row order
   */
  map<T>(
    func: (row: DataRow, index: number) => T,
    sharedRow?: SharedRowOptions,
    options: RangeOptions = {}
  ): T[] {
    const tSharedRow = DataRow.createSharedRow(sharedRow);

    const {start = 0, end = this.numRows()} = options;
    const endRow = Math.min(this.numRows(), end);

    const out: T[] = [];
    for (let rowIndex = start; rowIndex < endRow; ++rowIndex) {
      const row = this.row(rowIndex, tSharedRow);
      out.push(func(row, rowIndex));
    }
    return out;
  }

  /**
   * Like `map`, but hands `func` only the row index (as `{index}`) and this container,
   * so no `DataRow` is created.
   *
   * @param func - receives `{index}` and the container
   * @param options - optional `start` (default 0) and `end` (default `numRows()`, and
   *   never beyond it); `end` is exclusive
   * @returns the values returned by `func`, in row order
   */
  mapIndex<T>(func: ({index}, dc: DataContainerInterface) => T, options: RangeOptions = {}): T[] {
    const {start = 0, end = this.numRows()} = options;
    const endRow = Math.min(this.numRows(), end);

    const out: T[] = [];
    for (let rowIndex = start; rowIndex < endRow; ++rowIndex) {
      out.push(func({index: rowIndex}, this));
    }
    return out;
  }

  /**
   * Find the first row for which `func` returns a truthy value.
   *
   * @param func - predicate receiving the row and its index
   * @param sharedRow - passed to `DataRow.createSharedRow`; the result is used for every row
   * @returns the matching row, or undefined when no row matches
   */
  find(
    func: (row: DataRow, index: number) => boolean,
    sharedRow?: SharedRowOptions
  ): DataRow | undefined {
    const tSharedRow = DataRow.createSharedRow(sharedRow);

    for (let rowIndex = 0; rowIndex < this._numRows; ++rowIndex) {
      const row = this.row(rowIndex, tSharedRow);
      if (func(row, rowIndex)) {
        return row;
      }
    }
    return undefined;
  }

  /**
   * Fold all rows into a single value.
   *
   * @param func - receives the accumulator, the row and its index; returns the next accumulator
   * @param initialValue - accumulator passed to the first call
   * @param sharedRow - passed to `DataRow.createSharedRow`; the result is used for every row
   * @returns the final accumulator (`initialValue` when the container has no rows)
   */
  reduce<T>(
    func: (acc: T, row: DataRow, index: number) => T,
    initialValue: T,
    sharedRow?: SharedRowOptions
  ): T {
    const tSharedRow = DataRow.createSharedRow(sharedRow);

    let acc = initialValue;
    for (let rowIndex = 0; rowIndex < this._numRows; ++rowIndex) {
      const row = this.row(rowIndex, tSharedRow);
      acc = func(acc, row, rowIndex);
    }
    return acc;
  }
}
311

312
/**
 * Convert arrow data type to kepler.gl field types
 *
 * Mapping, checked in this order:
 * - Date -> `date`
 * - Timestamp, Time -> `timestamp`
 * - Float -> `real`
 * - Int -> `integer`
 * - Bool -> `boolean`
 * - Utf8, Null -> `string`
 * - Binary, Dictionary, FixedSizeBinary, FixedSizeList, List, Map, Struct -> `object`
 *
 * Any other type logs a warning and is treated as `string`.
 *
 * @param arrowType the arrow data type
 * @returns corresponding type in `ALL_FIELD_TYPES`
 */
export function arrowDataTypeToFieldType(arrowType: arrow.DataType): string {
  // Note: this function doesn't return ALL_FIELD_TYPES.geojson or ALL_FIELD_TYPES.array, which
  // should be further detected by caller
  const {DataType} = arrow;

  if (DataType.isDate(arrowType)) {
    return ALL_FIELD_TYPES.date;
  }
  if (DataType.isTimestamp(arrowType) || DataType.isTime(arrowType)) {
    return ALL_FIELD_TYPES.timestamp;
  }
  if (DataType.isFloat(arrowType)) {
    return ALL_FIELD_TYPES.real;
  }
  if (DataType.isInt(arrowType)) {
    return ALL_FIELD_TYPES.integer;
  }
  if (DataType.isBool(arrowType)) {
    return ALL_FIELD_TYPES.boolean;
  }
  if (DataType.isUtf8(arrowType) || DataType.isNull(arrowType)) {
    return ALL_FIELD_TYPES.string;
  }

  // Binary, dictionary-encoded and nested types are all exposed as generic objects.
  const isObjectLike =
    DataType.isBinary(arrowType) ||
    DataType.isDictionary(arrowType) ||
    DataType.isFixedSizeBinary(arrowType) ||
    DataType.isFixedSizeList(arrowType) ||
    DataType.isList(arrowType) ||
    DataType.isMap(arrowType) ||
    DataType.isStruct(arrowType);
  if (isObjectLike) {
    return ALL_FIELD_TYPES.object;
  }

  globalConsole.warn(`Unsupported arrow type: ${arrowType}`);
  return ALL_FIELD_TYPES.string;
}
347

348
/**
 * Convert arrow data type to analyzer type
 *
 * Mapping, checked in this order:
 * - Date -> `DATE`
 * - Timestamp, Time -> `DATETIME`
 * - Float -> `FLOAT`
 * - Int -> `INT`
 * - Bool -> `BOOLEAN`
 * - Utf8, Null -> `STRING`
 * - Binary, Dictionary, FixedSizeBinary, FixedSizeList, List, Map, Struct -> `OBJECT`
 *
 * Any other type logs a warning and is treated as `STRING`.
 *
 * NOTE(review): the return annotation names the type of the whole `AnalyzerDATA_TYPES`
 * object while the function returns one of its members; kept as-is for caller
 * compatibility — confirm whether a member type was intended.
 *
 * @param arrowType the arrow data type
 * @returns corresponding type in `AnalyzerDATA_TYPES`
 */
export function arrowDataTypeToAnalyzerDataType(
  arrowType: arrow.DataType
): typeof AnalyzerDATA_TYPES {
  const {DataType} = arrow;

  if (DataType.isDate(arrowType)) {
    return AnalyzerDATA_TYPES.DATE;
  }
  if (DataType.isTimestamp(arrowType) || DataType.isTime(arrowType)) {
    return AnalyzerDATA_TYPES.DATETIME;
  }
  if (DataType.isFloat(arrowType)) {
    return AnalyzerDATA_TYPES.FLOAT;
  }
  if (DataType.isInt(arrowType)) {
    return AnalyzerDATA_TYPES.INT;
  }
  if (DataType.isBool(arrowType)) {
    return AnalyzerDATA_TYPES.BOOLEAN;
  }
  if (DataType.isUtf8(arrowType) || DataType.isNull(arrowType)) {
    return AnalyzerDATA_TYPES.STRING;
  }

  // Binary, dictionary-encoded and nested types are all exposed as generic objects.
  const isObjectLike =
    DataType.isBinary(arrowType) ||
    DataType.isDictionary(arrowType) ||
    DataType.isFixedSizeBinary(arrowType) ||
    DataType.isFixedSizeList(arrowType) ||
    DataType.isList(arrowType) ||
    DataType.isMap(arrowType) ||
    DataType.isStruct(arrowType);
  if (isObjectLike) {
    return AnalyzerDATA_TYPES.OBJECT;
  }

  globalConsole.warn(`Unsupported arrow type: ${arrowType}`);
  return AnalyzerDATA_TYPES.STRING;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc