• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

keplergl / kepler.gl / 13395431770

18 Feb 2025 04:29PM UTC coverage: 66.175% (-0.3%) from 66.434%
13395431770

push

github

web-flow
[feat] improvements to duckDB column type handling (#2970)

This PR preserves column types across the different ways data is ingested into Kepler and DuckDB.

- Timestamps stored as strings in Arrow tables are recognized as timestamps.
- apply extra metadata from table.schema.metadata (geoparquet files). 
- DuckDB geometry is automatically cast to WKB and properly marked with geoarrow extensions.
- DuckDB column types and query result Arrow table types consolidation.
- Apply extra logic only to the last select query.
- geoarrow constants to constants module
- Add getSampleForTypeAnalyzeArrow so that type analysis supports Arrow data instead of failing on it.
- arrowSchemaToFields accepts extra info from DuckDB table schemas. JSON type gets GEOMETRY_FROM_STRING type, GEOMETRY with geoarrow metadata gets GEOMETRY type, timestamp ...
- fix in validateInputData - check analyzerType only for current field
- fix in validateInputData - support arrow input data

---------

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

6024 of 10612 branches covered (56.77%)

Branch coverage included in aggregate %.

10 of 94 new or added lines in 8 files covered. (10.64%)

1 existing line in 1 file now uncovered.

12368 of 17181 relevant lines covered (71.99%)

88.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

14.36
/src/layers/src/layer-utils.ts
1
// SPDX-License-Identifier: MIT
2
// Copyright contributors to the kepler.gl project
3

4
import * as arrow from 'apache-arrow';
5
import {Feature, BBox} from 'geojson';
6
import {getGeoMetadata} from '@loaders.gl/gis';
7

8
import {GEOARROW_EXTENSIONS, GEOARROW_METADATA_KEY} from '@kepler.gl/constants';
9
import {KeplerTable} from '@kepler.gl/table';
10
import {
11
  Field,
12
  ProtoDatasetField,
13
  FieldPair,
14
  SupportedColumnMode,
15
  LayerColumn,
16
  LayerColumns,
17
  RGBColor
18
} from '@kepler.gl/types';
19
import {DataContainerInterface, ArrowDataContainer} from '@kepler.gl/utils';
20
import {
21
  getBinaryGeometriesFromArrow,
22
  parseGeometryFromArrow,
23
  BinaryGeometriesFromArrowOptions,
24
  updateBoundsFromGeoArrowSamples
25
} from '@loaders.gl/arrow';
26

27
import {WKBLoader} from '@loaders.gl/wkt';
28
import {geojsonToBinary} from '@loaders.gl/gis';
29
import {
30
  BinaryFeatureCollection,
31
  Geometry,
32
  BinaryPointFeature,
33
  BinaryLineFeature,
34
  BinaryPolygonFeature
35
} from '@loaders.gl/schema';
36

37
import {DeckGlGeoTypes, GeojsonDataMaps} from './geojson-layer/geojson-utils';
38

39
/**
 * Props describing one layer configuration.
 * Only `label` is required; every other member may be omitted.
 */
export type FindDefaultLayerProps = {
  /** Label of the layer. */
  label: string;
  /** Layer columns keyed by column name. */
  columns?: Record<string, LayerColumn>;
  /** Layer color. */
  color?: RGBColor;
  /** Visibility flag of the layer. */
  isVisible?: boolean;
};
45

46
/**
 * Result of a default-layer lookup: the configurations to create right away,
 * optional alternatives, and the configurations that were already found.
 */
export type FindDefaultLayerPropsReturnValue = {
  /** Layer props used to create layers by default when a dataset is added. */
  props: FindDefaultLayerProps[];
  /** Layer props of possible alternative layer configurations; these are not created by default. */
  altProps?: FindDefaultLayerProps[];
  /** Layer configurations that were already found, each tagged with its layer `type`. */
  foundLayers?: Array<FindDefaultLayerProps & {type: string}>;
};
54

55
/**
 * Builds the layer column assignment for a lat/lng(/altitude) field pair.
 * @param pair Field pair whose `pair` member holds the `lat`, `lng` and optional `altitude` columns.
 * @param hasAlt When false only `lat` and `lng` are returned; when true an `altitude` column is always included.
 * @returns Layer columns keyed by `lat`, `lng` and, if requested, `altitude`.
 */
export function assignPointPairToLayerColumn(
  pair: FieldPair,
  hasAlt: boolean
): Record<string, LayerColumn> {
  const {lat: latColumn, lng: lngColumn, altitude: altitudeColumn} = pair.pair;
  const columns: Record<string, LayerColumn> = {lat: latColumn, lng: lngColumn};

  if (hasAlt) {
    // placeholder used when the pair has no altitude; also supplies defaults for a partial altitude column
    const emptyAltitude = {value: null, fieldIdx: -1, optional: true};
    columns.altitude = altitudeColumn ? {...emptyAltitude, ...altitudeColumn} : emptyAltitude;
  }

  return columns;
}
69

79✔
70
/**
 * Layer meta produced when geometry data is prepared for a geojson layer.
 */
export type GeojsonLayerMetaProps = {
  /** Converted geometry data handed to the layer. */
  dataToFeature: GeojsonDataMaps;
  /** Flags describing which geometry kinds are present. */
  featureTypes: DeckGlGeoTypes;
  /** Bounding box of the data, or null. */
  bounds: BBox | null;
  /** Whether a fixed radius is used. */
  fixedRadius: boolean;
  /** Optional per-feature centroids; an entry may be null. */
  centroids?: (number[] | null)[];
};
77

78
/**
 * Converts a geoarrow.wkb vector into an array of BinaryFeatureCollections.
 *
 * Each processed chunk is parsed row by row with WKBLoader, converted with geojsonToBinary
 * and pushed as one BinaryFeatureCollection. Rows without geometry bytes (or rows the loader
 * returns nothing for) produce no feature but still consume a feature index, so the `index`
 * property of a feature always equals its row position plus `options.chunkOffset`.
 *
 * @param geoColumn A vector column with geoarrow.wkb extension.
 * @param options Options for geometry transformation.
 * @param options.chunkIndex When defined and >= 0, only this chunk of the column is converted.
 * @param options.chunkOffset Feature index assigned to the first processed row (defaults to 0).
 * @returns Binary features per chunk, the geometry kinds found in any feature, and the bounds of
 * all positions. Bounds stay at [Infinity, Infinity, -Infinity, -Infinity] when nothing was parsed.
 */
function getBinaryGeometriesFromWKBArrow(
  geoColumn: arrow.Vector,
  options: {chunkIndex?: number; chunkOffset?: number}
): GeojsonLayerMetaProps {
  const dataToFeature: BinaryFeatureCollection[] = [];
  const featureTypes: GeojsonLayerMetaProps['featureTypes'] = {
    point: false,
    line: false,
    polygon: false
  };

  const chunks =
    options?.chunkIndex !== undefined && options?.chunkIndex >= 0
      ? [geoColumn.data[options?.chunkIndex]]
      : geoColumn.data;
  const globalFeatureIdOffset = options?.chunkOffset || 0;
  let featureIndex = globalFeatureIdOffset;
  let bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity];

  chunks.forEach(chunk => {
    const geojsonFeatures: Feature[] = [];
    for (let i = 0; i < chunk.length; ++i) {
      const start = chunk.valueOffsets[i];
      const end = chunk.valueOffsets[i + 1];

      // ignore features without any geometry
      if (end - start > 0) {
        const valuesSlice = chunk.values.slice(start, end);

        const geometry = WKBLoader?.parseSync?.(valuesSlice.buffer, {
          wkb: {shape: 'geojson-geometry'}
        }) as Geometry | null | undefined;

        // the loader call is optional-chained, so it can yield undefined; skip such rows
        if (geometry) {
          const feature: Feature = {
            type: 'Feature',
            geometry,
            properties: {index: featureIndex}
          };
          geojsonFeatures.push(feature);

          // accumulate: a flag stays set once any feature of that kind was seen,
          // instead of reflecting only the last parsed feature
          const {type} = geometry;
          featureTypes.polygon =
            featureTypes.polygon || type === 'Polygon' || type === 'MultiPolygon';
          featureTypes.point = featureTypes.point || type === 'Point' || type === 'MultiPoint';
          featureTypes.line =
            featureTypes.line || type === 'LineString' || type === 'MultiLineString';
        }
      }

      // rows without geometry still advance the index so it keeps matching the row position
      featureIndex++;
    }

    const geojsonToBinaryOptions = {
      triangulate: true,
      fixRingWinding: true
    };
    const binaryFeatures = geojsonToBinary(geojsonFeatures, geojsonToBinaryOptions);

    // Need to update globalFeatureIds, to take into account previous batches,
    // as geojsonToBinary doesn't have such option.
    const featureTypesArr = ['points', 'lines', 'polygons'];
    featureTypesArr.forEach(prop => {
      const features = binaryFeatures[prop] as
        | BinaryPointFeature
        | BinaryLineFeature
        | BinaryPolygonFeature;
      if (features) {
        bounds = updateBoundsFromGeoArrowSamples(
          features.positions.value as Float64Array,
          features.positions.size,
          bounds
        );

        // copy the per-vertex `index` property (set above) over the ids geojsonToBinary generated
        const {globalFeatureIds, numericProps} = features;
        const {index} = numericProps;
        const len = globalFeatureIds.value.length;
        for (let i = 0; i < len; ++i) {
          globalFeatureIds.value[i] = index.value[i];
        }
      }
    });

    dataToFeature.push(binaryFeatures);
  });

  return {
    dataToFeature,
    featureTypes,
    bounds,
    fixedRadius: false
  };
}
169

170
/**
 * Creates geojson layer meta (binary geometries, feature types, bounds, centroids)
 * from a geoarrow column.
 *
 * @param dataContainer Data container (not read by this function).
 * @param geoColumn Arrow vector holding the geometries.
 * @param geoField Field of the geometry column; its metadata provides the geoarrow encoding.
 * @param chunkIndex When defined and >= 0, only this chunk of the column is converted.
 * @returns Layer meta for the geojson layer. `centroids` is only set for non-WKB encodings.
 */
export function getGeojsonLayerMetaFromArrow({
  dataContainer,
  geoColumn,
  geoField,
  chunkIndex
}: {
  dataContainer: DataContainerInterface;
  geoColumn: arrow.Vector;
  geoField: ProtoDatasetField;
  chunkIndex?: number;
}): GeojsonLayerMetaProps {
  const encoding = geoField?.metadata?.get(GEOARROW_METADATA_KEY);

  const chunkOptions: {chunkIndex?: number; chunkOffset?: number} = {};
  if (chunkIndex !== undefined && chunkIndex >= 0) {
    // The offset of a chunk is the number of rows in all chunks before it.
    // Summing the actual lengths stays correct when chunks differ in size.
    let chunkOffset = 0;
    const precedingChunks = Math.min(chunkIndex, geoColumn.data.length);
    for (let i = 0; i < precedingChunks; ++i) {
      chunkOffset += geoColumn.data[i].length;
    }
    chunkOptions.chunkIndex = chunkIndex;
    chunkOptions.chunkOffset = chunkOffset;
  }

  const options: BinaryGeometriesFromArrowOptions = {
    ...chunkOptions,
    triangulate: true,
    calculateMeanCenters: true
  };

  // getBinaryGeometriesFromArrow doesn't support geoarrow.wkb
  if (encoding === GEOARROW_EXTENSIONS.WKB) {
    return getBinaryGeometriesFromWKBArrow(geoColumn, options);
  }

  // create binary data from arrow data for GeoJsonLayer
  const {binaryGeometries, featureTypes, bounds, meanCenters} = getBinaryGeometriesFromArrow(
    // @ts-ignore
    geoColumn,
    encoding,
    options
  );

  // since there is no feature.properties.radius, we set fixedRadius to false
  const fixedRadius = false;

  return {
    dataToFeature: binaryGeometries,
    featureTypes,
    bounds,
    fixedRadius,
    centroids: meanCenters
  };
}
217

218
/**
 * Tells whether a picked object belongs to the layer with the given id.
 * @param objectInfo Picking info; `picked` and `layer.props.id` are read from it.
 * @param layerId Id of the layer to test against.
 * @returns True when the object is picked and its deck.gl layer id starts with `layerId`;
 * false otherwise, including when the picking info carries no layer id.
 */
export function isLayerHoveredFromArrow(objectInfo, layerId: string): boolean {
  if (!objectInfo?.picked) {
    return false;
  }

  // there could be multiple deck.gl layers created from multiple chunks in arrow table
  // the objectInfo.layer id should be `${this.id}-${i}`
  const deckLayerId = objectInfo?.layer?.props?.id;

  // the id is read through optional chaining, so it may be missing: never call startsWith on it blindly
  return typeof deckLayerId === 'string' && deckLayerId.startsWith(layerId);
}
×
227

228
/**
 * Builds a GeoJSON feature for the object currently hovered in an arrow-backed layer.
 *
 * @param objectInfo Picking info; `index` is the row of the hovered object.
 * @param dataContainer Data container providing `rowAsArray` and `getField`.
 * @param layerId Id of the layer the hover must belong to.
 * @param columnAccessor Returns the geometry column for the data container.
 * @param fieldAccessor Returns the geometry field for the data container.
 * @returns A feature whose properties hold the row values of all non-geometry fields plus
 * `index`, or null when the layer is not hovered, the index is negative, there is no data
 * container, or no geometry could be parsed.
 */
export function getHoveredObjectFromArrow(
  objectInfo,
  dataContainer,
  layerId,
  columnAccessor,
  fieldAccessor
): Feature | null {
  // hover object returns the index of the object in the data array
  // NOTE: this could be done in Deck.gl getPickingInfo(params) and binaryToGeojson()
  if (!isLayerHoveredFromArrow(objectInfo, layerId) || !(objectInfo.index >= 0) || !dataContainer) {
    return null;
  }

  const col = columnAccessor(dataContainer);
  const rawGeometry = col?.get(objectInfo.index);

  const field = fieldAccessor(dataContainer);
  const encoding = field?.metadata?.get(GEOARROW_METADATA_KEY);

  const hoveredFeature = parseGeometryFromArrow(rawGeometry, encoding);
  if (!hoveredFeature) {
    // nothing to show: skip collecting the row properties altogether
    return null;
  }

  // the field is accessed defensively everywhere, as it is for the metadata lookup above
  const geometryFieldName = field?.name;
  const properties: Record<string, unknown> = {};
  dataContainer.rowAsArray(objectInfo.index).forEach((value, i) => {
    const fieldName = dataContainer.getField?.(i)?.name;
    // leave out the geometry column itself and columns whose field name is unknown
    if (fieldName !== undefined && fieldName !== geometryFieldName) {
      properties[fieldName] = value;
    }
  });

  return {
    type: 'Feature',
    geometry: hoveredFeature,
    properties: {
      ...properties,
      index: objectInfo.index
    }
  };
}
267

268
/**
 * Looks up the required columns of a column mode.
 * @param supportedColumnModes Column modes to search, or null.
 * @param columnMode Key of the column mode of interest.
 * @returns `requiredColumns` of the first mode whose key equals `columnMode`;
 * undefined when there are no modes or no mode matches.
 */
export function getColumnModeRequiredColumns(
  supportedColumnModes: SupportedColumnMode[] | null,
  columnMode?: string
): string[] | undefined {
  if (supportedColumnModes == null) {
    return undefined;
  }

  const matchingMode = supportedColumnModes.find(mode => {
    const {key} = mode;
    return key === columnMode;
  });

  return matchingMode == null ? undefined : matchingMode.requiredColumns;
}
277

278
/**
 * Returns geoarrow fields with ARROW:extension:name POINT metadata.
 * The check itself is delegated to isGeoArrowPointField so both functions
 * always agree on what a geoarrow point field is.
 * @param fields Any fields
 * @returns geoarrow fields with ARROW:extension:name POINT metadata
 */
export function getGeoArrowPointFields(fields: Field[]): Field[] {
  return fields.filter(field => isGeoArrowPointField(field));
}
291

292
/**
 * Builds an arrow vector compatible with ARROW:extension:name geoarrow.point.
 *
 * The vector gets one chunk per record batch of the container's table; each row is a
 * fixed-size list of floats filled from `getPosition`.
 *
 * @param dataContainer Arrow data container providing the row count and the table whose batches are mirrored.
 * @param getPosition Position accessor, called with the running row index.
 * @returns An arrow vector compatible with ARROW:extension:name geoarrow.point.
 */
export function createGeoArrowPointVector(
  dataContainer: ArrowDataContainer,
  getPosition: ({index}: {index: number}) => number[]
): arrow.Vector {
  // TODO update/resize existing vector?
  // TODO find an easier way to create point geo columns
  // in a correct arrow format, as this approach seems too excessive for just a simple interleaved buffer.

  const rowCount = dataContainer.numRows();
  const arrowTable = dataContainer.getTable();

  // list size is taken from the first position; 2 is used when there are no rows
  const coordsPerPoint = rowCount > 0 ? getPosition({index: 0}).length : 2;
  // precision argument for arrow.Float (presumably arrow.Precision.DOUBLE - confirm)
  const floatPrecision = 2;

  const pointMetadata = new Map();
  pointMetadata.set(GEOARROW_METADATA_KEY, GEOARROW_EXTENSIONS.POINT);

  const coordField = new arrow.Field('xyz', new arrow.Float(floatPrecision), false, pointMetadata);
  const pointListType = new arrow.FixedSizeList(coordsPerPoint, coordField);
  const coordBuilder = new arrow.FloatBuilder({type: new arrow.Float(floatPrecision)});
  const pointListBuilder = new arrow.FixedSizeListBuilder({type: pointListType});
  pointListBuilder.addChild(coordBuilder);

  const chunks: arrow.Data[] = [];
  // single cursor object shared by all getPosition calls; its index runs across batches
  const cursor = {index: 0};
  for (const batch of arrowTable.batches) {
    const rowsInBatch = batch.numRows;

    for (let row = 0; row < rowsInBatch; ++row) {
      pointListBuilder.append(getPosition(cursor));
      ++cursor.index;
    }

    // one flushed Data per batch
    chunks.push(pointListBuilder.flush());
  }

  return arrow.makeVector(chunks);
}
337

338
/**
 * Builds a filtered index suitable for FilterArrowExtension.
 * @param numElements Size for filtered index array.
 * @param visibleIndices An array with indices of elements that aren't filtered out.
 * @param existingFilteredIndex Array to reuse; it is overwritten in place and returned when its
 * length equals `numElements`, otherwise a new array is allocated.
 * @returns filteredIndex [0|1] array for GPU filtering
 */
export function getFilteredIndex(
  numElements: number,
  visibleIndices: number[],
  existingFilteredIndex: Uint8ClampedArray | null
) {
  let mask = existingFilteredIndex;
  if (!mask || mask.length !== numElements) {
    mask = new Uint8ClampedArray(numElements);
  }

  // start with everything hidden (0), then flag the visible elements
  mask.fill(0);

  if (visibleIndices) {
    for (const visibleIndex of visibleIndices) {
      mask[visibleIndex] = 1;
    }
  }

  return mask;
}
363

364
/**
 * Returns an array of neighbors to the specified index.
 * @param neighborsField LayerColumn field with information about neighbors.
 * @param dataContainer Data container.
 * @param index Index of interest.
 * @param getPosition Position accessor.
 * @returns An array with information about neighbors. Empty when there is no neighbors column,
 * or when the cell holds no value or a value that is not a list of indices.
 */
export function getNeighbors(
  neighborsField: LayerColumn | undefined,
  dataContainer: DataContainerInterface,
  index: number,
  getPosition: ({index}: {index: number}) => number[]
): {index: number; position: number[]}[] {
  if (!neighborsField || neighborsField.fieldIdx < 0) return [];

  let neighborIndices = dataContainer.valueAt(index, neighborsField.fieldIdx);
  // In case of arrow column with an array of indices.
  // The cell may be null/undefined, so probe for toArray without dereferencing blindly.
  if (typeof neighborIndices?.toArray === 'function') {
    neighborIndices = Array.from(neighborIndices.toArray());
  }
  if (!Array.isArray(neighborIndices)) return [];

  // resolve the position of every neighbor
  return neighborIndices.map(idx => ({
    index: idx,
    position: getPosition({index: idx})
  }));
}
395

396
/**
 * Returns bounds from a geoarrow field.
 * TODO: refactor once metadata extraction from parquet to arrow vectors is in place.
 *
 * The bounding box is read from the `bbox` entry of the column's metadata inside the `geo`
 * metadata of the table schema. It is only accepted when it is an array of exactly four
 * finite numbers. Any error while reading the metadata is treated as "no bounds".
 *
 * @param layerColumn Layer columns for which to check for a bounding box.
 * @param dataContainer Data container with geoarrow metadata.
 * @returns Returns bounding box if exists, false otherwise.
 */
export function getBoundsFromArrowMetadata(
  layerColumn: LayerColumn,
  dataContainer: ArrowDataContainer
): [number, number, number, number] | false {
  try {
    const field = dataContainer.getField(layerColumn.fieldIdx);
    const table = dataContainer.getTable();

    const geoMetadata = getGeoMetadata({
      metadata: {
        // @ts-expect-error
        geo: table.schema.metadata.get('geo')
      }
    });

    if (geoMetadata) {
      const fieldMetadata = geoMetadata.columns[field.name];
      if (fieldMetadata) {
        const boundsFromMetadata = fieldMetadata['bbox'];
        if (
          Array.isArray(boundsFromMetadata) &&
          boundsFromMetadata.length === 4 &&
          // metadata comes from the file: make sure the promised number tuple really holds numbers
          boundsFromMetadata.every(value => typeof value === 'number' && Number.isFinite(value))
        ) {
          return boundsFromMetadata;
        }
      }
    }
  } catch {
    // best effort: missing or malformed metadata simply means there are no bounds
  }

  return false;
}
433

434
/**
 * Finds and returns the first satisfied column mode based on the provided columns and fields.
 *
 * A mode is satisfied when every one of its required columns has a value assigned and, if the
 * mode defines `verifyField` and the column's field is available in `fields`, that field passes
 * the verification.
 *
 * @param columnModes - An array of supported column modes to check.
 * @param columns - The available columns.
 * @param fields - Optional table fields to be used for extra verification.
 * @returns The first column mode that satisfies the required conditions, or undefined if none match.
 */
export function getSatisfiedColumnMode(
  columnModes: SupportedColumnMode[] | null,
  columns: LayerColumns | undefined,
  fields?: KeplerTable['fields']
): SupportedColumnMode | undefined {
  if (columnModes == null) {
    return undefined;
  }

  for (const mode of columnModes) {
    const allColumnsSatisfied = mode.requiredColumns?.every(requiredCol => {
      const column = columns?.[requiredCol];
      if (!column?.value) {
        return false;
      }

      // without a verifier, or without the field to verify, an assigned column is enough
      const field = fields?.[column.fieldIdx];
      return mode.verifyField && field ? mode.verifyField(field) : true;
    });

    if (allColumnsSatisfied) {
      return mode;
    }
  }

  return undefined;
}
2✔
460

461
/**
 * Checks whether a field is a geoarrow field carrying the POINT extension in its metadata.
 * @param field A field.
 * @returns True only when the field type is 'geoarrow' and its metadata entry under
 * GEOARROW_METADATA_KEY equals GEOARROW_EXTENSIONS.POINT.
 */
export function isGeoArrowPointField(field: Field) {
  if (field.type !== 'geoarrow') {
    return false;
  }

  const extensionName = field.metadata?.get(GEOARROW_METADATA_KEY);
  return extensionName === GEOARROW_EXTENSIONS.POINT;
}
1,380!
472

473
/**
 * Create default geoarrow column props based on the dataset.
 *
 * One entry is produced per geoarrow point field. Its label is the dataset label with a trailing
 * ".extension" removed, falling back to the field name when the dataset has no usable label.
 *
 * @param dataset A dataset to create layer props from.
 * @returns  geoarrow column props.
 */
export function getGeoArrowPointLayerProps(dataset: KeplerTable) {
  const {label} = dataset;
  // same for every field: dataset label without a trailing ".extension" (false when label is not a string)
  const labelWithoutExtension = typeof label === 'string' && label.replace(/\.[^/.]+$/, '');

  const altProps: FindDefaultLayerProps[] = dataset.fields
    .filter(field => isGeoArrowPointField(field))
    .map(field => ({
      label: labelWithoutExtension || field.name,
      columns: {geoarrow: {value: field.name, fieldIdx: field.fieldIdx}}
    }));

  return altProps;
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc