• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

keplergl / kepler.gl / 13395431770

18 Feb 2025 04:29PM UTC coverage: 66.175% (-0.3%) from 66.434%
13395431770

push

github

web-flow
[feat] improvements to duckDB column type handling (#2970)

This PR intends to preserve column types between different types of ingestion into Kepler and DuckDB.

- timestamps stored as strings from Arrow tables are recognized as timestamps. 
- apply extra metadata from table.schema.metadata (geoparquet files). 
- DuckDB geometry is automatically cast to WKB, and properly marked with geoarrow extensions.
- DuckDB column types and query result Arrow table types consolidation.
- Apply extra logic only to the last select query.
- geoarrow constants to constants module
- add getSampleForTypeAnalyzeArrow so that type analysis supports Arrow data instead of failing
- arrowSchemaToFields accepts extra info from DuckDB table schemas. JSON type gets GEOMETRY_FROM_STRING type, GEOMETRY with geoarrow metadata gets GEOMETRY type, timestamp ...
- fix in validateInputData - check analyzerType only for current field
- fix in validateInputData - support arrow input data

---------

Signed-off-by: Ihor Dykhta <dikhta.igor@gmail.com>

6024 of 10612 branches covered (56.77%)

Branch coverage included in aggregate %.

10 of 94 new or added lines in 8 files covered. (10.64%)

1 existing line in 1 file now uncovered.

12368 of 17181 relevant lines covered (71.99%)

88.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

6.49
/src/deckgl-arrow-layers/src/layers/geo-arrow-scatterplot-layer.ts
1
// SPDX-License-Identifier: MIT
2
// Copyright contributors to the kepler.gl project
3

4
// deck.gl-community
5
// SPDX-License-Identifier: MIT
6
// Copyright (c) vis.gl contributors
7

8
import {
9
  CompositeLayer,
10
  CompositeLayerProps,
11
  DefaultProps,
12
  GetPickingInfoParams,
13
  Layer,
14
  LayersList,
15
  assert
16
} from '@deck.gl/core/typed';
17
import {ScatterplotLayer} from '@deck.gl/layers/typed';
18
import type {ScatterplotLayerProps} from '@deck.gl/layers/typed';
19
import * as arrow from 'apache-arrow';
20
import * as ga from '@geoarrow/geoarrow-js';
21

22
import {GEOARROW_EXTENSIONS} from '@kepler.gl/constants';
23

24
import {
25
  assignAccessor,
26
  extractAccessorsFromProps,
27
  getGeometryVector,
28
  invertOffsets
29
} from '../utils/utils';
30
import {GeoArrowExtraPickingProps, computeChunkOffsets, getPickingInfo} from '../utils/picking';
31
import {ColorAccessor, FloatAccessor, GeoArrowPickingInfo, ExtensionProps} from '../types';
32
import {validateAccessors} from '../utils/validate';
33

34
/**
 * Upstream ScatterplotLayer props that this layer redeclares with Arrow-aware types.
 *
 * NOTE(review): `getLineWidth` is also redeclared by the layer-specific props but is
 * not listed here, so its final type is the intersection of both declarations —
 * confirm whether that is intentional.
 */
type RedeclaredScatterplotProps =
  | 'data'
  | 'getPosition'
  | 'getRadius'
  | 'getFillColor'
  | 'getLineColor';

/**
 * Complete prop set accepted by GeoArrowScatterplotLayer: the upstream
 * ScatterplotLayer props (minus the redeclared ones), the layer-specific props
 * and the generic composite layer props.
 */
export type GeoArrowScatterplotLayerProps = Omit<
  ScatterplotLayerProps<arrow.Table>,
  RedeclaredScatterplotProps
> &
  _GeoArrowScatterplotLayerProps &
  CompositeLayerProps;
41

42
/** Props that GeoArrowScatterplotLayer declares itself, on top of the upstream ones. */
type _GeoArrowScatterplotLayerProps = {
  /** Arrow table holding the geometry column and any attribute columns. */
  data: arrow.Table;

  /**
   * When `true`, the provided arrays are validated before rendering
   * (e.g. their chunk lengths).
   * @default true
   */
  _validate?: boolean;
  /**
   * Geometry vector providing the center position of every point.
   * When omitted, the layer looks for a table column whose extension type is
   * `"geoarrow.point"` or `"geoarrow.multipoint"` and uses that instead.
   */
  getPosition?: ga.vector.PointVector | ga.vector.MultiPointVector;
  /**
   * Accessor for the point radius.
   * @default 1
   */
  getRadius?: FloatAccessor;
  /**
   * Accessor for the fill color.
   * @default [0, 0, 0, 255]
   */
  getFillColor?: ColorAccessor;
  /**
   * Accessor for the stroke (outline) color.
   * @default [0, 0, 0, 255]
   */
  getLineColor?: ColorAccessor;
  /**
   * Accessor for the stroke (outline) width.
   * @default 1
   */
  getLineWidth?: FloatAccessor;
};
78

79
// Start from the upstream ScatterplotLayer defaults, leaving out `data` and
// `getPosition`, which this layer declares with its own Arrow-based types.
const {
  data: _ignoredData,
  getPosition: _ignoredGetPosition,
  ...inheritedDefaultProps
} = ScatterplotLayer.defaultProps;

// Defaults for the props this layer introduces itself.
const ourDefaultProps = {_validate: true};

// @ts-expect-error
const defaultProps: DefaultProps<GeoArrowScatterplotLayerProps> = {
  ...inheritedDefaultProps,
  ...ourDefaultProps
};
96

97
/**
 * Composite layer that renders the point or multipoint geometries of an Arrow
 * table by emitting one `ScatterplotLayer` sub-layer per record batch.
 */
export class GeoArrowScatterplotLayer<ExtraProps extends object = object> extends CompositeLayer<
  GeoArrowScatterplotLayerProps & ExtraProps
> {
  static defaultProps = defaultProps;
  static layerName = 'GeoArrowScatterplotLayer';

  /**
   * Builds the picking info for a hit on one of the sub-layers by delegating to
   * the shared `getPickingInfo` helper together with this layer's table.
   *
   * @param params picking parameters, including the sub-layer that was hit
   * @returns the picking info produced by the helper
   */
  getPickingInfo(
    params: GetPickingInfoParams & {
      sourceLayer: {props: GeoArrowExtraPickingProps};
    }
  ): GeoArrowPickingInfo {
    const {data: table} = this.props;
    return getPickingInfo(params, table);
  }

  /**
   * Chooses the geometry vector to draw and dispatches to the matching renderer.
   *
   * An explicit `getPosition` vector wins; otherwise the table is searched first
   * for a point column and then for a multipoint column.
   *
   * @returns the sub-layers produced by the point or multipoint renderer
   * @throws Error when `getPosition` is neither a point nor a multipoint vector,
   *   or when no suitable geometry column is found in the table
   */
  renderLayers(): Layer<object> | LayersList | null {
    const {data: table, getPosition: explicitGeometry} = this.props;

    // An explicitly supplied geometry vector takes precedence over inference.
    if (explicitGeometry !== undefined) {
      if (ga.vector.isPointVector(explicitGeometry)) {
        return this._renderLayersPoint(explicitGeometry);
      }

      if (ga.vector.isMultiPointVector(explicitGeometry)) {
        return this._renderLayersMultiPoint(explicitGeometry);
      }

      throw new Error('getPosition should pass in an arrow Vector of Point or MultiPoint type');
    }

    // No explicit vector: look the geometry column up in the table, points first.
    const inferredPoints = getGeometryVector(table, GEOARROW_EXTENSIONS.POINT);
    if (inferredPoints !== null) {
      return this._renderLayersPoint(inferredPoints);
    }

    const inferredMultiPoints = getGeometryVector(table, GEOARROW_EXTENSIONS.MULTIPOINT);
    if (inferredMultiPoints !== null) {
      return this._renderLayersMultiPoint(inferredMultiPoints);
    }

    throw new Error('getPosition not GeoArrow point or multipoint');
  }

  /**
   * Creates one `ScatterplotLayer` per record batch for a point geometry column.
   *
   * @param geometryColumn point vector whose chunks are indexed by record batch
   * @returns the list of sub-layers, in record batch order
   */
  _renderLayersPoint(geometryColumn: ga.vector.PointVector): Layer<object> | LayersList | null {
    const {data: table, _validate: shouldValidate} = this.props;

    if (shouldValidate) {
      assert(ga.vector.isPointVector(geometryColumn));
      validateAccessors(this.props, table);
    }

    // Separate the accessor props from the remaining props; getPosition is listed
    // for exclusion since it is supplied as an attribute below.
    const [accessors, passThroughProps] = extractAccessorsFromProps(this.props, ['getPosition']);
    const tableOffsets = computeChunkOffsets(table.data);

    return table.batches.map((recordBatch, batchIndex): ScatterplotLayer<any> => {
      const batchGeometry = geometryColumn.data[batchIndex];
      const coordValues = ga.child.getPointChild(batchGeometry).values;

      // @ts-expect-error how to properly retrieve batch offset?
      const batchOffset = geometryColumn._offsets[batchIndex];

      const subLayerProps: ScatterplotLayerProps<any> & ExtensionProps = {
        // A composite layer does not render by itself, so our own default props
        // still have to be forwarded to the sub-layer.
        ...ourDefaultProps,
        ...passThroughProps,

        // needed later when resolving picking info
        recordBatchIdx: batchIndex,
        tableOffsets,

        id: `${this.props.id}-geoarrow-scatterplot-${batchIndex}`,
        data: {
          // @ts-expect-error
          data: recordBatch,
          length: batchGeometry.length,
          attributes: {
            getPosition: {
              value: coordValues,
              size: batchGeometry.type.listSize
            }
          }
        }
      };

      Object.entries(accessors).forEach(([propName, propInput]) => {
        assignAccessor({
          props: subLayerProps,
          propName,
          propInput,
          chunkIdx: batchIndex,
          batchOffset
        });
      });

      return new ScatterplotLayer({
        ...this.getSubLayerProps(subLayerProps),
        // keep the bound accessors: extensions would otherwise overwrite them
        // with their pass-through accessors
        getFiltered: subLayerProps.getFiltered,
        getFilterValue: subLayerProps.getFilterValue
      });
    });
  }

  /**
   * Creates one `ScatterplotLayer` per record batch for a multipoint geometry
   * column, rendering every child point of every multipoint.
   *
   * @param geometryColumn multipoint vector whose chunks are indexed by record batch
   * @returns the list of sub-layers, in record batch order
   */
  _renderLayersMultiPoint(
    geometryColumn: ga.vector.MultiPointVector
  ): Layer<object> | LayersList | null {
    const {data: table, _validate: shouldValidate} = this.props;

    // TODO: validate that if nested, accessor props have the same nesting
    // structure as the main geometry column.
    if (shouldValidate) {
      assert(ga.vector.isMultiPointVector(geometryColumn));
      validateAccessors(this.props, table);
    }

    // Separate the accessor props from the remaining props; getPosition is listed
    // for exclusion since it is supplied as an attribute below.
    const [accessors, passThroughProps] = extractAccessorsFromProps(this.props, ['getPosition']);
    const tableOffsets = computeChunkOffsets(table.data);

    return table.batches.map((recordBatch, batchIndex): ScatterplotLayer => {
      const batchMultiPoints = geometryColumn.data[batchIndex];
      const childPoints = ga.child.getMultiPointChild(batchMultiPoints);
      const geomOffsets = batchMultiPoints.valueOffsets;
      const coordValues = ga.child.getPointChild(childPoints).values;

      // @ts-expect-error how to properly retrieve batch offset?
      const batchOffset = geometryColumn._offsets[batchIndex];

      const subLayerProps: ScatterplotLayerProps & ExtensionProps = {
        // A composite layer does not render by itself, so our own default props
        // still have to be forwarded to the sub-layer.
        ...ourDefaultProps,
        ...passThroughProps,

        // needed later when resolving picking info
        recordBatchIdx: batchIndex,
        tableOffsets,

        id: `${this.props.id}-geoarrow-scatterplot-${batchIndex}`,
        data: {
          // @ts-expect-error
          data: recordBatch,
          // Lookup from the expanded (per child point) index back to the index of
          // the original multipoint; used for picking and for function callbacks.
          invertedGeomOffsets: invertOffsets(geomOffsets),
          // The length has to be taken one level down: it counts the child
          // points, not the multipoints.
          length: childPoints.length,
          attributes: {
            getPosition: {
              value: coordValues,
              size: childPoints.type.listSize
            }
          }
        }
      };

      Object.entries(accessors).forEach(([propName, propInput]) => {
        assignAccessor({
          props: subLayerProps,
          propName,
          propInput,
          chunkIdx: batchIndex,
          geomCoordOffsets: geomOffsets,
          batchOffset
        });
      });

      return new ScatterplotLayer({
        ...this.getSubLayerProps(subLayerProps),
        // keep the bound accessors: extensions would otherwise overwrite them
        // with their pass-through accessors
        getFiltered: subLayerProps.getFiltered,
        getFilterValue: subLayerProps.getFilterValue
      });
    });
  }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc