• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In
Build has been canceled!

keplergl / kepler.gl / 21722610128

05 Feb 2026 05:57PM UTC coverage: 61.648% (+0.02%) from 61.633%
21722610128

Pull #3298

github

web-flow
Merge 8ef57d291 into 4bdf8f4ff
Pull Request #3298: Implement WKT validation in data-type.ts

6375 of 12273 branches covered (51.94%)

Branch coverage included in aggregate %.

10 of 10 new or added lines in 1 file covered. (100.0%)

10 existing lines in 1 file now uncovered.

13067 of 19264 relevant lines covered (67.83%)

82.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.12
/src/common-utils/src/data-type.ts
1
// SPDX-License-Identifier: MIT
2
// Copyright contributors to the kepler.gl project
3

4
import {Analyzer, DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
5
import {ArrowTableInterface, ApacheVectorInterface, RowData, Field} from '@kepler.gl/types';
6
import {ALL_FIELD_TYPES} from '@kepler.gl/constants';
7
import {console as globalConsole} from 'global/window';
8
import {range} from 'd3-array';
9
import {isHexWkb, notNullorUndefined} from './data';
10
import {h3IsValid} from './h3-utils';
11

12
// Analyzer-type label for H3 values. It is defined locally as a plain string rather than read
// from AnalyzerDATA_TYPES; getFieldsFromData assigns it when h3IsValid accepts the first
// non-null value of a STRING column, and analyzerTypeToFieldType maps it to ALL_FIELD_TYPES.h3.
const H3_ANALYZER_TYPE = 'H3';
15✔
13

14
// WKT (Well-Known Text) geometry prefixes.
// Keep this intentionally lightweight: we only use it to prevent geometry strings
// from being treated as generic text (e.g. for tooltip field auto-picking).
// Accepts an optional `SRID=<digits>;` prefix, one of the seven geometry keywords
// (case-insensitive), an optional Z / M / ZM dimension tag, then an opening parenthesis.
const WKT_PREFIX_RE =
  /^(?:SRID=\d+\s*;\s*)?(?:POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)(?:\s+(?:Z|M|ZM))?\s*\(/i;

/**
 * Cheap check for whether a value looks like a WKT geometry string.
 *
 * Only the prefix is inspected (see `WKT_PREFIX_RE`); the coordinate list is not parsed,
 * so a `true` result does not guarantee the geometry is well-formed.
 *
 * @param value any cell value
 * @returns `true` when `value` is a string that, once trimmed, is at least 10 characters long,
 * contains both `(` and `)`, and starts with a recognised WKT geometry prefix
 */
function isWkt(value: unknown): boolean {
  if (typeof value !== 'string') {
    return false;
  }

  const s = value.trim();
  // Shorter strings are rejected up front; e.g. `POINT(1 2)` is exactly 10 characters.
  if (s.length < 10) {
    return false;
  }

  // Quick structural checks to avoid regex work for typical strings.
  if (!s.includes('(') || !s.includes(')')) {
    return false;
  }

  return WKT_PREFIX_RE.test(s);
}
37

38
/**
 * Analyzer types this module accepts when detecting column types.
 * Every key of `AnalyzerDATA_TYPES` that is not listed here ends up in `IGNORE_DATA_TYPES`.
 * The last entry, `H3_ANALYZER_TYPE`, is the locally defined 'H3' label rather than a
 * member of `AnalyzerDATA_TYPES`.
 */
export const ACCEPTED_ANALYZER_TYPES = [
  AnalyzerDATA_TYPES.DATE,
  AnalyzerDATA_TYPES.TIME,
  AnalyzerDATA_TYPES.DATETIME,
  AnalyzerDATA_TYPES.NUMBER,
  AnalyzerDATA_TYPES.INT,
  AnalyzerDATA_TYPES.FLOAT,
  AnalyzerDATA_TYPES.BOOLEAN,
  AnalyzerDATA_TYPES.STRING,
  AnalyzerDATA_TYPES.GEOMETRY,
  AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING,
  AnalyzerDATA_TYPES.PAIR_GEOMETRY_FROM_STRING,
  AnalyzerDATA_TYPES.ZIPCODE,
  AnalyzerDATA_TYPES.ARRAY,
  AnalyzerDATA_TYPES.OBJECT,
  H3_ANALYZER_TYPE
];
55

56
// Keys of AnalyzerDATA_TYPES that are not in ACCEPTED_ANALYZER_TYPES; handed to
// Analyzer.computeColMeta as `ignoredDataTypes`.
// NOTE(review): this compares the *keys* of AnalyzerDATA_TYPES against a list built from its
// *values*, so it presumably relies on every key being equal to its value — confirm against
// type-analyzer.
const IGNORE_DATA_TYPES = Object.keys(AnalyzerDATA_TYPES).filter(
  type => !ACCEPTED_ANALYZER_TYPES.includes(type)
);
59

60
/**
 * Getting sample data for analyzing field type.
 *
 * Each field is sampled independently: rows are scanned from the top and the first
 * non-null / non-undefined values are collected, so entry `j` of the result may combine
 * values that come from different source rows. String values are trimmed. When a field
 * runs out of rows before the sample is full, its remaining entries are set to `null`.
 *
 * @param fields field names; for array rows the position in this list is the column index
 * @param rows rows as arrays (indexed by field position) or as objects (keyed by field name);
 * the shape is decided from the first row only
 * @param sampleCount maximum number of sample rows to build, defaults to 50
 * @returns sample row objects keyed by field name, or `[]` when `rows` is empty
 */
export function getSampleForTypeAnalyze({
  fields,
  rows,
  sampleCount = 50
}: {
  fields: string[];
  rows: unknown[][] | RowData;
  sampleCount?: number;
}): RowData {
  // Nothing to sample from: bail out before allocating anything.
  if (rows.length < 1) {
    return [];
  }

  const total = Math.min(sampleCount, rows.length);
  const sample = range(0, total, 1).map(() => ({}));
  const isRowObject = !Array.isArray(rows[0]);

  // collect sample data for each field
  fields.forEach((field, fieldIdx) => {
    // object rows are keyed by field name, array rows by column position
    const key = isRowObject ? field : fieldIdx;
    let sampleIdx = 0;

    // take non-empty values in row order until the sample is full or the rows run out
    for (let rowIdx = 0; rowIdx < rows.length && sampleIdx < total; rowIdx++) {
      const value = rows[rowIdx][key];
      if (notNullorUndefined(value)) {
        sample[sampleIdx][field] = typeof value === 'string' ? value.trim() : value;
        sampleIdx++;
      }
    }

    // data pool depleted: pad the rest of this field with null
    for (; sampleIdx < total; sampleIdx++) {
      sample[sampleIdx][field] = null;
    }
  });

  return sample;
}
106

107
/**
 * Getting sample data for analyzing field type for Arrow tables.
 *
 * Each field is sampled independently: its column is scanned from the top and the first
 * non-null / non-undefined values are collected, so entry `j` of the result may combine
 * values that come from different source rows. String values are trimmed. When a column
 * runs out of rows before the sample is full, the remaining entries are set to `null`.
 *
 * @param table Arrow table or an array of vectors.
 * @param fields Field names; the position in this list is used as the column index.
 * @param sampleCount Number of sample rows to get.
 * @returns Sample rows, or `[]` when there are no rows (including an empty vector array).
 */
export function getSampleForTypeAnalyzeArrow(
  table: ArrowTableInterface | ApacheVectorInterface[],
  fields: string[],
  sampleCount = 50
): any[] {
  const isTable = !Array.isArray(table);

  // An empty vector array has no first vector to read a length from; treat it as zero rows
  // instead of throwing on `table[0].length`.
  const numRows = isTable ? table.numRows : (table[0]?.length ?? 0);
  if (numRows < 1) {
    return [];
  }

  const getVector = isTable ? index => table.getChildAt(index) : index => table[index];

  const total = Math.min(sampleCount, numRows);
  const sample = range(0, total, 1).map(() => ({}));

  // collect sample data for each field
  fields.forEach((field, fieldIdx) => {
    // resolve the column once per field; it may be missing, hence the optional call below
    const vector = getVector(fieldIdx);
    let sampleIndex = 0;

    // take non-empty values in row order until the sample is full or the rows run out
    for (let rowIndex = 0; rowIndex < numRows && sampleIndex < total; rowIndex++) {
      const value = vector?.get(rowIndex);
      if (notNullorUndefined(value)) {
        sample[sampleIndex][field] = typeof value === 'string' ? value.trim() : value;
        sampleIndex++;
      }
    }

    // data pool depleted: pad the rest of this field with null
    for (; sampleIndex < total; sampleIndex++) {
      sample[sampleIndex][field] = null;
    }
  });

  return sample;
}
154

155
/**
 * Convert type-analyzer output to kepler.gl field types
 *
 * Any analyzer type without an explicit case logs a warning through the global console
 * and is treated as a string field.
 *
 * @param aType analyzer type, i.e. a value of `AnalyzerDATA_TYPES` or the local H3 label
 * @returns corresponding type in `ALL_FIELD_TYPES`
 */
/* eslint-disable complexity */
export function analyzerTypeToFieldType(aType: string): string {
  const {
    DATE,
    TIME,
    DATETIME,
    NUMBER,
    INT,
    FLOAT,
    BOOLEAN,
    STRING,
    GEOMETRY,
    GEOMETRY_FROM_STRING,
    PAIR_GEOMETRY_FROM_STRING,
    ZIPCODE,
    ARRAY,
    OBJECT
  } = AnalyzerDATA_TYPES;

  // TODO: unrecognized types
  // CURRENCY PERCENT NONE
  switch (aType) {
    case DATE:
      return ALL_FIELD_TYPES.date;
    case TIME:
    case DATETIME:
      return ALL_FIELD_TYPES.timestamp;
    case FLOAT:
      return ALL_FIELD_TYPES.real;
    case INT:
      return ALL_FIELD_TYPES.integer;
    case BOOLEAN:
      return ALL_FIELD_TYPES.boolean;
    case GEOMETRY:
    case GEOMETRY_FROM_STRING:
    case PAIR_GEOMETRY_FROM_STRING:
      return ALL_FIELD_TYPES.geojson;
    case ARRAY:
      return ALL_FIELD_TYPES.array;
    case OBJECT:
      return ALL_FIELD_TYPES.object;
    // NUMBER is deliberately grouped with the string-like types here
    case NUMBER:
    case STRING:
    case ZIPCODE:
      return ALL_FIELD_TYPES.string;
    case H3_ANALYZER_TYPE:
      return ALL_FIELD_TYPES.h3;
    default:
      // unknown analyzer type: warn and fall back to string rather than throwing
      globalConsole.warn(`Unsupported analyzer type: ${aType}`);
      return ALL_FIELD_TYPES.string;
  }
}
213

214
/**
 * Analyze field types from data in `string` format, e.g. uploaded csv.
 * Assign `type`, `fieldIdx` and `format` (timestamp only) to each field
 *
 * Columns the analyzer reports as STRING get three extra checks, in this order:
 * H3 (first non-null value only), hex WKB (any value), WKT (any value).
 *
 * @param data array of row object
 * @param fieldOrder array of field names as string
 * @returns formatted fields; `fieldIdx` is the position of the field in `fieldOrder`
 * @public
 * @example
 *
 * import {getFieldsFromData} from '@kepler.gl/common-utils';
 * const data = [{
 *   time: '2016-09-17 00:09:55',
 *   value: '4',
 *   surge: '1.2',
 *   isTrip: 'true',
 *   zeroOnes: '0'
 * }, {
 *   time: '2016-09-17 00:30:08',
 *   value: '3',
 *   surge: null,
 *   isTrip: 'false',
 *   zeroOnes: '1'
 * }, {
 *   time: null,
 *   value: '2',
 *   surge: '1.3',
 *   isTrip: null,
 *   zeroOnes: '1'
 * }];
 *
 * const fieldOrder = ['time', 'value', 'surge', 'isTrip', 'zeroOnes'];
 * const fields = getFieldsFromData(data, fieldOrder);
 * // fields (abbreviated; each entry also carries id, displayName, analyzerType
 * // and valueAccessor) = [
 * // {name: 'time', format: 'YYYY-M-D H:m:s', fieldIdx: 0, type: 'timestamp'},
 * // {name: 'value', format: '', fieldIdx: 1, type: 'integer'},
 * // {name: 'surge', format: '', fieldIdx: 2, type: 'real'},
 * // {name: 'isTrip', format: '', fieldIdx: 3, type: 'boolean'},
 * // {name: 'zeroOnes', format: '', fieldIdx: 4, type: 'integer'}];
 *
 */
export function getFieldsFromData(data: RowData, fieldOrder: string[]): Field[] {
  // add a check for epoch timestamp
  const metadata = Analyzer.computeColMeta(
    data,
    [
      {regex: /.*geojson|all_points/g, dataType: 'GEOMETRY'},
      {regex: /.*census/g, dataType: 'STRING'}
    ],
    {ignoredDataTypes: IGNORE_DATA_TYPES}
  );

  const {fieldByIndex} = renameDuplicateFields(fieldOrder);

  const result = fieldOrder.map((field, index) => {
    // de-duplicated name; differs from `field` only when the column name is repeated
    const name = fieldByIndex[index];

    const fieldMeta = metadata.find(m => m.key === field);

    // fieldMeta could be undefined if the field has no data and Analyzer.computeColMeta
    // will ignore the field. In this case, we will simply assign the field type to STRING
    // since dropping the column in the RowData could be expensive
    let type = fieldMeta?.type || 'STRING';
    const format = fieldMeta?.format || '';

    // NOTE(review): the analyzer metadata above is looked up by the original `field`, while
    // the checks below read `data` by the de-duplicated `name`. For a renamed duplicate column
    // `data` presumably has no such key, so these checks would see only undefined — confirm
    // whether that is intended.

    // quick check if first valid string in column is H3
    if (type === AnalyzerDATA_TYPES.STRING) {
      for (let i = 0, n = data.length; i < n; ++i) {
        if (notNullorUndefined(data[i][name])) {
          type = h3IsValid(data[i][name] || '') ? H3_ANALYZER_TYPE : type;
          break;
        }
      }
    }

    // quick check if string is hex wkb
    if (type === AnalyzerDATA_TYPES.STRING) {
      type = data.some(d => isHexWkb(d[name])) ? AnalyzerDATA_TYPES.GEOMETRY : type;
    }

    // quick check if string is wkt
    if (type === AnalyzerDATA_TYPES.STRING) {
      type = data.some(d => isWkt(d[name])) ? AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING : type;
    }

    return {
      name,
      id: name,
      displayName: name,
      format,
      fieldIdx: index,
      type: analyzerTypeToFieldType(type),
      analyzerType: type,
      // reads this field's column (by position) for the row referenced by `d.index`
      valueAccessor: dc => d => {
        return dc.valueAt(d.index, index);
      }
    };
  });

  return result;
}
315

316
/**
 * pass in an array of field names, rename duplicated one
 * and return a map from old field index to new name
 *
 * A name that was already produced gets the suffix `-<n>`, where `n` is the smallest
 * counter starting at 0 for which `<field>-<n>` has not been produced yet.
 *
 * @param fieldOrder field names, possibly containing duplicates
 * @returns `fieldByIndex`: new field name by index; `allNames`: every produced name in order
 */
export function renameDuplicateFields(fieldOrder: string[]): {
  allNames: string[];
  fieldByIndex: string[];
} {
  const allNames: string[] = [];
  const fieldByIndex: string[] = [];
  // Mirrors `allNames` for constant-time membership checks, so wide tables are not
  // re-scanned for every column.
  const taken = new Set<string>();

  fieldOrder.forEach((field, i) => {
    let fieldName = field;

    // add a counter to duplicated names
    if (taken.has(field)) {
      let counter = 0;
      while (taken.has(`${field}-${counter}`)) {
        counter++;
      }
      fieldName = `${field}-${counter}`;
    }

    fieldByIndex[i] = fieldName;
    allNames.push(fieldName);
    taken.add(fieldName);
  });

  return {allNames, fieldByIndex};
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc