• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

keplergl / kepler.gl / 21722643399

05 Feb 2026 05:58PM UTC coverage: 61.648% (+0.02%) from 61.633%
21722643399

Pull #3298

github

web-flow
Merge cf4c058bd into 4bdf8f4ff
Pull Request #3298: Implement WKT validation in data-type.ts

6375 of 12273 branches covered (51.94%)

Branch coverage included in aggregate %.

10 of 11 new or added lines in 1 file covered. (90.91%)

11 existing lines in 1 file now uncovered.

13067 of 19264 relevant lines covered (67.83%)

82.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.12
/src/common-utils/src/data-type.ts
1
// SPDX-License-Identifier: MIT
2
// Copyright contributors to the kepler.gl project
3

4
import {Analyzer, DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
5
import {ArrowTableInterface, ApacheVectorInterface, RowData, Field} from '@kepler.gl/types';
6
import {ALL_FIELD_TYPES} from '@kepler.gl/constants';
7
import {console as globalConsole} from 'global/window';
8
import {range} from 'd3-array';
9
import {isHexWkb, notNullorUndefined} from './data';
10
import {h3IsValid} from './h3-utils';
11

12
// Analyzer type label used in this module for string columns whose first valid value
// passes `h3IsValid`; `analyzerTypeToFieldType` maps it to `ALL_FIELD_TYPES.h3`.
const H3_ANALYZER_TYPE = 'H3';
15✔
13

14
// Prefix pattern for WKT geometry strings: an optional `SRID=<digits>;` header,
// one of the listed geometry keywords (case-insensitive), an optional Z / M / ZM
// dimension marker, then the opening parenthesis.
const WKT_PREFIX_RE =
  /^(?:SRID=\d+\s*;\s*)?(?:POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)(?:\s+(?:Z|M|ZM))?\s*\(/i;

/**
 * Heuristic check for whether a value looks like a WKT geometry string.
 * Only the prefix is inspected; the geometry body is not parsed or validated.
 *
 * @param value any value; everything that is not a string yields `false`
 * @returns `true` when the trimmed string is at least 10 characters long,
 * contains both `(` and `)`, and starts with a recognised geometry prefix
 */
function isWkt(value: unknown): boolean {
  // Non-strings collapse to '' so they fail the length check below.
  const trimmed = typeof value === 'string' ? value.trim() : '';

  // Cheap structural checks come first so ordinary strings never reach the regex.
  const longEnough = trimmed.length >= 10;
  const hasParens = trimmed.includes('(') && trimmed.includes(')');

  return longEnough && hasParens && WKT_PREFIX_RE.test(trimmed);
}
35

36
// Analyzer types this module accepts: every entry has a matching case in
// `analyzerTypeToFieldType`.
export const ACCEPTED_ANALYZER_TYPES = [
  AnalyzerDATA_TYPES.DATE,
  AnalyzerDATA_TYPES.TIME,
  AnalyzerDATA_TYPES.DATETIME,
  AnalyzerDATA_TYPES.NUMBER,
  AnalyzerDATA_TYPES.INT,
  AnalyzerDATA_TYPES.FLOAT,
  AnalyzerDATA_TYPES.BOOLEAN,
  AnalyzerDATA_TYPES.STRING,
  AnalyzerDATA_TYPES.GEOMETRY,
  AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING,
  AnalyzerDATA_TYPES.PAIR_GEOMETRY_FROM_STRING,
  AnalyzerDATA_TYPES.ZIPCODE,
  AnalyzerDATA_TYPES.ARRAY,
  AnalyzerDATA_TYPES.OBJECT,
  H3_ANALYZER_TYPE
];

// Keys of the analyzer's type table that are not in the accepted list; handed to
// `Analyzer.computeColMeta` as `ignoredDataTypes`.
const IGNORE_DATA_TYPES = Object.keys(AnalyzerDATA_TYPES).filter(
  analyzerType => !ACCEPTED_ANALYZER_TYPES.includes(analyzerType)
);
57

58
/**
 * Build a small sample of rows for field type analysis.
 *
 * Every field is sampled independently: rows are scanned from the top and the
 * values that pass `notNullorUndefined` are packed into consecutive sample rows
 * (string values are trimmed). If a field runs out of rows before the sample is
 * full, its remaining slots are set to `null`. Because of this packing, a single
 * sample row can combine values taken from different source rows.
 *
 * @param fields field names; they also become the keys of each sample row
 * @param rows source rows, either arrays indexed by field position or objects
 * keyed by field name (decided by inspecting the first row only)
 * @param sampleCount upper bound on the number of sample rows, 50 by default
 * @returns the sample rows, or an empty array when `rows` is empty
 */
export function getSampleForTypeAnalyze({
  fields,
  rows,
  sampleCount = 50
}: {
  fields: string[];
  rows: unknown[][] | RowData;
  sampleCount?: number;
}): RowData {
  if (rows.length < 1) {
    return [];
  }

  const total = Math.min(sampleCount, rows.length);
  const sample = range(0, total, 1).map(() => ({}));
  const isRowObject = !Array.isArray(rows[0]);

  fields.forEach((field, fieldIdx) => {
    // object rows are addressed by name, array rows by position
    const key = isRowObject ? field : fieldIdx;
    // next source row to inspect
    let rowCursor = 0;
    // number of sample slots already written for this field
    let filled = 0;

    while (filled < total) {
      if (rowCursor >= rows.length) {
        // data pool depleted: pad the remaining slots with null
        sample[filled][field] = null;
        filled++;
        continue;
      }

      const cell = rows[rowCursor][key];
      rowCursor++;

      if (notNullorUndefined(cell)) {
        sample[filled][field] = typeof cell === 'string' ? cell.trim() : cell;
        filled++;
      }
    }
  });

  return sample;
}
104

105
/**
 * Getting sample data for analyzing field type for Arrow tables.
 *
 * Each field is sampled independently: its vector is scanned from row 0 and the
 * values that pass `notNullorUndefined` are packed into consecutive sample rows
 * (string values are trimmed). If a vector is missing or runs out of rows before
 * the sample is full, the remaining slots for that field are set to `null`.
 *
 * @param table Arrow table or an array of vectors.
 * @param fields Field names; the vector for a field is looked up by the field's index.
 * @param sampleCount Number of sample rows to get.
 * @returns Sample rows, or an empty array when there are no rows (including an
 * empty array of vectors).
 */
export function getSampleForTypeAnalyzeArrow(
  table: ArrowTableInterface | ApacheVectorInterface[],
  fields: string[],
  sampleCount = 50
): any[] {
  const isTable = !Array.isArray(table);

  // An empty vector array has no first vector to measure; treat it as zero rows
  // instead of throwing on `table[0].length`.
  const numRows = isTable ? table.numRows : (table[0]?.length ?? 0);
  if (numRows < 1) {
    return [];
  }

  const getVector = isTable ? index => table.getChildAt(index) : index => table[index];

  const total = Math.min(sampleCount, numRows);
  const sample = range(0, total, 1).map(() => ({}));

  // collect sample data for each field
  fields.forEach((field, fieldIdx) => {
    // resolve the column vector once per field rather than twice per row
    const vector = getVector(fieldIdx);
    let rowIndex = 0;
    let sampleIndex = 0;

    while (sampleIndex < total) {
      if (rowIndex >= numRows) {
        // if depleted data pool
        sample[sampleIndex][field] = null;
        sampleIndex++;
      } else {
        // a missing vector yields undefined here, so its rows are skipped until
        // the pool is depleted and the slots are padded with null
        const value = vector?.get(rowIndex);
        if (notNullorUndefined(value)) {
          sample[sampleIndex][field] = typeof value === 'string' ? value.trim() : value;
          sampleIndex++;
        }
        rowIndex++;
      }
    }
  });

  return sample;
}
152

153
/**
 * Translate a type-analyzer type into the kepler.gl field type.
 *
 * Analyzer types without a mapping here (the original note lists CURRENCY,
 * PERCENT and NONE) are reported through a console warning and treated as strings.
 *
 * @param aType analyzer type
 * @returns corresponding type in `ALL_FIELD_TYPES`
 */
/* eslint-disable complexity */
export function analyzerTypeToFieldType(aType: string): string {
  const types = AnalyzerDATA_TYPES;

  // Checks run in a fixed order and the first strict match wins.
  if (aType === types.DATE) {
    return ALL_FIELD_TYPES.date;
  }
  if (aType === types.TIME || aType === types.DATETIME) {
    return ALL_FIELD_TYPES.timestamp;
  }
  if (aType === types.FLOAT) {
    return ALL_FIELD_TYPES.real;
  }
  if (aType === types.INT) {
    return ALL_FIELD_TYPES.integer;
  }
  if (aType === types.BOOLEAN) {
    return ALL_FIELD_TYPES.boolean;
  }
  if (
    aType === types.GEOMETRY ||
    aType === types.GEOMETRY_FROM_STRING ||
    aType === types.PAIR_GEOMETRY_FROM_STRING
  ) {
    return ALL_FIELD_TYPES.geojson;
  }
  if (aType === types.ARRAY) {
    return ALL_FIELD_TYPES.array;
  }
  if (aType === types.OBJECT) {
    return ALL_FIELD_TYPES.object;
  }
  if (aType === types.NUMBER || aType === types.STRING || aType === types.ZIPCODE) {
    return ALL_FIELD_TYPES.string;
  }
  if (aType === H3_ANALYZER_TYPE) {
    return ALL_FIELD_TYPES.h3;
  }

  // anything else is unsupported: warn and fall back to string
  globalConsole.warn(`Unsupported analyzer type: ${aType}`);
  return ALL_FIELD_TYPES.string;
}
211

212
/**
 * Analyze field types from data in `string` format, e.g. uploaded csv.
 * Assign `type`, `fieldIdx` and `format` (timestamp only) to each field
 *
 * The type reported by the analyzer is refined for string columns: a column whose
 * first non-null value is a valid H3 id becomes H3, a column containing any hex
 * WKB value becomes GEOMETRY, and a column containing any WKT-looking value
 * becomes GEOMETRY_FROM_STRING (checked in that order).
 *
 * @param data array of row object
 * @param fieldOrder array of field names as string
 * @returns formatted fields
 * @public
 * @example
 *
 * import {getFieldsFromData} from '@kepler.gl/common-utils';
 * const data = [{
 *   time: '2016-09-17 00:09:55',
 *   value: '4',
 *   surge: '1.2',
 *   isTrip: 'true',
 *   zeroOnes: '0'
 * }, {
 *   time: '2016-09-17 00:30:08',
 *   value: '3',
 *   surge: null,
 *   isTrip: 'false',
 *   zeroOnes: '1'
 * }, {
 *   time: null,
 *   value: '2',
 *   surge: '1.3',
 *   isTrip: null,
 *   zeroOnes: '1'
 * }];
 *
 * const fieldOrder = ['time', 'value', 'surge', 'isTrip', 'zeroOnes'];
 * const fields = getFieldsFromData(data, fieldOrder);
 * // `fieldIdx` is the position of the field in `fieldOrder`; each field also carries
 * // `id`, `displayName`, `analyzerType` and `valueAccessor` (omitted below).
 * // fields = [
 * // {name: 'time', format: 'YYYY-M-D H:m:s', fieldIdx: 0, type: 'timestamp'},
 * // {name: 'value', format: '', fieldIdx: 1, type: 'integer'},
 * // {name: 'surge', format: '', fieldIdx: 2, type: 'real'},
 * // {name: 'isTrip', format: '', fieldIdx: 3, type: 'boolean'},
 * // {name: 'zeroOnes', format: '', fieldIdx: 4, type: 'integer'}];
 *
 */
export function getFieldsFromData(data: RowData, fieldOrder: string[]): Field[] {
  // TODO: add a check for epoch timestamp
  const metadata = Analyzer.computeColMeta(
    data,
    [
      // Column-name rules. They are deliberately NOT global (`g`) regexes: a global
      // RegExp carries `lastIndex` from one test()/exec() call into the next, so a
      // rule object reused across column names could miss a later matching column.
      // NOTE(review): how the analyzer applies these rules is not visible here — confirm.
      {regex: /.*geojson|all_points/, dataType: 'GEOMETRY'},
      {regex: /.*census/, dataType: 'STRING'}
    ],
    {ignoredDataTypes: IGNORE_DATA_TYPES}
  );

  const {fieldByIndex} = renameDuplicateFields(fieldOrder);

  const result = fieldOrder.map((field, index) => {
    const name = fieldByIndex[index];

    // NOTE(review): metadata is looked up by the original `field`, while the sample
    // values below are read by the de-duplicated `name`; for a renamed duplicate the
    // two keys differ — confirm this is intended.
    const fieldMeta = metadata.find(m => m.key === field);

    // fieldMeta could be undefined if the field has no data and Analyzer.computeColMeta
    // will ignore the field. In this case, we will simply assign the field type to STRING
    // since dropping the column in the RowData could be expensive
    let type = fieldMeta?.type || 'STRING';
    const format = fieldMeta?.format || '';

    // quick check if first valid string in column is H3
    if (type === AnalyzerDATA_TYPES.STRING) {
      for (let i = 0, n = data.length; i < n; ++i) {
        if (notNullorUndefined(data[i][name])) {
          type = h3IsValid(data[i][name] || '') ? H3_ANALYZER_TYPE : type;
          break;
        }
      }
    }

    // quick check if string is hex wkb
    if (type === AnalyzerDATA_TYPES.STRING) {
      type = data.some(d => isHexWkb(d[name])) ? AnalyzerDATA_TYPES.GEOMETRY : type;
    }

    // quick check if string is wkt
    if (type === AnalyzerDATA_TYPES.STRING) {
      type = data.some(d => isWkt(d[name])) ? AnalyzerDATA_TYPES.GEOMETRY_FROM_STRING : type;
    }

    return {
      name,
      id: name,
      displayName: name,
      format,
      fieldIdx: index,
      type: analyzerTypeToFieldType(type),
      analyzerType: type,
      // reads this field's value from a data container by row index and field position
      valueAccessor: dc => d => {
        return dc.valueAt(d.index, index);
      }
    };
  });

  return result;
}
313

314
/**
 * Pass in an array of field names, rename the duplicated ones
 * and return a map from old field index to new name.
 *
 * The first occurrence of a name is kept as is. A later occurrence is renamed to
 * `<name>-<counter>`, using the smallest counter (starting at 0) whose result is
 * not already taken by an earlier output name.
 *
 * @param fieldOrder field names, possibly containing duplicates
 * @returns `allNames`: the resulting unique names in input order;
 * `fieldByIndex`: the resulting name for each input index
 */
export function renameDuplicateFields(fieldOrder: string[]): {
  allNames: string[];
  fieldByIndex: string[];
} {
  const allNames: string[] = [];
  const fieldByIndex: string[] = [];
  // Mirrors `allNames` so membership checks are constant time instead of an
  // array scan per lookup (matters for very wide tables).
  const taken = new Set<string>();

  fieldOrder.forEach((field, i) => {
    let fieldName = field;

    // add a counter to duplicated names
    if (taken.has(field)) {
      let counter = 0;
      while (taken.has(`${field}-${counter}`)) {
        counter++;
      }
      fieldName = `${field}-${counter}`;
    }

    fieldByIndex[i] = fieldName;
    allNames.push(fieldName);
    taken.add(fieldName);
  });

  return {allNames, fieldByIndex};
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc