• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

teableio / teable / 10070785195

24 Jul 2024 05:01AM UTC coverage: 81.98% (+64.2%) from 17.779%
10070785195

push

github

web-flow
perf: csv import (#746)

* feat: using worker parse csv

* fix: import multiple column error

* feat: update webpack config for import worker

* fix: vitest worker file path error

* fix: excel import missing key

* feat: using `convertCellValue2DBValue` transfer cellvalue

* feat: add workerId escape conflict

* fix: sqlite e2e error

* feat: compact filter input

4275 of 4473 branches covered (95.57%)

103 of 173 new or added lines in 4 files covered. (59.54%)

28342 of 34572 relevant lines covered (81.98%)

1216.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.51
/apps/nestjs-backend/src/features/import/open-api/import.class.ts
1
import { BadRequestException } from '@nestjs/common';
2✔
2
import { getUniqName, FieldType } from '@teable/core';
2✔
3
import type { IValidateTypes, IAnalyzeVo } from '@teable/openapi';
2✔
4
import { SUPPORTEDTYPE, importTypeMap } from '@teable/openapi';
2✔
5
import { zip, toString, intersection, chunk as chunkArray } from 'lodash';
2✔
6
import fetch from 'node-fetch';
2✔
7
import sizeof from 'object-sizeof';
2✔
8
import Papa from 'papaparse';
2✔
9
import * as XLSX from 'xlsx';
2✔
10
import type { ZodType } from 'zod';
2✔
11
import z from 'zod';
2✔
12
import { exceptionParse } from '../../../utils/exception-parse';
2✔
13
import { toLineDelimitedStream } from './delimiter-stream';
2✔
14

2✔
15
const validateZodSchemaMap: Record<IValidateTypes, ZodType> = {
2✔
16
  [FieldType.Checkbox]: z.union([z.string(), z.boolean()]).refine((value: unknown) => {
2✔
17
    if (typeof value === 'boolean') {
44✔
18
      return true;
12✔
19
    }
12✔
20
    if (
32✔
21
      typeof value === 'string' &&
32✔
22
      (value.toLowerCase() === 'false' || value.toLowerCase() === 'true')
32✔
23
    ) {
44✔
24
      return true;
8✔
25
    }
8✔
26
    return false;
24✔
27
  }),
24✔
28
  [FieldType.Date]: z.coerce.date(),
2✔
29
  [FieldType.Number]: z.coerce.number(),
2✔
30
  [FieldType.LongText]: z
2✔
31
    .string()
2✔
32
    .refine((value) => z.string().safeParse(value) && /\n/.test(value)),
2✔
33
  [FieldType.SingleLineText]: z.string(),
2✔
34
};
2✔
35

2✔
36
/**
 * Configuration accepted by every Importer implementation
 * (see importerFactory).
 */
export interface IImportConstructorParams {
  // Location of the file to import; fetched in Importer.getFile.
  url: string;
  // Source kind (e.g. csv / excel) — drives importerFactory dispatch
  // and the accepted content-type check.
  type: SUPPORTEDTYPE;
  // Optional cap on imported rows; exceeding it aborts the import with
  // an upgrade hint via the onError callback.
  maxRowCount?: number;
  // Derived from the content-disposition header in getFile when available.
  fileName?: string;
}
2✔
42

2✔
43
/**
 * Parsed worksheet data keyed by sheet name: each value is the sheet's
 * rows, each row an array of raw cell values.
 */
interface IParseResult {
  [x: string]: unknown[][];
}
2✔
46

2✔
47
export abstract class Importer {
2✔
48
  public static DEFAULT_ERROR_MESSAGE = 'unknown error';
20✔
49

20✔
50
  public static CHUNK_SIZE = 1024 * 1024 * 0.2;
20✔
51

20✔
52
  public static MAX_CHUNK_LENGTH = 500;
20✔
53

20✔
54
  public static DEFAULT_COLUMN_TYPE: IValidateTypes = FieldType.SingleLineText;
20✔
55

20✔
56
  // order make sence
20✔
57
  public static readonly SUPPORTEDTYPE: IValidateTypes[] = [
20✔
58
    FieldType.Checkbox,
72✔
59
    FieldType.Number,
72✔
60
    FieldType.Date,
72✔
61
    FieldType.LongText,
72✔
62
    FieldType.SingleLineText,
72✔
63
  ];
72✔
64

20✔
65
  constructor(public config: IImportConstructorParams) {}
20✔
66

20✔
67
  abstract parse(
20✔
68
    ...args: [
20✔
69
      options?: unknown,
20✔
70
      chunk?: (
20✔
71
        chunk: Record<string, unknown[][]>,
20✔
72
        onFinished?: () => void,
20✔
73
        onError?: (errorMsg: string) => void
20✔
74
      ) => Promise<void>,
20✔
75
    ]
20✔
76
  ): Promise<IParseResult>;
20✔
77

20✔
78
  private setFileNameFromHeader(fileName: string) {
20✔
79
    this.config.fileName = fileName;
10✔
80
  }
10✔
81

20✔
82
  getConfig() {
20✔
83
    return this.config;
8✔
84
  }
8✔
85

20✔
86
  async getFile() {
20✔
87
    const { url, type } = this.config;
12✔
88
    const { body: stream, headers } = await fetch(url);
12✔
89

12✔
90
    const supportType = importTypeMap[type].accept.split(',');
12✔
91

12✔
92
    const fileFormat = headers
12✔
93
      .get('content-type')
12✔
94
      ?.split(';')
12✔
95
      ?.map((item: string) => item.trim());
12✔
96

12✔
97
    // if (!fileFormat?.length) {
12✔
98
    //   throw new BadRequestException(
12✔
99
    //     `Input url is not a standard document service without right content-type`
12✔
100
    //   );
12✔
101
    // }
12✔
102

12✔
103
    if (fileFormat?.length && !intersection(fileFormat, supportType).length) {
12✔
104
      throw new BadRequestException(
2✔
105
        `File format is not supported, only ${supportType.join(',')} are supported,`
2✔
106
      );
2✔
107
    }
2✔
108

10✔
109
    const contentDisposition = headers.get('content-disposition');
10✔
110
    let fileName = 'Import Table.csv';
10✔
111

10✔
112
    if (contentDisposition) {
10✔
113
      const fileNameMatch =
10✔
114
        contentDisposition.match(/filename\*=UTF-8''([^;]+)/) ||
10!
NEW
115
        contentDisposition.match(/filename="?([^"]+)"?/);
×
116
      if (fileNameMatch) {
10✔
117
        fileName = fileNameMatch[1];
10✔
118
      }
10✔
119
    }
10✔
120

10✔
121
    const finalFileName = fileName.split('.').shift() as string;
10✔
122

10✔
123
    this.setFileNameFromHeader(decodeURIComponent(finalFileName));
10✔
124

10✔
125
    return { stream, fileName: finalFileName };
10✔
126
  }
10✔
127

20✔
128
  async genColumns() {
20✔
129
    const supportTypes = Importer.SUPPORTEDTYPE;
12✔
130
    const parseResult = await this.parse();
12✔
131
    const { fileName, type } = this.config;
10✔
132
    const result: IAnalyzeVo['worksheets'] = {};
10✔
133

10✔
134
    for (const [sheetName, cols] of Object.entries(parseResult)) {
10✔
135
      const zipColumnInfo = zip(...cols);
10✔
136
      const existNames: string[] = [];
10✔
137
      const calculatedColumnHeaders = zipColumnInfo.map((column, index) => {
10✔
138
        let isColumnEmpty = true;
60✔
139
        let validatingFieldTypes = [...supportTypes];
60✔
140
        for (let i = 0; i < column.length; i++) {
60✔
141
          if (validatingFieldTypes.length <= 1) {
170✔
142
            break;
10✔
143
          }
10✔
144

160✔
145
          // ignore empty value and first row causing first row as header
160✔
146
          if (column[i] === '' || column[i] == null || i === 0) {
170✔
147
            continue;
80✔
148
          }
80✔
149

80✔
150
          // when the whole columns aren't empty should flag
80✔
151
          isColumnEmpty = false;
80✔
152

80✔
153
          // when one of column's value validates long text, then break;
80✔
154
          if (validateZodSchemaMap[FieldType.LongText].safeParse(column[i]).success) {
170✔
155
            validatingFieldTypes = [FieldType.LongText];
10✔
156
            break;
10✔
157
          }
10✔
158

70✔
159
          const matchTypes = validatingFieldTypes.filter((type) => {
70✔
160
            const schema = validateZodSchemaMap[type];
270✔
161
            return schema.safeParse(column[i]).success;
270✔
162
          });
270✔
163

70✔
164
          validatingFieldTypes = matchTypes;
70✔
165
        }
70✔
166

170✔
167
        // empty columns should be default type
170✔
168
        validatingFieldTypes = !isColumnEmpty
170✔
169
          ? validatingFieldTypes
50✔
170
          : [Importer.DEFAULT_COLUMN_TYPE];
10✔
171

60✔
172
        const name = getUniqName(toString(column?.[0]).trim() || `Field ${index}`, existNames);
60!
173

60✔
174
        existNames.push(name);
60✔
175

60✔
176
        return {
60✔
177
          type: validatingFieldTypes[0] || Importer.DEFAULT_COLUMN_TYPE,
60!
178
          name: name.toString(),
60✔
179
        };
60✔
180
      });
60✔
181

10✔
182
      result[sheetName] = {
10✔
183
        name: type === SUPPORTEDTYPE.EXCEL ? sheetName : fileName ? fileName : sheetName,
10✔
184
        columns: calculatedColumnHeaders,
10✔
185
      };
10✔
186
    }
10✔
187

10✔
188
    return {
10✔
189
      worksheets: result,
10✔
190
    };
10✔
191
  }
10✔
192
}
20✔
193

2✔
194
export class CsvImporter extends Importer {
2✔
195
  public static readonly CHECK_LINES = 5000;
2✔
196
  public static readonly DEFAULT_SHEETKEY = 'Import Table';
2✔
197

2✔
198
  parse(): Promise<IParseResult>;
2✔
199
  parse(
2✔
200
    options: Papa.ParseConfig & { skipFirstNLines: number; key: string },
2✔
201
    chunk: (chunk: Record<string, unknown[][]>) => Promise<void>,
2✔
202
    onFinished?: () => void,
2✔
203
    onError?: (errorMsg: string) => void
2✔
204
  ): Promise<void>;
2✔
205
  async parse(
2✔
206
    ...args: [
8✔
207
      options?: Papa.ParseConfig & { skipFirstNLines: number; key: string },
8✔
208
      chunkCb?: (chunk: Record<string, unknown[][]>) => Promise<void>,
8✔
209
      onFinished?: () => void,
8✔
210
      onError?: (errorMsg: string) => void,
8✔
211
    ]
8✔
212
  ): Promise<unknown> {
8✔
213
    const [options, chunkCb, onFinished, onError] = args;
8✔
214
    const { stream } = await this.getFile();
8✔
215

6✔
216
    // chunk parse
6✔
217
    if (options && chunkCb) {
8!
218
      return new Promise((resolve, reject) => {
×
219
        let isFirst = true;
×
220
        let recordBuffer: unknown[][] = [];
×
221
        let isAbort = false;
×
222
        let totalRowCount = 0;
×
223

×
224
        Papa.parse(toLineDelimitedStream(stream), {
×
225
          download: false,
×
226
          dynamicTyping: true,
×
227
          chunk: (chunk, parser) => {
×
228
            (async () => {
×
229
              const newChunk = [...chunk.data] as unknown[][];
×
230
              if (isFirst && options.skipFirstNLines) {
×
231
                newChunk.splice(0, 1);
×
232
                isFirst = false;
×
233
              }
×
234

×
235
              recordBuffer.push(...newChunk);
×
236
              totalRowCount += recordBuffer.length;
×
237

×
238
              if (this.config.maxRowCount && totalRowCount > this.config.maxRowCount) {
×
239
                isAbort = true;
×
240
                recordBuffer = [];
×
241
                onError?.('please upgrade your plan to import more records');
×
242
                parser.abort();
×
243
              }
×
244

×
245
              if (
×
246
                recordBuffer.length >= Importer.MAX_CHUNK_LENGTH ||
×
247
                sizeof(recordBuffer) > Importer.CHUNK_SIZE
×
248
              ) {
×
249
                parser.pause();
×
250
                try {
×
251
                  await chunkCb({ [CsvImporter.DEFAULT_SHEETKEY]: recordBuffer });
×
252
                } catch (e) {
×
253
                  isAbort = true;
×
254
                  recordBuffer = [];
×
255
                  const error = exceptionParse(e as Error);
×
256
                  onError?.(error?.message || Importer.DEFAULT_ERROR_MESSAGE);
×
257
                  parser.abort();
×
258
                }
×
259
                recordBuffer = [];
×
260
                parser.resume();
×
261
              }
×
262
            })();
×
263
          },
×
264
          complete: () => {
×
265
            (async () => {
×
266
              try {
×
267
                recordBuffer.length &&
×
268
                  (await chunkCb({ [CsvImporter.DEFAULT_SHEETKEY]: recordBuffer }));
×
269
              } catch (e) {
×
270
                isAbort = true;
×
271
                recordBuffer = [];
×
272
                const error = exceptionParse(e as Error);
×
273
                onError?.(error?.message || Importer.DEFAULT_ERROR_MESSAGE);
×
274
              }
×
275
              !isAbort && onFinished?.();
×
276
              resolve({});
×
277
            })();
×
278
          },
×
279
          error: (e) => {
×
280
            onError?.(e?.message || Importer.DEFAULT_ERROR_MESSAGE);
×
281
            reject(e);
×
282
          },
×
283
        });
×
284
      });
×
285
    } else {
8✔
286
      return new Promise((resolve, reject) => {
6✔
287
        Papa.parse(stream, {
6✔
288
          download: false,
6✔
289
          dynamicTyping: true,
6✔
290
          preview: CsvImporter.CHECK_LINES,
6✔
291
          complete: (result) => {
6✔
292
            resolve({
6✔
293
              [CsvImporter.DEFAULT_SHEETKEY]: result.data,
6✔
294
            });
6✔
295
          },
6✔
296
          error: (err) => {
6✔
297
            reject(err);
×
298
          },
×
299
        });
6✔
300
      });
6✔
301
    }
6✔
302
  }
8✔
303
}
2✔
304

2✔
305
export class ExcelImporter extends Importer {
2✔
306
  public static readonly SUPPORTEDTYPE: IValidateTypes[] = [
2✔
307
    FieldType.Checkbox,
72✔
308
    FieldType.Number,
72✔
309
    FieldType.Date,
72✔
310
    FieldType.SingleLineText,
72✔
311
    FieldType.LongText,
72✔
312
  ];
72✔
313

2✔
314
  parse(): Promise<IParseResult>;
2✔
315
  parse(
2✔
316
    options: { skipFirstNLines: number; key: string },
2✔
317
    chunk: (chunk: Record<string, unknown[][]>) => Promise<void>,
2✔
318
    onFinished?: () => void,
2✔
319
    onError?: (errorMsg: string) => void
2✔
320
  ): Promise<void>;
2✔
321

2✔
322
  async parse(
2✔
323
    options?: { skipFirstNLines: number; key: string },
4✔
324
    chunk?: (chunk: Record<string, unknown[][]>) => Promise<void>,
4✔
325
    onFinished?: () => void,
4✔
326
    onError?: (errorMsg: string) => void
4✔
327
  ): Promise<unknown> {
4✔
328
    const { stream: fileSteam } = await this.getFile();
4✔
329

4✔
330
    const asyncRs = async (stream: NodeJS.ReadableStream): Promise<IParseResult> =>
4✔
331
      new Promise((res, rej) => {
4✔
332
        const buffers: Buffer[] = [];
4✔
333
        stream.on('data', function (data) {
4✔
334
          buffers.push(data);
4✔
335
        });
4✔
336
        stream.on('end', function () {
4✔
337
          const buf = Buffer.concat(buffers);
4✔
338
          const workbook = XLSX.read(buf, { dense: true });
4✔
339
          const result: IParseResult = {};
4✔
340
          Object.keys(workbook.Sheets).forEach((name) => {
4✔
341
            result[name] = workbook.Sheets[name]['!data']?.map((item) =>
4✔
342
              item.map((v) => v.w)
12✔
343
            ) as unknown[][];
4✔
344
          });
4✔
345
          res(result);
4✔
346
        });
4✔
347
        stream.on('error', (e) => {
4✔
348
          onError?.(e?.message || Importer.DEFAULT_ERROR_MESSAGE);
×
349
          rej(e);
×
350
        });
×
351
      });
4✔
352

4✔
353
    const parseResult = await asyncRs(fileSteam);
4✔
354

4✔
355
    if (options && chunk) {
4!
356
      const { skipFirstNLines, key } = options;
×
357
      const chunks = parseResult[key];
×
358
      const parseResults = chunkArray(chunks, Importer.MAX_CHUNK_LENGTH);
×
359

×
360
      if (this.config.maxRowCount && chunks.length > this.config.maxRowCount) {
×
361
        onError?.('Please upgrade your plan to import more records');
×
362
        return;
×
363
      }
×
364

×
365
      for (let i = 0; i < parseResults.length; i++) {
×
366
        const currentChunk = parseResults[i];
×
367
        if (i === 0 && skipFirstNLines) {
×
368
          currentChunk.splice(0, 1);
×
369
        }
×
370
        try {
×
371
          await chunk({ [key]: currentChunk });
×
372
        } catch (e) {
×
373
          onError?.((e as Error)?.message || Importer.DEFAULT_ERROR_MESSAGE);
×
374
        }
×
375
      }
×
376
      onFinished?.();
×
377
    }
×
378

4✔
379
    return parseResult;
4✔
380
  }
4✔
381
}
2✔
382

2✔
383
export const importerFactory = (type: SUPPORTEDTYPE, config: IImportConstructorParams) => {
2✔
384
  switch (type) {
20✔
385
    case SUPPORTEDTYPE.CSV:
20✔
386
      return new CsvImporter(config);
14✔
387
    case SUPPORTEDTYPE.EXCEL:
20✔
388
      return new ExcelImporter(config);
6✔
389
    default:
20!
390
      throw new Error('not support');
×
391
  }
20✔
392
};
20✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc