• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

streetsidesoftware / cspell / 8745810937

18 Apr 2024 11:05PM UTC coverage: 93.481% (+0.02%) from 93.46%
8745810937

Pull #5502

github

web-flow
Merge 53f2b8079 into c515cc91c
Pull Request #5502: chore: Add lint rules

6439 of 7332 branches covered (87.82%)

126 of 144 new or added lines in 68 files covered. (87.5%)

3 existing lines in 3 files now uncovered.

13192 of 14112 relevant lines covered (93.48%)

23103.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.0
/packages/cspell-tools/src/compiler/wordListParser.ts
1
import { opCombine, opCombine as opPipe, type Operator, opFilter, opMap } from '@cspell/cspell-pipe/sync';
2
import { createDictionaryLineParser } from 'cspell-trie-lib';
3
import { uniqueFilter } from 'hunspell-reader';
4

5
import type { CompileOptions } from './CompileOptions.js';
6
import { legacyLineToWords } from './legacyLineToWords.js';
7
import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed.js';
8
import type { AllowedSplitWordsCollection } from './WordsCollection.js';
9

10
/**
 * Builds the operator applied to words before they are written to a compile target.
 *
 * Steps, in order: drop empty entries, run the dictionary line parser, optionally sort,
 * drop duplicates, and optionally apply the caller supplied filter.
 *
 * @param options - compile options; `generateNonStrict`, `sort` and `filter` are read here.
 * @returns a single operator that applies all of the enabled steps.
 */
export function normalizeTargetWords(options: CompileOptions): Operator<string> {
    const parseLine = createDictionaryLineParser({
        stripCaseAndAccents: options.generateNonStrict,
        stripCaseAndAccentsOnForbidden: true,
    });
    const dropEmpty = opFilter<string>((a) => !!a);
    // Sorting happens in windows of 10,000 lines, it is not a global sort.
    const sortLines = options.sort ? createInlineBufferedSort(10_000) : undefined;
    // NOTE(review): presumably `uniqueFilter(10_000)` bounds how many entries it remembers - confirm in hunspell-reader.
    const dropDuplicates = opFilter<string>(uniqueFilter(10_000));
    const applyFilter = options.filter ? opFilter<string>(options.filter) : undefined;

    // Disabled steps are `undefined` and get removed before the steps are combined.
    const steps: Operator<string>[] = [dropEmpty, parseLine, sortLines, dropDuplicates, applyFilter].filter(isDefined);
    return opCombine(...steps);
}
24

25
/**
 * Type guard that rejects only `undefined`.
 * Other falsy values (`null`, `0`, `''`, `false`) are considered defined.
 *
 * @param v - the value to test.
 * @returns `true` when `v` is not `undefined`.
 */
function isDefined<T>(v: T | undefined): v is T {
    return v !== undefined;
}
28

29
/**
 * Creates an operator that sorts lines in windows while they stream through.
 *
 * Lines are collected into a buffer. Each time the buffer holds `bufferSize` lines it is sorted
 * with the default `Array.prototype.sort` ordering, emitted, and emptied. Whatever is left at the
 * end of the input is sorted and emitted as well. The output is therefore sorted within each
 * window only; it is globally sorted only when the input has fewer lines than `bufferSize`.
 *
 * @param bufferSize - number of lines per sorted window.
 * @returns a function mapping an iterable of lines to an iterable of window-sorted lines.
 */
function createInlineBufferedSort(bufferSize = 1000): (lines: Iterable<string>) => Iterable<string> {
    function* inlineBufferedSort(lines: Iterable<string>): Iterable<string> {
        const buffer: string[] = [];

        for (const line of lines) {
            buffer.push(line);
            if (buffer.length >= bufferSize) {
                // The window is full: flush it in sorted order and reuse the same array.
                buffer.sort();
                yield* buffer;
                buffer.length = 0;
            }
        }

        // Flush the final, partially filled window.
        buffer.sort();
        yield* buffer;
    }

    return inlineBufferedSort;
}
48

49
/**
 * Options controlling how the lines of a word list file are parsed.
 */
export interface ParseFileOptions {
    /**
     * Preserve case.
     * When omitted, `createParseFileLineMapper` uses `false` if `legacy` is enabled and `true` otherwise.
     * @default true
     */
    keepCase?: boolean;

    /**
     * Tell the parser to split each line into words.
     * Lines are broken on runs of characters that are not letters, combining marks, digits,
     * `_`, `'` or `-`, which includes spaces.
     * @default false
     */
    split?: boolean | undefined;

    /**
     * When splitting tells the parser to output both the split and non-split versions of the line.
     * @default false
     */
    splitKeepBoth?: boolean | undefined;

    // /**
    //  * Specify the separator for splitting words.
    //  */
    // splitSeparator?: RegExp | string | undefined;

    /**
     * Use legacy splitting.
     * @default false
     */
    legacy?: boolean;

    /**
     * Collection of words consulted when splitting.
     * It is passed to the legacy splitter and to camelCase splitting; camelCase splitting is
     * skipped when its `size` is `0`.
     */
    allowedSplitWords: AllowedSplitWordsCollection;
}
81

82
/** `ParseFileOptions` with every property required. */
type ParseFileOptionsRequired = Required<ParseFileOptions>;

/** Character that starts a comment in a word list line. */
const commentCharacter = '#';

/** Values used for any option the caller does not supply. */
const _defaultOptions: ParseFileOptionsRequired = {
    keepCase: true,
    legacy: false,
    split: false,
    splitKeepBoth: false,
    // splitSeparator: regExpSplit,
    // Accepts every word; a `size` of 0 makes the split path skip camelCase splitting.
    allowedSplitWords: { has: () => true, size: 0 },
};

/**
 * The default parse options.
 * `Object.freeze` returns the object it was given, so this is `_defaultOptions` itself, shallowly frozen.
 */
export const defaultParseDictionaryOptions: ParseFileOptionsRequired = Object.freeze(_defaultOptions);

/** Prefix that introduces parser directives inside a `#` comment. */
export const cSpellToolDirective = 'cspell-tools:';

/** Directive flags understood after `cSpellToolDirective`. Despite the name this is an array, not a `Set`. */
export const setOfCSpellDirectiveFlags = ['no-split', 'split', 'keep-case', 'no-keep-case', 'legacy'];
100

101
/**
 * Creates an operator that converts raw word list text into unique, trimmed entries.
 *
 * Non-string values are dropped and each string is split on `\n`. Then, for every line:
 * 1. Everything from the first `#` on is removed. If that comment contains a `cspell-tools:` directive,
 *    its flags (`split`, `no-split`, `keep-case`, `no-keep-case`, `legacy`) update the parser state.
 * 2. The line is expanded into words according to the current `legacy` / `split` state.
 * 3. Entries are trimmed, empty entries are dropped and repeated entries are emitted only once.
 *
 * The `split`, `keepCase` and `legacy` state is held in the closure of the returned operator, so a
 * directive remains in effect after the line it appears on (assuming the pipe operators evaluate
 * lazily, one line at a time) and carries over if the same operator is used again.
 *
 * @param options - parsing options. Missing values fall back to the defaults, except that `keepCase`
 *   defaults to `false` when `legacy` is enabled.
 * @returns an operator mapping raw text chunks to parsed entries.
 */
export function createParseFileLineMapper(options?: Partial<ParseFileOptions>): Operator<string> {
    const _options = options ?? _defaultOptions;
    const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords } =
        _options;

    // These three are `let` because directives found in comments can change them while parsing.
    let { legacy = _defaultOptions.legacy } = _options;

    let { split = _defaultOptions.split, keepCase = legacy ? false : _defaultOptions.keepCase } = _options;

    function isString(line: unknown | string): line is string {
        return typeof line === 'string';
    }

    function trim(line: string): string {
        return line.trim();
    }

    /**
     * Removes the `#` comment from a line, first applying any `cspell-tools:` directive found in it.
     * Unknown flags are ignored.
     */
    function removeComments(line: string): string {
        const idx = line.indexOf(commentCharacter);
        if (idx < 0) return line;

        const idxDirective = line.indexOf(cSpellToolDirective, idx);
        if (idxDirective >= 0) {
            // Skip the directive prefix itself so that flags are recognized with or without
            // a space after the colon (`cspell-tools: split` and `cspell-tools:split`).
            const flags = line
                .slice(idxDirective + cSpellToolDirective.length)
                .split(/[\s,;]+/)
                .filter((flag) => !!flag);
            for (const flag of flags) {
                switch (flag) {
                    case 'split': {
                        split = true;
                        break;
                    }
                    case 'no-split': {
                        split = false;
                        break;
                    }
                    case 'keep-case': {
                        // Keeping case also turns legacy mode off.
                        keepCase = true;
                        legacy = false;
                        break;
                    }
                    case 'no-keep-case': {
                        keepCase = false;
                        break;
                    }
                    case 'legacy': {
                        // Legacy mode always runs without keeping case.
                        keepCase = false;
                        legacy = true;
                        break;
                    }
                }
            }
        }

        return line.slice(0, idx).trim();
    }

    function filterEmptyLines(line: string): boolean {
        return !!line;
    }

    // Matches runs of anything that is not a letter, combining mark, `'`, word character or `-`.
    const regNonWordOrDigit = /[^\p{L}\p{M}'\w-]+/giu;

    /**
     * Splits a line into words: separators are normalized to `|`, quotes at word edges are
     * stripped, and purely numeric / symbolic tokens are discarded.
     */
    function splitLine(line: string): string[] {
        line = line.replace(/#.*/, ''); // remove comment
        line = line.trim();
        line = line.replaceAll(/\bU\+[0-9A-F]{4}\b/gi, '|'); // Remove Unicode Definitions
        line = line.replaceAll(/\\U[0-9A-F]{4}/gi, '|'); // Remove Unicode Definitions
        line = line.replaceAll(regNonWordOrDigit, '|');
        line = line.replaceAll(/'(?=\|)/g, ''); // remove trailing '
        line = line.replace(/'$/, ''); // remove trailing '
        line = line.replaceAll(/(?<=\|)'/g, ''); // remove leading '
        line = line.replace(/^'/, ''); // remove leading '
        line = line.replaceAll(/\s*\|\s*/g, '|'); // remove spaces around |
        line = line.replaceAll(/[|]+/g, '|'); // reduce repeated |
        line = line.replace(/^\|/, ''); // remove leading |
        line = line.replace(/\|$/, ''); // remove trailing |
        const lines = line
            .split('|')
            .map((a) => a.trim())
            .filter((a) => !!a)
            .filter((a) => !/^[0-9_-]+$/.test(a)) // pure numbers and symbols
            .filter((a) => !/^0[xo][0-9A-F]+$/i.test(a)); // c-style hex/octal digits

        return lines;
    }

    /**
     * Expands each line according to the current mode:
     * legacy splitting, word splitting (optionally followed by the whole line), or the line as is.
     */
    function* splitWords(lines: Iterable<string>): Iterable<string> {
        for (const line of lines) {
            if (legacy) {
                yield* legacyLineToWords(line, keepCase, allowedSplitWords);
                continue;
            }
            if (split) {
                const words = splitLine(line);
                // camelCase splitting only applies when the allowed words collection is not empty.
                yield* allowedSplitWords.size
                    ? words.flatMap((word) => splitCamelCaseIfAllowed(word, allowedSplitWords, keepCase))
                    : words;
                if (!splitKeepBoth) continue;
            }
            yield line.replaceAll(/["]/g, '');
        }
    }

    /** Emits each distinct line once, in order of first appearance. */
    function* unique(lines: Iterable<string>): Iterable<string> {
        const known = new Set<string>();
        for (const line of lines) {
            if (known.has(line)) continue;
            known.add(line);
            yield line;
        }
    }

    /** Breaks multi-line chunks into individual lines. */
    function* splitLines(paragraphs: Iterable<string>): Iterable<string> {
        for (const paragraph of paragraphs) {
            yield* paragraph.split('\n');
        }
    }

    const processLines = opPipe(
        opFilter(isString),
        splitLines,
        opMap(removeComments),
        splitWords,
        opMap(trim),
        opFilter(filterEmptyLines),
        unique,
    );

    return processLines;
}
240

241
/**
 * Parses word list text with a line mapper created from `options`.
 *
 * @param lines - the text to parse: either an iterable of strings or a single string,
 *   which is treated as a one element list.
 * @param options - options passed to `createParseFileLineMapper`.
 * @returns the entries produced by the line mapper.
 */
export function parseFileLines(lines: Iterable<string> | string, options: Partial<ParseFileOptions>): Iterable<string> {
    const mapLines = createParseFileLineMapper(options);
    const source = typeof lines === 'string' ? [lines] : lines;
    return mapLines(source);
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc