8745810937

Committed 18 Apr 2024 11:05PM UTC coverage: 93.481% (+0.02%) from 93.46%

Build # 8745810937

Build Type

Pull #5502

github

Committed by

web-flow

Commit Message

Merge 53f2b8079 into c515cc91c

Pull Request Pull Request #5502: chore: Add lint rules

Run Details

6439 of 7332 branches covered (87.82%)

126 of 144 new or added lines in 68 files covered. (87.5%)

3 existing lines in 3 files now uncovered.

13192 of 14112 relevant lines covered (93.48%)

23103.11 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.0

/packages/cspell-tools/src/compiler/wordListParser.ts

import { opCombine, opCombine as opPipe, type Operator, opFilter, opMap } from '@cspell/cspell-pipe/sync';
import { createDictionaryLineParser } from 'cspell-trie-lib';
import { uniqueFilter } from 'hunspell-reader';

import type { CompileOptions } from './CompileOptions.js';
import { legacyLineToWords } from './legacyLineToWords.js';
import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed.js';
import type { AllowedSplitWordsCollection } from './WordsCollection.js';

/**
 * Composes the operator used to normalize a stream of words.
 *
 * Stages, in order: drop empty entries, run each entry through the dictionary line parser,
 * optionally sort in buffered chunks (`options.sort`), filter through `uniqueFilter`,
 * and finally apply the optional caller-supplied `options.filter`.
 *
 * @param options - compile options; `generateNonStrict`, `sort` and `filter` are read here.
 * @returns a single operator combining all enabled stages.
 */
export function normalizeTargetWords(options: CompileOptions): Operator<string> {
    const parseLine = createDictionaryLineParser({
        stripCaseAndAccents: options.generateNonStrict,
        stripCaseAndAccentsOnForbidden: true,
    });
    const dropEmpty = opFilter<string>((entry) => !!entry);
    const sortStage = options.sort ? createInlineBufferedSort(10_000) : undefined;
    const dedupeStage = opFilter<string>(uniqueFilter(10_000));
    const callerFilter = options.filter ? opFilter<string>(options.filter) : undefined;

    // Optional stages are `undefined` when disabled; strip them before combining.
    const stages: Operator<string>[] = [dropEmpty, parseLine, sortStage, dedupeStage, callerFilter].filter(isDefined);
    return opCombine(...stages);
}

/**
 * Type guard that rejects only `undefined` (other falsy values such as `null`, `0` or `''` pass).
 * @param v - value to test.
 * @returns `true` when `v` is not `undefined`.
 */
function isDefined<T>(v: T | undefined): v is T {
    return !(v === undefined);
}

/**
 * Creates a streaming sorter that sorts its input in chunks.
 *
 * Lines are collected until `bufferSize` of them are pending; that chunk is then sorted
 * (default `Array.prototype.sort` ordering) and emitted. Any remainder is sorted and emitted
 * at the end. Ordering is therefore only guaranteed within each chunk, not across chunks.
 *
 * @param bufferSize - number of lines collected before a chunk is sorted and flushed.
 * @returns a function mapping an iterable of lines to the chunk-sorted iterable.
 */
function createInlineBufferedSort(bufferSize = 1000): (lines: Iterable<string>) => Iterable<string> {
    return function* inlineBufferedSort(lines: Iterable<string>): Iterable<string> {
        let pending: string[] = [];

        for (const entry of lines) {
            pending.push(entry);
            if (pending.length < bufferSize) continue;
            // Chunk is full: emit it in sorted order and start a fresh one.
            yield* pending.sort();
            pending = [];
        }

        // Flush whatever is left (possibly nothing).
        yield* pending.sort();
    };
}

/**
 * Options controlling how `createParseFileLineMapper` / `parseFileLines` parse word-list text.
 */
export interface ParseFileOptions {
    /**
     * Preserve case
     *
     * When `legacy` is enabled and `keepCase` is not given, `createParseFileLineMapper`
     * defaults it to `false` instead.
     * @default true
     */
    keepCase?: boolean;

    /**
     * Tell the parser to split into words along spaces.
     *
     * More precisely, the line is broken on any run of characters that is not a letter,
     * combining mark, digit, `_`, `'` or `-`; pure number/symbol tokens and C-style
     * hex/octal literals are then discarded.
     * @default false
     */
    split?: boolean | undefined;

    /**
     * When splitting tells the parser to output both the split and non-split versions of the line.
     * @default false
     */
    splitKeepBoth?: boolean | undefined;

    // /**
    //  * Specify the separator for splitting words.
    //  */
    // splitSeparator?: RegExp | string | undefined;

    /**
     * Use legacy splitting.
     *
     * In legacy mode each line is handed to `legacyLineToWords` and the `split` option is not consulted.
     * @default false
     */
    legacy?: boolean;

    /**
     * Collection passed to `legacyLineToWords` (legacy mode) and `splitCamelCaseIfAllowed` (split mode).
     * In split mode, camel-case splitting is skipped entirely when `size` is 0.
     */
    allowedSplitWords: AllowedSplitWordsCollection;
}

/** `ParseFileOptions` with every field required; the shape of the fully-populated defaults. */
type ParseFileOptionsRequired = Required<ParseFileOptions>;

/** Everything from the first occurrence of this character to the end of a line is a comment. */
const commentCharacter = '#';

/** Fallback values used by `createParseFileLineMapper` for any option the caller omits. */
const _defaultOptions: ParseFileOptionsRequired = {
    keepCase: true,
    legacy: false,
    split: false,
    splitKeepBoth: false,
    // splitSeparator: regExpSplit,
    // `has` accepts every word, but `size: 0` makes split mode skip camel-case splitting.
    allowedSplitWords: { has: () => true, size: 0 },
};

/**
 * The default parse options, frozen.
 * `Object.freeze` returns its argument, so this is the same (now frozen) object as `_defaultOptions`.
 */
export const defaultParseDictionaryOptions: ParseFileOptionsRequired = Object.freeze(_defaultOptions);

/** Marker that introduces parser directives inside a `#` comment. */
export const cSpellToolDirective = 'cspell-tools:';

/** Directive flags handled by `createParseFileLineMapper`. Despite the name, this is an array, not a `Set`. */
export const setOfCSpellDirectiveFlags = ['no-split', 'split', 'keep-case', 'no-keep-case', 'legacy'];

/**
 * Creates an operator that converts raw word-list text into a stream of unique, trimmed,
 * non-empty entries.
 *
 * Pipeline: keep only strings, split each into lines, strip `#` comments (applying any
 * `cspell-tools:` directive found in the comment), optionally split lines into words,
 * trim, drop empties, and de-duplicate.
 *
 * Directives (`split`, `no-split`, `keep-case`, `no-keep-case`, `legacy`) mutate the parsing
 * mode held in this closure. NOTE(review): assuming the pipe operators evaluate lazily, a
 * directive takes effect from the line that carries it onward; the mode also persists if the
 * returned operator is reused for another input.
 *
 * @param options - parsing options; omitted values fall back to `defaultParseDictionaryOptions`,
 *   except that `keepCase` defaults to `false` when `legacy` is enabled.
 * @returns an operator mapping text chunks to unique entries.
 */
export function createParseFileLineMapper(options?: Partial<ParseFileOptions>): Operator<string> {
    const _options = options ?? _defaultOptions;
    const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords } =
        _options;

    // `legacy`, `split` and `keepCase` are mutable because in-file directives may change them.
    let { legacy = _defaultOptions.legacy } = _options;

    // Legacy mode lowers the default for `keepCase`; an explicitly supplied `keepCase` still wins.
    let { split = _defaultOptions.split, keepCase = legacy ? false : _defaultOptions.keepCase } = _options;

    function isString(line: unknown): line is string {
        return typeof line === 'string';
    }

    function trim(line: string): string {
        return line.trim();
    }

    /**
     * Removes everything from the first `#` onward and applies any directive found in that comment.
     * Lines without a comment are returned untouched (trimming happens later in the pipeline).
     */
    function removeComments(line: string): string {
        const idx = line.indexOf(commentCharacter);
        if (idx < 0) return line;

        // Only look for the directive inside the comment part of the line.
        const idxDirective = line.indexOf(cSpellToolDirective, idx);
        if (idxDirective >= 0) {
            // Skip past the marker itself so that a flag written directly after the colon
            // (e.g. `cspell-tools:split`) is recognized instead of being fused with the marker.
            const flags = line
                .slice(idxDirective + cSpellToolDirective.length)
                .split(/[\s,;]/g)
                .map((s) => s.trim())
                .filter((a) => !!a);
            // Flags are applied in order; unrecognized flags are ignored.
            for (const flag of flags) {
                switch (flag) {
                    case 'split': {
                        split = true;
                        break;
                    }
                    case 'no-split': {
                        split = false;
                        break;
                    }
                    case 'keep-case': {
                        // Keeping case also turns legacy mode off.
                        keepCase = true;
                        legacy = false;
                        break;
                    }
                    case 'no-keep-case': {
                        keepCase = false;
                        break;
                    }
                    case 'legacy': {
                        // Legacy mode implies not keeping case.
                        keepCase = false;
                        legacy = true;
                        break;
                    }
                }
            }
        }

        return line.slice(0, idx).trim();
    }

    function filterEmptyLines(line: string): boolean {
        return !!line;
    }

    // Runs of anything that is not a letter, combining mark, `'`, word character or `-`.
    const regNonWordOrDigit = /[^\p{L}\p{M}'\w-]+/giu;

    /**
     * Breaks a line into word candidates. Separators are normalized to `|` first, then the
     * line is split on `|` and tokens that are only digits/`_`/`-` or hex/octal literals are dropped.
     */
    function splitLine(line: string): string[] {
        line = line.replace(/#.*/, ''); // remove comment
        line = line.trim();
        line = line.replaceAll(/\bU\+[0-9A-F]{4}\b/gi, '|'); // Remove Unicode Definitions
        line = line.replaceAll(/\\U[0-9A-F]{4}/gi, '|'); // Remove Unicode Definitions
        line = line.replaceAll(regNonWordOrDigit, '|');
        line = line.replaceAll(/'(?=\|)/g, ''); // remove trailing '
        line = line.replace(/'$/, ''); // remove trailing '
        line = line.replaceAll(/(?<=\|)'/g, ''); // remove leading '
        line = line.replace(/^'/, ''); // remove leading '
        line = line.replaceAll(/\s*\|\s*/g, '|'); // remove spaces around |
        line = line.replaceAll(/[|]+/g, '|'); // reduce repeated |
        line = line.replace(/^\|/, ''); // remove leading |
        line = line.replace(/\|$/, ''); // remove trailing |
        const lines = line
            .split('|')
            .map((a) => a.trim())
            .filter((a) => !!a)
            .filter((a) => !/^[0-9_-]+$/.test(a)) // pure numbers and symbols
            .filter((a) => !/^0[xo][0-9A-F]+$/i.test(a)); // c-style hex/octal digits

        return lines;
    }

    /**
     * Expands each line according to the current mode:
     * - legacy: delegate entirely to `legacyLineToWords`;
     * - split: emit the split words (camel-case split only when `allowedSplitWords` is non-empty),
     *   plus the original line when `splitKeepBoth` is set;
     * - otherwise: emit the line with double quotes removed.
     */
    function* splitWords(lines: Iterable<string>): Iterable<string> {
        for (const line of lines) {
            if (legacy) {
                yield* legacyLineToWords(line, keepCase, allowedSplitWords);
                continue;
            }
            if (split) {
                const words = splitLine(line);
                yield* !allowedSplitWords.size
                    ? words
                    : words.flatMap((word) => splitCamelCaseIfAllowed(word, allowedSplitWords, keepCase));
                if (!splitKeepBoth) continue;
            }
            yield line.replaceAll(/["]/g, '');
        }
    }

    /** Emits each distinct line once, in first-seen order. */
    function* unique(lines: Iterable<string>): Iterable<string> {
        const known = new Set<string>();
        for (const line of lines) {
            if (known.has(line)) continue;
            known.add(line);
            yield line;
        }
    }

    /** Splits every incoming chunk of text on `\n`. */
    function* splitLines(paragraphs: Iterable<string>): Iterable<string> {
        for (const paragraph of paragraphs) {
            yield* paragraph.split('\n');
        }
    }

    const processLines = opPipe(
        opFilter(isString),
        splitLines,
        opMap(removeComments),
        splitWords,
        opMap(trim),
        opFilter(filterEmptyLines),
        unique,
    );

    return processLines;
}

/**
 * Parses word-list text into unique, trimmed, non-empty entries using a freshly created
 * line mapper (see `createParseFileLineMapper`).
 * @param lines - either the whole text as a single string or an iterable of text chunks.
 * @param options - parsing options forwarded to `createParseFileLineMapper`.
 * @returns the parsed entries.
 */
export function parseFileLines(lines: Iterable<string> | string, options: Partial<ParseFileOptions>): Iterable<string> {
    const mapLines = createParseFileLineMapper(options);
    // A bare string is treated as one chunk of text rather than iterated character by character.
    const chunks = typeof lines === 'string' ? [lines] : lines;
    return mapLines(chunks);
}

1	import { opCombine, opCombine as opPipe, type Operator, opFilter, opMap } from '@cspell/cspell-pipe/sync';
2	import { createDictionaryLineParser } from 'cspell-trie-lib';
3	import { uniqueFilter } from 'hunspell-reader';
4
5	import type { CompileOptions } from './CompileOptions.js';
6	import { legacyLineToWords } from './legacyLineToWords.js';
7	import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed.js';
8	import type { AllowedSplitWordsCollection } from './WordsCollection.js';
9
10	export function normalizeTargetWords(options: CompileOptions): Operator<string> {
11	const lineParser = createDictionaryLineParser({	93✔
12	stripCaseAndAccents: options.generateNonStrict,
13	stripCaseAndAccentsOnForbidden: true,
14	});
15	const operations: Operator<string>[] = [	93✔
16	opFilter<string>((a) => !!a),	896✔
17	lineParser,
18	options.sort ? createInlineBufferedSort(10_000) : undefined,	93✔
19	opFilter<string>(uniqueFilter(10_000)),
20	options.filter ? opFilter<string>(options.filter) : undefined,	93✔
21	].filter(isDefined);
22	return opCombine(...operations);	93✔
23	}
24
25	function isDefined<T>(v: T \| undefined): v is T {
26	return v !== undefined;	465✔
27	}
28
29	function createInlineBufferedSort(bufferSize = 1000): (lines: Iterable<string>) => Iterable<string> {	×
30	function* inlineBufferedSort(lines: Iterable<string>): Iterable<string> {
31	const buffer: string[] = [];	74✔
32
33	for (const line of lines) {	74✔
34	buffer.push(line);	1,015✔
35	if (buffer.length >= bufferSize) {	1,015!
36	buffer.sort();	×
37	yield* buffer;	×
38	buffer.length = 0;	×
39	}
40	}
41
42	buffer.sort();	74✔
43	yield* buffer;	74✔
44	}
45
46	return inlineBufferedSort;	83✔
47	}
48
49	export interface ParseFileOptions {
50	/**
51	* Preserve case
52	* @default true
53	*/
54	keepCase?: boolean;
55
56	/**
57	* Tell the parser to split into words along spaces.
58	* @default false
59	*/
60	split?: boolean \| undefined;
61
62	/**
63	* When splitting tells the parser to output both the split and non-split versions of the line.
64	* @default false
65	*/
66	splitKeepBoth?: boolean \| undefined;
67
68	// /**
69	// * Specify the separator for splitting words.
70	// */
71	// splitSeparator?: RegExp \| string \| undefined;
72
73	/**
74	* Use legacy splitting.
75	* @default false
76	*/
77	legacy?: boolean;
78
79	allowedSplitWords: AllowedSplitWordsCollection;
80	}
81
82	type ParseFileOptionsRequired = Required<ParseFileOptions>;
83
84	const commentCharacter = '#';	8✔
85
86	const _defaultOptions: ParseFileOptionsRequired = {	8✔
87	keepCase: true,
88	legacy: false,
89	split: false,
90	splitKeepBoth: false,
91	// splitSeparator: regExpSplit,
92	allowedSplitWords: { has: () => true, size: 0 },	14✔
93	};
94
95	export const defaultParseDictionaryOptions: ParseFileOptionsRequired = Object.freeze(_defaultOptions);	8✔
96
97	export const cSpellToolDirective = 'cspell-tools:';	8✔
98
99	export const setOfCSpellDirectiveFlags = ['no-split', 'split', 'keep-case', 'no-keep-case', 'legacy'];	8✔
100
101	/**
102	* Normalizes a dictionary words based upon prefix / suffixes.
103	* Case insensitive versions are also generated.
104	* @param options - defines prefixes used when parsing lines.
105	* @returns words that have been normalized.
106	*/
107	export function createParseFileLineMapper(options?: Partial<ParseFileOptions>): Operator<string> {
108	const _options = options \|\| _defaultOptions;	65!
109	const { splitKeepBoth = _defaultOptions.splitKeepBoth, allowedSplitWords = _defaultOptions.allowedSplitWords } =	68✔
110	_options;	65✔
111
112	let { legacy = _defaultOptions.legacy } = _options;	65✔
113
114	let { split = _defaultOptions.split, keepCase = legacy ? false : _defaultOptions.keepCase } = _options;	65✔
115
116	function isString(line: unknown \| string): line is string {
117	return typeof line === 'string';	12,471✔
118	}
119
120	function trim(line: string): string {
121	return line.trim();	12,516✔
122	}
123
124	function removeComments(line: string): string {
125	const idx = line.indexOf(commentCharacter);	12,479✔
126	if (idx < 0) return line;	12,479✔
127
128	const idxDirective = line.indexOf(cSpellToolDirective, idx);	58✔
129	if (idxDirective >= 0) {	58✔
130	const flags = line	2✔
131	.slice(idxDirective)
132	.split(/[\s,;]/g)
133	.map((s) => s.trim())	5✔
134	.filter((a) => !!a);	5✔
135	for (const flag of flags) {	2✔
136	switch (flag) {	5!
137	case 'split': {
138	split = true;	1✔
139	break;	1✔
140	}
141	case 'no-split': {
142	split = false;	1✔
143	break;	1✔
144	}
145	case 'keep-case': {
146	keepCase = true;	1✔
147	legacy = false;	1✔
148	break;	1✔
149	}
150	case 'no-keep-case': {
151	keepCase = false;	×
152	break;	×
153	}
154	case 'legacy': {
155	keepCase = false;	×
156	legacy = true;	×
157	break;	×
158	}
159	}
160	}
161	}
162
163	return line.slice(0, idx).trim();	58✔
164	}
165
166	function filterEmptyLines(line: string): boolean {
167	return !!line;	12,516✔
168	}
169
170	const regNonWordOrDigit = /[^\p{L}\p{M}'\w-]+/giu;	65✔
171
172	function splitLine(line: string): string[] {
173	line = line.replace(/#.*/, ''); // remove comment	48✔
174	line = line.trim();	48✔
175	line = line.replaceAll(/\bU\+[0-9A-F]{4}\b/gi, '\|'); // Remove Unicode Definitions	48✔
176	line = line.replaceAll(/\\U[0-9A-F]{4}/gi, '\|'); // Remove Unicode Definitions	48✔
177	line = line.replaceAll(regNonWordOrDigit, '\|');	48✔
178	line = line.replaceAll(/'(?=\\|)/g, ''); // remove trailing '	48✔
179	line = line.replace(/'$/, ''); // remove trailing '	48✔
180	line = line.replaceAll(/(?<=\\|)'/g, ''); // remove leading '	48✔
181	line = line.replace(/^'/, ''); // remove leading '	48✔
182	line = line.replaceAll(/\s*\\|\s*/g, '\|'); // remove spaces around \|	48✔
183	line = line.replaceAll(/[\|]+/g, '\|'); // reduce repeated \|	48✔
184	line = line.replace(/^\\|/, ''); // remove leading \|	48✔
185	line = line.replace(/\\|$/, ''); // remove trailing \|	48✔
186	const lines = line	48✔
187	.split('\|')
188	.map((a) => a.trim())	71✔
189	.filter((a) => !!a)	71✔
190	.filter((a) => !/^[0-9_-]+$/.test(a)) // pure numbers and symbols	64✔
191	.filter((a) => !/^0[xo][0-9A-F]+$/i.test(a)); // c-style hex/octal digits	64✔
192
193	return lines;	48✔
194	}
195
196	function* splitWords(lines: Iterable<string>): Iterable<string> {
197	for (const line of lines) {	65✔
198	if (legacy) {	12,479✔
199	yield* legacyLineToWords(line, keepCase, allowedSplitWords);	26✔
200	continue;	26✔
201	}
202	if (split) {	12,453✔
203	const words = splitLine(line);	48✔
204	yield* !allowedSplitWords.size	48!
205	? words
NEW 206	: words.flatMap((word) => splitCamelCaseIfAllowed(word, allowedSplitWords, keepCase));	×
207	if (!splitKeepBoth) continue;	48!
208	}
209	yield line.replaceAll(/["]/g, '');	12,405✔
210	}
211	}
212
213	function* unique(lines: Iterable<string>): Iterable<string> {
214	const known = new Set<string>();	65✔
215	for (const line of lines) {	65✔
216	if (known.has(line)) continue;	12,424✔
217	known.add(line);	12,399✔
218	yield line;	12,399✔
219	}
220	}
221
222	function* splitLines(paragraphs: Iterable<string>): Iterable<string> {
223	for (const paragraph of paragraphs) {	65✔
224	yield* paragraph.split('\n');	12,471✔
225	}
226	}
227
228	const processLines = opPipe(	65✔
229	opFilter(isString),
230	splitLines,
231	opMap(removeComments),
232	splitWords,
233	opMap(trim),
234	opFilter(filterEmptyLines),
235	unique,
236	);
237
238	return processLines;	65✔
239	}
240
241	/**
242	* Normalizes a dictionary words based upon prefix / suffixes.
243	* Case insensitive versions are also generated.
244	* @param lines - one word per line
245	* @param _options - defines prefixes used when parsing lines.
246	* @returns words that have been normalized.
247	*/
248	export function parseFileLines(lines: Iterable<string> \| string, options: Partial<ParseFileOptions>): Iterable<string> {
249	return createParseFileLineMapper(options)(typeof lines === 'string' ? [lines] : lines);	65✔
250	}

streetsidesoftware / cspell / 8745810937

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous