• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

streetsidesoftware / cspell / 20623518016

31 Dec 2025 05:03PM UTC coverage: 92.831%. First build
20623518016

Pull #8259

github

web-flow
Merge dab0de18e into 40ec03fe0
Pull Request #8259: fix: Support string prefixes when walking nodes

8722 of 11441 branches covered (76.23%)

160 of 182 new or added lines in 12 files covered. (87.91%)

17144 of 18468 relevant lines covered (92.83%)

116412.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.17
/packages/cspell-trie-lib/src/lib/StringTable/StringTable.ts
1
import type { BinaryFormat } from '../binary/index.ts';
2
import { BinaryDataBuilder, BinaryDataReader, BinaryFormatBuilder } from '../binary/index.ts';
3
import { GTrie } from '../GTrie/index.ts';
4
import { assert } from '../utils/assert.ts';
5

6
type U32Array = Uint32Array<ArrayBuffer>;
7
type U16Array = Uint16Array<ArrayBuffer>;
8
type U8Array = Uint8Array<ArrayBuffer>;
9

10
type IndexArray = U32Array | U16Array | number[];
11

12
type IndexArrayRO = U32Array | U16Array | Readonly<number[]>;
13

14
/**
15
 * This is a set of strings stored in a compact form.
16
 *
17
 * Strings are stored as UTF-8 encoded bytes in a single contiguous buffer.
18
 * Each string is referenced by its starting index and length within the buffer.
19
 *
20
 * This design minimizes memory overhead by avoiding individual string objects,
21
 * allowing efficient storage and retrieval of a large number of strings.
22
 *
23
 * Strings are retrieved based on their index.
24
 *
25
 * The internal index table contains the offset and length of each string in the buffer.
26
 *
27
 */
28
export class StringTable {
29
    #index: IndexArray;
30
    #data: U8Array;
31
    #strLenBits: number;
32
    #strLenMask: number;
33
    #decoder = new TextDecoder();
170✔
34

35
    /**
36
     *
37
     * @param index - the lookup index format: `offset|len` where the low bits are the length
38
     * @param utf8ByteData - the UTF-8 encoded byte data for all the strings
39
     * @param strLenBits - number of bits used to store the length of the string in the index entry
40
     */
41
    constructor(index: IndexArray, utf8ByteData: U8Array, strLenBits: number) {
42
        this.#index = index;
170✔
43
        this.#data = utf8ByteData;
170✔
44
        this.#strLenBits = strLenBits;
170✔
45
        this.#strLenMask = (1 << strLenBits) - 1;
170✔
46
    }
47

48
    get index(): Readonly<IndexArray> {
49
        return this.#index;
3✔
50
    }
51

52
    get charData(): Readonly<U8Array> {
53
        return this.#data;
5✔
54
    }
55

56
    get strLenBits(): number {
57
        return this.#strLenBits;
2✔
58
    }
59

60
    get length(): number {
61
        return this.#index.length;
5✔
62
    }
63

64
    getStringBytes(idx: number): U8Array | undefined {
65
        if (idx < 0 || idx >= this.#index.length) return undefined;
294,484!
66
        return this.#getBytesByIndexValue(this.#index[idx]);
294,484✔
67
    }
68

69
    getString(idx: number): string | undefined {
70
        const bytes = this.getStringBytes(idx);
87✔
71
        if (!bytes) return undefined;
87!
72
        return this.#decoder.decode(bytes);
87✔
73
    }
74

75
    #getBytesByIndexValue(value: number): U8Array {
76
        const offset = value >>> this.#strLenBits;
294,484✔
77
        const length = value & this.#strLenMask;
294,484✔
78
        return this.#data.subarray(offset, offset + length);
294,484✔
79
    }
80

81
    values(): U8Array[] {
82
        return [...this.#index].map((v) => this.#getBytesByIndexValue(v));
8✔
83
    }
84

85
    toString(): string {
86
        return [...this.#index].map((_, i) => this.getString(i) || '').join(', ');
29!
87
    }
88

89
    toJSON(): { index: number[]; data: number[]; strLenBits: number } {
90
        return {
1✔
91
            index: [...this.#index],
92
            data: [...this.#data],
93
            strLenBits: this.#strLenBits,
94
        };
95
    }
96
}
97

98
export class StringTableBuilder {
99
    #data: (number[] | Uint8Array)[] = [];
166✔
100
    #encoder = new TextEncoder();
166✔
101
    #lookupTrie = new GTrie<number, number>();
166✔
102
    #locked = false;
166✔
103
    #maxStrLen = 0;
166✔
104

105
    addStringBytes(bytes: Uint8Array | number[]): number {
106
        assert(!this.#locked, 'StringTableBuilder is locked and cannot be modified.');
74,432✔
107
        const found = this.#lookupTrie.get(bytes);
74,432✔
108
        if (found !== undefined) {
74,432✔
109
            return found;
45,242✔
110
        }
111
        const idx = this.#data.push(bytes) - 1;
29,190✔
112
        this.#lookupTrie.insert(bytes, idx);
29,190✔
113
        this.#maxStrLen = Math.max(this.#maxStrLen, bytes.length);
29,190✔
114
        return idx;
29,190✔
115
    }
116

117
    addString(str: string): number {
118
        const bytes = this.#encoder.encode(str);
124✔
119
        return this.addStringBytes(bytes);
124✔
120
    }
121

122
    getEntry(idx: number): number[] | Uint8Array | undefined {
123
        return this.#data[idx];
74,308✔
124
    }
125

126
    get length(): number {
127
        return this.#data.length;
8✔
128
    }
129

130
    build(): StringTable {
131
        this.#locked = true;
165✔
132

133
        if (!this.#data.length) {
165✔
134
            return new StringTable([], new Uint8Array(0), 8);
153✔
135
        }
136

137
        // sorted by size descending
138
        const sortedBySize = this.#data.map((b, i) => ({ b, i })).sort((a, b) => b.b.length - a.b.length);
188,195✔
139
        const byteValues: number[] = [];
12✔
140

141
        const strLenBits = Math.ceil(Math.log2(this.#maxStrLen + 1));
12✔
142
        const strLenMask = (1 << strLenBits) - 1;
12✔
143
        const index: number[] = new Array(this.#data.length);
12✔
144

145
        for (const { b, i } of sortedBySize) {
12✔
146
            let offset = findValues(b);
29,190✔
147
            if (offset < 0) {
29,190✔
148
                offset = appendValues(b);
8,757✔
149
            }
150
            const length = b.length;
29,190✔
151
            assert(length <= strLenMask, `String length ${length} exceeds maximum of ${strLenMask}`);
29,190✔
152
            index[i] = (offset << strLenBits) | length;
29,190✔
153
        }
154

155
        return new StringTable(index, new Uint8Array(byteValues), strLenBits);
12✔
156

157
        function findValues(buf: number[] | Uint8Array): number {
158
            const bufLen = buf.length;
29,190✔
159
            const byteLen = byteValues.length;
29,190✔
160
            const maxOffset = byteLen - bufLen;
29,190✔
161

162
            for (let i = 0; i <= maxOffset; i++) {
29,190✔
163
                let match = true;
240,568,151✔
164
                for (let j = 0; j < bufLen; j++) {
240,568,151✔
165
                    if (byteValues[i + j] !== buf[j]) {
256,332,767✔
166
                        match = false;
240,547,718✔
167
                        break;
240,547,718✔
168
                    }
169
                }
170
                if (match) {
240,568,151!
171
                    return i;
20,433✔
172
                }
173
            }
174

175
            return -1;
8,757✔
176
        }
177

178
        function appendValues(buf: number[] | Uint8Array): number {
179
            const offset = byteValues.length;
8,757✔
180
            byteValues.push(...buf);
8,757✔
181
            return offset;
8,757✔
182
        }
183
    }
184

185
    static fromStringTable(table: StringTable): StringTableBuilder {
186
        const builder = new StringTableBuilder();
8✔
187
        const values = table.values();
8✔
188
        const len = values.length;
8✔
189
        for (let i = 0; i < len; ++i) {
8✔
NEW
190
            builder.addStringBytes(values[i]);
×
191
        }
192

193
        return builder;
8✔
194
    }
195
}
196

197
function getStringTableBinaryFormat(): BinaryFormat {
198
    return new BinaryFormatBuilder()
4✔
199
        .addUint8('indexBits', 'The number of bits needed for each index entry', 32)
200
        .addUint8('strLenBits', 'The number of bits needed to store the max length of a string in the table.', 8)
201
        .addString('reserved', 'Reserved for future use', 6)
202
        .addUint32ArrayPtr('index32', 'String index array of 32 bit entries')
203
        .addUint16ArrayPtr('index16', 'String index array of 16 bit entries', 'index32')
204
        .addUint8ArrayPtr('index', 'String index array of 8 bit entries', 'index32')
205
        .addUint8ArrayPtr('data', 'String byte data')
206
        .build();
207
}
208

209
export function encodeStringTableToBinary(table: StringTable, endian?: 'LE' | 'BE'): U8Array {
210
    const strLenBits = table.strLenBits;
2✔
211
    const offsetBits = Math.ceil(Math.log2(table.charData.length + 1));
2✔
212
    const minIndexBits = strLenBits + offsetBits;
2✔
213
    const indexBits = minIndexBits <= 16 ? 16 : 32;
2!
214
    assert(minIndexBits <= indexBits, `Index bits ${indexBits} is too small for required bits ${minIndexBits}`);
2✔
215

216
    const format = getStringTableBinaryFormat();
2✔
217

218
    const builder = new BinaryDataBuilder(format, endian);
2✔
219
    builder.setUint8('indexBits', indexBits);
2✔
220
    builder.setUint8('strLenBits', strLenBits);
2✔
221
    if (indexBits === 16) {
2!
222
        builder.setPtrUint16Array('index16', toU16Array(table.index));
2✔
223
    } else {
224
        builder.setPtrUint32Array('index32', toU32Array(table.index));
×
225
    }
226
    builder.setPtrUint8Array('data', table.charData);
2✔
227

228
    return builder.build();
2✔
229
}
230

231
export function decodeStringTableFromBinary(data: U8Array, endian?: 'LE' | 'BE'): StringTable {
232
    if (!data?.length) {
5!
233
        return new StringTable([], new Uint8Array(0), 8);
3✔
234
    }
235
    const reader = new BinaryDataReader(data, getStringTableBinaryFormat(), endian);
2✔
236
    const indexBits = reader.getUint8('indexBits');
2✔
237
    const strLenBits = reader.getUint8('strLenBits');
2✔
238
    const index = indexBits === 16 ? reader.getPtrUint16Array('index16') : reader.getPtrUint32Array('index32');
2!
239
    const buffer = reader.getPtrUint8Array('data');
2✔
240
    return new StringTable(index, buffer, strLenBits);
2✔
241
}
242

243
function toU16Array(data: IndexArrayRO): U16Array {
244
    if (data instanceof Uint16Array) {
2!
245
        return data;
×
246
    }
247
    return new Uint16Array(data);
2✔
248
}
249

250
function toU32Array(data: IndexArrayRO): U32Array {
251
    if (data instanceof Uint32Array) {
×
252
        return data;
×
253
    }
254
    return new Uint32Array(data);
×
255
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc