• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

streetsidesoftware / cspell / 20635026836

01 Jan 2026 07:57AM UTC coverage: 93.043% (+0.1%) from 92.94%
20635026836

Pull #8265

github

web-flow
Merge 01699e25d into 90ae3e28f
Pull Request #8265: fix: Make endian required when encoding a StringTable

8749 of 11464 branches covered (76.32%)

24 of 24 new or added lines in 3 files covered. (100.0%)

10 existing lines in 2 files now uncovered.

17199 of 18485 relevant lines covered (93.04%)

116648.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.39
/packages/cspell-trie-lib/src/lib/StringTable/StringTable.ts
1
import type { BinaryFormat } from '../binary/index.ts';
2
import { BinaryDataBuilder, BinaryDataReader, BinaryFormatBuilder } from '../binary/index.ts';
3
import { GTrie } from '../GTrie/index.ts';
4
import { assert } from '../utils/assert.ts';
5

6
type U32Array = Uint32Array<ArrayBuffer>;
7
type U16Array = Uint16Array<ArrayBuffer>;
8
type U8Array = Uint8Array<ArrayBuffer>;
9

10
type IndexArray = U32Array | U16Array | number[];
11

12
type IndexArrayRO = U32Array | U16Array | Readonly<number[]>;
13

14
/**
15
 * This is a set of strings stored in a compact form.
16
 *
17
 * Strings are stored as UTF-8 encoded bytes in a single contiguous buffer.
18
 * Each string is referenced by its starting index and length within the buffer.
19
 *
20
 * This design minimizes memory overhead by avoiding individual string objects,
21
 * allowing efficient storage and retrieval of a large number of strings.
22
 *
23
 * Strings are retrieved based on their index.
24
 *
25
 * The internal index table contains the offset and length of each string in the buffer.
26
 *
27
 */
28
export class StringTable {
29
    #index: IndexArray;
30
    #data: U8Array;
31
    #strLenBits: number;
32
    #strLenMask: number;
33
    #decoder = new TextDecoder();
170✔
34

35
    /**
36
     *
37
     * @param index - the lookup index format: `offset|len` where the low bits are the length
38
     * @param utf8ByteData - the UTF-8 encoded byte data for all the strings
39
     * @param strLenBits - number of bits used to store the length of the string in the index entry
40
     */
41
    constructor(index: IndexArray, utf8ByteData: U8Array, strLenBits: number) {
42
        this.#index = index;
170✔
43
        this.#data = utf8ByteData;
170✔
44
        this.#strLenBits = strLenBits;
170✔
45
        this.#strLenMask = (1 << strLenBits) - 1;
170✔
46
    }
47

48
    get index(): Readonly<IndexArray> {
49
        return this.#index;
3✔
50
    }
51

52
    get charData(): Readonly<U8Array> {
53
        return this.#data;
5✔
54
    }
55

56
    get strLenBits(): number {
57
        return this.#strLenBits;
2✔
58
    }
59

60
    get length(): number {
61
        return this.#index.length;
5✔
62
    }
63

64
    getStringBytes(idx: number): U8Array | undefined {
65
        if (idx < 0 || idx >= this.#index.length) return undefined;
294,484!
66
        return this.#getBytesByIndexValue(this.#index[idx]);
294,484✔
67
    }
68

69
    getString(idx: number): string | undefined {
70
        const bytes = this.getStringBytes(idx);
87✔
71
        if (!bytes) return undefined;
87!
72
        return this.#decoder.decode(bytes);
87✔
73
    }
74

75
    #getBytesByIndexValue(value: number): U8Array {
76
        const offset = value >>> this.#strLenBits;
294,484✔
77
        const length = value & this.#strLenMask;
294,484✔
78
        return this.#data.subarray(offset, offset + length);
294,484✔
79
    }
80

81
    values(): U8Array[] {
82
        return [...this.#index].map((v) => this.#getBytesByIndexValue(v));
8✔
83
    }
84

85
    toString(): string {
86
        return [...this.#index].map((_, i) => this.getString(i) || '').join(', ');
29!
87
    }
88

89
    toJSON(): { index: number[]; data: number[]; strLenBits: number } {
90
        return {
1✔
91
            index: [...this.#index],
92
            data: [...this.#data],
93
            strLenBits: this.#strLenBits,
94
        };
95
    }
96
}
97

98
export class StringTableBuilder {
99
    #data: (number[] | Uint8Array)[] = [];
166✔
100
    #encoder = new TextEncoder();
166✔
101
    #lookupTrie = new GTrie<number, number>();
166✔
102
    #locked = false;
166✔
103
    #maxStrLen = 0;
166✔
104

105
    addStringBytes(bytes: Uint8Array | number[]): number {
106
        assert(!this.#locked, 'StringTableBuilder is locked and cannot be modified.');
74,432✔
107
        const found = this.#lookupTrie.get(bytes);
74,432✔
108
        if (found !== undefined) {
74,432✔
109
            return found;
45,242✔
110
        }
111
        const idx = this.#data.push(bytes) - 1;
29,190✔
112
        this.#lookupTrie.insert(bytes, idx);
29,190✔
113
        this.#maxStrLen = Math.max(this.#maxStrLen, bytes.length);
29,190✔
114
        return idx;
29,190✔
115
    }
116

117
    addString(str: string): number {
118
        const bytes = this.#encoder.encode(str);
124✔
119
        return this.addStringBytes(bytes);
124✔
120
    }
121

122
    getEntry(idx: number): number[] | Uint8Array | undefined {
123
        return this.#data[idx];
74,308✔
124
    }
125

126
    get length(): number {
127
        return this.#data.length;
8✔
128
    }
129

130
    build(): StringTable {
131
        this.#locked = true;
165✔
132

133
        if (!this.#data.length) {
165✔
134
            return new StringTable([], new Uint8Array(0), 8);
153✔
135
        }
136

137
        // sorted by size descending
138
        const sortedBySize = this.#data.map((b, i) => ({ b, i })).sort((a, b) => b.b.length - a.b.length);
188,195✔
139
        const byteValues: number[] = [];
12✔
140

141
        const strLenBits = Math.ceil(Math.log2(this.#maxStrLen + 1));
12✔
142
        const strLenMask = (1 << strLenBits) - 1;
12✔
143
        const index: number[] = new Array(this.#data.length);
12✔
144

145
        for (const { b, i } of sortedBySize) {
12✔
146
            let offset = findValues(b);
29,190✔
147
            if (offset < 0) {
29,190✔
148
                offset = appendValues(b);
8,757✔
149
            }
150
            const length = b.length;
29,190✔
151
            assert(length <= strLenMask, `String length ${length} exceeds maximum of ${strLenMask}`);
29,190✔
152
            index[i] = (offset << strLenBits) | length;
29,190✔
153
        }
154

155
        return new StringTable(index, new Uint8Array(byteValues), strLenBits);
12✔
156

157
        function findValues(buf: number[] | Uint8Array): number {
158
            const bufLen = buf.length;
29,190✔
159
            const byteLen = byteValues.length;
29,190✔
160
            const maxOffset = byteLen - bufLen;
29,190✔
161

162
            for (let i = 0; i <= maxOffset; i++) {
29,190✔
163
                let match = true;
240,568,151✔
164
                for (let j = 0; j < bufLen; j++) {
240,568,151✔
165
                    if (byteValues[i + j] !== buf[j]) {
256,332,767✔
166
                        match = false;
240,547,718✔
167
                        break;
240,547,718✔
168
                    }
169
                }
170
                if (match) {
240,568,151!
171
                    return i;
20,433✔
172
                }
173
            }
174

175
            return -1;
8,757✔
176
        }
177

178
        function appendValues(buf: number[] | Uint8Array): number {
179
            const offset = byteValues.length;
8,757✔
180
            byteValues.push(...buf);
8,757✔
181
            return offset;
8,757✔
182
        }
183
    }
184

185
    static fromStringTable(table: StringTable): StringTableBuilder {
186
        const builder = new StringTableBuilder();
8✔
187
        const values = table.values();
8✔
188
        const len = values.length;
8✔
189
        for (let i = 0; i < len; ++i) {
8✔
190
            builder.addStringBytes(values[i]);
×
191
        }
192

193
        return builder;
8✔
194
    }
195
}
196

197
function getStringTableBinaryFormat(): BinaryFormat {
198
    return new BinaryFormatBuilder()
4✔
199
        .addUint8('indexBits', 'The number of bits needed for each index entry', 32)
200
        .addUint8('strLenBits', 'The number of bits needed to store the max length of a string in the table.', 8)
201
        .addUint16('endian', 'Endianness of the data', 0x3130) // '01' little endian
202
        .addString('reserved', 'Reserved for future use', 4)
203
        .addUint32ArrayPtr('index32', 'String index array of 32 bit entries')
204
        .addUint16ArrayPtr('index16', 'String index array of 16 bit entries', 'index32')
205
        .addUint8ArrayPtr('index', 'String index array of 8 bit entries', 'index32')
206
        .addUint8ArrayPtr('data', 'String byte data')
207
        .build();
208
}
209

210
/**
211
 * Encodes a StringTable into binary data so that it can be stored or transmitted.
212
 * @param table - the string table to encode
213
 * @param endian - the resulting endianness of the data.
214
 * @returns The encoded string table binary data.
215
 */
216
export function encodeStringTableToBinary(table: StringTable, endian: 'LE' | 'BE'): U8Array {
217
    const strLenBits = table.strLenBits;
2✔
218
    const offsetBits = Math.ceil(Math.log2(table.charData.length + 1));
2✔
219
    const minIndexBits = strLenBits + offsetBits;
2✔
220
    const indexBits = minIndexBits <= 16 ? 16 : 32;
2!
221
    assert(minIndexBits <= indexBits, `Index bits ${indexBits} is too small for required bits ${minIndexBits}`);
2✔
222

223
    const format = getStringTableBinaryFormat();
2✔
224

225
    const builder = new BinaryDataBuilder(format, endian);
2✔
226
    builder.setUint8('indexBits', indexBits);
2✔
227
    builder.setUint8('strLenBits', strLenBits);
2✔
228
    builder.setUint16('endian', 0x3130); // '01' little endian
2✔
229
    if (indexBits === 16) {
2!
230
        builder.setPtrUint16Array('index16', toU16Array(table.index));
2✔
231
    } else {
UNCOV
232
        builder.setPtrUint32Array('index32', toU32Array(table.index));
×
233
    }
234
    builder.setPtrUint8Array('data', table.charData);
2✔
235

236
    return builder.build();
2✔
237
}
238

239
/**
240
 * Decodes binary data into a StringTable.
241
 * @param data - the byte data of the string table.
242
 * @param endian - the endianness of the encoded data.
243
 * @returns
244
 */
245
export function decodeStringTableFromBinary(data: U8Array, endian: 'LE' | 'BE'): StringTable {
246
    if (!data?.length) {
5!
247
        return new StringTable([], new Uint8Array(0), 8);
3✔
248
    }
249
    const reader = new BinaryDataReader(data, getStringTableBinaryFormat(), endian);
2✔
250
    const indexBits = reader.getUint8('indexBits');
2✔
251
    const strLenBits = reader.getUint8('strLenBits');
2✔
252
    const endianStored = reader.getUint16('endian');
2✔
253
    const endianExpected = endian === 'LE' ? 0x3130 : 0x3031;
2!
254
    assert(
2✔
255
        !endianStored || endianStored === endianExpected,
4!
256
        `Endian mismatch: expected ${endian}, found ${endianStored}`,
257
    );
258
    const index = indexBits === 16 ? reader.getPtrUint16Array('index16') : reader.getPtrUint32Array('index32');
2!
259
    const buffer = reader.getPtrUint8Array('data');
2✔
260
    return new StringTable(index, buffer, strLenBits);
2✔
261
}
262

263
function toU16Array(data: IndexArrayRO): U16Array {
264
    if (data instanceof Uint16Array) {
2!
UNCOV
265
        return data;
×
266
    }
267
    return new Uint16Array(data);
2✔
268
}
269

270
function toU32Array(data: IndexArrayRO): U32Array {
271
    if (data instanceof Uint32Array) {
×
UNCOV
272
        return data;
×
273
    }
UNCOV
274
    return new Uint32Array(data);
×
275
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc