• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

streetsidesoftware / cspell / 20568885960

29 Dec 2025 08:57AM UTC coverage: 92.813%. First build
20568885960

Pull #8243

github

web-flow
Merge 6838753d6 into 0cd2b7dfc
Pull Request #8243: fix: Add StringTable and refactor BinaryFormat

8648 of 11263 branches covered (76.78%)

305 of 344 new or added lines in 2 files covered. (88.66%)

16957 of 18270 relevant lines covered (92.81%)

29980.39 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.26
/packages/cspell-trie-lib/src/lib/StringTable/StringTable.ts
1
import type { BinaryFormat } from '../binary/index.ts';
2
import { BinaryDataBuilder, BinaryDataReader, BinaryFormatBuilder } from '../binary/index.ts';
3
import { GTrie } from '../GTrie/index.ts';
4
import { assert } from '../utils/assert.ts';
5

6
type U32Array = Uint32Array<ArrayBuffer>;
7
type U16Array = Uint16Array<ArrayBuffer>;
8
type U8Array = Uint8Array<ArrayBuffer>;
9

10
type IndexArray = U32Array | U16Array | number[];
11

12
type IndexArrayRO = U32Array | U16Array | Readonly<number[]>;
13

14
/**
15
 * This is a set of strings stored in a compact form.
16
 *
17
 * Strings are stored as UTF-8 encoded bytes in a single contiguous buffer.
18
 * Each string is referenced by its starting index and length within the buffer.
19
 *
20
 * This design minimizes memory overhead by avoiding individual string objects,
21
 * allowing efficient storage and retrieval of a large number of strings.
22
 *
23
 * Strings are retrieved based up their index.
24
 *
25
 * The internal index table contains the offset and length of each string in the buffer.
26
 *
27
 */
28
export class StringTable {
29
    #index: IndexArray;
30
    #data: U8Array;
31
    #strLenBits: number;
32
    #strLenMask: number;
33
    #decoder = new TextDecoder();
4✔
34

35
    /**
36
     *
37
     * @param index - the lookup index format: `offset|len` where the low bits are the length
38
     * @param utf8ByteData - the UTF-8 encoded byte data for all the strings
39
     * @param strLenBits - number of bits used to store the length of the string in the index entry
40
     */
41
    constructor(index: IndexArray, utf8ByteData: U8Array, strLenBits: number) {
42
        this.#index = index;
4✔
43
        this.#data = utf8ByteData;
4✔
44
        this.#strLenBits = strLenBits;
4✔
45
        this.#strLenMask = (1 << strLenBits) - 1;
4✔
46
    }
47

48
    get index(): Readonly<IndexArray> {
49
        return this.#index;
2✔
50
    }
51

52
    get charData(): Readonly<U8Array> {
53
        return this.#data;
3✔
54
    }
55

56
    get strLenBits(): number {
57
        return this.#strLenBits;
1✔
58
    }
59

60
    getStringBytes(idx: number): Uint8Array | undefined {
61
        if (idx < 0 || idx >= this.#index.length) return undefined;
58!
62
        const value = this.#index[idx];
58✔
63
        const offset = value >>> this.#strLenBits;
58✔
64
        const length = value & this.#strLenMask;
58✔
65
        return this.#data.subarray(offset, offset + length);
58✔
66
    }
67

68
    getString(idx: number): string | undefined {
69
        const bytes = this.getStringBytes(idx);
58✔
70
        if (!bytes) return undefined;
58!
71
        return this.#decoder.decode(bytes);
58✔
72
    }
73
}
74

75
export class StringTableBuilder {
76
    #buffers: (number[] | Uint8Array)[] = [];
4✔
77
    #encoder = new TextEncoder();
4✔
78
    #lookupTrie = new GTrie<number, number>();
4✔
79
    #locked = false;
4✔
80
    #maxStrLen = 0;
4✔
81

82
    addStringBytes(bytes: Uint8Array | number[]): number {
83
        assert(!this.#locked, 'StringTableBuilder is locked and cannot be modified.');
58✔
84
        const found = this.#lookupTrie.get(bytes);
58✔
85
        if (found !== undefined) {
58!
NEW
86
            return found;
×
87
        }
88
        const idx = this.#buffers.push(bytes) - 1;
58✔
89
        this.#lookupTrie.insert(bytes, idx);
58✔
90
        this.#maxStrLen = Math.max(this.#maxStrLen, bytes.length);
58✔
91
        return idx;
58✔
92
    }
93

94
    addString(str: string): number {
95
        const bytes = this.#encoder.encode(str);
58✔
96
        return this.addStringBytes(bytes);
58✔
97
    }
98

99
    build(): StringTable {
100
        this.#locked = true;
3✔
101

102
        if (!this.#buffers.length) {
3✔
103
            return new StringTable([], new Uint8Array(0), 8);
1✔
104
        }
105

106
        // sorted by size descending
107
        const sortedBySize = this.#buffers.map((b, i) => ({ b, i })).sort((a, b) => b.b.length - a.b.length);
208✔
108
        const byteValues: number[] = [];
2✔
109

110
        const strLenBits = Math.ceil(Math.log2(this.#maxStrLen + 1));
2✔
111
        const strLenMask = (1 << strLenBits) - 1;
2✔
112
        const index: number[] = new Array(this.#buffers.length);
2✔
113

114
        for (const { b, i } of sortedBySize) {
2✔
115
            let offset = findValues(b);
58✔
116
            if (offset < 0) {
58✔
117
                offset = appendValues(b);
20✔
118
            }
119
            const length = b.length;
58✔
120
            assert(length <= strLenMask, `String length ${length} exceeds maximum of ${strLenMask}`);
58✔
121
            index[i] = (offset << strLenBits) | length;
58✔
122
        }
123

124
        return new StringTable(index, new Uint8Array(byteValues), strLenBits);
2✔
125

126
        function findValues(buf: number[] | Uint8Array): number {
127
            const bufLen = buf.length;
58✔
128
            const byteLen = byteValues.length;
58✔
129
            const maxOffset = byteLen - bufLen;
58✔
130

131
            for (let i = 0; i <= maxOffset; i++) {
58✔
132
                let match = true;
1,792✔
133
                for (let j = 0; j < bufLen; j++) {
1,792✔
134
                    if (byteValues[i + j] !== buf[j]) {
1,990✔
135
                        match = false;
1,754✔
136
                        break;
1,754✔
137
                    }
138
                }
139
                if (match) {
1,792✔
140
                    return i;
38✔
141
                }
142
            }
143

144
            return -1;
20✔
145
        }
146

147
        function appendValues(buf: number[] | Uint8Array): number {
148
            const offset = byteValues.length;
20✔
149
            byteValues.push(...buf);
20✔
150
            return offset;
20✔
151
        }
152
    }
153
}
154

155
function getStringTableBinaryFormat(): BinaryFormat {
156
    return new BinaryFormatBuilder()
2✔
157
        .addUint8('indexBits', 'The number of bits needed for each index entry', 32)
158
        .addUint8('strLenBits', 'The number of bits needed to store the max length of a string in the table.', 8)
159
        .addString('reserved', 'Reserved for future use', 6)
160
        .addUint32ArrayPtr('index32', 'String index array of 32 bit entries')
161
        .addUint16ArrayPtr('index16', 'String index array of 16 bit entries', 'index32')
162
        .addUint8ArrayPtr('index', 'String index array of 8 bit entries', 'index32')
163
        .addUint8ArrayPtr('data', 'String byte data')
164
        .build();
165
}
166

167
export function encodeStringTableToBinary(table: StringTable, endian: 'LE' | 'BE'): U8Array {
168
    const strLenBits = table.strLenBits;
1✔
169
    const offsetBits = Math.ceil(Math.log2(table.charData.length + 1));
1✔
170
    const minIndexBits = strLenBits + offsetBits;
1✔
171
    const indexBits = minIndexBits <= 16 ? 16 : 32;
1!
172
    assert(minIndexBits <= indexBits, `Index bits ${indexBits} is too small for required bits ${minIndexBits}`);
1✔
173

174
    const format = getStringTableBinaryFormat();
1✔
175

176
    const builder = new BinaryDataBuilder(format, endian);
1✔
177
    builder.setUint8('indexBits', indexBits);
1✔
178
    builder.setUint8('strLenBits', strLenBits);
1✔
179
    if (indexBits === 16) {
1!
180
        builder.setPtrUint16Array('index16', toU16Array(table.index));
1✔
181
    } else {
NEW
182
        builder.setPtrUint32Array('index32', toU32Array(table.index));
×
183
    }
184
    builder.setPtrUint8Array('data', table.charData);
1✔
185

186
    return builder.build();
1✔
187
}
188

189
export function decodeStringTableFromBinary(data: U8Array, endian: 'LE' | 'BE'): StringTable {
190
    const reader = new BinaryDataReader(data, getStringTableBinaryFormat(), endian);
1✔
191
    const indexBits = reader.getUint8('indexBits');
1✔
192
    const strLenBits = reader.getUint8('strLenBits');
1✔
193
    const index = indexBits === 16 ? reader.getPtrUint16Array('index16') : reader.getPtrUint32Array('index32');
1!
194
    const buffer = reader.getPtrUint8Array('data');
1✔
195
    return new StringTable(index, buffer, strLenBits);
1✔
196
}
197

198
function toU16Array(data: IndexArrayRO): U16Array {
199
    if (data instanceof Uint16Array) {
1!
NEW
200
        return data;
×
201
    }
202
    return new Uint16Array(data);
1✔
203
}
204

205
function toU32Array(data: IndexArrayRO): U32Array {
NEW
206
    if (data instanceof Uint32Array) {
×
NEW
207
        return data;
×
208
    }
NEW
209
    return new Uint32Array(data);
×
210
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc