• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

fb55 / htmlparser2 / 11292752892

11 Oct 2024 12:56PM CUT coverage: 99.264%. First build
11292752892

Pull #1932

github

web-flow
Merge 8a0342b61 into 37726f15e
Pull Request #1932: build(deps): Bump coverallsapp/github-action from 2.3.0 to 2.3.1

339 of 344 branches covered (98.55%)

1214 of 1223 relevant lines covered (99.26%)

1044.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.71
/src/Tokenizer.ts
1
import {
1✔
2
    EntityDecoder,
3
    DecodingMode,
4
    htmlDecodeTree,
5
    xmlDecodeTree,
6
} from "entities/dist/decode.js";
1✔
7

8
/**
 * Character codes the tokenizer compares input against.
 *
 * Members are listed in ascending code point order.
 */
const enum CharCodes {
    // Whitespace
    Tab = 0x9, // "\t"
    NewLine = 0xa, // "\n"
    FormFeed = 0xc, // "\f"
    CarriageReturn = 0xd, // "\r"
    Space = 0x20, // " "

    // Punctuation below the digits
    ExclamationMark = 0x21, // "!"
    DoubleQuote = 0x22, // '"'
    Number = 0x23, // "#"
    Amp = 0x26, // "&"
    SingleQuote = 0x27, // "'"
    Dash = 0x2d, // "-"
    Slash = 0x2f, // "/"

    // Digit range
    Zero = 0x30, // "0"
    Nine = 0x39, // "9"

    // Punctuation between digits and letters
    Semi = 0x3b, // ";"
    Lt = 0x3c, // "<"
    Eq = 0x3d, // "="
    Gt = 0x3e, // ">"
    Questionmark = 0x3f, // "?"

    // Uppercase letters (and the bracket right after them)
    UpperA = 0x41, // "A"
    UpperF = 0x46, // "F"
    UpperZ = 0x5a, // "Z"
    OpeningSquareBracket = 0x5b, // "["

    // Lowercase letters
    LowerA = 0x61, // "a"
    LowerF = 0x66, // "f"
    LowerX = 0x78, // "x"
    LowerZ = 0x7a, // "z"
}
37

38
/**
 * Every state the tokenizer's state machine can be in.
 *
 * Values are spelled out explicitly; they start at 1 and are consecutive.
 */
const enum State {
    Text = 1,
    BeforeTagName = 2, // After <
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,

    // Attributes
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12, // "
    InAttributeValueSq = 13, // '
    InAttributeValueNq = 14,

    // Declarations
    BeforeDeclaration = 15, // !
    InDeclaration = 16,

    // Processing instructions
    InProcessingInstruction = 17, // ?

    // Comments & CDATA
    BeforeComment = 18,
    CDATASequence = 19,
    InSpecialComment = 20,
    InCommentLike = 21,

    // Special tags
    BeforeSpecialS = 22, // Decide if we deal with `<script` or `<style`
    BeforeSpecialT = 23, // Decide if we deal with `<title` or `<textarea`
    SpecialStartSequence = 24,
    InSpecialTag = 25,

    InEntity = 26,
}
78

79
/**
 * Checks whether a char code is one of the whitespace characters the
 * tokenizer recognizes: space, newline, tab, form feed or carriage return.
 *
 * @param c Char code to test.
 * @returns `true` if `c` is one of the five whitespace char codes.
 */
function isWhitespace(c: number): boolean {
    switch (c) {
        case CharCodes.Space:
        case CharCodes.NewLine:
        case CharCodes.Tab:
        case CharCodes.FormFeed:
        case CharCodes.CarriageReturn: {
            return true;
        }
        default: {
            return false;
        }
    }
}
6,581✔
88

89
/**
 * Checks whether a char code terminates a section of a tag (for example a
 * tag name or an attribute name): `/`, `>` or any whitespace character.
 *
 * @param c Char code to test.
 * @returns `true` if `c` ends the current tag section.
 */
function isEndOfTagSection(c: number): boolean {
    if (c === CharCodes.Slash || c === CharCodes.Gt) {
        return true;
    }

    return isWhitespace(c);
}
4,440✔
92

93
/**
 * Checks whether a char code is an ASCII letter (`a`-`z` or `A`-`Z`).
 *
 * @param c Char code to test.
 * @returns `true` if `c` is an ASCII letter.
 */
function isASCIIAlpha(c: number): boolean {
    const isLowercase = c >= CharCodes.LowerA && c <= CharCodes.LowerZ;
    if (isLowercase) {
        return true;
    }

    return c >= CharCodes.UpperA && c <= CharCodes.UpperZ;
}
463✔
99

100
/**
 * How an attribute's value was delimited in the source; passed to
 * `Callbacks.onattribend`.
 *
 * Numbering is implicit and starts at 0.
 */
export enum QuoteType {
    /** The attribute had no value (`<input disabled>`). */
    NoValue,
    /** The value was not wrapped in quotes (`a=b`). */
    Unquoted,
    /** The value was wrapped in single quotes (`a='b'`). */
    Single,
    /** The value was wrapped in double quotes (`a="b"`). */
    Double,
}
106

107
/**
 * Event sink the tokenizer reports to.
 *
 * All `start` / `endIndex` arguments are tokenizer indices: they keep
 * counting across `write` calls, so they are positions within the whole
 * input written since the last reset, not within the current chunk.
 */
export interface Callbacks {
    // Text

    /** A run of text spans `start` (inclusive) to `endIndex` (exclusive). */
    ontext(start: number, endIndex: number): void;
    /** NOTE(review): presumably a decoded entity within text; the caller is outside this section. */
    ontextentity(codepoint: number, endIndex: number): void;

    // Tags

    /** The name of an opening tag spans `start` to `endIndex`. */
    onopentagname(start: number, endIndex: number): void;
    /** The `>` closing an opening tag was found at `endIndex`. */
    onopentagend(endIndex: number): void;
    /** The `>` of a self-closing tag (`/>`) was found at `endIndex`. */
    onselfclosingtag(endIndex: number): void;
    /** The name of a closing tag spans `start` to `endIndex`. */
    onclosetag(start: number, endIndex: number): void;

    // Attributes

    /** An attribute name spans `start` to `endIndex`. */
    onattribname(start: number, endIndex: number): void;
    /** A piece of an attribute value spans `start` to `endIndex`. */
    onattribdata(start: number, endIndex: number): void;
    /** NOTE(review): presumably a decoded entity within an attribute value; the caller is outside this section. */
    onattribentity(codepoint: number): void;
    /** The current attribute is complete; `quote` tells how its value was delimited. */
    onattribend(quote: QuoteType, endIndex: number): void;

    // Comments, CDATA, declarations and processing instructions

    /** A CDATA section ended. The tokenizer passes `endOffset = 2` when it saw `]]>`. */
    oncdata(start: number, endIndex: number, endOffset: number): void;
    /**
     * A comment ended. The tokenizer passes `endOffset = 2` for comments
     * closed by `-->` and `0` for "special" comments closed by a plain `>`.
     */
    oncomment(start: number, endIndex: number, endOffset: number): void;
    /** A declaration (`<!...>`) spans `start` to `endIndex`. */
    ondeclaration(start: number, endIndex: number): void;
    /** A processing instruction (`<?...>`) spans `start` to `endIndex`. */
    onprocessinginstruction(start: number, endIndex: number): void;

    // Lifecycle

    /** NOTE(review): presumably called once the input is finished; the caller is outside this section. */
    onend(): void;
}
124

125
/**
 * Converts a string into the char codes of its characters.
 *
 * Only used with ASCII text, so every code fits into a byte.
 */
function toSequence(text: string): Uint8Array {
    const codes = new Uint8Array(text.length);
    for (let i = 0; i < text.length; i++) {
        codes[i] = text.charCodeAt(i);
    }
    return codes;
}

/**
 * Sequences used to match longer strings.
 *
 * There are no sequences for the opening `<script`, `<style`, `<title` or
 * `<textarea`. The matching `*End` sequence is re-used for those, starting
 * from a later offset (skipping `</` and the characters already matched).
 */
const Sequences = {
    Cdata: toSequence("CDATA["),
    CdataEnd: toSequence("]]>"),
    CommentEnd: toSequence("-->"),
    ScriptEnd: toSequence("</script"),
    StyleEnd: toSequence("</style"),
    TitleEnd: toSequence("</title"),
    TextareaEnd: toSequence("</textarea"),
};
1✔
142

143
export default class Tokenizer {
1✔
144
    /** The state the state machine is currently in. */
    private state: State = State.Text;
    /** The chunk most recently handed to `write`. */
    private buffer = "";
    /** Index at which the section that is currently being read begins. */
    private sectionStart = 0;
    /**
     * Index of the character we are currently looking at.
     *
     * Counts across chunks; subtract `offset` to index into `buffer`.
     */
    private index = 0;
    /** Index at which the most recent entity started (its `&`). */
    private entityStart = 0;
    /**
     * The state to return to once an entity has been handled. Entities are
     * decoded "inside" another state (text or attribute values).
     */
    private baseState: State = State.Text;
    /**
     * Set while the open tag being read is `<script>`, `<style>`, `<title>`
     * or `<textarea>`, so that the tokenizer switches to `InSpecialTag`
     * once that tag is closed.
     */
    private isSpecial = false;
    /** `true` while the tokenizer is allowed to parse; `false` while paused. */
    public running = true;
    /** Combined length of all chunks that came before the current `buffer`. */
    private offset = 0;

    /** Whether XML rules (rather than HTML rules) are applied. */
    private readonly xmlMode: boolean;
    /** Whether `&...;` entities are decoded. */
    private readonly decodeEntities: boolean;
    /** Decoder that entity handling is delegated to. */
    private readonly entityDecoder: EntityDecoder;

    /**
     * @param options Tokenizer options. `xmlMode` defaults to `false`,
     *     `decodeEntities` defaults to `true`.
     * @param cbs Receiver of all tokenizer events.
     */
    constructor(
        {
            xmlMode = false,
            decodeEntities = true,
        }: { xmlMode?: boolean; decodeEntities?: boolean },
        private readonly cbs: Callbacks,
    ) {
        // XML and HTML use different entity tables.
        const decodeTree = xmlMode ? xmlDecodeTree : htmlDecodeTree;

        this.xmlMode = xmlMode;
        this.decodeEntities = decodeEntities;
        this.entityDecoder = new EntityDecoder(
            decodeTree,
            (codepoint, consumedLength) =>
                this.emitCodePoint(codepoint, consumedLength),
        );
    }
105✔
181

182
    /**
     * Puts the tokenizer back into its initial state so that it can be
     * re-used for a new document.
     *
     * All per-document state is cleared, including `isSpecial`. Without
     * that, a reset in the middle of e.g. `<script ...` would make the next
     * `>` switch to `InSpecialTag` while `currentSequence` is unset, and the
     * tokenizer would throw when reading its length.
     */
    public reset(): void {
        this.state = State.Text;
        this.buffer = "";
        this.sectionStart = 0;
        this.index = 0;
        this.entityStart = 0;
        this.baseState = State.Text;
        this.isSpecial = false;
        this.currentSequence = undefined!;
        this.sequenceIndex = 0;
        this.running = true;
        this.offset = 0;
    }
56✔
192

193
    /**
     * Feeds the next chunk of input to the tokenizer and parses it.
     *
     * The previous chunk is dropped; its length is added to `offset` so that
     * `index` keeps counting across chunks.
     *
     * @param chunk The next piece of the document.
     */
    public write(chunk: string): void {
        const previousLength = this.buffer.length;

        this.buffer = chunk;
        this.offset += previousLength;

        this.parse();
    }
2,364✔
198

199
    /**
     * Signals the end of the input. Finishes up right away unless the
     * tokenizer is currently paused.
     */
    public end(): void {
        if (!this.running) {
            return;
        }

        this.finish();
    }
158✔
202

203
    /**
     * Pauses the tokenizer. The parse loop checks `running` before every
     * character, so it stops before consuming the next one.
     */
    public pause(): void {
        this.running = false;
    }
4✔
206

207
    /**
     * Resumes a paused tokenizer and continues parsing if the current
     * buffer still has unread characters.
     */
    public resume(): void {
        this.running = true;

        const endOfBuffer = this.buffer.length + this.offset;
        if (this.index < endOfBuffer) {
            this.parse();
        }
    }
5✔
213

214
    /**
     * Handles a character of plain text.
     *
     * A `<` ends the text run (which is emitted if it is not empty) and
     * starts a tag. An `&` starts an entity when entities are decoded.
     * When entities are not decoded, nothing but `<` is of interest, so we
     * fast-forward to the next one.
     */
    private stateText(c: number): void {
        const atTagStart =
            c === CharCodes.Lt ||
            (!this.decodeEntities && this.fastForwardTo(CharCodes.Lt));

        if (!atTagStart) {
            if (this.decodeEntities && c === CharCodes.Amp) {
                this.startEntity();
            }
            return;
        }

        if (this.index > this.sectionStart) {
            this.cbs.ontext(this.sectionStart, this.index);
        }
        this.state = State.BeforeTagName;
        this.sectionStart = this.index;
    }
10,792✔
228

229
    /** The sequence currently being matched; unset until a state assigns one. */
    private currentSequence: Uint8Array = undefined!;
    /** Position within `currentSequence` that is matched next. */
    private sequenceIndex = 0;

    /**
     * Checks whether the name of the tag being opened really is one of the
     * special tag names (`currentSequence` without its leading `</`).
     *
     * While characters keep matching we just advance. On a mismatch, or if
     * the name continues after the sequence has ended, the tag is an
     * ordinary one. Either way we end up in `InTagName` and let it
     * reconsume the character.
     */
    private stateSpecialStartSequence(c: number): void {
        const sequence = this.currentSequence;
        const atSequenceEnd = this.sequenceIndex === sequence.length;

        let matches: boolean;
        if (atSequenceEnd) {
            // The whole name matched; the tag name has to end here.
            matches = isEndOfTagSection(c);
        } else {
            // `| 0x20` lower-cases ASCII letters (case-insensitive match).
            matches = (c | 0x20) === sequence[this.sequenceIndex];
        }

        if (matches && !atSequenceEnd) {
            this.sequenceIndex++;
            return;
        }

        if (!matches) {
            this.isSpecial = false;
        }

        this.sequenceIndex = 0;
        this.state = State.InTagName;
        this.stateInTagName(c);
    }
228✔
250

251
    /**
     * Handles content of a special tag by looking for its end tag
     * (`currentSequence`). Inside of `<title>`, entities are decoded as well.
     */
    private stateInSpecialTag(c: number): void {
        const sequence = this.currentSequence;

        if (this.sequenceIndex === sequence.length) {
            // We matched e.g. `</script`; now the tag name has to end.
            const endsTagName = c === CharCodes.Gt || isWhitespace(c);

            if (endsTagName) {
                const textEnd = this.index - sequence.length;

                if (textEnd > this.sectionStart) {
                    // Temporarily move the index so reported locations line up.
                    const realIndex = this.index;
                    this.index = textEnd;
                    this.cbs.ontext(this.sectionStart, textEnd);
                    this.index = realIndex;
                }

                this.isSpecial = false;
                this.sectionStart = textEnd + 2; // The tag name starts after `</`
                this.stateInClosingTagName(c);
                return;
            }

            // The name went on (eg. `</scripts`); start matching from scratch.
            this.sequenceIndex = 0;
        }

        // `| 0x20` lower-cases ASCII letters (case-insensitive match).
        if ((c | 0x20) === sequence[this.sequenceIndex]) {
            this.sequenceIndex += 1;
            return;
        }

        if (this.sequenceIndex !== 0) {
            // A `<` can start a new match right away; useful for eg. `<</script>`.
            this.sequenceIndex = c === CharCodes.Lt ? 1 : 0;
            return;
        }

        if (sequence !== Sequences.TitleEnd) {
            // Outside of <title> tags only `<` matters, so skip ahead to it.
            if (this.fastForwardTo(CharCodes.Lt)) {
                this.sequenceIndex = 1;
            }
            return;
        }

        // Entities have to be parsed inside of <title> tags.
        if (this.decodeEntities && c === CharCodes.Amp) {
            this.startEntity();
        }
    }
703✔
291

292
    /**
     * Matches the `CDATA[` that follows `<![`.
     *
     * Once the whole sequence was seen, we start reading the CDATA content.
     * On the first mismatch the construct is treated as a declaration.
     */
    private stateCDATASequence(c: number): void {
        if (c !== Sequences.Cdata[this.sequenceIndex]) {
            this.sequenceIndex = 0;
            this.state = State.InDeclaration;
            this.stateInDeclaration(c); // Reconsume the character
            return;
        }

        this.sequenceIndex += 1;

        if (this.sequenceIndex === Sequences.Cdata.length) {
            this.state = State.InCommentLike;
            this.currentSequence = Sequences.CdataEnd;
            this.sequenceIndex = 0;
            this.sectionStart = this.index + 1;
        }
    }
204✔
306

307
    /**
     * Skips ahead in the buffer until a specific character is found. This
     * speeds things up in states that only wait for one character.
     *
     * Scanning starts at the character after the current one. If the
     * character is found, `index` points at it afterwards.
     *
     * @param c Char code to look for.
     * @returns Whether the character was found.
     */
    private fastForwardTo(c: number): boolean {
        const endOfBuffer = this.buffer.length + this.offset;

        for (this.index++; this.index < endOfBuffer; this.index++) {
            if (this.buffer.charCodeAt(this.index - this.offset) === c) {
                return true;
            }
        }

        /*
         * The `parse` loop increments the index after every state call,
         * so leave it at the last character of the buffer here.
         *
         * TODO: Refactor `parse` to increment index before calling states.
         */
        this.index = endOfBuffer - 1;

        return false;
    }
292✔
330

331
    /**
     * Handles the content of comments and CDATA sections, which end with
     * `-->` and `]]>` respectively.
     *
     * Both end sequences share these properties:
     * - They start with a distinct character.
     * - That character is repeated, so multiple repeats have to be checked.
     * - Every character but the first one of the sequence can be skipped.
     */
    private stateInCommentLike(c: number): void {
        const sequence = this.currentSequence;

        if (c === sequence[this.sequenceIndex]) {
            this.sequenceIndex += 1;
            if (this.sequenceIndex !== sequence.length) {
                return;
            }

            // The whole end sequence was matched.
            if (sequence === Sequences.CdataEnd) {
                this.cbs.oncdata(this.sectionStart, this.index, 2);
            } else {
                this.cbs.oncomment(this.sectionStart, this.index, 2);
            }

            this.sequenceIndex = 0;
            this.sectionStart = this.index + 1;
            this.state = State.Text;
            return;
        }

        if (this.sequenceIndex === 0) {
            // Nothing matched so far; skip ahead to the sequence's first character.
            if (this.fastForwardTo(sequence[0])) {
                this.sequenceIndex = 1;
            }
            return;
        }

        // A repeat of the previous character keeps the match alive (eg. --->, ]]]>).
        if (c !== sequence[this.sequenceIndex - 1]) {
            this.sequenceIndex = 0;
        }
    }
316✔
362

363
    /**
     * Checks whether a character may start a tag name.
     *
     * In HTML, tag names have to start with an ASCII letter (a-z or A-Z).
     *
     * XML permits many more characters (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * In XML mode, everything that would not end the tag is accepted.
     */
    private isTagStartChar(c: number): boolean {
        if (this.xmlMode) {
            return !isEndOfTagSection(c);
        }

        return isASCIIAlpha(c);
    }
1,310✔
372

373
    /**
     * Starts matching the name of a special tag.
     *
     * @param sequence The end-tag sequence of the suspected special tag.
     * @param offset Position within `sequence` at which matching continues.
     */
    private startSpecial(sequence: Uint8Array, offset: number): void {
        this.currentSequence = sequence;
        this.sequenceIndex = offset;
        this.isSpecial = true;
        this.state = State.SpecialStartSequence;
    }
52✔
379

380
    /**
     * Handles the character right after a `<` and decides what kind of
     * construct we are looking at.
     */
    private stateBeforeTagName(c: number): void {
        if (c === CharCodes.ExclamationMark) {
            this.state = State.BeforeDeclaration;
            this.sectionStart = this.index + 1;
            return;
        }

        if (c === CharCodes.Questionmark) {
            this.state = State.InProcessingInstruction;
            this.sectionStart = this.index + 1;
            return;
        }

        if (this.isTagStartChar(c)) {
            // `| 0x20` lower-cases ASCII letters.
            const lowerCased = c | 0x20;
            this.sectionStart = this.index;

            if (this.xmlMode) {
                // XML does not have special tags.
                this.state = State.InTagName;
            } else if (lowerCased === Sequences.ScriptEnd[2]) {
                // `s`: might be `<script` or `<style`
                this.state = State.BeforeSpecialS;
            } else if (lowerCased === Sequences.TitleEnd[2]) {
                // `t`: might be `<title` or `<textarea`
                this.state = State.BeforeSpecialT;
            } else {
                this.state = State.InTagName;
            }
            return;
        }

        if (c === CharCodes.Slash) {
            this.state = State.BeforeClosingTagName;
            return;
        }

        // Not a tag after all; treat the character as text.
        this.state = State.Text;
        this.stateText(c);
    }
1,019✔
406
    /**
     * Reads the name of an opening tag. Once the name ends, it is emitted
     * and the character is handed on to `stateBeforeAttributeName`.
     */
    private stateInTagName(c: number): void {
        if (!isEndOfTagSection(c)) {
            return;
        }

        this.cbs.onopentagname(this.sectionStart, this.index);
        this.sectionStart = -1;
        this.state = State.BeforeAttributeName;
        this.stateBeforeAttributeName(c);
    }
2,709✔
414
    /**
     * Handles what follows `</`. Whitespace is skipped, `</>` is dropped,
     * a valid start character begins a closing tag name, and everything
     * else turns the construct into a special comment.
     */
    private stateBeforeClosingTagName(c: number): void {
        if (isWhitespace(c)) {
            return; // Ignore
        }

        if (c === CharCodes.Gt) {
            this.state = State.Text;
            return;
        }

        if (this.isTagStartChar(c)) {
            this.state = State.InClosingTagName;
        } else {
            this.state = State.InSpecialComment;
        }
        this.sectionStart = this.index;
    }
378✔
426
    /**
     * Reads the name of a closing tag. The name ends at `>` or whitespace;
     * it is then emitted and the character is handed on to
     * `stateAfterClosingTagName`.
     */
    private stateInClosingTagName(c: number): void {
        const nameEnded = c === CharCodes.Gt || isWhitespace(c);
        if (!nameEnded) {
            return;
        }

        this.cbs.onclosetag(this.sectionStart, this.index);
        this.sectionStart = -1;
        this.state = State.AfterClosingTagName;
        this.stateAfterClosingTagName(c);
    }
2,272✔
434
    /**
     * Discards everything between the name of a closing tag and its `>`.
     */
    private stateAfterClosingTagName(c: number): void {
        const foundEnd =
            c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt);
        if (!foundEnd) {
            return;
        }

        this.state = State.Text;
        this.sectionStart = this.index + 1;
    }
413✔
441
    /**
     * Handles the space between attributes within an opening tag: `>` ends
     * the tag, `/` might start a self-closing tag, whitespace is skipped and
     * anything else begins an attribute name.
     */
    private stateBeforeAttributeName(c: number): void {
        switch (c) {
            case CharCodes.Gt: {
                this.cbs.onopentagend(this.index);
                if (this.isSpecial) {
                    // Content of special tags is handled separately.
                    this.state = State.InSpecialTag;
                    this.sequenceIndex = 0;
                } else {
                    this.state = State.Text;
                }
                this.sectionStart = this.index + 1;
                break;
            }
            case CharCodes.Slash: {
                this.state = State.InSelfClosingTag;
                break;
            }
            default: {
                if (!isWhitespace(c)) {
                    this.state = State.InAttributeName;
                    this.sectionStart = this.index;
                }
            }
        }
    }
958✔
458
    /**
     * Handles what follows a `/` within an opening tag. A `>` completes the
     * self-closing tag, whitespace is skipped, and anything else means the
     * `/` was stray, so we go back to reading attributes.
     */
    private stateInSelfClosingTag(c: number): void {
        if (c === CharCodes.Gt) {
            this.cbs.onselfclosingtag(this.index);
            this.state = State.Text;
            this.sectionStart = this.index + 1;
            // Self-closing special tags do not have content to treat specially.
            this.isSpecial = false;
            return;
        }

        if (isWhitespace(c)) {
            return;
        }

        this.state = State.BeforeAttributeName;
        this.stateBeforeAttributeName(c);
    }
72✔
469
    /**
     * Reads an attribute name. The name ends at `=` or at the end of the tag
     * section; it is then emitted and the character is handed on to
     * `stateAfterAttributeName`.
     */
    private stateInAttributeName(c: number): void {
        const nameEnded = c === CharCodes.Eq || isEndOfTagSection(c);
        if (!nameEnded) {
            return;
        }

        this.cbs.onattribname(this.sectionStart, this.index);
        this.sectionStart = this.index;
        this.state = State.AfterAttributeName;
        this.stateAfterAttributeName(c);
    }
996✔
477
    /**
     * Handles what follows an attribute name. An `=` introduces a value.
     * `/`, `>` or the start of another attribute mean the attribute has no
     * value. Whitespace is skipped.
     */
    private stateAfterAttributeName(c: number): void {
        if (c === CharCodes.Eq) {
            this.state = State.BeforeAttributeValue;
            return;
        }

        if (c === CharCodes.Slash || c === CharCodes.Gt) {
            this.cbs.onattribend(QuoteType.NoValue, this.sectionStart);
            this.sectionStart = -1;
            this.state = State.BeforeAttributeName;
            this.stateBeforeAttributeName(c);
            return;
        }

        if (!isWhitespace(c)) {
            // The next attribute starts right here.
            this.cbs.onattribend(QuoteType.NoValue, this.sectionStart);
            this.state = State.InAttributeName;
            this.sectionStart = this.index;
        }
    }
182✔
491
    /**
     * Handles what follows the `=` of an attribute and determines how the
     * value is quoted. Whitespace is skipped.
     */
    private stateBeforeAttributeValue(c: number): void {
        switch (c) {
            case CharCodes.DoubleQuote: {
                this.state = State.InAttributeValueDq;
                this.sectionStart = this.index + 1;
                break;
            }
            case CharCodes.SingleQuote: {
                this.state = State.InAttributeValueSq;
                this.sectionStart = this.index + 1;
                break;
            }
            default: {
                if (!isWhitespace(c)) {
                    // The character is already part of an unquoted value.
                    this.sectionStart = this.index;
                    this.state = State.InAttributeValueNq;
                    this.stateInAttributeValueNoQuotes(c); // Reconsume token
                }
            }
        }
    }
166✔
504
    /**
     * Shared implementation for quoted attribute values.
     *
     * The value ends at the closing quote. An `&` starts an entity when
     * entities are decoded; when they are not, only the quote matters and we
     * fast-forward to it.
     *
     * @param c The current character.
     * @param quote Char code of the quote that delimits the value.
     */
    private handleInAttributeValue(c: number, quote: number): void {
        const atClosingQuote =
            c === quote ||
            (!this.decodeEntities && this.fastForwardTo(quote));

        if (!atClosingQuote) {
            if (this.decodeEntities && c === CharCodes.Amp) {
                this.startEntity();
            }
            return;
        }

        this.cbs.onattribdata(this.sectionStart, this.index);
        this.sectionStart = -1;

        const quoteType =
            quote === CharCodes.DoubleQuote
                ? QuoteType.Double
                : QuoteType.Single;
        this.cbs.onattribend(quoteType, this.index + 1);

        this.state = State.BeforeAttributeName;
    }
2,842✔
522
    /** Reads an attribute value that is delimited by double quotes. */
    private stateInAttributeValueDoubleQuotes(c: number): void {
        this.handleInAttributeValue(c, CharCodes.DoubleQuote);
    }
2,670✔
525
    /** Reads an attribute value that is delimited by single quotes. */
    private stateInAttributeValueSingleQuotes(c: number): void {
        this.handleInAttributeValue(c, CharCodes.SingleQuote);
    }
172✔
528
    /**
     * Reads an unquoted attribute value, which ends at whitespace or `>`.
     * An `&` starts an entity when entities are decoded.
     */
    private stateInAttributeValueNoQuotes(c: number): void {
        const valueEnded = isWhitespace(c) || c === CharCodes.Gt;

        if (!valueEnded) {
            if (this.decodeEntities && c === CharCodes.Amp) {
                this.startEntity();
            }
            return;
        }

        this.cbs.onattribdata(this.sectionStart, this.index);
        this.sectionStart = -1;
        this.cbs.onattribend(QuoteType.Unquoted, this.index);
        this.state = State.BeforeAttributeName;
        this.stateBeforeAttributeName(c);
    }
174✔
539
    /**
     * Handles the character after `<!`: `[` might start a CDATA section,
     * `-` might start a comment, everything else is a declaration.
     */
    private stateBeforeDeclaration(c: number): void {
        if (c === CharCodes.OpeningSquareBracket) {
            this.state = State.CDATASequence;
            this.sequenceIndex = 0;
            return;
        }

        if (c === CharCodes.Dash) {
            this.state = State.BeforeComment;
        } else {
            this.state = State.InDeclaration;
        }
    }
71✔
550
    /** Reads a declaration up to its closing `>` and emits it. */
    private stateInDeclaration(c: number): void {
        const foundEnd =
            c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt);
        if (!foundEnd) {
            return;
        }

        this.cbs.ondeclaration(this.sectionStart, this.index);
        this.state = State.Text;
        this.sectionStart = this.index + 1;
    }
28✔
557
    /** Reads a processing instruction up to its closing `>` and emits it. */
    private stateInProcessingInstruction(c: number): void {
        const foundEnd =
            c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt);
        if (!foundEnd) {
            return;
        }

        this.cbs.onprocessinginstruction(this.sectionStart, this.index);
        this.state = State.Text;
        this.sectionStart = this.index + 1;
    }
12✔
564
    /**
     * Handles the character after `<!-`. A second `-` starts a comment;
     * anything else makes this a declaration.
     */
    private stateBeforeComment(c: number): void {
        if (c !== CharCodes.Dash) {
            this.state = State.InDeclaration;
            return;
        }

        this.state = State.InCommentLike;
        this.currentSequence = Sequences.CommentEnd;
        // Act as if `--` was matched already to allow short comments (eg. <!-->)
        this.sequenceIndex = 2;
        this.sectionStart = this.index + 1;
    }
27✔
575
    /**
     * Reads a special comment (a `</` that is not followed by a valid tag
     * name) up to its closing `>` and emits it as a comment.
     */
    private stateInSpecialComment(c: number): void {
        const foundEnd =
            c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt);
        if (!foundEnd) {
            return;
        }

        this.cbs.oncomment(this.sectionStart, this.index, 0);
        this.state = State.Text;
        this.sectionStart = this.index + 1;
    }
3✔
582
    /**
     * Handles the second character of a tag name that starts with `s` and
     * decides whether it might be `<script` or `<style`.
     *
     * Index 3 of the end sequences is the second letter of the tag name;
     * matching then continues at index 4.
     */
    private stateBeforeSpecialS(c: number): void {
        // `| 0x20` lower-cases ASCII letters.
        switch (c | 0x20) {
            case Sequences.ScriptEnd[3]: {
                this.startSpecial(Sequences.ScriptEnd, 4);
                break;
            }
            case Sequences.StyleEnd[3]: {
                this.startSpecial(Sequences.StyleEnd, 4);
                break;
            }
            default: {
                this.state = State.InTagName;
                this.stateInTagName(c); // Consume the token again
            }
        }
    }
36✔
593

594
    /**
     * Handles the second character of a tag name that starts with `t` and
     * decides whether it might be `<title` or `<textarea`.
     *
     * Index 3 of the end sequences is the second letter of the tag name;
     * matching then continues at index 4.
     */
    private stateBeforeSpecialT(c: number): void {
        // `| 0x20` lower-cases ASCII letters.
        switch (c | 0x20) {
            case Sequences.TitleEnd[3]: {
                this.startSpecial(Sequences.TitleEnd, 4);
                break;
            }
            case Sequences.TextareaEnd[3]: {
                this.startSpecial(Sequences.TextareaEnd, 4);
                break;
            }
            default: {
                this.state = State.InTagName;
                this.stateInTagName(c); // Consume the token again
            }
        }
    }
42✔
605

606
    /**
     * Switches to entity decoding. The current state is remembered in
     * `baseState` so that we can return to it afterwards.
     *
     * The decoding mode is `Strict` in XML mode, `Legacy` for text (incl.
     * special tag content) and `Attribute` everywhere else.
     */
    private startEntity(): void {
        this.baseState = this.state;
        this.state = State.InEntity;
        this.entityStart = this.index;

        let mode: DecodingMode;
        if (this.xmlMode) {
            mode = DecodingMode.Strict;
        } else if (
            this.baseState === State.Text ||
            this.baseState === State.InSpecialTag
        ) {
            mode = DecodingMode.Legacy;
        } else {
            mode = DecodingMode.Attribute;
        }

        this.entityDecoder.startEntity(mode);
    }
158✔
619

620
    /**
     * Passes the rest of the buffer to the entity decoder and acts on the
     * number it returns.
     */
    private stateInEntity(): void {
        const consumed = this.entityDecoder.write(
            this.buffer,
            this.index - this.offset,
        );

        if (consumed < 0) {
            // The decoder is not done yet. Mark buffer as consumed.
            this.index = this.offset + this.buffer.length - 1;
            return;
        }

        // A non-negative result means we are done with the entity.
        this.state = this.baseState;

        if (consumed === 0) {
            // NOTE(review): presumably no entity was decoded; confirm with the decoder's docs.
            this.index = this.entityStart;
        }
    }
373✔
638

639
    /**
     * Flushes data that has been read already, as the buffer is about to be
     * replaced by the next chunk.
     *
     * Only text and attribute values are emitted in pieces; every other
     * section is left alone. Nothing happens while the tokenizer is paused.
     */
    private cleanup(): void {
        if (!this.running || this.sectionStart === this.index) {
            return;
        }

        // In special tags, text is only flushed while no end tag is being matched.
        const inText =
            this.state === State.Text ||
            (this.state === State.InSpecialTag && this.sequenceIndex === 0);
        const inAttributeValue =
            this.state === State.InAttributeValueDq ||
            this.state === State.InAttributeValueSq ||
            this.state === State.InAttributeValueNq;

        if (inText) {
            this.cbs.ontext(this.sectionStart, this.index);
            this.sectionStart = this.index;
        } else if (inAttributeValue) {
            this.cbs.onattribdata(this.sectionStart, this.index);
            this.sectionStart = this.index;
        }
    }
2,364✔
661

662
    /**
     * @returns Whether the parse loop may process another character, ie. the
     *     tokenizer is running and the buffer has unread characters.
     */
    private shouldContinue(): boolean {
        if (!this.running) {
            return false;
        }

        return this.index < this.buffer.length + this.offset;
    }
26,048✔
665

666
    /**
     * Walks through the buffer and passes every character to the handler of
     * the state we are currently in. Afterwards, pending text or attribute
     * data is flushed via `cleanup`.
     *
     * As a performance improvement, states that are hit more often are
     * listed first.
     */
    private parse(): void {
        while (this.shouldContinue()) {
            const c = this.buffer.charCodeAt(this.index - this.offset);

            switch (this.state) {
                case State.Text:
                    this.stateText(c);
                    break;
                case State.SpecialStartSequence:
                    this.stateSpecialStartSequence(c);
                    break;
                case State.InSpecialTag:
                    this.stateInSpecialTag(c);
                    break;
                case State.CDATASequence:
                    this.stateCDATASequence(c);
                    break;
                case State.InAttributeValueDq:
                    this.stateInAttributeValueDoubleQuotes(c);
                    break;
                case State.InAttributeName:
                    this.stateInAttributeName(c);
                    break;
                case State.InCommentLike:
                    this.stateInCommentLike(c);
                    break;
                case State.InSpecialComment:
                    this.stateInSpecialComment(c);
                    break;
                case State.BeforeAttributeName:
                    this.stateBeforeAttributeName(c);
                    break;
                case State.InTagName:
                    this.stateInTagName(c);
                    break;
                case State.InClosingTagName:
                    this.stateInClosingTagName(c);
                    break;
                case State.BeforeTagName:
                    this.stateBeforeTagName(c);
                    break;
                case State.AfterAttributeName:
                    this.stateAfterAttributeName(c);
                    break;
                case State.InAttributeValueSq:
                    this.stateInAttributeValueSingleQuotes(c);
                    break;
                case State.BeforeAttributeValue:
                    this.stateBeforeAttributeValue(c);
                    break;
                case State.BeforeClosingTagName:
                    this.stateBeforeClosingTagName(c);
                    break;
                case State.AfterClosingTagName:
                    this.stateAfterClosingTagName(c);
                    break;
                case State.BeforeSpecialS:
                    this.stateBeforeSpecialS(c);
                    break;
                case State.BeforeSpecialT:
                    this.stateBeforeSpecialT(c);
                    break;
                case State.InAttributeValueNq:
                    this.stateInAttributeValueNoQuotes(c);
                    break;
                case State.InSelfClosingTag:
                    this.stateInSelfClosingTag(c);
                    break;
                case State.InDeclaration:
                    this.stateInDeclaration(c);
                    break;
                case State.BeforeDeclaration:
                    this.stateBeforeDeclaration(c);
                    break;
                case State.BeforeComment:
                    this.stateBeforeComment(c);
                    break;
                case State.InProcessingInstruction:
                    this.stateInProcessingInstruction(c);
                    break;
                case State.InEntity:
                    // The entity decoder reads from the buffer by itself.
                    this.stateInEntity();
                    break;
            }

            this.index++;
        }

        this.cleanup();
    }
2,364✔
784

785
    /**
     * Wrap up tokenization.
     *
     * If the tokenizer is still in the `InEntity` state, the entity decoder
     * is ended and the state is restored from `baseState`. Afterwards any
     * trailing data is handled and the `onend` callback is invoked.
     */
    private finish() {
        const endedInsideEntity = this.state === State.InEntity;

        if (endedInsideEntity) {
            // End the pending entity first, then fall back to the base state.
            this.entityDecoder.end();
            this.state = this.baseState;
        }

        this.handleTrailingData();
        this.cbs.onend();
    }
157✔
795

796
    /**
     * Handle any trailing data.
     *
     * Nothing is emitted when the current section already starts at (or
     * past) the end of the buffered input. Otherwise the remaining section is
     * reported through the callback that matches the current state:
     * `oncdata` / `oncomment` for comment-like sections, nothing for
     * unfinished tags, and `ontext` for every other state.
     */
    private handleTrailingData() {
        const bufferEnd = this.buffer.length + this.offset;

        // If there is no remaining data, we are done.
        if (this.sectionStart >= bufferEnd) {
            return;
        }

        switch (this.state) {
            case State.InCommentLike: {
                // The active sequence decides between CDATA and comment.
                if (this.currentSequence === Sequences.CdataEnd) {
                    this.cbs.oncdata(this.sectionStart, bufferEnd, 0);
                } else {
                    this.cbs.oncomment(this.sectionStart, bufferEnd, 0);
                }
                break;
            }
            case State.InTagName:
            case State.BeforeAttributeName:
            case State.BeforeAttributeValue:
            case State.AfterAttributeName:
            case State.InAttributeName:
            case State.InAttributeValueSq:
            case State.InAttributeValueDq:
            case State.InAttributeValueNq:
            case State.InClosingTagName: {
                /*
                 * If we are currently in an opening or closing tag, us not
                 * calling the respective callback signals that the tag should
                 * be ignored.
                 */
                break;
            }
            default: {
                this.cbs.ontext(this.sectionStart, bufferEnd);
            }
        }
    }
157✔
830

831
    /**
     * Emit a decoded entity code point.
     *
     * Any data between `sectionStart` and `entityStart` is flushed first.
     * Then `sectionStart` is moved to `entityStart + consumed`, `index` is
     * set to one less than the new `sectionStart`, and the code point is
     * reported. When `baseState` is `Text` or `InSpecialTag` the text
     * callbacks (`ontext` / `ontextentity`) are used; for any other base
     * state the attribute callbacks (`onattribdata` / `onattribentity`) are
     * used.
     *
     * @param cp The code point to emit.
     * @param consumed Amount added to `entityStart` to get the new section start.
     */
    private emitCodePoint(cp: number, consumed: number): void {
        const inTextContext =
            this.baseState === State.Text ||
            this.baseState === State.InSpecialTag;
        const hasPendingSection = this.sectionStart < this.entityStart;

        // Flush whatever precedes the entity through the matching callback.
        if (hasPendingSection) {
            if (inTextContext) {
                this.cbs.ontext(this.sectionStart, this.entityStart);
            } else {
                this.cbs.onattribdata(this.sectionStart, this.entityStart);
            }
        }

        this.sectionStart = this.entityStart + consumed;
        this.index = this.sectionStart - 1;

        if (inTextContext) {
            this.cbs.ontextentity(cp, this.sectionStart);
        } else {
            this.cbs.onattribentity(cp);
        }
    }
108✔
853
}
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc