• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jstedfast / HtmlKit / 1.2.0.324

01 Jan 2026 02:23AM UTC coverage: 99.107% (+0.001%) from 99.106%
1.2.0.324

push

coveralls.net

jstedfast
Make use of SearchValues<char> for net8.0+

Makes little to no difference, but oh well.

2887 of 2913 relevant lines covered (99.11%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.78
/HtmlKit/HtmlTokenizer.cs
1
//
2
// HtmlTokenizer.cs
3
//
4
// Author: Jeffrey Stedfast <jestedfa@microsoft.com>
5
//
6
// Copyright (c) 2015-2025 Jeffrey Stedfast <jestedfa@microsoft.com>
7
//
8
// Permission is hereby granted, free of charge, to any person obtaining a copy
9
// of this software and associated documentation files (the "Software"), to deal
10
// in the Software without restriction, including without limitation the rights
11
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
// copies of the Software, and to permit persons to whom the Software is
13
// furnished to do so, subject to the following conditions:
14
//
15
// The above copyright notice and this permission notice shall be included in
16
// all copies or substantial portions of the Software.
17
//
18
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
// THE SOFTWARE.
25
//
26

27
using System;
28
using System.IO;
29
using System.Text;
30
using System.Buffers;
31
using System.Runtime.CompilerServices;
32
using System.Diagnostics.CodeAnalysis;
33

34
namespace HtmlKit {
35
        /// <summary>
36
        /// An HTML tokenizer.
37
        /// </summary>
38
        /// <remarks>
39
        /// Tokenizes HTML text, emitting an <see cref="HtmlToken"/> for each token it encounters.
40
        /// </remarks>
41
        public class HtmlTokenizer
42
        {
43
                // Specification: https://dev.w3.org/html5/spec-LC/tokenization.html
44
                // Literal names matched while tokenizing markup declarations.
                // NOTE(review): presumably compared against "<!DOCTYPE" and "<![CDATA[" sequences; the comparison code is outside this view.
                const string DocType = "doctype";
45
                const string CData = "[CDATA[";
46

47
                // Lower bound applied to the caller-supplied buffer size when the byte read buffer is allocated.
                const int MinimumBufferSize = 1024;
48

49
                // NOTE(review): presumably decodes character references (see DecodeCharacterReferences); usage is outside this view.
                readonly HtmlEntityDecoder entity = new HtmlEntityDecoder ();
1✔
50
                // Accumulates the characters consumed by TryReadDataUntil and TryReadNameUntil.
                readonly CharBuffer data = new CharBuffer (2048);
1✔
51
                // Accumulates the characters consumed by TryReadNameUntil (which appends to both 'data' and 'name').
                readonly CharBuffer name = new CharBuffer (32);
1✔
52

53
                // Input sources: each constructor assigns exactly one of 'textReader' or 'stream'.
                readonly TextReader? textReader;
54
                readonly Stream? stream;
55
                // Only used for stream input; 'encoding' may be replaced when a byte-order-mark is detected.
                Encoding? encoding;
56
                // Null until the encoding has been settled (see DetectEncodingFromByteOrderMarks).
                Decoder? decoder;
57

58
                // Raw bytes read from 'stream'; 'inputEnd' is the number of valid bytes currently held in 'input'.
                readonly byte[] input;
59
                int inputEnd;
60

61
                // Decoded characters; 'bufferIndex' is the next character to read and 'bufferEnd' is one past the last valid one.
                readonly char[] buffer;
62
                int bufferIndex, bufferEnd;
63

64
                // NOTE(review): looks like scratch state for CDATA section handling; usage is outside this view.
                readonly char[] cdata = new char[3];
1✔
65
                int cdataIndex;
66

67
                // NOTE(review): per-token parsing state; these are written by code outside this view.
                string activeTagName = string.Empty;
1✔
68
                HtmlDocTypeToken? doctype;
69
                HtmlAttribute? attribute;
70
                HtmlTagToken? tag;
71
                char quote;
72

73
                // Backing field for the DecodeCharacterReferences property.
                bool decodeCharacterReferences = true;
1✔
74
                // 1-based position tracking exposed through the LinePosition and LineNumber properties.
                int linePosition = 1;
1✔
75
                int lineNumber = 1;
1✔
76

77
                // When true, the first FillBuffer call skips the preamble of the caller-supplied encoding (see DetectByteOrderMark).
                bool detectByteOrderMark;
78
                bool isEndTag;
79
                bool bang;
80
                // Set by FillBuffer once a refill produces no more characters.
                bool eof;
81

82
                /// <summary>
83
                /// Initialize a new instance of the <see cref="HtmlTokenizer"/> class.
84
                /// </summary>
85
                /// <remarks>
86
                /// <para>Creates a new <see cref="HtmlTokenizer"/>.</para>
87
                /// <para>This constructor will attempt to auto-detect the appropriate encoding to use by examining the first four bytes of the stream
88
                /// and, if a unicode byte-order-mark is detected, use the appropriate unicode encoding. If no byte order mark is detected, then it will
89
                /// default to UTF-8.</para>
90
                /// </remarks>
91
                /// <param name="stream">The input stream.</param>
92
                public HtmlTokenizer (Stream stream)
                        : this (stream, Encoding.UTF8)
                {
                        // Nothing to do here: construction is delegated, with UTF-8 as the encoding passed along.
                }
1✔
95

96
                /// <summary>
97
                /// Initialize a new instance of the <see cref="HtmlTokenizer"/> class.
98
                /// </summary>
99
                /// <remarks>
100
                /// <para>Creates a new <see cref="HtmlTokenizer"/>.</para>
101
                /// <para>This constructor will attempt to auto-detect the appropriate encoding to use by examining the first four bytes of the stream
102
                /// and, if a unicode byte-order-mark is detected, use the appropriate unicode encoding. If no byte order mark is detected, then it will
103
                /// default to the user-supplied encoding.</para>
104
                /// </remarks>
105
                /// <param name="stream">The input stream.</param>
106
                /// <param name="encoding">The charset encoding of the stream.</param>
107
                public HtmlTokenizer (Stream stream, Encoding encoding)
                        : this (stream, encoding, true)
                {
                        // Nothing to do here: construction is delegated with byte-order-mark detection enabled.
                }
1✔
110

111
                /// <summary>
112
                /// Initialize a new instance of the <see cref="HtmlTokenizer"/> class.
113
                /// </summary>
114
                /// <remarks>
115
                /// <para>Creates a new <see cref="HtmlTokenizer"/>.</para>
116
                /// <para>This constructor allows you to change the encoding the first time you read from the <see cref="HtmlTokenizer"/>. The
117
                /// <paramref name="detectEncodingFromByteOrderMarks"/> parameter detects the encoding by looking at the first four bytes of the stream.
118
                /// It will automatically recognize UTF-8, little-endian UTF-16, big-endian UTF-16, little-endian UTF-32, and big-endian UTF-32 text if
119
                /// the stream starts with the appropriate byte order marks. Otherwise, the user-provided encoding is used.</para>
120
                /// </remarks>
121
                /// <param name="stream">The input stream.</param>
122
                /// <param name="encoding">The charset encoding of the stream.</param>
123
                /// <param name="detectEncodingFromByteOrderMarks"><see langword="true" /> if byte order marks should be detected and used to override the <paramref name="encoding"/>; otherwise, <see langword="false" />.</param>
124
                /// <param name="bufferSize">The minimum buffer size to use for reading.</param>
125
                public HtmlTokenizer (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize = 4096)
                {
                        if (stream == null)
                                throw new ArgumentNullException (nameof (stream));

                        if (encoding == null)
                                throw new ArgumentNullException (nameof (encoding));

                        // Never read in chunks smaller than MinimumBufferSize, whatever the caller asked for.
                        input = new byte[Math.Max (MinimumBufferSize, bufferSize)];

                        // A decoder keeps the leading bytes of a multi-byte sequence that was split across two reads, so a
                        // single GetChars call can yield more characters than the number of bytes passed to it (for UTF-8:
                        // byteCount + 1). Size the character buffer for that worst case, otherwise GetChars can throw an
                        // ArgumentException on perfectly valid input. The 'input.Length + 1' floor covers the Unicode
                        // encodings that byte-order-mark detection may substitute for the caller-supplied encoding.
                        buffer = new char[Math.Max (encoding.GetMaxCharCount (input.Length), input.Length + 1)];

                        // With detection disabled the encoding is final, so the decoder can be created right away.
                        if (!detectEncodingFromByteOrderMarks)
                                decoder = encoding.GetDecoder ();

                        // When the encoding is fixed up front, the first buffer fill still has to skip that encoding's own
                        // preamble (if the stream starts with it); this flag requests exactly that.
                        detectByteOrderMark = !detectEncodingFromByteOrderMarks;
                        this.encoding = encoding;
                        this.stream = stream;
                }
1✔
143

144
                /// <summary>
145
                /// Initialize a new instance of the <see cref="HtmlTokenizer"/> class.
146
                /// </summary>
147
                /// <remarks>
148
                /// Creates a new <see cref="HtmlTokenizer"/>.
149
                /// </remarks>
150
                /// <param name="reader">The <see cref="TextReader"/>.</param>
151
                public HtmlTokenizer (TextReader reader)
                {
                        // Reject a missing reader before any state is set up.
                        textReader = reader ?? throw new ArgumentNullException (nameof (reader));

                        // Characters come straight from the reader, so no byte buffer is needed.
                        buffer = new char[2048];
                        input = Array.Empty<byte> ();
                }
1✔
160

161
                /// <summary>
162
                /// Get or set whether the tokenizer should decode character references.
163
                /// </summary>
164
                /// <remarks>
165
                /// <para>Gets or sets whether the tokenizer should decode character references.</para>
166
                /// <note type="warning">Character references in attribute values will still be decoded
167
                /// even if this value is set to <see langword="false" />.</note>
168
                /// </remarks>
169
                /// <value><see langword="true" /> if character references should be decoded; otherwise, <see langword="false" />.</value>
170
                public bool DecodeCharacterReferences {
                        get => decodeCharacterReferences;
                        set => decodeCharacterReferences = value;
                }
174

175
                /// <summary>
176
                /// Get the current HTML namespace detected by the tokenizer.
177
                /// </summary>
178
                /// <remarks>
179
                /// Gets the current HTML namespace detected by the tokenizer.
180
                /// </remarks>
181
                /// <value>The html namespace.</value>
182
                public HtmlNamespace HtmlNamespace { get; private set; }
185

186
                /// <summary>
187
                /// Get or set whether the tokenizer should ignore truncated tags.
188
                /// </summary>
189
                /// <remarks>
190
                /// <para>Gets or sets whether the tokenizer should ignore truncated tags.</para>
191
                /// <para>If <see langword="false" /> and the stream abruptly ends in the middle of an HTML tag, it will be
192
                /// treated as an <see cref="HtmlDataToken"/> instead.</para>
193
                /// </remarks>
194
                /// <value><see langword="true" /> if truncated tags should be ignored; otherwise, <see langword="false" />.</value>
195
                public bool IgnoreTruncatedTags { get; set; }
198

199
                /// <summary>
200
                /// Get the current line number.
201
                /// </summary>
202
                /// <remarks>
203
                /// <para>This property is most commonly used for error reporting, but can be called
204
                /// at any time. The starting value for this property is <c>1</c>.</para>
205
                /// <para>Combined with <see cref="LinePosition"/>, a value of <c>1,1</c> indicates
206
                /// the start of the document.</para>
207
                /// </remarks>
208
                /// <value>The current line number.</value>
209
                public int LineNumber => lineNumber;
212

213
                /// <summary>
214
                /// Get the current line position.
215
                /// </summary>
216
                /// <remarks>
217
                /// <para>This property is most commonly used for error reporting, but can be called
218
                /// at any time. The starting value for this property is <c>1</c>.</para>
219
                /// <para>Combined with <see cref="LineNumber"/>, a value of <c>1,1</c> indicates
220
                /// the start of the document.</para>
221
                /// </remarks>
222
                /// <value>The column position of the current line.</value>
223
                public int LinePosition => linePosition;
226

227
                /// <summary>
228
                /// Get the current state of the tokenizer.
229
                /// </summary>
230
                /// <remarks>
231
                /// Gets the current state of the tokenizer.
232
                /// </remarks>
233
                /// <value>The current state of the tokenizer.</value>
234
                public HtmlTokenizerState TokenizerState { get; private set; }
237

238
                /// <summary>
239
                /// Create a DOCTYPE token.
240
                /// </summary>
241
                /// <remarks>
242
                /// Creates a DOCTYPE token.
243
                /// </remarks>
244
                /// <returns>The DOCTYPE token.</returns>
245
                protected virtual HtmlDocTypeToken CreateDocType ()
                        => new HtmlDocTypeToken ();
1✔
249

250
                /// <summary>
                /// Create a DOCTYPE token via <see cref="CreateDocType"/> and record the raw tag name on it.
                /// </summary>
                /// <returns>The DOCTYPE token.</returns>
                /// <param name="rawTagName">The tag name exactly as it should be stored on the token.</param>
                HtmlDocTypeToken CreateDocTypeToken (string rawTagName)
                {
                        // Go through the virtual factory so that subclasses can supply their own token type.
                        HtmlDocTypeToken docTypeToken = CreateDocType ();

                        docTypeToken.RawTagName = rawTagName;

                        return docTypeToken;
                }
1✔
256

257
                /// <summary>
258
                /// Create an HTML comment token.
259
                /// </summary>
260
                /// <remarks>
261
                /// Creates an HTML comment token.
262
                /// </remarks>
263
                /// <returns>The HTML comment token.</returns>
264
                /// <param name="comment">The comment.</param>
265
                /// <param name="bogus"><see langword="true" /> if the comment is bogus; otherwise, <see langword="false" />.</param>
266
                protected virtual HtmlCommentToken CreateCommentToken (string comment, bool bogus = false)
                        => new HtmlCommentToken (comment, bogus);
1✔
270

271
                /// <summary>
272
                /// Create an HTML character data token.
273
                /// </summary>
274
                /// <remarks>
275
                /// Creates an HTML character data token.
276
                /// </remarks>
277
                /// <returns>The HTML character data token.</returns>
278
                /// <param name="data">The character data.</param>
279
                protected virtual HtmlDataToken CreateDataToken (string data)
                        => new HtmlDataToken (data);
1✔
283

284
                /// <summary>
285
                /// Create an HTML character data token.
286
                /// </summary>
287
                /// <remarks>
288
                /// Creates an HTML character data token.
289
                /// </remarks>
290
                /// <returns>The HTML character data token.</returns>
291
                /// <param name="data">The character data.</param>
292
                protected virtual HtmlCDataToken CreateCDataToken (string data)
                        => new HtmlCDataToken (data);
1✔
296

297
                /// <summary>
298
                /// Create an HTML script data token.
299
                /// </summary>
300
                /// <remarks>
301
                /// Creates an HTML script data token.
302
                /// </remarks>
303
                /// <returns>The HTML script data token.</returns>
304
                /// <param name="data">The script data.</param>
305
                protected virtual HtmlScriptDataToken CreateScriptDataToken (string data)
                        => new HtmlScriptDataToken (data);
1✔
309

310
                /// <summary>
311
                /// Create an HTML tag token.
312
                /// </summary>
313
                /// <remarks>
314
                /// Creates an HTML tag token.
315
                /// </remarks>
316
                /// <returns>The HTML tag token.</returns>
317
                /// <param name="name">The tag name.</param>
318
                /// <param name="isEndTag"><see langword="true" /> if the tag is an end tag; otherwise, <see langword="false" />.</param>
319
                protected virtual HtmlTagToken CreateTagToken (string name, bool isEndTag = false)
                        => new HtmlTagToken (name, isEndTag);
1✔
323

324
                /// <summary>
325
                /// Create an attribute.
326
                /// </summary>
327
                /// <remarks>
328
                /// Creates an attribute.
329
                /// </remarks>
330
                /// <returns>The attribute.</returns>
331
                /// <param name="name">The attribute name.</param>
332
                protected virtual HtmlAttribute CreateAttribute (string name)
                        => new HtmlAttribute (name);
1✔
336

337
                /// <summary>
                /// Check whether a character code is an ASCII letter (A-Z, a-z) or an ASCII digit (0-9).
                /// </summary>
                /// <returns><see langword="true" /> if <paramref name="c"/> is an ASCII letter or digit; otherwise, <see langword="false" />.</returns>
                /// <param name="c">The character code to test.</param>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                static bool IsAlphaNumeric (int c)
                {
                        bool isUpper = c >= 'A' && c <= 'Z';
                        bool isLower = c >= 'a' && c <= 'z';
                        bool isDigit = c >= '0' && c <= '9';

                        return isUpper || isLower || isDigit;
                }
1✔
342

343
                /// <summary>
                /// Check whether a character code is an ASCII letter (A-Z or a-z).
                /// </summary>
                /// <returns><see langword="true" /> if <paramref name="c"/> is an ASCII letter; otherwise, <see langword="false" />.</returns>
                /// <param name="c">The character code to test.</param>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                static bool IsAsciiLetter (int c)
                {
                        if (c >= 'A' && c <= 'Z')
                                return true;

                        return c >= 'a' && c <= 'z';
                }
1✔
348

349
                /// <summary>
                /// Convert an ASCII uppercase letter to lowercase; any other value is returned unchanged (cast to <see cref="char"/>).
                /// </summary>
                /// <returns>The lowercase character.</returns>
                /// <param name="c">The character code to convert.</param>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                static char ToLower (int c)
                {
                        // In ASCII, a lowercase letter sits exactly 0x20 above its uppercase counterpart.
                        bool isUpper = c >= 'A' && c <= 'Z';

                        return isUpper ? (char) (c + 0x20) : (char) c;
                }
1✔
358

359
                /// <summary>
                /// Determine how many leading bytes of <paramref name="input"/> should be skipped as a byte-order-mark.
                /// </summary>
                /// <remarks>
                /// Only the array contents are compared; callers are responsible for making sure that at least
                /// <c>preamble.Length</c> bytes of <paramref name="input"/> actually hold data read from the stream.
                /// </remarks>
                /// <returns>The length of <paramref name="preamble"/> if <paramref name="input"/> starts with it; otherwise, <c>0</c>.</returns>
                /// <param name="input">The bytes read from the start of the stream.</param>
                /// <param name="preamble">The byte-order-mark to look for.</param>
                static int SkipByteOrderMark (byte[] input, ReadOnlySpan<byte> preamble)
                {
                        // StartsWith checks the length before comparing, so an input that is shorter than the
                        // preamble yields 0 instead of an IndexOutOfRangeException.
                        return new ReadOnlySpan<byte> (input).StartsWith (preamble) ? preamble.Length : 0;
                }
1✔
368

369
                /// <summary>
                /// Read enough of the stream to check whether it starts with the preamble of the current encoding.
                /// </summary>
                /// <remarks>
                /// The bytes that are read stay in <c>input</c> (with <c>inputEnd</c> updated) so that the caller can decode them.
                /// </remarks>
                /// <returns>The number of leading bytes in <c>input</c> to skip, or <c>0</c> if there is no byte-order-mark.</returns>
                int DetectByteOrderMark ()
                {
#if NET6_0_OR_GREATER
                        var preamble = encoding!.Preamble;
#else
                        var preamble = encoding!.GetPreamble ();
#endif

                        // Encodings without a preamble have nothing to skip; don't touch the stream.
                        if (preamble.Length == 0)
                                return 0;

                        // Stream.Read may return fewer bytes than requested, so keep reading until there
                        // are enough bytes to compare or the stream ends.
                        do {
                                int nread = stream!.Read (input, inputEnd, input.Length - inputEnd);

                                if (nread == 0)
                                        break;

                                inputEnd += nread;
                        } while (inputEnd < preamble.Length);

                        // The stream ended before a complete preamble could be read. Comparing anyway would look at
                        // bytes that were never read (zeroes), which can falsely complete a preamble that ends in zero
                        // bytes (e.g. UTF-32LE: FF FE 00 00) and hand back an offset beyond inputEnd.
                        if (inputEnd < preamble.Length)
                                return 0;

                        return SkipByteOrderMark (input, preamble);
                }
1✔
391

392
                /// <summary>
                /// Inspect the first bytes of the stream for a Unicode byte-order-mark, switch <c>encoding</c> to match
                /// it (if one is found) and create the <c>decoder</c>.
                /// </summary>
                /// <remarks>
                /// The bytes that are read stay in <c>input</c> (with <c>inputEnd</c> updated) so that the caller can decode them.
                /// If no byte-order-mark is recognized, the current encoding is left as it is.
                /// </remarks>
                /// <returns>The number of leading bytes in <c>input</c> to skip, or <c>0</c> if there is no byte-order-mark.</returns>
                [MemberNotNull (nameof (decoder))]
                int DetectEncodingFromByteOrderMarks ()
                {
                        // Stream.Read may return fewer bytes than requested, so keep reading until the
                        // 4 bytes needed for the longest byte-order-mark are available or the stream ends.
                        do {
                                int nread = stream!.Read (input, inputEnd, input.Length - inputEnd);

                                if (nread == 0)
                                        break;

                                inputEnd += nread;
                        } while (inputEnd < 4);

                        int first2Bytes = inputEnd >= 2 ? (input[0] << 8) | input[1] : 0;
                        int next2Bytes = inputEnd >= 4 ? (input[2] << 8) | input[3] : 0;
                        const int UTF32BE = 12001;

                        switch (first2Bytes) {
                        case 0x0000:
                                // 00 00 FE FF
                                if (next2Bytes == 0xFEFF)
                                        encoding = Encoding.GetEncoding (UTF32BE);
                                break;
                        case 0xFEFF:
                                encoding = Encoding.BigEndianUnicode;
                                break;
                        case 0xFFFE:
                                // FF FE 00 00 is UTF-32LE, but only if those 2 zero bytes were really read; a stream
                                // that ends right after FF FE is an (empty or truncated) UTF-16LE stream.
                                if (inputEnd >= 4 && next2Bytes == 0x0000)
                                        encoding = Encoding.UTF32;
                                else
                                        encoding = Encoding.Unicode;
                                break;
                        case 0xEFBB:
                                // The UTF-8 byte-order-mark is only 3 bytes long (EF BB BF), so don't require a 4th byte.
                                if (inputEnd >= 3 && input[2] == 0xBF)
                                        encoding = new UTF8Encoding (true, true);
                                break;
                        }

                        decoder = encoding!.GetDecoder ();

#if NET6_0_OR_GREATER
                        var preamble = encoding.Preamble;
#else
                        var preamble = encoding.GetPreamble ();
#endif

                        // Never match the preamble against bytes that were not read from the stream; doing so
                        // could return an offset beyond inputEnd.
                        if (inputEnd < preamble.Length)
                                return 0;

                        return SkipByteOrderMark (input, preamble);
                }
1✔
438

439
                /// <summary>
                /// Refill the character buffer from the stream or text reader once every buffered character has been consumed.
                /// </summary>
                /// <remarks>
                /// Does nothing while unread characters remain or after the end of the input has been reached.
                /// Sets <c>eof</c> when a refill produces no characters.
                /// </remarks>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                void FillBuffer ()
                {
                        if (bufferIndex != bufferEnd || eof)
                                return;

                        if (stream != null) {
                                // Offset of the first byte in 'input' that still needs to be decoded. It is non-zero
                                // only when byte-order-mark detection left bytes in 'input' and found a mark to skip.
                                int offset = 0;

                                if (decoder == null) {
                                        offset = DetectEncodingFromByteOrderMarks ();
                                } else if (detectByteOrderMark) {
                                        offset = DetectByteOrderMark ();
                                        detectByteOrderMark = false;
                                }

                                bufferIndex = 0;
                                bufferEnd = 0;

                                // Keep reading until the decoder produces at least 1 character or the stream ends.
                                do {
                                        if (offset == inputEnd) {
                                                inputEnd = stream.Read (input, 0, input.Length);
                                                offset = 0;
                                        }

                                        // A zero-byte read means the stream has ended, so ask the decoder to flush.
                                        bufferEnd = decoder.GetChars (input, offset, inputEnd - offset, buffer, 0, inputEnd == 0);
                                        offset = inputEnd;
                                } while (bufferEnd == 0 && inputEnd > 0);

                                // Everything in 'input' has been handed to the decoder.
                                inputEnd = 0;
                        } else if (textReader != null) {
                                bufferEnd = textReader.Read (buffer, 0, buffer.Length);
                                bufferIndex = 0;
                        } else {
                                throw new InvalidOperationException ("No input stream or text reader has been provided.");
                        }

                        eof = bufferEnd == 0;
                }
1✔
481

482
                /// <summary>
                /// Look at the next character without consuming it.
                /// </summary>
                /// <returns><see langword="true" /> if a character is available; otherwise, <see langword="false" />.</returns>
                /// <param name="c">The next character, or <c>'\0'</c> if none is available.</param>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                bool TryPeek (out char c)
                {
                        FillBuffer ();

                        bool available = bufferIndex < bufferEnd;

                        c = available ? buffer[bufferIndex] : '\0';

                        return available;
                }
1✔
496

497
                /// <summary>
                /// Advance to the next line and reset the line position to <c>1</c>.
                /// </summary>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                void IncrementLineNumber ()
                {
                        lineNumber++;
                        linePosition = 1;
                }
1✔
503

504
                /// <summary>
                /// Consume the buffered character <paramref name="c"/>, updating the line number and line position.
                /// </summary>
                /// <param name="c">The character at the current buffer position.</param>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                void ConsumeCharacter (char c)
                {
                        bufferIndex++;

                        if (c != '\n') {
                                linePosition++;
                                return;
                        }

                        IncrementLineNumber ();
                }
1✔
515

516
                /// <summary>
                /// Read and consume the next character, updating the line number and line position.
                /// </summary>
                /// <returns><see langword="true" /> if a character was read; otherwise, <see langword="false" />.</returns>
                /// <param name="c">The character that was read, or <c>'\0'</c> if none is available.</param>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                bool TryRead (out char c)
                {
                        FillBuffer ();

                        if (bufferIndex >= bufferEnd) {
                                c = '\0';
                                return false;
                        }

                        c = buffer[bufferIndex];
                        bufferIndex++;

                        if (c != '\n')
                                linePosition++;
                        else
                                IncrementLineNumber ();

                        return true;
                }
1✔
537

538
#if NET8_0_OR_GREATER
                bool TryReadDataUntil (SearchValues<char> specials, out char c)
#else
                bool TryReadDataUntil (ReadOnlySpan<char> specials, out char c)
#endif
                {
                        // Appends characters to 'data' up to (but not including) the first character found in
                        // 'specials'. That character is consumed and returned through 'c' without being appended.
                        // Returns false, with 'c' set to '\0', if the input ends before a special character is found.
                        FillBuffer ();

                        while (bufferIndex < bufferEnd) {
                                int left = bufferEnd - bufferIndex;

                                // Note: 'specials' MUST contain '\n' for proper line number tracking...
                                var span = new ReadOnlySpan<char> (buffer, bufferIndex, left);
                                int count = span.IndexOfAny (specials);

                                if (count == -1) {
                                        // No special character in what is buffered: take all of it and refill.
                                        data.Append (buffer, bufferIndex, left);

                                        // The copied run cannot contain '\n' (see the note above), so it is all on the
                                        // current line; keep LinePosition in step with the per-character readers.
                                        linePosition += left;
                                        bufferIndex += left;
                                        FillBuffer ();
                                        continue;
                                }

                                if (count > 0) {
                                        data.Append (buffer, bufferIndex, count);
                                        linePosition += count;
                                        bufferIndex += count;
                                }

                                c = buffer[bufferIndex++];

                                if (c == '\n') {
                                        IncrementLineNumber ();
                                } else {
                                        linePosition++;
                                }

                                return true;
                        }

                        c = '\0';

                        return false;
                }
1✔
580

581
#if NET8_0_OR_GREATER
                bool TryReadNameUntil (SearchValues<char> specials, out char c)
#else
                bool TryReadNameUntil (ReadOnlySpan<char> specials, out char c)
#endif
                {
                        // Appends characters to both 'data' and 'name' up to (but not including) the first character
                        // found in 'specials'. That character is consumed and returned through 'c' without being appended.
                        // Returns false, with 'c' set to '\0', if the input ends before a special character is found.
                        FillBuffer ();

                        while (bufferIndex < bufferEnd) {
                                int left = bufferEnd - bufferIndex;

                                // Note: 'specials' MUST contain '\n' for proper line number tracking...
                                var span = new ReadOnlySpan<char> (buffer, bufferIndex, left);
                                int count = span.IndexOfAny (specials);

                                if (count == -1) {
                                        // No special character in what is buffered: take all of it and refill.
                                        data.Append (buffer, bufferIndex, left);
                                        name.Append (buffer, bufferIndex, left);

                                        // The copied run cannot contain '\n' (see the note above), so it is all on the
                                        // current line; keep LinePosition in step with the per-character readers.
                                        linePosition += left;
                                        bufferIndex += left;
                                        FillBuffer ();
                                        continue;
                                }

                                if (count > 0) {
                                        data.Append (buffer, bufferIndex, count);
                                        name.Append (buffer, bufferIndex, count);
                                        linePosition += count;
                                        bufferIndex += count;
                                }

                                c = buffer[bufferIndex++];

                                if (c == '\n') {
                                        IncrementLineNumber ();
                                } else {
                                        linePosition++;
                                }

                                return true;
                        }

                        c = '\0';

                        return false;
                }
1✔
625

626
                /// <summary>
                /// Checks whether the contents of the <c>name</c> buffer equal the specified value,
                /// comparing each character after passing both sides through <c>ToLower</c>.
                /// </summary>
                /// <param name="value">The name to compare against.</param>
                /// <returns><c>true</c> if the lengths match and every character pair matches; otherwise, <c>false</c>.</returns>
                bool NameIs (string value)
                {
                        int length = value.Length;

                        if (name.Length != length)
                                return false;

                        int index = 0;

                        // Advance while the characters keep matching; a full walk means equality.
                        while (index < length && ToLower (name[index]) == ToLower (value[index]))
                                index++;

                        return index == length;
                }
1✔
638

639
                /// <summary>
                /// Creates an attribute from the contents of the <c>name</c> buffer, makes it the
                /// current <c>attribute</c>, adds it to the pending tag and clears the <c>name</c> buffer.
                /// </summary>
                void EmitTagAttribute ()
                {
                        string attributeName = name.ToString ();

                        attribute = CreateAttribute (attributeName);
                        tag!.Attributes.Add (attribute);

                        // The name has been consumed; start the next one from scratch.
                        name.Length = 0;
                }
1✔
645

646
                /// <summary>
                /// Creates a comment token for the given text and resets the state that was
                /// accumulated while scanning it (<c>data</c>, <c>name</c> and the <c>bang</c> flag).
                /// </summary>
                /// <param name="comment">The comment text.</param>
                /// <param name="bogus">Passed through to <c>CreateCommentToken</c>.</param>
                /// <returns>The new comment token, with <c>IsBangComment</c> copied from the <c>bang</c> flag.</returns>
                HtmlToken EmitCommentToken (string comment, bool bogus = false)
                {
                        var commentToken = CreateCommentToken (comment, bogus);
                        commentToken.IsBangComment = bang;

                        bang = false;
                        name.Length = 0;
                        data.Length = 0;

                        return commentToken;
                }
1✔
655

656
                /// <summary>
                /// Convenience overload that converts the buffer to a string and forwards to
                /// the string-based <c>EmitCommentToken</c> overload.
                /// </summary>
                /// <param name="comment">The buffer holding the comment text.</param>
                /// <param name="bogus">Passed through unchanged.</param>
                /// <returns>The comment token produced by the string-based overload.</returns>
                HtmlToken EmitCommentToken (CharBuffer comment, bool bogus = false)
                {
                        string text = comment.ToString ();

                        return EmitCommentToken (text, bogus);
                }
1✔
660

661
                /// <summary>
                /// Hands back the pending <c>doctype</c> token, clearing both the <c>doctype</c>
                /// field and the <c>data</c> buffer.
                /// </summary>
                /// <returns>The value <c>doctype</c> held on entry (which may be <c>null</c>).</returns>
                HtmlToken? EmitDocType ()
                {
                        data.Length = 0;

                        var pending = doctype;
                        doctype = null;

                        return pending;
                }
1✔
668

669
                /// <summary>
                /// Converts the contents of the <c>data</c> buffer into a data token and clears the buffer.
                /// </summary>
                /// <param name="encodeEntities">The value assigned to the token's <c>EncodeEntities</c> property.</param>
                /// <param name="truncated">
                /// <c>true</c> when the buffered text is being flushed as truncated input; such text
                /// is discarded instead of emitted when <c>IgnoreTruncatedTags</c> is set.
                /// </param>
                /// <returns>The data token, or <c>null</c> if the buffer was empty or its content was discarded.</returns>
                HtmlToken? EmitDataToken (bool encodeEntities, bool truncated)
                {
                        if (data.Length == 0)
                                return null;

                        if (!truncated || !IgnoreTruncatedTags) {
                                string text = data.ToString ();
                                var token = CreateDataToken (text);

                                token.EncodeEntities = encodeEntities;
                                data.Length = 0;

                                return token;
                        }

                        // Truncated content that the caller asked us to ignore: drop it silently.
                        data.Length = 0;

                        return null;
                }
1✔
685

686
                /// <summary>
                /// Converts the contents of the <c>data</c> buffer into a CDATA token and clears the buffer.
                /// </summary>
                /// <returns>The CDATA token, or <c>null</c> if the buffer is empty.</returns>
                HtmlToken? EmitCDataToken ()
                {
                        if (data.Length > 0) {
                                var token = CreateCDataToken (data.ToString ());

                                data.Length = 0;

                                return token;
                        }

                        return null;
                }
1✔
696

697
                /// <summary>
                /// Converts the contents of the <c>data</c> buffer into a script-data token and clears the buffer.
                /// </summary>
                /// <returns>The script-data token, or <c>null</c> if the buffer is empty.</returns>
                HtmlToken? EmitScriptDataToken ()
                {
                        if (data.Length > 0) {
                                var token = CreateScriptDataToken (data.ToString ());

                                data.Length = 0;

                                return token;
                        }

                        return null;
                }
1✔
707

708
                /// <summary>
                /// Emits the pending tag and selects the tokenizer state that follows it.
                /// </summary>
                /// <remarks>
                /// End tags and empty elements always continue in the Data state. For any other
                /// tag, the tag id decides whether the content that follows is tokenized as
                /// RAWTEXT, RCDATA, PLAINTEXT, script data or regular data. For the raw text and
                /// RCDATA cases the tag name is remembered in <c>activeTagName</c>. An html tag
                /// additionally updates <c>HtmlNamespace</c> from its last xmlns attribute that
                /// has a value.
                /// </remarks>
                /// <returns>The tag that was pending; the <c>tag</c> field and <c>data</c> buffer are cleared.</returns>
                HtmlToken EmitTagToken ()
                {
                        var current = tag!;

                        if (current.IsEndTag || current.IsEmptyElement) {
                                TokenizerState = HtmlTokenizerState.Data;
                        } else {
                                switch (current.Id) {
                                case HtmlTagId.Style:
                                case HtmlTagId.Xmp:
                                case HtmlTagId.IFrame:
                                case HtmlTagId.NoEmbed:
                                case HtmlTagId.NoFrames:
                                case HtmlTagId.NoScript:
                                        // TODO: (NoScript) only switch into the RawText state if scripting is enabled
                                        TokenizerState = HtmlTokenizerState.RawText;
                                        activeTagName = current.Name;
                                        break;
                                case HtmlTagId.Title:
                                case HtmlTagId.TextArea:
                                        TokenizerState = HtmlTokenizerState.RcData;
                                        activeTagName = current.Name;
                                        break;
                                case HtmlTagId.PlainText:
                                        TokenizerState = HtmlTokenizerState.PlainText;
                                        break;
                                case HtmlTagId.Script:
                                        TokenizerState = HtmlTokenizerState.ScriptData;
                                        break;
                                default:
                                        TokenizerState = HtmlTokenizerState.Data;

                                        if (current.Id != HtmlTagId.Html)
                                                break;

                                        // Walk the attributes back to front so that the last usable xmlns wins.
                                        for (int i = current.Attributes.Count - 1; i >= 0; i--) {
                                                var attr = current.Attributes[i];

                                                if (attr.Id != HtmlAttributeId.XmlNS)
                                                        continue;

                                                var ns = attr.Value;
                                                if (ns == null)
                                                        continue;

                                                HtmlNamespace = ns.ToHtmlNamespace ();
                                                break;
                                        }
                                        break;
                                }
                        }

                        data.Length = 0;
                        tag = null;

                        return current;
                }
1✔
757

758
                // 8.2.4.69 Tokenizing character references
759
                /// <summary>
                /// Tokenizes a character reference after its leading ampersand has been read.
                /// </summary>
                /// <remarks>
                /// If the next character cannot start a reference (white space, a less-than sign or
                /// another ampersand), a literal ampersand is appended to <c>data</c> and nothing is
                /// consumed. Otherwise characters are pushed into the <c>entity</c> decoder and
                /// consumed until it rejects one or a semicolon has been consumed; the decoder's
                /// value is then appended to <c>data</c>.
                /// </remarks>
                /// <param name="next">The state to continue in once the reference has been handled.</param>
                /// <returns>
                /// <c>null</c> in the normal case; at end of input, whatever <c>EmitDataToken</c>
                /// returns for the text buffered so far.
                /// </returns>
                HtmlToken? ReadCharacterReference (HtmlTokenizerState next)
                {
                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                data.Append ('&');

                                return EmitDataToken (true, false);
                        }

                        bool notAReference = c == '\t' || c == '\r' || c == '\n' || c == '\f' ||
                                c == ' ' || c == '<' || c == '&';

                        if (notAReference) {
                                // no character is consumed, emit '&'
                                TokenizerState = next;
                                data.Append ('&');
                                return null;
                        }

                        entity.Push ('&');

                        while (entity.Push (c)) {
                                ConsumeCharacter (c);

                                if (c == ';')
                                        break;

                                if (TryPeek (out c))
                                        continue;

                                // Ran out of input mid-reference: flush the raw text that was pushed so far.
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                data.Append (entity.GetPushedInput ());
                                entity.Reset ();

                                return EmitDataToken (true, false);
                        }

                        TokenizerState = next;

                        data.Append (entity.GetValue ());
                        entity.Reset ();

                        return null;
                }
1✔
800

801
                /// <summary>
                /// Shared handler for the "less-than sign" states of RCDATA and RAWTEXT content.
                /// </summary>
                /// <remarks>
                /// A less-than sign is appended to <c>data</c>. If a solidus follows, it is consumed
                /// and appended as well, the <c>name</c> buffer is cleared and the tokenizer moves to
                /// <paramref name="rawTextEndTagOpen"/>; otherwise it returns to <paramref name="rawText"/>.
                /// </remarks>
                /// <param name="rawText">The state to fall back to.</param>
                /// <param name="rawTextEndTagOpen">The state to enter when a solidus follows.</param>
                /// <returns>Always <c>null</c>.</returns>
                HtmlToken? ReadGenericRawTextLessThan (HtmlTokenizerState rawText, HtmlTokenizerState rawTextEndTagOpen)
                {
                        data.Append ('<');

                        if (!TryPeek (out char c) || c != '/') {
                                TokenizerState = rawText;
                                return null;
                        }

                        TokenizerState = rawTextEndTagOpen;
                        ConsumeCharacter (c);
                        data.Append ('/');
                        name.Length = 0;

                        return null;
                }
1✔
816

817
                /// <summary>
                /// Shared handler for the "end tag open" states of RCDATA and RAWTEXT content.
                /// </summary>
                /// <remarks>
                /// An ASCII letter is consumed and appended to both <c>name</c> and <c>data</c>, and
                /// the tokenizer moves to <paramref name="rawTextEndTagName"/>. Any other character is
                /// left unconsumed and the tokenizer returns to <paramref name="rawText"/>.
                /// </remarks>
                /// <param name="decoded">Passed to <c>EmitDataToken</c> as its <c>encodeEntities</c> argument at end of input.</param>
                /// <param name="rawText">The state to fall back to.</param>
                /// <param name="rawTextEndTagName">The state to enter when a letter follows.</param>
                /// <returns><c>null</c>, or the result of <c>EmitDataToken</c> at end of input.</returns>
                HtmlToken? ReadGenericRawTextEndTagOpen (bool decoded, HtmlTokenizerState rawText, HtmlTokenizerState rawTextEndTagName)
                {
                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitDataToken (decoded, true);
                        }

                        if (!IsAsciiLetter (c)) {
                                TokenizerState = rawText;
                                return null;
                        }

                        TokenizerState = rawTextEndTagName;
                        ConsumeCharacter (c);
                        name.Append (c);
                        data.Append (c);

                        return null;
                }
1✔
835

836
                /// <summary>
                /// Shared handler for the "end tag name" states of RCDATA and RAWTEXT content.
                /// </summary>
                /// <remarks>
                /// Letters are accumulated in <c>name</c>. White space, a solidus or a greater-than
                /// sign ends the tag only when the accumulated name matches <c>activeTagName</c>;
                /// a greater-than sign then yields the end tag immediately, while the other two
                /// create the pending <c>tag</c> and move on to attribute / self-closing handling.
                /// Any other non-letter sends the tokenizer back to <paramref name="rawText"/>,
                /// leaving the characters read so far in <c>data</c>.
                /// </remarks>
                /// <param name="decoded">Passed to <c>EmitDataToken</c> as its <c>encodeEntities</c> argument at end of input.</param>
                /// <param name="rawText">The state to fall back to when this is not the matching end tag.</param>
                /// <returns>The end tag token, a data token at end of input, or <c>null</c>.</returns>
                HtmlToken? ReadGenericRawTextEndTagName (bool decoded, HtmlTokenizerState rawText)
                {
                        var entryState = TokenizerState;

                        while (true) {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitDataToken (decoded, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                bool isWhiteSpace = c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ';
                                bool isTerminator = isWhiteSpace || c == '/' || c == '>';

                                if (isTerminator && NameIs (activeTagName)) {
                                        if (c == '>') {
                                                var token = CreateTagToken (name.ToString (), true);
                                                TokenizerState = HtmlTokenizerState.Data;
                                                data.Length = 0;
                                                name.Length = 0;
                                                return token;
                                        }

                                        TokenizerState = isWhiteSpace
                                                ? HtmlTokenizerState.BeforeAttributeName
                                                : HtmlTokenizerState.SelfClosingStartTag;
                                } else if (!IsAsciiLetter (c)) {
                                        TokenizerState = rawText;
                                        return null;
                                } else {
                                        name.Append (c == '\0' ? '\uFFFD' : c);
                                }

                                if (TokenizerState != entryState)
                                        break;
                        }

                        tag = CreateTagToken (name.ToString (), true);
                        name.Length = 0;

                        return null;
                }
1✔
890

891
                // 8.2.4.1 Data state
892
                /// <summary>
                /// Reads character data until a tag opens, a character reference starts or the input ends.
                /// </summary>
                /// <remarks>
                /// An ampersand only starts a character reference when <c>DecodeCharacterReferences</c>
                /// is enabled; in that case the text buffered so far stays in <c>data</c> and no token
                /// is emitted yet. Otherwise the ampersand is treated like any other character.
                /// </remarks>
                /// <returns>
                /// <c>null</c> when switching to the character reference state; otherwise the result
                /// of <c>EmitDataToken</c> for the buffered text.
                /// </returns>
                HtmlToken? ReadData ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '<') {
                                        TokenizerState = HtmlTokenizerState.TagOpen;
                                } else if (c == '&' && DecodeCharacterReferences) {
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInData;
                                        return null;
                                } else {
                                        // Note: a NUL character is a parse error, but it is emitted anyway.
                                        data.Append (c);
                                }

                                if (TokenizerState != HtmlTokenizerState.Data)
                                        break;
                        }

                        return EmitDataToken (DecodeCharacterReferences, false);
                }
1✔
924

925
                // 8.2.4.2 Character reference in data state
926
                /// <summary>
                /// Handles a character reference encountered in the Data state; the tokenizer
                /// is told to continue in the Data state afterwards.
                /// </summary>
                /// <returns>The result of <c>ReadCharacterReference</c>.</returns>
                HtmlToken? ReadCharacterReferenceInData ()
                        => ReadCharacterReference (HtmlTokenizerState.Data);
1✔
930

931
                // 8.2.4.3 RCDATA state
932
                /// <summary>
                /// Reads RCDATA content until a less-than sign, a character reference or the end of input.
                /// </summary>
                /// <remarks>
                /// NUL characters are replaced with U+FFFD. An ampersand only starts a character
                /// reference when <c>DecodeCharacterReferences</c> is enabled; the buffered text is
                /// then kept in <c>data</c> and no token is emitted yet.
                /// </remarks>
                /// <returns>
                /// <c>null</c> when switching to the character reference state; otherwise the result
                /// of <c>EmitDataToken</c> for the buffered text.
                /// </returns>
                HtmlToken? ReadRcData ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '<') {
                                        TokenizerState = HtmlTokenizerState.RcDataLessThan;
                                        break;
                                }

                                if (c == '&' && DecodeCharacterReferences) {
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInRcData;
                                        return null;
                                }

                                data.Append (c == '\0' ? '\uFFFD' : c);

                                if (TokenizerState != HtmlTokenizerState.RcData)
                                        break;
                        }

                        return EmitDataToken (DecodeCharacterReferences, false);
                }
1✔
963

964
                // 8.2.4.4 Character reference in RCDATA state
965
                /// <summary>
                /// Handles a character reference encountered in the RCDATA state; the tokenizer
                /// is told to continue in the RCDATA state afterwards.
                /// </summary>
                /// <returns>The result of <c>ReadCharacterReference</c>.</returns>
                HtmlToken? ReadCharacterReferenceInRcData ()
                        => ReadCharacterReference (HtmlTokenizerState.RcData);
1✔
969

970
                // 8.2.4.5 RAWTEXT state
971
                /// <summary>
                /// Reads RAWTEXT content until a less-than sign or the end of input, replacing
                /// NUL characters with U+FFFD.
                /// </summary>
                /// <returns>The result of <c>EmitDataToken</c> for the buffered text (entities are not encoded).</returns>
                HtmlToken? ReadRawText ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '<') {
                                        TokenizerState = HtmlTokenizerState.RawTextLessThan;
                                        break;
                                }

                                data.Append (c == '\0' ? '\uFFFD' : c);

                                if (TokenizerState != HtmlTokenizerState.RawText)
                                        break;
                        }

                        return EmitDataToken (false, false);
                }
1✔
993

994
                // 8.2.4.6 Script data state
995
                /// <summary>
                /// Reads script data until a less-than sign or the end of input, replacing
                /// NUL characters with U+FFFD.
                /// </summary>
                /// <returns>The result of <c>EmitScriptDataToken</c> for the buffered text.</returns>
                HtmlToken? ReadScriptData ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '<')
                                        TokenizerState = HtmlTokenizerState.ScriptDataLessThan;
                                else
                                        data.Append (c == '\0' ? '\uFFFD' : c);

                                if (TokenizerState != HtmlTokenizerState.ScriptData)
                                        break;
                        }

                        return EmitScriptDataToken ();
                }
1✔
1017

1018
                // Characters that interrupt bulk copying in the PLAINTEXT state: NUL (which
                // ReadPlainText replaces with U+FFFD) and '\n' (needed for line number tracking).
                static readonly char[] PlainTextSpecials = { '\0', '\n' };
#if NET8_0_OR_GREATER
                // Pre-built search set over the same characters, used on net8.0 and later.
                static readonly SearchValues<char> PlainTextSpecialsSV = SearchValues.Create (PlainTextSpecials);
#endif
1022

1023
                // 8.2.4.7 PLAINTEXT state
1024
                /// <summary>
                /// Reads everything up to the end of input as plain text, replacing NUL
                /// characters with U+FFFD.
                /// </summary>
                /// <returns>The result of <c>EmitDataToken</c> for the buffered text (entities are not encoded).</returns>
                HtmlToken? ReadPlainText ()
                {
#if NET8_0_OR_GREATER
                        SearchValues<char> specials = PlainTextSpecialsSV;
#else
                        ReadOnlySpan<char> specials = PlainTextSpecials;
#endif

                        // TryReadDataUntil copies ordinary characters into 'data' itself and only
                        // hands back the special ones, which are appended here.
                        while (TryReadDataUntil (specials, out char c))
                                data.Append (c == '\0' ? '\uFFFD' : c);

                        TokenizerState = HtmlTokenizerState.EndOfFile;

                        return EmitDataToken (false, false);
                }
1✔
1043

1044
                // 8.2.4.8 Tag open state
1045
                /// <summary>
                /// Decides what kind of markup follows a less-than sign.
                /// </summary>
                /// <remarks>
                /// The less-than sign and the character read are saved in <c>data</c>. An exclamation
                /// mark leads to the markup declaration state, a question mark to a bogus comment
                /// (with <c>data</c> reduced to just that character), a solidus to an end tag, and an
                /// ASCII letter starts a tag name. Anything else returns to the Data state.
                /// </remarks>
                /// <returns>
                /// <c>null</c>, except at end of input where a data token containing the less-than
                /// sign is returned unless <c>IgnoreTruncatedTags</c> is set.
                /// </returns>
                HtmlToken? ReadTagOpen ()
                {
                        if (!TryRead (out char c)) {
                                HtmlToken? truncatedToken = null;

                                if (!IgnoreTruncatedTags)
                                        truncatedToken = CreateDataToken ("<");

                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return truncatedToken;
                        }

                        // Note: we save the data in case we hit a parse error and have to emit a data token
                        data.Append ('<');
                        data.Append (c);

                        if (c == '!') {
                                TokenizerState = HtmlTokenizerState.MarkupDeclarationOpen;
                        } else if (c == '?') {
                                TokenizerState = HtmlTokenizerState.BogusComment;
                                data.Length = 1;
                                data[0] = c;
                        } else if (c == '/') {
                                TokenizerState = HtmlTokenizerState.EndTagOpen;
                        } else if (IsAsciiLetter (c)) {
                                TokenizerState = HtmlTokenizerState.TagName;
                                isEndTag = false;
                                name.Append (c);
                        } else {
                                TokenizerState = HtmlTokenizerState.Data;
                        }

                        return null;
                }
1✔
1082

1083
                // 8.2.4.9 End tag open state
1084
                /// <summary>
                /// Handles the character that follows the opening of an end tag.
                /// </summary>
                /// <remarks>
                /// The character is saved in <c>data</c>. An ASCII letter starts the end tag's name.
                /// A greater-than sign is a parse error: the buffered text is dropped and the
                /// tokenizer returns to the Data state. Anything else starts a bogus comment, with
                /// <c>data</c> reduced to just that character.
                /// </remarks>
                /// <returns><c>null</c>, or the result of <c>EmitDataToken</c> at end of input.</returns>
                HtmlToken? ReadEndTagOpen ()
                {
                        if (!TryRead (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitDataToken (false, true);
                        }

                        // Note: we save the data in case we hit a parse error and have to emit a data token
                        data.Append (c);

                        if (c == '>') {
                                // parse error
                                TokenizerState = HtmlTokenizerState.Data;
                                data.Length = 0; // FIXME: this is probably wrong
                        } else if (IsAsciiLetter (c)) {
                                TokenizerState = HtmlTokenizerState.TagName;
                                isEndTag = true;
                                name.Append (c);
                        } else {
                                TokenizerState = HtmlTokenizerState.BogusComment;
                                data.Length = 1;
                                data[0] = c;
                        }

                        return null;
                }
1✔
1114

1115
                // 8.2.4.10 Tag name state
1116
                /// <summary>
                /// Accumulates a tag name until white space, a solidus, a greater-than sign or the end of input.
                /// </summary>
                /// <remarks>
                /// Every character read is saved in <c>data</c>. NUL characters in the name are
                /// replaced with U+FFFD. A greater-than sign completes the tag and emits it right
                /// away; white space or a solidus creates the pending <c>tag</c> and moves on to
                /// attribute / self-closing handling.
                /// </remarks>
                /// <returns>
                /// The completed tag token, the result of <c>EmitDataToken</c> at end of input, or
                /// <c>null</c> when more of the tag remains to be read.
                /// </returns>
                HtmlToken? ReadTagName ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitDataToken (false, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                if (c == '>') {
                                        tag = CreateTagToken (name.ToString (), isEndTag);
                                        data.Length = 0;
                                        name.Length = 0;

                                        return EmitTagToken ();
                                }

                                if (c == '/')
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                else if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ')
                                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                else
                                        name.Append (c == '\0' ? '\uFFFD' : c);

                                if (TokenizerState != HtmlTokenizerState.TagName)
                                        break;
                        }

                        tag = CreateTagToken (name.ToString (), isEndTag);
                        name.Length = 0;

                        return null;
                }
1✔
1155

1156
                // 8.2.4.11 RCDATA less-than sign state
1157
                /// <summary>
                /// RCDATA flavour of the shared less-than sign handler: falls back to the RCDATA
                /// state or advances to the RCDATA end tag open state.
                /// </summary>
                /// <returns>The result of <c>ReadGenericRawTextLessThan</c>.</returns>
                HtmlToken? ReadRcDataLessThan ()
                        => ReadGenericRawTextLessThan (
                                HtmlTokenizerState.RcData,
                                HtmlTokenizerState.RcDataEndTagOpen);
1✔
1161

1162
                // 8.2.4.12 RCDATA end tag open state
1163
                /// <summary>
                /// RCDATA flavour of the shared end tag open handler; <c>DecodeCharacterReferences</c>
                /// is forwarded as the <c>decoded</c> argument.
                /// </summary>
                /// <returns>The result of <c>ReadGenericRawTextEndTagOpen</c>.</returns>
                HtmlToken? ReadRcDataEndTagOpen ()
                        => ReadGenericRawTextEndTagOpen (
                                DecodeCharacterReferences,
                                HtmlTokenizerState.RcData,
                                HtmlTokenizerState.RcDataEndTagName);
1✔
1167

1168
                // 8.2.4.13 RCDATA end tag name state
1169
                /// <summary>
                /// RCDATA flavour of the shared end tag name handler; <c>DecodeCharacterReferences</c>
                /// is forwarded as the <c>decoded</c> argument.
                /// </summary>
                /// <returns>The result of <c>ReadGenericRawTextEndTagName</c>.</returns>
                HtmlToken? ReadRcDataEndTagName ()
                        => ReadGenericRawTextEndTagName (
                                DecodeCharacterReferences,
                                HtmlTokenizerState.RcData);
1✔
1173

1174
                // 8.2.4.14 RAWTEXT less-than sign state
1175
                /// <summary>
                /// RAWTEXT flavour of the shared less-than sign handler: falls back to the RAWTEXT
                /// state or advances to the RAWTEXT end tag open state.
                /// </summary>
                /// <returns>The result of <c>ReadGenericRawTextLessThan</c>.</returns>
                HtmlToken? ReadRawTextLessThan ()
                        => ReadGenericRawTextLessThan (
                                HtmlTokenizerState.RawText,
                                HtmlTokenizerState.RawTextEndTagOpen);
1✔
1179

1180
                // 8.2.4.15 RAWTEXT end tag open state
1181
                /// <summary>
                /// RAWTEXT flavour of the shared end tag open handler; <c>false</c> is passed as
                /// the <c>decoded</c> argument.
                /// </summary>
                /// <returns>The result of <c>ReadGenericRawTextEndTagOpen</c>.</returns>
                HtmlToken? ReadRawTextEndTagOpen ()
                        => ReadGenericRawTextEndTagOpen (
                                false,
                                HtmlTokenizerState.RawText,
                                HtmlTokenizerState.RawTextEndTagName);
1✔
1185

1186
                // 8.2.4.16 RAWTEXT end tag name state
1187
                /// <summary>
                /// RAWTEXT flavour of the shared end tag name handler; <c>false</c> is passed as
                /// the <c>decoded</c> argument.
                /// </summary>
                /// <returns>The result of <c>ReadGenericRawTextEndTagName</c>.</returns>
                HtmlToken? ReadRawTextEndTagName ()
                        => ReadGenericRawTextEndTagName (
                                false,
                                HtmlTokenizerState.RawText);
1✔
1191

1192
                // 8.2.4.17 Script data less-than sign state
1193
                /// <summary>
                /// Handles a less-than sign inside script data.
                /// </summary>
                /// <remarks>
                /// The less-than sign is appended to <c>data</c>. A following solidus is consumed and
                /// opens a potential end tag (clearing <c>name</c>); a following exclamation mark is
                /// consumed and begins a script escape sequence. Both characters are appended to
                /// <c>data</c>. Anything else returns to the script data state without consuming input.
                /// </remarks>
                /// <returns>Always <c>null</c>.</returns>
                HtmlToken? ReadScriptDataLessThan ()
                {
                        data.Append ('<');

                        bool peeked = TryPeek (out char c);

                        if (peeked && c == '/') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEndTagOpen;
                                ConsumeCharacter (c);
                                data.Append ('/');
                                name.Length = 0;
                                return null;
                        }

                        if (c == '!') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapeStart;
                                ConsumeCharacter (c);
                                data.Append ('!');
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptData;

                        return null;
                }
1✔
1212

1213
                // 8.2.4.18 Script data end tag open state
1214
                /// <summary>
                /// Handles the character that follows the opening of a potential end tag inside script data.
                /// </summary>
                /// <remarks>
                /// Only an 's' or 'S' is accepted as the start of an end tag name: it is consumed,
                /// a lower-case 's' is appended to <c>name</c> and the character as written is
                /// appended to <c>data</c>. Any other character is left unconsumed and the tokenizer
                /// returns to the script data state.
                /// </remarks>
                /// <returns><c>null</c>, or the result of <c>EmitScriptDataToken</c> at end of input.</returns>
                HtmlToken? ReadScriptDataEndTagOpen ()
                {
                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitScriptDataToken ();
                        }

                        if (c != 'S' && c != 's') {
                                TokenizerState = HtmlTokenizerState.ScriptData;
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptDataEndTagName;
                        ConsumeCharacter (c);
                        name.Append ('s');
                        data.Append (c);

                        return null;
                }
1✔
1232

1233
                // 8.2.4.19 Script data end tag name state
1234
                // Accumulates the letters that follow "</" inside script data and decides whether they
                // spell the "script" end tag.
                //
                // Returns:
                //   - the end tag token when the name is "script" and is terminated by '>';
                //   - the result of EmitScriptDataToken () when the input ends;
                //   - null otherwise, after switching to BeforeAttributeName / SelfClosingStartTag (a
                //     pending end tag is stored in 'tag') or back to ScriptData.
                //
                // The character that aborts the end tag is only peeked, never consumed, so the script
                // data state gets to process it itself (the spec's "reconsume"). This matters when that
                // character is '<' (it may begin the real "</script>") or NUL.
                HtmlToken? ReadScriptDataEndTagName ()
                {
                        do {
                                if (!TryPeek (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        if (!NameIs ("script"))
                                                goto default;

                                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                        break;
                                case '/':
                                        if (!NameIs ("script"))
                                                goto default;

                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                        break;
                                case '>':
                                        if (!NameIs ("script"))
                                                goto default;

                                        ConsumeCharacter (c);

                                        // The buffered text belonged to the tag itself, so it is discarded.
                                        var token = CreateTagToken (name.ToString (), true);
                                        TokenizerState = HtmlTokenizerState.Data;
                                        data.Length = 0;
                                        name.Length = 0;
                                        return token;
                                default:
                                        if (!IsAsciiLetter (c)) {
                                                // Not an end tag after all: leave c in the input so that the
                                                // script data state handles it.
                                                TokenizerState = HtmlTokenizerState.ScriptData;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Append (c);
                                        break;
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                ConsumeCharacter (c);
                                data.Append (c);
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataEndTagName);

                        // Whitespace or '/' ended a valid "script" name: keep the tag pending.
                        tag = CreateTagToken (name.ToString (), true);
                        name.Length = 0;

                        return null;
                }
1✔
1286

1287
                // 8.2.4.20 Script data escape start state
1288
                // Looks at the next character without consuming it. A '-' is consumed, appended to the
                // data buffer and moves the tokenizer to ScriptDataEscapeStartDash; anything else
                // (including no more input) sends it back to ScriptData. Always returns null.
                HtmlToken? ReadScriptDataEscapeStart ()
                {
                        bool dashFollows = TryPeek (out char next) && next == '-';

                        if (!dashFollows) {
                                TokenizerState = HtmlTokenizerState.ScriptData;
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptDataEscapeStartDash;
                        ConsumeCharacter (next);
                        data.Append ('-');

                        return null;
                }
1✔
1300

1301
                // 8.2.4.21 Script data escape start dash state
1302
                // Looks at the next character without consuming it. A second '-' is consumed, appended
                // to the data buffer and moves the tokenizer to ScriptDataEscapedDashDash; anything else
                // (including no more input) sends it back to ScriptData. Always returns null.
                HtmlToken? ReadScriptDataEscapeStartDash ()
                {
                        bool secondDash = TryPeek (out char next) && next == '-';

                        if (secondDash) {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapedDashDash;
                                ConsumeCharacter (next);
                                data.Append ('-');
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptData;

                        return null;
                }
1✔
1314

1315
                // 8.2.4.22 Script data escaped state
1316
                // Reads characters for as long as the tokenizer stays in ScriptDataEscaped.
                //
                //   '-'  -> appended to the data buffer; state becomes ScriptDataEscapedDash
                //   '<'  -> state becomes ScriptDataEscapedLessThan; EmitScriptDataToken () is called and
                //           its result becomes the return value; only then is '<' appended
                //   NUL  -> appended as U+FFFD
                //   else -> appended unchanged
                //
                // At end of input the state becomes EndOfFile and the result of EmitScriptDataToken ()
                // is returned. Otherwise returns the token captured on '<', or null.
                HtmlToken? ReadScriptDataEscaped ()
                {
                        HtmlToken? emitted = null;

                        do {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitScriptDataToken ();
                                }

                                if (ch == '-') {
                                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedDash;
                                        data.Append ('-');
                                } else if (ch == '<') {
                                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedLessThan;
                                        emitted = EmitScriptDataToken ();
                                        data.Append ('<');
                                } else if (ch == '\0') {
                                        data.Append ('\uFFFD');
                                } else {
                                        data.Append (ch);
                                }
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataEscaped);

                        return emitted;
                }
1✔
1344

1345
                // 8.2.4.23 Script data escaped dash state
1346
                // Handles the single character that follows one '-' in escaped script data.
                //
                //   '-'  -> appended; state becomes ScriptDataEscapedDashDash
                //   '<'  -> state becomes ScriptDataEscapedLessThan; the result of EmitScriptDataToken ()
                //           is returned, and '<' is appended after that call
                //   else -> appended (NUL as U+FFFD); state becomes ScriptDataEscaped
                //
                // At end of input the state becomes EndOfFile and the result of EmitScriptDataToken ()
                // is returned.
                HtmlToken? ReadScriptDataEscapedDash ()
                {
                        if (!TryRead (out char ch)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitScriptDataToken ();
                        }

                        if (ch == '<') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapedLessThan;
                                HtmlToken? pending = EmitScriptDataToken ();
                                data.Append ('<');
                                return pending;
                        }

                        if (ch == '-') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapedDashDash;
                                data.Append ('-');
                        } else {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                data.Append (ch == '\0' ? '\uFFFD' : ch);
                        }

                        return null;
                }
1✔
1373

1374
                // 8.2.4.24 Script data escaped dash dash state
1375
                // Handles the characters that follow "--" in escaped script data, looping for as long
                // as the tokenizer stays in ScriptDataEscapedDashDash (i.e. across further dashes).
                //
                //   '-'  -> appended; state unchanged
                //   '<'  -> state becomes ScriptDataEscapedLessThan; EmitScriptDataToken () is called and
                //           its result becomes the return value; only then is '<' appended
                //   '>'  -> appended; state becomes ScriptData (the escaped section is closed)
                //   else -> appended (NUL as U+FFFD); state becomes ScriptDataEscaped
                //
                // At end of input the state becomes EndOfFile and the result of EmitScriptDataToken ()
                // is returned. Otherwise returns the token captured on '<', or null.
                HtmlToken? ReadScriptDataEscapedDashDash ()
                {
                        HtmlToken? token = null;

                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '-':
                                        data.Append ('-');
                                        break;
                                case '<':
                                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedLessThan;
                                        token = EmitScriptDataToken ();
                                        data.Append ('<');
                                        break;
                                case '>':
                                        TokenizerState = HtmlTokenizerState.ScriptData;
                                        data.Append ('>');
                                        break;
                                default:
                                        TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                        // NUL is replaced with U+FFFD, as in the other escaped script data states.
                                        data.Append (c == '\0' ? '\uFFFD' : c);
                                        break;
                                }
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataEscapedDashDash);

                        return token;
                }
1✔
1407

1408
                // 8.2.4.25 Script data escaped less-than sign state
1409
                // Handles the character that follows '<' in escaped script data. The character is
                // peeked first and consumed only when it is recognised.
                //
                //   '/'    -> consumed and appended to data; name buffer reset;
                //             state becomes ScriptDataEscapedEndTagOpen
                //   letter -> consumed and appended to data; name buffer restarted with this letter;
                //             state becomes ScriptDataDoubleEscapeStart
                //   else   -> nothing consumed; state becomes ScriptDataEscaped (also when no input is
                //             available)
                //
                // Always returns null.
                HtmlToken? ReadScriptDataEscapedLessThan ()
                {
                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                return null;
                        }

                        if (c == '/') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapedEndTagOpen;
                                ConsumeCharacter (c);
                                data.Append (c);
                                name.Length = 0;
                        } else if (IsAsciiLetter (c)) {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapeStart;
                                ConsumeCharacter (c);
                                data.Append (c);

                                // Start from an empty name buffer: earlier states can leave letters behind,
                                // which would otherwise be prepended to this name and defeat the later
                                // NameIs ("script") comparison.
                                name.Length = 0;
                                name.Append (c);
                        } else {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                        }

                        return null;
                }
1✔
1432

1433
                // 8.2.4.26 Script data escaped end tag open state
1434
                // Handles the character that follows "</" in escaped script data.
                //
                // An ASCII letter is consumed, appended to both the data and name buffers, and moves the
                // tokenizer to ScriptDataEscapedEndTagName. Any other character is left unconsumed and
                // the tokenizer returns to ScriptDataEscaped. When no character is available the state
                // becomes EndOfFile and the result of EmitScriptDataToken () is returned; otherwise the
                // method returns null.
                HtmlToken? ReadScriptDataEscapedEndTagOpen ()
                {
                        if (!TryPeek (out char next)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitScriptDataToken ();
                        }

                        if (!IsAsciiLetter (next)) {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedEndTagName;
                        ConsumeCharacter (next);
                        data.Append (next);
                        name.Append (next);

                        return null;
                }
1✔
1452

1453
                // 8.2.4.27 Script data escaped end tag name state
1454
                // Accumulates the letters that follow "</" inside escaped script data and decides
                // whether they spell the "script" end tag.
                //
                // Returns:
                //   - the end tag token when the name is "script" and is terminated by '>';
                //   - the result of EmitScriptDataToken () when the input ends;
                //   - null otherwise, after switching to BeforeAttributeName / SelfClosingStartTag (a
                //     pending end tag is stored in 'tag') or back to ScriptDataEscaped.
                //
                // When the name turns out not to be an end tag, the tokenizer goes back to
                // ScriptDataEscaped (it is still inside the escaped section), the name buffer is cleared,
                // and the offending character is left in the input so that state can process it itself
                // (it may be '-', '<' or NUL, all of which are significant there).
                HtmlToken? ReadScriptDataEscapedEndTagName ()
                {
                        do {
                                if (!TryPeek (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        if (!NameIs ("script"))
                                                goto default;

                                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                        break;
                                case '/':
                                        if (!NameIs ("script"))
                                                goto default;

                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                        break;
                                case '>':
                                        if (!NameIs ("script"))
                                                goto default;

                                        ConsumeCharacter (c);

                                        // The buffered text belonged to the tag itself, so it is discarded.
                                        var token = CreateTagToken (name.ToString (), true);
                                        TokenizerState = HtmlTokenizerState.Data;
                                        data.Length = 0;
                                        name.Length = 0;
                                        return token;
                                default:
                                        if (!IsAsciiLetter (c)) {
                                                // Not an end tag after all: stay in the escaped section and let
                                                // that state reconsume c.
                                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Append (c);
                                        break;
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                ConsumeCharacter (c);
                                data.Append (c);
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataEscapedEndTagName);

                        // Whitespace or '/' ended a valid "script" name: keep the tag pending.
                        tag = CreateTagToken (name.ToString (), true);
                        name.Length = 0;

                        return null;
                }
1✔
1506

1507
                // 8.2.4.28 Script data double escape start state
1508
                // Accumulates the letters that follow '<' inside escaped script data to find out whether
                // they spell "script" (which opens a double-escaped section).
                //
                //   whitespace, '/' or '>' -> consumed and appended to data; state becomes
                //                             ScriptDataDoubleEscaped when the name is "script",
                //                             otherwise ScriptDataEscaped; name buffer cleared
                //   letter                 -> consumed, appended to data and to the name buffer
                //   anything else          -> NOT consumed; name buffer cleared; state becomes
                //                             ScriptDataEscaped, which then processes the character
                //                             itself (it may be '-', '<' or NUL)
                //
                // At end of input the state becomes EndOfFile and the result of EmitScriptDataToken ()
                // is returned; otherwise the method returns null.
                HtmlToken? ReadScriptDataDoubleEscapeStart ()
                {
                        do {
                                if (!TryPeek (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ': case '/': case '>':
                                        if (NameIs ("script"))
                                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                        else
                                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                        name.Length = 0;
                                        break;
                                default:
                                        if (!IsAsciiLetter (c)) {
                                                // Leave c in the input so the escaped state reconsumes it, and
                                                // do not let the partial name leak into a later comparison.
                                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Append (c);
                                        break;
                                }

                                ConsumeCharacter (c);
                                data.Append (c);
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataDoubleEscapeStart);

                        return null;
                }
1✔
1539

1540
                // 8.2.4.29 Script data double escaped state
1541
                // Reads characters for as long as the tokenizer stays in ScriptDataDoubleEscaped.
                //
                //   '-'  -> appended; state becomes ScriptDataDoubleEscapedDash
                //   '<'  -> appended; state becomes ScriptDataDoubleEscapedLessThan
                //   else -> appended (NUL as U+FFFD); state unchanged
                //
                // At end of input the state becomes EndOfFile and the result of EmitScriptDataToken ()
                // is returned; otherwise the method returns null.
                HtmlToken? ReadScriptDataDoubleEscaped ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '-':
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedDash;
                                        data.Append ('-');
                                        break;
                                case '<':
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedLessThan;
                                        data.Append ('<');
                                        break;
                                default:
                                        data.Append (c == '\0' ? '\uFFFD' : c);
                                        break;
                                }

                                // Keep going while we remain in *this* state; the condition must name
                                // ScriptDataDoubleEscaped or the loop can never run a second iteration.
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataDoubleEscaped);

                        return null;
                }
1✔
1566

1567
                // 8.2.4.30 Script data double escaped dash state
1568
                // Handles the single character that follows one '-' in double-escaped script data.
                //
                //   '-'  -> appended; state becomes ScriptDataDoubleEscapedDashDash
                //   '<'  -> appended; state becomes ScriptDataDoubleEscapedLessThan
                //   else -> appended (NUL as U+FFFD); state becomes ScriptDataDoubleEscaped
                //
                // At end of input the state becomes EndOfFile and the result of EmitScriptDataToken ()
                // is returned; otherwise the method returns null.
                HtmlToken? ReadScriptDataDoubleEscapedDash ()
                {
                        if (!TryRead (out char ch)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitScriptDataToken ();
                        }

                        if (ch == '-') {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedDashDash;
                                data.Append ('-');
                        } else if (ch == '<') {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedLessThan;
                                data.Append ('<');
                        } else {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                data.Append (ch == '\0' ? '\uFFFD' : ch);
                        }

                        return null;
                }
1✔
1592

1593
                // 8.2.4.31 Script data double escaped dash dash state
1594
                // Handles the characters that follow "--" in double-escaped script data, looping for as
                // long as the tokenizer stays in ScriptDataDoubleEscapedDashDash (i.e. across further
                // dashes).
                //
                //   '-'  -> appended; state unchanged
                //   '<'  -> appended; state becomes ScriptDataDoubleEscapedLessThan
                //   '>'  -> appended; state becomes ScriptData
                //   else -> appended (NUL as U+FFFD); state becomes ScriptDataDoubleEscaped
                //
                // At end of input the state becomes EndOfFile and the result of EmitScriptDataToken ()
                // is returned; otherwise the method returns null.
                HtmlToken? ReadScriptDataDoubleEscapedDashDash ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '-':
                                        data.Append ('-');
                                        break;
                                case '<':
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedLessThan;
                                        data.Append ('<');
                                        break;
                                case '>':
                                        TokenizerState = HtmlTokenizerState.ScriptData;
                                        data.Append ('>');
                                        break;
                                default:
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                        // NUL is replaced with U+FFFD, as in the other double-escaped states.
                                        data.Append (c == '\0' ? '\uFFFD' : c);
                                        break;
                                }

                                // The condition must name this method's own state; comparing against the
                                // single-escaped dash-dash state would end the loop after one character.
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataDoubleEscapedDashDash);

                        return null;
                }
1✔
1623

1624
                // 8.2.4.32 Script data double escaped less-than sign state
1625
                // Handles the character that follows '<' in double-escaped script data.
                //
                // A '/' is consumed, appended to the data buffer and moves the tokenizer to
                // ScriptDataDoubleEscapeEnd with an empty name buffer; anything else (including no more
                // input) is left unconsumed and the tokenizer returns to ScriptDataDoubleEscaped.
                // Always returns null.
                HtmlToken? ReadScriptDataDoubleEscapedLessThan ()
                {
                        if (TryPeek (out char c) && c == '/') {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapeEnd;
                                ConsumeCharacter (c);
                                data.Append ('/');

                                // The double escape end state only ever appends to the name buffer, so it
                                // has to start empty here; otherwise letters from an earlier "</xyz" would
                                // be glued in front of a later "script" and the section could never close.
                                name.Length = 0;
                        } else {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                        }

                        return null;
                }
1✔
1637

1638
                // 8.2.4.33 Script data double escape end state
1639
                // Accumulates the letters that follow "</" inside double-escaped script data to find
                // out whether they spell "script" (which closes the double-escaped section).
                //
                //   whitespace, '/' or '>' -> state becomes ScriptDataEscaped when the name is "script",
                //                             otherwise ScriptDataDoubleEscaped; the character is then
                //                             consumed and appended to data
                //   letter                 -> consumed, appended to the name and data buffers
                //   anything else          -> nothing consumed; state becomes ScriptDataDoubleEscaped
                //
                // The result of TryPeek is deliberately ignored: per the original note, end of input
                // ends up in the "anything else" branch. Always returns null.
                HtmlToken? ReadScriptDataDoubleEscapeEnd ()
                {
                        do {
                                TryPeek (out char next);

                                bool terminator = next == '\t' || next == '\r' || next == '\n' || next == '\f'
                                        || next == ' ' || next == '/' || next == '>';

                                if (terminator) {
                                        TokenizerState = NameIs ("script")
                                                ? HtmlTokenizerState.ScriptDataEscaped
                                                : HtmlTokenizerState.ScriptDataDoubleEscaped;
                                        ConsumeCharacter (next);
                                        data.Append (next);
                                } else if (IsAsciiLetter (next)) {
                                        ConsumeCharacter (next);
                                        name.Append (next);
                                        data.Append (next);
                                } else {
                                        // Note: EOF also hits this case.
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                }
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataDoubleEscapeEnd);

                        return null;
                }
1✔
1668

1669
                // 8.2.4.34 Before attribute name state
1670
                // Skips whitespace inside a tag until something that is not whitespace shows up. Every
                // character read is also appended to the data buffer.
                //
                //   whitespace -> skipped
                //   '/'        -> state becomes SelfClosingStartTag; returns null
                //   '>'        -> returns the result of EmitTagToken ()
                //   else       -> first character of an attribute name (NUL stored as U+FFFD); state
                //                 becomes AttributeName; returns null
                //
                // At end of input the state becomes EndOfFile, the pending tag is dropped and the result
                // of EmitDataToken (false, true) is returned.
                HtmlToken? ReadBeforeAttributeName ()
                {
                        while (true) {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        tag = null;

                                        return EmitDataToken (false, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (ch);

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        continue;

                                if (ch == '/') {
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                        return null;
                                }

                                if (ch == '>')
                                        return EmitTagToken ();

                                // '"', '\'', '<' and '=' are parse errors here, but they are still treated
                                // as the start of an attribute name like any other character.
                                TokenizerState = HtmlTokenizerState.AttributeName;
                                name.Append (ch == '\0' ? '\uFFFD' : ch);
                                return null;
                        }
                }
1✔
1701

1702
                // 8.2.4.35 Attribute name state
1703
                // Accumulates an attribute name in the name buffer for as long as the tokenizer stays in
                // AttributeName. Every character read is also appended to the data buffer.
                //
                //   whitespace -> state becomes AfterAttributeName
                //   '/'        -> state becomes SelfClosingStartTag
                //   '='        -> state becomes BeforeAttributeValue
                //   '>'        -> EmitTagAttribute () is called, then the result of EmitTagToken () is
                //                 returned
                //   else       -> appended to the name (NUL as U+FFFD)
                //
                // Once the state has changed, EmitTagAttribute () is called and null is returned. At end
                // of input the state becomes EndOfFile, the name buffer and pending tag are dropped, and
                // the result of EmitDataToken (false, true) is returned.
                HtmlToken? ReadAttributeName ()
                {
                        do {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;
                                        tag = null;

                                        return EmitDataToken (false, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (ch);

                                if (ch == '>') {
                                        EmitTagAttribute ();

                                        return EmitTagToken ();
                                }

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        TokenizerState = HtmlTokenizerState.AfterAttributeName;
                                else if (ch == '/')
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                else if (ch == '=')
                                        TokenizerState = HtmlTokenizerState.BeforeAttributeValue;
                                else
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                        } while (TokenizerState == HtmlTokenizerState.AttributeName);

                        EmitTagAttribute ();

                        return null;
                }
1✔
1741

1742
                // 8.2.4.36 After attribute name state
1743
                // Skips whitespace after an attribute name until something that is not whitespace shows
                // up. Every character read is also appended to the data buffer.
                //
                //   whitespace -> skipped
                //   '/'        -> state becomes SelfClosingStartTag; returns null
                //   '='        -> state becomes BeforeAttributeValue; returns null
                //   '>'        -> returns the result of EmitTagToken ()
                //   else       -> first character of another attribute name (NUL stored as U+FFFD);
                //                 state becomes AttributeName; returns null
                //
                // At end of input the state becomes EndOfFile, the pending tag is dropped and the result
                // of EmitDataToken (false, true) is returned.
                HtmlToken? ReadAfterAttributeName ()
                {
                        for (;;) {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        tag = null;

                                        return EmitDataToken (false, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (ch);

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        continue;

                                if (ch == '>')
                                        return EmitTagToken ();

                                if (ch == '/') {
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                } else if (ch == '=') {
                                        TokenizerState = HtmlTokenizerState.BeforeAttributeValue;
                                } else {
                                        // '"', '\'' and '<' are parse errors here, but they still start a new
                                        // attribute name like any other character.
                                        TokenizerState = HtmlTokenizerState.AttributeName;
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                                }

                                return null;
                        }
                }
1✔
1777

1778
                // 8.2.4.37 Before attribute value state
1779
                // Skips whitespace after '=' and works out how the attribute value is delimited. Every
                // character read is also appended to the data buffer.
                //
                //   whitespace  -> skipped
                //   '"' or '\'' -> remembered in 'quote'; state becomes AttributeValueQuoted
                //   '&'         -> state becomes CharacterReferenceInAttributeValue
                //   '/'         -> state becomes SelfClosingStartTag
                //   '>'         -> returns the result of EmitTagToken ()
                //   else        -> appended to the name buffer (NUL as U+FFFD); state becomes
                //                  AttributeValueUnquoted
                //
                // Every branch other than '>' returns null. At end of input the state becomes EndOfFile,
                // the pending tag is dropped and the result of EmitDataToken (false, true) is returned.
                HtmlToken? ReadBeforeAttributeValue ()
                {
                        for (;;) {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        tag = null;

                                        return EmitDataToken (false, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (ch);

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        continue;

                                if (ch == '>')
                                        return EmitTagToken ();

                                if (ch == '"' || ch == '\'') {
                                        TokenizerState = HtmlTokenizerState.AttributeValueQuoted;
                                        quote = ch;
                                } else if (ch == '&') {
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInAttributeValue;
                                } else if (ch == '/') {
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                } else {
                                        // '<', '=' and '`' are parse errors here, but they still start an
                                        // unquoted value like any other character.
                                        TokenizerState = HtmlTokenizerState.AttributeValueUnquoted;
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                                }

                                return null;
                        }
                }
1✔
1817

1818
                // Characters handed to TryReadNameUntil while reading a quoted attribute value: NUL,
                // line feed, '&' and the active quote character. One table per quote style.
                static readonly char[] AttributeValueQuotedDQuoteSpecials = new[] { '\0', '\n', '&', '"' };
                static readonly char[] AttributeValueQuotedSQuoteSpecials = new[] { '\0', '\n', '&', '\'' };
#if NET8_0_OR_GREATER
                // SearchValues<char> versions of the two tables above, used on net8.0 and later.
                static readonly SearchValues<char> AttributeValueQuotedDQuoteSpecialsSV = SearchValues.Create (AttributeValueQuotedDQuoteSpecials);
                static readonly SearchValues<char> AttributeValueQuotedSQuoteSpecialsSV = SearchValues.Create (AttributeValueQuotedSQuoteSpecials);
#endif
1824

1825
                // 8.2.4.38 Attribute value (double-quoted) state and 8.2.4.39 Attribute value (single-quoted) state
1826
                HtmlToken? ReadAttributeValueQuoted ()
                {
                        // Serves both quote styles: the 'quote' field holds the delimiter that opened the
                        // value, which selects the matching set of stop characters.
#if NET8_0_OR_GREATER
                        SearchValues<char> specials = quote == '"' ? AttributeValueQuotedDQuoteSpecialsSV : AttributeValueQuotedSQuoteSpecialsSV;
#else
                        ReadOnlySpan<char> specials = quote == '"' ? AttributeValueQuotedDQuoteSpecials : AttributeValueQuotedSQuoteSpecials;
#endif

                        do {
                                if (!TryReadNameUntil (specials, out char c)) {
                                        // out of input before the closing quote: drop the partial value
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitDataToken (false, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                if (c == '&') {
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInAttributeValue;
                                        return null;
                                }

                                if (c == '\0') {
                                        // NUL is replaced by U+FFFD in the attribute value
                                        name.Append ('\uFFFD');
                                } else if (c == quote) {
                                        // closing quote: leaving this state ends the loop below
                                        TokenizerState = HtmlTokenizerState.AfterAttributeValueQuoted;
                                        quote = '\0';
                                } else {
                                        // '\n', or the other kind of quote character
                                        name.Append (c);
                                }
                        } while (TokenizerState == HtmlTokenizerState.AttributeValueQuoted);

                        // the value is complete: store it on the attribute and reset the scratch buffer
                        attribute!.Value = name.ToString ();
                        name.Length = 0;

                        return null;
                }
1✔
1869

1870
                // 8.2.4.40 Attribute value (unquoted) state
1871
                HtmlToken? ReadAttributeValueUnquoted ()
                {
                        // Set when '>' terminates the value, so the tag is emitted after the value is stored.
                        bool endOfTag = false;

                        do {
                                if (!TryRead (out char c)) {
                                        // out of input in the middle of the value: drop the partial value
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitDataToken (false, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        // whitespace ends the value; the state change ends the loop
                                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                        break;
                                case '&':
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInAttributeValue;
                                        return null;
                                case '>':
                                        endOfTag = true;
                                        break;
                                default:
                                        // Note: '\'', '<', '=' and '`' are parse errors, but they are still
                                        // appended to the value like any other character.
                                        name.Append (c == '\0' ? '\uFFFD' : c);
                                        break;
                                }
                        } while (!endOfTag && TokenizerState == HtmlTokenizerState.AttributeValueUnquoted);

                        // the value is complete: store it on the attribute and reset the scratch buffer
                        attribute!.Value = name.ToString ();
                        name.Length = 0;

                        return endOfTag ? EmitTagToken () : null;
                }
1✔
1910

1911
                // 8.2.4.41 Character reference in attribute value state
1912
                HtmlToken? ReadCharacterReferenceInAttributeValue ()
                {
                        // The character that would end the current value: the open quote, or '>' when unquoted.
                        char additionalAllowedCharacter = quote == '\0' ? '>' : quote;

                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                name.Length = 0;

                                return EmitDataToken (false, true);
                        }

                        // Decide whether the '&' starts a character reference or stands for itself.
                        bool literalAmpersand;

                        switch (c) {
                        case '\t': case '\r': case '\n': case '\f': case ' ': case '<': case '&':
                                literalAmpersand = true;
                                break;
                        default:
                                literalAmpersand = c == additionalAllowedCharacter;
                                break;
                        }

                        if (literalAmpersand) {
                                // this is not a character reference, nothing is consumed; emit '&'
                                name.Append ('&');
                        } else {
                                entity.Push ('&');

                                // Feed characters to the entity decoder for as long as it accepts them.
                                while (entity.Push (c)) {
                                        ConsumeCharacter (c);

                                        if (c == ';')
                                                break;

                                        if (!TryPeek (out c)) {
                                                TokenizerState = HtmlTokenizerState.EndOfFile;

                                                // swap the last saved character for the full pushed input
                                                data.Length--;
                                                data.Append (entity.GetPushedInput ());
                                                entity.Reset ();

                                                return EmitDataToken (false, true);
                                        }
                                }

                                var pushed = entity.GetPushedInput ();
                                string value;

                                // When the character in 'c' is '=' or alphanumeric, the raw text is kept
                                // instead of the decoded value.
                                if (c == '=' || IsAlphaNumeric (c))
                                        value = pushed;
                                else
                                        value = entity.GetValue ();

                                // swap the last saved character for the full pushed input
                                data.Length--;
                                data.Append (pushed);
                                name.Append (value);
                                entity.Reset ();
                        }

                        // Return to whichever attribute value state we came from.
                        TokenizerState = quote == '\0'
                                ? HtmlTokenizerState.AttributeValueUnquoted
                                : HtmlTokenizerState.AttributeValueQuoted;

                        return null;
                }
1✔
1975

1976
                // 8.2.4.42 After attribute value (quoted) state
1977
                HtmlToken? ReadAfterAttributeValueQuoted ()
                {
                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitDataToken (false, true);
                        }

                        switch (c) {
                        case '>':
                                ConsumeCharacter (c);
                                return EmitTagToken ();
                        case '/':
                                TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                break;
                        case '\t': case '\r': case '\n': case '\f': case ' ':
                                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                break;
                        default:
                                // The character is left unconsumed so that the next state sees it.
                                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                return null;
                        }

                        // '/' and whitespace are consumed here and saved with the raw tag text.
                        ConsumeCharacter (c);
                        data.Append (c);

                        return null;
                }
1✔
2008

2009
                // 8.2.4.43 Self-closing start tag state
2010
                HtmlToken? ReadSelfClosingStartTag ()
                {
                        // Handles the character that follows a '/' inside a tag. Returns the tag token when
                        // that character is '>', the result of EmitDataToken at end of input, and null
                        // otherwise.
                        //
                        // Note: the character is peeked rather than read because the only character that
                        // belongs to this state is '>'. For anything else, the HTML5 specification says to
                        // switch to the before attribute name state and *reconsume* the character; consuming
                        // it here would, for example, drop the 'c' in "<br /class=x>".
                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitDataToken (false, true);
                        }

                        if (c == '>') {
                                ConsumeCharacter (c);
                                tag!.IsEmptyElement = true;

                                return EmitTagToken ();
                        }

                        // parse error: leave the character in the input for the before attribute name state
                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;

                        return null;
                }
1✔
2031

2032
                // 8.2.4.44 Bogus comment state
2033
                HtmlToken ReadBogusComment ()
                {
                        // Collect everything up to (but not including) the next '>' or the end of input.
                        for (;;) {
                                if (!TryRead (out char c)) {
                                        // Note: this assignment is overwritten by the switch to Data below.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '>')
                                        break;

                                if (c == '\0')
                                        data.Append ('\uFFFD');
                                else
                                        data.Append (c);
                        }

                        TokenizerState = HtmlTokenizerState.Data;

                        return EmitCommentToken (data, true);
                }
1✔
2051

2052
                // 8.2.4.45 Markup declaration open state
2053
                HtmlToken? ReadMarkupDeclarationOpen ()
                {
                        int dashes = 0;
                        char c = '\0';

                        // Consume up to two leading '-' characters.
                        for (; dashes < 2; dashes++) {
                                if (!TryPeek (out c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitDataToken (false, true);
                                }

                                if (c != '-')
                                        break;

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                ConsumeCharacter (c);
                                data.Append (c);
                        }

                        if (dashes == 2) {
                                // "<!--"
                                TokenizerState = HtmlTokenizerState.CommentStart;
                                name.Length = 0;
                                return null;
                        }

                        if (dashes == 0) {
                                // Number of keyword characters matched so far (both keywords are 7 long).
                                int matched;

                                switch (c) {
                                case 'D': case 'd':
                                        // Possibly "<!DOCTYPE" (compared case-insensitively).
                                        // Note: we save the data in case we hit a parse error and have to emit a data token
                                        ConsumeCharacter (c);
                                        data.Append (c);
                                        name.Append (c);

                                        for (matched = 1; matched < 7; matched++) {
                                                if (!TryRead (out c)) {
                                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                                        return EmitDataToken (false, true);
                                                }

                                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                                data.Append (c);
                                                name.Append (c);

                                                if (ToLower (c) != DocType[matched])
                                                        break;
                                        }

                                        if (matched == 7) {
                                                // the keyword is passed on exactly as it was written
                                                doctype = CreateDocTypeToken (name.ToString ());
                                                TokenizerState = HtmlTokenizerState.DocType;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Length = 0;
                                        break;
                                case '[':
                                        // Possibly "<![CDATA[" (compared case-sensitively).
                                        // Note: we save the data in case we hit a parse error and have to emit a data token
                                        ConsumeCharacter (c);
                                        data.Append (c);

                                        for (matched = 1; matched < 7; matched++) {
                                                if (!TryRead (out c)) {
                                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                                        return EmitDataToken (false, true);
                                                }

                                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                                data.Append (c);

                                                if (c != CData[matched])
                                                        break;
                                        }

                                        if (matched == 7) {
                                                TokenizerState = HtmlTokenizerState.CDataSection;
                                                data.Length = 0;
                                                return null;
                                        }

                                        break;
                                }
                        }

                        // parse error
                        TokenizerState = HtmlTokenizerState.BogusComment;

                        // trim the leading "<!" by shifting the rest of the buffer down two places
                        int remaining = data.Length - 2;

                        for (int i = 0; i < remaining; i++)
                                data[i] = data[i + 2];

                        data.Length = remaining;
                        bang = true;

                        return null;
                }
1✔
2153

2154
                // 8.2.4.46 Comment start state
2155
                HtmlToken? ReadCommentStart ()
                {
                        if (!TryRead (out char c)) {
                                // end of input right after "<!--": emit an empty comment
                                TokenizerState = HtmlTokenizerState.Data;

                                return EmitCommentToken (string.Empty);
                        }

                        data.Append (c);

                        if (c == '>') {
                                // parse error: "<!-->" is emitted as an empty comment
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitCommentToken (string.Empty);
                        }

                        if (c == '-') {
                                TokenizerState = HtmlTokenizerState.CommentStartDash;
                                return null;
                        }

                        // first character of the comment text
                        TokenizerState = HtmlTokenizerState.Comment;
                        name.Append (c == '\0' ? '\uFFFD' : c);

                        return null;
                }
1✔
2180

2181
                // 8.2.4.47 Comment start dash state
2182
                HtmlToken? ReadCommentStartDash ()
                {
                        if (!TryRead (out char c)) {
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitCommentToken (name);
                        }

                        data.Append (c);

                        if (c == '>') {
                                // parse error: the comment is emitted with whatever text 'name' holds
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitCommentToken (name);
                        }

                        if (c == '-') {
                                TokenizerState = HtmlTokenizerState.CommentEnd;
                                return null;
                        }

                        // The '-' seen in the previous state was ordinary comment text after all.
                        TokenizerState = HtmlTokenizerState.Comment;
                        name.Append ('-');
                        name.Append (c == '\0' ? '\uFFFD' : c);

                        return null;
                }
1✔
2207

2208
                // 8.2.4.48 Comment state
2209
                HtmlToken? ReadComment ()
                {
                        // Accumulate comment text until a '-' is seen or the input runs out.
                        while (TryRead (out char c)) {
                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                if (c == '-') {
                                        TokenizerState = HtmlTokenizerState.CommentEndDash;
                                        return null;
                                }

                                name.Append (c == '\0' ? '\uFFFD' : c);
                        }

                        // end of input: emit the comment text collected so far
                        TokenizerState = HtmlTokenizerState.EndOfFile;

                        return EmitCommentToken (name);
                }
1✔
2230

2231
                // 8.2.4.49 Comment end dash state
2232
                HtmlToken? ReadCommentEndDash ()
                {
                        if (!TryRead (out char c)) {
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitCommentToken (name);
                        }

                        data.Append (c);

                        if (c == '-') {
                                // "--" seen
                                TokenizerState = HtmlTokenizerState.CommentEnd;
                                return null;
                        }

                        // The previous '-' was ordinary comment text; keep it and the current character.
                        TokenizerState = HtmlTokenizerState.Comment;
                        name.Append ('-');
                        name.Append (c == '\0' ? '\uFFFD' : c);

                        return null;
                }
1✔
2254

2255
                // 8.2.4.50 Comment end state
2256
                HtmlToken? ReadCommentEnd ()
                {
                        while (TryRead (out char c)) {
                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                if (c == '-') {
                                        // one more dash than needed: keep one as comment text and stay here
                                        name.Append ('-');
                                        continue;
                                }

                                if (c == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitCommentToken (name);
                                }

                                if (c == '!') {
                                        // parse error
                                        TokenizerState = HtmlTokenizerState.CommentEndBang;
                                        return null;
                                }

                                // The "--" did not end the comment; put it back into the comment text.
                                TokenizerState = HtmlTokenizerState.Comment;
                                name.Append ("--");
                                name.Append (c == '\0' ? '\uFFFD' : c);
                                return null;
                        }

                        // end of input: emit the comment text collected so far
                        TokenizerState = HtmlTokenizerState.EndOfFile;

                        return EmitCommentToken (name);
                }
1✔
2285

2286
                // 8.2.4.51 Comment end bang state
2287
                HtmlToken? ReadCommentEndBang ()
                {
                        if (!TryRead (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitCommentToken (name);
                        }

                        data.Append (c);

                        if (c == '>') {
                                // "--!>" closes the comment
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitCommentToken (name);
                        }

                        bool dash = c == '-';

                        // In both remaining cases the "--!" turns out to be comment text.
                        TokenizerState = dash ? HtmlTokenizerState.CommentEndDash : HtmlTokenizerState.Comment;
                        name.Append ("--!");

                        // parse error (non-dash): the character itself is comment text too
                        if (!dash)
                                name.Append (c == '\0' ? '\uFFFD' : c);

                        return null;
                }
1✔
2313

2314
                // 8.2.4.52 DOCTYPE state
2315
                HtmlToken? ReadDocType ()
                {
                        if (!TryPeek (out char c)) {
                                // end of input directly after the DOCTYPE keyword
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                doctype!.ForceQuirksMode = true;
                                name.Length = 0;

                                return EmitDocType ();
                        }

                        TokenizerState = HtmlTokenizerState.BeforeDocTypeName;

                        // Only whitespace is consumed here; any other character is left for the next state.
                        bool whitespace = c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ';

                        if (whitespace) {
                                ConsumeCharacter (c);
                                data.Append (c);
                        }

                        return null;
                }
1✔
2336

2337
                // 8.2.4.53 Before DOCTYPE name state
2338
                HtmlToken? ReadBeforeDocTypeName ()
                {
                        while (TryRead (out char c)) {
                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        // skip whitespace before the name
                                        continue;
                                case '>':
                                        // the DOCTYPE ended without a name
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // first character of the name
                                TokenizerState = HtmlTokenizerState.DocTypeName;
                                name.Append (c == '\0' ? '\uFFFD' : c);
                                return null;
                        }

                        // end of input before any name was seen
                        TokenizerState = HtmlTokenizerState.EndOfFile;
                        doctype!.ForceQuirksMode = true;

                        return EmitDocType ();
                }
1✔
2364

2365
                // 8.2.4.54 DOCTYPE name state
2366
                HtmlToken? ReadDocTypeName ()
                {
                        // Set when '>' ends the DOCTYPE, so the token is emitted after the name is stored.
                        bool closed = false;

                        do {
                                if (!TryRead (out char c)) {
                                        // end of input in the middle of the name: keep what we have
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.Name = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        // whitespace ends the name; the state change ends the loop
                                        TokenizerState = HtmlTokenizerState.AfterDocTypeName;
                                        break;
                                case '>':
                                        TokenizerState = HtmlTokenizerState.Data;
                                        closed = true;
                                        break;
                                default:
                                        name.Append (c == '\0' ? '\uFFFD' : c);
                                        break;
                                }
                        } while (!closed && TokenizerState == HtmlTokenizerState.DocTypeName);

                        // the name is complete: store it on the token and reset the scratch buffer
                        doctype!.Name = name.ToString ();
                        name.Length = 0;

                        return closed ? EmitDocType () : null;
                }
1✔
2405

2406
                // 8.2.4.55 After DOCTYPE name state
2407
                HtmlToken? ReadAfterDocTypeName ()
                {
                        while (TryRead (out char c)) {
                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        continue;
                                case '>':
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitDocType ();
                                }

                                // Collect six characters before checking for a keyword.
                                name.Append (c);

                                if (name.Length < 6)
                                        continue;

                                if (NameIs ("public")) {
                                        TokenizerState = HtmlTokenizerState.AfterDocTypePublicKeyword;
                                        doctype!.PublicKeyword = name.ToString ();
                                } else if (NameIs ("system")) {
                                        TokenizerState = HtmlTokenizerState.AfterDocTypeSystemKeyword;
                                        doctype!.SystemKeyword = name.ToString ();
                                } else {
                                        // neither keyword
                                        TokenizerState = HtmlTokenizerState.BogusDocType;
                                }

                                name.Length = 0;
                                return null;
                        }

                        // end of input after the name
                        TokenizerState = HtmlTokenizerState.EndOfFile;
                        doctype!.ForceQuirksMode = true;

                        return EmitDocType ();
                }
1✔
2445

2446
                // 8.2.4.56 After DOCTYPE public keyword state
                //
                // Looks at the single character that follows the PUBLIC keyword and picks
                // the next state. Returns the DOCTYPE token when '>' or the end of the
                // input terminates the declaration; otherwise returns null.
                HtmlToken? ReadAfterDocTypePublicKeyword ()
                {
                        if (!TryRead (out char c)) {
                                // The input ended before the DOCTYPE was closed.
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        // Keep the raw character around in case a parse error forces us to emit a data token.
                        data.Append (c);

                        if (c == '>') {
                                // parse error
                                TokenizerState = HtmlTokenizerState.Data;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        if (c == '"' || c == '\'') {
                                // parse error: the identifier starts without separating whitespace
                                TokenizerState = HtmlTokenizerState.DocTypePublicIdentifierQuoted;
                                doctype!.PublicIdentifier = string.Empty;
                                quote = c;
                                return null;
                        }

                        if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ') {
                                TokenizerState = HtmlTokenizerState.BeforeDocTypePublicIdentifier;
                                return null;
                        }

                        // parse error: anything else makes the DOCTYPE bogus
                        TokenizerState = HtmlTokenizerState.BogusDocType;
                        doctype!.ForceQuirksMode = true;
                        return null;
                }
1✔
2479

2480
                // 8.2.4.57 Before DOCTYPE public identifier state
                //
                // Skips whitespace until the opening quote of the public identifier is
                // found. Returns the DOCTYPE token when '>' or the end of the input
                // terminates the declaration; otherwise returns null.
                HtmlToken? ReadBeforeDocTypePublicIdentifier ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        // The input ended before the DOCTYPE was closed.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // Keep the raw character around in case a parse error forces us to emit a data token.
                                data.Append (c);

                                if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ')
                                        continue;

                                if (c == '"' || c == '\'') {
                                        // Remember which quote character has to close the identifier.
                                        TokenizerState = HtmlTokenizerState.DocTypePublicIdentifierQuoted;
                                        doctype!.PublicIdentifier = string.Empty;
                                        quote = c;
                                        return null;
                                }

                                if (c == '>') {
                                        // parse error
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // parse error: anything else makes the DOCTYPE bogus
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                doctype!.ForceQuirksMode = true;
                                return null;
                        }
                }
1✔
2512

2513
                // 8.2.4.58 / 8.2.4.59 DOCTYPE public identifier (double-quoted / single-quoted) states
                //
                // Accumulates the public identifier in 'name' until the character stored in
                // 'quote' is seen again. Returns the DOCTYPE token when '>' or the end of
                // the input cuts the identifier short; otherwise returns null once the
                // closing quote has been consumed.
                HtmlToken? ReadDocTypePublicIdentifierQuoted ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        // The input ended inside the identifier: keep what was collected so far.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.PublicIdentifier = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                // Keep the raw character around in case a parse error forces us to emit a data token.
                                data.Append (c);

                                if (c == '\0') {
                                        // parse error: NUL is replaced with U+FFFD
                                        name.Append ('\uFFFD');
                                        continue;
                                }

                                if (c == '>') {
                                        // parse error: the DOCTYPE was closed before the identifier was terminated
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.PublicIdentifier = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                if (c != quote) {
                                        name.Append (c);
                                        continue;
                                }

                                // The closing quote: store the identifier and move on.
                                TokenizerState = HtmlTokenizerState.AfterDocTypePublicIdentifier;
                                quote = '\0';

                                doctype!.PublicIdentifier = name.ToString ();
                                name.Length = 0;

                                return null;
                        }
                }
1✔
2557

2558
                // 8.2.4.60 After DOCTYPE public identifier state
                //
                // Looks at the single character that follows the public identifier and
                // picks the next state. Returns the DOCTYPE token when '>' or the end of
                // the input terminates the declaration; otherwise returns null.
                HtmlToken? ReadAfterDocTypePublicIdentifier ()
                {
                        if (!TryRead (out char c)) {
                                // The input ended before the DOCTYPE was closed.
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        // Keep the raw character around in case a parse error forces us to emit a data token.
                        data.Append (c);

                        if (c == '>') {
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitDocType ();
                        }

                        if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ') {
                                TokenizerState = HtmlTokenizerState.BetweenDocTypePublicAndSystemIdentifiers;
                                return null;
                        }

                        if (c == '"' || c == '\'') {
                                // parse error: the system identifier starts without separating whitespace
                                TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
                                doctype!.SystemIdentifier = string.Empty;
                                quote = c;
                                return null;
                        }

                        // parse error: anything else makes the DOCTYPE bogus
                        TokenizerState = HtmlTokenizerState.BogusDocType;
                        doctype!.ForceQuirksMode = true;
                        return null;
                }
1✔
2590

2591
                // 8.2.4.61 Between DOCTYPE public and system identifiers state
                //
                // Skips whitespace between the public identifier and an optional system
                // identifier. Returns the DOCTYPE token when '>' or the end of the input
                // terminates the declaration; otherwise returns null.
                HtmlToken? ReadBetweenDocTypePublicAndSystemIdentifiers ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        // The input ended before the DOCTYPE was closed.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // Keep the raw character around in case a parse error forces us to emit a data token.
                                data.Append (c);

                                if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ')
                                        continue;

                                if (c == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitDocType ();
                                }

                                if (c == '"' || c == '\'') {
                                        // Remember which quote character has to close the identifier.
                                        TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
                                        doctype!.SystemIdentifier = string.Empty;
                                        quote = c;
                                        return null;
                                }

                                // parse error: anything else makes the DOCTYPE bogus
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                doctype!.ForceQuirksMode = true;
                                return null;
                        }
                }
1✔
2622

2623
                // 8.2.4.62 After DOCTYPE system keyword state
                //
                // Looks at the single character that follows the SYSTEM keyword and picks
                // the next state. Returns the DOCTYPE token when '>' or the end of the
                // input terminates the declaration; otherwise returns null.
                HtmlToken? ReadAfterDocTypeSystemKeyword ()
                {
                        if (!TryRead (out char c)) {
                                // The input ended before the DOCTYPE was closed.
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        // Keep the raw character around in case a parse error forces us to emit a data token.
                        data.Append (c);

                        if (c == '>') {
                                // parse error
                                TokenizerState = HtmlTokenizerState.Data;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        if (c == '"' || c == '\'') {
                                // parse error: the identifier starts without separating whitespace
                                TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
                                doctype!.SystemIdentifier = string.Empty;
                                quote = c;
                                return null;
                        }

                        if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ') {
                                TokenizerState = HtmlTokenizerState.BeforeDocTypeSystemIdentifier;
                                return null;
                        }

                        // parse error: anything else makes the DOCTYPE bogus
                        TokenizerState = HtmlTokenizerState.BogusDocType;
                        doctype!.ForceQuirksMode = true;
                        return null;
                }
1✔
2656

2657
                // 8.2.4.63 Before DOCTYPE system identifier state
                //
                // Skips whitespace until the opening quote of the system identifier is
                // found. Returns the DOCTYPE token when '>' or the end of the input
                // terminates the declaration; otherwise returns null.
                HtmlToken? ReadBeforeDocTypeSystemIdentifier ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        // The input ended before the DOCTYPE was closed.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // Keep the raw character around in case a parse error forces us to emit a data token.
                                data.Append (c);

                                if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ')
                                        continue;

                                if (c == '"' || c == '\'') {
                                        // Remember which quote character has to close the identifier.
                                        TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
                                        doctype!.SystemIdentifier = string.Empty;
                                        quote = c;
                                        return null;
                                }

                                if (c == '>') {
                                        // parse error
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // parse error: anything else makes the DOCTYPE bogus
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                doctype!.ForceQuirksMode = true;
                                return null;
                        }
                }
1✔
2689

2690
                // 8.2.4.64 / 8.2.4.65 DOCTYPE system identifier (double-quoted / single-quoted) states
                //
                // Accumulates the system identifier in 'name' until the character stored in
                // 'quote' is seen again. Returns the DOCTYPE token when '>' or the end of
                // the input cuts the identifier short; otherwise returns null once the
                // closing quote has been consumed.
                HtmlToken? ReadDocTypeSystemIdentifierQuoted ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        // The input ended inside the identifier: keep what was collected so far.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.SystemIdentifier = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                // Keep the raw character around in case a parse error forces us to emit a data token.
                                data.Append (c);

                                if (c == '\0') {
                                        // parse error: NUL is replaced with U+FFFD
                                        name.Append ('\uFFFD');
                                        continue;
                                }

                                if (c == '>') {
                                        // parse error: the DOCTYPE was closed before the identifier was terminated
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.SystemIdentifier = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                if (c != quote) {
                                        name.Append (c);
                                        continue;
                                }

                                // The closing quote: store the identifier and move on.
                                TokenizerState = HtmlTokenizerState.AfterDocTypeSystemIdentifier;
                                quote = '\0';

                                doctype!.SystemIdentifier = name.ToString ();
                                name.Length = 0;

                                return null;
                        }
                }
1✔
2734

2735
                // 8.2.4.66 After DOCTYPE system identifier state
                //
                // Skips trailing whitespace after the system identifier. Returns the
                // DOCTYPE token when '>' or the end of the input terminates the
                // declaration; returns null after switching to the bogus DOCTYPE state
                // when any other character is found.
                HtmlToken? ReadAfterDocTypeSystemIdentifier ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        // The input ended before the DOCTYPE was closed.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // Keep the raw character around in case a parse error forces us to emit a data token.
                                data.Append (c);

                                if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ')
                                        continue;

                                if (c == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitDocType ();
                                }

                                // parse error: note that the force-quirks flag is deliberately left alone here
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                return null;
                        }
                }
1✔
2760

2761
                // 8.2.4.67 Bogus DOCTYPE state
                //
                // Discards everything up to and including the next '>' (or up to the end
                // of the input) and then returns the DOCTYPE token.
                //
                // This state never touches the force-quirks flag: per the specification,
                // the states that switch to the bogus DOCTYPE state set it themselves when
                // required. In particular, junk that follows a complete system identifier
                // must not force quirks mode, not even when the input ends before '>'.
                HtmlToken? ReadBogusDocType ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitDocType ();
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                if (c == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitDocType ();
                                }
                        } while (true);
                }
1✔
2780

2781
                // 8.2.4.68 CDATA section state
                //
                // Consumes characters until the "]]>" terminator or the end of the input.
                // The most recent (up to three) characters are held back in the 'cdata'
                // window so that the terminator itself is never appended to 'data'.
                //
                // Returns the result of EmitCDataToken () once the terminator has been
                // found or the input has been exhausted.
                HtmlToken? ReadCDataSection ()
                {
                        do {
                                while (bufferIndex < bufferEnd) {
                                        char c = buffer[bufferIndex++];

                                        if (c == '\n') {
                                                IncrementLineNumber ();
                                        } else {
                                                linePosition++;
                                        }

                                        if (cdataIndex >= 3) {
                                                // The window is full: the oldest character cannot be part of the
                                                // terminator anymore, so flush it and slide the window by one.
                                                data.Append (cdata[0]);
                                                cdata[0] = cdata[1];
                                                cdata[1] = cdata[2];
                                                cdata[2] = c;
                                        } else {
                                                cdata[cdataIndex++] = c;
                                        }

                                        // Check for the terminator whenever the window holds three characters,
                                        // including the moment it fills up for the first time. Otherwise a
                                        // terminator within the first three characters of the section (e.g. an
                                        // empty "<![CDATA[]]>") would go unnoticed.
                                        if (cdataIndex >= 3 && cdata[0] == ']' && cdata[1] == ']' && cdata[2] == '>') {
                                                TokenizerState = HtmlTokenizerState.Data;
                                                cdataIndex = 0;

                                                return EmitCDataToken ();
                                        }
                                }

                                FillBuffer ();
                        } while (!eof);

                        TokenizerState = HtmlTokenizerState.EndOfFile;

                        // No terminator was found: whatever is still held back in the window is content.
                        data.Append (cdata, 0, cdataIndex);
                        cdataIndex = 0;

                        return EmitCDataToken ();
                }
1✔
2821

2822
                /// <summary>
2823
                /// Read the next token.
2824
                /// </summary>
2825
                /// <remarks>
2826
                /// Reads the next token.
2827
                /// </remarks>
2828
                /// <returns><see langword="true" /> if the next token was read; otherwise, <see langword="false" />.</returns>
2829
                /// <param name="token">The token that was read.</param>
2830
                public bool ReadNextToken ([NotNullWhen (true)] out HtmlToken? token)
2831
                {
1✔
2832
                        do {
1✔
2833
                                switch (TokenizerState) {
1✔
2834
                                case HtmlTokenizerState.Data:
2835
                                        token = ReadData ();
1✔
2836
                                        break;
1✔
2837
                                case HtmlTokenizerState.CharacterReferenceInData:
2838
                                        token = ReadCharacterReferenceInData ();
1✔
2839
                                        break;
1✔
2840
                                case HtmlTokenizerState.RcData:
2841
                                        token = ReadRcData ();
1✔
2842
                                        break;
1✔
2843
                                case HtmlTokenizerState.CharacterReferenceInRcData:
2844
                                        token = ReadCharacterReferenceInRcData ();
1✔
2845
                                        break;
1✔
2846
                                case HtmlTokenizerState.RawText:
2847
                                        token = ReadRawText ();
1✔
2848
                                        break;
1✔
2849
                                case HtmlTokenizerState.ScriptData:
2850
                                        token = ReadScriptData ();
1✔
2851
                                        break;
1✔
2852
                                case HtmlTokenizerState.PlainText:
2853
                                        token = ReadPlainText ();
1✔
2854
                                        break;
1✔
2855
                                case HtmlTokenizerState.TagOpen:
2856
                                        token = ReadTagOpen ();
1✔
2857
                                        break;
1✔
2858
                                case HtmlTokenizerState.EndTagOpen:
2859
                                        token = ReadEndTagOpen ();
1✔
2860
                                        break;
1✔
2861
                                case HtmlTokenizerState.TagName:
2862
                                        token = ReadTagName ();
1✔
2863
                                        break;
1✔
2864
                                case HtmlTokenizerState.RcDataLessThan:
2865
                                        token = ReadRcDataLessThan ();
1✔
2866
                                        break;
1✔
2867
                                case HtmlTokenizerState.RcDataEndTagOpen:
2868
                                        token = ReadRcDataEndTagOpen ();
1✔
2869
                                        break;
1✔
2870
                                case HtmlTokenizerState.RcDataEndTagName:
2871
                                        token = ReadRcDataEndTagName ();
1✔
2872
                                        break;
1✔
2873
                                case HtmlTokenizerState.RawTextLessThan:
2874
                                        token = ReadRawTextLessThan ();
1✔
2875
                                        break;
1✔
2876
                                case HtmlTokenizerState.RawTextEndTagOpen:
2877
                                        token = ReadRawTextEndTagOpen ();
1✔
2878
                                        break;
1✔
2879
                                case HtmlTokenizerState.RawTextEndTagName:
2880
                                        token = ReadRawTextEndTagName ();
1✔
2881
                                        break;
1✔
2882
                                case HtmlTokenizerState.ScriptDataLessThan:
2883
                                        token = ReadScriptDataLessThan ();
1✔
2884
                                        break;
1✔
2885
                                case HtmlTokenizerState.ScriptDataEndTagOpen:
2886
                                        token = ReadScriptDataEndTagOpen ();
1✔
2887
                                        break;
1✔
2888
                                case HtmlTokenizerState.ScriptDataEndTagName:
2889
                                        token = ReadScriptDataEndTagName ();
1✔
2890
                                        break;
1✔
2891
                                case HtmlTokenizerState.ScriptDataEscapeStart:
2892
                                        token = ReadScriptDataEscapeStart ();
1✔
2893
                                        break;
1✔
2894
                                case HtmlTokenizerState.ScriptDataEscapeStartDash:
2895
                                        token = ReadScriptDataEscapeStartDash ();
1✔
2896
                                        break;
1✔
2897
                                case HtmlTokenizerState.ScriptDataEscaped:
2898
                                        token = ReadScriptDataEscaped ();
1✔
2899
                                        break;
1✔
2900
                                case HtmlTokenizerState.ScriptDataEscapedDash:
2901
                                        token = ReadScriptDataEscapedDash ();
1✔
2902
                                        break;
1✔
2903
                                case HtmlTokenizerState.ScriptDataEscapedDashDash:
2904
                                        token = ReadScriptDataEscapedDashDash ();
1✔
2905
                                        break;
1✔
2906
                                case HtmlTokenizerState.ScriptDataEscapedLessThan:
2907
                                        token = ReadScriptDataEscapedLessThan ();
1✔
2908
                                        break;
1✔
2909
                                case HtmlTokenizerState.ScriptDataEscapedEndTagOpen:
2910
                                        token = ReadScriptDataEscapedEndTagOpen ();
1✔
2911
                                        break;
1✔
2912
                                case HtmlTokenizerState.ScriptDataEscapedEndTagName:
2913
                                        token = ReadScriptDataEscapedEndTagName ();
1✔
2914
                                        break;
1✔
2915
                                case HtmlTokenizerState.ScriptDataDoubleEscapeStart:
2916
                                        token = ReadScriptDataDoubleEscapeStart ();
1✔
2917
                                        break;
1✔
2918
                                case HtmlTokenizerState.ScriptDataDoubleEscaped:
2919
                                        token = ReadScriptDataDoubleEscaped ();
1✔
2920
                                        break;
1✔
2921
                                case HtmlTokenizerState.ScriptDataDoubleEscapedDash:
2922
                                        token = ReadScriptDataDoubleEscapedDash ();
1✔
2923
                                        break;
1✔
2924
                                case HtmlTokenizerState.ScriptDataDoubleEscapedDashDash:
2925
                                        token = ReadScriptDataDoubleEscapedDashDash ();
1✔
2926
                                        break;
1✔
2927
                                case HtmlTokenizerState.ScriptDataDoubleEscapedLessThan:
2928
                                        token = ReadScriptDataDoubleEscapedLessThan ();
1✔
2929
                                        break;
1✔
2930
                                case HtmlTokenizerState.ScriptDataDoubleEscapeEnd:
2931
                                        token = ReadScriptDataDoubleEscapeEnd ();
1✔
2932
                                        break;
1✔
2933
                                case HtmlTokenizerState.BeforeAttributeName:
2934
                                        token = ReadBeforeAttributeName ();
1✔
2935
                                        break;
1✔
2936
                                case HtmlTokenizerState.AttributeName:
2937
                                        token = ReadAttributeName ();
1✔
2938
                                        break;
1✔
2939
                                case HtmlTokenizerState.AfterAttributeName:
2940
                                        token = ReadAfterAttributeName ();
1✔
2941
                                        break;
1✔
2942
                                case HtmlTokenizerState.BeforeAttributeValue:
2943
                                        token = ReadBeforeAttributeValue ();
1✔
2944
                                        break;
1✔
2945
                                case HtmlTokenizerState.AttributeValueQuoted:
2946
                                        token = ReadAttributeValueQuoted ();
1✔
2947
                                        break;
1✔
2948
                                case HtmlTokenizerState.AttributeValueUnquoted:
2949
                                        token = ReadAttributeValueUnquoted ();
1✔
2950
                                        break;
1✔
2951
                                case HtmlTokenizerState.CharacterReferenceInAttributeValue:
2952
                                        token = ReadCharacterReferenceInAttributeValue ();
1✔
2953
                                        break;
1✔
2954
                                case HtmlTokenizerState.AfterAttributeValueQuoted:
2955
                                        token = ReadAfterAttributeValueQuoted ();
1✔
2956
                                        break;
1✔
2957
                                case HtmlTokenizerState.SelfClosingStartTag:
2958
                                        token = ReadSelfClosingStartTag ();
1✔
2959
                                        break;
1✔
2960
                                case HtmlTokenizerState.BogusComment:
2961
                                        token = ReadBogusComment ();
1✔
2962
                                        break;
1✔
2963
                                case HtmlTokenizerState.MarkupDeclarationOpen:
2964
                                        token = ReadMarkupDeclarationOpen ();
1✔
2965
                                        break;
1✔
2966
                                case HtmlTokenizerState.CommentStart:
2967
                                        token = ReadCommentStart ();
1✔
2968
                                        break;
1✔
2969
                                case HtmlTokenizerState.CommentStartDash:
2970
                                        token = ReadCommentStartDash ();
1✔
2971
                                        break;
1✔
2972
                                case HtmlTokenizerState.Comment:
2973
                                        token = ReadComment ();
1✔
2974
                                        break;
1✔
2975
                                case HtmlTokenizerState.CommentEndDash:
2976
                                        token = ReadCommentEndDash ();
1✔
2977
                                        break;
1✔
2978
                                case HtmlTokenizerState.CommentEnd:
2979
                                        token = ReadCommentEnd ();
1✔
2980
                                        break;
1✔
2981
                                case HtmlTokenizerState.CommentEndBang:
2982
                                        token = ReadCommentEndBang ();
1✔
2983
                                        break;
1✔
2984
                                case HtmlTokenizerState.DocType:
2985
                                        token = ReadDocType ();
1✔
2986
                                        break;
1✔
2987
                                case HtmlTokenizerState.BeforeDocTypeName:
2988
                                        token = ReadBeforeDocTypeName ();
1✔
2989
                                        break;
1✔
2990
                                case HtmlTokenizerState.DocTypeName:
2991
                                        token = ReadDocTypeName ();
1✔
2992
                                        break;
1✔
2993
                                case HtmlTokenizerState.AfterDocTypeName:
2994
                                        token = ReadAfterDocTypeName ();
1✔
2995
                                        break;
1✔
2996
                                case HtmlTokenizerState.AfterDocTypePublicKeyword:
2997
                                        token = ReadAfterDocTypePublicKeyword ();
1✔
2998
                                        break;
1✔
2999
                                case HtmlTokenizerState.BeforeDocTypePublicIdentifier:
3000
                                        token = ReadBeforeDocTypePublicIdentifier ();
1✔
3001
                                        break;
1✔
3002
                                case HtmlTokenizerState.DocTypePublicIdentifierQuoted:
3003
                                        token = ReadDocTypePublicIdentifierQuoted ();
1✔
3004
                                        break;
1✔
3005
                                case HtmlTokenizerState.AfterDocTypePublicIdentifier:
3006
                                        token = ReadAfterDocTypePublicIdentifier ();
1✔
3007
                                        break;
1✔
3008
                                case HtmlTokenizerState.BetweenDocTypePublicAndSystemIdentifiers:
3009
                                        token = ReadBetweenDocTypePublicAndSystemIdentifiers ();
1✔
3010
                                        break;
1✔
3011
                                case HtmlTokenizerState.AfterDocTypeSystemKeyword:
3012
                                        token = ReadAfterDocTypeSystemKeyword ();
1✔
3013
                                        break;
1✔
3014
                                case HtmlTokenizerState.BeforeDocTypeSystemIdentifier:
3015
                                        token = ReadBeforeDocTypeSystemIdentifier ();
1✔
3016
                                        break;
1✔
3017
                                case HtmlTokenizerState.DocTypeSystemIdentifierQuoted:
3018
                                        token = ReadDocTypeSystemIdentifierQuoted ();
1✔
3019
                                        break;
1✔
3020
                                case HtmlTokenizerState.AfterDocTypeSystemIdentifier:
3021
                                        token = ReadAfterDocTypeSystemIdentifier ();
1✔
3022
                                        break;
1✔
3023
                                case HtmlTokenizerState.BogusDocType:
3024
                                        token = ReadBogusDocType ();
1✔
3025
                                        break;
1✔
3026
                                case HtmlTokenizerState.CDataSection:
3027
                                        token = ReadCDataSection ();
1✔
3028
                                        break;
1✔
3029
                                case HtmlTokenizerState.EndOfFile:
3030
                                default:
3031
                                        token = null;
1✔
3032
                                        return false;
1✔
3033
                                }
3034
                        } while (token is null);
1✔
3035

3036
                        return true;
1✔
3037
                }
1✔
3038
        }
3039
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc