• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jstedfast / HtmlKit / 1.2.0.267

05 Oct 2025 03:17PM UTC coverage: 99.241% (-0.2%) from 99.412%
1.2.0.267

push

coveralls.net

jstedfast
Oops, disable net9.0 in the build

2875 of 2897 relevant lines covered (99.24%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.1
/HtmlKit/HtmlTokenizer.cs
1
//
2
// HtmlTokenizer.cs
3
//
4
// Author: Jeffrey Stedfast <jestedfa@microsoft.com>
5
//
6
// Copyright (c) 2015-2025 Jeffrey Stedfast <jestedfa@microsoft.com>
7
//
8
// Permission is hereby granted, free of charge, to any person obtaining a copy
9
// of this software and associated documentation files (the "Software"), to deal
10
// in the Software without restriction, including without limitation the rights
11
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
// copies of the Software, and to permit persons to whom the Software is
13
// furnished to do so, subject to the following conditions:
14
//
15
// The above copyright notice and this permission notice shall be included in
16
// all copies or substantial portions of the Software.
17
//
18
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24
// THE SOFTWARE.
25
//
26

27
using System;
28
using System.IO;
29
using System.Text;
30
using System.Runtime.CompilerServices;
31
using System.Diagnostics.CodeAnalysis;
32

33
namespace HtmlKit {
34
        /// <summary>
35
        /// An HTML tokenizer.
36
        /// </summary>
37
        /// <remarks>
38
        /// Tokenizes HTML text, emitting an <see cref="HtmlToken"/> for each token it encounters.
39
        /// </remarks>
40
        public class HtmlTokenizer
41
        {
42
                // Specification: https://dev.w3.org/html5/spec-LC/tokenization.html
43
                const string DocType = "doctype";
44
                const string CData = "[CDATA[";
45

46
                const int MinimumBufferSize = 1024;
47

48
                readonly HtmlEntityDecoder entity = new HtmlEntityDecoder ();
1✔
49
                readonly CharBuffer data = new CharBuffer (2048);
1✔
50
                readonly CharBuffer name = new CharBuffer (32);
1✔
51

52
                readonly TextReader? textReader;
53
                readonly Stream? stream;
54
                Encoding? encoding;
55
                Decoder? decoder;
56

57
                readonly byte[] input;
58
                int inputEnd;
59

60
                readonly char[] buffer;
61
                int bufferIndex, bufferEnd;
62

63
                readonly char[] cdata = new char[3];
1✔
64
                int cdataIndex;
65

66
                string activeTagName = string.Empty;
1✔
67
                HtmlDocTypeToken? doctype;
68
                HtmlAttribute? attribute;
69
                HtmlTagToken? tag;
70
                char quote;
71

72
                bool decodeCharacterReferences = true;
1✔
73
                int linePosition = 1;
1✔
74
                int lineNumber = 1;
1✔
75

76
                bool detectByteOrderMark;
77
                bool isEndTag;
78
                bool bang;
79
                bool eof;
80

81
                /// <summary>
82
                /// Initialize a new instance of the <see cref="HtmlTokenizer"/> class.
83
                /// </summary>
84
                /// <remarks>
85
                /// <para>Creates a new <see cref="HtmlTokenizer"/>.</para>
86
                /// <para>This constructor will attempt to auto-detect the appropriate encoding to use by examining the first four bytes of the stream
87
                /// and, if a unicode byte-order-mark is detected, use the appropriate unicode encoding. If no byte order mark is detected, then it will
88
                /// default to UTF-8.</para>
89
                /// </remarks>
90
                /// <param name="stream">The input stream.</param>
91
                public HtmlTokenizer (Stream stream)
                        : this (stream, Encoding.UTF8)
                {
                        // All work is delegated to the (Stream, Encoding) overload with UTF-8 as the fallback encoding.
                }
1✔
94

95
                /// <summary>
96
                /// Initialize a new instance of the <see cref="HtmlTokenizer"/> class.
97
                /// </summary>
98
                /// <remarks>
99
                /// <para>Creates a new <see cref="HtmlTokenizer"/>.</para>
100
                /// <para>This constructor will attempt to auto-detect the appropriate encoding to use by examining the first four bytes of the stream
101
                /// and, if a unicode byte-order-mark is detected, use the appropriate unicode encoding. If no byte order mark is detected, then it will
102
                /// default to the user-supplied encoding.</para>
103
                /// </remarks>
104
                /// <param name="stream">The input stream.</param>
105
                /// <param name="encoding">The charset encoding of the stream.</param>
106
                public HtmlTokenizer (Stream stream, Encoding encoding)
                        : this (stream, encoding, true)
                {
                        // Delegates to the full overload with byte-order-mark based encoding detection switched on.
                }
1✔
109

110
                /// <summary>
111
                /// Initialize a new instance of the <see cref="HtmlTokenizer"/> class.
112
                /// </summary>
113
                /// <remarks>
114
                /// <para>Creates a new <see cref="HtmlTokenizer"/>.</para>
115
                /// <para>This constructor allows you to change the encoding the first time you read from the <see cref="HtmlTokenizer"/>. The
116
                /// <paramref name="detectEncodingFromByteOrderMarks"/> parameter detects the encoding by looking at the first four bytes of the stream.
117
                /// It will automatically recognize UTF-8, little-endian UTF-16, big-endian UTF-16, little-endian UTF-32, and big-endian UTF-32 text if
118
                /// the stream starts with the appropriate byte order marks. Otherwise, the user-provided encoding is used.</para>
119
                /// </remarks>
120
                /// <param name="stream">The input stream.</param>
121
                /// <param name="encoding">The charset encoding of the stream.</param>
122
                /// <param name="detectEncodingFromByteOrderMarks"><see langword="true" /> if byte order marks should be detected and used to override the <paramref name="encoding"/>; otherwise, <see langword="false" />.</param>
123
                /// <param name="bufferSize">The minimum buffer size to use for reading.</param>
124
                public HtmlTokenizer (Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize = 4096)
                {
                        // Arguments are validated in declaration order: stream first, then encoding.
                        this.stream = stream ?? throw new ArgumentNullException (nameof (stream));
                        this.encoding = encoding ?? throw new ArgumentNullException (nameof (encoding));

                        // The byte and char buffers share one size, never smaller than MinimumBufferSize.
                        int size = Math.Max (MinimumBufferSize, bufferSize);
                        input = new byte[size];
                        buffer = new char[size];

                        if (detectEncodingFromByteOrderMarks) {
                                // The decoder stays null here; FillBuffer treats a null decoder as the
                                // signal to call DetectEncodingFromByteOrderMarks on the first read.
                                detectByteOrderMark = false;
                        } else {
                                // The encoding is fixed up front; the first read only checks for (and
                                // skips) that encoding's own preamble via DetectByteOrderMark.
                                decoder = encoding.GetDecoder ();
                                detectByteOrderMark = true;
                        }
                }
1✔
142

143
                /// <summary>
144
                /// Initialize a new instance of the <see cref="HtmlTokenizer"/> class.
145
                /// </summary>
146
                /// <remarks>
147
                /// Creates a new <see cref="HtmlTokenizer"/>.
148
                /// </remarks>
149
                /// <param name="reader">The <see cref="TextReader"/>.</param>
150
                public HtmlTokenizer (TextReader reader)
                {
                        textReader = reader ?? throw new ArgumentNullException (nameof (reader));

                        // A TextReader already yields chars, so no byte staging buffer is needed.
                        buffer = new char[2048];
                        input = Array.Empty<byte> ();
                }
1✔
159

160
                /// <summary>
161
                /// Get or set whether the tokenizer should decode character references.
162
                /// </summary>
163
                /// <remarks>
164
                /// <para>Gets or sets whether the tokenizer should decode character references.</para>
165
                /// <note type="warning">Character references in attribute values will still be decoded
166
                /// even if this value is set to <see langword="false" />.</note>
167
                /// </remarks>
168
                /// <value><see langword="true" /> if character references should be decoded; otherwise, <see langword="false" />.</value>
169
                public bool DecodeCharacterReferences {
                        // Thin wrapper around the decodeCharacterReferences backing field.
                        get => decodeCharacterReferences;
                        set => decodeCharacterReferences = value;
                }
173

174
                /// <summary>
175
                /// Get the current HTML namespace detected by the tokenizer.
176
                /// </summary>
177
                /// <remarks>
178
                /// Gets the current HTML namespace detected by the tokenizer.
179
                /// </remarks>
180
                /// <value>The html namespace.</value>
181
                // Publicly readable; only the tokenizer itself can assign it.
                public HtmlNamespace HtmlNamespace { get; private set; }
184

185
                /// <summary>
186
                /// Get or set whether the tokenizer should ignore truncated tags.
187
                /// </summary>
188
                /// <remarks>
189
                /// <para>Gets or sets whether the tokenizer should ignore truncated tags.</para>
190
                /// <para>If <see langword="false" /> and the stream abruptly ends in the middle of an HTML tag, it will be
191
                /// treated as an <see cref="HtmlDataToken"/> instead.</para>
192
                /// </remarks>
193
                /// <value><see langword="true" /> if truncated tags should be ignored; otherwise, <see langword="false" />.</value>
194
                // Consulted by EmitDataToken when it is asked to emit truncated data.
                public bool IgnoreTruncatedTags { get; set; }
197

198
                /// <summary>
199
                /// Get the current line number.
200
                /// </summary>
201
                /// <remarks>
202
                /// <para>This property is most commonly used for error reporting, but can be called
203
                /// at any time. The starting value for this property is <c>1</c>.</para>
204
                /// <para>Combined with <see cref="LinePosition"/>, a value of <c>1,1</c> indicates
205
                /// the start of the document.</para>
206
                /// </remarks>
207
                /// <value>The current line number.</value>
208
                // Read-only view of the lineNumber field.
                public int LineNumber => lineNumber;
211

212
                /// <summary>
213
                /// Get the current line position.
214
                /// </summary>
215
                /// <remarks>
216
                /// <para>This property is most commonly used for error reporting, but can be called
217
                /// at any time. The starting value for this property is <c>1</c>.</para>
218
                /// <para>Combined with <see cref="LineNumber"/>, a value of <c>1,1</c> indicates
219
                /// the start of the document.</para>
220
                /// </remarks>
221
                /// <value>The column position of the current line.</value>
222
                // Read-only view of the linePosition field.
                public int LinePosition => linePosition;
225

226
                /// <summary>
227
                /// Get the current state of the tokenizer.
228
                /// </summary>
229
                /// <remarks>
230
                /// Gets the current state of the tokenizer.
231
                /// </remarks>
232
                /// <value>The current state of the tokenizer.</value>
233
                // Publicly readable; only the tokenizer itself can assign it.
                public HtmlTokenizerState TokenizerState { get; private set; }
236

237
                /// <summary>
238
                /// Create a DOCTYPE token.
239
                /// </summary>
240
                /// <remarks>
241
                /// Creates a DOCTYPE token.
242
                /// </remarks>
243
                /// <returns>The DOCTYPE token.</returns>
244
                // Factory hook: virtual so a subclass can return its own HtmlDocTypeToken instance.
                protected virtual HtmlDocTypeToken CreateDocType () => new HtmlDocTypeToken ();
1✔
248

249
                /// <summary>
                /// Create a DOCTYPE token via <see cref="CreateDocType"/> and record its raw tag name.
                /// </summary>
                /// <param name="rawTagName">The tag name to store in the token's RawTagName property.</param>
                /// <returns>The newly created DOCTYPE token.</returns>
                HtmlDocTypeToken CreateDocTypeToken (string rawTagName)
                {
                        HtmlDocTypeToken docTypeToken = CreateDocType ();

                        docTypeToken.RawTagName = rawTagName;

                        return docTypeToken;
                }
1✔
255

256
                /// <summary>
257
                /// Create an HTML comment token.
258
                /// </summary>
259
                /// <remarks>
260
                /// Creates an HTML comment token.
261
                /// </remarks>
262
                /// <returns>The HTML comment token.</returns>
263
                /// <param name="comment">The comment.</param>
264
                /// <param name="bogus"><see langword="true" /> if the comment is bogus; otherwise, <see langword="false" />.</param>
265
                // Factory hook: virtual so a subclass can return its own HtmlCommentToken instance.
                protected virtual HtmlCommentToken CreateCommentToken (string comment, bool bogus = false)
                        => new HtmlCommentToken (comment, bogus);
1✔
269

270
                /// <summary>
271
                /// Create an HTML character data token.
272
                /// </summary>
273
                /// <remarks>
274
                /// Creates an HTML character data token.
275
                /// </remarks>
276
                /// <returns>The HTML character data token.</returns>
277
                /// <param name="data">The character data.</param>
278
                // Factory hook: virtual so a subclass can return its own HtmlDataToken instance.
                protected virtual HtmlDataToken CreateDataToken (string data)
                        => new HtmlDataToken (data);
1✔
282

283
                /// <summary>
284
                /// Create an HTML CDATA token.
285
                /// </summary>
286
                /// <remarks>
287
                /// Creates an HTML CDATA token.
288
                /// </remarks>
289
                /// <returns>The HTML CDATA token.</returns>
290
                /// <param name="data">The character data.</param>
291
                // Factory hook: virtual so a subclass can return its own HtmlCDataToken instance.
                protected virtual HtmlCDataToken CreateCDataToken (string data)
                        => new HtmlCDataToken (data);
1✔
295

296
                /// <summary>
297
                /// Create an HTML script data token.
298
                /// </summary>
299
                /// <remarks>
300
                /// Creates an HTML script data token.
301
                /// </remarks>
302
                /// <returns>The HTML script data token.</returns>
303
                /// <param name="data">The script data.</param>
304
                // Factory hook: virtual so a subclass can return its own HtmlScriptDataToken instance.
                protected virtual HtmlScriptDataToken CreateScriptDataToken (string data)
                        => new HtmlScriptDataToken (data);
1✔
308

309
                /// <summary>
310
                /// Create an HTML tag token.
311
                /// </summary>
312
                /// <remarks>
313
                /// Creates an HTML tag token.
314
                /// </remarks>
315
                /// <returns>The HTML tag token.</returns>
316
                /// <param name="name">The tag name.</param>
317
                /// <param name="isEndTag"><see langword="true" /> if the tag is an end tag; otherwise, <see langword="false" />.</param>
318
                // Factory hook: virtual so a subclass can return its own HtmlTagToken instance.
                protected virtual HtmlTagToken CreateTagToken (string name, bool isEndTag = false)
                        => new HtmlTagToken (name, isEndTag);
1✔
322

323
                /// <summary>
324
                /// Create an attribute.
325
                /// </summary>
326
                /// <remarks>
327
                /// Creates an attribute.
328
                /// </remarks>
329
                /// <returns>The attribute.</returns>
330
                /// <param name="name">The attribute name.</param>
331
                // Factory hook: virtual so a subclass can return its own HtmlAttribute instance.
                protected virtual HtmlAttribute CreateAttribute (string name)
                        => new HtmlAttribute (name);
1✔
335

336
                /// <summary>
                /// Check whether a character code is an ASCII letter (A-Z, a-z) or an ASCII digit (0-9).
                /// </summary>
                /// <param name="c">The character code to test.</param>
                /// <returns><see langword="true" /> if the code is in one of the three ASCII ranges; otherwise, <see langword="false" />.</returns>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                static bool IsAlphaNumeric (int c)
                {
                        if (c >= 'A' && c <= 'Z')
                                return true;

                        if (c >= 'a' && c <= 'z')
                                return true;

                        return c >= '0' && c <= '9';
                }
1✔
341

342
                /// <summary>
                /// Check whether a character code is an ASCII letter (A-Z or a-z).
                /// </summary>
                /// <param name="c">The character code to test.</param>
                /// <returns><see langword="true" /> if the code is an ASCII letter; otherwise, <see langword="false" />.</returns>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                static bool IsAsciiLetter (int c)
                {
                        bool upper = c >= 'A' && c <= 'Z';
                        bool lower = c >= 'a' && c <= 'z';

                        return upper || lower;
                }
1✔
347

348
                /// <summary>
                /// Convert an ASCII uppercase letter to lowercase; any other code is returned unchanged (cast to char).
                /// </summary>
                /// <param name="c">The character code to convert.</param>
                /// <returns>The lowercase character for A-Z; otherwise, the input cast to <see cref="char"/>.</returns>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                static char ToLower (int c)
                {
                        bool isUpper = c >= 'A' && c <= 'Z';

                        // ASCII lowercase letters sit exactly 0x20 above their uppercase counterparts.
                        return (char) (isUpper ? c + 0x20 : c);
                }
1✔
357

358
                /// <summary>
                /// Compare the start of <paramref name="input"/> against an encoding preamble.
                /// </summary>
                /// <param name="input">The raw input bytes; compared from index 0.</param>
                /// <param name="preamble">The byte-order-mark to look for.</param>
                /// <returns>The preamble length if every preamble byte matches; otherwise, 0.</returns>
                static int SkipByteOrderMark (byte[] input, ReadOnlySpan<byte> preamble)
                {
                        int index = 0;

                        foreach (byte expected in preamble) {
                                // Any mismatch means there is no BOM to skip.
                                if (input[index++] != expected)
                                        return 0;
                        }

                        return preamble.Length;
                }
1✔
367

368
                /// <summary>
                /// Read enough of the stream to test for the byte-order-mark of the configured encoding.
                /// </summary>
                /// <remarks>
                /// Bytes read here are left in the input buffer (with inputEnd advanced) so that the
                /// caller can decode them; only the returned offset is to be skipped.
                /// </remarks>
                /// <returns>The number of leading input bytes to skip, or 0 if no complete byte-order-mark is present.</returns>
                int DetectByteOrderMark ()
                {
#if NET6_0_OR_GREATER
                        var preamble = encoding!.Preamble;
#else
                        var preamble = encoding!.GetPreamble ();
#endif

                        // Encodings without a preamble have nothing to detect.
                        if (preamble.Length == 0)
                                return 0;

                        // A single Read may return fewer bytes than requested, so keep reading until
                        // the whole preamble could be present or the stream ends.
                        do {
                                int nread = stream!.Read (input, inputEnd, input.Length - inputEnd);

                                if (nread == 0)
                                        break;

                                inputEnd += nread;
                        } while (inputEnd < preamble.Length);

                        // The stream ended before a full preamble could be read. Comparing anyway would
                        // look at unread (zero-filled) buffer bytes, which can spuriously match preambles
                        // that end in zero bytes (e.g. UTF-32LE: FF FE 00 00) and yield a skip offset
                        // larger than the number of bytes actually available.
                        if (inputEnd < preamble.Length)
                                return 0;

                        return SkipByteOrderMark (input, preamble);
                }
1✔
390

391
                /// <summary>
                /// Sniff the first bytes of the stream for a Unicode byte-order-mark and create the decoder.
                /// </summary>
                /// <remarks>
                /// <para>Recognizes the UTF-32BE, UTF-16BE, UTF-32LE, UTF-16LE and UTF-8 byte-order-marks. If one is
                /// found, it replaces the configured encoding; otherwise the configured encoding is kept.</para>
                /// <para>Bytes read here are left in the input buffer (with inputEnd advanced) so that the
                /// caller can decode them; only the returned offset is to be skipped.</para>
                /// </remarks>
                /// <returns>The number of leading input bytes to skip, or 0 if the input does not start with the selected encoding's preamble.</returns>
                [MemberNotNull (nameof (decoder))]
                int DetectEncodingFromByteOrderMarks ()
                {
                        // The longest byte-order-mark is 4 bytes; a single Read may return fewer bytes
                        // than requested, so keep reading until 4 are available or the stream ends.
                        do {
                                int nread = stream!.Read (input!, inputEnd, input!.Length - inputEnd);

                                if (nread == 0)
                                        break;

                                inputEnd += nread;
                        } while (inputEnd < 4);

                        int first2Bytes = inputEnd >= 2 ? input[0] << 8 | input[1] : 0;
                        int next2Bytes = inputEnd >= 4 ? (input[2] << 8 | input[3]) : 0;
                        const int UTF32BE = 12001;

                        switch (first2Bytes) {
                        case 0x0000:
                                // 00 00 FE FF
                                if (next2Bytes == 0xFEFF)
                                        encoding = Encoding.GetEncoding (UTF32BE);
                                break;
                        case 0xFEFF:
                                encoding = Encoding.BigEndianUnicode;
                                break;
                        case 0xFFFE:
                                // FF FE 00 00 is UTF-32LE, but only if all 4 bytes were actually read;
                                // a stream that ends right after FF FE is UTF-16LE.
                                if (inputEnd >= 4 && next2Bytes == 0x0000)
                                        encoding = Encoding.UTF32;
                                else
                                        encoding = Encoding.Unicode;
                                break;
                        case 0xEFBB:
                                // The UTF-8 byte-order-mark is only 3 bytes long (EF BB BF), so it must
                                // also be recognized when the stream contains nothing else.
                                if (inputEnd >= 3 && input[2] == 0xBF)
                                        encoding = new UTF8Encoding (true, true);
                                break;
                        }

                        decoder = encoding!.GetDecoder ();

#if NET6_0_OR_GREATER
                        var preamble = encoding.Preamble;
#else
                        var preamble = encoding.GetPreamble ();
#endif

                        // Never report more bytes to skip than were read: comparing a preamble against
                        // unread (zero-filled) buffer bytes could otherwise produce an offset past inputEnd.
                        if (inputEnd < preamble.Length)
                                return 0;

                        return SkipByteOrderMark (input, preamble);
                }
1✔
437

438
                /// <summary>
                /// Refill the character buffer from the stream or text reader once it has been fully consumed.
                /// </summary>
                /// <remarks>
                /// Does nothing while unread characters remain or after end-of-input has been seen.
                /// Sets eof when a refill produces no characters.
                /// </remarks>
                /// <exception cref="InvalidOperationException">Neither a stream nor a text reader is available.</exception>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                void FillBuffer ()
                {
                        if (bufferIndex != bufferEnd || eof)
                                return;

                        if (stream != null) {
                                int offset;

                                if (decoder == null) {
                                        // First read with auto-detection enabled: this also creates the decoder.
                                        offset = DetectEncodingFromByteOrderMarks ();
                                } else if (detectByteOrderMark) {
                                        // First read with a fixed encoding: check for that encoding's preamble once.
                                        offset = DetectByteOrderMark ();
                                        detectByteOrderMark = false;
                                } else {
                                        offset = 0;
                                }

                                bufferIndex = 0;
                                bufferEnd = 0;

                                // Decode until at least one char is produced or the stream is exhausted.
                                do {
                                        if (offset == inputEnd) {
                                                inputEnd = stream.Read (input, 0, input.Length);
                                                offset = 0;
                                        }

                                        // A zero-length read marks end-of-stream, so ask the decoder to flush.
                                        bool flush = inputEnd == 0;

                                        bufferEnd = decoder.GetChars (input, offset, inputEnd - offset, buffer, 0, flush);
                                        offset = inputEnd;
                                } while (bufferEnd == 0 && inputEnd > 0);

                                inputEnd = 0;
                        } else if (textReader != null) {
                                bufferEnd = textReader.Read (buffer, 0, buffer.Length);
                                bufferIndex = 0;
                        } else {
                                throw new InvalidOperationException ("No input stream or text reader has been provided.");
                        }

                        eof = bufferEnd == 0;
                }
1✔
480

481
                /// <summary>
                /// Look at the next character without consuming it.
                /// </summary>
                /// <param name="c">The next character, or <c>'\0'</c> if none is available.</param>
                /// <returns><see langword="true" /> if a character is available; otherwise, <see langword="false" />.</returns>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                bool TryPeek (out char c)
                {
                        FillBuffer ();

                        bool available = bufferIndex < bufferEnd;

                        c = available ? buffer[bufferIndex] : '\0';

                        return available;
                }
1✔
495

496
                /// <summary>
                /// Advance to the next line: bump the line number and reset the line position to 1.
                /// </summary>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                void IncrementLineNumber ()
                {
                        lineNumber++;
                        linePosition = 1;
                }
1✔
502

503
                /// <summary>
                /// Consume one buffered character, updating the line/position tracking.
                /// </summary>
                /// <param name="c">The character being consumed; a line feed starts a new line.</param>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                void ConsumeCharacter (char c)
                {
                        bufferIndex++;

                        if (c != '\n') {
                                linePosition++;
                                return;
                        }

                        IncrementLineNumber ();
                }
1✔
514

515
                /// <summary>
                /// Read and consume the next character, updating the line/position tracking.
                /// </summary>
                /// <param name="c">The character read, or <c>'\0'</c> if none is available.</param>
                /// <returns><see langword="true" /> if a character was read; otherwise, <see langword="false" />.</returns>
                [MethodImpl (MethodImplOptions.AggressiveInlining)]
                bool TryRead (out char c)
                {
                        FillBuffer ();

                        if (bufferIndex >= bufferEnd) {
                                c = '\0';
                                return false;
                        }

                        c = buffer[bufferIndex++];

                        if (c != '\n')
                                linePosition++;
                        else
                                IncrementLineNumber ();

                        return true;
                }
1✔
536

537
                /// <summary>
                /// Compare the accumulated name buffer with a string, ignoring ASCII case.
                /// </summary>
                /// <param name="value">The string to compare against.</param>
                /// <returns><see langword="true" /> if both have the same length and match character by character; otherwise, <see langword="false" />.</returns>
                bool NameIs (string value)
                {
                        int length = value.Length;

                        if (name.Length != length)
                                return false;

                        int i = 0;

                        while (i < length) {
                                if (ToLower (name[i]) != ToLower (value[i]))
                                        return false;

                                i++;
                        }

                        return true;
                }
1✔
549

550
                /// <summary>
                /// Turn the accumulated name buffer into a new attribute on the current tag.
                /// </summary>
                /// <remarks>
                /// The new attribute becomes the current attribute and the name buffer is reset.
                /// </remarks>
                void EmitTagAttribute ()
                {
                        string attributeName = name.ToString ();

                        attribute = CreateAttribute (attributeName);
                        tag!.Attributes.Add (attribute);

                        name.Length = 0;
                }
1✔
556

557
                /// <summary>
                /// Create a comment token and reset the comment-related tokenizer state.
                /// </summary>
                /// <param name="comment">The comment text.</param>
                /// <param name="bogus"><see langword="true" /> if the comment is bogus; otherwise, <see langword="false" />.</param>
                /// <returns>The comment token.</returns>
                HtmlToken EmitCommentToken (string comment, bool bogus = false)
                {
                        HtmlCommentToken commentToken = CreateCommentToken (comment, bogus);

                        // Capture the bang flag on the token before it is cleared below.
                        commentToken.IsBangComment = bang;
                        bang = false;

                        name.Length = 0;
                        data.Length = 0;

                        return commentToken;
                }
1✔
566

567
                // Convenience overload: converts the buffer to a string and forwards to the string overload.
                HtmlToken EmitCommentToken (CharBuffer comment, bool bogus = false)
                        => EmitCommentToken (comment.ToString (), bogus);
1✔
571

572
                /// <summary>
                /// Hand out the pending DOCTYPE token and clear the pending state.
                /// </summary>
                /// <returns>The pending DOCTYPE token; <see langword="null" /> if there is none.</returns>
                HtmlToken? EmitDocType ()
                {
                        HtmlDocTypeToken? pending = doctype;

                        doctype = null;
                        data.Length = 0;

                        return pending;
                }
1✔
579

580
                /// <summary>
                /// Emit the accumulated character data as a data token and reset the data buffer.
                /// </summary>
                /// <param name="encodeEntities">The value assigned to the token's EncodeEntities property.</param>
                /// <param name="truncated"><see langword="true" /> if the data is a truncated tag; such data is discarded when <see cref="IgnoreTruncatedTags"/> is set.</param>
                /// <returns>The data token; <see langword="null" /> if the buffer is empty or the data was discarded.</returns>
                HtmlToken? EmitDataToken (bool encodeEntities, bool truncated)
                {
                        if (data.Length == 0)
                                return null;

                        HtmlDataToken? result = null;

                        // Only build a token when the data is not being dropped as a truncated tag.
                        if (!truncated || !IgnoreTruncatedTags) {
                                result = CreateDataToken (data.ToString ());
                                result.EncodeEntities = encodeEntities;
                        }

                        data.Length = 0;

                        return result;
                }
1✔
596

597
                /// <summary>
                /// Emit the accumulated character data as a CDATA token and reset the data buffer.
                /// </summary>
                /// <returns>The CDATA token; <see langword="null" /> if the buffer is empty.</returns>
                HtmlToken? EmitCDataToken ()
                {
                        if (data.Length > 0) {
                                HtmlCDataToken cdataToken = CreateCDataToken (data.ToString ());

                                data.Length = 0;

                                return cdataToken;
                        }

                        return null;
                }
1✔
607

608
                /// <summary>
                /// Emit the accumulated character data as a script data token and reset the data buffer.
                /// </summary>
                /// <returns>The script data token; <see langword="null" /> if the buffer is empty.</returns>
                HtmlToken? EmitScriptDataToken ()
                {
                        if (data.Length > 0) {
                                HtmlScriptDataToken scriptToken = CreateScriptDataToken (data.ToString ());

                                data.Length = 0;

                                return scriptToken;
                        }

                        return null;
                }
1✔
618

619
                /// <summary>
                /// Emit the current tag token and select the tokenizer state that follows it.
                /// </summary>
                /// <remarks>
                /// End tags and empty elements always return to the Data state. For other start tags the
                /// next state depends on the tag id; an <c>html</c> start tag additionally updates
                /// <see cref="HtmlNamespace"/> from its last xmlns attribute that has a value.
                /// </remarks>
                /// <returns>The tag token.</returns>
                HtmlToken EmitTagToken ()
                {
                        HtmlTagToken current = tag!;

                        if (current.IsEndTag || current.IsEmptyElement) {
                                TokenizerState = HtmlTokenizerState.Data;
                        } else {
                                switch (current.Id) {
                                case HtmlTagId.Style:
                                case HtmlTagId.Xmp:
                                case HtmlTagId.IFrame:
                                case HtmlTagId.NoEmbed:
                                case HtmlTagId.NoFrames:
                                case HtmlTagId.NoScript:
                                        // TODO (NoScript): only switch into the RawText state if scripting is enabled
                                        TokenizerState = HtmlTokenizerState.RawText;
                                        activeTagName = current.Name;
                                        break;
                                case HtmlTagId.Title:
                                case HtmlTagId.TextArea:
                                        TokenizerState = HtmlTokenizerState.RcData;
                                        activeTagName = current.Name;
                                        break;
                                case HtmlTagId.PlainText:
                                        TokenizerState = HtmlTokenizerState.PlainText;
                                        break;
                                case HtmlTagId.Script:
                                        TokenizerState = HtmlTokenizerState.ScriptData;
                                        break;
                                case HtmlTagId.Html:
                                        TokenizerState = HtmlTokenizerState.Data;

                                        // Walk the attributes from last to first so that the last xmlns
                                        // attribute with a value is the one that takes effect.
                                        for (int index = current.Attributes.Count - 1; index >= 0; index--) {
                                                var candidate = current.Attributes[index];

                                                if (candidate.Id == HtmlAttributeId.XmlNS && candidate.Value != null) {
                                                        HtmlNamespace = candidate.Value.ToHtmlNamespace ();
                                                        break;
                                                }
                                        }
                                        break;
                                default:
                                        TokenizerState = HtmlTokenizerState.Data;
                                        break;
                                }
                        }

                        // The tag has been handed off; clear the per-tag state.
                        data.Length = 0;
                        tag = null;

                        return current;
                }
1✔
668

669
                // 8.2.4.69 Tokenizing character references
                //
                // Runs after an '&' has been consumed by one of the text states. Either appends a
                // literal '&' (when what follows cannot start a reference), or feeds characters to the
                // entity decoder and appends the decoded value, then continues in the 'next' state.
                //
                // Returns a data token only when the input ends; otherwise null.
                HtmlToken? ReadCharacterReference (HtmlTokenizerState next)
                {
                        if (!TryPeek (out char c)) {
                                // Nothing follows the '&': emit it as plain text.
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                data.Append ('&');

                                return EmitDataToken (true, false);
                        }

                        bool cannotStartReference = c == '\t' || c == '\r' || c == '\n' || c == '\f' ||
                                c == ' ' || c == '<' || c == '&';

                        if (cannotStartReference) {
                                // no character is consumed, emit '&'
                                TokenizerState = next;
                                data.Append ('&');
                                return null;
                        }

                        entity.Push ('&');

                        while (true) {
                                // Stop as soon as the decoder rejects a character; that character is
                                // left unconsumed.
                                if (!entity.Push (c))
                                        break;

                                ConsumeCharacter (c);

                                if (c == ';')
                                        break;

                                if (TryPeek (out c))
                                        continue;

                                // Input ended in the middle of a reference: emit the raw text as pushed.
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                data.Append (entity.GetPushedInput ());
                                entity.Reset ();

                                return EmitDataToken (true, false);
                        }

                        TokenizerState = next;

                        data.Append (entity.GetValue ());
                        entity.Reset ();

                        return null;
                }
1✔
711

712
                // Shared handler for the RCDATA / RAWTEXT less-than sign states.
                //
                // Buffers the '<'. If the next character is '/', it is consumed and buffered too, the
                // tag name buffer is cleared and the tokenizer moves to rawTextEndTagOpen; otherwise
                // the tokenizer goes back to rawText. Always returns null.
                HtmlToken? ReadGenericRawTextLessThan (HtmlTokenizerState rawText, HtmlTokenizerState rawTextEndTagOpen)
                {
                        data.Append ('<');

                        if (!TryPeek (out char next) || next != '/') {
                                TokenizerState = rawText;
                                return null;
                        }

                        TokenizerState = rawTextEndTagOpen;
                        ConsumeCharacter (next);
                        data.Append ('/');
                        name.Length = 0;

                        return null;
                }
1✔
727

728
                // Shared handler for the RCDATA / RAWTEXT end tag open states (after "</").
                //
                // An ASCII letter starts a candidate end tag name: it is consumed, added to both the
                // name and data buffers, and the tokenizer moves to rawTextEndTagName. Any other
                // character is left unconsumed and the tokenizer returns to rawText. At end of input
                // the buffered text is emitted via EmitDataToken (decoded, true).
                HtmlToken? ReadGenericRawTextEndTagOpen (bool decoded, HtmlTokenizerState rawText, HtmlTokenizerState rawTextEndTagName)
                {
                        if (!TryPeek (out char next)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitDataToken (decoded, true);
                        }

                        if (!IsAsciiLetter (next)) {
                                TokenizerState = rawText;
                                return null;
                        }

                        TokenizerState = rawTextEndTagName;
                        ConsumeCharacter (next);
                        name.Append (next);
                        data.Append (next);

                        return null;
                }
1✔
746

747
                // Shared handler for the RCDATA / RAWTEXT end tag name states.
                //
                // Accumulates ASCII letters into the name buffer while the candidate end tag could
                // still match activeTagName. 'decoded' is forwarded to EmitDataToken when the input
                // ends mid-name; 'rawText' is the state to fall back to when the text read so far
                // turns out not to be the matching end tag.
                //
                // Returns the end tag token when "</name>" completes, a data token at end of input,
                // and null whenever the caller should simply continue in the new TokenizerState.
                HtmlToken? ReadGenericRawTextEndTagName (bool decoded, HtmlTokenizerState rawText)
                {
                        var current = TokenizerState;

                        do {
                                // Peek rather than read: a character that disqualifies the candidate
                                // name must stay in the input so the rawText state reconsumes it.
                                // Otherwise a '<' would be swallowed as text and a real end tag that
                                // starts there (e.g. "</tit</title>") would be missed, and '&' / NUL
                                // would bypass the rawText state's own handling.
                                if (!TryPeek (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitDataToken (decoded, true);
                                }

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        if (NameIs (activeTagName)) {
                                                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                                break;
                                        }

                                        goto default;
                                case '/':
                                        if (NameIs (activeTagName)) {
                                                TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                                break;
                                        }
                                        goto default;
                                case '>':
                                        if (NameIs (activeTagName)) {
                                                ConsumeCharacter (c);

                                                var token = CreateTagToken (name.ToString (), true);
                                                TokenizerState = HtmlTokenizerState.Data;
                                                data.Length = 0;
                                                name.Length = 0;
                                                return token;
                                        }
                                        goto default;
                                default:
                                        if (!IsAsciiLetter (c)) {
                                                // Not the matching end tag: leave 'c' unconsumed and let
                                                // the rawText state process it. The text buffered so far
                                                // ("</" plus the letters) stays in 'data' as plain text.
                                                TokenizerState = rawText;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Append (c);
                                        break;
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                ConsumeCharacter (c);
                                data.Append (c);
                        } while (TokenizerState == current);

                        // The name matched and was followed by whitespace or '/': start the end tag token.
                        tag = CreateTagToken (name.ToString (), true);
                        name.Length = 0;

                        return null;
                }
1✔
801

802
                // 8.2.4.1 Data state
                //
                // Buffers ordinary text until a '<' (tag open), an '&' that must be decoded, or the
                // end of the input. Returns null when switching to the character reference state
                // (the buffered text is kept); otherwise returns the result of EmitDataToken.
                HtmlToken? ReadData ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '<') {
                                        TokenizerState = HtmlTokenizerState.TagOpen;
                                } else if (c == '&' && DecodeCharacterReferences) {
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInData;
                                        return null;
                                } else {
                                        // Includes NUL: a parse error, but it is emitted anyway.
                                        data.Append (c);
                                }
                        } while (TokenizerState == HtmlTokenizerState.Data);

                        return EmitDataToken (DecodeCharacterReferences, false);
                }
1✔
831

832
                // 8.2.4.2 Character reference in data state
                //
                // Decodes a character reference and resumes in the Data state.
                HtmlToken? ReadCharacterReferenceInData () =>
                        ReadCharacterReference (HtmlTokenizerState.Data);
1✔
837

838
                // 8.2.4.3 RCDATA state
                //
                // Buffers text (NUL becomes U+FFFD) until a '<', an '&' that must be decoded, or the
                // end of the input. A '<' emits the buffered text and moves to the RCDATA less-than
                // state; a decodable '&' returns null and keeps the buffered text.
                HtmlToken? ReadRcData ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '<') {
                                        TokenizerState = HtmlTokenizerState.RcDataLessThan;
                                        return EmitDataToken (DecodeCharacterReferences, false);
                                }

                                if (c == '&' && DecodeCharacterReferences) {
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInRcData;
                                        return null;
                                }

                                data.Append (c == '\0' ? '\uFFFD' : c);
                        } while (TokenizerState == HtmlTokenizerState.RcData);

                        return EmitDataToken (DecodeCharacterReferences, false);
                }
1✔
866

867
                // 8.2.4.4 Character reference in RCDATA state
                //
                // Decodes a character reference and resumes in the RCDATA state.
                HtmlToken? ReadCharacterReferenceInRcData () =>
                        ReadCharacterReference (HtmlTokenizerState.RcData);
1✔
872

873
                // 8.2.4.5 RAWTEXT state
                //
                // Buffers text verbatim (NUL becomes U+FFFD) until a '<' or the end of the input,
                // then emits the buffered text without character reference decoding.
                HtmlToken? ReadRawText ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '<') {
                                        TokenizerState = HtmlTokenizerState.RawTextLessThan;
                                        return EmitDataToken (false, false);
                                }

                                data.Append (c == '\0' ? '\uFFFD' : c);
                        } while (TokenizerState == HtmlTokenizerState.RawText);

                        return EmitDataToken (false, false);
                }
1✔
894

895
                // 8.2.4.6 Script data state
                //
                // Buffers script text (NUL becomes U+FFFD) until a '<' or the end of the input, then
                // hands the buffered text to EmitScriptDataToken.
                HtmlToken? ReadScriptData ()
                {
                        while (true) {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '<') {
                                        TokenizerState = HtmlTokenizerState.ScriptDataLessThan;
                                        break;
                                }

                                data.Append (c == '\0' ? '\uFFFD' : c);

                                if (TokenizerState != HtmlTokenizerState.ScriptData)
                                        break;
                        }

                        return EmitScriptDataToken ();
                }
1✔
916

917
                // 8.2.4.7 PLAINTEXT state
                //
                // Drains the input buffer directly, refilling it until 'eof' is set, and buffers every
                // character as text (NUL becomes U+FFFD). Line/column bookkeeping is done inline here
                // rather than through the read helpers. Finishes in the EndOfFile state.
                HtmlToken? ReadPlainText ()
                {
                        do {
                                while (bufferIndex < bufferEnd) {
                                        char c = buffer[bufferIndex];

                                        bufferIndex++;
                                        linePosition++;

                                        if (c == '\n')
                                                IncrementLineNumber ();

                                        data.Append (c == '\0' ? '\uFFFD' : c);
                                }

                                FillBuffer ();
                        } while (!eof);

                        TokenizerState = HtmlTokenizerState.EndOfFile;

                        return EmitDataToken (false, false);
                }
1✔
946

947
                // 8.2.4.8 Tag open state
                //
                // Runs after a '<' has been consumed in the Data state and decides what kind of
                // markup follows: '!' (markup declaration), '?' (bogus comment), '/' (end tag) or an
                // ASCII letter (start tag name). Anything else means the '<' was plain text.
                //
                // Returns a "<" data token (or null when IgnoreTruncatedTags is set) if the input
                // ends right after the '<'; otherwise returns null and leaves the decision in
                // TokenizerState.
                HtmlToken? ReadTagOpen ()
                {
                        // Peek rather than read: a character that cannot start a tag must stay in the
                        // input so the Data state reconsumes it. Consuming it here would swallow a
                        // second '<' (so "<<b>" would lose the <b> tag) or an '&' that should be decoded.
                        if (!TryPeek (out char c)) {
                                var token = IgnoreTruncatedTags ? null : CreateDataToken ("<");
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return token;
                        }

                        // Note: we save the data in case we hit a parse error and have to emit a data token
                        data.Append ('<');

                        switch (c) {
                        case '!':
                                TokenizerState = HtmlTokenizerState.MarkupDeclarationOpen;
                                break;
                        case '?':
                                // The bogus comment starts with the '?' itself, replacing the buffered '<'.
                                TokenizerState = HtmlTokenizerState.BogusComment;
                                ConsumeCharacter (c);
                                data.Length = 1;
                                data[0] = c;
                                return null;
                        case '/':
                                TokenizerState = HtmlTokenizerState.EndTagOpen;
                                break;
                        default:
                                if (!IsAsciiLetter (c)) {
                                        // Not a tag: the buffered '<' is literal text and 'c' is left for
                                        // the Data state to handle.
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return null;
                                }

                                TokenizerState = HtmlTokenizerState.TagName;
                                isEndTag = false;
                                name.Append (c);
                                break;
                        }

                        ConsumeCharacter (c);
                        data.Append (c);

                        return null;
                }
1✔
985

986
                // 8.2.4.9 End tag open state
                //
                // Runs after "</" has been read. An ASCII letter starts an end tag name, '>' drops the
                // buffered text and returns to the Data state, and anything else begins a bogus
                // comment whose text starts with that character. At end of input the buffered text is
                // emitted via EmitDataToken (false, true).
                HtmlToken? ReadEndTagOpen ()
                {
                        if (!TryRead (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitDataToken (false, true);
                        }

                        // Note: we save the data in case we hit a parse error and have to emit a data token
                        data.Append (c);

                        if (c == '>') {
                                // parse error
                                TokenizerState = HtmlTokenizerState.Data;
                                data.Length = 0; // FIXME: this is probably wrong
                        } else if (IsAsciiLetter (c)) {
                                TokenizerState = HtmlTokenizerState.TagName;
                                isEndTag = true;
                                name.Append (c);
                        } else {
                                TokenizerState = HtmlTokenizerState.BogusComment;
                                data.Length = 1;
                                data[0] = c;
                        }

                        return null;
                }
1✔
1017

1018
                // 8.2.4.10 Tag name state
                //
                // Accumulates the tag name (NUL becomes U+FFFD) until whitespace, '/', '>' or the end
                // of the input. '>' completes the tag and returns it via EmitTagToken; whitespace and
                // '/' create the tag token and return null so that attribute / self-closing parsing
                // can continue; end of input emits the buffered text via EmitDataToken (false, true).
                HtmlToken? ReadTagName ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitDataToken (false, true);
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                if (c == '>') {
                                        tag = CreateTagToken (name.ToString (), isEndTag);
                                        data.Length = 0;
                                        name.Length = 0;

                                        return EmitTagToken ();
                                }

                                if (c == '/') {
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                } else if (c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' ') {
                                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                } else {
                                        name.Append (c == '\0' ? '\uFFFD' : c);
                                }
                        } while (TokenizerState == HtmlTokenizerState.TagName);

                        tag = CreateTagToken (name.ToString (), isEndTag);
                        name.Length = 0;

                        return null;
                }
1✔
1056

1057
                // 8.2.4.11 RCDATA less-than sign state
                //
                // Delegates to the shared less-than handler with the RCDATA state pair.
                HtmlToken? ReadRcDataLessThan () =>
                        ReadGenericRawTextLessThan (HtmlTokenizerState.RcData, HtmlTokenizerState.RcDataEndTagOpen);
1✔
1062

1063
                // 8.2.4.12 RCDATA end tag open state
                //
                // Delegates to the shared end-tag-open handler, passing DecodeCharacterReferences as
                // the 'decoded' flag.
                HtmlToken? ReadRcDataEndTagOpen () =>
                        ReadGenericRawTextEndTagOpen (DecodeCharacterReferences, HtmlTokenizerState.RcData, HtmlTokenizerState.RcDataEndTagName);
1✔
1068

1069
                // 8.2.4.13 RCDATA end tag name state
                //
                // Delegates to the shared end-tag-name handler, passing DecodeCharacterReferences as
                // the 'decoded' flag and RCDATA as the fallback state.
                HtmlToken? ReadRcDataEndTagName () =>
                        ReadGenericRawTextEndTagName (DecodeCharacterReferences, HtmlTokenizerState.RcData);
1✔
1074

1075
                // 8.2.4.14 RAWTEXT less-than sign state
                //
                // Delegates to the shared less-than handler with the RAWTEXT state pair.
                HtmlToken? ReadRawTextLessThan () =>
                        ReadGenericRawTextLessThan (HtmlTokenizerState.RawText, HtmlTokenizerState.RawTextEndTagOpen);
1✔
1080

1081
                // 8.2.4.15 RAWTEXT end tag open state
                //
                // Delegates to the shared end-tag-open handler with decoding disabled.
                HtmlToken? ReadRawTextEndTagOpen () =>
                        ReadGenericRawTextEndTagOpen (false, HtmlTokenizerState.RawText, HtmlTokenizerState.RawTextEndTagName);
1✔
1086

1087
                // 8.2.4.16 RAWTEXT end tag name state
                //
                // Delegates to the shared end-tag-name handler with decoding disabled and RAWTEXT as
                // the fallback state.
                HtmlToken? ReadRawTextEndTagName () =>
                        ReadGenericRawTextEndTagName (false, HtmlTokenizerState.RawText);
1✔
1092

1093
                // 8.2.4.17 Script data less-than sign state
                //
                // Buffers the '<' and looks at the next character: '/' may start an end tag, '!' may
                // start an escaped section, and anything else returns to the script data state without
                // consuming the character. Always returns null.
                HtmlToken? ReadScriptDataLessThan ()
                {
                        data.Append ('<');

                        bool hasNext = TryPeek (out char c);

                        if (hasNext && c == '/') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEndTagOpen;
                                ConsumeCharacter (c);
                                data.Append ('/');
                                name.Length = 0;
                                return null;
                        }

                        if (c == '!') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapeStart;
                                ConsumeCharacter (c);
                                data.Append ('!');
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptData;

                        return null;
                }
1✔
1113

1114
                // 8.2.4.18 Script data end tag open state
                //
                // Runs after "</" inside script data. Only an 's' or 'S' can begin the closing script
                // tag: it is consumed, recorded as a lowercase 's' in the name buffer and buffered
                // verbatim as data. Any other character is left unconsumed and the tokenizer returns
                // to the script data state.
                HtmlToken? ReadScriptDataEndTagOpen ()
                {
                        if (!TryPeek (out char next)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitScriptDataToken ();
                        }

                        if (next != 'S' && next != 's') {
                                TokenizerState = HtmlTokenizerState.ScriptData;
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptDataEndTagName;
                        ConsumeCharacter (next);
                        name.Append ('s');
                        data.Append (next);

                        return null;
                }
1✔
1133

1134
                // 8.2.4.19 Script data end tag name state
                //
                // Accumulates ASCII letters into the name buffer while the candidate end tag could
                // still turn out to be "script".
                //
                // Returns the end tag token when "</script>" completes, the result of
                // EmitScriptDataToken at end of input, and null whenever the caller should simply
                // continue in the new TokenizerState.
                HtmlToken? ReadScriptDataEndTagName ()
                {
                        do {
                                // Peek rather than read: a character that disqualifies the candidate
                                // name must stay in the input so the script data state reconsumes it.
                                // Otherwise a '<' would be swallowed as text and a real end tag that
                                // starts there (e.g. "</scr</script>") would be missed.
                                if (!TryPeek (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        if (NameIs ("script")) {
                                                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                                break;
                                        }
                                        goto default;
                                case '/':
                                        if (NameIs ("script")) {
                                                TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                                break;
                                        }
                                        goto default;
                                case '>':
                                        if (NameIs ("script")) {
                                                ConsumeCharacter (c);

                                                var token = CreateTagToken (name.ToString (), true);
                                                TokenizerState = HtmlTokenizerState.Data;
                                                data.Length = 0;
                                                name.Length = 0;
                                                return token;
                                        }
                                        goto default;
                                default:
                                        if (!IsAsciiLetter (c)) {
                                                // Not the closing script tag: leave 'c' unconsumed for the
                                                // script data state. The text buffered so far stays in
                                                // 'data' as script text.
                                                TokenizerState = HtmlTokenizerState.ScriptData;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Append (c);
                                        break;
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                ConsumeCharacter (c);
                                data.Append (c);
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataEndTagName);

                        // "script" was followed by whitespace or '/': start the end tag token.
                        tag = CreateTagToken (name.ToString (), true);
                        name.Length = 0;

                        return null;
                }
1✔
1187

1188
                // 8.2.4.20 Script data escape start state
                //
                // Runs after "<!" inside script data. A '-' is consumed and buffered and the tokenizer
                // advances to the escape-start-dash state; anything else (or end of input) returns to
                // the script data state without consuming. Always returns null.
                HtmlToken? ReadScriptDataEscapeStart ()
                {
                        if (!TryPeek (out char next) || next != '-') {
                                TokenizerState = HtmlTokenizerState.ScriptData;
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptDataEscapeStartDash;
                        ConsumeCharacter (next);
                        data.Append ('-');

                        return null;
                }
1✔
1201

1202
                // 8.2.4.21 Script data escape start dash state
                //
                // Runs after "<!-" inside script data. A second '-' is consumed and buffered and the
                // tokenizer enters the escaped-dash-dash state; anything else (or end of input)
                // returns to the script data state without consuming. Always returns null.
                HtmlToken? ReadScriptDataEscapeStartDash ()
                {
                        if (!TryPeek (out char next) || next != '-') {
                                TokenizerState = HtmlTokenizerState.ScriptData;
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedDashDash;
                        ConsumeCharacter (next);
                        data.Append ('-');

                        return null;
                }
1✔
1215

1216
                // 8.2.4.22 Script data escaped state
                //
                // Buffers escaped script text (NUL becomes U+FFFD) until a '-', a '<' or the end of
                // the input. A '<' first emits what has been buffered via EmitScriptDataToken and then
                // starts a fresh buffer with the '<'; that emitted token is the return value. A '-'
                // is buffered and returns null.
                HtmlToken? ReadScriptDataEscaped ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitScriptDataToken ();
                                }

                                if (c == '-') {
                                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedDash;
                                        data.Append ('-');
                                        return null;
                                }

                                if (c == '<') {
                                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedLessThan;

                                        var pending = EmitScriptDataToken ();
                                        data.Append ('<');
                                        return pending;
                                }

                                data.Append (c == '\0' ? '\uFFFD' : c);
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataEscaped);

                        return null;
                }
1✔
1245

1246
                // 8.2.4.23 Script data escaped dash state
                //
                // Runs after a single '-' in escaped script data and reads exactly one character:
                // another '-' moves to the dash-dash state, '<' emits the buffered text and starts a
                // fresh buffer with the '<', and anything else (NUL becomes U+FFFD) is buffered and
                // returns to the escaped state.
                HtmlToken? ReadScriptDataEscapedDash ()
                {
                        if (!TryRead (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitScriptDataToken ();
                        }

                        if (c == '<') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapedLessThan;

                                var pending = EmitScriptDataToken ();
                                data.Append ('<');
                                return pending;
                        }

                        if (c == '-') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapedDashDash;
                                data.Append ('-');
                        } else {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                data.Append (c == '\0' ? '\uFFFD' : c);
                        }

                        return null;
                }
1✔
1274

1275
                // 8.2.4.24 Script data escaped dash dash state
1276
                // Processes characters that follow two or more consecutive '-' characters
                // read in the script-data-escaped state.
                //
                // Additional dashes keep the tokenizer in this state, '<' emits the buffered
                // script data and moves to the escaped less-than state, '>' leaves the
                // escaped section (back to script data) and any other character returns to
                // the escaped state.
                //
                // Returns the result of EmitScriptDataToken () when the input ends or a '<'
                // is read; returns null otherwise.
                HtmlToken? ReadScriptDataEscapedDashDash ()
                {
                        HtmlToken? token = null;

                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '-':
                                        // Still inside a run of dashes; the state is unchanged so the loop continues.
                                        data.Append ('-');
                                        break;
                                case '<':
                                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedLessThan;
                                        token = EmitScriptDataToken ();
                                        data.Append ('<');
                                        break;
                                case '>':
                                        TokenizerState = HtmlTokenizerState.ScriptData;
                                        data.Append ('>');
                                        break;
                                default:
                                        // NUL is replaced with U+FFFD, matching the escaped and escaped-dash states.
                                        TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                        data.Append (c == '\0' ? '\uFFFD' : c);
                                        break;
                                }
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataEscapedDashDash);

                        return token;
                }
1✔
1308

1309
                // 8.2.4.25 Script data escaped less-than sign state
1310
                // Decides what a '<' read in the script-data-escaped state introduces by
                // peeking at the next character:
                //   '/'          -> possible end tag (escaped end tag open state)
                //   ASCII letter -> possible nested "<script" (double escape start state)
                //   other / EOF  -> plain data; nothing is consumed and the escaped state resumes
                //
                // Always returns null; the '<' itself has already been buffered by the caller state.
                HtmlToken? ReadScriptDataEscapedLessThan ()
                {
                        if (!TryPeek (out char c)) {
                                // Let the escaped state deal with the end of input.
                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                return null;
                        }

                        if (c == '/') {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscapedEndTagOpen;
                                ConsumeCharacter (c);
                                data.Append (c);
                                name.Length = 0;
                        } else if (IsAsciiLetter (c)) {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapeStart;
                                ConsumeCharacter (c);
                                data.Append (c);

                                // Start the candidate name from an empty buffer (as the '/' branch does);
                                // leftovers from an earlier candidate would otherwise make the
                                // later NameIs ("script") comparison fail.
                                name.Length = 0;
                                name.Append (c);
                        } else {
                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                        }

                        return null;
                }
1✔
1333

1334
                // 8.2.4.26 Script data escaped end tag open state
1335
                // Handles the character after "</" inside escaped script data. An ASCII
                // letter starts an end tag name; anything else is left unconsumed for the
                // escaped state. At end of input the result of EmitScriptDataToken () is
                // returned; otherwise null.
                HtmlToken? ReadScriptDataEscapedEndTagOpen ()
                {
                        if (!TryPeek (out char next)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitScriptDataToken ();
                        }

                        if (!IsAsciiLetter (next)) {
                                // Not a tag name: resume the escaped state without consuming anything.
                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                return null;
                        }

                        // First letter of the end tag name: consume it and record it both as
                        // raw data and as the start of the name.
                        TokenizerState = HtmlTokenizerState.ScriptDataEscapedEndTagName;
                        ConsumeCharacter (next);
                        data.Append (next);
                        name.Append (next);

                        return null;
                }
1✔
1353

1354
                // 8.2.4.27 Script data escaped end tag name state
1355
                // Collects the name of a candidate end tag ("</name") found inside escaped
                // script data.
                //
                // When the collected name is "script" and is followed by whitespace, '/' or
                // '>', the candidate is a real end tag: the tokenizer moves on to attribute
                // parsing, the self-closing state, or returns the tag token respectively.
                // Any other terminator means the text was ordinary script data: the
                // terminating character is left unconsumed so that the script-data-escaped
                // state can process it (it may be a '-' or '<' that is significant there),
                // and the name buffer is cleared.
                //
                // Returns the end tag token for "</script>", the result of
                // EmitScriptDataToken () at end of input, or null otherwise.
                HtmlToken? ReadScriptDataEscapedEndTagName ()
                {
                        do {
                                if (!TryPeek (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        if (NameIs ("script")) {
                                                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                                break;
                                        }

                                        goto default;
                                case '/':
                                        if (NameIs ("script")) {
                                                TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                                break;
                                        }

                                        goto default;
                                case '>':
                                        if (NameIs ("script")) {
                                                var token = CreateTagToken (name.ToString (), true);
                                                ConsumeCharacter (c);
                                                TokenizerState = HtmlTokenizerState.Data;
                                                data.Length = 0;
                                                name.Length = 0;
                                                return token;
                                        }

                                        goto default;
                                default:
                                        if (!IsAsciiLetter (c)) {
                                                // Not an end tag after all: stay inside the escaped section
                                                // and let that state reconsume this character.
                                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Append (c);
                                        break;
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                ConsumeCharacter (c);
                                data.Append (c);
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataEscapedEndTagName);

                        tag = CreateTagToken (name.ToString (), true);
                        name.Length = 0;

                        return null;
                }
1✔
1407

1408
                // 8.2.4.28 Script data double escape start state
1409
                // Collects the name of a candidate nested start tag ("<name") found inside
                // escaped script data.
                //
                // Whitespace, '/' or '>' terminates the name: if it is "script" the
                // tokenizer enters the double-escaped state, otherwise it returns to the
                // escaped state; the terminator is consumed as data either way. Any other
                // non-letter aborts the candidate and is left unconsumed so that the
                // script-data-escaped state can process it (it may be a '-', '<' or NUL
                // that needs special handling there). The name buffer is cleared whenever
                // this state is left.
                //
                // Returns the result of EmitScriptDataToken () at end of input; null otherwise.
                HtmlToken? ReadScriptDataDoubleEscapeStart ()
                {
                        do {
                                if (!TryPeek (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ': case '/': case '>':
                                        if (NameIs ("script"))
                                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                        else
                                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                        name.Length = 0;
                                        break;
                                default:
                                        if (!IsAsciiLetter (c)) {
                                                // Not a tag name: hand the character back to the escaped state.
                                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Append (c);
                                        break;
                                }

                                ConsumeCharacter (c);
                                data.Append (c);
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataDoubleEscapeStart);

                        return null;
                }
1✔
1440

1441
                // 8.2.4.29 Script data double escaped state
1442
                // Buffers script data while in the double-escaped state until a '-' or '<'
                // changes the state or the input ends. NUL characters are stored as U+FFFD.
                //
                // Returns the result of EmitScriptDataToken () at end of input; null otherwise.
                HtmlToken? ReadScriptDataDoubleEscaped ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '-':
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedDash;
                                        data.Append ('-');
                                        break;
                                case '<':
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedLessThan;
                                        data.Append ('<');
                                        break;
                                default:
                                        data.Append (c == '\0' ? '\uFFFD' : c);
                                        break;
                                }

                                // Keep reading for as long as we remain in *this* state. (The condition
                                // previously compared against ScriptDataEscaped, which this body never
                                // sets, so the loop returned after every single character.)
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataDoubleEscaped);

                        return null;
                }
1✔
1467

1468
                // 8.2.4.30 Script data double escaped dash state
1469
                // Processes the one character that follows a single '-' read in the
                // double-escaped state. Returns the result of EmitScriptDataToken () at end
                // of input; null otherwise.
                HtmlToken? ReadScriptDataDoubleEscapedDash ()
                {
                        if (!TryRead (out char ch)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitScriptDataToken ();
                        }

                        if (ch == '-') {
                                // A second consecutive dash.
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedDashDash;
                                data.Append ('-');
                        } else if (ch == '<') {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedLessThan;
                                data.Append ('<');
                        } else {
                                // Anything else (NUL is stored as U+FFFD) returns to the double-escaped state.
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                data.Append (ch == '\0' ? '\uFFFD' : ch);
                        }

                        return null;
                }
1✔
1493

1494
                // 8.2.4.31 Script data double escaped dash dash state
1495
                // Processes characters that follow two or more consecutive '-' characters
                // read in the double-escaped state.
                //
                // Additional dashes keep the tokenizer in this state, '<' moves to the
                // double-escaped less-than state, '>' returns to plain script data and any
                // other character returns to the double-escaped state.
                //
                // Returns the result of EmitScriptDataToken () at end of input; null otherwise.
                HtmlToken? ReadScriptDataDoubleEscapedDashDash ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitScriptDataToken ();
                                }

                                switch (c) {
                                case '-':
                                        // Still inside a run of dashes; the state is unchanged so the loop continues.
                                        data.Append ('-');
                                        break;
                                case '<':
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapedLessThan;
                                        data.Append ('<');
                                        break;
                                case '>':
                                        TokenizerState = HtmlTokenizerState.ScriptData;
                                        data.Append ('>');
                                        break;
                                default:
                                        // NUL is replaced with U+FFFD, matching the other double-escaped states.
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                        data.Append (c == '\0' ? '\uFFFD' : c);
                                        break;
                                }

                                // Loop while we remain in *this* state. (The condition previously named
                                // ScriptDataEscapedDashDash, so a run of dashes was handled one call at a time.)
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataDoubleEscapedDashDash);

                        return null;
                }
1✔
1524

1525
                // 8.2.4.32 Script data double escaped less-than sign state
1526
                // Decides what a '<' read in the double-escaped state introduces. Only "</"
                // is significant: it may begin the "</script" that ends the double escape.
                // Anything else (including end of input) is left unconsumed and the
                // double-escaped state resumes. Always returns null.
                HtmlToken? ReadScriptDataDoubleEscapedLessThan ()
                {
                        if (TryPeek (out char c) && c == '/') {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscapeEnd;
                                ConsumeCharacter (c);
                                data.Append ('/');

                                // The double-escape-end state accumulates the candidate tag name in
                                // 'name' and compares it with "script"; start from an empty buffer so
                                // that leftovers from an earlier candidate cannot break the comparison.
                                name.Length = 0;
                        } else {
                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                        }

                        return null;
                }
1✔
1538

1539
                // 8.2.4.33 Script data double escape end state
1540
                // Collects the name of a candidate end tag ("</name") found inside
                // double-escaped script data.
                //
                // Whitespace, '/' or '>' terminates the name: if it is "script" the
                // tokenizer drops back to the (single) escaped state, otherwise it stays
                // double-escaped; the terminator is consumed as data either way. Any other
                // non-letter, or the end of input, aborts the candidate without consuming
                // anything. The name buffer is cleared on the way out so that the next
                // candidate name starts empty. Always returns null.
                HtmlToken? ReadScriptDataDoubleEscapeEnd ()
                {
                        do {
                                if (!TryPeek (out char c)) {
                                        // End of input: let the double-escaped state report it.
                                        TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                        break;
                                }

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ': case '/': case '>':
                                        if (NameIs ("script"))
                                                TokenizerState = HtmlTokenizerState.ScriptDataEscaped;
                                        else
                                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                        ConsumeCharacter (c);
                                        data.Append (c);
                                        break;
                                default:
                                        if (!IsAsciiLetter (c)) {
                                                // Not a tag name; the character is reprocessed by the double-escaped state.
                                                TokenizerState = HtmlTokenizerState.ScriptDataDoubleEscaped;
                                        } else {
                                                ConsumeCharacter (c);
                                                name.Append (c);
                                                data.Append (c);
                                        }
                                        break;
                                }
                        } while (TokenizerState == HtmlTokenizerState.ScriptDataDoubleEscapeEnd);

                        name.Length = 0;

                        return null;
                }
1✔
1569

1570
                // 8.2.4.34 Before attribute name state
1571
                // Skips whitespace inside a tag until something significant is read:
                //   '/'   -> self-closing start tag state (returns null)
                //   '>'   -> returns EmitTagToken ()
                //   other -> first character of an attribute name (returns null)
                // At end of input the pending tag is dropped and EmitDataToken (false, true)
                // is returned.
                HtmlToken? ReadBeforeAttributeName ()
                {
                        while (TryRead (out char ch)) {
                                // Note: the raw text is kept in case a parse error forces us to emit a data token
                                data.Append (ch);

                                switch (ch) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        continue;
                                case '/':
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                        return null;
                                case '>':
                                        return EmitTagToken ();
                                default:
                                        // '"', '\'', '<' and '=' are parse errors here, but they are
                                        // treated like any other character that starts a name.
                                        TokenizerState = HtmlTokenizerState.AttributeName;
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                                        return null;
                                }
                        }

                        TokenizerState = HtmlTokenizerState.EndOfFile;
                        tag = null;

                        return EmitDataToken (false, true);
                }
1✔
1602

1603
                // 8.2.4.35 Attribute name state
1604
                // Accumulates an attribute name in 'name' until a delimiter is read.
                // Whitespace, '/' and '=' select the follow-up state and the attribute is
                // recorded via EmitTagAttribute (); '>' records the attribute and returns
                // EmitTagToken (). At end of input the name and pending tag are discarded
                // and EmitDataToken (false, true) is returned.
                HtmlToken? ReadAttributeName ()
                {
                        do {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;
                                        tag = null;

                                        return EmitDataToken (false, true);
                                }

                                // Note: the raw text is kept in case a parse error forces us to emit a data token
                                data.Append (ch);

                                if (ch == '>') {
                                        EmitTagAttribute ();

                                        return EmitTagToken ();
                                }

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        TokenizerState = HtmlTokenizerState.AfterAttributeName;
                                else if (ch == '/')
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                else if (ch == '=')
                                        TokenizerState = HtmlTokenizerState.BeforeAttributeValue;
                                else
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                        } while (TokenizerState == HtmlTokenizerState.AttributeName);

                        // The name ended on a delimiter: record the attribute before moving on.
                        EmitTagAttribute ();

                        return null;
                }
1✔
1642

1643
                // 8.2.4.36 After attribute name state
1644
                // Skips whitespace that follows an attribute name until something
                // significant is read:
                //   '/'   -> self-closing start tag state (returns null)
                //   '='   -> before-attribute-value state (returns null)
                //   '>'   -> returns EmitTagToken ()
                //   other -> first character of the next attribute name (returns null)
                // At end of input the pending tag is dropped and EmitDataToken (false, true)
                // is returned.
                HtmlToken? ReadAfterAttributeName ()
                {
                        while (TryRead (out char ch)) {
                                // Note: the raw text is kept in case a parse error forces us to emit a data token
                                data.Append (ch);

                                switch (ch) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        continue;
                                case '/':
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                        return null;
                                case '=':
                                        TokenizerState = HtmlTokenizerState.BeforeAttributeValue;
                                        return null;
                                case '>':
                                        return EmitTagToken ();
                                default:
                                        // '"', '\'' and '<' are parse errors here, but they are treated
                                        // like any other character that starts a name.
                                        TokenizerState = HtmlTokenizerState.AttributeName;
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                                        return null;
                                }
                        }

                        TokenizerState = HtmlTokenizerState.EndOfFile;
                        tag = null;

                        return EmitDataToken (false, true);
                }
1✔
1678

1679
                // 8.2.4.37 Before attribute value state
1680
                // Skips whitespace that follows '=' until the start of an attribute value:
                //   '"' or '\'' -> quoted value; the quote character is remembered in 'quote'
                //   '&'         -> character reference in attribute value state
                //   '/'         -> self-closing start tag state
                //   '>'         -> returns EmitTagToken ()
                //   other       -> first character of an unquoted value
                // The value text is accumulated in the 'name' buffer. At end of input the
                // pending tag is dropped and EmitDataToken (false, true) is returned.
                HtmlToken? ReadBeforeAttributeValue ()
                {
                        while (TryRead (out char ch)) {
                                // Note: the raw text is kept in case a parse error forces us to emit a data token
                                data.Append (ch);

                                switch (ch) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        continue;
                                case '"': case '\'':
                                        TokenizerState = HtmlTokenizerState.AttributeValueQuoted;
                                        quote = ch;
                                        return null;
                                case '&':
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInAttributeValue;
                                        return null;
                                case '/':
                                        // NOTE(review): the HTML tokenizer spec appears to have no special
                                        // case for '/' in this state (it would begin an unquoted value, as
                                        // in href=/path) - confirm that this transition is intentional.
                                        TokenizerState = HtmlTokenizerState.SelfClosingStartTag;
                                        return null;
                                case '>':
                                        return EmitTagToken ();
                                default:
                                        // '<', '=' and '`' are parse errors here, but they are treated
                                        // like any other character that starts an unquoted value.
                                        TokenizerState = HtmlTokenizerState.AttributeValueUnquoted;
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                                        return null;
                                }
                        }

                        TokenizerState = HtmlTokenizerState.EndOfFile;
                        tag = null;

                        return EmitDataToken (false, true);
                }
1✔
1718

1719
                // 8.2.4.38 Attribute value (double-quoted) state
1720
                // Accumulates a quoted attribute value (in the 'name' buffer) until the
                // closing quote character stored in 'quote' is read, at which point the
                // value is assigned to the current attribute. An '&' hands control to the
                // character reference state and returns early with the partial value
                // still buffered. At end of input the partial value is discarded and
                // EmitDataToken (false, true) is returned.
                HtmlToken? ReadAttributeValueQuoted ()
                {
                        do {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitDataToken (false, true);
                                }

                                // Note: the raw text is kept in case a parse error forces us to emit a data token
                                data.Append (ch);

                                if (ch == '&') {
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInAttributeValue;
                                        return null;
                                }

                                if (ch == '\0') {
                                        // NUL is checked before the quote comparison and stored as U+FFFD.
                                        name.Append ('\uFFFD');
                                } else if (ch == quote) {
                                        TokenizerState = HtmlTokenizerState.AfterAttributeValueQuoted;
                                        quote = '\0';
                                } else {
                                        name.Append (ch);
                                }
                        } while (TokenizerState == HtmlTokenizerState.AttributeValueQuoted);

                        attribute!.Value = name.ToString ();
                        name.Length = 0;

                        return null;
                }
1✔
1757

1758
                // 8.2.4.40 Attribute value (unquoted) state
1759
                // Accumulates an unquoted attribute value (in the 'name' buffer).
                // Whitespace ends the value and moves to the before-attribute-name state;
                // '>' ends the value and returns EmitTagToken (); '&' hands control to the
                // character reference state with the partial value still buffered. At end
                // of input the partial value is discarded and EmitDataToken (false, true)
                // is returned.
                HtmlToken? ReadAttributeValueUnquoted ()
                {
                        do {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        name.Length = 0;

                                        return EmitDataToken (false, true);
                                }

                                // Note: the raw text is kept in case a parse error forces us to emit a data token
                                data.Append (ch);

                                if (ch == '&') {
                                        TokenizerState = HtmlTokenizerState.CharacterReferenceInAttributeValue;
                                        return null;
                                }

                                if (ch == '>') {
                                        attribute!.Value = name.ToString ();
                                        name.Length = 0;

                                        return EmitTagToken ();
                                }

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ') {
                                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                } else {
                                        // '\'', '<', '=' and '`' are parse errors here, but they are
                                        // kept as part of the value like any other character.
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                                }
                        } while (TokenizerState == HtmlTokenizerState.AttributeValueUnquoted);

                        attribute!.Value = name.ToString ();
                        name.Length = 0;

                        return null;
                }
1✔
1798

1799
                // 8.2.4.41 Character reference in attribute value state
1800
                // Handles an '&' encountered inside an attribute value.
                //
                // If the next character cannot begin a character reference (whitespace,
                // '<', '&', or the character that would end the value), a literal '&' is
                // appended to the value and nothing is consumed. Otherwise characters are
                // fed to the entity decoder for as long as it accepts them; the decoded
                // text - or the raw text when the run is followed by '=' or an
                // alphanumeric character - is appended to the value (held in 'name').
                //
                // On success the tokenizer returns to the quoted or unquoted value state
                // (chosen by 'quote') and null is returned. At end of input
                // EmitDataToken (false, true) is returned.
                HtmlToken? ReadCharacterReferenceInAttributeValue ()
                {
                        // The character that terminates the value: '>' when unquoted, else the quote itself.
                        char terminator = quote == '\0' ? '>' : quote;

                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                name.Length = 0;

                                return EmitDataToken (false, true);
                        }

                        bool literalAmpersand = c == '\t' || c == '\r' || c == '\n' || c == '\f' || c == ' '
                                || c == '<' || c == '&' || c == terminator;

                        if (literalAmpersand) {
                                // Not a character reference: nothing is consumed, keep the '&' as-is.
                                name.Append ('&');
                        } else {
                                entity.Push ('&');

                                while (entity.Push (c)) {
                                        ConsumeCharacter (c);

                                        if (c == ';')
                                                break;

                                        if (!TryPeek (out c)) {
                                                // Drop the last buffered character (the value states append
                                                // the '&' to 'data' before switching here) and replace it
                                                // with the decoder's raw input.
                                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                                data.Length--;
                                                data.Append (entity.GetPushedInput ());
                                                entity.Reset ();

                                                return EmitDataToken (false, true);
                                        }
                                }

                                // 'c' is now either the ';' that ended the reference or the first
                                // character the decoder refused.
                                var rawText = entity.GetPushedInput ();
                                string decoded;

                                if (c == '=' || IsAlphaNumeric (c))
                                        decoded = rawText;
                                else
                                        decoded = entity.GetValue ();

                                data.Length--;
                                data.Append (rawText);
                                name.Append (decoded);
                                entity.Reset ();
                        }

                        if (quote == '\0')
                                TokenizerState = HtmlTokenizerState.AttributeValueUnquoted;
                        else
                                TokenizerState = HtmlTokenizerState.AttributeValueQuoted;

                        return null;
                }
1✔
1863

1864
                // 8.2.4.42 After attribute value (quoted) state
1865
                // Handles the character that follows the closing quote of an attribute value.
                // Returns the finished tag token when '>' is found, a data token when the input
                // ends, or null when the tokenizer merely moved on to another state.
                HtmlToken? ReadAfterAttributeValueQuoted ()
                {
                        if (!TryPeek (out char next)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitDataToken (false, true);
                        }

                        if (next == '>') {
                                // end of the tag
                                ConsumeCharacter (next);
                                return EmitTagToken ();
                        }

                        bool isWhiteSpace = next == '\t' || next == '\r' || next == '\n' || next == '\f' || next == ' ';

                        if (!isWhiteSpace && next != '/') {
                                // anything else is left unconsumed and gets processed by the BeforeAttributeName state
                                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                                return null;
                        }

                        if (isWhiteSpace)
                                TokenizerState = HtmlTokenizerState.BeforeAttributeName;
                        else
                                TokenizerState = HtmlTokenizerState.SelfClosingStartTag;

                        // Note: the raw input is saved in case a data token has to be emitted later
                        ConsumeCharacter (next);
                        data.Append (next);

                        return null;
                }
1✔
1896

1897
                // 8.2.4.43 Self-closing start tag state
1898
                // Handles the character that follows a '/' inside a tag.
                //
                // Returns the tag token (flagged as an empty element) when the next character is
                // '>', a data token when the input ends, or null when the '/' turned out to be
                // stray and tokenizing continues in the BeforeAttributeName state.
                HtmlToken? ReadSelfClosingStartTag ()
                {
                        if (!TryPeek (out char c)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitDataToken (false, true);
                        }

                        if (c == '>') {
                                ConsumeCharacter (c);
                                tag!.IsEmptyElement = true;

                                return EmitTagToken ();
                        }

                        // parse error: the character must be reprocessed by the BeforeAttributeName state
                        // (e.g. the 'b' in "<a/b>" starts an attribute name), so it is only peeked at
                        // here and deliberately left unconsumed.
                        TokenizerState = HtmlTokenizerState.BeforeAttributeName;

                        return null;
                }
1✔
1919

1920
                // 8.2.4.44 Bogus comment state
1921
                // Reads a bogus comment: everything up to (but not including) the next '>' or the
                // end of the input becomes the comment text, with NUL characters replaced by U+FFFD.
                //
                // The tokenizer ends up in the Data state when the comment was closed by '>' and in
                // the EndOfFile state when the input ran out first.
                HtmlToken ReadBogusComment ()
                {
                        var nextState = HtmlTokenizerState.Data;

                        while (true) {
                                if (!TryRead (out char c)) {
                                        // remember that we hit the end of the input so that it is not
                                        // overwritten by the Data state below
                                        nextState = HtmlTokenizerState.EndOfFile;
                                        break;
                                }

                                if (c == '>')
                                        break;

                                data.Append (c == '\0' ? '\uFFFD' : c);
                        }

                        TokenizerState = nextState;

                        return EmitCommentToken (data, true);
                }
1✔
1939

1940
                // 8.2.4.45 Markup declaration open state
1941
                // Decides what follows "<!": a comment ("<!--"), a DOCTYPE declaration, a CDATA
                // section ("<![CDATA[") or, failing all of those, a bogus comment.
                //
                // Returns a data token if the input ends while a candidate is still being matched;
                // otherwise returns null after switching to the state that handles the construct.
                HtmlToken? ReadMarkupDeclarationOpen ()
                {
                        int count = 0;
                        char c = '\0';

                        // Look for the "--" of "<!--".
                        while (count < 2) {
                                if (!TryPeek (out c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitDataToken (false, true);
                                }

                                if (c != '-')
                                        break;

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                ConsumeCharacter (c);
                                data.Append (c);
                                count++;
                        }

                        if (count == 2) {
                                // "<!--"
                                TokenizerState = HtmlTokenizerState.CommentStart;
                                name.Length = 0;
                                return null;
                        }

                        if (count == 0) {
                                // Check for "<!DOCTYPE " or "<![CDATA["
                                if (c == 'D' || c == 'd') {
                                        // Note: we save the data in case we hit a parse error and have to emit a data token
                                        ConsumeCharacter (c);
                                        data.Append (c);
                                        name.Append (c);
                                        count = 1;

                                        while (count < 7) {
                                                if (!TryPeek (out c)) {
                                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                                        return EmitDataToken (false, true);
                                                }

                                                // Only consume characters that actually match. A mismatching
                                                // character (which may be the '>' that terminates the bogus
                                                // comment) must be left for the BogusComment state to process.
                                                if (ToLower (c) != DocType[count])
                                                        break;

                                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                                ConsumeCharacter (c);
                                                data.Append (c);
                                                name.Append (c);
                                                count++;
                                        }

                                        if (count == 7) {
                                                doctype = CreateDocTypeToken (name.ToString ());
                                                TokenizerState = HtmlTokenizerState.DocType;
                                                name.Length = 0;
                                                return null;
                                        }

                                        name.Length = 0;
                                } else if (c == '[') {
                                        // Note: we save the data in case we hit a parse error and have to emit a data token
                                        ConsumeCharacter (c);
                                        data.Append (c);
                                        count = 1;

                                        while (count < 7) {
                                                if (!TryPeek (out c)) {
                                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                                        return EmitDataToken (false, true);
                                                }

                                                // As above: leave a mismatching character unconsumed.
                                                if (c != CData[count])
                                                        break;

                                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                                ConsumeCharacter (c);
                                                data.Append (c);
                                                count++;
                                        }

                                        if (count == 7) {
                                                TokenizerState = HtmlTokenizerState.CDataSection;
                                                data.Length = 0;
                                                return null;
                                        }
                                }
                        }

                        // parse error
                        TokenizerState = HtmlTokenizerState.BogusComment;

                        // trim the leading "<!" so that only the comment text remains in the data buffer
                        for (int i = 0; i < data.Length - 2; i++)
                                data[i] = data[i + 2];
                        data.Length -= 2;
                        bang = true;

                        return null;
                }
1✔
2041

2042
                // 8.2.4.46 Comment start state
2043
                // Processes the first character after "<!--".
                // Emits an empty comment when the input ends or the comment is closed right away
                // by '>'; otherwise returns null after choosing the next comment state.
                HtmlToken? ReadCommentStart ()
                {
                        if (TryRead (out char ch)) {
                                data.Append (ch);

                                if (ch == '-') {
                                        TokenizerState = HtmlTokenizerState.CommentStartDash;
                                        return null;
                                }

                                if (ch != '>') {
                                        // first character of the comment text
                                        TokenizerState = HtmlTokenizerState.Comment;
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                                        return null;
                                }
                        }

                        // end of input, or "<!-->" (parse error): the comment is empty
                        TokenizerState = HtmlTokenizerState.Data;

                        return EmitCommentToken (string.Empty);
                }
1✔
2068

2069
                // 8.2.4.47 Comment start dash state
2070
                // Processes the character that follows "<!---".
                // Emits the comment collected so far when the input ends or '>' is found;
                // otherwise returns null after choosing the next comment state.
                HtmlToken? ReadCommentStartDash ()
                {
                        if (TryRead (out char ch)) {
                                data.Append (ch);

                                if (ch == '-') {
                                        TokenizerState = HtmlTokenizerState.CommentEnd;
                                        return null;
                                }

                                if (ch != '>') {
                                        // the dash that got us here was part of the comment text after all
                                        TokenizerState = HtmlTokenizerState.Comment;
                                        name.Append ('-');
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                                        return null;
                                }
                        }

                        // end of input, or "<!--->" (parse error)
                        TokenizerState = HtmlTokenizerState.Data;

                        return EmitCommentToken (name);
                }
1✔
2095

2096
                // 8.2.4.48 Comment state
2097
                // Accumulates comment text until a '-' (possible start of "-->") or the end of the
                // input is reached. NUL characters are replaced by U+FFFD in the comment text.
                HtmlToken? ReadComment ()
                {
                        while (TryRead (out char ch)) {
                                // Note: the raw input is saved in case a data token has to be emitted later
                                data.Append (ch);

                                if (ch == '-') {
                                        TokenizerState = HtmlTokenizerState.CommentEndDash;
                                        return null;
                                }

                                name.Append (ch == '\0' ? '\uFFFD' : ch);
                        }

                        // ran out of input: emit what has been collected
                        TokenizerState = HtmlTokenizerState.EndOfFile;

                        return EmitCommentToken (name);
                }
1✔
2118

2119
                // 8.2.4.49 Comment end dash state
2120
                // Processes the character that follows a single '-' inside a comment.
                // Emits the comment when the input ends; otherwise returns null after choosing
                // between the CommentEnd state ("--" seen) and going back to the Comment state.
                HtmlToken? ReadCommentEndDash ()
                {
                        if (!TryRead (out char ch)) {
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitCommentToken (name);
                        }

                        data.Append (ch);

                        if (ch == '-') {
                                TokenizerState = HtmlTokenizerState.CommentEnd;
                                return null;
                        }

                        // the previous dash was ordinary comment text
                        TokenizerState = HtmlTokenizerState.Comment;
                        name.Append ('-');
                        name.Append (ch == '\0' ? '\uFFFD' : ch);

                        return null;
                }
1✔
2142

2143
                // 8.2.4.50 Comment end state
2144
                // Processes the input that follows "--" inside a comment.
                // Emits the comment on '>' or at the end of the input; additional dashes are added
                // to the comment text and anything else returns the tokenizer to a comment state.
                HtmlToken? ReadCommentEnd ()
                {
                        while (TryRead (out char ch)) {
                                // Note: the raw input is saved in case a data token has to be emitted later
                                data.Append (ch);

                                if (ch == '-') {
                                        // "---": one more dash belongs to the comment text, keep looking for '>'
                                        name.Append ('-');
                                        continue;
                                }

                                if (ch == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitCommentToken (name);
                                }

                                if (ch == '!') {
                                        // parse error
                                        TokenizerState = HtmlTokenizerState.CommentEndBang;
                                        return null;
                                }

                                // the "--" was ordinary comment text
                                TokenizerState = HtmlTokenizerState.Comment;
                                name.Append ("--");
                                name.Append (ch == '\0' ? '\uFFFD' : ch);
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.EndOfFile;

                        return EmitCommentToken (name);
                }
1✔
2173

2174
                // 8.2.4.51 Comment end bang state
2175
                // Processes the character that follows "--!" inside a comment.
                // Emits the comment on '>' or at the end of the input; otherwise the "--!" is
                // added to the comment text and null is returned.
                HtmlToken? ReadCommentEndBang ()
                {
                        if (!TryRead (out char ch)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                return EmitCommentToken (name);
                        }

                        data.Append (ch);

                        if (ch == '>') {
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitCommentToken (name);
                        }

                        if (ch == '-') {
                                TokenizerState = HtmlTokenizerState.CommentEndDash;
                                name.Append ("--!");
                        } else {
                                // parse error
                                TokenizerState = HtmlTokenizerState.Comment;
                                name.Append ("--!");
                                name.Append (ch == '\0' ? '\uFFFD' : ch);
                        }

                        return null;
                }
1✔
2201

2202
                // 8.2.4.52 DOCTYPE state
2203
                // Processes the character that follows the DOCTYPE keyword.
                // Emits the doctype token in quirks mode when the input ends; otherwise consumes
                // a single white space character (if present) and moves on to BeforeDocTypeName.
                HtmlToken? ReadDocType ()
                {
                        if (!TryPeek (out char next)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                doctype!.ForceQuirksMode = true;
                                name.Length = 0;

                                return EmitDocType ();
                        }

                        TokenizerState = HtmlTokenizerState.BeforeDocTypeName;

                        // anything other than white space is left for the BeforeDocTypeName state
                        if (next == '\t' || next == '\r' || next == '\n' || next == '\f' || next == ' ') {
                                ConsumeCharacter (next);
                                data.Append (next);
                        }

                        return null;
                }
1✔
2224

2225
                // 8.2.4.53 Before DOCTYPE name state
2226
                // Skips the white space that precedes the DOCTYPE name.
                // Emits the doctype token in quirks mode when '>' or the end of the input comes
                // first; otherwise stores the first character of the name and returns null.
                HtmlToken? ReadBeforeDocTypeName ()
                {
                        while (TryRead (out char ch)) {
                                // Note: the raw input is saved in case a data token has to be emitted later
                                data.Append (ch);

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        continue;

                                if (ch == '>') {
                                        // "<!DOCTYPE>" has no name
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                TokenizerState = HtmlTokenizerState.DocTypeName;
                                name.Append (ch == '\0' ? '\uFFFD' : ch);
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.EndOfFile;
                        doctype!.ForceQuirksMode = true;

                        return EmitDocType ();
                }
1✔
2252

2253
                // 8.2.4.54 DOCTYPE name state
2254
                // Reads the remainder of the DOCTYPE name.
                // The name ends at white space (continue in AfterDocTypeName), at '>' (emit the
                // doctype) or at the end of the input (emit the doctype in quirks mode).
                HtmlToken? ReadDocTypeName ()
                {
                        do {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.Name = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                // Note: the raw input is saved in case a data token has to be emitted later
                                data.Append (ch);

                                if (ch == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.Name = name.ToString ();
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        TokenizerState = HtmlTokenizerState.AfterDocTypeName;
                                else
                                        name.Append (ch == '\0' ? '\uFFFD' : ch);
                        } while (TokenizerState == HtmlTokenizerState.DocTypeName);

                        // white space terminated the name
                        doctype!.Name = name.ToString ();
                        name.Length = 0;

                        return null;
                }
1✔
2293

2294
                // 8.2.4.55 After DOCTYPE name state
2295
                // Processes the input that follows the DOCTYPE name: optional white space and then
                // either '>' or one of the (case-insensitive) keywords PUBLIC / SYSTEM.
                //
                // Returns the doctype token on '>' or at the end of the input and null otherwise.
                // The name buffer is used as scratch space for the keyword and is always left
                // empty when this method returns.
                HtmlToken? ReadAfterDocTypeName ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.ForceQuirksMode = true;
                                        // discard a partially collected keyword so it cannot leak into later tokens
                                        name.Length = 0;
                                        return EmitDocType ();
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                switch (c) {
                                case '\t': case '\r': case '\n': case '\f': case ' ':
                                        if (name.Length == 0)
                                                break;

                                        // parse error: white space in the middle of what should have been a keyword
                                        // (e.g. "pub lic") means this is neither PUBLIC nor SYSTEM
                                        TokenizerState = HtmlTokenizerState.BogusDocType;
                                        doctype!.ForceQuirksMode = true;
                                        name.Length = 0;
                                        return null;
                                case '>':
                                        TokenizerState = HtmlTokenizerState.Data;

                                        if (name.Length > 0) {
                                                // parse error: the doctype ended in the middle of an unrecognized keyword
                                                doctype!.ForceQuirksMode = true;
                                                name.Length = 0;
                                        }

                                        return EmitDocType ();
                                default:
                                        name.Append (c);

                                        // both keywords are 6 characters long; keep collecting until then
                                        if (name.Length < 6)
                                                break;

                                        if (NameIs ("public")) {
                                                TokenizerState = HtmlTokenizerState.AfterDocTypePublicKeyword;
                                                doctype!.PublicKeyword = name.ToString ();
                                        } else if (NameIs ("system")) {
                                                TokenizerState = HtmlTokenizerState.AfterDocTypeSystemKeyword;
                                                doctype!.SystemKeyword = name.ToString ();
                                        } else {
                                                // parse error
                                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                                doctype!.ForceQuirksMode = true;
                                        }

                                        name.Length = 0;
                                        return null;
                                }
                        } while (true);
                }
1✔
2333

2334
                // 8.2.4.56 After DOCTYPE public keyword state
2335
                // Processes the character that follows the PUBLIC keyword of a DOCTYPE.
                // Emits the doctype token in quirks mode on '>' or at the end of the input;
                // otherwise returns null after choosing the next state.
                HtmlToken? ReadAfterDocTypePublicKeyword ()
                {
                        if (!TryRead (out char ch)) {
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        // Note: the raw input is saved in case a data token has to be emitted later
                        data.Append (ch);

                        if (ch == '>') {
                                // parse error: the public identifier is missing
                                TokenizerState = HtmlTokenizerState.Data;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        if (ch == '"' || ch == '\'') {
                                // parse error: no white space between the keyword and the identifier
                                TokenizerState = HtmlTokenizerState.DocTypePublicIdentifierQuoted;
                                doctype!.PublicIdentifier = string.Empty;
                                quote = ch;
                        } else if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ') {
                                TokenizerState = HtmlTokenizerState.BeforeDocTypePublicIdentifier;
                        } else {
                                // parse error
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                doctype!.ForceQuirksMode = true;
                        }

                        return null;
                }
1✔
2367

2368
                // 8.2.4.57 Before DOCTYPE public identifier state
2369
                // Skips the white space between the PUBLIC keyword and the public identifier.
                // Emits the doctype token in quirks mode on '>' or at the end of the input;
                // otherwise returns null after choosing the next state.
                HtmlToken? ReadBeforeDocTypePublicIdentifier ()
                {
                        while (TryRead (out char ch)) {
                                // Note: the raw input is saved in case a data token has to be emitted later
                                data.Append (ch);

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        continue;

                                if (ch == '"' || ch == '\'') {
                                        // remember which quote character terminates the identifier
                                        TokenizerState = HtmlTokenizerState.DocTypePublicIdentifierQuoted;
                                        doctype!.PublicIdentifier = string.Empty;
                                        quote = ch;
                                        return null;
                                }

                                if (ch == '>') {
                                        // parse error: the public identifier is missing
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // parse error
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                doctype!.ForceQuirksMode = true;
                                return null;
                        }

                        TokenizerState = HtmlTokenizerState.EndOfFile;
                        doctype!.ForceQuirksMode = true;

                        return EmitDocType ();
                }
1✔
2400

2401
                // 8.2.4.58 / 8.2.4.59 DOCTYPE public identifier (double-quoted / single-quoted) states
2402
                // Reads a quoted DOCTYPE public identifier up to the closing quote character
                // (whichever one is stored in the quote field).
                // Emits the doctype token in quirks mode on '>' or at the end of the input;
                // otherwise stores the identifier and returns null.
                HtmlToken? ReadDocTypePublicIdentifierQuoted ()
                {
                        do {
                                if (!TryRead (out char ch)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.PublicIdentifier = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                // Note: the raw input is saved in case a data token has to be emitted later
                                data.Append (ch);

                                if (ch == '>') {
                                        // parse error: the identifier was never closed
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.PublicIdentifier = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                if (ch == '\0') {
                                        // parse error
                                        name.Append ('\uFFFD');
                                } else if (ch == quote) {
                                        // closing quote: the identifier is complete
                                        TokenizerState = HtmlTokenizerState.AfterDocTypePublicIdentifier;
                                        quote = '\0';
                                } else {
                                        name.Append (ch);
                                }
                        } while (TokenizerState == HtmlTokenizerState.DocTypePublicIdentifierQuoted);

                        doctype!.PublicIdentifier = name.ToString ();
                        name.Length = 0;

                        return null;
                }
1✔
2445

2446
                // 8.2.4.60 After DOCTYPE public identifier state
                //
                // Reads the single character that follows the closing quote of the public identifier
                // and selects the next tokenizer state. Returns the result of EmitDocType () when '>'
                // or the end of the input is reached; otherwise returns null so that ReadNextToken ()
                // dispatches to the newly selected state.
                HtmlToken? ReadAfterDocTypePublicIdentifier ()
                {
                        if (!TryRead (out char ch)) {
                                // The input ended inside the DOCTYPE: flag quirks mode and emit what we have.
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        // Keep the raw text around in case a parse error forces us to emit it as a data token.
                        data.Append (ch);

                        if (ch == '>') {
                                // The DOCTYPE is complete (public identifier only).
                                TokenizerState = HtmlTokenizerState.Data;
                                return EmitDocType ();
                        }

                        if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ') {
                                TokenizerState = HtmlTokenizerState.BetweenDocTypePublicAndSystemIdentifiers;
                        } else if (ch == '"' || ch == '\'') {
                                // parse error: the system identifier starts without any separating whitespace
                                TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
                                doctype!.SystemIdentifier = string.Empty;
                                quote = ch;
                        } else {
                                // parse error: unexpected character
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                doctype!.ForceQuirksMode = true;
                        }

                        return null;
                }
1✔
2478

2479
                // 8.2.4.61 Between DOCTYPE public and system identifiers state
                //
                // Skips whitespace between the public identifier and whatever follows it. Returns the
                // result of EmitDocType () on '>' or end of input; returns null after switching to the
                // quoted system identifier state (on a quote character) or to the bogus DOCTYPE state
                // (on anything else).
                HtmlToken? ReadBetweenDocTypePublicAndSystemIdentifiers ()
                {
                        while (true) {
                                if (!TryRead (out char ch)) {
                                        // The input ended inside the DOCTYPE.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // Keep the raw text around in case a parse error forces us to emit it as a data token.
                                data.Append (ch);

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        continue;

                                if (ch == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitDocType ();
                                }

                                if (ch == '"' || ch == '\'') {
                                        // Remember which quote opened the system identifier so the next state knows where it ends.
                                        TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
                                        doctype!.SystemIdentifier = string.Empty;
                                        quote = ch;
                                } else {
                                        // parse error: unexpected character
                                        TokenizerState = HtmlTokenizerState.BogusDocType;
                                        doctype!.ForceQuirksMode = true;
                                }

                                return null;
                        }
                }
1✔
2510

2511
                // 8.2.4.62 After DOCTYPE system keyword state
                //
                // Reads the single character that follows the SYSTEM keyword and selects the next
                // tokenizer state. Returns the result of EmitDocType () on '>' or end of input (both
                // with quirks mode forced); otherwise returns null.
                HtmlToken? ReadAfterDocTypeSystemKeyword ()
                {
                        if (!TryRead (out char ch)) {
                                // The input ended inside the DOCTYPE.
                                TokenizerState = HtmlTokenizerState.EndOfFile;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        // Keep the raw text around in case a parse error forces us to emit it as a data token.
                        data.Append (ch);

                        if (ch == '>') {
                                // parse error: the system identifier is missing
                                TokenizerState = HtmlTokenizerState.Data;
                                doctype!.ForceQuirksMode = true;
                                return EmitDocType ();
                        }

                        if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ') {
                                TokenizerState = HtmlTokenizerState.BeforeDocTypeSystemIdentifier;
                        } else if (ch == '"' || ch == '\'') {
                                // parse error: no whitespace between the keyword and the identifier
                                TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
                                doctype!.SystemIdentifier = string.Empty;
                                quote = ch;
                        } else {
                                // parse error: unexpected character
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                doctype!.ForceQuirksMode = true;
                        }

                        return null;
                }
1✔
2544

2545
                // 8.2.4.63 Before DOCTYPE system identifier state
                //
                // Skips whitespace in front of the system identifier. Returns the result of
                // EmitDocType () on '>' or end of input (both with quirks mode forced); returns null
                // after switching to the quoted system identifier state (on a quote character) or to
                // the bogus DOCTYPE state (on anything else).
                HtmlToken? ReadBeforeDocTypeSystemIdentifier ()
                {
                        while (true) {
                                if (!TryRead (out char ch)) {
                                        // The input ended inside the DOCTYPE.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // Keep the raw text around in case a parse error forces us to emit it as a data token.
                                data.Append (ch);

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        continue;

                                if (ch == '>') {
                                        // parse error: the system identifier is missing
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                if (ch == '"' || ch == '\'') {
                                        // Remember which quote opened the system identifier so the next state knows where it ends.
                                        TokenizerState = HtmlTokenizerState.DocTypeSystemIdentifierQuoted;
                                        doctype!.SystemIdentifier = string.Empty;
                                        quote = ch;
                                } else {
                                        // parse error: unexpected character
                                        TokenizerState = HtmlTokenizerState.BogusDocType;
                                        doctype!.ForceQuirksMode = true;
                                }

                                return null;
                        }
                }
1✔
2577

2578
                // 8.2.4.64 DOCTYPE system identifier (double-quoted) state
                //
                // Note: this handler is not limited to double quotes; the identifier ends at whichever
                // character is stored in `quote` (the states that switch here store '"' or '\'').
                //
                // Accumulates the identifier text in `name` and stores it in doctype.SystemIdentifier
                // once the closing quote, a premature '>' or the end of the input is seen. Returns the
                // result of EmitDocType () in the latter two cases (with quirks mode forced) and null
                // after a properly closed identifier.
                HtmlToken? ReadDocTypeSystemIdentifierQuoted ()
                {
                        do {
                                if (!TryRead (out char ch)) {
                                        // The input ended inside the identifier: keep what was collected so far.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.SystemIdentifier = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                }

                                // Keep the raw text around in case a parse error forces us to emit it as a data token.
                                data.Append (ch);

                                if (ch == '\0') {
                                        // parse error: NUL is replaced by U+FFFD in the identifier
                                        name.Append ('\uFFFD');
                                } else if (ch == '>') {
                                        // parse error: the DOCTYPE was closed before the closing quote
                                        TokenizerState = HtmlTokenizerState.Data;
                                        doctype!.SystemIdentifier = name.ToString ();
                                        doctype.ForceQuirksMode = true;
                                        name.Length = 0;

                                        return EmitDocType ();
                                } else if (ch == quote) {
                                        // Closing quote: changing the state ends the loop below.
                                        TokenizerState = HtmlTokenizerState.AfterDocTypeSystemIdentifier;
                                        quote = '\0';
                                } else {
                                        name.Append (ch);
                                }
                        } while (TokenizerState == HtmlTokenizerState.DocTypeSystemIdentifierQuoted);

                        doctype!.SystemIdentifier = name.ToString ();
                        name.Length = 0;

                        return null;
                }
1✔
2622

2623
                // 8.2.4.66 After DOCTYPE system identifier state
                //
                // Skips whitespace after the system identifier. Returns the result of EmitDocType ()
                // on '>' or end of input (the latter with quirks mode forced). Any other character
                // switches to the bogus DOCTYPE state and returns null; this path leaves
                // ForceQuirksMode untouched.
                HtmlToken? ReadAfterDocTypeSystemIdentifier ()
                {
                        while (true) {
                                if (!TryRead (out char ch)) {
                                        // The input ended inside the DOCTYPE.
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        doctype!.ForceQuirksMode = true;
                                        return EmitDocType ();
                                }

                                // Keep the raw text around in case a parse error forces us to emit it as a data token.
                                data.Append (ch);

                                if (ch == '\t' || ch == '\r' || ch == '\n' || ch == '\f' || ch == ' ')
                                        continue;

                                if (ch == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitDocType ();
                                }

                                // parse error: unexpected character
                                TokenizerState = HtmlTokenizerState.BogusDocType;
                                return null;
                        }
                }
1✔
2648

2649
                // 8.2.4.67 Bogus DOCTYPE state
                //
                // Consumes characters (recording them in `data`) until the '>' that closes the
                // malformed DOCTYPE or the end of the input, then returns the result of EmitDocType ().
                //
                // Note: the specification does not turn on the force-quirks flag in this state, not
                // even at EOF. The flag keeps whatever value the state that switched here gave it;
                // in particular, the "after DOCTYPE system identifier" state enters this state
                // without forcing quirks mode, and hitting EOF afterwards must not change that.
                HtmlToken? ReadBogusDocType ()
                {
                        do {
                                if (!TryRead (out char c)) {
                                        TokenizerState = HtmlTokenizerState.EndOfFile;
                                        return EmitDocType ();
                                }

                                // Note: we save the data in case we hit a parse error and have to emit a data token
                                data.Append (c);

                                if (c == '>') {
                                        TokenizerState = HtmlTokenizerState.Data;
                                        return EmitDocType ();
                                }
                        } while (true);
                }
1✔
2668

2669
                // 8.2.4.68 CDATA section state
                //
                // Consumes characters up to the terminating "]]>" (or the end of the input) and returns
                // the result of EmitCDataToken ().
                //
                // `cdata` is used as a 3-character look-behind window: a character is only moved into
                // `data` once it has been pushed out of the window, so the terminator itself is never
                // appended to `data`. At the end of the input, whatever is still in the window is
                // flushed into `data` before emitting.
                HtmlToken? ReadCDataSection ()
                {
                        do {
                                while (bufferIndex < bufferEnd) {
                                        char c = buffer[bufferIndex++];

                                        if (c == '\n') {
                                                IncrementLineNumber ();
                                        } else {
                                                linePosition++;
                                        }

                                        if (cdataIndex >= 3) {
                                                // The window is full: its oldest character is content, so
                                                // commit it and slide the window forward by one.
                                                data.Append (cdata[0]);
                                                cdata[0] = cdata[1];
                                                cdata[1] = cdata[2];
                                                cdata[2] = c;
                                        } else {
                                                cdata[cdataIndex++] = c;

                                                // Nothing to compare until the window holds 3 characters.
                                                if (cdataIndex < 3)
                                                        continue;
                                        }

                                        // Check for the terminator every time the window is full, including the
                                        // moment it first fills up; otherwise a section whose first three
                                        // characters are "]]>" (i.e. an empty CDATA section) would never end.
                                        if (cdata[0] == ']' && cdata[1] == ']' && cdata[2] == '>') {
                                                TokenizerState = HtmlTokenizerState.Data;
                                                cdataIndex = 0;

                                                return EmitCDataToken ();
                                        }
                                }

                                // The buffered input is exhausted; ask for more before checking for EOF.
                                FillBuffer ();
                        } while (!eof);

                        TokenizerState = HtmlTokenizerState.EndOfFile;

                        // Unterminated section: the characters still held in the window are content.
                        for (int i = 0; i < cdataIndex; i++)
                                data.Append (cdata[i]);

                        cdataIndex = 0;

                        return EmitCDataToken ();
                }
1✔
2711

2712
                /// <summary>
                /// Read the next token.
                /// </summary>
                /// <remarks>
                /// Runs the handler for the current tokenizer state repeatedly until one of the handlers
                /// produces a token or the tokenizer reaches the end-of-file state.
                /// </remarks>
                /// <returns><see langword="true" /> if the next token was read; otherwise, <see langword="false" />.</returns>
                /// <param name="token">The token that was read.</param>
                public bool ReadNextToken ([NotNullWhen (true)] out HtmlToken? token)
                {
                        while (true) {
                                HtmlToken? next;

                                // Each state has exactly one handler; a handler returns null when it only
                                // advanced the state machine without completing a token.
                                switch (TokenizerState) {
                                case HtmlTokenizerState.Data: next = ReadData (); break;
                                case HtmlTokenizerState.CharacterReferenceInData: next = ReadCharacterReferenceInData (); break;
                                case HtmlTokenizerState.RcData: next = ReadRcData (); break;
                                case HtmlTokenizerState.CharacterReferenceInRcData: next = ReadCharacterReferenceInRcData (); break;
                                case HtmlTokenizerState.RawText: next = ReadRawText (); break;
                                case HtmlTokenizerState.ScriptData: next = ReadScriptData (); break;
                                case HtmlTokenizerState.PlainText: next = ReadPlainText (); break;
                                case HtmlTokenizerState.TagOpen: next = ReadTagOpen (); break;
                                case HtmlTokenizerState.EndTagOpen: next = ReadEndTagOpen (); break;
                                case HtmlTokenizerState.TagName: next = ReadTagName (); break;
                                case HtmlTokenizerState.RcDataLessThan: next = ReadRcDataLessThan (); break;
                                case HtmlTokenizerState.RcDataEndTagOpen: next = ReadRcDataEndTagOpen (); break;
                                case HtmlTokenizerState.RcDataEndTagName: next = ReadRcDataEndTagName (); break;
                                case HtmlTokenizerState.RawTextLessThan: next = ReadRawTextLessThan (); break;
                                case HtmlTokenizerState.RawTextEndTagOpen: next = ReadRawTextEndTagOpen (); break;
                                case HtmlTokenizerState.RawTextEndTagName: next = ReadRawTextEndTagName (); break;
                                case HtmlTokenizerState.ScriptDataLessThan: next = ReadScriptDataLessThan (); break;
                                case HtmlTokenizerState.ScriptDataEndTagOpen: next = ReadScriptDataEndTagOpen (); break;
                                case HtmlTokenizerState.ScriptDataEndTagName: next = ReadScriptDataEndTagName (); break;
                                case HtmlTokenizerState.ScriptDataEscapeStart: next = ReadScriptDataEscapeStart (); break;
                                case HtmlTokenizerState.ScriptDataEscapeStartDash: next = ReadScriptDataEscapeStartDash (); break;
                                case HtmlTokenizerState.ScriptDataEscaped: next = ReadScriptDataEscaped (); break;
                                case HtmlTokenizerState.ScriptDataEscapedDash: next = ReadScriptDataEscapedDash (); break;
                                case HtmlTokenizerState.ScriptDataEscapedDashDash: next = ReadScriptDataEscapedDashDash (); break;
                                case HtmlTokenizerState.ScriptDataEscapedLessThan: next = ReadScriptDataEscapedLessThan (); break;
                                case HtmlTokenizerState.ScriptDataEscapedEndTagOpen: next = ReadScriptDataEscapedEndTagOpen (); break;
                                case HtmlTokenizerState.ScriptDataEscapedEndTagName: next = ReadScriptDataEscapedEndTagName (); break;
                                case HtmlTokenizerState.ScriptDataDoubleEscapeStart: next = ReadScriptDataDoubleEscapeStart (); break;
                                case HtmlTokenizerState.ScriptDataDoubleEscaped: next = ReadScriptDataDoubleEscaped (); break;
                                case HtmlTokenizerState.ScriptDataDoubleEscapedDash: next = ReadScriptDataDoubleEscapedDash (); break;
                                case HtmlTokenizerState.ScriptDataDoubleEscapedDashDash: next = ReadScriptDataDoubleEscapedDashDash (); break;
                                case HtmlTokenizerState.ScriptDataDoubleEscapedLessThan: next = ReadScriptDataDoubleEscapedLessThan (); break;
                                case HtmlTokenizerState.ScriptDataDoubleEscapeEnd: next = ReadScriptDataDoubleEscapeEnd (); break;
                                case HtmlTokenizerState.BeforeAttributeName: next = ReadBeforeAttributeName (); break;
                                case HtmlTokenizerState.AttributeName: next = ReadAttributeName (); break;
                                case HtmlTokenizerState.AfterAttributeName: next = ReadAfterAttributeName (); break;
                                case HtmlTokenizerState.BeforeAttributeValue: next = ReadBeforeAttributeValue (); break;
                                case HtmlTokenizerState.AttributeValueQuoted: next = ReadAttributeValueQuoted (); break;
                                case HtmlTokenizerState.AttributeValueUnquoted: next = ReadAttributeValueUnquoted (); break;
                                case HtmlTokenizerState.CharacterReferenceInAttributeValue: next = ReadCharacterReferenceInAttributeValue (); break;
                                case HtmlTokenizerState.AfterAttributeValueQuoted: next = ReadAfterAttributeValueQuoted (); break;
                                case HtmlTokenizerState.SelfClosingStartTag: next = ReadSelfClosingStartTag (); break;
                                case HtmlTokenizerState.BogusComment: next = ReadBogusComment (); break;
                                case HtmlTokenizerState.MarkupDeclarationOpen: next = ReadMarkupDeclarationOpen (); break;
                                case HtmlTokenizerState.CommentStart: next = ReadCommentStart (); break;
                                case HtmlTokenizerState.CommentStartDash: next = ReadCommentStartDash (); break;
                                case HtmlTokenizerState.Comment: next = ReadComment (); break;
                                case HtmlTokenizerState.CommentEndDash: next = ReadCommentEndDash (); break;
                                case HtmlTokenizerState.CommentEnd: next = ReadCommentEnd (); break;
                                case HtmlTokenizerState.CommentEndBang: next = ReadCommentEndBang (); break;
                                case HtmlTokenizerState.DocType: next = ReadDocType (); break;
                                case HtmlTokenizerState.BeforeDocTypeName: next = ReadBeforeDocTypeName (); break;
                                case HtmlTokenizerState.DocTypeName: next = ReadDocTypeName (); break;
                                case HtmlTokenizerState.AfterDocTypeName: next = ReadAfterDocTypeName (); break;
                                case HtmlTokenizerState.AfterDocTypePublicKeyword: next = ReadAfterDocTypePublicKeyword (); break;
                                case HtmlTokenizerState.BeforeDocTypePublicIdentifier: next = ReadBeforeDocTypePublicIdentifier (); break;
                                case HtmlTokenizerState.DocTypePublicIdentifierQuoted: next = ReadDocTypePublicIdentifierQuoted (); break;
                                case HtmlTokenizerState.AfterDocTypePublicIdentifier: next = ReadAfterDocTypePublicIdentifier (); break;
                                case HtmlTokenizerState.BetweenDocTypePublicAndSystemIdentifiers: next = ReadBetweenDocTypePublicAndSystemIdentifiers (); break;
                                case HtmlTokenizerState.AfterDocTypeSystemKeyword: next = ReadAfterDocTypeSystemKeyword (); break;
                                case HtmlTokenizerState.BeforeDocTypeSystemIdentifier: next = ReadBeforeDocTypeSystemIdentifier (); break;
                                case HtmlTokenizerState.DocTypeSystemIdentifierQuoted: next = ReadDocTypeSystemIdentifierQuoted (); break;
                                case HtmlTokenizerState.AfterDocTypeSystemIdentifier: next = ReadAfterDocTypeSystemIdentifier (); break;
                                case HtmlTokenizerState.BogusDocType: next = ReadBogusDocType (); break;
                                case HtmlTokenizerState.CDataSection: next = ReadCDataSection (); break;
                                case HtmlTokenizerState.EndOfFile:
                                default:
                                        // Nothing left to tokenize (or an unrecognized state value).
                                        token = null;
                                        return false;
                                }

                                // No token yet: run the handler for whatever state we are in now.
                                if (next is null)
                                        continue;

                                token = next;
                                return true;
                        }
                }
1✔
2928
        }
2929
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc